1998-01-16 07:13:05 -05:00
|
|
|
|
/** Network Kanji Filter. (PDS Version)
|
|
|
|
|
************************************************************************
|
|
|
|
|
** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
** $BO"Mm@h!'(B $B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j(B
|
|
|
|
|
** $B!J(BE-Mail Address: ichikawa@flab.fujitsu.co.jp$B!K(B
|
|
|
|
|
** Copyright (C) 1996,1998
|
|
|
|
|
** $BO"Mm@h!'(B $BN05eBg3X>pJs9)3X2J(B $B2OLn(B $B??<#(B mine/X0208 support
|
|
|
|
|
** $B!J(BE-Mail Address: kono@ie.u-ryukyu.ac.jp$B!K(B
|
|
|
|
|
** $BO"Mm@h!'(B COW for DOS & Win16 & Win32 & OS/2
|
|
|
|
|
** $B!J(BE-Mail Address: GHG00637@niftyserve.or.p$B!K(B
|
|
|
|
|
** $B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"(B
|
|
|
|
|
** $B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#(B
|
|
|
|
|
** $B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#(B
|
|
|
|
|
** $B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#(B
|
1998-01-16 07:13:05 -05:00
|
|
|
|
** Everyone is permitted to do anything on this program
|
1999-08-13 01:37:52 -04:00
|
|
|
|
** including copying, modifying, improving.
|
|
|
|
|
** as long as you don't try to pretend that you wrote it.
|
1998-01-16 07:13:05 -05:00
|
|
|
|
** i.e., the above copyright notice has to appear in all copies.
|
1999-08-13 01:37:52 -04:00
|
|
|
|
** You don't have to ask before copying or publishing.
|
1998-01-16 07:13:05 -05:00
|
|
|
|
** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
|
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
|
|
/* Identification strings: copyright holders, release number and patch level. */
static char *CopyRight =
    "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),1998 S. Kono, COW";
static char *Version =
    "1.7";
static char *Patchlevel =
    "0/9711/Shinji Kono";
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
**
|
|
|
|
|
**
|
|
|
|
|
**
|
|
|
|
|
** USAGE: nkf [flags] [file]
|
|
|
|
|
**
|
|
|
|
|
** Flags:
|
|
|
|
|
** b Output is buffered (DEFAULT)
|
|
|
|
|
** u Output is unbuffered
|
|
|
|
|
**
|
|
|
|
|
** t no operation
|
|
|
|
|
**
|
|
|
|
|
** j Output code is JIS 7 bit (DEFAULT SELECT)
|
|
|
|
|
** s Output code is MS Kanji (DEFAULT SELECT)
|
|
|
|
|
** e Output code is AT&T JIS (DEFAULT SELECT)
|
|
|
|
|
** l Output code is JIS 7bit and ISO8859-1 Latin-1
|
|
|
|
|
**
|
|
|
|
|
** m MIME conversion for ISO-2022-JP
|
|
|
|
|
** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
|
|
|
|
|
** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
|
|
|
|
|
**
|
|
|
|
|
** r {de/en}crypt ROT13/47
|
|
|
|
|
**
|
|
|
|
|
** v display Version
|
|
|
|
|
**
|
|
|
|
|
** T Text mode output (for MS-DOS)
|
|
|
|
|
**
|
|
|
|
|
** x Do not convert X0201 kana into X0208
|
|
|
|
|
** Z Convert X0208 alphabet to ASCII
|
|
|
|
|
**
|
|
|
|
|
** f60 fold option
|
|
|
|
|
**
|
|
|
|
|
** m MIME decode
|
|
|
|
|
** B try to fix broken JIS, missing Escape
|
|
|
|
|
** B[1-9] broken level
|
|
|
|
|
**
|
|
|
|
|
** O Output to 'nkf.out' file
|
|
|
|
|
** d Delete \r in line feed
|
|
|
|
|
** c Add \r in line feed
|
|
|
|
|
**/
|
|
|
|
|
/******************************/
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* $B%G%U%)%k%H$N=PNO%3!<%IA*Br(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* Select DEFAULT_CODE */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#define DEFAULT_CODE_JIS
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* #define DEFAULT_CODE_SJIS */
|
|
|
|
|
/* #define DEFAULT_CODE_EUC */
|
|
|
|
|
/******************************/
|
|
|
|
|
|
|
|
|
|
#if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
|
|
|
|
|
#define MSDOS
|
|
|
|
|
#endif
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#ifndef PERL_XS
#include <stdio.h>
#include <stdlib.h>
#endif
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
#if defined(MSDOS) || defined(__OS2__)
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
#include <io.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef MSDOS
|
|
|
|
|
#ifdef LSI_C
|
|
|
|
|
#define setbinmode(fp) fsetbin(fp)
|
|
|
|
|
#else /* Microsoft C, Turbo C */
|
|
|
|
|
#define setbinmode(fp) setmode(fileno(fp), O_BINARY)
|
|
|
|
|
#endif
|
|
|
|
|
#else /* UNIX,OS/2 */
|
|
|
|
|
#define setbinmode(fp)
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef _IOFBF /* SysV and MSDOS */
|
|
|
|
|
#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
|
|
|
|
|
#else /* BSD */
|
|
|
|
|
#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*Borland C++ 4.5 EasyWin*/
|
|
|
|
|
#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
|
|
|
|
|
#define EASYWIN
|
|
|
|
|
#include <windows.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#define FALSE 0
|
|
|
|
|
#define TRUE 1
|
|
|
|
|
|
|
|
|
|
/* state of output_mode and input_mode */
|
|
|
|
|
|
|
|
|
|
#define ASCII 0
|
|
|
|
|
#define X0208 1
|
|
|
|
|
#define X0201 2
|
|
|
|
|
#define NO_X0201 3
|
|
|
|
|
#define JIS_INPUT 4
|
|
|
|
|
#define SJIS_INPUT 5
|
|
|
|
|
#define LATIN1_INPUT 6
|
|
|
|
|
#define FIXED_MIME 7
|
|
|
|
|
#define DOUBLE_SPACE (-2) /* parenthesised so the sign cannot bind to surrounding tokens */
|
|
|
|
|
|
|
|
|
|
#define NL 0x0a
|
|
|
|
|
#define ESC 0x1b
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#define SPACE 0x20
|
1998-01-16 07:13:05 -05:00
|
|
|
|
#define AT 0x40
|
|
|
|
|
#define SSP 0xa0
|
|
|
|
|
#define DEL 0x7f
|
|
|
|
|
#define SI 0x0f
|
|
|
|
|
#define SO 0x0e
|
|
|
|
|
#define SSO 0x8e
|
|
|
|
|
|
|
|
|
|
#define HOLD_SIZE 32
|
|
|
|
|
#define IOBUF_SIZE 16384
|
|
|
|
|
|
|
|
|
|
#define DEFAULT_J 'B'
|
|
|
|
|
#define DEFAULT_R 'B'
|
|
|
|
|
|
|
|
|
|
#define SJ0162 0x00e1 /* 01 - 62 ku offset */
|
|
|
|
|
#define SJ6394 0x0161 /* 63 - 94 ku offset */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* MIME preprocessor */
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#undef STRICT_MIME /* do stupid strict mime integrity check */
|
|
|
|
|
#define GETC(p) ((!mime_mode)?getc(p):mime_getc(p))
|
|
|
|
|
#define UNGETC(c,p) ((!mime_mode)?ungetc(c,p):mime_ungetc(c))
|
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
#ifdef EASYWIN /*Easy Win */
|
|
|
|
|
extern POINT _BufferSize;
|
|
|
|
|
#endif
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* function prototype */
|
|
|
|
|
|
|
|
|
|
#ifndef _
|
|
|
|
|
# ifdef __STDC__
|
|
|
|
|
# define _(args) args
|
|
|
|
|
# else
|
|
|
|
|
# define _(args) ()
|
|
|
|
|
# endif
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifndef PERL_XS
|
|
|
|
|
static void noconvert _((FILE *f));
|
|
|
|
|
static int mime_integrity _((FILE *f,unsigned char *p));
|
|
|
|
|
static int usage _((void));
|
|
|
|
|
static char stdibuf[IOBUF_SIZE];
|
|
|
|
|
static char stdobuf[IOBUF_SIZE];
|
|
|
|
|
static unsigned int mime_input = 0; /* undecoded */
|
|
|
|
|
static int end_check;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
static void kanji_convert _((FILE *f));
|
|
|
|
|
static void h_conv _((FILE *f,int c2,int c1));
|
|
|
|
|
static int push_hold_buf _((int c2,int c1));
|
|
|
|
|
static void s_iconv _((int c2,int c1));
|
|
|
|
|
static void e_oconv _((int c2,int c1));
|
|
|
|
|
static void s_oconv _((int c2,int c1));
|
|
|
|
|
static void j_oconv _((int c2,int c1));
|
|
|
|
|
static int line_fold _((int c2,int c1));
|
|
|
|
|
static int pre_convert _((int c1,int c2));
|
|
|
|
|
static int mime_begin _((FILE *f));
|
|
|
|
|
static int mime_getc _((FILE *f));
|
|
|
|
|
static int mime_ungetc _((unsigned int c));
|
|
|
|
|
static int base64decode _((int c));
|
|
|
|
|
static void arguments _((char *c));
|
|
|
|
|
static void reinit _((void));
|
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* buffers */
|
|
|
|
|
|
|
|
|
|
static unsigned char hold_buf[HOLD_SIZE*2];
|
|
|
|
|
static int hold_count;
|
|
|
|
|
|
|
|
|
|
/* MIME preprocessor fifo */
|
|
|
|
|
|
|
|
|
|
#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
|
|
|
|
|
#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
|
|
|
|
|
#define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
|
|
|
|
|
static unsigned char mime_buf[MIME_BUF_SIZE];
|
|
|
|
|
static unsigned int mime_top = 0;
|
|
|
|
|
static unsigned int mime_last = 0; /* decoded */
|
|
|
|
|
|
|
|
|
|
/* flags */
|
|
|
|
|
static int unbuf_f = FALSE;
|
|
|
|
|
static int estab_f = FALSE;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static int nop_f = FALSE;
|
|
|
|
|
static int binmode_f = TRUE; /* binary mode */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
static int rot_f = FALSE; /* rot13/47 mode */
|
|
|
|
|
static int input_f = FALSE; /* non fixed input code */
|
|
|
|
|
static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static int mime_f = TRUE; /* convert MIME B base64 or Q */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
static int mimebuf_f = FALSE; /* MIME buffered input */
|
|
|
|
|
static int broken_f = FALSE; /* convert ESC-less broken JIS */
|
|
|
|
|
static int iso8859_f = FALSE; /* ISO8859 through */
|
|
|
|
|
#if defined(MSDOS) || defined(__OS2__)
|
|
|
|
|
static int x0201_f = TRUE; /* Assume JISX0201 kana */
|
|
|
|
|
#else
|
|
|
|
|
static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* X0208 -> ASCII converter */
|
|
|
|
|
|
|
|
|
|
static int c1_return;
|
|
|
|
|
|
|
|
|
|
/* fold parameter */
|
|
|
|
|
static int line = 0; /* chars in line */
|
|
|
|
|
static int prev = 0;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static int fold_f = FALSE;
|
|
|
|
|
static int fold_len = 0;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
/* options */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static char kanji_intro = DEFAULT_J,
|
1998-01-16 07:13:05 -05:00
|
|
|
|
ascii_intro = DEFAULT_R;
|
|
|
|
|
|
|
|
|
|
/* Folding */
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
int line_fold();
|
1998-01-16 07:13:05 -05:00
|
|
|
|
#define FOLD_MARGIN 10
|
|
|
|
|
#define DEFAULT_FOLD 60
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* converters */
|
|
|
|
|
|
|
|
|
|
#ifdef DEFAULT_CODE_JIS
|
|
|
|
|
# define DEFAULT_CONV j_oconv
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef DEFAULT_CODE_SJIS
|
|
|
|
|
# define DEFAULT_CONV s_oconv
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef DEFAULT_CODE_EUC
|
|
|
|
|
# define DEFAULT_CONV e_oconv
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
static void (*iconv) _((int c2,int c1));
|
|
|
|
|
/* s_iconv or oconv */
|
|
|
|
|
static void (*oconv) _((int c2,int c1)) = DEFAULT_CONV;
|
|
|
|
|
/* [ejs]_oconv */
|
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* Global states */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static int output_mode = ASCII, /* output kanji mode */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
input_mode = ASCII, /* input kanji mode */
|
|
|
|
|
shift_mode = FALSE; /* TRUE shift out, or X0201 */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static int mime_mode = FALSE; /* MIME mode B base64, Q hex */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
/* X0201 / X0208 conversion tables */
|
|
|
|
|
|
|
|
|
|
/*
 * X0201 kana conversion table (plain form).
 *
 * kanji_convert() indexes this as cv[(c - SSP) * 2] and
 * cv[(c - SSP) * 2 + 1] for a one-byte kana code c with SSP <= c < 0xe0,
 * and passes the two bytes to the output converter as (c2, c1).
 * 64 two-byte entries followed by a 0x00,0x00 terminator pair.
 */
unsigned char cv[]= {
    0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
    0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
    0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
    0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
    0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
    0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
    0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
    0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
    0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
    0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
    0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
    0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
    0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
    0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
    0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
    0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
    0x00,0x00};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * X0201 kana conversion table for kana followed by a dakuten
 * (voiced sound mark, 0xde).
 *
 * Indexed like cv[]: dv[(c - SSP) * 2] and dv[(c - SSP) * 2 + 1].
 * A zero first byte means the kana has no voiced form; kanji_convert()
 * tests that byte before looking ahead for the mark.
 */
unsigned char dv[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
    0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
    0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
    0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
    0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
    0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
|
|
|
|
|
|
|
|
|
|
/*
 * X0201 kana conversion table for kana followed by a handakuten
 * (semi-voiced sound mark, 0xdf).
 *
 * Indexed like cv[]: ev[(c - SSP) * 2] and ev[(c - SSP) * 2 + 1].
 * A zero first byte means the kana has no semi-voiced form.
 */
unsigned char ev[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
    0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * X0208 kigou (symbol) conversion table, labelled 0x8140 - 0x819e.
 *
 * 96 one-byte entries; the non-zero ones are ASCII punctuation codes.
 * NOTE(review): the code that reads this table is outside this part of
 * the file -- presumably a zero entry means "no ASCII equivalent";
 * confirm against the reader.
 */
unsigned char fv[] = {
    0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
    0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
    0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
    0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
    0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
    0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
    0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
} ;
|
|
|
|
|
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static int file_out = FALSE;
|
|
|
|
|
static int add_cr = FALSE;
|
|
|
|
|
static int del_cr = FALSE;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#ifndef PERL_XS
|
|
|
|
|
int
|
|
|
|
|
main(argc, argv)
|
1998-01-16 07:13:05 -05:00
|
|
|
|
int argc;
|
|
|
|
|
char **argv;
|
|
|
|
|
{
|
1999-08-13 01:37:52 -04:00
|
|
|
|
FILE *fin;
|
|
|
|
|
char *cp;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
#ifdef EASYWIN /*Easy Win */
|
|
|
|
|
_BufferSize.y = 400;/*Set Scroll Buffer Size*/
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
|
|
|
|
|
cp = *argv;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
arguments(cp);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(iso8859_f && (oconv != j_oconv || !x0201_f )) {
|
|
|
|
|
fprintf(stderr,"Mixed ISO8859/JISX0201/SJIS/EUC output is not allowed.\n");
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(binmode_f == TRUE)
|
|
|
|
|
#ifdef __OS2__
|
|
|
|
|
if(freopen("","wb",stdout) == NULL)
|
|
|
|
|
return (-1);
|
|
|
|
|
#else
|
|
|
|
|
setbinmode(stdout);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
if(unbuf_f)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
setbuf(stdout, (char *) NULL);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else
|
1999-08-13 01:37:52 -04:00
|
|
|
|
setvbuffer(stdout, stdobuf, IOBUF_SIZE);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
if(argc == 0) {
|
|
|
|
|
if(binmode_f == TRUE)
|
|
|
|
|
#ifdef __OS2__
|
|
|
|
|
if(freopen("","rb",stdin) == NULL) return (-1);
|
|
|
|
|
#else
|
|
|
|
|
setbinmode(stdin);
|
|
|
|
|
#endif
|
1999-08-13 01:37:52 -04:00
|
|
|
|
setvbuffer(stdin, stdibuf, IOBUF_SIZE);
|
|
|
|
|
if(nop_f)
|
|
|
|
|
noconvert(stdin);
|
|
|
|
|
else
|
|
|
|
|
kanji_convert(stdin);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else {
|
|
|
|
|
while (argc--) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((fin = fopen(*argv++, "r")) == NULL) {
|
|
|
|
|
perror(*--argv);
|
|
|
|
|
return(-1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else {
|
|
|
|
|
/* reopen file for stdout */
|
|
|
|
|
if(file_out == TRUE){
|
|
|
|
|
if(argc == 1 ) {
|
|
|
|
|
if(freopen(*argv++, "w", stdout) == NULL) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
perror(*--argv);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
return (-1);
|
|
|
|
|
}
|
|
|
|
|
argc--;
|
|
|
|
|
} else {
|
|
|
|
|
if(freopen("nkf.out", "w", stdout) == NULL) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
perror(*--argv);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
return (-1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(binmode_f == TRUE) {
|
|
|
|
|
#ifdef __OS2__
|
|
|
|
|
if(freopen("","wb",stdout) == NULL)
|
|
|
|
|
return (-1);
|
|
|
|
|
#else
|
|
|
|
|
setbinmode(stdout);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(binmode_f == TRUE)
|
|
|
|
|
#ifdef __OS2__
|
|
|
|
|
if(freopen("","rb",fin) == NULL)
|
|
|
|
|
return (-1);
|
|
|
|
|
#else
|
|
|
|
|
setbinmode(fin);
|
|
|
|
|
#endif
|
1999-08-13 01:37:52 -04:00
|
|
|
|
setvbuffer(fin, stdibuf, IOBUF_SIZE);
|
|
|
|
|
if(nop_f)
|
|
|
|
|
noconvert(fin);
|
|
|
|
|
else
|
|
|
|
|
kanji_convert(fin);
|
|
|
|
|
fclose(fin);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef EASYWIN /*Easy Win */
|
|
|
|
|
if(file_out == FALSE)
|
|
|
|
|
scanf("%d",&end_check);
|
|
|
|
|
else
|
|
|
|
|
fclose(stdout);
|
|
|
|
|
#else /* for Other OS */
|
|
|
|
|
if(file_out == TRUE)
|
|
|
|
|
fclose(stdout);
|
|
|
|
|
#endif
|
|
|
|
|
return (0);
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#endif
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static void
|
|
|
|
|
arguments(cp)
|
|
|
|
|
char *cp;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
1999-08-13 01:37:52 -04:00
|
|
|
|
while (*cp) {
|
|
|
|
|
switch (*cp++) {
|
|
|
|
|
case 'b': /* buffered mode */
|
|
|
|
|
unbuf_f = FALSE;
|
|
|
|
|
continue;
|
|
|
|
|
case 'u': /* non bufferd mode */
|
|
|
|
|
unbuf_f = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
case 't': /* transparent mode */
|
|
|
|
|
nop_f = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
case 'j': /* JIS output */
|
|
|
|
|
case 'n':
|
|
|
|
|
oconv = j_oconv;
|
|
|
|
|
continue;
|
|
|
|
|
case 'e': /* AT&T EUC output */
|
|
|
|
|
oconv = e_oconv;
|
|
|
|
|
continue;
|
|
|
|
|
case 's': /* SJIS output */
|
|
|
|
|
oconv = s_oconv;
|
|
|
|
|
continue;
|
|
|
|
|
case 'l': /* ISO8859 Latin-1 support, no conversion */
|
|
|
|
|
iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
|
|
|
|
|
input_f = LATIN1_INPUT;
|
|
|
|
|
continue;
|
|
|
|
|
case 'i': /* Kanji IN ESC-$-@/B */
|
|
|
|
|
if(*cp=='@'||*cp=='B')
|
|
|
|
|
kanji_intro = *cp++;
|
|
|
|
|
continue;
|
|
|
|
|
case 'o': /* ASCII IN ESC-(-J/B */
|
|
|
|
|
if(*cp=='J'||*cp=='B'||*cp=='H')
|
|
|
|
|
ascii_intro = *cp++;
|
|
|
|
|
continue;
|
|
|
|
|
case 'r':
|
|
|
|
|
rot_f = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
#if defined(MSDOS) || defined(__OS2__)
|
|
|
|
|
case 'T':
|
|
|
|
|
binmode_f = FALSE;
|
|
|
|
|
continue;
|
|
|
|
|
#endif
|
|
|
|
|
#ifndef PERL_XS
|
|
|
|
|
case 'v':
|
|
|
|
|
usage();
|
|
|
|
|
exit(1);
|
|
|
|
|
break;
|
|
|
|
|
#endif
|
|
|
|
|
/* Input code assumption */
|
|
|
|
|
case 'J': /* JIS input */
|
|
|
|
|
case 'E': /* AT&T EUC input */
|
|
|
|
|
input_f = JIS_INPUT;
|
|
|
|
|
continue;
|
|
|
|
|
case 'S': /* MS Kanji input */
|
|
|
|
|
input_f = SJIS_INPUT;
|
|
|
|
|
if(x0201_f==NO_X0201) x0201_f=TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
case 'Z': /* Convert X0208 alphabet to asii */
|
|
|
|
|
/* bit:0 Convert X0208
|
|
|
|
|
bit:1 Convert Kankaku to one space
|
|
|
|
|
bit:2 Convert Kankaku to two spaces
|
|
|
|
|
*/
|
|
|
|
|
if('9'>= *cp && *cp>='0')
|
|
|
|
|
alpha_f |= 1<<(*cp++ -'0');
|
|
|
|
|
else
|
|
|
|
|
alpha_f |= TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
|
|
|
|
|
x0201_f = FALSE; /* No X0201->X0208 conversion */
|
|
|
|
|
/* accept X0201
|
|
|
|
|
ESC-(-I in JIS, EUC, MS Kanji
|
|
|
|
|
SI/SO in JIS, EUC, MS Kanji
|
|
|
|
|
SSO in EUC, JIS, not in MS Kanji
|
|
|
|
|
MS Kanji (0xa0-0xdf)
|
|
|
|
|
output X0201
|
|
|
|
|
ESC-(-I in JIS (0x20-0x5f)
|
|
|
|
|
SSO in EUC (0xa0-0xdf)
|
|
|
|
|
0xa0-0xd in MS Kanji (0xa0-0xdf)
|
|
|
|
|
*/
|
|
|
|
|
continue;
|
|
|
|
|
case 'X': /* Assume X0201 kana */
|
|
|
|
|
/* Default value is NO_X0201 for EUC/MS-Kanji mix */
|
|
|
|
|
x0201_f = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
case 'f': /* folding -f60 or -f */
|
|
|
|
|
fold_f = TRUE;
|
|
|
|
|
fold_len = atoi(cp);
|
|
|
|
|
if(!(0<fold_len && fold_len<BUFSIZ))
|
|
|
|
|
fold_len = DEFAULT_FOLD;
|
|
|
|
|
while('0'<= *cp && *cp <='9') cp++;
|
|
|
|
|
continue;
|
|
|
|
|
case 'm': /* MIME support */
|
|
|
|
|
mime_f = TRUE;
|
|
|
|
|
if(*cp=='B'||*cp=='Q') {
|
|
|
|
|
mime_mode = *cp++;
|
|
|
|
|
mimebuf_f = FIXED_MIME;
|
|
|
|
|
} else if (*cp=='0') {
|
|
|
|
|
mime_f = FALSE;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
case 'M': /* MIME output */
|
|
|
|
|
oconv = j_oconv; /* sorry... not yet done.. */
|
|
|
|
|
continue;
|
|
|
|
|
case 'B': /* Broken JIS support */
|
|
|
|
|
/* bit:0 no ESC JIS
|
|
|
|
|
bit:1 allow any x on ESC-(-x or ESC-$-x
|
|
|
|
|
bit:2 reset to ascii on NL
|
|
|
|
|
*/
|
|
|
|
|
if('9'>= *cp && *cp>='0')
|
|
|
|
|
broken_f |= 1<<(*cp++ -'0');
|
|
|
|
|
else
|
|
|
|
|
broken_f |= TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
#ifndef PERL_XS
|
|
|
|
|
case 'O':/* for Output file */
|
|
|
|
|
file_out = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
#endif
|
|
|
|
|
case 'c':/* add cr code */
|
|
|
|
|
add_cr = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
case 'd':/* delete cr code */
|
|
|
|
|
del_cr = TRUE;
|
|
|
|
|
continue;
|
|
|
|
|
default:
|
|
|
|
|
/* bogus option but ignored */
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#ifndef PERL_XS
|
|
|
|
|
/*
 * noconvert -- copy stream f to standard output unchanged, one character
 * at a time, until EOF.  main() calls this instead of kanji_convert()
 * when nop_f is set.
 */
static void
noconvert(f)
FILE *f;
{
    int c;

    while ((c = getc(f)) != EOF)
        putchar(c);
}
|
|
|
|
|
#endif
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
kanji_convert(f)
|
|
|
|
|
FILE *f;
|
|
|
|
|
{
|
|
|
|
|
int c1, c2;
|
|
|
|
|
|
|
|
|
|
c2 = 0;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
if(input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
|
|
|
|
|
estab_f = TRUE; iconv = oconv;
|
|
|
|
|
} else if(input_f == SJIS_INPUT) {
|
|
|
|
|
estab_f = TRUE; iconv = s_iconv;
|
|
|
|
|
} else {
|
|
|
|
|
estab_f = FALSE; iconv = oconv;
|
|
|
|
|
}
|
|
|
|
|
input_mode = ASCII;
|
|
|
|
|
output_mode = ASCII;
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
|
|
|
|
|
#define NEXT continue /* no output, get next */
|
|
|
|
|
#define SEND ; /* output c1 and c2, get next */
|
|
|
|
|
#define LAST break /* end of loop, go closing */
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
while ((c1 = GETC(f)) != EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c2) {
|
|
|
|
|
/* second byte */
|
|
|
|
|
if(c2 > DEL) {
|
|
|
|
|
/* in case of 8th bit is on */
|
|
|
|
|
if(!estab_f) {
|
|
|
|
|
/* in case of not established yet */
|
|
|
|
|
if(c1 > SSP) {
|
|
|
|
|
/* It is still ambiguious */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
h_conv(f, c2, c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c2 = 0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c1 < AT) {
|
|
|
|
|
/* ignore bogus code */
|
|
|
|
|
c2 = 0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
|
|
|
|
/* established */
|
|
|
|
|
/* it seems to be MS Kanji */
|
|
|
|
|
estab_f = TRUE;
|
|
|
|
|
iconv = s_iconv;
|
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
/* in case of already established */
|
|
|
|
|
if(c1 < AT) {
|
|
|
|
|
/* ignore bogus code */
|
|
|
|
|
c2 = 0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else
|
|
|
|
|
/* 7 bit code */
|
|
|
|
|
/* it might be kanji shitfted */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 == DEL) || (c1 <= SPACE)) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* ignore bogus first code */
|
|
|
|
|
c2 = 0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else {
|
|
|
|
|
/* first byte */
|
|
|
|
|
if(c1 > DEL) {
|
|
|
|
|
/* 8 bit code */
|
|
|
|
|
if(!estab_f && !iso8859_f) {
|
|
|
|
|
/* not established yet */
|
|
|
|
|
if(c1 < SSP) {
|
|
|
|
|
/* it seems to be MS Kanji */
|
|
|
|
|
estab_f = TRUE;
|
|
|
|
|
iconv = s_iconv;
|
|
|
|
|
} else if(c1 < 0xe0) {
|
|
|
|
|
/* it seems to be EUC */
|
|
|
|
|
estab_f = TRUE;
|
|
|
|
|
iconv = oconv;
|
|
|
|
|
} else {
|
|
|
|
|
/* still ambiguious */
|
|
|
|
|
}
|
|
|
|
|
c2 = c1;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else { /* estab_f==TRUE */
|
|
|
|
|
if(iso8859_f) {
|
|
|
|
|
SEND;
|
|
|
|
|
} else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
|
|
|
|
|
/* SJIS X0201 Case... */
|
|
|
|
|
/* This is too arrogant, but ... */
|
|
|
|
|
if(x0201_f==NO_X0201) {
|
|
|
|
|
iconv = oconv;
|
|
|
|
|
c2 = c1;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
if(x0201_f) {
|
|
|
|
|
if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
|
|
|
|
|
/* look ahead for X0201/X0208conversion */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c2 = GETC(f)) == EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
LAST;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
} else if(c2==(0xde)) { /* $BByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
|
|
|
|
|
c2=0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* $BH>ByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
|
|
|
|
|
c2=0;
|
|
|
|
|
NEXT;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
UNGETC(c2,f); c2 = 0;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else if(c1==SSO && iconv != s_iconv) {
|
|
|
|
|
/* EUC X0201 Case */
|
|
|
|
|
/* This is too arrogant
|
|
|
|
|
if(x0201_f == NO_X0201) {
|
|
|
|
|
estab_f = FALSE;
|
|
|
|
|
c2 = 0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
c1 = GETC(f); /* skip SSO */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
euc_1byte_check:
|
|
|
|
|
if(x0201_f && SSP<=c1 && c1<0xe0) {
|
|
|
|
|
if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c2 = GETC(f)) == EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
LAST;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* forward lookup $BByE@(B/$BH>ByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c2 != SSO) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
UNGETC(c2,f); c2 = 0;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
NEXT;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
} else if((c2 = GETC(f)) == EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
(*oconv)(0,SSO);
|
|
|
|
|
LAST;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
} else if(c2==(0xde)) { /* $BByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
|
|
|
|
|
c2=0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* $BH>ByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
|
|
|
|
|
c2=0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
/* we have to check this c2 */
|
|
|
|
|
/* and no way to push back SSO */
|
|
|
|
|
c1 = c2; c2 = 0;
|
|
|
|
|
goto euc_1byte_check;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else if(c1 < SSP && iconv != s_iconv) {
|
|
|
|
|
/* strange code in EUC */
|
|
|
|
|
iconv = s_iconv; /* try SJIS */
|
|
|
|
|
c2 = c1;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
|
|
|
|
/* already established */
|
|
|
|
|
c2 = c1;
|
|
|
|
|
NEXT;
|
|
|
|
|
}
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
} else if((c1 > SPACE) && (c1 != DEL)) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* in case of Roman characters */
|
|
|
|
|
if(shift_mode) {
|
|
|
|
|
c1 |= 0x80;
|
|
|
|
|
/* output 1 shifted byte */
|
|
|
|
|
if(x0201_f && (!iso8859_f||input_mode==X0201) &&
|
|
|
|
|
SSP<=c1 && c1<0xe0 ) {
|
|
|
|
|
if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c2 = GETC(f)) == EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
LAST;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
} else if(c2==(0xde&0x7f)) { /* $BByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
|
|
|
|
|
c2=0;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* $BH>ByE@(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
(*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
|
|
|
|
|
c2=0;
|
|
|
|
|
NEXT;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
UNGETC(c2,f); c2 = 0;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
(*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else if(c1 == '(' && broken_f && input_mode == X0208
|
|
|
|
|
&& !mime_mode ) {
|
|
|
|
|
/* Try to recover missing escape */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = GETC(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, '(');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
} else {
|
|
|
|
|
if(c1 == 'B' || c1 == 'J' || c1 == 'H') {
|
|
|
|
|
input_mode = ASCII; shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(0, '(');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* do not modify various input_mode */
|
|
|
|
|
/* It can be vt100 sequence */
|
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if(input_mode == X0208) {
|
|
|
|
|
/* in case of Kanji shifted */
|
|
|
|
|
c2 = c1;
|
|
|
|
|
NEXT;
|
|
|
|
|
/* goto next_byte */
|
|
|
|
|
} else if(c1 == '=' && mime_f && !mime_mode ) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = getc(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, '=');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
} else if(c1 == '?') {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* =? is mime conversiooon start sequence */
|
|
|
|
|
if(mime_begin(f) == EOF) /* check in detail */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
else
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(0, '=');
|
|
|
|
|
ungetc(c1,f);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
NEXT;
|
|
|
|
|
}
|
|
|
|
|
} else if(c1 == '$' && broken_f && !mime_mode) {
|
|
|
|
|
/* try to recover missing escape */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = GETC(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, '$');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
} else if(c1 == '@'|| c1 == 'B') {
|
|
|
|
|
/* in case of Kanji in ESC sequence */
|
|
|
|
|
input_mode = X0208;
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
|
|
|
|
/* sorry */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(0, '$');
|
|
|
|
|
(*oconv)(0, c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
NEXT;
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else if(c1 == SI) {
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c1 == SO) {
|
|
|
|
|
shift_mode = TRUE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c1 == ESC ) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = GETC(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, ESC);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
} else if(c1 == '$') {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = GETC(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, ESC);
|
|
|
|
|
(*oconv)(0, '$');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
} else if(c1 == '@'|| c1 == 'B') {
|
|
|
|
|
/* This is kanji introduction */
|
|
|
|
|
input_mode = X0208;
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
} else if(c1 == '(') {
|
|
|
|
|
if((c1 = GETC(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, ESC);
|
|
|
|
|
(*oconv)(0, '$');
|
|
|
|
|
(*oconv)(0, '(');
|
|
|
|
|
LAST;
|
|
|
|
|
} else if(c1 == '@'|| c1 == 'B') {
|
|
|
|
|
/* This is kanji introduction */
|
|
|
|
|
input_mode = X0208;
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
|
|
|
|
(*oconv)(0, ESC);
|
|
|
|
|
(*oconv)(0, '$');
|
|
|
|
|
(*oconv)(0, '(');
|
|
|
|
|
(*oconv)(0, c1);
|
|
|
|
|
NEXT;
|
|
|
|
|
}
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else if(broken_f&0x2) {
|
|
|
|
|
input_mode = X0208;
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(0, ESC);
|
|
|
|
|
(*oconv)(0, '$');
|
|
|
|
|
(*oconv)(0, c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
NEXT;
|
|
|
|
|
}
|
|
|
|
|
} else if(c1 == '(') {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = GETC(f)) == EOF) {
|
|
|
|
|
(*oconv)(0, ESC);
|
|
|
|
|
(*oconv)(0, '(');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
LAST;
|
|
|
|
|
} else {
|
|
|
|
|
if(c1 == 'I') {
|
|
|
|
|
/* This is X0201 kana introduction */
|
|
|
|
|
input_mode = X0201; shift_mode = X0201;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(c1 == 'B' || c1 == 'J' || c1 == 'H') {
|
|
|
|
|
/* This is X0208 kanji introduction */
|
|
|
|
|
input_mode = ASCII; shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else if(broken_f&0x2) {
|
|
|
|
|
input_mode = ASCII; shift_mode = FALSE;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(0, ESC);
|
|
|
|
|
(*oconv)(0, '(');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* maintain various input_mode here */
|
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
/* lonely ESC */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(0, ESC);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
} else if(c1 == NL && broken_f&4) {
|
|
|
|
|
input_mode = ASCII;
|
|
|
|
|
SEND;
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
/* send: */
|
|
|
|
|
if(input_mode == X0208)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*iconv)(c2, c1); /* can be EUC/SJIS */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c2 = 0;
|
|
|
|
|
continue;
|
|
|
|
|
/* goto next_word */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* epilogue */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*iconv)(EOF, 0);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
static void
|
1999-08-13 01:37:52 -04:00
|
|
|
|
h_conv(f, c2, c1)
|
|
|
|
|
FILE *f;
|
|
|
|
|
int c1, c2;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
1999-08-13 01:37:52 -04:00
|
|
|
|
int wc;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** it must NOT be in the kanji shifte sequence */
|
|
|
|
|
/** it must NOT be written in JIS7 */
|
|
|
|
|
/** and it must be after 2 byte 8bit code */
|
|
|
|
|
|
|
|
|
|
hold_count = 0;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
push_hold_buf(c2, c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c2 = 0;
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
while ((c1 = GETC(f)) != EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c2) {
|
|
|
|
|
/* second byte */
|
|
|
|
|
if(!estab_f) {
|
|
|
|
|
/* not established */
|
|
|
|
|
if(c1 > SSP) {
|
|
|
|
|
/* it is still ambiguious yet */
|
|
|
|
|
SEND;
|
|
|
|
|
} else if(c1 < AT) {
|
|
|
|
|
/* ignore bogus first byte */
|
|
|
|
|
c2 = 0;
|
|
|
|
|
SEND;
|
|
|
|
|
} else {
|
|
|
|
|
/* now established */
|
|
|
|
|
/* it seems to be MS Kanji */
|
|
|
|
|
estab_f = TRUE;
|
|
|
|
|
iconv = s_iconv;
|
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
SEND;
|
|
|
|
|
} else {
|
|
|
|
|
/* First byte */
|
|
|
|
|
if(c1 > DEL) {
|
|
|
|
|
/* 8th bit is on */
|
|
|
|
|
if(c1 < SSP) {
|
|
|
|
|
/* it seems to be MS Kanji */
|
|
|
|
|
estab_f = TRUE;
|
|
|
|
|
iconv = s_iconv;
|
|
|
|
|
} else if(c1 < 0xe0) {
|
|
|
|
|
/* it seems to be EUC */
|
|
|
|
|
estab_f = TRUE;
|
|
|
|
|
iconv = oconv;
|
|
|
|
|
} else {
|
|
|
|
|
/* still ambiguious */
|
|
|
|
|
}
|
|
|
|
|
c2 = c1;
|
|
|
|
|
NEXT;
|
|
|
|
|
} else
|
|
|
|
|
/* 7 bit code , then send without any process */
|
|
|
|
|
SEND;
|
|
|
|
|
}
|
|
|
|
|
/* send: */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((push_hold_buf(c2, c1) == EOF) || estab_f)
|
1998-01-16 07:13:05 -05:00
|
|
|
|
break;
|
|
|
|
|
c2 = 0;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** now,
|
|
|
|
|
** 1) EOF is detected, or
|
|
|
|
|
** 2) Code is established, or
|
|
|
|
|
** 3) Buffer is FULL (but last word is pushed)
|
|
|
|
|
**
|
|
|
|
|
** in 1) and 3) cases, we continue to use
|
|
|
|
|
** Kanji codes by oconv and leave estab_f unchanged.
|
|
|
|
|
**/
|
|
|
|
|
|
|
|
|
|
for (wc = 0; wc < hold_count; wc += 2) {
|
|
|
|
|
c2 = hold_buf[wc];
|
|
|
|
|
c1 = hold_buf[wc+1];
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*iconv)(c2, c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
1999-08-13 01:37:52 -04:00
|
|
|
|
push_hold_buf(c2, c1)
|
|
|
|
|
int c2, c1;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
if(hold_count >= HOLD_SIZE*2)
|
|
|
|
|
return (EOF);
|
|
|
|
|
hold_buf[hold_count++] = c2;
|
|
|
|
|
hold_buf[hold_count++] = c1;
|
|
|
|
|
return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
1999-08-13 01:37:52 -04:00
|
|
|
|
s_iconv(c2, c1)
|
|
|
|
|
int c2,
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c1;
|
|
|
|
|
{
|
|
|
|
|
if((c2 == EOF) || (c2 == 0)) {
|
|
|
|
|
/* NOP */
|
|
|
|
|
} else {
|
|
|
|
|
c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
|
|
|
|
|
if(c1 < 0x9f)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else {
|
|
|
|
|
c1 = c1 - 0x7e;
|
|
|
|
|
c2++;
|
|
|
|
|
}
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
(*oconv)(c2, c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
1999-08-13 01:37:52 -04:00
|
|
|
|
e_oconv(c2, c1)
|
|
|
|
|
int c2, c1;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
c2 = pre_convert(c1,c2); c1 = c1_return;
|
|
|
|
|
if(fold_f) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
switch(line_fold(c2,c1)) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
case '\n':
|
|
|
|
|
if(add_cr == TRUE) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c1 = '\n';
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\n');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
break;
|
|
|
|
|
case 0: return;
|
|
|
|
|
case '\r':
|
|
|
|
|
c1 = '\n'; c2 = 0;
|
|
|
|
|
break;
|
|
|
|
|
case '\t':
|
|
|
|
|
case ' ':
|
|
|
|
|
c1 = ' '; c2 = 0;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(c2==DOUBLE_SPACE) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(' '); putchar(' ');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if(c2 == EOF)
|
|
|
|
|
return;
|
|
|
|
|
else if(c2 == 0 && (c1&0x80)) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(SSO); putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else if(c2 == 0) {
|
|
|
|
|
if(c1 == '\n' && add_cr == TRUE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 != '\r')
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else if(del_cr == FALSE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else {
|
|
|
|
|
if((c1<0x20 || 0x7e<c1) ||
|
|
|
|
|
(c2<0x20 || 0x7e<c2)) {
|
|
|
|
|
estab_f = FALSE;
|
|
|
|
|
return; /* too late to rescue this char */
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c2 | 0x080);
|
|
|
|
|
putchar(c1 | 0x080);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
return;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
1999-08-13 01:37:52 -04:00
|
|
|
|
s_oconv(c2, c1)
|
|
|
|
|
int c2, c1;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
c2 = pre_convert(c1,c2); c1 = c1_return;
|
|
|
|
|
if(fold_f) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
switch(line_fold(c2,c1)) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
case '\n':
|
|
|
|
|
if(add_cr == TRUE) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c1 = '\n';
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\n');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
break;
|
|
|
|
|
case '\r':
|
|
|
|
|
c1 = '\n'; c2 = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 0: return;
|
|
|
|
|
case '\t':
|
|
|
|
|
case ' ':
|
|
|
|
|
c1 = ' '; c2 = 0;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(c2==DOUBLE_SPACE) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(' '); putchar(' ');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if(c2 == EOF)
|
|
|
|
|
return;
|
|
|
|
|
else if(c2 == 0) {
|
|
|
|
|
if(c1 == '\n' && add_cr == TRUE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 != '\r')
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else if(del_cr == FALSE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else {
|
|
|
|
|
if((c1<0x20 || 0x7e<c1) ||
|
|
|
|
|
(c2<0x20 || 0x7e<c2)) {
|
|
|
|
|
estab_f = FALSE;
|
|
|
|
|
return; /* too late to rescue this char */
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
|
|
|
|
|
putchar((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
return;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
static void
|
1999-08-13 01:37:52 -04:00
|
|
|
|
j_oconv(c2, c1)
|
|
|
|
|
int c2, c1;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
c2 = pre_convert(c1,c2); c1 = c1_return;
|
|
|
|
|
if(fold_f) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
switch(line_fold(c2,c1)) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
case '\n':
|
|
|
|
|
if(output_mode) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(ESC);
|
|
|
|
|
putchar('(');
|
|
|
|
|
putchar(ascii_intro);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
if(add_cr == TRUE) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
c1 = '\n';
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\n');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
output_mode = ASCII;
|
|
|
|
|
break;
|
|
|
|
|
case '\r':
|
|
|
|
|
c1 = '\n'; c2 = 0;
|
|
|
|
|
break;
|
|
|
|
|
case '\t':
|
|
|
|
|
case ' ':
|
|
|
|
|
c1 = ' '; c2 = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 0: return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(c2 == EOF) {
|
|
|
|
|
if(output_mode) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(ESC);
|
|
|
|
|
putchar('(');
|
|
|
|
|
putchar(ascii_intro);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
} else if(c2 == 0 && (c1 & 0x80)) {
|
|
|
|
|
if(input_mode==X0201 || !iso8859_f) {
|
|
|
|
|
if(output_mode!=X0201) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(ESC);
|
|
|
|
|
putchar('(');
|
|
|
|
|
putchar('I');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
output_mode = X0201;
|
|
|
|
|
}
|
|
|
|
|
c1 &= 0x7f;
|
|
|
|
|
} else {
|
|
|
|
|
/* iso8859 introduction, or 8th bit on */
|
|
|
|
|
/* Can we convert in 7bit form using ESC-'-'-A ?
|
|
|
|
|
Is this popular? */
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else if(c2 == 0) {
|
|
|
|
|
if(output_mode) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(ESC);
|
|
|
|
|
putchar('(');
|
|
|
|
|
putchar(ascii_intro);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
output_mode = ASCII;
|
|
|
|
|
}
|
|
|
|
|
if(c1 == '\n' && add_cr == TRUE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 != '\r')
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else if(del_cr == FALSE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else if(c2 == DOUBLE_SPACE) {
|
|
|
|
|
if(output_mode) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(ESC);
|
|
|
|
|
putchar('(');
|
|
|
|
|
putchar(ascii_intro);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
output_mode = ASCII;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(' ');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 == '\n' && add_cr == TRUE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 != '\r')
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else if(del_cr == FALSE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
} else {
|
|
|
|
|
if(output_mode != X0208) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(ESC);
|
|
|
|
|
putchar('$');
|
|
|
|
|
putchar(kanji_intro);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
output_mode = X0208;
|
|
|
|
|
}
|
|
|
|
|
if(c1<0x20 || 0x7e<c1)
|
|
|
|
|
return;
|
|
|
|
|
if(c2<0x20 || 0x7e<c2)
|
|
|
|
|
return;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c2);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 == '\n' && add_cr == TRUE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar('\r');
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1 != '\r')
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
else if(del_cr == FALSE)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
putchar(c1);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
return;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/*
 * rot13(c): rotate the letters A-Z and a-z by 13 places; every other
 * value is returned unchanged.
 *
 * The argument is parenthesized at each use so that an expression
 * argument (for example a conditional expression) is handled as a
 * single operand.  It is still evaluated several times, so it must not
 * have side effects.
 */
#define rot13(c)  ( \
    ((c) <  'A') ? (c) : \
    ((c) <= 'M') ? ((c) + 13) : \
    ((c) <= 'Z') ? ((c) - 13) : \
    ((c) <  'a') ? (c) : \
    ((c) <= 'm') ? ((c) + 13) : \
    ((c) <= 'z') ? ((c) - 13) : \
    (c) \
)
|
|
|
|
|
|
|
|
|
|
/*
 * rot47(c): rotate the values '!'..'~' by 47 places; every other value
 * is returned unchanged.
 *
 * The argument is parenthesized at each use so that an expression
 * argument is handled as a single operand.  It is still evaluated
 * several times, so it must not have side effects.
 */
#define rot47(c)  ( \
    ((c) <  '!') ? (c) : \
    ((c) <= 'O') ? ((c) + 47) : \
    ((c) <= '~') ? ((c) - 47) : \
    (c) \
)
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/*
|
1999-08-13 01:37:52 -04:00
|
|
|
|
Return value of line_fold()
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
\n add newline and output char
|
|
|
|
|
\r add newline and output nothing
|
|
|
|
|
' ' space
|
|
|
|
|
0 skip
|
|
|
|
|
1 (or else) normal output
|
|
|
|
|
|
|
|
|
|
fold state in prev (previous character)
|
|
|
|
|
|
|
|
|
|
>0x80 Japanese (X0208/X0201)
|
|
|
|
|
<0x80 ASCII
|
|
|
|
|
\n new line
|
|
|
|
|
' ' space
|
|
|
|
|
|
|
|
|
|
This fold algorithm does not preserve leading spaces in a line.
|
|
|
|
|
This is the main difference from fmt.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
static int
|
1999-08-13 01:37:52 -04:00
|
|
|
|
line_fold(c2,c1)
|
|
|
|
|
int c2,c1;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
int prev0;
|
|
|
|
|
if(c1=='\r')
|
|
|
|
|
return 0; /* ignore cr */
|
|
|
|
|
if(c1== 8) {
|
|
|
|
|
if(line>0) line--;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
if(c2==EOF && line != 0) /* close open last line */
|
|
|
|
|
return '\n';
|
|
|
|
|
/* new line */
|
|
|
|
|
if(c1=='\n') {
|
|
|
|
|
if(prev == c1) { /* duplicate newline */
|
|
|
|
|
if(line) {
|
|
|
|
|
line = 0;
|
|
|
|
|
return '\n'; /* output two newline */
|
|
|
|
|
} else {
|
|
|
|
|
line = 0;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if(prev&0x80) { /* Japanese? */
|
|
|
|
|
prev = c1;
|
|
|
|
|
return 0; /* ignore given single newline */
|
|
|
|
|
} else if(prev==' ') {
|
|
|
|
|
return 0;
|
|
|
|
|
} else {
|
|
|
|
|
prev = c1;
|
|
|
|
|
if(++line<=fold_len)
|
|
|
|
|
return ' ';
|
|
|
|
|
else {
|
|
|
|
|
line = 0;
|
|
|
|
|
return '\r'; /* fold and output nothing */
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(c1=='\f') {
|
|
|
|
|
prev = '\n';
|
|
|
|
|
if(line==0)
|
|
|
|
|
return 1;
|
|
|
|
|
line = 0;
|
|
|
|
|
return '\n'; /* output newline and clear */
|
|
|
|
|
}
|
|
|
|
|
/* X0208 kankaku or ascii space */
|
|
|
|
|
if( (c2==0&&c1==' ')||
|
|
|
|
|
(c2==0&&c1=='\t')||
|
|
|
|
|
(c2==DOUBLE_SPACE)||
|
|
|
|
|
(c2=='!'&& c1=='!')) {
|
|
|
|
|
if(prev == ' ') {
|
|
|
|
|
return 0; /* remove duplicate spaces */
|
|
|
|
|
}
|
|
|
|
|
prev = ' ';
|
|
|
|
|
if(++line<=fold_len)
|
|
|
|
|
return ' '; /* output ASCII space only */
|
|
|
|
|
else {
|
|
|
|
|
prev = ' '; line = 0;
|
|
|
|
|
return '\r'; /* fold and output nothing */
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
prev0 = prev; /* we still need this one... , but almost done */
|
|
|
|
|
prev = c1;
|
|
|
|
|
if(c2 || (SSP<=c1 && c1<=0xdf))
|
|
|
|
|
prev |= 0x80; /* this is Japanese */
|
|
|
|
|
line += (c2==0)?1:2;
|
|
|
|
|
if(line<=fold_len) { /* normal case */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
if(line>=fold_len+FOLD_MARGIN) { /* too many kinsou suspension */
|
|
|
|
|
line = (c2==0)?1:2;
|
|
|
|
|
return '\n'; /* We can't wait, do fold now */
|
|
|
|
|
}
|
|
|
|
|
/* simple kinsoku rules return 1 means no folding */
|
|
|
|
|
if(c2==0) {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if(c1==0xde) return 1; /* $B!+(B*/
|
|
|
|
|
if(c1==0xdf) return 1; /* $B!,(B*/
|
|
|
|
|
if(c1==0xa4) return 1; /* $B!#(B*/
|
|
|
|
|
if(c1==0xa3) return 1; /* $B!$(B*/
|
|
|
|
|
if(c1==0xa1) return 1; /* $B!W(B*/
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1==0xb0) return 1; /* - */
|
|
|
|
|
if(SSP<=c1 && c1<=0xdf) { /* X0201 */
|
|
|
|
|
line = 1;
|
|
|
|
|
return '\n';/* add one new line before this character */
|
|
|
|
|
}
|
|
|
|
|
/* fold point in ASCII { [ ( */
|
|
|
|
|
if(( c1!=')'&&
|
|
|
|
|
c1!=']'&&
|
|
|
|
|
c1!='}'&&
|
|
|
|
|
c1!='.'&&
|
|
|
|
|
c1!=','&&
|
|
|
|
|
c1!='!'&&
|
|
|
|
|
c1!='?'&&
|
|
|
|
|
c1!='/'&&
|
|
|
|
|
c1!=':'&&
|
|
|
|
|
c1!=';')&&
|
|
|
|
|
((prev0=='\n')|| (prev0==' ')|| /* ignored new line */
|
|
|
|
|
(prev0&0x80)) /* X0208 - ASCII */
|
|
|
|
|
) {
|
|
|
|
|
line = 1;
|
|
|
|
|
return '\n';/* add one new line before this character */
|
|
|
|
|
}
|
|
|
|
|
return 1; /* default no fold in ASCII */
|
|
|
|
|
} else {
|
|
|
|
|
if(c2=='!') {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if(c1=='"') return 1; /* $B!"(B */
|
|
|
|
|
if(c1=='#') return 1; /* $B!#(B */
|
|
|
|
|
if(c1=='$') return 1; /* $B!$(B */
|
|
|
|
|
if(c1=='%') return 1; /* $B!%(B */
|
|
|
|
|
if(c1=='\'') return 1; /* $B!\(B */
|
|
|
|
|
if(c1=='(') return 1; /* $B!((B */
|
|
|
|
|
if(c1==')') return 1; /* $B!)(B */
|
|
|
|
|
if(c1=='*') return 1; /* $B!*(B */
|
|
|
|
|
if(c1=='+') return 1; /* $B!+(B */
|
|
|
|
|
if(c1==',') return 1; /* $B!,(B */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
line = 2;
|
|
|
|
|
return '\n'; /* add one new line before this character */
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
pre_convert(c1,c2)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
int c1,c2;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
if(c2) c1 &= 0x7f;
|
|
|
|
|
c1_return = c1;
|
|
|
|
|
if(c2==EOF) return c2;
|
|
|
|
|
c2 &= 0x7f;
|
|
|
|
|
if(rot_f) {
|
|
|
|
|
if(c2) {
|
|
|
|
|
c1 = rot47(c1);
|
|
|
|
|
c2 = rot47(c2);
|
|
|
|
|
} else {
|
|
|
|
|
if(!(c1 & 0x80))
|
|
|
|
|
c1 = rot13(c1);
|
|
|
|
|
}
|
|
|
|
|
c1_return = c1;
|
|
|
|
|
}
|
|
|
|
|
/* JISX0208 Alphabet */
|
|
|
|
|
if(alpha_f && c2 == 0x23 ) return 0;
|
|
|
|
|
/* JISX0208 Kigou */
|
|
|
|
|
if(alpha_f && c2 == 0x21 ) {
|
|
|
|
|
if(0x21==c1) {
|
|
|
|
|
if(alpha_f&0x2) {
|
|
|
|
|
c1_return = ' ';
|
|
|
|
|
return 0;
|
|
|
|
|
} else if(alpha_f&0x4) {
|
|
|
|
|
c1_return = ' ';
|
|
|
|
|
return DOUBLE_SPACE;
|
|
|
|
|
} else {
|
|
|
|
|
return c2;
|
|
|
|
|
}
|
|
|
|
|
} else if(0x20<c1 && c1<0x7f && fv[c1-0x20]) {
|
|
|
|
|
c1_return = fv[c1-0x20];
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return c2;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#ifdef STRICT_MIME
|
|
|
|
|
/* This converts */
|
|
|
|
|
|
|
|
|
|
/*
 * Encoded-word prefixes accepted by the strict mime_begin().  "\075"
 * is '='.  The list is terminated by NULL.
 */
unsigned char *mime_pattern[] = {
    (unsigned char *)"\075?ISO-8859-1?Q?",
    (unsigned char *)"\075?ISO-2022-JP?B?",
    (unsigned char *)"\075?ISO-2022-JP?Q?",
    (unsigned char *)"\075?JAPANESE_EUC?B?",
    (unsigned char *)"\075?SHIFT_JIS?B?",
    NULL
};

/*
 * Encoding letter for each entry of mime_pattern[], in the same order
 * and with the same length (the final 0 pairs with the NULL sentinel).
 * mime_begin() indexes this table with the index of the matched
 * pattern, so every pattern needs its own entry; the table used to
 * stop after the third pattern.
 */
int mime_encode[] = {
    'Q', 'B', 'Q', 'B', 'B',
    0
};
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* Number of slots in the recovery buffer that mime_begin() uses to
   replay a rejected "=?..." prefix. */
#define MAXRECOVER 20
|
|
|
|
|
/* Copy of iso8859_f taken by mime_begin(); mime_getc() restores
   iso8859_f from it when a Q-encoded word ends. */
int iso8859_f_save;
|
|
|
|
|
|
|
|
|
|
#ifdef STRICT_MIME
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
/*
 * nkf_toupper(c): map 'a'..'z' to 'A'..'Z' and leave every other value
 * unchanged.  Hand-rolled because the original author did not trust
 * the portability of toupper().
 *
 * The argument is parenthesized at each use so that an expression
 * argument is handled as a single operand.  It is still evaluated
 * more than once, so it must not have side effects.
 */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
|
|
|
|
|
|
|
|
|
|
static int
|
1999-08-13 01:37:52 -04:00
|
|
|
|
mime_begin(f)
|
|
|
|
|
FILE *f;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
int c1;
|
|
|
|
|
int i,j,k;
|
|
|
|
|
unsigned char *p,*q;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
mime_mode = FALSE;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* =? has been checked */
|
1998-01-16 07:13:05 -05:00
|
|
|
|
j = 0;
|
|
|
|
|
p = mime_pattern[j];
|
|
|
|
|
r[0]='='; r[1]='?';
|
|
|
|
|
|
|
|
|
|
for(i=2;p[i]>' ';i++) { /* start at =? */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if( ((((r[i] = c1 = getc(f))==EOF) || nkf_toupper(c1) != p[i] ) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
/* pattern fails, try next one */
|
|
|
|
|
q = p;
|
|
|
|
|
while (p = mime_pattern[++j]) {
|
|
|
|
|
for(k=2;k<i;k++) /* assume length(p) > i */
|
|
|
|
|
if(p[k]!=q[k]) break;
|
|
|
|
|
if(k==i && nkf_toupper(c1)==p[k]) break;
|
|
|
|
|
}
|
|
|
|
|
if(p) continue; /* found next one, continue */
|
|
|
|
|
/* all fails, output from recovery buffer */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
ungetc(c1,f);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
for(j=0;j<i;j++) {
|
|
|
|
|
(*oconv)(0,r[j]);
|
|
|
|
|
}
|
|
|
|
|
return c1;
|
|
|
|
|
}
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
mime_mode = mime_encode[j];
|
1998-01-16 07:13:05 -05:00
|
|
|
|
iso8859_f_save = iso8859_f;
|
|
|
|
|
if(j==0) {
|
|
|
|
|
iso8859_f = TRUE;
|
|
|
|
|
}
|
|
|
|
|
if(mime_mode=='B') {
|
|
|
|
|
mimebuf_f = unbuf_f;
|
|
|
|
|
if(!unbuf_f) {
|
|
|
|
|
/* do MIME integrity check */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
return mime_integrity(f,mime_pattern[j]);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
mimebuf_f = TRUE;
|
|
|
|
|
return c1;
|
|
|
|
|
}
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* Next raw byte of a MIME word: read from the stream when mimebuf_f
   is set, otherwise taken from the FIFO at mime_input. */
#define mime_getc0(f) (mimebuf_f?getc(f):Fifo(mime_input++))
|
|
|
|
|
/* Counterpart of mime_getc0(): push c back onto the stream when
   mimebuf_f is set, otherwise step mime_input back by one. */
#define mime_ungetc0(c,f) (mimebuf_f?ungetc(c,f):mime_input--)
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
static int
|
|
|
|
|
mime_begin(f)
|
|
|
|
|
FILE *f;
|
|
|
|
|
{
|
|
|
|
|
int c1;
|
|
|
|
|
int i,j;
|
|
|
|
|
int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
|
|
|
|
|
|
|
|
|
|
mime_mode = FALSE;
|
|
|
|
|
/* =? has been checked */
|
|
|
|
|
j = 0;
|
|
|
|
|
r[0]='='; r[1]='?';
|
|
|
|
|
for(i=2;i<MAXRECOVER;i++) { /* start at =? */
|
|
|
|
|
/* We accept any charcter type even if it is breaked by new lines */
|
|
|
|
|
if( (r[i] = c1 = getc(f))==EOF) break;
|
|
|
|
|
if(c1=='=') break;
|
|
|
|
|
if(c1<' '&& c1!='\r' && c1!='\n') break;
|
|
|
|
|
if(c1=='?') {
|
|
|
|
|
i++;
|
|
|
|
|
if(!(i<MAXRECOVER) || (r[i] = c1 = getc(f))==EOF) break;
|
|
|
|
|
if(c1=='b'||c1=='B') {
|
|
|
|
|
mime_mode = 'B';
|
|
|
|
|
} else if(c1=='q'||c1=='Q') {
|
|
|
|
|
mime_mode = 'Q';
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
i++;
|
|
|
|
|
if(!(i<MAXRECOVER) || (r[i] = c1 = getc(f))==EOF) break;
|
|
|
|
|
if(c1=='?') {
|
|
|
|
|
break;
|
|
|
|
|
} else {
|
|
|
|
|
mime_mode = FALSE;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(!mime_mode || c1==EOF || i==MAXRECOVER) {
|
|
|
|
|
ungetc(c1,f);
|
|
|
|
|
if (i == MAXRECOVER)
|
|
|
|
|
i--;
|
|
|
|
|
for(j=0;j<i;j++) {
|
|
|
|
|
(*oconv)(0,r[j]);
|
|
|
|
|
}
|
|
|
|
|
return c1;
|
|
|
|
|
}
|
|
|
|
|
iso8859_f_save = iso8859_f;
|
|
|
|
|
/* do no MIME integrity check */
|
|
|
|
|
return c1; /* used only for checking EOF */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Next raw byte of a MIME word: in this variant always read directly
   from the stream. */
#define mime_getc0(f) getc(f)
|
|
|
|
|
/* Counterpart of mime_getc0(): push c back onto the stream. */
#define mime_ungetc0(c,f) ungetc(c,f)
|
|
|
|
|
|
|
|
|
|
#endif
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
static int
|
1999-08-13 01:37:52 -04:00
|
|
|
|
mime_getc(f)
|
|
|
|
|
FILE *f;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
int c1, c2, c3, c4, cc;
|
|
|
|
|
int t1, t2, t3, t4, mode, exit_mode;
|
|
|
|
|
|
|
|
|
|
if(mime_top != mime_last) { /* Something is in FIFO */
|
|
|
|
|
return Fifo(mime_top++);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(mimebuf_f == FIXED_MIME)
|
|
|
|
|
exit_mode = mime_mode;
|
|
|
|
|
else
|
|
|
|
|
exit_mode = FALSE;
|
|
|
|
|
if(mime_mode == 'Q') {
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c1 = mime_getc0(f)) == EOF) return (EOF);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1=='_') return ' ';
|
|
|
|
|
if(c1!='=' && c1!='?')
|
|
|
|
|
return c1;
|
|
|
|
|
mime_mode = exit_mode; /* prepare for quit */
|
|
|
|
|
if(c1<=' ') return c1;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c2 = mime_getc0(f)) == EOF) return (EOF);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c2<=' ') return c2;
|
|
|
|
|
if(c1=='?'&&c2=='=') {
|
|
|
|
|
/* end Q encoding */
|
|
|
|
|
input_mode = exit_mode;
|
|
|
|
|
iso8859_f = iso8859_f_save;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
return getc(f);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
if(c1=='?') {
|
|
|
|
|
mime_mode = 'Q'; /* still in MIME */
|
1999-08-13 01:37:52 -04:00
|
|
|
|
mime_ungetc0(c2,f);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
return c1;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c3 = mime_getc0(f)) == EOF) return (EOF);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c2<=' ') return c2;
|
|
|
|
|
mime_mode = 'Q'; /* still in MIME */
|
|
|
|
|
#define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
|
|
|
|
|
('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
|
|
|
|
|
return ((hex(c2)<<4) + hex(c3));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(mime_mode != 'B') {
|
|
|
|
|
mime_mode = FALSE;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
return getc(f);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Base64 encoding */
|
|
|
|
|
/*
|
|
|
|
|
MIME allows line break in the middle of
|
|
|
|
|
Base64, but we are very pessimistic in decoding
|
|
|
|
|
in unbuf mode because MIME encoded code may broken by
|
|
|
|
|
less or editor's control sequence (such as ESC-[-K in unbuffered
|
|
|
|
|
mode. ignore incomplete MIME.
|
|
|
|
|
*/
|
|
|
|
|
mode = mime_mode;
|
|
|
|
|
mime_mode = exit_mode; /* prepare for quit */
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
while ((c1 = mime_getc0(f))<=' ') {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c1==EOF)
|
|
|
|
|
return (EOF);
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c2 = mime_getc0(f))<=' ') {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c2==EOF)
|
|
|
|
|
return (EOF);
|
|
|
|
|
if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
|
|
|
|
|
return c2;
|
|
|
|
|
}
|
|
|
|
|
if((c1 == '?') && (c2 == '=')) {
|
|
|
|
|
input_mode = ASCII;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
while((c1 = getc(f))==' ' /* || c1=='\n' || c1=='\r' */);
|
1998-01-16 07:13:05 -05:00
|
|
|
|
return c1;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c3 = mime_getc0(f))<=' ') {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c3==EOF)
|
|
|
|
|
return (EOF);
|
|
|
|
|
if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
|
|
|
|
|
return c3;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
if((c4 = mime_getc0(f))<=' ') {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(c4==EOF)
|
|
|
|
|
return (EOF);
|
|
|
|
|
if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
|
|
|
|
|
return c4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mime_mode = mode; /* still in MIME sigh... */
|
|
|
|
|
|
|
|
|
|
/* BASE 64 decoding */
|
|
|
|
|
|
|
|
|
|
t1 = 0x3f & base64decode(c1);
|
|
|
|
|
t2 = 0x3f & base64decode(c2);
|
|
|
|
|
t3 = 0x3f & base64decode(c3);
|
|
|
|
|
t4 = 0x3f & base64decode(c4);
|
|
|
|
|
cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
|
|
|
|
|
if(c2 != '=') {
|
|
|
|
|
Fifo(mime_last++) = cc;
|
|
|
|
|
cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
|
|
|
|
|
if(c3 != '=') {
|
|
|
|
|
Fifo(mime_last++) = cc;
|
|
|
|
|
cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
|
|
|
|
|
if(c4 != '=')
|
|
|
|
|
Fifo(mime_last++) = cc;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
return c1;
|
|
|
|
|
}
|
|
|
|
|
return Fifo(mime_top++);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
mime_ungetc(c)
|
1999-08-13 01:37:52 -04:00
|
|
|
|
unsigned int c;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
Fifo(mime_last++) = c;
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#ifdef STRICT_MIME
|
|
|
|
|
int
|
|
|
|
|
mime_integrity(f,p)
|
|
|
|
|
FILE *f;
|
|
|
|
|
unsigned char *p;
|
1998-01-16 07:13:05 -05:00
|
|
|
|
{
|
|
|
|
|
int c,d;
|
|
|
|
|
unsigned int q;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
/* In buffered mode, read until =? or NL or buffer fffull
|
1998-01-16 07:13:05 -05:00
|
|
|
|
*/
|
|
|
|
|
mime_input = mime_top;
|
|
|
|
|
mime_last = mime_top;
|
|
|
|
|
while(*p) Fifo(mime_input++) = *p++;
|
|
|
|
|
d = 0;
|
|
|
|
|
q = mime_input;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
while((c=getc(f))!=EOF) {
|
1998-01-16 07:13:05 -05:00
|
|
|
|
if(((mime_input-mime_top)&MIME_BUF_MASK)==0) break;
|
|
|
|
|
if(c=='=' && d=='?') {
|
|
|
|
|
/* checked. skip header, start decode */
|
|
|
|
|
Fifo(mime_input++) = c;
|
|
|
|
|
mime_input = q;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
if(!( (c=='+'||c=='/'|| c=='=' || c=='?' ||
|
|
|
|
|
('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))))
|
|
|
|
|
break;
|
|
|
|
|
/* Should we check length mod 4? */
|
|
|
|
|
Fifo(mime_input++) = c;
|
|
|
|
|
d=c;
|
|
|
|
|
}
|
|
|
|
|
/* In case of Incomplete MIME, no MIME decode */
|
|
|
|
|
Fifo(mime_input++) = c;
|
|
|
|
|
mime_last = mime_input; /* point undecoded buffer */
|
|
|
|
|
mime_mode = 1; /* no decode on Fifo last in mime_getc */
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#endif
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
/*
 * base64decode -- map one base64 character to its 6-bit value.
 *
 *   'A'..'Z' -> 0..25      'a'..'z' -> 26..51
 *   '0'..'9' -> 52..61     '+' -> 62     '/' -> 63
 *
 * No validation is done: any other character still produces a number
 * (for instance everything below '0' except '+' yields 63), and the
 * callers mask the result with 0x3f.
 */
static int
base64decode(c)
    int c;
{
    if (c >= 'A') {
        if (c <= 'Z')
            return c - 'A';             /* A..Z  0-25 */
        return c - 'a' + 26;            /* a..z 26-51 */
    }
    if (c >= '0')
        return c - '0' + 52;            /* 0..9 52-61 */
    return (c == '+') ? 62 : 63;        /* '+' 62, '/' 63 */
}
|
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
|
static void
|
|
|
|
|
reinit()
|
|
|
|
|
{
|
|
|
|
|
unbuf_f = FALSE;
|
|
|
|
|
estab_f = FALSE;
|
|
|
|
|
nop_f = FALSE;
|
|
|
|
|
binmode_f = TRUE;
|
|
|
|
|
rot_f = FALSE;
|
|
|
|
|
input_f = FALSE;
|
|
|
|
|
alpha_f = FALSE;
|
|
|
|
|
mime_f = TRUE;
|
|
|
|
|
mimebuf_f = FALSE;
|
|
|
|
|
broken_f = FALSE;
|
|
|
|
|
iso8859_f = FALSE;
|
|
|
|
|
x0201_f = TRUE;
|
|
|
|
|
x0201_f = NO_X0201;
|
|
|
|
|
fold_f = FALSE;
|
|
|
|
|
kanji_intro = DEFAULT_J;
|
|
|
|
|
ascii_intro = DEFAULT_R;
|
|
|
|
|
oconv = DEFAULT_CONV;
|
|
|
|
|
output_mode = ASCII;
|
|
|
|
|
input_mode = ASCII;
|
|
|
|
|
shift_mode = FALSE;
|
|
|
|
|
mime_mode = FALSE;
|
|
|
|
|
file_out = FALSE;
|
|
|
|
|
add_cr = FALSE;
|
|
|
|
|
del_cr = FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifndef PERL_XS
|
1998-01-16 07:13:05 -05:00
|
|
|
|
int
|
|
|
|
|
usage()
|
|
|
|
|
{
|
|
|
|
|
fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
|
|
|
|
|
fprintf(stderr,"Flags:\n");
|
|
|
|
|
fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
|
|
|
|
|
#ifdef DEFAULT_CODE_SJIS
|
|
|
|
|
fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC)\n");
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef DEFAULT_CODE_JIS
|
|
|
|
|
fprintf(stderr,"j,s,e Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC)\n");
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef DEFAULT_CODE_EUC
|
|
|
|
|
fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT)\n");
|
|
|
|
|
#endif
|
|
|
|
|
fprintf(stderr,"J,S,E Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC)\n");
|
|
|
|
|
fprintf(stderr,"t no conversion\n");
|
|
|
|
|
fprintf(stderr,"i_ Output sequence to designate JIS-kanji (DEFAULT B)\n");
|
|
|
|
|
fprintf(stderr,"o_ Output sequence to designate ASCII (DEFAULT B)\n");
|
|
|
|
|
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
|
|
|
|
|
fprintf(stderr,"v Show this usage\n");
|
1999-08-13 01:37:52 -04:00
|
|
|
|
fprintf(stderr,"m[BQ0] MIME decode [B:base64,Q:quoted,0:no decode]\n");
|
1998-01-16 07:13:05 -05:00
|
|
|
|
fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
|
|
|
|
|
fprintf(stderr,"f Folding: -f60 or -f\n");
|
|
|
|
|
fprintf(stderr,"Z[0-2] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces\n");
|
|
|
|
|
fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
|
|
|
|
|
fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
|
|
|
|
|
#ifdef MSDOS
|
|
|
|
|
fprintf(stderr,"T Text mode output\n");
|
|
|
|
|
#endif
|
|
|
|
|
fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
|
|
|
|
|
fprintf(stderr,"d,c Delete \\r in line feed, Add \\r in line feed\n");
|
|
|
|
|
fprintf(stderr,"Network Kanji Filter Version %s (%s) "
|
|
|
|
|
#if defined(MSDOS) && !defined(_Windows)
|
|
|
|
|
"for DOS"
|
|
|
|
|
#endif
|
|
|
|
|
#if !defined(__WIN32__) && defined(_Windows)
|
|
|
|
|
"for Win16"
|
|
|
|
|
#endif
|
|
|
|
|
#if defined(__WIN32__) && defined(_Windows)
|
|
|
|
|
"for Win32"
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef __OS2__
|
|
|
|
|
"for OS/2"
|
|
|
|
|
#endif
|
|
|
|
|
,Version,Patchlevel);
|
|
|
|
|
fprintf(stderr,"\n%s\n",CopyRight);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
1999-08-13 01:37:52 -04:00
|
|
|
|
#endif
|
1998-01-16 07:13:05 -05:00
|
|
|
|
|
|
|
|
|
/**
|
1999-08-13 01:37:52 -04:00
|
|
|
|
** Patch authors
|
1998-01-16 07:13:05 -05:00
|
|
|
|
** void@merope.pleiades.or.jp (Kusakabe Youichi)
|
|
|
|
|
** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
|
|
|
|
|
** ohta@src.ricoh.co.jp (Junn Ohta)
|
|
|
|
|
** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
|
|
|
|
|
** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
|
|
|
|
|
** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
|
|
|
|
|
** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
|
|
|
|
|
** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
|
|
|
|
|
** GHG00637@nifty-serve.or.jp (COW)
|
|
|
|
|
**
|
1999-08-13 01:37:52 -04:00
|
|
|
|
** Last updated
|
|
|
|
|
** 1998.11.7
|
1998-01-16 07:13:05 -05:00
|
|
|
|
**/
|
|
|
|
|
|
|
|
|
|
/* end */
|