mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	* ext/nkf/nkf-utf8: update nkf.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15266 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									f3fe101d55
								
							
						
					
					
						commit
						086e5b1a63
					
				
					 6 changed files with 4689 additions and 4590 deletions
				
			
		| 
						 | 
				
			
			@ -1,3 +1,7 @@
 | 
			
		|||
Sun Jan 27 16:27:22 2008  NARUSE, Yui  <naruse@ruby-lang.org>
 | 
			
		||||
 | 
			
		||||
	* ext/nkf/nkf-utf8: update nkf.
 | 
			
		||||
 | 
			
		||||
Sun Jan 27 16:25:27 2008  NARUSE, Yui  <naruse@ruby-lang.org>
 | 
			
		||||
 | 
			
		||||
	* re.c (rb_reg_source): set encoding as regexp encoding.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,91 +1,51 @@
 | 
			
		|||
#ifndef _CONFIG_H_
 | 
			
		||||
#define _CONFIG_H_
 | 
			
		||||
 | 
			
		||||
/* UTF8 $BF~=PNO(B */
 | 
			
		||||
/* UTF8 input and output */
 | 
			
		||||
#define UTF8_INPUT_ENABLE
 | 
			
		||||
#define UTF8_OUTPUT_ENABLE
 | 
			
		||||
 | 
			
		||||
/* Shift_JIS $BHO0O30$NJ8;z$r!"(BCP932 $B$GF1CM$JJ8;z$KFI$_49$($k(B */
 | 
			
		||||
/* convert characters outside Shift_JIS to their CP932 equivalents */
 | 
			
		||||
#define SHIFTJIS_CP932
 | 
			
		||||
 | 
			
		||||
/* $B%*%W%7%g%s$GF~NO$r;XDj$7$?;~$K!"J8;z%3!<%I$r8GDj$9$k(B */
 | 
			
		||||
/* fix input encoding when given by option */
 | 
			
		||||
#define INPUT_CODE_FIX
 | 
			
		||||
 | 
			
		||||
/* --overwrite $B%*%W%7%g%s(B */
 | 
			
		||||
/* --overwrite option */
 | 
			
		||||
/* by Satoru Takabayashi <ccsatoru@vega.aichi-u.ac.jp> */
 | 
			
		||||
#define OVERWRITE
 | 
			
		||||
 | 
			
		||||
/* --cap-input, --url-input $B%*%W%7%g%s(B */
 | 
			
		||||
/* --cap-input, --url-input option */
 | 
			
		||||
#define INPUT_OPTION
 | 
			
		||||
 | 
			
		||||
/* --numchar-input $B%*%W%7%g%s(B */
 | 
			
		||||
/* --numchar-input option */
 | 
			
		||||
#define NUMCHAR_OPTION
 | 
			
		||||
 | 
			
		||||
/* --debug, --no-output $B%*%W%7%g%s(B */
 | 
			
		||||
/* --debug, --no-output option */
 | 
			
		||||
#define CHECK_OPTION
 | 
			
		||||
 | 
			
		||||
/* JIS X0212 */
 | 
			
		||||
#define X0212_ENABLE
 | 
			
		||||
 | 
			
		||||
/* --exec-in, --exec-out $B%*%W%7%g%s(B
 | 
			
		||||
 * pipe, fork, execvp $B$"$?$j$,L5$$$HF0$-$^$;$s!#(B
 | 
			
		||||
 * MS-DOS, MinGW $B$J$I$G$O(B undef $B$K$7$F$/$@$5$$(B
 | 
			
		||||
 * child process $B=*N;;~$N=hM}$,$$$$$+$2$s$J$N$G!"(B
 | 
			
		||||
 * $B%G%U%)%k%H$GL58z$K$7$F$$$^$9!#(B
 | 
			
		||||
/* --exec-in, --exec-out option
 | 
			
		||||
 * require pipe, fork, execvp and so on.
 | 
			
		||||
 * please undef this on MS-DOS, MinGW
 | 
			
		||||
 * this is still buggy around child process
 | 
			
		||||
 */
 | 
			
		||||
/* #define EXEC_IO */
 | 
			
		||||
 | 
			
		||||
/* SunOS $B$N(B cc $B$r;H$&$H$-$O(B undef $B$K$7$F$/$@$5$$(B */
 | 
			
		||||
#define ANSI_C_PROTOTYPE
 | 
			
		||||
 | 
			
		||||
/* int $B$,(B 32bit $BL$K~$N4D6-$G(B NUMCHAR_OPTION $B$r;H$&$K$O!"(B
 | 
			
		||||
 * $B%3%a%s%H$r30$7$F$/$@$5$$!#(B
 | 
			
		||||
 */
 | 
			
		||||
/* #define INT_IS_SHORT */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(INT_IS_SHORT)
 | 
			
		||||
typedef long nkf_char;
 | 
			
		||||
typedef unsigned char nkf_nfchar;
 | 
			
		||||
#else
 | 
			
		||||
typedef int nkf_char;
 | 
			
		||||
typedef int nkf_nfchar;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Unicode Normalization */
 | 
			
		||||
#define UNICODE_NORMALIZATION
 | 
			
		||||
 | 
			
		||||
#ifndef WIN32DLL
 | 
			
		||||
/******************************/
 | 
			
		||||
/* $B%G%U%)%k%H$N=PNO%3!<%IA*Br(B */
 | 
			
		||||
/* Select DEFAULT_CODE */
 | 
			
		||||
/* #define DEFAULT_CODE_JIS */
 | 
			
		||||
/* #define DEFAULT_CODE_SJIS */
 | 
			
		||||
/* #define DEFAULT_CODE_EUC */
 | 
			
		||||
#define DEFAULT_CODE_UTF8
 | 
			
		||||
/******************************/
 | 
			
		||||
#else
 | 
			
		||||
#define DEFAULT_CODE_SJIS
 | 
			
		||||
#endif
 | 
			
		||||
/*
 | 
			
		||||
 * Select Default Output Encoding
 | 
			
		||||
 * 
 | 
			
		||||
 * If not defined, locale encoding is used.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#if defined(NUMCHAR_OPTION) && !defined(UTF8_INPUT_ENABLE)
 | 
			
		||||
#define UTF8_INPUT_ENABLE
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef UNICODE_NORMALIZATION
 | 
			
		||||
#ifndef UTF8_INPUT_ENABLE
 | 
			
		||||
#define UTF8_INPUT_ENABLE
 | 
			
		||||
#endif
 | 
			
		||||
#define NORMALIZATION_TABLE_LENGTH 942
 | 
			
		||||
#define NORMALIZATION_TABLE_NFC_LENGTH 3
 | 
			
		||||
#define NORMALIZATION_TABLE_NFD_LENGTH 9
 | 
			
		||||
struct normalization_pair{
 | 
			
		||||
    const nkf_nfchar nfc[NORMALIZATION_TABLE_NFC_LENGTH];
 | 
			
		||||
    const nkf_nfchar nfd[NORMALIZATION_TABLE_NFD_LENGTH];
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define MIME_DECODE_DEFAULT 0
 | 
			
		||||
#define X0201_DEFAULT 0
 | 
			
		||||
/* #define DEFAULT_CODE_JIS   */
 | 
			
		||||
/* #define DEFAULT_CODE_SJIS  */
 | 
			
		||||
/* #define DEFAULT_CODE_EUC   */
 | 
			
		||||
/* #define DEFAULT_CODE_UTF8  */
 | 
			
		||||
 | 
			
		||||
#endif /* _CONFIG_H_ */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										166
									
								
								ext/nkf/nkf-utf8/nkf.h
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										166
									
								
								ext/nkf/nkf-utf8/nkf.h
									
										
									
									
									
										Executable file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,166 @@
 | 
			
		|||
/*
 | 
			
		||||
 * 
 | 
			
		||||
 * nkf.h - Header file for nkf
 | 
			
		||||
 * 
 | 
			
		||||
 * $Id: nkf.h,v 1.2 2008/01/23 09:10:25 naruse Exp $
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifndef NKF_H
 | 
			
		||||
 | 
			
		||||
/* Wrapper of configurations */
 | 
			
		||||
 | 
			
		||||
#ifndef MIME_DECODE_DEFAULT
 | 
			
		||||
#define MIME_DECODE_DEFAULT STRICT_MIME
 | 
			
		||||
#endif
 | 
			
		||||
#ifndef X0201_DEFAULT
 | 
			
		||||
#define X0201_DEFAULT TRUE
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if DEFAULT_NEWLINE == 0x0D0A
 | 
			
		||||
#define PUT_NEWLINE(func) do {\
 | 
			
		||||
    func(0x0D);\
 | 
			
		||||
    func(0x0A);\
 | 
			
		||||
} while (0)
 | 
			
		||||
#define OCONV_NEWLINE(func) do {\
 | 
			
		||||
    func(0, 0x0D);\
 | 
			
		||||
    func(0, 0x0A);\
 | 
			
		||||
} while (0)
 | 
			
		||||
#elif DEFAULT_NEWLINE == 0x0D
 | 
			
		||||
#define PUT_NEWLINE(func) func(0x0D)
 | 
			
		||||
#define OCONV_NEWLINE(func) func(0, 0x0D)
 | 
			
		||||
#else
 | 
			
		||||
#define DEFAULT_NEWLINE 0x0A
 | 
			
		||||
#define PUT_NEWLINE(func) func(0x0A)
 | 
			
		||||
#define OCONV_NEWLINE(func) func(0, 0x0A)
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef HELP_OUTPUT_STDERR
 | 
			
		||||
#define HELP_OUTPUT stderr
 | 
			
		||||
#else
 | 
			
		||||
#define HELP_OUTPUT stdout
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Compatibility definitions */
 | 
			
		||||
 | 
			
		||||
#ifdef nkf_char
 | 
			
		||||
#elif defined(INT_IS_SHORT)
 | 
			
		||||
typedef long nkf_char;
 | 
			
		||||
#define NKF_INT32_C(n)   (n##L)
 | 
			
		||||
#else
 | 
			
		||||
typedef int nkf_char;
 | 
			
		||||
#define NKF_INT32_C(n)   (n)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
 | 
			
		||||
#define MSDOS
 | 
			
		||||
#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
 | 
			
		||||
#define __WIN32__
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef PERL_XS
 | 
			
		||||
#undef OVERWRITE
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef PERL_XS
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#if defined(MSDOS) || defined(__OS2__)
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
#include <io.h>
 | 
			
		||||
#if defined(_MSC_VER) || defined(__WATCOMC__)
 | 
			
		||||
#define mktemp _mktemp
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef MSDOS
 | 
			
		||||
#ifdef LSI_C
 | 
			
		||||
#define setbinmode(fp) fsetbin(fp)
 | 
			
		||||
#elif defined(__DJGPP__)
 | 
			
		||||
#include <libc/dosio.h>
 | 
			
		||||
void  setbinmode(FILE *fp)
 | 
			
		||||
{
 | 
			
		||||
    /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
 | 
			
		||||
    int fd, m;
 | 
			
		||||
    fd = fileno(fp);
 | 
			
		||||
    m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
 | 
			
		||||
    __file_handle_set(fd, m);
 | 
			
		||||
}
 | 
			
		||||
#else /* Microsoft C, Turbo C */
 | 
			
		||||
#define setbinmode(fp) setmode(fileno(fp), O_BINARY)
 | 
			
		||||
#endif
 | 
			
		||||
#else /* UNIX */
 | 
			
		||||
#define setbinmode(fp)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef _IOFBF /* SysV and MSDOS, Windows */
 | 
			
		||||
#define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
 | 
			
		||||
#else /* BSD */
 | 
			
		||||
#define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*Borland C++ 4.5 EasyWin*/
 | 
			
		||||
#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
 | 
			
		||||
#define         EASYWIN
 | 
			
		||||
#ifndef __WIN16__
 | 
			
		||||
#define __WIN16__
 | 
			
		||||
#endif
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OVERWRITE
 | 
			
		||||
/* added by satoru@isoternet.org */
 | 
			
		||||
#if defined(__EMX__)
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include <sys/stat.h>
 | 
			
		||||
#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#if defined(__WATCOMC__)
 | 
			
		||||
#include <sys/utime.h>
 | 
			
		||||
#else
 | 
			
		||||
#include <utime.h>
 | 
			
		||||
#endif
 | 
			
		||||
#else /* defined(MSDOS) */
 | 
			
		||||
#ifdef __WIN32__
 | 
			
		||||
#ifdef __BORLANDC__ /* BCC32 */
 | 
			
		||||
#include <utime.h>
 | 
			
		||||
#else /* !defined(__BORLANDC__) */
 | 
			
		||||
#include <sys/utime.h>
 | 
			
		||||
#endif /* (__BORLANDC__) */
 | 
			
		||||
#else /* !defined(__WIN32__) */
 | 
			
		||||
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
 | 
			
		||||
#include <sys/utime.h>
 | 
			
		||||
#elif defined(__TURBOC__) /* BCC */
 | 
			
		||||
#include <utime.h>
 | 
			
		||||
#elif defined(LSI_C) /* LSI C */
 | 
			
		||||
#endif /* (__WIN32__) */
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef __WIN32__ /* not win32 is posix */
 | 
			
		||||
#define HAVE_LANGINFO_H
 | 
			
		||||
#define HAVE_LOCALE_H
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LANGINFO_H
 | 
			
		||||
#include <langinfo.h>
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef HAVE_LOCALE_H
 | 
			
		||||
#include <locale.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define         FALSE   0
 | 
			
		||||
#define         TRUE    1
 | 
			
		||||
 | 
			
		||||
#ifdef WIN32DLL
 | 
			
		||||
#include "nkf32.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -1,4 +1,11 @@
 | 
			
		|||
/*
 | 
			
		||||
 * utf8tbl.c - Conversion Table for nkf
 | 
			
		||||
 *
 | 
			
		||||
 * $Id$
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "config.h"
 | 
			
		||||
#include "utf8tbl.h"
 | 
			
		||||
 | 
			
		||||
#ifdef UTF8_OUTPUT_ENABLE
 | 
			
		||||
static const unsigned short euc_to_utf8_A1[] = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,15 @@
 | 
			
		|||
/*
 | 
			
		||||
 * utf8tbl.h - Header file for Conversion Table
 | 
			
		||||
 *
 | 
			
		||||
 * $Id$
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef _UTF8TBL_H_
 | 
			
		||||
#define _UTF8TBL_H_
 | 
			
		||||
 | 
			
		||||
#ifdef UTF8_OUTPUT_ENABLE
 | 
			
		||||
#define sizeof_euc_to_utf8_1byte 94
 | 
			
		||||
#define sizeof_euc_to_utf8_2bytes 94
 | 
			
		||||
extern const unsigned short euc_to_utf8_1byte[];
 | 
			
		||||
extern const unsigned short *const euc_to_utf8_2bytes[];
 | 
			
		||||
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
 | 
			
		||||
| 
						 | 
				
			
			@ -10,6 +18,10 @@ extern const unsigned short *const x0212_to_utf8_2bytes[];
 | 
			
		|||
#endif /* UTF8_OUTPUT_ENABLE */
 | 
			
		||||
 | 
			
		||||
#ifdef UTF8_INPUT_ENABLE
 | 
			
		||||
#define sizeof_utf8_to_euc_C2 64
 | 
			
		||||
#define sizeof_utf8_to_euc_E5B8 64
 | 
			
		||||
#define sizeof_utf8_to_euc_2bytes 112
 | 
			
		||||
#define sizeof_utf8_to_euc_3bytes 16
 | 
			
		||||
extern const unsigned short *const utf8_to_euc_2bytes[];
 | 
			
		||||
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
 | 
			
		||||
extern const unsigned short *const utf8_to_euc_2bytes_932[];
 | 
			
		||||
| 
						 | 
				
			
			@ -21,11 +33,23 @@ extern const unsigned short *const *const utf8_to_euc_3bytes_mac[];
 | 
			
		|||
#endif /* UTF8_INPUT_ENABLE */
 | 
			
		||||
 | 
			
		||||
#ifdef UNICODE_NORMALIZATION
 | 
			
		||||
 | 
			
		||||
#define NORMALIZATION_TABLE_LENGTH 942
 | 
			
		||||
#define NORMALIZATION_TABLE_NFC_LENGTH 3
 | 
			
		||||
#define NORMALIZATION_TABLE_NFD_LENGTH 9
 | 
			
		||||
struct normalization_pair {
 | 
			
		||||
    const unsigned char nfc[NORMALIZATION_TABLE_NFC_LENGTH];
 | 
			
		||||
    const unsigned char nfd[NORMALIZATION_TABLE_NFD_LENGTH];
 | 
			
		||||
};
 | 
			
		||||
extern const struct normalization_pair normalization_table[];
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef SHIFTJIS_CP932
 | 
			
		||||
#define CP932_TABLE_BEGIN 0xFA
 | 
			
		||||
#define CP932_TABLE_END   0xFC
 | 
			
		||||
extern const unsigned short shiftjis_cp932[3][189];
 | 
			
		||||
#define CP932INV_TABLE_BEGIN 0xED
 | 
			
		||||
#define CP932INV_TABLE_END   0xEE
 | 
			
		||||
extern const unsigned short cp932inv[2][189];
 | 
			
		||||
#endif /* SHIFTJIS_CP932 */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue