1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* ext/nkf/nkf.c (rb_nkf_enc_get): use rb_define_dummy_encoding.

* ext/nkf/nkf.c (Init_nkf): use rb_nkf_enc_get("ASCII").

* ext/nkf/nkf-utf8/nkf.c: Update 1.161.

* ext/nkf/nkf-utf8/config.h: default output encoding is now UTF-8.

* ext/nkf/lib/kconv.rb (Kconv.kconv): replace Encoding#name by
  Encoding#to_s.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14520 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2007-12-23 09:37:51 +00:00
parent 6c6f996d71
commit 03a1e25542
5 changed files with 104 additions and 82 deletions

View file

@ -1,3 +1,16 @@
Sun Dec 23 18:31:49 2007 NARUSE, Yui <naruse@ruby-lang.org>
* ext/nkf/nkf.c (rb_nkf_enc_get): use rb_define_dummy_encoding.
* ext/nkf/nkf.c (Init_nkf): use rb_nkf_enc_get("ASCII").
* ext/nkf/nkf-utf8/nkf.c: Update 1.161.
* ext/nkf/nkf-utf8/config.h: default output encoding is now UTF-8.
* ext/nkf/lib/kconv.rb (Kconv.kconv): replace Encoding#name by
Encoding#to_s.
Sun Dec 23 18:02:52 2007 Eric Hodel <drbrain@segment7.net>
* lib/rubygems/gem_open_uri.rb: Fix version check.

View file

@ -87,8 +87,8 @@ module Kconv
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
def kconv(str, to_enc, from_enc=nil)
opt = ''
opt += ' --ic=' + from_enc.name if from_enc
opt += ' --oc=' + to_enc.name if to_enc
opt += ' --ic=' + from_enc.to_s if from_enc
opt += ' --oc=' + to_enc.to_s if to_enc
::NKF::nkf(opt, str)
end
@ -210,13 +210,8 @@ class String
#
# Convert <code>self</code> to out_code.
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
#
# *Note*
# This method decode MIME encoded string and
# convert halfwidth katakana to fullwidth katakana.
# If you don't want to decode them, use NKF.nkf.
def kconv(to_enc, from_enc=nil)
form_enc = self.encoding.name if !from_enc && self.encoding != Encoding.list[0]
form_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
Kconv::kconv(self, to_enc, from_enc)
end

View file

@ -59,10 +59,10 @@ typedef int nkf_nfchar;
/******************************/
/* $B%G%U%)%k%H$N=PNO%3!<%IA*Br(B */
/* Select DEFAULT_CODE */
#define DEFAULT_CODE_JIS
/* #define DEFAULT_CODE_JIS */
/* #define DEFAULT_CODE_SJIS */
/* #define DEFAULT_CODE_EUC */
/* #define DEFAULT_CODE_UTF8 */
#define DEFAULT_CODE_UTF8
/******************************/
#else
#define DEFAULT_CODE_SJIS

View file

@ -32,7 +32,7 @@
***********************************************************************/
/* $Id$ */
#define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2007-12-22"
#define NKF_RELEASE_DATE "2007-12-23"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
@ -72,6 +72,11 @@
#define PUT_NEWLINE(func) func(0x0A)
#define OCONV_NEWLINE(func) func(0, 0x0A)
#endif
#ifdef HELP_OUTPUT_STDERR
#define HELP_OUTPUT stderr
#else
#define HELP_OUTPUT stdout
#endif
#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
#define MSDOS
@ -905,13 +910,13 @@ char* nkf_strcpy(const char *str)
return result;
}
static void nkf_str_upcase(const char *str, char *res, size_t length)
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
int i = 0;
for (; i < length && str[i]; i++) {
res[i] = nkf_toupper(str[i]);
for (; i < length && src[i]; i++) {
dest[i] = nkf_toupper(src[i]);
}
res[i] = 0;
dest[i] = 0;
}
static nkf_encoding *nkf_enc_from_index(int idx)
@ -1056,8 +1061,7 @@ int main(int argc, char **argv)
iconv_for_check = 0;
#endif
if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
perror(*--argv);
*argv++;
perror(*(argv-1));
is_argument_error = TRUE;
continue;
} else {
@ -1354,6 +1358,7 @@ void options(unsigned char *cp)
char codeset[32];
nkf_encoding *enc;
if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
if (option_mode==1)
return;
while(*cp && *cp++!='-');
@ -1389,7 +1394,7 @@ void options(unsigned char *cp)
cp = (unsigned char *)long_option[i].alias;
}else{
if (strcmp(long_option[i].name, "ic=") == 0){
nkf_str_upcase(p, codeset, 32);
nkf_str_upcase((char *)p, codeset, 32);
enc = nkf_enc_find(codeset);
switch (nkf_enc_to_index(enc)) {
case ISO_2022_JP:
@ -1529,8 +1534,10 @@ void options(unsigned char *cp)
}
if (strcmp(long_option[i].name, "oc=") == 0){
x0201_f = FALSE;
nkf_str_upcase(p, codeset, 32);
output_encoding = nkf_enc_find(codeset);
nkf_str_upcase((char *)p, codeset, 32);
enc = nkf_enc_find(codeset);
if (enc <= 0) continue;
output_encoding = enc;
switch (nkf_enc_to_index(output_encoding)) {
case ISO_2022_JP:
output_conv = j_oconv;
@ -1889,8 +1896,8 @@ void options(unsigned char *cp)
case 't': /* transparent mode */
if (*cp=='1') {
/* alias of -t */
cp++;
nop_f = TRUE;
*cp++;
} else if (*cp=='2') {
/*
* -t with put/get
@ -1898,8 +1905,8 @@ void options(unsigned char *cp)
* nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
*
*/
cp++;
nop_f = 2;
*cp++;
} else
nop_f = TRUE;
continue;
@ -6370,87 +6377,87 @@ nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
#endif
void usage(void)
{
fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
fprintf(stderr,"Flags:\n");
fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
fprintf(HELP_OUTPUT,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
fprintf(HELP_OUTPUT,"Flags:\n");
fprintf(HELP_OUTPUT,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
#ifdef DEFAULT_CODE_SJIS
fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_JIS
fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_EUC
fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_UTF8
fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
#endif
#ifdef UTF8_OUTPUT_ENABLE
fprintf(stderr," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
fprintf(HELP_OUTPUT," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
#endif
fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
fprintf(HELP_OUTPUT,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
#ifdef UTF8_INPUT_ENABLE
fprintf(stderr," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
fprintf(HELP_OUTPUT," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
#endif
fprintf(stderr,"t no conversion\n");
fprintf(stderr,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
fprintf(stderr,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
fprintf(stderr,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n");
fprintf(stderr," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n");
fprintf(stderr," 4: JISX0208 Katakana to JISX0201 Katakana\n");
fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
fprintf(HELP_OUTPUT,"t no conversion\n");
fprintf(HELP_OUTPUT,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
fprintf(HELP_OUTPUT,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
fprintf(HELP_OUTPUT,"r {de/en}crypt ROT13/47\n");
fprintf(HELP_OUTPUT,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
fprintf(HELP_OUTPUT,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
fprintf(HELP_OUTPUT,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
fprintf(HELP_OUTPUT,"l ISO8859-1 (Latin-1) support\n");
fprintf(HELP_OUTPUT,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
fprintf(HELP_OUTPUT,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n");
fprintf(HELP_OUTPUT," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n");
fprintf(HELP_OUTPUT," 4: JISX0208 Katakana to JISX0201 Katakana\n");
fprintf(HELP_OUTPUT,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
fprintf(HELP_OUTPUT,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
#ifdef MSDOS
fprintf(stderr,"T Text mode output\n");
fprintf(HELP_OUTPUT,"T Text mode output\n");
#endif
fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n");
fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
fprintf(stderr,"v, V Show this usage. V: show configuration\n");
fprintf(stderr,"\n");
fprintf(stderr,"Long name options\n");
fprintf(stderr," --ic=<input codeset> --oc=<output codeset>\n");
fprintf(stderr," Specify the input or output codeset\n");
fprintf(stderr," --fj --unix --mac --windows\n");
fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
fprintf(stderr," Convert for the system or code\n");
fprintf(stderr," --hiragana --katakana --katakana-hiragana\n");
fprintf(stderr," To Hiragana/Katakana Conversion\n");
fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
fprintf(HELP_OUTPUT,"O Output to File (DEFAULT 'nkf.out')\n");
fprintf(HELP_OUTPUT,"I Convert non ISO-2022-JP charactor to GETA\n");
fprintf(HELP_OUTPUT,"d,c Convert line breaks -d: LF -c: CRLF\n");
fprintf(HELP_OUTPUT,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
fprintf(HELP_OUTPUT,"v, V Show this usage. V: show configuration\n");
fprintf(HELP_OUTPUT,"\n");
fprintf(HELP_OUTPUT,"Long name options\n");
fprintf(HELP_OUTPUT," --ic=<input codeset> --oc=<output codeset>\n");
fprintf(HELP_OUTPUT," Specify the input or output codeset\n");
fprintf(HELP_OUTPUT," --fj --unix --mac --windows\n");
fprintf(HELP_OUTPUT," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
fprintf(HELP_OUTPUT," Convert for the system or code\n");
fprintf(HELP_OUTPUT," --hiragana --katakana --katakana-hiragana\n");
fprintf(HELP_OUTPUT," To Hiragana/Katakana Conversion\n");
fprintf(HELP_OUTPUT," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
#ifdef INPUT_OPTION
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
fprintf(HELP_OUTPUT," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif
#ifdef NUMCHAR_OPTION
fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
fprintf(HELP_OUTPUT," --numchar-input Convert Unicode Character Reference\n");
#endif
#ifdef UTF8_INPUT_ENABLE
fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
fprintf(stderr," Specify how nkf handles unassigned characters\n");
fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n");
fprintf(HELP_OUTPUT," Specify how nkf handles unassigned characters\n");
#endif
#ifdef OVERWRITE
fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
fprintf(stderr," Overwrite original listed files by filtered result\n");
fprintf(stderr," --overwrite preserves timestamp of original files\n");
fprintf(HELP_OUTPUT," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
fprintf(HELP_OUTPUT," Overwrite original listed files by filtered result\n");
fprintf(HELP_OUTPUT," --overwrite preserves timestamp of original files\n");
#endif
fprintf(stderr," -g --guess Guess the input code\n");
fprintf(stderr," --help --version Show this help/the version\n");
fprintf(stderr," For more information, see also man nkf\n");
fprintf(stderr,"\n");
fprintf(HELP_OUTPUT," -g --guess Guess the input code\n");
fprintf(HELP_OUTPUT," --help --version Show this help/the version\n");
fprintf(HELP_OUTPUT," For more information, see also man nkf\n");
fprintf(HELP_OUTPUT,"\n");
version();
}
void show_configuration(void)
{
fprintf(stderr, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
fprintf(stderr, " Compile-time options:\n");
fprintf(stderr, " Default output encoding: "
fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
fprintf(HELP_OUTPUT, " Compile-time options:\n");
fprintf(HELP_OUTPUT, " Default output encoding: "
#if defined(DEFAULT_CODE_JIS)
"ISO-2022-JP"
#elif defined(DEFAULT_CODE_SJIS)
@ -6461,7 +6468,7 @@ void show_configuration(void)
"UTF-8"
#endif
"\n");
fprintf(stderr, " Default output newline: "
fprintf(HELP_OUTPUT, " Default output newline: "
#if DEFAULT_NEWLINE == CR
"CR"
#elif DEFAULT_NEWLINE == CRLF
@ -6470,24 +6477,31 @@ void show_configuration(void)
"LF"
#endif
"\n");
fprintf(stderr, " Decode MIME encoded string: "
fprintf(HELP_OUTPUT, " Decode MIME encoded string: "
#if MIME_DECODE_DEFAULT
"ON"
#else
"OFF"
#endif
"\n");
fprintf(stderr, " Convert JIS X 0201 Katakana: "
fprintf(HELP_OUTPUT, " Convert JIS X 0201 Katakana: "
#if X0201_DEFAULT
"ON"
#else
"OFF"
#endif
"\n");
fprintf(HELP_OUTPUT, " --help, --version output: "
#if HELP_OUTPUT_HELP_OUTPUT
"HELP_OUTPUT"
#else
"STDOUT"
#endif
"\n");
}
void version(void)
{
fprintf(stderr,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
}
#endif /*PERL_XS*/

View file

@ -69,7 +69,7 @@ rb_encoding* rb_nkf_enc_get(const char *name)
nkf_native_encoding * nkf_base_enc = nkf_enc_to_base_encoding(nkf_enc);
idx = rb_enc_find_index(nkf_enc_name(nkf_base_enc));
if (idx < 0) {
idx = rb_enc_replicate(name, rb_ascii8bit_encoding());
idx = rb_define_dummy_encoding(name);
} else {
rb_encoding *rb_enc = rb_enc_from_index(idx);
idx = rb_enc_replicate(name, rb_enc);
@ -485,7 +485,7 @@ Init_nkf()
rb_define_const(mNKF, "NOCONV", Qnil);
rb_define_const(mNKF, "UNKNOWN", Qnil);
rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_ascii8bit_encoding()));
rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("ASCII")));
rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));