1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c}: Update nkf.

* ext/nkf/nkf.c: fix documents.

* ext/nkf/lib/kconv.rb: fix documents.
  (Kconv.is*): use valid_encoding?.
  (Kconv.isjis): defined.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14833 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2008-01-01 15:22:25 +00:00
parent 5f41f87d2b
commit 7d847f7c37
6 changed files with 801 additions and 867 deletions

View file

@ -1,3 +1,13 @@
Wed Jan 2 00:14:41 2008 NARUSE, Yui <naruse@ruby-lang.org>
* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c}: Update nkf.
* ext/nkf/nkf.c: fix documents.
* ext/nkf/lib/kconv.rb: fix documents.
(Kconv.is*): use valid_encoding?.
(Kconv.isjis): defined.
Tue Jan 1 23:17:03 2008 Tanaka Akira <akr@fsij.org> Tue Jan 1 23:17:03 2008 Tanaka Akira <akr@fsij.org>
* common.mk: dependency updated. * common.mk: dependency updated.

View file

@ -44,38 +44,6 @@ module Kconv
# UNKNOWN # UNKNOWN
UNKNOWN = NKF::UNKNOWN UNKNOWN = NKF::UNKNOWN
#
#
# Private Constants
#
#Regexp of Encoding
# Regexp of Shift_JIS string (private constant)
RegexpShiftjis = /\A(?:
[\x00-\x7f\xa1-\xdf] |
[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
)*\z/nx
# Regexp of EUC-JP string (private constant)
RegexpEucjp = /\A(?:
[\x00-\x7f] |
\x8e [\xa1-\xdf] |
\x8f [\xa1-\xfe] [\xa1-\xfe] |
[\xa1-\xfe] [\xa1-\xfe]
)*\z/nx
# Regexp of UTF-8 string (private constant)
RegexpUtf8 = /\A(?:
[\x00-\x7f] |
[\xc2-\xdf] [\x80-\xbf] |
\xe0 [\xa0-\xbf] [\x80-\xbf] |
[\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
\xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
[\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
\xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
)*\z/nx
# #
# Public Methods # Public Methods
# #
@ -99,7 +67,7 @@ module Kconv
# #
# call-seq: # call-seq:
# Kconv.tojis(str) -> string # Kconv.tojis(str) => string
# #
# Convert <code>str</code> to ISO-2022-JP # Convert <code>str</code> to ISO-2022-JP
def tojis(str) def tojis(str)
@ -108,7 +76,7 @@ module Kconv
module_function :tojis module_function :tojis
# call-seq: # call-seq:
# Kconv.toeuc(str) -> string # Kconv.toeuc(str) => string
# #
# Convert <code>str</code> to EUC-JP # Convert <code>str</code> to EUC-JP
def toeuc(str) def toeuc(str)
@ -117,7 +85,7 @@ module Kconv
module_function :toeuc module_function :toeuc
# call-seq: # call-seq:
# Kconv.tosjis(str) -> string # Kconv.tosjis(str) => string
# #
# Convert <code>str</code> to Shift_JIS # Convert <code>str</code> to Shift_JIS
def tosjis(str) def tosjis(str)
@ -126,7 +94,7 @@ module Kconv
module_function :tosjis module_function :tosjis
# call-seq: # call-seq:
# Kconv.toutf8(str) -> string # Kconv.toutf8(str) => string
# #
# Convert <code>str</code> to UTF-8 # Convert <code>str</code> to UTF-8
def toutf8(str) def toutf8(str)
@ -135,7 +103,7 @@ module Kconv
module_function :toutf8 module_function :toutf8
# call-seq: # call-seq:
# Kconv.toutf16(str) -> string # Kconv.toutf16(str) => string
# #
# Convert <code>str</code> to UTF-16 # Convert <code>str</code> to UTF-16
def toutf16(str) def toutf16(str)
@ -144,7 +112,7 @@ module Kconv
module_function :toutf16 module_function :toutf16
# call-seq: # call-seq:
# Kconv.toutf32(str) -> string # Kconv.toutf32(str) => string
# #
# Convert <code>str</code> to UTF-32 # Convert <code>str</code> to UTF-32
def toutf32(str) def toutf32(str)
@ -152,12 +120,21 @@ module Kconv
end end
module_function :toutf32 module_function :toutf32
# call-seq:
# Kconv.tolocale(str) => string
#
# Convert <code>str</code> to locale encoding
# (the charmap name returned by <code>Encoding.locale_charmap</code>).
def tolocale(str)
  # Takes the string explicitly, like the other Kconv.to* converters;
  # String#tolocale passes +self+ here.
  kconv(str, Encoding.locale_charmap)
end
module_function :tolocale
# #
# guess # guess
# #
# call-seq: # call-seq:
# Kconv.guess(str) -> integer # Kconv.guess(str) => encoding
# #
# Guess input encoding by NKF.guess # Guess input encoding by NKF.guess
def guess(str) def guess(str)
@ -170,38 +147,52 @@ module Kconv
# #
# call-seq: # call-seq:
# Kconv.iseuc(str) -> obj or nil # Kconv.iseuc(str) => true or false
# #
# Returns whether input encoding is EUC-JP or not. # Returns whether input encoding is EUC-JP or not.
# #
# *Note* don't expect this return value is MatchData. # *Note* don't expect this return value is MatchData.
def iseuc(str) def iseuc(str)
RegexpEucjp.match( str ) str.dup.force_encoding(EUC).valid_encoding?
end end
module_function :iseuc module_function :iseuc
# call-seq: # call-seq:
# Kconv.issjis(str) -> obj or nil # Kconv.issjis(str) => true or false
# #
# Returns whether input encoding is Shift_JIS or not. # Returns whether input encoding is Shift_JIS or not.
#
# *Note* don't expect this return value is MatchData.
def issjis(str) def issjis(str)
RegexpShiftjis.match( str ) str.dup.force_encoding(SJIS).valid_encoding?
end end
module_function :issjis module_function :issjis
# call-seq: # call-seq:
# Kconv.isutf8(str) -> obj or nil # Kconv.isjis(str) => true or false
#
# Returns whether input encoding is ISO-2022-JP or not.
def isjis(str)
  # Match byte-wise on a binary copy so the caller's string (and its
  # encoding tag) is left untouched. The pattern accepts plain ASCII text
  # optionally interleaved with escape-introduced segments (ESC ( I,
  # ESC ( J, ESC $ @, ESC $ B, ESC $ ( D), each run of which must be
  # closed by ESC ( B before ASCII text resumes or the string ends.
  /\A [\t\n\r\x20-\x7E]*
    (?:
      (?:\x1b \x28 I [\x21-\x7E]*
        |\x1b \x28 J [\x21-\x7E]*
        |\x1b \x24 @ (?:[\x21-\x7E]{2})*
        |\x1b \x24 B (?:[\x21-\x7E]{2})*
        |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
      )*
      \x1b \x28 B [\t\n\r\x20-\x7E]*
    )*
  \z/nox =~ str.dup.force_encoding(Encoding::BINARY) ? true : false
end
module_function :isjis
# call-seq:
# Kconv.isutf8(str) => true or false
# #
# Returns whether input encoding is UTF-8 or not. # Returns whether input encoding is UTF-8 or not.
#
# *Note* don't expect this return value is MatchData.
def isutf8(str) def isutf8(str)
RegexpUtf8.match( str ) str.dup.force_encoding(UTF8).valid_encoding?
end end
module_function :isutf8 module_function :isutf8
end end
class String class String
@ -220,66 +211,72 @@ class String
# #
# call-seq: # call-seq:
# String#tojis -> string # String#tojis => string
# #
# Convert <code>self</code> to ISO-2022-JP # Convert <code>self</code> to ISO-2022-JP
def tojis; Kconv.tojis(self) end def tojis; Kconv.tojis(self) end
# call-seq: # call-seq:
# String#toeuc -> string # String#toeuc => string
# #
# Convert <code>self</code> to EUC-JP # Convert <code>self</code> to EUC-JP
def toeuc; Kconv.toeuc(self) end def toeuc; Kconv.toeuc(self) end
# call-seq: # call-seq:
# String#tosjis -> string # String#tosjis => string
# #
# Convert <code>self</code> to Shift_JIS # Convert <code>self</code> to Shift_JIS
def tosjis; Kconv.tosjis(self) end def tosjis; Kconv.tosjis(self) end
# call-seq: # call-seq:
# String#toutf8 -> string # String#toutf8 => string
# #
# Convert <code>self</code> to UTF-8 # Convert <code>self</code> to UTF-8
def toutf8; Kconv.toutf8(self) end def toutf8; Kconv.toutf8(self) end
# call-seq: # call-seq:
# String#toutf16 -> string # String#toutf16 => string
# #
# Convert <code>self</code> to UTF-16 # Convert <code>self</code> to UTF-16
def toutf16; Kconv.toutf16(self) end def toutf16; Kconv.toutf16(self) end
# call-seq: # call-seq:
# String#toutf32 -> string # String#toutf32 => string
# #
# Convert <code>self</code> to UTF-32 # Convert <code>self</code> to UTF-32
def toutf32; Kconv.toutf32(self) end def toutf32; Kconv.toutf32(self) end
# call-seq:
# String#tolocale => string
#
# Returns <code>self</code> converted to the locale encoding
# (delegates to Kconv.tolocale).
def tolocale
  Kconv.tolocale(self)
end
# #
# is Encoding # is Encoding
# #
# call-seq: # call-seq:
# String#iseuc -> obj or nil # String#iseuc => true or false
# #
# Returns whether <code>self</code>'s encoding is EUC-JP or not. # Returns whether <code>self</code>'s encoding is EUC-JP or not.
#
# *Note* don't expect this return value is MatchData.
def iseuc; Kconv.iseuc(self) end def iseuc; Kconv.iseuc(self) end
# call-seq: # call-seq:
# String#issjis -> obj or nil # String#issjis => true or false
# #
# Returns whether <code>self</code>'s encoding is Shift_JIS or not. # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
#
# *Note* don't expect this return value is MatchData.
def issjis; Kconv.issjis(self) end def issjis; Kconv.issjis(self) end
# call-seq: # call-seq:
# String#isutf8 -> obj or nil # String#isjis => true or false
#
# Returns whether <code>self</code>'s encoding is ISO-2022-JP or not
# (delegates to Kconv.isjis).
def isjis
  Kconv.isjis(self)
end
# call-seq:
# String#isutf8 => true or false
# #
# Returns whether <code>self</code>'s encoding is UTF-8 or not. # Returns whether <code>self</code>'s encoding is UTF-8 or not.
#
# *Note* don't expect this return value is MatchData.
def isutf8; Kconv.isutf8(self) end def isutf8; Kconv.isutf8(self) end
end end

View file

@ -32,7 +32,7 @@
***********************************************************************/ ***********************************************************************/
/* $Id$ */ /* $Id$ */
#define NKF_VERSION "2.0.8" #define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2007-12-23" #define NKF_RELEASE_DATE "2007-01-02"
#define COPY_RIGHT \ #define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon" "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
@ -205,11 +205,12 @@ void djgpp_setbinmode(FILE *fp)
/* byte order */ /* byte order */
enum byte_order {
#define ENDIAN_BIG 1234 ENDIAN_BIG = 1,
#define ENDIAN_LITTLE 4321 ENDIAN_LITTLE = 2,
#define ENDIAN_2143 2143 ENDIAN_2143 = 3,
#define ENDIAN_3412 3412 ENDIAN_3412 = 4
};
/* ASCII CODE */ /* ASCII CODE */
@ -266,10 +267,10 @@ enum nkf_encodings {
UTF_32LE, UTF_32LE,
UTF_32LE_BOM, UTF_32LE_BOM,
JIS_X_0201=0x1000, JIS_X_0201=0x1000,
JIS_X_0208, JIS_X_0208=0x1001,
JIS_X_0212, JIS_X_0212=0x1002,
JIS_X_0213_1, JIS_X_0213_1=0x1003,
JIS_X_0213_2, JIS_X_0213_2=0x1004,
BINARY BINARY
}; };
@ -286,9 +287,9 @@ void w_oconv16(nkf_char c2, nkf_char c1);
void w_oconv32(nkf_char c2, nkf_char c1); void w_oconv32(nkf_char c2, nkf_char c1);
typedef struct { typedef struct {
char *name; const char *name;
nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
void (*oconv_func)(nkf_char c2, nkf_char c1); void (*oconv)(nkf_char c2, nkf_char c1);
} nkf_native_encoding; } nkf_native_encoding;
nkf_native_encoding NkfEncodingASCII = { "US_ASCII", e_iconv, e_oconv }; nkf_native_encoding NkfEncodingASCII = { "US_ASCII", e_iconv, e_oconv };
@ -300,21 +301,21 @@ nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
typedef struct { typedef struct {
int id; const int id;
char *name; const char *name;
nkf_native_encoding *based_encoding; const nkf_native_encoding *base_encoding;
} nkf_encoding; } nkf_encoding;
nkf_encoding nkf_encoding_table[] = { nkf_encoding nkf_encoding_table[] = {
{ASCII, "ASCII", &NkfEncodingASCII}, {ASCII, "ASCII", &NkfEncodingASCII},
{ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII}, {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
{ISO_2022_JP, "ISO-2022-JP", &NkfEncodingASCII}, {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
{CP50220, "CP50220", &NkfEncodingISO_2022_JP}, {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
{CP50221, "CP50221", &NkfEncodingISO_2022_JP}, {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
{CP50222, "CP50222", &NkfEncodingISO_2022_JP}, {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP}, {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP}, {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
{SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS}, {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
{WINDOWS_31J, "WINDOWS-31J", &NkfEncodingShift_JIS}, {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
{CP10001, "CP10001", &NkfEncodingShift_JIS}, {CP10001, "CP10001", &NkfEncodingShift_JIS},
{EUC_JP, "EUC-JP", &NkfEncodingEUC_JP}, {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
{CP51932, "CP51932", &NkfEncodingEUC_JP}, {CP51932, "CP51932", &NkfEncodingEUC_JP},
@ -476,7 +477,8 @@ struct input_code{
}; };
static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */ static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
static nkf_encoding *output_encoding; static nkf_encoding *input_encoding = NULL;
static nkf_encoding *output_encoding = NULL;
#if !defined(PERL_XS) && !defined(WIN32DLL) #if !defined(PERL_XS) && !defined(WIN32DLL)
static nkf_char noconvert(FILE *f); static nkf_char noconvert(FILE *f);
@ -601,7 +603,6 @@ static int nop_f = FALSE;
static int binmode_f = TRUE; /* binary mode */ static int binmode_f = TRUE; /* binary mode */
static int rot_f = FALSE; /* rot14/43 mode */ static int rot_f = FALSE; /* rot14/43 mode */
static int hira_f = FALSE; /* hira/kata henkan */ static int hira_f = FALSE; /* hira/kata henkan */
static int input_f = FALSE; /* non fixed input code */
static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */ static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
static int mime_decode_f = FALSE; /* mime decode is explicitly on */ static int mime_decode_f = FALSE; /* mime decode is explicitly on */
@ -753,11 +754,8 @@ static int fold_margin = FOLD_MARGIN;
#endif #endif
/* process default */ /* process default */
static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
/* s_iconv or oconv */
static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2; static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection; static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection; static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
@ -948,7 +946,20 @@ static nkf_encoding *nkf_enc_find(const char *name)
#define nkf_enc_name(enc) (enc)->name #define nkf_enc_name(enc) (enc)->name
#define nkf_enc_to_index(enc) (enc)->id #define nkf_enc_to_index(enc) (enc)->id
#define nkf_enc_to_base_encoding(enc) (enc)->based_encoding #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* Input / output converter function pointers of the native (base)
 * encoding that `enc` is built on. */
#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* Non-zero when the base encoding of `enc` is NkfEncodingASCII or
 * NkfEncodingISO_2022_JP. */
#define nkf_enc_asciicompat(enc) (\
nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* Non-zero when the base encoding of `enc` is one of the UTF-8 / UTF-16 /
 * UTF-32 native encodings. */
#define nkf_enc_unicode_p(enc) (\
nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* Non-zero when the id of `enc` is CP50220, CP50221 or CP50222.
 * Unlike the two predicates above this compares the encoding id, not the
 * base encoding. */
#define nkf_enc_cp5022x_p(enc) (\
nkf_enc_to_index(enc) == CP50220 ||\
nkf_enc_to_index(enc) == CP50221 ||\
nkf_enc_to_index(enc) == CP50222)
#ifdef WIN32DLL #ifdef WIN32DLL
#include "nkf32dll.c" #include "nkf32dll.c"
@ -1294,7 +1305,7 @@ static const struct {
{"katakana","h2"}, {"katakana","h2"},
{"katakana-hiragana","h3"}, {"katakana-hiragana","h3"},
{"guess=", ""}, {"guess=", ""},
{"guess", "g1"}, {"guess", "g2"},
{"cp932", ""}, {"cp932", ""},
{"no-cp932", ""}, {"no-cp932", ""},
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
@ -1358,7 +1369,6 @@ void options(unsigned char *cp)
char codeset[32]; char codeset[32];
nkf_encoding *enc; nkf_encoding *enc;
if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
if (option_mode==1) if (option_mode==1)
return; return;
while(*cp && *cp++!='-'); while(*cp && *cp++!='-');
@ -1396,14 +1406,12 @@ void options(unsigned char *cp)
if (strcmp(long_option[i].name, "ic=") == 0){ if (strcmp(long_option[i].name, "ic=") == 0){
nkf_str_upcase((char *)p, codeset, 32); nkf_str_upcase((char *)p, codeset, 32);
enc = nkf_enc_find(codeset); enc = nkf_enc_find(codeset);
switch (nkf_enc_to_index(enc)) { if (!enc) continue;
case ISO_2022_JP: input_encoding = enc;
input_f = JIS_INPUT; switch (nkf_enc_to_index(input_encoding)) {
break;
case CP50220: case CP50220:
case CP50221: case CP50221:
case CP50222: case CP50222:
input_f = JIS_INPUT;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = TRUE; cp51932_f = TRUE;
#endif #endif
@ -1412,23 +1420,17 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case ISO_2022_JP_1: case ISO_2022_JP_1:
input_f = JIS_INPUT;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
break; break;
case ISO_2022_JP_3: case ISO_2022_JP_3:
input_f = JIS_INPUT;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
x0213_f = TRUE; x0213_f = TRUE;
break; break;
case SHIFT_JIS:
input_f = SJIS_INPUT;
break;
case WINDOWS_31J: case WINDOWS_31J:
input_f = SJIS_INPUT;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = TRUE; cp51932_f = TRUE;
#endif #endif
@ -1437,7 +1439,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case CP10001: case CP10001:
input_f = SJIS_INPUT;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = TRUE; cp51932_f = TRUE;
#endif #endif
@ -1445,11 +1446,7 @@ void options(unsigned char *cp)
ms_ucs_map_f = UCS_MAP_CP10001; ms_ucs_map_f = UCS_MAP_CP10001;
#endif #endif
break; break;
case EUC_JP:
input_f = EUC_INPUT;
break;
case CP51932: case CP51932:
input_f = EUC_INPUT;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = TRUE; cp51932_f = TRUE;
#endif #endif
@ -1458,7 +1455,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case EUCJP_MS: case EUCJP_MS:
input_f = EUC_INPUT;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = FALSE; cp51932_f = FALSE;
#endif #endif
@ -1467,7 +1463,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case EUCJP_ASCII: case EUCJP_ASCII:
input_f = EUC_INPUT;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = FALSE; cp51932_f = FALSE;
#endif #endif
@ -1477,7 +1472,6 @@ void options(unsigned char *cp)
break; break;
case SHIFT_JISX0213: case SHIFT_JISX0213:
case SHIFT_JIS_2004: case SHIFT_JIS_2004:
input_f = SJIS_INPUT;
x0213_f = TRUE; x0213_f = TRUE;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = FALSE; cp51932_f = FALSE;
@ -1485,50 +1479,36 @@ void options(unsigned char *cp)
break; break;
case EUC_JISX0213: case EUC_JISX0213:
case EUC_JIS_2004: case EUC_JIS_2004:
input_f = EUC_INPUT;
x0213_f = TRUE; x0213_f = TRUE;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp51932_f = FALSE; cp51932_f = FALSE;
#endif #endif
break; break;
#ifdef UTF8_INPUT_ENABLE #ifdef UTF8_INPUT_ENABLE
case UTF_8:
case UTF_8N:
case UTF_8_BOM:
input_f = UTF8_INPUT;
break;
#ifdef UNICODE_NORMALIZATION #ifdef UNICODE_NORMALIZATION
case UTF8_MAC: case UTF8_MAC:
input_f = UTF8_INPUT;
nfc_f = TRUE; nfc_f = TRUE;
break; break;
#endif #endif
case UTF_16: case UTF_16:
case UTF_16BE: case UTF_16BE:
case UTF_16BE_BOM: case UTF_16BE_BOM:
input_f = UTF16_INPUT;
input_endian = ENDIAN_BIG; input_endian = ENDIAN_BIG;
break; break;
case UTF_16LE: case UTF_16LE:
case UTF_16LE_BOM: case UTF_16LE_BOM:
input_f = UTF16_INPUT;
input_endian = ENDIAN_LITTLE; input_endian = ENDIAN_LITTLE;
break; break;
case UTF_32: case UTF_32:
case UTF_32BE: case UTF_32BE:
case UTF_32BE_BOM: case UTF_32BE_BOM:
input_f = UTF32_INPUT;
input_endian = ENDIAN_BIG; input_endian = ENDIAN_BIG;
break; break;
case UTF_32LE: case UTF_32LE:
case UTF_32LE_BOM: case UTF_32LE_BOM:
input_f = UTF32_INPUT;
input_endian = ENDIAN_LITTLE; input_endian = ENDIAN_LITTLE;
break; break;
#endif #endif
default:
fprintf(stderr, "unknown input encoding: %s\n", codeset);
break;
} }
continue; continue;
} }
@ -1539,21 +1519,16 @@ void options(unsigned char *cp)
if (enc <= 0) continue; if (enc <= 0) continue;
output_encoding = enc; output_encoding = enc;
switch (nkf_enc_to_index(output_encoding)) { switch (nkf_enc_to_index(output_encoding)) {
case ISO_2022_JP:
output_conv = j_oconv;
break;
case CP50220: case CP50220:
output_conv = j_oconv; x0201_f = TRUE;
x0201_f = TRUE;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp932inv_f = FALSE; cp932inv_f = FALSE;
#endif #endif
#ifdef UTF8_OUTPUT_ENABLE #ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932; ms_ucs_map_f = UCS_MAP_CP932;
#endif #endif
break; break;
case CP50221: case CP50221:
output_conv = j_oconv;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp932inv_f = FALSE; cp932inv_f = FALSE;
#endif #endif
@ -1562,7 +1537,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case ISO_2022_JP_1: case ISO_2022_JP_1:
output_conv = j_oconv;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
@ -1571,7 +1545,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case ISO_2022_JP_3: case ISO_2022_JP_3:
output_conv = j_oconv;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
@ -1580,26 +1553,17 @@ void options(unsigned char *cp)
cp932inv_f = FALSE; cp932inv_f = FALSE;
#endif #endif
break; break;
case SHIFT_JIS:
output_conv = s_oconv;
break;
case WINDOWS_31J: case WINDOWS_31J:
output_conv = s_oconv;
#ifdef UTF8_OUTPUT_ENABLE #ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932; ms_ucs_map_f = UCS_MAP_CP932;
#endif #endif
break; break;
case CP10001: case CP10001:
output_conv = s_oconv;
#ifdef UTF8_OUTPUT_ENABLE #ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP10001; ms_ucs_map_f = UCS_MAP_CP10001;
#endif #endif
break; break;
case EUC_JP:
output_conv = e_oconv;
break;
case CP51932: case CP51932:
output_conv = e_oconv;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp932inv_f = FALSE; cp932inv_f = FALSE;
#endif #endif
@ -1608,7 +1572,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case EUCJP_MS: case EUCJP_MS:
output_conv = e_oconv;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
@ -1617,7 +1580,6 @@ void options(unsigned char *cp)
#endif #endif
break; break;
case EUCJP_ASCII: case EUCJP_ASCII:
output_conv = e_oconv;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
@ -1627,15 +1589,13 @@ void options(unsigned char *cp)
break; break;
case SHIFT_JISX0213: case SHIFT_JISX0213:
case SHIFT_JIS_2004: case SHIFT_JIS_2004:
output_conv = s_oconv; x0213_f = TRUE;
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932 #ifdef SHIFTJIS_CP932
cp932inv_f = FALSE; cp932inv_f = FALSE;
#endif #endif
break; break;
case EUC_JISX0213: case EUC_JISX0213:
case EUC_JIS_2004: case EUC_JIS_2004:
output_conv = e_oconv;
#ifdef X0212_ENABLE #ifdef X0212_ENABLE
x0212_f = TRUE; x0212_f = TRUE;
#endif #endif
@ -1645,60 +1605,41 @@ void options(unsigned char *cp)
#endif #endif
break; break;
#ifdef UTF8_OUTPUT_ENABLE #ifdef UTF8_OUTPUT_ENABLE
case UTF_8:
case UTF_8N:
output_conv = w_oconv;
break;
case UTF_8_BOM: case UTF_8_BOM:
output_conv = w_oconv;
output_bom_f = TRUE; output_bom_f = TRUE;
break; break;
case UTF_16BE:
output_conv = w_oconv16;
break;
case UTF_16: case UTF_16:
case UTF_16BE_BOM: case UTF_16BE_BOM:
output_conv = w_oconv16;
output_bom_f = TRUE; output_bom_f = TRUE;
break; break;
case UTF_16LE: case UTF_16LE:
output_conv = w_oconv16;
output_endian = ENDIAN_LITTLE; output_endian = ENDIAN_LITTLE;
output_bom_f = FALSE;
break; break;
case UTF_16LE_BOM: case UTF_16LE_BOM:
output_conv = w_oconv16;
output_endian = ENDIAN_LITTLE; output_endian = ENDIAN_LITTLE;
output_bom_f = TRUE; output_bom_f = TRUE;
break; break;
case UTF_32:
case UTF_32BE:
output_conv = w_oconv32;
break;
case UTF_32BE_BOM: case UTF_32BE_BOM:
output_conv = w_oconv32;
output_bom_f = TRUE; output_bom_f = TRUE;
break; break;
case UTF_32LE: case UTF_32LE:
output_conv = w_oconv32;
output_endian = ENDIAN_LITTLE; output_endian = ENDIAN_LITTLE;
output_bom_f = FALSE;
break; break;
case UTF_32LE_BOM: case UTF_32LE_BOM:
output_conv = w_oconv32;
output_endian = ENDIAN_LITTLE; output_endian = ENDIAN_LITTLE;
output_bom_f = TRUE; output_bom_f = TRUE;
break; break;
#endif #endif
default:
fprintf(stderr, "unknown output encoding: %s\n", codeset);
break;
} }
continue; continue;
} }
if (strcmp(long_option[i].name, "guess=") == 0){ if (strcmp(long_option[i].name, "guess=") == 0){
if (p[0] == '1') { if (p[0] == '0' || p[0] == '1') {
guess_f = 2;
} else {
guess_f = 1; guess_f = 1;
} else {
guess_f = 2;
} }
continue; continue;
} }
@ -1872,7 +1813,6 @@ void options(unsigned char *cp)
#endif #endif
#ifdef UNICODE_NORMALIZATION #ifdef UNICODE_NORMALIZATION
if (strcmp(long_option[i].name, "utf8mac-input") == 0){ if (strcmp(long_option[i].name, "utf8mac-input") == 0){
input_f = UTF8_INPUT;
nfc_f = TRUE; nfc_f = TRUE;
continue; continue;
} }
@ -1912,21 +1852,18 @@ void options(unsigned char *cp)
continue; continue;
case 'j': /* JIS output */ case 'j': /* JIS output */
case 'n': case 'n':
output_conv = j_oconv;
output_encoding = nkf_enc_from_index(ISO_2022_JP); output_encoding = nkf_enc_from_index(ISO_2022_JP);
continue; continue;
case 'e': /* AT&T EUC output */ case 'e': /* AT&T EUC output */
output_conv = e_oconv;
cp932inv_f = FALSE; cp932inv_f = FALSE;
output_encoding = nkf_enc_from_index(EUC_JP); output_encoding = nkf_enc_from_index(EUC_JP);
continue; continue;
case 's': /* SJIS output */ case 's': /* SJIS output */
output_conv = s_oconv; output_encoding = nkf_enc_from_index(WINDOWS_31J);
output_encoding = nkf_enc_from_index(SHIFT_JIS);
continue; continue;
case 'l': /* ISO8859 Latin-1 support, no conversion */ case 'l': /* ISO8859 Latin-1 support, no conversion */
iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
input_f = LATIN1_INPUT; input_encoding = nkf_enc_from_index(ISO_8859_1);
continue; continue;
case 'i': /* Kanji IN ESC-$-@/B */ case 'i': /* Kanji IN ESC-$-@/B */
if (*cp=='@'||*cp=='B') if (*cp=='@'||*cp=='B')
@ -1967,7 +1904,7 @@ void options(unsigned char *cp)
#ifdef UTF8_OUTPUT_ENABLE #ifdef UTF8_OUTPUT_ENABLE
case 'w': /* UTF-8 output */ case 'w': /* UTF-8 output */
if (cp[0] == '8') { if (cp[0] == '8') {
output_conv = w_oconv; cp++; cp++;
if (cp[0] == '0'){ if (cp[0] == '0'){
cp++; cp++;
output_encoding = nkf_enc_from_index(UTF_8N); output_encoding = nkf_enc_from_index(UTF_8N);
@ -1978,13 +1915,12 @@ void options(unsigned char *cp)
} else { } else {
int enc_idx; int enc_idx;
if ('1'== cp[0] && '6'==cp[1]) { if ('1'== cp[0] && '6'==cp[1]) {
output_conv = w_oconv16; cp+=2; cp += 2;
enc_idx = UTF_16; enc_idx = UTF_16;
} else if ('3'== cp[0] && '2'==cp[1]) { } else if ('3'== cp[0] && '2'==cp[1]) {
output_conv = w_oconv32; cp+=2; cp += 2;
enc_idx = UTF_32; enc_idx = UTF_32;
} else { } else {
output_conv = w_oconv;
output_encoding = nkf_enc_from_index(UTF_8); output_encoding = nkf_enc_from_index(UTF_8);
continue; continue;
} }
@ -2016,18 +1952,19 @@ void options(unsigned char *cp)
case 'W': /* UTF input */ case 'W': /* UTF input */
if (cp[0] == '8') { if (cp[0] == '8') {
cp++; cp++;
input_f = UTF8_INPUT; input_encoding = nkf_enc_from_index(UTF_8);
}else{ }else{
int enc_idx;
if ('1'== cp[0] && '6'==cp[1]) { if ('1'== cp[0] && '6'==cp[1]) {
cp += 2; cp += 2;
input_f = UTF16_INPUT;
input_endian = ENDIAN_BIG; input_endian = ENDIAN_BIG;
enc_idx = UTF_16;
} else if ('3'== cp[0] && '2'==cp[1]) { } else if ('3'== cp[0] && '2'==cp[1]) {
cp += 2; cp += 2;
input_f = UTF32_INPUT;
input_endian = ENDIAN_BIG; input_endian = ENDIAN_BIG;
enc_idx = UTF_32;
} else { } else {
input_f = UTF8_INPUT; input_encoding = nkf_enc_from_index(UTF_8);
continue; continue;
} }
if (cp[0]=='L') { if (cp[0]=='L') {
@ -2035,20 +1972,25 @@ void options(unsigned char *cp)
input_endian = ENDIAN_LITTLE; input_endian = ENDIAN_LITTLE;
} else if (cp[0] == 'B') { } else if (cp[0] == 'B') {
cp++; cp++;
input_endian = ENDIAN_BIG;
} }
enc_idx = enc_idx == UTF_16
? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
: (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
input_encoding = nkf_enc_from_index(enc_idx);
} }
continue; continue;
#endif #endif
/* Input code assumption */ /* Input code assumption */
case 'J': /* JIS input */ case 'J': /* ISO-2022-JP input */
input_f = JIS_INPUT; input_encoding = nkf_enc_from_index(ISO_2022_JP);
continue; continue;
case 'E': /* AT&T EUC input */ case 'E': /* EUC-JP input */
input_f = EUC_INPUT; input_encoding = nkf_enc_from_index(EUC_JP);
continue; continue;
case 'S': /* MS Kanji input */ case 'S': /* Windows-31J input */
input_f = SJIS_INPUT; input_encoding = nkf_enc_from_index(WINDOWS_31J);
continue; continue;
case 'Z': /* Convert X0208 alphabet to asii */ case 'Z': /* Convert X0208 alphabet to asii */
/* alpha_f /* alpha_f
bit:0 Convert JIS X 0208 Alphabet to ASCII bit:0 Convert JIS X 0208 Alphabet to ASCII
@ -2160,10 +2102,10 @@ void options(unsigned char *cp)
continue; continue;
#ifndef PERL_XS #ifndef PERL_XS
case 'g': case 'g':
if (*cp == '1') { if ('2' <= *cp && *cp <= '9') {
guess_f = 2; guess_f = 2;
cp++; cp++;
} else if (*cp == '0') { } else if (*cp == '0' || *cp == '1') {
guess_f = 1; guess_f = 1;
cp++; cp++;
} else { } else {
@ -2200,7 +2142,7 @@ struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf
void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)) void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
{ {
#ifdef INPUT_CODE_FIX #ifdef INPUT_CODE_FIX
if (f || !input_f) if (f || !input_encoding)
#endif #endif
if (estab_f != f){ if (estab_f != f){
estab_f = f; estab_f = f;
@ -2208,7 +2150,7 @@ void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_ch
if (iconv_func if (iconv_func
#ifdef INPUT_CODE_FIX #ifdef INPUT_CODE_FIX
&& (f == -TRUE || !input_f) /* -TRUE means "FORCE" */ && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
#endif #endif
){ ){
iconv = iconv_func; iconv = iconv_func;
@ -2588,7 +2530,8 @@ nkf_char noconvert(FILE *f)
void module_connection(void) void module_connection(void)
{ {
oconv = output_conv; if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
oconv = nkf_enc_to_oconv(output_encoding);
o_putc = std_putc; o_putc = std_putc;
/* replace continucation module, from output side */ /* replace continucation module, from output side */
@ -2648,7 +2591,7 @@ void module_connection(void)
} }
#endif #endif
#ifdef UNICODE_NORMALIZATION #ifdef UNICODE_NORMALIZATION
if (nfc_f && input_f == UTF8_INPUT){ if (nfc_f){
i_nfc_getc = i_getc; i_getc = nfc_getc; i_nfc_getc = i_getc; i_getc = nfc_getc;
i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc; i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
} }
@ -2661,18 +2604,8 @@ void module_connection(void)
i_bgetc = i_getc; i_getc = broken_getc; i_bgetc = i_getc; i_getc = broken_getc;
i_bungetc = i_ungetc; i_ungetc = broken_ungetc; i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
} }
if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) { if (input_encoding) {
set_iconv(-TRUE, e_iconv); set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
} else if (input_f == SJIS_INPUT) {
set_iconv(-TRUE, s_iconv);
#ifdef UTF8_INPUT_ENABLE
} else if (input_f == UTF8_INPUT) {
set_iconv(-TRUE, w_iconv);
} else if (input_f == UTF16_INPUT) {
set_iconv(-TRUE, w_iconv16);
} else if (input_f == UTF32_INPUT) {
set_iconv(-TRUE, w_iconv32);
#endif
} else { } else {
set_iconv(FALSE, e_iconv); set_iconv(FALSE, e_iconv);
} }
@ -2696,7 +2629,7 @@ void check_bom(FILE *f)
if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){
if((c2 = (*i_getc)(f)) == 0xFE){ if((c2 = (*i_getc)(f)) == 0xFE){
if((c2 = (*i_getc)(f)) == 0xFF){ if((c2 = (*i_getc)(f)) == 0xFF){
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv32); set_iconv(TRUE, w_iconv32);
} }
if (iconv == w_iconv32) { if (iconv == w_iconv32) {
@ -2708,7 +2641,7 @@ void check_bom(FILE *f)
(*i_ungetc)(0xFE,f); (*i_ungetc)(0xFE,f);
}else if(c2 == 0xFF){ }else if(c2 == 0xFF){
if((c2 = (*i_getc)(f)) == 0xFE){ if((c2 = (*i_getc)(f)) == 0xFE){
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv32); set_iconv(TRUE, w_iconv32);
} }
if (iconv == w_iconv32) { if (iconv == w_iconv32) {
@ -2726,7 +2659,7 @@ void check_bom(FILE *f)
case 0xEF: case 0xEF:
if((c2 = (*i_getc)(f)) == 0xBB){ if((c2 = (*i_getc)(f)) == 0xBB){
if((c2 = (*i_getc)(f)) == 0xBF){ if((c2 = (*i_getc)(f)) == 0xBF){
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv); set_iconv(TRUE, w_iconv);
} }
if (iconv == w_iconv) { if (iconv == w_iconv) {
@ -2742,7 +2675,7 @@ void check_bom(FILE *f)
if((c2 = (*i_getc)(f)) == 0xFF){ if((c2 = (*i_getc)(f)) == 0xFF){
if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){
if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv32); set_iconv(TRUE, w_iconv32);
} }
if (iconv == w_iconv32) { if (iconv == w_iconv32) {
@ -2753,7 +2686,7 @@ void check_bom(FILE *f)
}else (*i_ungetc)(c2,f); }else (*i_ungetc)(c2,f);
(*i_ungetc)(0x00,f); (*i_ungetc)(0x00,f);
}else (*i_ungetc)(c2,f); }else (*i_ungetc)(c2,f);
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv16); set_iconv(TRUE, w_iconv16);
} }
if (iconv == w_iconv16) { if (iconv == w_iconv16) {
@ -2768,7 +2701,7 @@ void check_bom(FILE *f)
if((c2 = (*i_getc)(f)) == 0xFE){ if((c2 = (*i_getc)(f)) == 0xFE){
if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){
if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv32); set_iconv(TRUE, w_iconv32);
} }
if (iconv == w_iconv32) { if (iconv == w_iconv32) {
@ -2779,7 +2712,7 @@ void check_bom(FILE *f)
}else (*i_ungetc)(c2,f); }else (*i_ungetc)(c2,f);
(*i_ungetc)(0x00,f); (*i_ungetc)(0x00,f);
}else (*i_ungetc)(c2,f); }else (*i_ungetc)(c2,f);
if(!input_f){ if(!input_encoding){
set_iconv(TRUE, w_iconv16); set_iconv(TRUE, w_iconv16);
} }
if (iconv == w_iconv16) { if (iconv == w_iconv16) {
@ -2805,11 +2738,7 @@ nkf_char kanji_convert(FILE *f)
nkf_char c3, c2=0, c1, c0=0; nkf_char c3, c2=0, c1, c0=0;
int is_8bit = FALSE; int is_8bit = FALSE;
if(input_f == SJIS_INPUT || input_f == EUC_INPUT if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
#ifdef UTF8_INPUT_ENABLE
|| input_f == UTF8_INPUT || input_f == UTF16_INPUT
#endif
){
is_8bit = TRUE; is_8bit = TRUE;
} }
@ -2826,12 +2755,12 @@ nkf_char kanji_convert(FILE *f)
while ((c1 = (*i_getc)(f)) != EOF) { while ((c1 = (*i_getc)(f)) != EOF) {
#ifdef INPUT_CODE_FIX #ifdef INPUT_CODE_FIX
if (!input_f) if (!input_encoding)
#endif #endif
code_status(c1); code_status(c1);
if (c2) { if (c2) {
/* second byte */ /* second byte */
if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) { if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
/* in case of 8th bit is on */ /* in case of 8th bit is on */
if (!estab_f&&!mime_decode_mode) { if (!estab_f&&!mime_decode_mode) {
/* in case of not established yet */ /* in case of not established yet */
@ -2920,7 +2849,7 @@ nkf_char kanji_convert(FILE *f)
SEND; SEND;
} else } else
#endif #endif
if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) { if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
/* 8 bit code */ /* 8 bit code */
if (!estab_f && !iso8859_f) { if (!estab_f && !iso8859_f) {
/* not established yet */ /* not established yet */
@ -5019,7 +4948,7 @@ void hira_conv(nkf_char c2, nkf_char c1)
c2 = 0x24; c2 = 0x24;
(*o_hira_conv)(c2,c1); (*o_hira_conv)(c2,c1);
return; return;
} else if (c1 == 0x74 && (output_conv == w_oconv || output_conv == w_oconv16)) { } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
c2 = 0; c2 = 0;
c1 = CLASS_UNICODE | 0x3094; c1 = CLASS_UNICODE | 0x3094;
(*o_hira_conv)(c2,c1); (*o_hira_conv)(c2,c1);
@ -6252,7 +6181,6 @@ void reinit(void)
binmode_f = TRUE; binmode_f = TRUE;
rot_f = FALSE; rot_f = FALSE;
hira_f = FALSE; hira_f = FALSE;
input_f = FALSE;
alpha_f = FALSE; alpha_f = FALSE;
mime_f = MIME_DECODE_DEFAULT; mime_f = MIME_DECODE_DEFAULT;
mime_decode_f = FALSE; mime_decode_f = FALSE;
@ -6318,7 +6246,6 @@ void reinit(void)
kanji_intro = DEFAULT_J; kanji_intro = DEFAULT_J;
ascii_intro = DEFAULT_R; ascii_intro = DEFAULT_R;
fold_margin = FOLD_MARGIN; fold_margin = FOLD_MARGIN;
output_conv = DEFAULT_CONV;
oconv = DEFAULT_CONV; oconv = DEFAULT_CONV;
o_zconv = no_connection; o_zconv = no_connection;
o_fconv = no_connection; o_fconv = no_connection;
@ -6353,6 +6280,7 @@ void reinit(void)
iconv_for_check = 0; iconv_for_check = 0;
#endif #endif
input_codename = NULL; input_codename = NULL;
input_encoding = NULL;
output_encoding = nkf_enc_from_index(DEFAULT_ENCODING); output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
#ifdef WIN32DLL #ifdef WIN32DLL
reinitdll(); reinitdll();
@ -6491,7 +6419,7 @@ void show_configuration(void)
"OFF" "OFF"
#endif #endif
"\n"); "\n");
fprintf(HELP_OUTPUT, " --help, --version output: " fprintf(HELP_OUTPUT, " --help, --version output: "
#if HELP_OUTPUT_HELP_OUTPUT #if HELP_OUTPUT_HELP_OUTPUT
"HELP_OUTPUT" "HELP_OUTPUT"
#else #else

File diff suppressed because it is too large Load diff

View file

@ -66,8 +66,7 @@ rb_encoding* rb_nkf_enc_get(const char *name)
int idx = rb_enc_find_index(name); int idx = rb_enc_find_index(name);
if (idx < 0) { if (idx < 0) {
nkf_encoding *nkf_enc = nkf_enc_find(name); nkf_encoding *nkf_enc = nkf_enc_find(name);
nkf_native_encoding * nkf_base_enc = nkf_enc_to_base_encoding(nkf_enc); idx = rb_enc_find_index(nkf_enc_name(nkf_enc_to_base_encoding(nkf_enc)));
idx = rb_enc_find_index(nkf_enc_name(nkf_base_enc));
if (idx < 0) { if (idx < 0) {
idx = rb_define_dummy_encoding(name); idx = rb_define_dummy_encoding(name);
} else { } else {
@ -127,7 +126,7 @@ int nkf_split_options(const char *arg)
/* /*
* call-seq: * call-seq:
* NKF.nkf(opt, str) -> string * NKF.nkf(opt, str) => string
* *
* Convert _str_ and return converted result. * Convert _str_ and return converted result.
* Conversion details are specified by _opt_ as String. * Conversion details are specified by _opt_ as String.
@ -176,7 +175,7 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
/* /*
* call-seq: * call-seq:
* NKF.guess(str) -> encoding * NKF.guess(str) => encoding
* *
* Returns guessed encoding of _str_ by nkf routine. * Returns guessed encoding of _str_ by nkf routine.
* *
@ -402,7 +401,7 @@ rb_nkf_guess(VALUE obj, VALUE src)
* *
* [Shift_JIS] SJIS, MS-Kanji * [Shift_JIS] SJIS, MS-Kanji
* *
* [CP932] a.k.a. Windows-31J * [Windows-31J] a.k.a. CP932
* *
* [UTF-8] same as UTF-8N * [UTF-8] same as UTF-8N
* *

View file

@ -25,7 +25,7 @@ class TestKconv < Test::Unit::TestCase
def test_eucjp def test_eucjp
assert(@euc_str.dup.force_encoding(nil).iseuc) assert(@euc_str.iseuc)
assert_equal(::Kconv::EUC, Kconv.guess(@euc_str)) assert_equal(::Kconv::EUC, Kconv.guess(@euc_str))
assert_equal(@euc_str, @euc_str.toeuc) assert_equal(@euc_str, @euc_str.toeuc)
assert_equal(@euc_str, @sjis_str.toeuc) assert_equal(@euc_str, @sjis_str.toeuc)
@ -37,7 +37,7 @@ class TestKconv < Test::Unit::TestCase
assert_equal(@euc_str, @jis_str.kconv(::NKF::EUC)) assert_equal(@euc_str, @jis_str.kconv(::NKF::EUC))
end end
def test_shiftjis def test_shiftjis
assert(@sjis_str.dup.force_encoding(nil).issjis) assert(@sjis_str.issjis)
assert_equal(::Kconv::SJIS, Kconv.guess(@sjis_str)) assert_equal(::Kconv::SJIS, Kconv.guess(@sjis_str))
assert_equal(@sjis_str, @euc_str.tosjis) assert_equal(@sjis_str, @euc_str.tosjis)
assert_equal(@sjis_str, @sjis_str.tosjis) assert_equal(@sjis_str, @sjis_str.tosjis)
@ -49,7 +49,7 @@ class TestKconv < Test::Unit::TestCase
assert_equal(@sjis_str, @jis_str.kconv(::NKF::SJIS)) assert_equal(@sjis_str, @jis_str.kconv(::NKF::SJIS))
end end
def test_utf8 def test_utf8
assert(@utf8_str.dup.force_encoding(nil).isutf8) assert(@utf8_str.isutf8)
assert_equal(::Kconv::UTF8, Kconv.guess(@utf8_str)) assert_equal(::Kconv::UTF8, Kconv.guess(@utf8_str))
assert_equal(@utf8_str, @euc_str.toutf8) assert_equal(@utf8_str, @euc_str.toutf8)
assert_equal(@utf8_str, @sjis_str.toutf8) assert_equal(@utf8_str, @sjis_str.toutf8)