mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* ext/nkf/nkf-utf8.c: Update nkf.c rev:1.157.
* ext/nkf/nkf.c (rb_nkf_enc_get): replicate proper based encoding. * ext/nkf/kconv.c (Kconv#kconv, to*): use self.encoding as from_enc when from_enc isn't given. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14481 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
eff45d2cc8
commit
573bd7500b
4 changed files with 149 additions and 165 deletions
|
@ -1,3 +1,12 @@
|
||||||
|
Sat Dec 22 17:35:59 2007 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* ext/nkf/nkf-utf8.c: Update nkf.c rev:1.157.
|
||||||
|
|
||||||
|
* ext/nkf/nkf.c (rb_nkf_enc_get): replicate proper based encoding.
|
||||||
|
|
||||||
|
* ext/nkf/kconv.c (Kconv#kconv, to*): use self.encoding as from_enc
|
||||||
|
when from_enc isn't given.
|
||||||
|
|
||||||
Sat Dec 22 17:31:41 2007 NAKAMURA Usaku <usa@ruby-lang.org>
|
Sat Dec 22 17:31:41 2007 NAKAMURA Usaku <usa@ruby-lang.org>
|
||||||
|
|
||||||
* lib/mkmf.rb ($extmk): fixed broken condition.
|
* lib/mkmf.rb ($extmk): fixed broken condition.
|
||||||
|
|
|
@ -85,11 +85,6 @@ module Kconv
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to out_code.
|
# Convert <code>str</code> to out_code.
|
||||||
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
|
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want to decode them, use NKF.nkf.
|
|
||||||
def kconv(str, to_enc, from_enc=nil)
|
def kconv(str, to_enc, from_enc=nil)
|
||||||
opt = ''
|
opt = ''
|
||||||
opt += ' --ic=' + from_enc.name if from_enc
|
opt += ' --ic=' + from_enc.name if from_enc
|
||||||
|
@ -107,13 +102,8 @@ module Kconv
|
||||||
# Kconv.tojis(str) -> string
|
# Kconv.tojis(str) -> string
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to ISO-2022-JP
|
# Convert <code>str</code> to ISO-2022-JP
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-jxm0', str).
|
|
||||||
def tojis(str)
|
def tojis(str)
|
||||||
::NKF::nkf('-jm', str)
|
kconv(str, JIS)
|
||||||
end
|
end
|
||||||
module_function :tojis
|
module_function :tojis
|
||||||
|
|
||||||
|
@ -121,13 +111,8 @@ module Kconv
|
||||||
# Kconv.toeuc(str) -> string
|
# Kconv.toeuc(str) -> string
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to EUC-JP
|
# Convert <code>str</code> to EUC-JP
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-exm0', str).
|
|
||||||
def toeuc(str)
|
def toeuc(str)
|
||||||
::NKF::nkf('-em', str)
|
kconv(str, EUC)
|
||||||
end
|
end
|
||||||
module_function :toeuc
|
module_function :toeuc
|
||||||
|
|
||||||
|
@ -135,13 +120,8 @@ module Kconv
|
||||||
# Kconv.tosjis(str) -> string
|
# Kconv.tosjis(str) -> string
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to Shift_JIS
|
# Convert <code>str</code> to Shift_JIS
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-sxm0', str).
|
|
||||||
def tosjis(str)
|
def tosjis(str)
|
||||||
::NKF::nkf('-sm', str)
|
kconv(str, SJIS)
|
||||||
end
|
end
|
||||||
module_function :tosjis
|
module_function :tosjis
|
||||||
|
|
||||||
|
@ -149,13 +129,8 @@ module Kconv
|
||||||
# Kconv.toutf8(str) -> string
|
# Kconv.toutf8(str) -> string
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to UTF-8
|
# Convert <code>str</code> to UTF-8
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-wxm0', str).
|
|
||||||
def toutf8(str)
|
def toutf8(str)
|
||||||
::NKF::nkf('-wm', str)
|
kconv(str, UTF8)
|
||||||
end
|
end
|
||||||
module_function :toutf8
|
module_function :toutf8
|
||||||
|
|
||||||
|
@ -163,13 +138,8 @@ module Kconv
|
||||||
# Kconv.toutf16(str) -> string
|
# Kconv.toutf16(str) -> string
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to UTF-16
|
# Convert <code>str</code> to UTF-16
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
|
||||||
def toutf16(str)
|
def toutf16(str)
|
||||||
::NKF::nkf('-w16m', str)
|
kconv(str, UTF16)
|
||||||
end
|
end
|
||||||
module_function :toutf16
|
module_function :toutf16
|
||||||
|
|
||||||
|
@ -177,13 +147,8 @@ module Kconv
|
||||||
# Kconv.toutf32(str) -> string
|
# Kconv.toutf32(str) -> string
|
||||||
#
|
#
|
||||||
# Convert <code>str</code> to UTF-32
|
# Convert <code>str</code> to UTF-32
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-w32xm0', str).
|
|
||||||
def toutf32(str)
|
def toutf32(str)
|
||||||
::NKF::nkf('-w32m', str)
|
kconv(str, UTF32)
|
||||||
end
|
end
|
||||||
module_function :toutf32
|
module_function :toutf32
|
||||||
|
|
||||||
|
@ -251,6 +216,7 @@ class String
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
# convert halfwidth katakana to fullwidth katakana.
|
||||||
# If you don't want to decode them, use NKF.nkf.
|
# If you don't want to decode them, use NKF.nkf.
|
||||||
def kconv(to_enc, from_enc=nil)
  # When no source encoding is given, use this string's own encoding, unless
  # it is Encoding.list[0] (the first registered encoding, i.e. binary),
  # which carries no information about the text.
  # The Encoding object itself is passed on (not its name) because
  # Kconv.kconv calls +from_enc.name+ when building the NKF option string.
  from_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
  Kconv::kconv(self, to_enc, from_enc)
end
|
||||||
|
|
||||||
|
@ -262,66 +228,36 @@ class String
|
||||||
# String#tojis -> string
|
# String#tojis -> string
|
||||||
#
|
#
|
||||||
# Convert <code>self</code> to ISO-2022-JP
|
# Convert <code>self</code> to ISO-2022-JP
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-jxm0', str).
|
|
||||||
def tojis; Kconv.tojis(self) end
|
def tojis; Kconv.tojis(self) end
|
||||||
|
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# String#toeuc -> string
|
# String#toeuc -> string
|
||||||
#
|
#
|
||||||
# Convert <code>self</code> to EUC-JP
|
# Convert <code>self</code> to EUC-JP
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-exm0', str).
|
|
||||||
def toeuc; Kconv.toeuc(self) end
|
def toeuc; Kconv.toeuc(self) end
|
||||||
|
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# String#tosjis -> string
|
# String#tosjis -> string
|
||||||
#
|
#
|
||||||
# Convert <code>self</code> to Shift_JIS
|
# Convert <code>self</code> to Shift_JIS
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-sxm0', str).
|
|
||||||
def tosjis; Kconv.tosjis(self) end
|
def tosjis; Kconv.tosjis(self) end
|
||||||
|
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# String#toutf8 -> string
|
# String#toutf8 -> string
|
||||||
#
|
#
|
||||||
# Convert <code>self</code> to UTF-8
|
# Convert <code>self</code> to UTF-8
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-wxm0', str).
|
|
||||||
def toutf8; Kconv.toutf8(self) end
|
def toutf8; Kconv.toutf8(self) end
|
||||||
|
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# String#toutf16 -> string
|
# String#toutf16 -> string
|
||||||
#
|
#
|
||||||
# Convert <code>self</code> to UTF-16
|
# Convert <code>self</code> to UTF-16
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
|
||||||
def toutf16; Kconv.toutf16(self) end
|
def toutf16; Kconv.toutf16(self) end
|
||||||
|
|
||||||
# call-seq:
|
# call-seq:
|
||||||
# String#toutf32 -> string
|
# String#toutf32 -> string
|
||||||
#
|
#
|
||||||
# Convert <code>self</code> to UTF-32
|
# Convert <code>self</code> to UTF-32
|
||||||
#
|
|
||||||
# *Note*
|
|
||||||
# This method decode MIME encoded string and
|
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-w32xm0', str).
|
|
||||||
def toutf32; Kconv.toutf32(self) end
|
def toutf32; Kconv.toutf32(self) end
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
|
@ -32,7 +32,7 @@
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
/* $Id$ */
|
/* $Id$ */
|
||||||
#define NKF_VERSION "2.0.8"
|
#define NKF_VERSION "2.0.8"
|
||||||
#define NKF_RELEASE_DATE "2007-12-19"
|
#define NKF_RELEASE_DATE "2007-12-22"
|
||||||
#define COPY_RIGHT \
|
#define COPY_RIGHT \
|
||||||
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
|
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
|
||||||
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
|
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
|
||||||
|
@ -228,8 +228,6 @@ void djgpp_setbinmode(FILE *fp)
|
||||||
|
|
||||||
enum nkf_encodings {
|
enum nkf_encodings {
|
||||||
ASCII,
|
ASCII,
|
||||||
JIS_X_0208,
|
|
||||||
JIS_X_0201,
|
|
||||||
ISO_8859_1,
|
ISO_8859_1,
|
||||||
ISO_2022_JP,
|
ISO_2022_JP,
|
||||||
CP50220,
|
CP50220,
|
||||||
|
@ -262,52 +260,84 @@ enum nkf_encodings {
|
||||||
UTF_32BE_BOM,
|
UTF_32BE_BOM,
|
||||||
UTF_32LE,
|
UTF_32LE,
|
||||||
UTF_32LE_BOM,
|
UTF_32LE_BOM,
|
||||||
JIS_X_0212=0x2844,
|
JIS_X_0201=0x1000,
|
||||||
JIS_X_0213_1=0x284F,
|
JIS_X_0208,
|
||||||
JIS_X_0213_2=0x2850,
|
JIS_X_0212,
|
||||||
|
JIS_X_0213_1,
|
||||||
|
JIS_X_0213_2,
|
||||||
BINARY
|
BINARY
|
||||||
};
|
};
|
||||||
static const struct {
|
|
||||||
const int id;
|
nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
|
||||||
const char *name;
|
nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
|
||||||
} encoding_id_to_name_table[] = {
|
nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
|
||||||
{ASCII, "ASCII"},
|
nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
|
||||||
{ISO_8859_1, "ISO-8859-1"},
|
nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
|
||||||
{ISO_2022_JP, "ISO-2022-JP"},
|
void j_oconv(nkf_char c2, nkf_char c1);
|
||||||
{CP50220, "CP50220"},
|
void s_oconv(nkf_char c2, nkf_char c1);
|
||||||
{CP50221, "CP50221"},
|
void e_oconv(nkf_char c2, nkf_char c1);
|
||||||
{CP50222, "CP50222"},
|
void w_oconv(nkf_char c2, nkf_char c1);
|
||||||
{ISO_2022_JP_1, "ISO-2022-JP-1"},
|
void w_oconv16(nkf_char c2, nkf_char c1);
|
||||||
{ISO_2022_JP_3, "ISO-2022-JP-3"},
|
void w_oconv32(nkf_char c2, nkf_char c1);
|
||||||
{SHIFT_JIS, "Shift_JIS"},
|
|
||||||
{WINDOWS_31J, "WINDOWS-31J"},
|
typedef struct {
|
||||||
{CP10001, "CP10001"},
|
char *name;
|
||||||
{EUC_JP, "EUC-JP"},
|
nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
|
||||||
{CP51932, "CP51932"},
|
void (*oconv_func)(nkf_char c2, nkf_char c1);
|
||||||
{EUCJP_MS, "eucJP-MS"},
|
} nkf_native_encoding;
|
||||||
{EUCJP_ASCII, "eucJP-ASCII"},
|
|
||||||
{SHIFT_JISX0213, "Shift_JISX0213"},
|
/* Native ASCII encoding. The name is looked up with rb_enc_find_index() by the
 * Ruby binding, so it must use the hyphenated spelling "US-ASCII". */
nkf_native_encoding NkfEncodingASCII = { "US-ASCII", e_iconv, e_oconv };
|
||||||
{SHIFT_JIS_2004, "Shift_JIS-2004"},
|
nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
|
||||||
{EUC_JISX0213, "EUC-JISX0213"},
|
nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
|
||||||
{EUC_JIS_2004, "EUC-JIS-2004"},
|
nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
|
||||||
{UTF_8, "UTF-8"},
|
nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
|
||||||
{UTF_8N, "UTF-8N"},
|
nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
|
||||||
{UTF_8_BOM, "UTF-8-BOM"},
|
nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
|
||||||
{UTF8_MAC, "UTF8-MAC"},
|
|
||||||
{UTF_16, "UTF-16"},
|
typedef struct {
|
||||||
{UTF_16BE, "UTF-16BE"},
|
int id;
|
||||||
{UTF_16BE_BOM, "UTF-16BE-BOM"},
|
char *name;
|
||||||
{UTF_16LE, "UTF-16LE"},
|
nkf_native_encoding *based_encoding;
|
||||||
{UTF_16LE_BOM, "UTF-16LE-BOM"},
|
} nkf_encoding;
|
||||||
{UTF_32, "UTF-32"},
|
nkf_encoding nkf_encoding_table[] = {
|
||||||
{UTF_32BE, "UTF-32BE"},
|
{ASCII, "ASCII", &NkfEncodingASCII},
|
||||||
{UTF_32BE_BOM, "UTF-32BE-BOM"},
|
{ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
|
||||||
{UTF_32LE, "UTF-32LE"},
|
{ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP}, /* same base as the CP5022x / ISO-2022-JP-x entries */
|
||||||
{UTF_32LE_BOM, "UTF-32LE-BOM"},
|
{CP50220, "CP50220", &NkfEncodingISO_2022_JP},
|
||||||
{BINARY, "BINARY"},
|
{CP50221, "CP50221", &NkfEncodingISO_2022_JP},
|
||||||
{-1, ""}
|
{CP50222, "CP50222", &NkfEncodingISO_2022_JP},
|
||||||
|
{ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
|
||||||
|
{ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
|
||||||
|
{SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
|
||||||
|
{WINDOWS_31J, "WINDOWS-31J", &NkfEncodingShift_JIS},
|
||||||
|
{CP10001, "CP10001", &NkfEncodingShift_JIS},
|
||||||
|
{EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
|
||||||
|
{CP51932, "CP51932", &NkfEncodingEUC_JP},
|
||||||
|
{EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
|
||||||
|
{EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
|
||||||
|
{SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
|
||||||
|
{SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
|
||||||
|
{EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
|
||||||
|
{EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
|
||||||
|
{UTF_8, "UTF-8", &NkfEncodingUTF_8},
|
||||||
|
{UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
|
||||||
|
{UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
|
||||||
|
{UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
|
||||||
|
{UTF_16, "UTF-16", &NkfEncodingUTF_16},
|
||||||
|
{UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
|
||||||
|
{UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
|
||||||
|
{UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
|
||||||
|
{UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
|
||||||
|
{UTF_32, "UTF-32", &NkfEncodingUTF_32},
|
||||||
|
{UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
|
||||||
|
{UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
|
||||||
|
{UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
|
||||||
|
{UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
|
||||||
|
{BINARY, "BINARY", &NkfEncodingASCII},
|
||||||
|
{-1, NULL, NULL}
|
||||||
};
|
};
|
||||||
static const struct {
|
#define NKF_ENCODING_TABLE_SIZE 34
|
||||||
|
struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
const int id;
|
const int id;
|
||||||
} encoding_name_to_id_table[] = {
|
} encoding_name_to_id_table[] = {
|
||||||
|
@ -354,7 +384,7 @@ static const struct {
|
||||||
{"UTF-32LE", UTF_32LE},
|
{"UTF-32LE", UTF_32LE},
|
||||||
{"UTF-32LE-BOM", UTF_32LE_BOM},
|
{"UTF-32LE-BOM", UTF_32LE_BOM},
|
||||||
{"BINARY", BINARY},
|
{"BINARY", BINARY},
|
||||||
{"", -1}
|
{NULL, -1}
|
||||||
};
|
};
|
||||||
#if defined(DEFAULT_CODE_JIS)
|
#if defined(DEFAULT_CODE_JIS)
|
||||||
#define DEFAULT_ENCODING ISO_2022_JP
|
#define DEFAULT_ENCODING ISO_2022_JP
|
||||||
|
@ -441,7 +471,7 @@ struct input_code{
|
||||||
};
|
};
|
||||||
|
|
||||||
static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
|
static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
|
||||||
static int output_encoding = DEFAULT_ENCODING;
|
static nkf_encoding *output_encoding;
|
||||||
|
|
||||||
#if !defined(PERL_XS) && !defined(WIN32DLL)
|
#if !defined(PERL_XS) && !defined(WIN32DLL)
|
||||||
static nkf_char noconvert(FILE *f);
|
static nkf_char noconvert(FILE *f);
|
||||||
|
@ -451,9 +481,7 @@ static nkf_char kanji_convert(FILE *f);
|
||||||
static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1);
|
static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1);
|
||||||
static nkf_char push_hold_buf(nkf_char c2);
|
static nkf_char push_hold_buf(nkf_char c2);
|
||||||
static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
|
static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
|
||||||
static nkf_char s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
|
|
||||||
static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
|
static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
|
||||||
static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
|
|
||||||
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
|
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
|
||||||
/* UCS Mapping
|
/* UCS Mapping
|
||||||
* 0: Shift_JIS, eucJP-ascii
|
* 0: Shift_JIS, eucJP-ascii
|
||||||
|
@ -482,9 +510,6 @@ static void encode_fallback_perl(nkf_char c);
|
||||||
static void encode_fallback_subchar(nkf_char c);
|
static void encode_fallback_subchar(nkf_char c);
|
||||||
static void (*encode_fallback)(nkf_char c) = NULL;
|
static void (*encode_fallback)(nkf_char c) = NULL;
|
||||||
static nkf_char w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
|
static nkf_char w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
|
||||||
static nkf_char w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
|
|
||||||
static nkf_char w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
|
|
||||||
static nkf_char w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
|
|
||||||
static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
|
static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
|
||||||
static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
|
static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
|
||||||
static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
|
static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
|
||||||
|
@ -496,14 +521,8 @@ static void w_status(struct input_code *, nkf_char);
|
||||||
static int output_bom_f = FALSE;
|
static int output_bom_f = FALSE;
|
||||||
static int output_endian = ENDIAN_BIG;
|
static int output_endian = ENDIAN_BIG;
|
||||||
static nkf_char e2w_conv(nkf_char c2,nkf_char c1);
|
static nkf_char e2w_conv(nkf_char c2,nkf_char c1);
|
||||||
static void w_oconv(nkf_char c2,nkf_char c1);
|
|
||||||
static void w_oconv16(nkf_char c2,nkf_char c1);
|
|
||||||
static void w_oconv32(nkf_char c2,nkf_char c1);
|
|
||||||
#endif
|
#endif
|
||||||
static void e_oconv(nkf_char c2,nkf_char c1);
|
|
||||||
static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
|
static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
|
||||||
static void s_oconv(nkf_char c2,nkf_char c1);
|
|
||||||
static void j_oconv(nkf_char c2,nkf_char c1);
|
|
||||||
static void fold_conv(nkf_char c2,nkf_char c1);
|
static void fold_conv(nkf_char c2,nkf_char c1);
|
||||||
static void nl_conv(nkf_char c2,nkf_char c1);
|
static void nl_conv(nkf_char c2,nkf_char c1);
|
||||||
static void z_conv(nkf_char c2,nkf_char c1);
|
static void z_conv(nkf_char c2,nkf_char c1);
|
||||||
|
@ -895,6 +914,14 @@ static void nkf_str_upcase(const char *str, char *res, size_t length)
|
||||||
res[i] = 0;
|
res[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static nkf_encoding *nkf_enc_from_index(int idx)
|
||||||
|
{
|
||||||
|
if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return &nkf_encoding_table[idx];
|
||||||
|
}
|
||||||
|
|
||||||
static int nkf_enc_find_index(const char *name)
|
static int nkf_enc_find_index(const char *name)
|
||||||
{
|
{
|
||||||
int i, index = -1;
|
int i, index = -1;
|
||||||
|
@ -906,19 +933,17 @@ static int nkf_enc_find_index(const char *name)
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(PERL_XS) || defined(WIN32DLL)
|
static nkf_encoding *nkf_enc_find(const char *name)
|
||||||
static char* nkf_enc_name(const int index)
|
|
||||||
{
|
{
|
||||||
int i;
|
int idx = -1;
|
||||||
const char* name = "ASCII";
|
idx = nkf_enc_find_index(name);
|
||||||
for (i = 0; encoding_id_to_name_table[i].id >= 0; i++) {
|
if (idx < 0) return 0;
|
||||||
if (encoding_id_to_name_table[i].id == index) {
|
return nkf_enc_from_index(idx);
|
||||||
return nkf_strcpy(encoding_id_to_name_table[i].name);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return nkf_strcpy(name);
|
#define nkf_enc_name(enc) (enc)->name
|
||||||
}
|
#define nkf_enc_to_index(enc) (enc)->id
|
||||||
#endif
|
#define nkf_enc_to_base_encoding(enc) (enc)->based_encoding
|
||||||
|
|
||||||
#ifdef WIN32DLL
|
#ifdef WIN32DLL
|
||||||
#include "nkf32dll.c"
|
#include "nkf32dll.c"
|
||||||
|
@ -1327,6 +1352,7 @@ void options(unsigned char *cp)
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
unsigned char *cp_back = NULL;
|
unsigned char *cp_back = NULL;
|
||||||
char codeset[32];
|
char codeset[32];
|
||||||
|
nkf_encoding *enc;
|
||||||
|
|
||||||
if (option_mode==1)
|
if (option_mode==1)
|
||||||
return;
|
return;
|
||||||
|
@ -1364,8 +1390,8 @@ void options(unsigned char *cp)
|
||||||
}else{
|
}else{
|
||||||
if (strcmp(long_option[i].name, "ic=") == 0){
|
if (strcmp(long_option[i].name, "ic=") == 0){
|
||||||
nkf_str_upcase(p, codeset, 32);
|
nkf_str_upcase(p, codeset, 32);
|
||||||
i = nkf_enc_find_index(codeset);
|
enc = nkf_enc_find(codeset);
|
||||||
switch (i) {
|
switch (nkf_enc_to_index(enc)) {
|
||||||
case ISO_2022_JP:
|
case ISO_2022_JP:
|
||||||
input_f = JIS_INPUT;
|
input_f = JIS_INPUT;
|
||||||
break;
|
break;
|
||||||
|
@ -1502,10 +1528,10 @@ void options(unsigned char *cp)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (strcmp(long_option[i].name, "oc=") == 0){
|
if (strcmp(long_option[i].name, "oc=") == 0){
|
||||||
nkf_str_upcase(p, codeset, 32);
|
|
||||||
output_encoding = nkf_enc_find_index(codeset);
|
|
||||||
x0201_f = FALSE;
|
x0201_f = FALSE;
|
||||||
switch (output_encoding) {
|
nkf_str_upcase(p, codeset, 32);
|
||||||
|
output_encoding = nkf_enc_find(codeset);
|
||||||
|
switch (nkf_enc_to_index(output_encoding)) {
|
||||||
case ISO_2022_JP:
|
case ISO_2022_JP:
|
||||||
output_conv = j_oconv;
|
output_conv = j_oconv;
|
||||||
break;
|
break;
|
||||||
|
@ -1880,16 +1906,16 @@ void options(unsigned char *cp)
|
||||||
case 'j': /* JIS output */
|
case 'j': /* JIS output */
|
||||||
case 'n':
|
case 'n':
|
||||||
output_conv = j_oconv;
|
output_conv = j_oconv;
|
||||||
output_encoding = ISO_2022_JP;
|
output_encoding = nkf_enc_from_index(ISO_2022_JP);
|
||||||
continue;
|
continue;
|
||||||
case 'e': /* AT&T EUC output */
|
case 'e': /* AT&T EUC output */
|
||||||
output_conv = e_oconv;
|
output_conv = e_oconv;
|
||||||
cp932inv_f = FALSE;
|
cp932inv_f = FALSE;
|
||||||
output_encoding = EUC_JP;
|
output_encoding = nkf_enc_from_index(EUC_JP);
|
||||||
continue;
|
continue;
|
||||||
case 's': /* SJIS output */
|
case 's': /* SJIS output */
|
||||||
output_conv = s_oconv;
|
output_conv = s_oconv;
|
||||||
output_encoding = SHIFT_JIS;
|
output_encoding = nkf_enc_from_index(SHIFT_JIS);
|
||||||
continue;
|
continue;
|
||||||
case 'l': /* ISO8859 Latin-1 support, no conversion */
|
case 'l': /* ISO8859 Latin-1 support, no conversion */
|
||||||
iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
|
iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
|
||||||
|
@ -1937,21 +1963,22 @@ void options(unsigned char *cp)
|
||||||
output_conv = w_oconv; cp++;
|
output_conv = w_oconv; cp++;
|
||||||
if (cp[0] == '0'){
|
if (cp[0] == '0'){
|
||||||
cp++;
|
cp++;
|
||||||
output_encoding = UTF_8N;
|
output_encoding = nkf_enc_from_index(UTF_8N);
|
||||||
} else {
|
} else {
|
||||||
output_bom_f = TRUE;
|
output_bom_f = TRUE;
|
||||||
output_encoding = UTF_8_BOM;
|
output_encoding = nkf_enc_from_index(UTF_8_BOM);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
int enc_idx;
|
||||||
if ('1'== cp[0] && '6'==cp[1]) {
|
if ('1'== cp[0] && '6'==cp[1]) {
|
||||||
output_conv = w_oconv16; cp+=2;
|
output_conv = w_oconv16; cp+=2;
|
||||||
output_encoding = UTF_16;
|
enc_idx = UTF_16;
|
||||||
} else if ('3'== cp[0] && '2'==cp[1]) {
|
} else if ('3'== cp[0] && '2'==cp[1]) {
|
||||||
output_conv = w_oconv32; cp+=2;
|
output_conv = w_oconv32; cp+=2;
|
||||||
output_encoding = UTF_32;
|
enc_idx = UTF_32;
|
||||||
} else {
|
} else {
|
||||||
output_conv = w_oconv;
|
output_conv = w_oconv;
|
||||||
output_encoding = UTF_8;
|
output_encoding = nkf_enc_from_index(UTF_8);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (cp[0]=='L') {
|
if (cp[0]=='L') {
|
||||||
|
@ -1960,19 +1987,21 @@ void options(unsigned char *cp)
|
||||||
} else if (cp[0] == 'B') {
|
} else if (cp[0] == 'B') {
|
||||||
cp++;
|
cp++;
|
||||||
} else {
|
} else {
|
||||||
|
output_encoding = nkf_enc_from_index(enc_idx);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (cp[0] == '0'){
|
if (cp[0] == '0'){
|
||||||
cp++;
|
cp++;
|
||||||
output_encoding = output_encoding == UTF_16
|
enc_idx = enc_idx == UTF_16
|
||||||
? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
|
? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
|
||||||
: (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
|
: (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
|
||||||
} else {
|
} else {
|
||||||
output_bom_f = TRUE;
|
output_bom_f = TRUE;
|
||||||
output_encoding = output_encoding == UTF_16
|
enc_idx = enc_idx == UTF_16
|
||||||
? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
|
? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
|
||||||
: (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
|
: (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
|
||||||
}
|
}
|
||||||
|
output_encoding = nkf_enc_from_index(enc_idx);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
#endif
|
#endif
|
||||||
|
@ -3028,11 +3057,11 @@ nkf_char kanji_convert(FILE *f)
|
||||||
shift_mode = FALSE;
|
shift_mode = FALSE;
|
||||||
NEXT;
|
NEXT;
|
||||||
#endif /* X0212_ENABLE */
|
#endif /* X0212_ENABLE */
|
||||||
} else if (c1 == (JIS_X_0213_1&0x7F)){
|
} else if (c1 == 0x4F){
|
||||||
input_mode = JIS_X_0213_1;
|
input_mode = JIS_X_0213_1;
|
||||||
shift_mode = FALSE;
|
shift_mode = FALSE;
|
||||||
NEXT;
|
NEXT;
|
||||||
} else if (c1 == (JIS_X_0213_2&0x7F)){
|
} else if (c1 == 0x50){
|
||||||
input_mode = JIS_X_0213_2;
|
input_mode = JIS_X_0213_2;
|
||||||
shift_mode = FALSE;
|
shift_mode = FALSE;
|
||||||
NEXT;
|
NEXT;
|
||||||
|
@ -4469,7 +4498,7 @@ void j_oconv(nkf_char c2, nkf_char c1)
|
||||||
(*o_putc)(ESC);
|
(*o_putc)(ESC);
|
||||||
(*o_putc)('$');
|
(*o_putc)('$');
|
||||||
(*o_putc)('(');
|
(*o_putc)('(');
|
||||||
(*o_putc)(JIS_X_0213_2&0x7F);
|
(*o_putc)(0x50);
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
if(output_mode!=JIS_X_0212){
|
if(output_mode!=JIS_X_0212){
|
||||||
|
@ -4477,7 +4506,7 @@ void j_oconv(nkf_char c2, nkf_char c1)
|
||||||
(*o_putc)(ESC);
|
(*o_putc)(ESC);
|
||||||
(*o_putc)('$');
|
(*o_putc)('$');
|
||||||
(*o_putc)('(');
|
(*o_putc)('(');
|
||||||
(*o_putc)(JIS_X_0212&0x7F);
|
(*o_putc)(0x44);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(*o_putc)(c2 & 0x7f);
|
(*o_putc)(c2 & 0x7f);
|
||||||
|
@ -4515,7 +4544,7 @@ void j_oconv(nkf_char c2, nkf_char c1)
|
||||||
(*o_putc)(ESC);
|
(*o_putc)(ESC);
|
||||||
(*o_putc)('$');
|
(*o_putc)('$');
|
||||||
(*o_putc)('(');
|
(*o_putc)('(');
|
||||||
(*o_putc)(JIS_X_0213_1&0x7F);
|
(*o_putc)(0x4F);
|
||||||
}
|
}
|
||||||
}else if (output_mode != JIS_X_0208) {
|
}else if (output_mode != JIS_X_0208) {
|
||||||
output_mode = JIS_X_0208;
|
output_mode = JIS_X_0208;
|
||||||
|
@ -6317,7 +6346,7 @@ void reinit(void)
|
||||||
iconv_for_check = 0;
|
iconv_for_check = 0;
|
||||||
#endif
|
#endif
|
||||||
input_codename = NULL;
|
input_codename = NULL;
|
||||||
output_encoding = DEFAULT_ENCODING;
|
output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
|
||||||
#ifdef WIN32DLL
|
#ifdef WIN32DLL
|
||||||
reinitdll();
|
reinitdll();
|
||||||
#endif /*WIN32DLL*/
|
#endif /*WIN32DLL*/
|
||||||
|
|
|
@ -64,7 +64,17 @@ rb_nkf_putchar(unsigned int c)
|
||||||
rb_encoding* rb_nkf_enc_get(const char *name)
|
rb_encoding* rb_nkf_enc_get(const char *name)
|
||||||
{
|
{
|
||||||
int idx = rb_enc_find_index(name);
|
int idx = rb_enc_find_index(name);
|
||||||
if (idx < 0) idx = rb_enc_replicate(name, rb_ascii_encoding());
|
if (idx < 0) {
|
||||||
|
nkf_encoding *nkf_enc = nkf_enc_find(name);
|
||||||
|
nkf_native_encoding * nkf_base_enc = nkf_enc_to_base_encoding(nkf_enc);
|
||||||
|
idx = rb_enc_find_index(nkf_enc_name(nkf_base_enc));
|
||||||
|
if (idx < 0) {
|
||||||
|
idx = rb_enc_replicate(name, rb_ascii_encoding());
|
||||||
|
} else {
|
||||||
|
rb_encoding *rb_enc = rb_enc_from_index(idx);
|
||||||
|
idx = rb_enc_replicate(name, rb_enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
return rb_enc_from_index(idx);
|
return rb_enc_from_index(idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -475,11 +485,11 @@ Init_nkf()
|
||||||
rb_define_const(mNKF, "NOCONV", Qnil);
|
rb_define_const(mNKF, "NOCONV", Qnil);
|
||||||
rb_define_const(mNKF, "UNKNOWN", Qnil);
|
rb_define_const(mNKF, "UNKNOWN", Qnil);
|
||||||
rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
|
rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
|
||||||
rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("US_ASCII")));
|
rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("US-ASCII")));
|
||||||
rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
|
rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
|
||||||
rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
|
rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
|
||||||
rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
|
rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
|
||||||
rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_nkf_enc_get("UTF-8")));
|
rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_utf8_encoding()));
|
||||||
rb_define_const(mNKF, "UTF16", rb_enc_from_encoding(rb_nkf_enc_get("UTF-16")));
|
rb_define_const(mNKF, "UTF16", rb_enc_from_encoding(rb_nkf_enc_get("UTF-16")));
|
||||||
rb_define_const(mNKF, "UTF32", rb_enc_from_encoding(rb_nkf_enc_get("UTF-32")));
|
rb_define_const(mNKF, "UTF32", rb_enc_from_encoding(rb_nkf_enc_get("UTF-32")));
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue