mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c, config.h}: imported nkf 2.0.6.
* Add --ic / --oc option and mapping tables. * Add fallback option. * Add --no-best-fit-chars option. * Fix some bugs. * ext/nkf/nkf.c (nkf_split_options): added to parse the option string. * ext/nkf/lib/kconv.rb (Kconv.to*): add -m0. Note that Kconv.to* still imply -X. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@10055 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
15c3ddd0ff
commit
bb1e80fd4e
6 changed files with 7379 additions and 3266 deletions
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
||||||
|
Mon Mar 27 22:19:09 2006 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c, config.h}: imported nkf 2.0.6.
|
||||||
|
* Add --ic / --oc option and mapping tables.
|
||||||
|
* Add fallback option.
|
||||||
|
* Add --no-best-fit-chars option.
|
||||||
|
* Fix some bugs.
|
||||||
|
|
||||||
|
* ext/nkf/nkf.c (nkf_split_options): added to parse the option string.
|
||||||
|
|
||||||
|
* ext/nkf/lib/kconv.rb (Kconv.to*): add -m0.
|
||||||
|
Note that Kconv.to* still imply -X.
|
||||||
|
|
||||||
Fri Mar 24 17:20:03 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
|
Fri Mar 24 17:20:03 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||||
|
|
||||||
* process.c (rb_f_sleep): remove description about SIGALRM which
|
* process.c (rb_f_sleep): remove description about SIGALRM which
|
||||||
|
|
|
@ -130,11 +130,10 @@ module Kconv
|
||||||
# Convert <code>str</code> to ISO-2022-JP
|
# Convert <code>str</code> to ISO-2022-JP
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-jxm0', str).
|
# If you don't want it, use NKF.nkf('-jxm0', str).
|
||||||
def tojis(str)
|
def tojis(str)
|
||||||
::NKF::nkf('-j', str)
|
::NKF::nkf('-jm0', str)
|
||||||
end
|
end
|
||||||
module_function :tojis
|
module_function :tojis
|
||||||
|
|
||||||
|
@ -144,11 +143,10 @@ module Kconv
|
||||||
# Convert <code>str</code> to EUC-JP
|
# Convert <code>str</code> to EUC-JP
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-exm0', str).
|
# If you don't want it, use NKF.nkf('-exm0', str).
|
||||||
def toeuc(str)
|
def toeuc(str)
|
||||||
::NKF::nkf('-e', str)
|
::NKF::nkf('-em0', str)
|
||||||
end
|
end
|
||||||
module_function :toeuc
|
module_function :toeuc
|
||||||
|
|
||||||
|
@ -158,11 +156,10 @@ module Kconv
|
||||||
# Convert <code>str</code> to Shift_JIS
|
# Convert <code>str</code> to Shift_JIS
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-sxm0', str).
|
# If you don't want it, use NKF.nkf('-sxm0', str).
|
||||||
def tosjis(str)
|
def tosjis(str)
|
||||||
::NKF::nkf('-s', str)
|
::NKF::nkf('-sm0', str)
|
||||||
end
|
end
|
||||||
module_function :tosjis
|
module_function :tosjis
|
||||||
|
|
||||||
|
@ -172,11 +169,10 @@ module Kconv
|
||||||
# Convert <code>str</code> to UTF-8
|
# Convert <code>str</code> to UTF-8
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-wxm0', str).
|
# If you don't want it, use NKF.nkf('-wxm0', str).
|
||||||
def toutf8(str)
|
def toutf8(str)
|
||||||
::NKF::nkf('-w', str)
|
::NKF::nkf('-wm0', str)
|
||||||
end
|
end
|
||||||
module_function :toutf8
|
module_function :toutf8
|
||||||
|
|
||||||
|
@ -186,11 +182,10 @@ module Kconv
|
||||||
# Convert <code>str</code> to UTF-16
|
# Convert <code>str</code> to UTF-16
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
||||||
def toutf16(str)
|
def toutf16(str)
|
||||||
::NKF::nkf('-w16', str)
|
::NKF::nkf('-w16m0', str)
|
||||||
end
|
end
|
||||||
module_function :toutf16
|
module_function :toutf16
|
||||||
|
|
||||||
|
@ -263,8 +258,7 @@ class String
|
||||||
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
|
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want to decode them, use NKF.nkf.
|
# If you don't want this conversion, use NKF.nkf directly.
|
||||||
def kconv(out_code, in_code=Kconv::AUTO)
|
def kconv(out_code, in_code=Kconv::AUTO)
|
||||||
Kconv::kconv(self, out_code, in_code)
|
Kconv::kconv(self, out_code, in_code)
|
||||||
|
@ -280,8 +274,7 @@ class String
|
||||||
# Convert <code>self</code> to ISO-2022-JP
|
# Convert <code>self</code> to ISO-2022-JP
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-jxm0', str).
|
# If you don't want it, use NKF.nkf('-jxm0', str).
|
||||||
def tojis; Kconv.tojis(self) end
|
def tojis; Kconv.tojis(self) end
|
||||||
|
|
||||||
|
@ -291,8 +284,7 @@ class String
|
||||||
# Convert <code>self</code> to EUC-JP
|
# Convert <code>self</code> to EUC-JP
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-exm0', str).
|
# If you don't want it, use NKF.nkf('-exm0', str).
|
||||||
def toeuc; Kconv.toeuc(self) end
|
def toeuc; Kconv.toeuc(self) end
|
||||||
|
|
||||||
|
@ -302,8 +294,7 @@ class String
|
||||||
# Convert <code>self</code> to Shift_JIS
|
# Convert <code>self</code> to Shift_JIS
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-sxm0', str).
|
# If you don't want it, use NKF.nkf('-sxm0', str).
|
||||||
def tosjis; Kconv.tosjis(self) end
|
def tosjis; Kconv.tosjis(self) end
|
||||||
|
|
||||||
|
@ -313,8 +304,7 @@ class String
|
||||||
# Convert <code>self</code> to UTF-8
|
# Convert <code>self</code> to UTF-8
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-wxm0', str).
|
# If you don't want it, use NKF.nkf('-wxm0', str).
|
||||||
def toutf8; Kconv.toutf8(self) end
|
def toutf8; Kconv.toutf8(self) end
|
||||||
|
|
||||||
|
@ -324,8 +314,7 @@ class String
|
||||||
# Convert <code>self</code> to UTF-16
|
# Convert <code>self</code> to UTF-16
|
||||||
#
|
#
|
||||||
# *Note*
|
# *Note*
|
||||||
# This method decode MIME encoded string and
|
# This method converts halfwidth katakana to fullwidth katakana.
|
||||||
# convert halfwidth katakana to fullwidth katakana.
|
|
||||||
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
# If you don't want it, use NKF.nkf('-w16xm0', str).
|
||||||
def toutf16; Kconv.toutf16(self) end
|
def toutf16; Kconv.toutf16(self) end
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,9 @@
|
||||||
/* --debug, --no-output オプション */
|
/* --debug, --no-output オプション */
|
||||||
#define CHECK_OPTION
|
#define CHECK_OPTION
|
||||||
|
|
||||||
|
/* JIS X0212 */
|
||||||
|
#define X0212_ENABLE
|
||||||
|
|
||||||
/* --exec-in, --exec-out オプション
|
/* --exec-in, --exec-out オプション
|
||||||
* pipe, fork, execvp あたりが無いと動きません。
|
* pipe, fork, execvp あたりが無いと動きません。
|
||||||
* MS-DOS, MinGW などでは undef にしてください
|
* MS-DOS, MinGW などでは undef にしてください
|
||||||
|
@ -40,6 +43,10 @@
|
||||||
*/
|
*/
|
||||||
/* #define INT_IS_SHORT */
|
/* #define INT_IS_SHORT */
|
||||||
|
|
||||||
|
/* Unicode Normalization */
|
||||||
|
#define UNICODE_NORMALIZATION
|
||||||
|
|
||||||
|
#ifndef WIN32DLL
|
||||||
/******************************/
|
/******************************/
|
||||||
/* デフォルトの出力コード選択 */
|
/* デフォルトの出力コード選択 */
|
||||||
/* Select DEFAULT_CODE */
|
/* Select DEFAULT_CODE */
|
||||||
|
@ -48,5 +55,25 @@
|
||||||
/* #define DEFAULT_CODE_EUC */
|
/* #define DEFAULT_CODE_EUC */
|
||||||
/* #define DEFAULT_CODE_UTF8 */
|
/* #define DEFAULT_CODE_UTF8 */
|
||||||
/******************************/
|
/******************************/
|
||||||
|
#else
|
||||||
|
#define DEFAULT_CODE_SJIS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(NUMCHAR_OPTION) && !defined(UTF8_INPUT_ENABLE)
|
||||||
|
#define UTF8_INPUT_ENABLE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef UNICODE_NORMALIZATION
|
||||||
|
#ifndef UTF8_INPUT_ENABLE
|
||||||
|
#define UTF8_INPUT_ENABLE
|
||||||
|
#endif
|
||||||
|
#define NORMALIZATION_TABLE_LENGTH 942
|
||||||
|
#define NORMALIZATION_TABLE_NFC_LENGTH 3
|
||||||
|
#define NORMALIZATION_TABLE_NFD_LENGTH 9
|
||||||
|
struct normalization_pair{
|
||||||
|
const int nfc[NORMALIZATION_TABLE_NFC_LENGTH];
|
||||||
|
const int nfd[NORMALIZATION_TABLE_NFD_LENGTH];
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _CONFIG_H_ */
|
#endif /* _CONFIG_H_ */
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -78,6 +78,54 @@ rb_nkf_putchar(c)
|
||||||
#include "nkf-utf8/utf8tbl.c"
|
#include "nkf-utf8/utf8tbl.c"
|
||||||
#include "nkf-utf8/nkf.c"
|
#include "nkf-utf8/nkf.c"
|
||||||
|
|
||||||
|
int nkf_split_options(arg)
|
||||||
|
const char* arg;
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
char option[256];
|
||||||
|
int i = 0, j = 0;
|
||||||
|
int is_escaped = FALSE;
|
||||||
|
int is_single_quoted = FALSE;
|
||||||
|
int is_double_quoted = FALSE;
|
||||||
|
for(i = 0; arg[i]; i++){
|
||||||
|
if(j == 255){
|
||||||
|
return -1;
|
||||||
|
}else if(is_single_quoted){
|
||||||
|
if(arg[i] == '\''){
|
||||||
|
is_single_quoted = FALSE;
|
||||||
|
}else{
|
||||||
|
option[j++] = arg[i];
|
||||||
|
}
|
||||||
|
}else if(is_escaped){
|
||||||
|
is_escaped = FALSE;
|
||||||
|
option[j++] = arg[i];
|
||||||
|
}else if(arg[i] == '\\'){
|
||||||
|
is_escaped = TRUE;
|
||||||
|
}else if(is_double_quoted){
|
||||||
|
if(arg[i] == '"'){
|
||||||
|
is_double_quoted = FALSE;
|
||||||
|
}else{
|
||||||
|
option[j++] = arg[i];
|
||||||
|
}
|
||||||
|
}else if(arg[i] == '\''){
|
||||||
|
is_single_quoted = TRUE;
|
||||||
|
}else if(arg[i] == '"'){
|
||||||
|
is_double_quoted = TRUE;
|
||||||
|
}else if(arg[i] == ' '){
|
||||||
|
option[j] = '\0';
|
||||||
|
options(option);
|
||||||
|
j = 0;
|
||||||
|
}else{
|
||||||
|
option[j++] = arg[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(j){
|
||||||
|
option[j] = '\0';
|
||||||
|
options(option);
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* NKF.nkf(opt, str) -> string
|
* NKF.nkf(opt, str) -> string
|
||||||
|
@ -104,7 +152,7 @@ rb_nkf_kconv(obj, opt, src)
|
||||||
StringValue(opt);
|
StringValue(opt);
|
||||||
opt_ptr = RSTRING(opt)->ptr;
|
opt_ptr = RSTRING(opt)->ptr;
|
||||||
opt_end = opt_ptr + RSTRING(opt)->len;
|
opt_end = opt_ptr + RSTRING(opt)->len;
|
||||||
options(opt_ptr);
|
nkf_split_options(opt_ptr);
|
||||||
|
|
||||||
incsize = INCSIZE;
|
incsize = INCSIZE;
|
||||||
|
|
||||||
|
@ -565,7 +613,9 @@ rb_nkf_guess2(obj, src)
|
||||||
void
|
void
|
||||||
Init_nkf()
|
Init_nkf()
|
||||||
{
|
{
|
||||||
|
/* hoge */
|
||||||
VALUE mKconv = rb_define_module("NKF");
|
VALUE mKconv = rb_define_module("NKF");
|
||||||
|
/* hoge */
|
||||||
|
|
||||||
rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
|
rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
|
||||||
rb_define_module_function(mKconv, "guess1", rb_nkf_guess1, 1);
|
rb_define_module_function(mKconv, "guess1", rb_nkf_guess1, 1);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue