1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c, config.h}: imported nkf 2.0.6.

* Add --ic / --oc option and mapping tables.
  * Add fallback option.
  * Add --no-best-fit-chars option.
  * Fix some bugs.

* ext/nkf/nkf.c (nkf_split_options): added to parse the option string.

* ext/nkf/lib/kconv.rb (Kconv.to*): add -m0.
  Note that Kconv.to* still implies -X.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@10055 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2006-03-27 13:28:15 +00:00
parent 15c3ddd0ff
commit bb1e80fd4e
6 changed files with 7379 additions and 3266 deletions

View file

@ -1,3 +1,16 @@
Mon Mar 27 22:19:09 2006 NARUSE, Yui <naruse@ruby-lang.org>
* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c, config.h}: imported nkf 2.0.6.
* Add --ic / --oc option and mapping tables.
* Add fallback option.
* Add --no-best-fit-chars option.
* Fix some bugs.
* ext/nkf/nkf.c (nkf_split_options): added to parse the option string.
* ext/nkf/lib/kconv.rb (Kconv.to*): add -m0.
Note that Kconv.to* still implies -X.
Fri Mar 24 17:20:03 2006 Yukihiro Matsumoto <matz@ruby-lang.org> Fri Mar 24 17:20:03 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* process.c (rb_f_sleep): remove description about SIGALRM which * process.c (rb_f_sleep): remove description about SIGALRM which

View file

@ -130,11 +130,10 @@ module Kconv
# Convert <code>str</code> to ISO-2022-JP # Convert <code>str</code> to ISO-2022-JP
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-jxm0', str). # If you don't want it, use NKF.nkf('-jxm0', str).
def tojis(str) def tojis(str)
::NKF::nkf('-j', str) ::NKF::nkf('-jm0', str)
end end
module_function :tojis module_function :tojis
@ -144,11 +143,10 @@ module Kconv
# Convert <code>str</code> to EUC-JP # Convert <code>str</code> to EUC-JP
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-exm0', str). # If you don't want it, use NKF.nkf('-exm0', str).
def toeuc(str) def toeuc(str)
::NKF::nkf('-e', str) ::NKF::nkf('-em0', str)
end end
module_function :toeuc module_function :toeuc
@ -158,11 +156,10 @@ module Kconv
# Convert <code>str</code> to Shift_JIS # Convert <code>str</code> to Shift_JIS
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-sxm0', str). # If you don't want it, use NKF.nkf('-sxm0', str).
def tosjis(str) def tosjis(str)
::NKF::nkf('-s', str) ::NKF::nkf('-sm0', str)
end end
module_function :tosjis module_function :tosjis
@ -172,11 +169,10 @@ module Kconv
# Convert <code>str</code> to UTF-8 # Convert <code>str</code> to UTF-8
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-wxm0', str). # If you don't want it, use NKF.nkf('-wxm0', str).
def toutf8(str) def toutf8(str)
::NKF::nkf('-w', str) ::NKF::nkf('-wm0', str)
end end
module_function :toutf8 module_function :toutf8
@ -186,11 +182,10 @@ module Kconv
# Convert <code>str</code> to UTF-16 # Convert <code>str</code> to UTF-16
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-w16xm0', str). # If you don't want it, use NKF.nkf('-w16xm0', str).
def toutf16(str) def toutf16(str)
::NKF::nkf('-w16', str) ::NKF::nkf('-w16m0', str)
end end
module_function :toutf16 module_function :toutf16
@ -263,8 +258,7 @@ class String
# <code>out_code</code> and <code>in_code</code> are given as constants of Kconv. # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want this conversion, use NKF.nkf. # If you don't want this conversion, use NKF.nkf.
def kconv(out_code, in_code=Kconv::AUTO) def kconv(out_code, in_code=Kconv::AUTO)
Kconv::kconv(self, out_code, in_code) Kconv::kconv(self, out_code, in_code)
@ -280,8 +274,7 @@ class String
# Convert <code>self</code> to ISO-2022-JP # Convert <code>self</code> to ISO-2022-JP
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-jxm0', str). # If you don't want it, use NKF.nkf('-jxm0', str).
def tojis; Kconv.tojis(self) end def tojis; Kconv.tojis(self) end
@ -291,8 +284,7 @@ class String
# Convert <code>self</code> to EUC-JP # Convert <code>self</code> to EUC-JP
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-exm0', str). # If you don't want it, use NKF.nkf('-exm0', str).
def toeuc; Kconv.toeuc(self) end def toeuc; Kconv.toeuc(self) end
@ -302,8 +294,7 @@ class String
# Convert <code>self</code> to Shift_JIS # Convert <code>self</code> to Shift_JIS
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-sxm0', str). # If you don't want it, use NKF.nkf('-sxm0', str).
def tosjis; Kconv.tosjis(self) end def tosjis; Kconv.tosjis(self) end
@ -313,8 +304,7 @@ class String
# Convert <code>self</code> to UTF-8 # Convert <code>self</code> to UTF-8
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-wxm0', str). # If you don't want it, use NKF.nkf('-wxm0', str).
def toutf8; Kconv.toutf8(self) end def toutf8; Kconv.toutf8(self) end
@ -324,8 +314,7 @@ class String
# Convert <code>self</code> to UTF-16 # Convert <code>self</code> to UTF-16
# #
# *Note* # *Note*
# This method decode MIME encoded string and # This method convert halfwidth katakana to fullwidth katakana.
# convert halfwidth katakana to fullwidth katakana.
# If you don't want it, use NKF.nkf('-w16xm0', str). # If you don't want it, use NKF.nkf('-w16xm0', str).
def toutf16; Kconv.toutf16(self) end def toutf16; Kconv.toutf16(self) end

View file

@ -24,6 +24,9 @@
/* --debug, --no-output options */ /* --debug, --no-output options */
#define CHECK_OPTION #define CHECK_OPTION
/* JIS X0212 */
#define X0212_ENABLE
/* --exec-in, --exec-out options /* --exec-in, --exec-out options
* These do not work without pipe, fork, and execvp. * These do not work without pipe, fork, and execvp.
* On MS-DOS, MinGW, etc., undef this. * On MS-DOS, MinGW, etc., undef this.
@ -40,6 +43,10 @@
*/ */
/* #define INT_IS_SHORT */ /* #define INT_IS_SHORT */
/* Unicode Normalization */
#define UNICODE_NORMALIZATION
#ifndef WIN32DLL
/******************************/ /******************************/
/* Default output encoding selection */ /* Default output encoding selection */
/* Select DEFAULT_CODE */ /* Select DEFAULT_CODE */
@ -48,5 +55,25 @@
/* #define DEFAULT_CODE_EUC */ /* #define DEFAULT_CODE_EUC */
/* #define DEFAULT_CODE_UTF8 */ /* #define DEFAULT_CODE_UTF8 */
/******************************/ /******************************/
#else
#define DEFAULT_CODE_SJIS
#endif
#if defined(NUMCHAR_OPTION) && !defined(UTF8_INPUT_ENABLE)
#define UTF8_INPUT_ENABLE
#endif
#ifdef UNICODE_NORMALIZATION
#ifndef UTF8_INPUT_ENABLE
#define UTF8_INPUT_ENABLE
#endif
#define NORMALIZATION_TABLE_LENGTH 942
#define NORMALIZATION_TABLE_NFC_LENGTH 3
#define NORMALIZATION_TABLE_NFD_LENGTH 9
/*
 * One entry of the normalization table used when UNICODE_NORMALIZATION is
 * enabled: a fixed-size "nfc" sequence paired with a fixed-size "nfd"
 * sequence (the field names suggest Unicode NFC and NFD forms).
 * NOTE(review): the meaning of each int (presumably one UTF-8 byte per
 * element) and how sequences shorter than the array are padded or
 * terminated are not visible in this header -- confirm against the table
 * definition (likely in utf8tbl.c).
 */
struct normalization_pair{
/* Holds NORMALIZATION_TABLE_NFC_LENGTH ints. */
const int nfc[NORMALIZATION_TABLE_NFC_LENGTH];
/* Holds NORMALIZATION_TABLE_NFD_LENGTH ints. */
const int nfd[NORMALIZATION_TABLE_NFD_LENGTH];
};
#endif
#endif /* _CONFIG_H_ */ #endif /* _CONFIG_H_ */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -78,6 +78,54 @@ rb_nkf_putchar(c)
#include "nkf-utf8/utf8tbl.c" #include "nkf-utf8/utf8tbl.c"
#include "nkf-utf8/nkf.c" #include "nkf-utf8/nkf.c"
/*
 * Split the NUL-terminated option string `arg` into tokens and hand each
 * token to options().
 *
 * Tokenizing rules (shell-like):
 *   - an unquoted space ends the current token (consecutive spaces therefore
 *     produce empty tokens, which are still passed to options());
 *   - '...' copies everything up to the closing quote literally, backslashes
 *     included;
 *   - "..." copies everything up to the closing quote, but a backslash still
 *     escapes the next character;
 *   - outside single quotes, a backslash makes the next character literal.
 * An unterminated quote or a trailing backslash is not reported; whatever was
 * collected so far is passed on as the final token if it is non-empty.
 *
 * Returns -1 as soon as a token has reached 255 characters while input
 * remains (tokens already passed to options() stay applied). Otherwise
 * returns `processed`, which is never incremented here, so the value is 0.
 */
int nkf_split_options(arg)
    const char* arg;
{
    int processed = 0;
    char token[256];
    int len = 0;
    int escaped = FALSE;
    int in_single = FALSE;
    int in_double = FALSE;
    const char *p;

    for (p = arg; *p != '\0'; p++) {
        char ch = *p;

        /* Keep room for the terminating NUL in token[255]. */
        if (len == 255) {
            return -1;
        }

        /* Inside '...': only the closing quote is special. */
        if (in_single) {
            if (ch == '\'') {
                in_single = FALSE;
            } else {
                token[len++] = ch;
            }
            continue;
        }

        /* Character following a backslash is copied verbatim. */
        if (escaped) {
            escaped = FALSE;
            token[len++] = ch;
            continue;
        }

        /* Backslash is honoured both unquoted and inside "...". */
        if (ch == '\\') {
            escaped = TRUE;
            continue;
        }

        /* Inside "...": only the closing quote is special. */
        if (in_double) {
            if (ch == '"') {
                in_double = FALSE;
            } else {
                token[len++] = ch;
            }
            continue;
        }

        /* Unquoted, unescaped character. */
        switch (ch) {
          case '\'':
            in_single = TRUE;
            break;
          case '"':
            in_double = TRUE;
            break;
          case ' ':
            /* Token boundary: flush what has been collected. */
            token[len] = '\0';
            options(token);
            len = 0;
            break;
          default:
            token[len++] = ch;
            break;
        }
    }

    /* Flush the final token, if any. */
    if (len != 0) {
        token[len] = '\0';
        options(token);
    }
    return processed;
}
/* /*
* call-seq: * call-seq:
* NKF.nkf(opt, str) -> string * NKF.nkf(opt, str) -> string
@ -104,7 +152,7 @@ rb_nkf_kconv(obj, opt, src)
StringValue(opt); StringValue(opt);
opt_ptr = RSTRING(opt)->ptr; opt_ptr = RSTRING(opt)->ptr;
opt_end = opt_ptr + RSTRING(opt)->len; opt_end = opt_ptr + RSTRING(opt)->len;
options(opt_ptr); nkf_split_options(opt_ptr);
incsize = INCSIZE; incsize = INCSIZE;
@ -565,7 +613,9 @@ rb_nkf_guess2(obj, src)
void void
Init_nkf() Init_nkf()
{ {
/* hoge */
VALUE mKconv = rb_define_module("NKF"); VALUE mKconv = rb_define_module("NKF");
/* hoge */
rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2); rb_define_module_function(mKconv, "nkf", rb_nkf_kconv, 2);
rb_define_module_function(mKconv, "guess1", rb_nkf_guess1, 1); rb_define_module_function(mKconv, "guess1", rb_nkf_guess1, 1);