1998-01-16 07:13:05 -05:00
|
|
|
# jcode.rb - Ruby code to handle Japanese (EUC/SJIS/UTF-8) strings
|
|
|
|
|
2000-12-05 04:36:54 -05:00
|
|
|
# In verbose mode, tell the user that no multibyte encoding is selected.
# The message text contains the literal characters "$KCODE" (no interpolation).
warn "Warning: $KCODE is NONE." if $VERBOSE && $KCODE == "NONE"
|
|
|
|
|
2000-07-10 00:49:24 -04:00
|
|
|
# Remember the current $VERBOSE in $vsave and switch warnings off; $vsave is
# assigned back to $VERBOSE on the last statement of this file.
$vsave, $VERBOSE = $VERBOSE, false
|
1998-01-16 07:13:05 -05:00
|
|
|
class String
|
2003-06-02 00:49:46 -04:00
|
|
|
# NOTE(review): $VERBOSE is assigned false just before this class is opened,
# so this message looks unreachable — confirm whether it is still wanted.
warn "feel free for some warnings:\n" if $VERBOSE
|
1998-01-16 07:13:05 -05:00
|
|
|
|
2000-09-19 03:54:28 -04:00
|
|
|
# Rewrites a tr-style character specification so it can be placed between
# the brackets of a regexp character class.
#
# * a backslash followed by "[", "]", "-" or "\" is kept unchanged;
# * a backslash followed by any other character is reduced to that character;
# * a bare "[", "]" or "\" gets a backslash put in front of it.
#
# str:: the character specification.
# Returns a new String; +str+ itself is not modified.
def _regex_quote(str)
  str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
    m = Regexp.last_match
    m[1] || m[2] || "\\#{m[3]}"
  end
end
private :_regex_quote
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
# Regexp source text for one multibyte character, written as byte ranges:
# two bytes for SJIS and EUC, two or three bytes for UTF8.
PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'

# Compiled forms of the patterns above. The /n flag makes them match on raw
# bytes; the literal form is used (as end_regexp does) instead of the
# three-argument Regexp.new, whose third argument newer Rubies reject.
RE_SJIS = /#{PATTERN_SJIS}/n
RE_EUC = /#{PATTERN_EUC}/n
RE_UTF8 = /#{PATTERN_UTF8}/n
|
1999-11-04 03:39:57 -05:00
|
|
|
|
|
|
|
# Step tables used by succ!, one per encoding letter ('s', 'e', 'u').
# Each table maps a one-byte String to the amount to add to that byte value;
# bytes without an entry use the default step of 1. The explicit entries are
# the distance from a byte value to the start of the next range accepted for
# the final byte by the matching PATTERN_* (or to 0x100, i.e. wrap-around).
SUCC = {}

SUCC['s'] = Hash.new(1)
(0..0x3f).each { |byte| SUCC['s'][byte.chr] = 0x40 - byte }
SUCC['s']["\x7e"] = 0x80 - 0x7e
SUCC['s']["\xfd"] = 0x100 - 0xfd
SUCC['s']["\xfe"] = 0x100 - 0xfe
SUCC['s']["\xff"] = 0x100 - 0xff

SUCC['e'] = Hash.new(1)
(0..0xa0).each { |byte| SUCC['e'][byte.chr] = 0xa1 - byte }
SUCC['e']["\xfe"] = 2

SUCC['u'] = Hash.new(1)
(0..0x7f).each { |byte| SUCC['u'][byte.chr] = 0x80 - byte }
SUCC['u']["\xbf"] = 0x100 - 0xbf
|
1998-01-16 07:13:05 -05:00
|
|
|
|
1999-01-19 23:59:39 -05:00
|
|
|
# Looks for a multibyte character in the receiver, using the pattern that
# corresponds to the first letter of $KCODE (s/S, e/E or u/U).
#
# Returns the value of the =~ match (position of the first hit, or nil when
# there is none); nil for any other $KCODE.
def mbchar?
  case $KCODE[0]
  when ?s, ?S then self =~ RE_SJIS
  when ?e, ?E then self =~ RE_EUC
  when ?u, ?U then self =~ RE_UTF8
  end
end
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
# Returns a Regexp that matches the final character of a string under the
# current $KCODE: the SJIS/EUC/UTF8 multibyte pattern anchored with "$" when
# $KCODE starts with s/S, e/E or u/U, and /.$/ otherwise.
# The "o" flag interpolates the pattern text only once per literal.
def end_regexp
  case $KCODE[0]
  when ?s, ?S then /#{PATTERN_SJIS}$/on
  when ?e, ?E then /#{PATTERN_EUC}$/on
  when ?u, ?U then /#{PATTERN_UTF8}$/on
  else /.$/on
  end
end
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
# Keep the existing succ!/succ implementations reachable under private names
# before they are redefined below; succ! falls back to original_succ!.
alias original_succ! succ!
private :original_succ!

alias original_succ succ
private :original_succ
|
|
|
|
|
|
|
|
# In-place successor that understands a trailing multibyte character.
#
# When $KCODE is not 'NONE' and the receiver ends with a character matched by
# end_regexp, the last two bytes are advanced until they match end_regexp
# again. Otherwise the original succ! is used.
#
# Returns the receiver in the multibyte case, or whatever original_succ!
# returns in the fallback case.
def succ!
  reg = end_regexp
  return original_succ! if $KCODE == 'NONE' || self !~ reg

  succ_table = SUCC[$KCODE[0, 1].downcase]
  last1 = self[-1].ord
  last2 = self[-2].ord
  loop do
    # The step table is keyed by one-byte strings, so look the byte up as a
    # string; an Integer key would always fall through to the default step.
    last1 += succ_table[last1.chr]
    if last1 > 0xff
      # The final byte wrapped around: keep it in byte range and carry into
      # the preceding byte (a plain "== 0" test never fires on an Integer
      # that has grown to 0x100).
      last1 &= 0xff
      last2 = (last2 + 1) & 0xff
    end
    self[-2..-1] = [last2, last1].pack("C*")
    break if self =~ reg
  end
  self
end
|
|
|
|
|
|
|
|
# Non-destructive counterpart of succ!: applies succ! to a copy of the
# receiver and returns that copy.
def succ
  copy = dup
  copy.succ! || copy
end
|
|
|
|
|
1999-08-13 01:45:20 -04:00
|
|
|
private
|
|
|
|
|
|
|
|
# Expands a tr-style character specification into an Array of characters.
#
# "x-y" becomes every character from x up to y (nothing when x sorts after
# y); a range whose two ends have different lengths is skipped; anything else
# is taken one character at a time. A backslash makes the following character
# literal, both as a range end and on its own.
#
# str:: the character specification.
# Returns the Array of expanded characters, in order of appearance.
def _expand_ch(str)
  chars = []
  str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do |esc_lo, raw_lo, esc_hi, raw_hi, esc_one, raw_one|
    single = esc_one || raw_one
    if single
      chars << single
      next
    end

    lo = esc_lo || raw_lo
    hi = esc_hi || raw_hi
    next if lo.length != hi.length

    if lo.length == 1
      lo[0].upto(hi[0]) { |c| chars << c.chr }
    else
      lo.upto(hi) { |c| chars << c }
    end
  end
  chars
end
|
|
|
|
|
1999-08-13 01:45:20 -04:00
|
|
|
# Builds the character translation table for a from/to pair.
#
# Both specifications are expanded with _expand_ch; the i-th source character
# maps to the i-th target character, and source characters beyond the end of
# the target list map to the last target character.
#
# Returns a Hash of source character => replacement character.
def expand_ch_hash(from, to)
  sources = _expand_ch(from)
  targets = _expand_ch(to)
  mapping = {}
  sources.each_with_index do |ch, idx|
    mapping[ch] = targets[idx] || targets[-1]
  end
  mapping
end
|
|
|
|
|
|
|
|
# Memoization tables filled lazily by the methods below:
# HashCache holds translation tables built by expand_ch_hash; the three
# *PatternCache tables hold Regexps compiled from character specifications.
HashCache = {}
TrPatternCache = {}
DeletePatternCache = {}
SqueezePatternCache = {}
|
|
|
|
|
|
|
|
public
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
# Multibyte-aware replacement for String#tr!.
#
# from:: character specification to translate; a leading "^" negates it.
# to::   replacement specification; when empty the call becomes delete!(from).
#
# Returns nil when +from+ is empty; otherwise the result of the underlying
# gsub!/delete! call (the receiver, or nil when nothing was replaced).
def tr!(from, to)
  return nil if from == ""
  return self.delete!(from) if to == ""

  pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
  if from[0] == ?^
    # Negated set: every matching character becomes the last character of
    # +to+. /.\z/m is used so that a trailing newline counts as that last
    # character (/.$/ would skip it, or find nothing at all).
    last = to[/.\z/m]
    self.gsub!(pattern, last)
  else
    # Key on the length of +from+ plus the concatenation: unlike a joined
    # string with a fixed separator, two different from/to pairs can never
    # produce the same key.
    h = HashCache[[from.length, from + to]] ||= expand_ch_hash(from, to)
    self.gsub!(pattern) { |c| h[c] }
  end
end
|
|
|
|
|
|
|
|
# Non-destructive counterpart of tr!: translates a copy of the receiver and
# returns the copy (also when tr! reports that nothing changed).
def tr(from, to)
  copy = dup
  copy.tr!(from, to) || copy
end
|
|
|
|
|
|
|
|
# Multibyte-aware replacement for String#delete!: removes every character
# that belongs to the specification +del+.
#
# Returns nil when +del+ is empty; otherwise the result of gsub! (the
# receiver, or nil when nothing was removed).
def delete!(del)
  return nil if del == ""

  pattern = DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/
  self.gsub!(pattern, '')
end
|
|
|
|
|
|
|
|
# Non-destructive counterpart of delete!: works on a copy of the receiver and
# returns the copy (also when delete! reports that nothing changed).
def delete(del)
  copy = dup
  copy.delete!(del) || copy
end
|
|
|
|
|
|
|
|
# Multibyte-aware replacement for String#squeeze!: collapses runs of the same
# character into a single occurrence.
#
# del:: optional character specification; when given only characters from it
#       are squeezed, otherwise any repeated character (including newline) is.
#
# Returns nil when +del+ is an empty string; otherwise the result of gsub!
# (the receiver, or nil when no run was found).
def squeeze!(del = nil)
  return nil if del == ""
  return self.gsub!(/(.|\n)\1+/, '\1') unless del

  pattern = SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
  self.gsub!(pattern, '\1')
end
|
|
|
|
|
|
|
|
def squeeze(del=nil)
|
1999-01-19 23:59:39 -05:00
|
|
|
(str = self.dup).squeeze!(del) or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
# Pattern cache used only by tr_s!. squeeze! keeps patterns of a different
# shape (/(...)\1+/) in SqueezePatternCache under the same kind of key, so
# sharing that table would let either method pick up the other's pattern.
TrSqueezePatternCache = {}

# Multibyte-aware replacement for String#tr_s!: translates characters from
# +from+ to +to+ and collapses each run of one repeated source character into
# a single replacement.
#
# from:: character specification to translate; a leading "^" negates it.
# to::   replacement specification; when empty the call becomes delete!(from).
#
# Returns nil when +from+ is empty; otherwise the result of the underlying
# gsub!/delete! call (the receiver, or nil when nothing was replaced).
def tr_s!(from, to)
  # An empty specification would otherwise be compiled into an empty
  # character class; tr! guards against it in the same way.
  return nil if from == ""
  return self.delete!(from) if to.length == 0

  pattern = TrSqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1*/
  if from[0] == ?^
    # Negated set: each run becomes the last character of +to+. /.\z/m makes
    # a trailing newline count as that last character.
    last = to[/.\z/m]
    self.gsub!(pattern, last)
  else
    # Length of +from+ plus the concatenation identifies the pair uniquely.
    h = HashCache[[from.length, from + to]] ||= expand_ch_hash(from, to)
    self.gsub!(pattern) { h[Regexp.last_match(1)] }
  end
end
|
|
|
|
|
|
|
|
# Non-destructive counterpart of tr_s!: works on a copy of the receiver and
# returns the copy (also when tr_s! reports that nothing changed).
def tr_s(from, to)
  copy = dup
  copy.tr_s!(from, to) || copy
end
|
|
|
|
|
2005-11-15 02:16:46 -05:00
|
|
|
# Returns a new String with the receiver's characters in reverse order.
# The receiver is split with an empty regexp, so the unit of reversal is
# whatever the regexp engine treats as one character.
def reverse
  chars = split(//)
  chars.reverse!
  chars.join
end
|
|
|
|
|
|
|
|
# Reverses the receiver in place by replacing its contents with the result of
# reverse. Always returns the receiver (String#replace returns it).
def reverse!
  replace(reverse)
end
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
# Removes the last character of the receiver in place; a trailing "\n" or
# "\r\n" is removed as one unit.
#
# Returns the receiver, or nil when there was nothing to remove (empty
# string). The pattern is anchored at the end of the string, so at most one
# substitution can ever happen and sub! is sufficient.
def chop!
  sub!(/(?:.|\r?\n)\z/, '')
end
|
|
|
|
|
|
|
|
# Non-destructive counterpart of chop!: works on a copy of the receiver and
# returns the copy (also when chop! reports that nothing changed).
def chop
  copy = dup
  copy.chop! || copy
end
|
1999-08-13 01:45:20 -04:00
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
# Character count of the receiver: every word character that is not an ASCII
# letter, digit or underscore is first replaced by a single space, and the
# length of the resulting string is returned.
def jlength
  flattened = gsub(/[^\Wa-zA-Z_\d]/, ' ')
  flattened.length
end
alias jsize jlength
|
|
|
|
|
1999-08-13 01:45:20 -04:00
|
|
|
# Counts the characters of the receiver that belong to the specification
# +str+: everything outside +str+ is deleted from a copy and the jlength of
# what remains is returned.
def jcount(str)
  kept = delete("^#{str}")
  kept.jlength
end
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
# Iterates over the receiver one character at a time (newlines included).
#
# With a block, yields each character and returns what scan returns for the
# block form (the receiver). Without a block, returns an Array of the
# characters.
def each_char
  return scan(/./m) unless block_given?

  scan(/./m) { |ch| yield ch }
end
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
# Put back the $VERBOSE value that was saved in $vsave near the top of the file.
$VERBOSE = $vsave
|