ruby--ruby/lib/jcode.rb

232 lines
4.5 KiB
Ruby
Raw Normal View History

# jcode.rb - ruby code to handle japanese (EUC/SJIS) string
if $VERBOSE && $KCODE == "NONE"
warn "Warning: $KCODE is NONE."
end
$vsave, $VERBOSE = $VERBOSE, false
class String
warn "feel free for some warnings:\n" if $VERBOSE
def _regex_quote(str)
str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
$1 || $2 || '\\' + $3
end
end
private :_regex_quote
PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
RE_SJIS = Regexp.new(PATTERN_SJIS, 0, 'n')
RE_EUC = Regexp.new(PATTERN_EUC, 0, 'n')
RE_UTF8 = Regexp.new(PATTERN_UTF8, 0, 'n')
SUCC = {}
SUCC['s'] = Hash.new(1)
for i in 0 .. 0x3f
SUCC['s'][i.chr] = 0x40 - i
end
SUCC['s']["\x7e"] = 0x80 - 0x7e
SUCC['s']["\xfd"] = 0x100 - 0xfd
SUCC['s']["\xfe"] = 0x100 - 0xfe
SUCC['s']["\xff"] = 0x100 - 0xff
SUCC['e'] = Hash.new(1)
for i in 0 .. 0xa0
SUCC['e'][i.chr] = 0xa1 - i
end
SUCC['e']["\xfe"] = 2
SUCC['u'] = Hash.new(1)
for i in 0 .. 0x7f
SUCC['u'][i.chr] = 0x80 - i
end
SUCC['u']["\xbf"] = 0x100 - 0xbf
def mbchar?
case $KCODE[0]
when ?s, ?S
self =~ RE_SJIS
when ?e, ?E
self =~ RE_EUC
when ?u, ?U
self =~ RE_UTF8
else
nil
end
end
def end_regexp
case $KCODE[0]
when ?s, ?S
/#{PATTERN_SJIS}$/on
when ?e, ?E
/#{PATTERN_EUC}$/on
when ?u, ?U
/#{PATTERN_UTF8}$/on
else
/.$/on
end
end
alias original_succ! succ!
private :original_succ!
alias original_succ succ
private :original_succ
def succ!
reg = end_regexp
if $KCODE != 'NONE' && self =~ reg
succ_table = SUCC[$KCODE[0,1].downcase]
last1 = self[-1].ord
last2 = self[-2].ord
begin
last1 += succ_table[last1]
last2 += 1 if last1 == 0
self[-2..-1] = [last2, last1].pack("C*")
end while self !~ reg
self
else
original_succ!
end
end
def succ
(str = self.dup).succ! or str
end
private
def _expand_ch str
a = []
str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do
from = $1 || $2
to = $3 || $4
one = $5 || $6
if one
a.push one
elsif from.length != to.length
next
elsif from.length == 1
from[0].upto(to[0]) { |c| a.push c.chr }
else
from.upto(to) { |c| a.push c }
end
end
a
end
def expand_ch_hash from, to
h = {}
afrom = _expand_ch(from)
ato = _expand_ch(to)
afrom.each_with_index do |x,i| h[x] = ato[i] || ato[-1] end
h
end
HashCache = {}
TrPatternCache = {}
DeletePatternCache = {}
SqueezePatternCache = {}
public
def tr!(from, to)
return nil if from == ""
return self.delete!(from) if to == ""
pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
if from[0] == ?^
last = /.$/.match(to)[0]
self.gsub!(pattern, last)
else
h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
self.gsub!(pattern) do |c| h[c] end
end
end
def tr(from, to)
(str = self.dup).tr!(from, to) or str
end
def delete!(del)
return nil if del == ""
self.gsub!(DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/, '')
end
def delete(del)
(str = self.dup).delete!(del) or str
end
def squeeze!(del=nil)
return nil if del == ""
pattern =
if del
SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
else
/(.|\n)\1+/
end
self.gsub!(pattern, '\1')
end
def squeeze(del=nil)
(str = self.dup).squeeze!(del) or str
end
def tr_s!(from, to)
return self.delete!(from) if to.length == 0
* sprintf.c (rb_str_format): allow %c to print one character string (e.g. ?x). * lib/tempfile.rb (Tempfile::make_tmpname): put dot between basename and pid. [ruby-talk:196272] * parse.y (do_block): remove -> style block. * parse.y (parser_yylex): remove tLAMBDA_ARG. * eval.c (rb_call0): binding for the return event hook should have consistent scope. [ruby-core:07928] * eval.c (proc_invoke): return behavior should depend whether it is surrounded by a lambda or a mere block. * eval.c (formal_assign): handles post splat arguments. * eval.c (rb_call0): ditto. * st.c (strhash): use FNV-1a hash. * parse.y (parser_yylex): removed experimental ';;' terminator. * eval.c (rb_node_arity): should be aware of post splat arguments. * eval.c (rb_proc_arity): ditto. * parse.y (f_args): syntax rule enhanced to support arguments after the splat. * parse.y (block_param): ditto for block parameters. * parse.y (f_post_arg): mandatory formal arguments after the splat argument. * parse.y (new_args_gen): generate nodes for mandatory formal arguments after the splat argument. * eval.c (rb_eval): dispatch mandatory formal arguments after the splat argument. * parse.y (args): allow more than one splat in the argument list. * parse.y (method_call): allow aref [] to accept all kind of method argument, including assocs, splat, and block argument. * eval.c (SETUP_ARGS0): prepare block argument as well. * lib/mathn.rb (Integer): remove Integer#gcd2. [ruby-core:07931] * eval.c (error_line): print receivers true/false/nil specially. * eval.c (rb_proc_yield): handles parameters in yield semantics. * eval.c (nil_yield): gives LocalJumpError to denote no block error. * io.c (rb_io_getc): now takes one-character string. * string.c (rb_str_hash): use FNV-1a hash from Fowler/Noll/Vo hashing algorithm. * string.c (rb_str_aref): str[0] now returns 1 character string, instead of a fixnum. [Ruby2] * parse.y (parser_yylex): ?c now returns 1 character string, instead of a fixnum. [Ruby2] * string.c (rb_str_aset): no longer support fixnum insertion. * eval.c (umethod_bind): should not update original class. [ruby-dev:28636] * eval.c (ev_const_get): should support constant access from within instance_eval(). [ruby-dev:28327] * time.c (time_timeval): should round for usec floating number. [ruby-core:07896] * time.c (time_add): ditto. * dir.c (sys_warning): should not call a vararg function rb_sys_warning() indirectly. [ruby-core:07886] * numeric.c (flo_divmod): the first element of Float#divmod should be an integer. [ruby-dev:28589] * test/ruby/test_float.rb: add tests for divmod, div, modulo and remainder. * re.c (rb_reg_initialize): should not allow modifying literal regexps. frozen check moved from rb_reg_initialize_m as well. * re.c (rb_reg_initialize): should not modify untainted objects in safe levels higher than 3. * re.c (rb_memcmp): type change from char* to const void*. * dir.c (dir_close): should not close untainted dir stream. * dir.c (GetDIR): add tainted/frozen check for each dir operation. * lib/rdoc/parsers/parse_rb.rb (RDoc::RubyParser::parse_symbol_arg): typo fixed. a patch from Florian Gross <florg at florg.net>. * eval.c (EXEC_EVENT_HOOK): trace_func may remove itself from event_hooks. no guarantee for arbitrary hook deletion. [ruby-dev:28632] * util.c (ruby_strtod): differ addition to minimize error. [ruby-dev:28619] * util.c (ruby_strtod): should not raise ERANGE when the input string does not have any digits. [ruby-dev:28629] * eval.c (proc_invoke): should restore old ruby_frame->block. thanks to ts <decoux at moulon.inra.fr>. [ruby-core:07833] also fix [ruby-dev:28614] as well. * signal.c (trap): sig should be less then NSIG. Coverity found this bug. a patch from Kevin Tew <tewk at tewk.com>. [ruby-core:07823] * math.c (math_log2): add new method inspired by [ruby-talk:191237]. * math.c (math_log): add optional base argument to Math::log(). [ruby-talk:191308] * ext/syck/emitter.c (syck_scan_scalar): avoid accessing uninitialized array element. a patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07809] * array.c (rb_ary_fill): initialize local variables first. a patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07810] * ext/syck/yaml2byte.c (syck_yaml2byte_handler): need to free type_tag. a patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07808] * ext/socket/socket.c (make_hostent_internal): accept ai_family check from Sam Roberts <sroberts at uniserve.com>. [ruby-core:07691] * util.c (ruby_strtod): should not cut off 18 digits for no reason. [ruby-core:07796] * array.c (rb_ary_fill): internalize local variable "beg" to pacify Coverity. [ruby-core:07770] * pack.c (pack_unpack): now supports CRLF newlines. a patch from <tommy at tmtm.org>. [ruby-dev:28601] * applied code clean-up patch from Stefan Huehner <stefan at huehner.org>. [ruby-core:07764] * lib/jcode.rb (String::tr_s): should have translated non squeezing character sequence (i.e. a character) as well. thanks to Hiroshi Ichikawa <gimite at gimite.ddo.jp> [ruby-list:42090] * ext/socket/socket.c: document update patch from Sam Roberts <sroberts at uniserve.com>. [ruby-core:07701] * lib/mathn.rb (Integer): need not to remove gcd2. a patch from NARUSE, Yui <naruse at airemix.com>. [ruby-dev:28570] * parse.y (arg): too much NEW_LIST() * eval.c (SETUP_ARGS0): remove unnecessary access to nd_alen. * eval.c (rb_eval): use ARGSCAT for NODE_OP_ASGN1. [ruby-dev:28585] * parse.y (arg): use NODE_ARGSCAT for placeholder. * lib/getoptlong.rb (GetoptLong::get): RDoc update patch from mathew <meta at pobox.com>. [ruby-core:07738] * variable.c (rb_const_set): raise error when no target klass is supplied. [ruby-dev:28582] * prec.c (prec_prec_f): documentation patch from <gerardo.santana at gmail.com>. [ruby-core:07689] * bignum.c (rb_big_pow): second operand may be too big even if it's a Fixnum. [ruby-talk:187984] * README.EXT: update symbol description. [ruby-talk:188104] * COPYING: explicitly note GPLv2. [ruby-talk:187922] * parse.y: remove some obsolete syntax rules (unparenthesized method calls in argument list). * eval.c (rb_call0): insecure calling should be checked for non NODE_SCOPE method invocations too. * eval.c (rb_alias): should preserve the current safe level as well as method definition. * process.c (rb_f_sleep): remove RDoc description about SIGALRM which is not valid on the current implementation. [ruby-dev:28464] Thu Mar 23 21:40:47 2006 K.Kosako <sndgk393 AT ybb.ne.jp> * eval.c (method_missing): should support argument splat in super. a bug in combination of super, splat and method_missing. [ruby-talk:185438] * configure.in: Solaris SunPro compiler -rapth patch from <kuwa at labs.fujitsu.com>. [ruby-dev:28443] * configure.in: remove enable_rpath=no for Solaris. [ruby-dev:28440] * ext/win32ole/win32ole.c (ole_val2olevariantdata): change behavior of converting OLE Variant object with VT_ARRAY|VT_UI1 and Ruby String object. * ruby.1: a clarification patch from David Lutterkort <dlutter at redhat.com>. [ruby-core:7508] * lib/rdoc/ri/ri_paths.rb (RI::Paths): adding paths from rubygems directories. a patch from Eric Hodel <drbrain at segment7.net>. [ruby-core:07423] * eval.c (rb_clear_cache_by_class): clearing wrong cache. * ext/extmk.rb: use :remove_destination to install extension libraries to avoid SEGV. [ruby-dev:28417] * eval.c (rb_thread_fd_writable): should not re-schedule output from KILLED thread (must be error printing). * array.c (rb_ary_flatten_bang): allow specifying recursion level. [ruby-talk:182170] * array.c (rb_ary_flatten): ditto. * gc.c (add_heap): a heap_slots may overflow. a patch from Stefan Weil <weil at mail.berlios.de>. * eval.c (rb_call): use separate cache for fcall/vcall invocation. * eval.c (rb_eval): NODE_FCALL, NODE_VCALL can call local functions. * eval.c (rb_mod_local): a new method to specify newly added visibility "local". * eval.c (search_method): search for local methods which are visible only from the current class. * class.c (rb_class_local_methods): a method to list local methods. * object.c (Init_Object): add BasicObject class as a top level BlankSlate class. * ruby.h (SYM2ID): should not cast to signed long. [ruby-core:07414] * class.c (rb_include_module): allow module duplication. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10235 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-06-09 21:20:17 +00:00
pattern = SqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1*/
if from[0] == ?^
last = /.$/.match(to)[0]
self.gsub!(pattern, last)
else
h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
self.gsub!(pattern) do h[$1] end
end
end
def tr_s(from, to)
(str = self.dup).tr_s!(from,to) or str
end
def reverse
self.split(//).reverse.join
end
def reverse!
self.replace(self.reverse)
self
end
def chop!
self.gsub!(/(?:.|\r?\n)\z/, '')
end
def chop
(str = self.dup).chop! or str
end
def jlength
self.gsub(/[^\Wa-zA-Z_\d]/, ' ').length
end
alias jsize jlength
def jcount(str)
self.delete("^#{str}").jlength
end
def each_char
if block_given?
scan(/./m) do |x|
yield x
end
else
scan(/./m)
end
end
end
$VERBOSE = $vsave