1998-01-16 07:13:05 -05:00
|
|
|
# jcode.rb - Ruby code to handle Japanese (EUC/SJIS/UTF-8) strings
|
|
|
|
|
2000-12-05 04:36:54 -05:00
|
|
|
# In verbose mode, warn when no multibyte encoding has been selected via $KCODE.
if $VERBOSE && $KCODE == "NONE"
  STDERR.puts "Warning: $KCODE is NONE."
end

# Remember the current warning level in $vsave and switch warnings off while
# the String class is reopened below; the last statement of this file assigns
# $vsave back to $VERBOSE.
$vsave, $VERBOSE = $VERBOSE, false
|
1998-01-16 07:13:05 -05:00
|
|
|
class String
|
|
|
|
printf STDERR, "feel free for some warnings:\n" if $VERBOSE
|
|
|
|
|
2000-09-19 03:54:28 -04:00
|
|
|
def _regex_quote(str)
|
2003-03-21 23:31:24 -05:00
|
|
|
str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
|
2001-09-06 00:34:36 -04:00
|
|
|
$1 || $2 || '\\' + $3
|
|
|
|
end
|
2000-09-19 03:54:28 -04:00
|
|
|
end
|
|
|
|
private :_regex_quote
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
|
|
|
|
PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
|
|
|
|
PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
|
|
|
|
|
2003-03-24 04:38:37 -05:00
|
|
|
RE_SJIS = Regexp.new(PATTERN_SJIS, 0, 'n')
|
|
|
|
RE_EUC = Regexp.new(PATTERN_EUC, 0, 'n')
|
|
|
|
RE_UTF8 = Regexp.new(PATTERN_UTF8, 0, 'n')
|
1999-11-04 03:39:57 -05:00
|
|
|
|
|
|
|
SUCC = {}
|
|
|
|
SUCC['s'] = Hash.new(1)
|
|
|
|
for i in 0 .. 0x3f
|
|
|
|
SUCC['s'][i.chr] = 0x40 - i
|
|
|
|
end
|
|
|
|
SUCC['s']["\x7e"] = 0x80 - 0x7e
|
|
|
|
SUCC['s']["\xfd"] = 0x100 - 0xfd
|
|
|
|
SUCC['s']["\xfe"] = 0x100 - 0xfe
|
|
|
|
SUCC['s']["\xff"] = 0x100 - 0xff
|
|
|
|
SUCC['e'] = Hash.new(1)
|
|
|
|
for i in 0 .. 0xa0
|
|
|
|
SUCC['e'][i.chr] = 0xa1 - i
|
|
|
|
end
|
|
|
|
SUCC['e']["\xfe"] = 2
|
|
|
|
SUCC['u'] = Hash.new(1)
|
|
|
|
for i in 0 .. 0x7f
|
|
|
|
SUCC['u'][i.chr] = 0x80 - i
|
|
|
|
end
|
|
|
|
SUCC['u']["\xbf"] = 0x100 - 0xbf
|
1998-01-16 07:13:05 -05:00
|
|
|
|
1999-01-19 23:59:39 -05:00
|
|
|
def mbchar?
|
1999-08-13 01:45:20 -04:00
|
|
|
case $KCODE[0]
|
|
|
|
when ?s, ?S
|
1999-11-04 03:39:57 -05:00
|
|
|
self =~ RE_SJIS
|
1999-08-13 01:45:20 -04:00
|
|
|
when ?e, ?E
|
1999-11-04 03:39:57 -05:00
|
|
|
self =~ RE_EUC
|
|
|
|
when ?u, ?U
|
|
|
|
self =~ RE_UTF8
|
1998-01-16 07:13:05 -05:00
|
|
|
else
|
1999-11-04 03:39:57 -05:00
|
|
|
nil
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
def end_regexp
|
|
|
|
case $KCODE[0]
|
|
|
|
when ?s, ?S
|
2003-03-23 10:38:44 -05:00
|
|
|
/#{PATTERN_SJIS}$/on
|
1999-11-04 03:39:57 -05:00
|
|
|
when ?e, ?E
|
2003-03-23 10:38:44 -05:00
|
|
|
/#{PATTERN_EUC}$/on
|
1999-11-04 03:39:57 -05:00
|
|
|
when ?u, ?U
|
2003-03-23 10:38:44 -05:00
|
|
|
/#{PATTERN_UTF8}$/on
|
1998-01-16 07:13:05 -05:00
|
|
|
else
|
2003-03-23 10:38:44 -05:00
|
|
|
/.$/on
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
alias original_succ! succ!
|
|
|
|
private :original_succ!
|
1998-01-16 07:13:05 -05:00
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
alias original_succ succ
|
|
|
|
private :original_succ
|
|
|
|
|
|
|
|
def succ!
|
|
|
|
reg = end_regexp
|
|
|
|
if self =~ reg
|
|
|
|
succ_table = SUCC[$KCODE[0,1].downcase]
|
|
|
|
begin
|
|
|
|
self[-1] += succ_table[self[-1]]
|
|
|
|
self[-2] += 1 if self[-1] == 0
|
|
|
|
end while self !~ reg
|
|
|
|
self
|
1998-01-16 07:13:05 -05:00
|
|
|
else
|
1999-11-04 03:39:57 -05:00
|
|
|
original_succ!
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
1999-11-04 03:39:57 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def succ
|
|
|
|
(str = self.dup).succ! or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
1999-08-13 01:45:20 -04:00
|
|
|
private
|
|
|
|
|
|
|
|
def _expand_ch str
|
1998-01-16 07:13:05 -05:00
|
|
|
a = []
|
2001-09-06 04:12:24 -04:00
|
|
|
str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do
|
|
|
|
from = $1 || $2
|
|
|
|
to = $3 || $4
|
|
|
|
one = $5 || $6
|
|
|
|
if one
|
|
|
|
a.push one
|
|
|
|
elsif from.length != to.length
|
2001-09-06 00:34:36 -04:00
|
|
|
next
|
2001-09-06 04:12:24 -04:00
|
|
|
elsif from.length == 1
|
|
|
|
from[0].upto(to[0]) { |c| a.push c.chr }
|
1998-01-16 07:13:05 -05:00
|
|
|
else
|
2001-09-06 04:12:24 -04:00
|
|
|
from.upto(to) { |c| a.push c }
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
a
|
|
|
|
end
|
|
|
|
|
1999-08-13 01:45:20 -04:00
|
|
|
def expand_ch_hash from, to
|
|
|
|
h = {}
|
|
|
|
afrom = _expand_ch(from)
|
|
|
|
ato = _expand_ch(to)
|
|
|
|
afrom.each_with_index do |x,i| h[x] = ato[i] || ato[-1] end
|
|
|
|
h
|
|
|
|
end
|
|
|
|
|
|
|
|
HashCache = {}
|
|
|
|
TrPatternCache = {}
|
|
|
|
DeletePatternCache = {}
|
|
|
|
SqueezePatternCache = {}
|
|
|
|
|
|
|
|
public
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
def tr!(from, to)
|
2003-03-23 08:24:45 -05:00
|
|
|
return nil if from == ""
|
|
|
|
return self.delete!(from) if to == ""
|
1998-01-16 07:13:05 -05:00
|
|
|
|
2000-09-19 03:54:28 -04:00
|
|
|
pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
|
1999-08-13 01:45:20 -04:00
|
|
|
if from[0] == ?^
|
|
|
|
last = /.$/.match(to)[0]
|
|
|
|
self.gsub!(pattern, last)
|
1998-01-16 07:13:05 -05:00
|
|
|
else
|
2001-09-06 00:34:36 -04:00
|
|
|
h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
|
1999-09-18 00:48:51 -04:00
|
|
|
self.gsub!(pattern) do |c| h[c] end
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def tr(from, to)
|
1999-01-19 23:59:39 -05:00
|
|
|
(str = self.dup).tr!(from, to) or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def delete!(del)
|
2003-03-20 01:27:22 -05:00
|
|
|
return nil if del == ""
|
2000-09-19 03:54:28 -04:00
|
|
|
self.gsub!(DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/, '')
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def delete(del)
|
1999-01-19 23:59:39 -05:00
|
|
|
(str = self.dup).delete!(del) or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def squeeze!(del=nil)
|
2003-03-20 01:27:22 -05:00
|
|
|
return nil if del == ""
|
1999-08-13 01:45:20 -04:00
|
|
|
pattern =
|
|
|
|
if del
|
2000-09-19 03:54:28 -04:00
|
|
|
SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
|
1998-01-16 07:13:05 -05:00
|
|
|
else
|
1999-08-13 01:45:20 -04:00
|
|
|
/(.|\n)\1+/
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
1999-08-13 01:45:20 -04:00
|
|
|
self.gsub!(pattern, '\1')
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def squeeze(del=nil)
|
1999-01-19 23:59:39 -05:00
|
|
|
(str = self.dup).squeeze!(del) or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def tr_s!(from, to)
|
|
|
|
return self.delete!(from) if to.length == 0
|
1999-08-13 01:45:20 -04:00
|
|
|
|
2001-09-06 00:34:36 -04:00
|
|
|
pattern = SqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1+/
|
1999-08-13 01:45:20 -04:00
|
|
|
if from[0] == ?^
|
|
|
|
last = /.$/.match(to)[0]
|
|
|
|
self.gsub!(pattern, last)
|
|
|
|
else
|
2001-09-06 00:34:36 -04:00
|
|
|
h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
|
1999-08-13 01:45:20 -04:00
|
|
|
self.gsub!(pattern) do h[$1] end
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def tr_s(from, to)
|
1999-01-19 23:59:39 -05:00
|
|
|
(str = self.dup).tr_s!(from,to) or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def chop!
|
1999-08-13 01:45:20 -04:00
|
|
|
self.gsub!(/(?:.|\r?\n)\z/, '')
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
def chop
|
1999-01-19 23:59:39 -05:00
|
|
|
(str = self.dup).chop! or str
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
1999-08-13 01:45:20 -04:00
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
def jlength
|
|
|
|
self.gsub(/[^\Wa-zA-Z_\d]/, ' ').length
|
|
|
|
end
|
|
|
|
alias jsize jlength
|
|
|
|
|
1999-08-13 01:45:20 -04:00
|
|
|
def jcount(str)
|
|
|
|
self.delete("^#{str}").jlength
|
|
|
|
end
|
|
|
|
|
1999-11-04 03:39:57 -05:00
|
|
|
def each_char
|
2000-06-28 04:31:35 -04:00
|
|
|
if block_given?
|
2000-06-12 03:48:31 -04:00
|
|
|
scan(/./m) do |x|
|
1999-11-04 03:39:57 -05:00
|
|
|
yield x
|
|
|
|
end
|
|
|
|
else
|
2000-06-12 03:48:31 -04:00
|
|
|
scan(/./m)
|
1999-11-04 03:39:57 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
1998-01-16 07:13:05 -05:00
|
|
|
end
|
|
|
|
# Restore the warning level that was saved in $vsave near the top of this file.
$VERBOSE = $vsave
|