mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* forgot to add this file in the previous commit.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4615 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
d57fc5a489
commit
eb8ee5e401
1 changed files with 11 additions and 8 deletions
|
@ -117,12 +117,13 @@ public
|
|||
CharsetMap.index(label.downcase)
|
||||
end
|
||||
|
||||
# Original regexps: http://www.din.or.jp/~ohzaki/perl.htm
|
||||
# ascii_euc = '[\x00-\x7F]'
|
||||
# Single-byte class restricted to TAB (0x09), LF (0x0A), CR (0x0D) and
# 0x20-0x7F; the other C0 control bytes are deliberately excluded.
ascii_euc = '[\x9\xa\xd\x20-\x7F]' # XML 1.0 restricted.
|
||||
# us_ascii = '[\x00-\x7F]'
|
||||
# Same byte class as ascii_euc above, under an encoding-neutral name.
# NOTE(review): this view shows both ascii_euc and us_ascii; they look like
# the before/after lines of a rename in the diff -- confirm against the file.
us_ascii = '[\x9\xa\xd\x20-\x7F]' # XML 1.0 restricted.
|
||||
# Matches a whole string (\A ... \z) made of zero or more us_ascii bytes.
# NOTE(review): the third argument "NONE" is presumably the legacy
# Regexp.new lang/kcode parameter (byte-wise matching) -- confirm for the
# Ruby version this file targets.
USASCIIRegexp = Regexp.new("\\A#{ us_ascii }*\\z", nil, "NONE")
|
||||
|
||||
# Two-byte EUC sequence: lead byte 0x8E or 0xA1-0xFE, then one byte 0xA1-0xFE.
twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])'
|
||||
# Three-byte EUC sequence: 0x8F followed by two bytes in 0xA1-0xFE.
threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])'
|
||||
# One EUC character: a restricted single byte, or a two-/three-byte sequence.
# NOTE(review): character_euc is assigned twice in this view (once from
# ascii_euc, once from us_ascii). Both classes are identical, so the
# resulting pattern is the same; presumably these are the old and new lines
# of the change -- confirm against the file.
character_euc = "(?:#{ ascii_euc }|#{ twobytes_euc }|#{ threebytes_euc })"
|
||||
character_euc = "(?:#{ us_ascii }|#{ twobytes_euc }|#{ threebytes_euc })"
|
||||
# Matches a whole string consisting of zero or more character_euc units.
EUCRegexp = Regexp.new("\\A#{ character_euc }*\\z", nil, "NONE")
|
||||
|
||||
# onebyte_sjis = '[\x00-\x7F\xA1-\xDF]'
|
||||
|
@ -132,17 +133,19 @@ public
|
|||
# Matches a whole string (\A ... \z) of zero or more character_sjis units;
# character_sjis is defined outside the lines visible here.
SJISRegexp = Regexp.new("\\A#{ character_sjis }*\\z", nil, "NONE")
|
||||
|
||||
# 0xxxxxxx
|
||||
#ascii_utf8 = '[\0-\x7F]'
|
||||
# Single-byte class restricted to TAB (0x09), LF (0x0A), CR (0x0D) and
# 0x20-0x7F.
ascii_utf8 = '[\x9\xA\xD\x20-\x7F]' # XML 1.0 restricted.
|
||||
# 110yyyyy 10xxxxxx
|
||||
# Lead byte 0xC0-0xDF followed by one continuation byte 0x80-0xBF.
# NOTE(review): 0xC0 and 0xC1 can only begin overlong encodings and are not
# valid UTF-8 lead bytes per RFC 3629; this pattern accepts them.
twobytes_utf8 = '(?:[\xC0-\xDF][\x80-\xBF])'
|
||||
# 1110zzzz 10yyyyyy 10xxxxxx
|
||||
# Lead byte 0xE0-0xEF followed by two continuation bytes.
# NOTE(review): overlong forms (0xE0 0x80-0x9F ..) and UTF-16 surrogates
# (0xED 0xA0-0xBF ..) are not excluded by this pattern.
threebytes_utf8 = '(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF])'
|
||||
# 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx
|
||||
# Lead byte 0xF0-0xF7 followed by three continuation bytes.
# NOTE(review): RFC 3629 only allows lead bytes 0xF0-0xF4 here (code points
# up to U+10FFFF); 0xF5-0xF7 are accepted by this pattern.
fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])'
|
||||
# One UTF-8 character: a restricted single byte or a 2-, 3- or 4-byte sequence.
# NOTE(review): character_utf8 is assigned twice in this view (once from
# ascii_utf8, once from us_ascii, which is defined elsewhere in the file);
# presumably these are the old and new lines of the change -- confirm.
character_utf8 = "(?:#{ ascii_utf8 }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })"
|
||||
character_utf8 = "(?:#{ us_ascii }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })"
|
||||
# Matches a whole string consisting of zero or more character_utf8 units.
UTF8Regexp = Regexp.new("\\A#{ character_utf8 }*\\z", nil, "NONE")
|
||||
|
||||
# Tests +str+ against USASCIIRegexp.
#
# Returns the value of `USASCIIRegexp =~ str`: the match offset when the
# string matches, or nil when it does not. The pattern is anchored with \A,
# so a successful match yields 0 (which is truthy in Ruby); the empty string
# also matches because the pattern uses `*`.
def Charset.is_us_ascii(str)
|
||||
USASCIIRegexp =~ str
|
||||
end
|
||||
|
||||
# Tests +str+ against UTF8Regexp.
#
# Returns the value of `UTF8Regexp =~ str`: the match offset when the string
# matches, or nil when it does not. The pattern is anchored with \A, so a
# successful match yields 0 (which is truthy in Ruby); the empty string also
# matches because the pattern uses `*`.
def Charset.is_utf8(str)
|
||||
UTF8Regexp =~ str
|
||||
end
|
||||
|
@ -158,7 +161,7 @@ public
|
|||
def Charset.is_ces(str, code = $KCODE)
|
||||
case code
|
||||
when 'NONE'
|
||||
true
|
||||
is_us_ascii(str)
|
||||
when 'UTF8'
|
||||
is_utf8(str)
|
||||
when 'EUC'
|
||||
|
|
Loading…
Add table
Reference in a new issue