mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* tool/enc-unicode.rb,
enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt, enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src: Add Age property to regexp. [ruby-core:33019] patched by Ammar Ali, tested by Run Paint Run Run git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29717 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
294070d86a
commit
a0265b0662
8 changed files with 22339 additions and 2687 deletions
|
@ -1,3 +1,11 @@
|
|||
Mon Nov 8 13:41:33 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* tool/enc-unicode.rb,
|
||||
enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
|
||||
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
|
||||
Add Age property to regexp. [ruby-core:33019]
|
||||
patched by Ammar Ali, tested by Run Paint Run Run
|
||||
|
||||
Mon Nov 8 12:16:39 2010 Ben Walton <bwalton@artsci.utoronto.ca>
|
||||
|
||||
* configure.in: support -h for solaris linker when gcc not used
|
||||
|
|
7
NEWS
7
NEWS
|
@ -83,7 +83,12 @@ with all sufficient information, see the ChangeLog file.
|
|||
|
||||
=== Language changes
|
||||
|
||||
* Regexps now support Unicode 6.0 (new characters and scripts)
|
||||
* Regexps now support Unicode 6.0. (new characters and scripts)
|
||||
|
||||
* [experimental] Regexps now support Age property.
|
||||
Unlike Perl, the current implementation follows the
|
||||
interpretation of UTS #18.
|
||||
http://www.unicode.org/reports/tr18/
|
||||
|
||||
=== Compatibility issues (excluding feature bug fixes)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -798,6 +798,25 @@ class TestRegexp < Test::Unit::TestCase
|
|||
assert_nothing_raised { 0x7fffffff.chr("utf-8").size }
|
||||
end
|
||||
|
||||
# Checks the Age property (\p{Age=X.Y}) against three sample characters.
# For each character the expectations below list the ages that should
# match it and the ages that should not.
def test_unicode_age
  # U+261C is expected to match both the newest and the oldest age tested.
  # The property name is written as "Age" and as "age", and \P is the
  # negated form, which must not match.
  [/^\p{Age=6.0}$/u, /^\p{Age=1.1}$/u].each do |age|
    assert_match(age, "\u261c")
  end
  assert_no_match(/^\P{age=6.0}$/u, "\u261c")

  # U+31F6 is expected to match age 3.2 and 6.0, but none of the older ages.
  [/^\p{age=6.0}$/u, /^\p{age=3.2}$/u].each do |age|
    assert_match(age, "\u31f6")
  end
  [/^\p{age=3.1}$/u, /^\p{age=3.0}$/u, /^\p{age=1.1}$/u].each do |age|
    assert_no_match(age, "\u31f6")
  end

  # U+2754 is expected to match age 6.0 only, out of the ages tested here.
  assert_match(/^\p{age=6.0}$/u, "\u2754")
  [
    /^\p{age=5.0}$/u,
    /^\p{age=4.0}$/u,
    /^\p{age=3.0}$/u,
    /^\p{age=2.0}$/u,
    /^\p{age=1.1}$/u
  ].each do |age|
    assert_no_match(age, "\u2754")
  end
end
|
||||
|
||||
def test_matchdata
|
||||
a = "haystack".match(/hay/)
|
||||
b = "haystack".match(/hay/)
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
# property names and POSIX character classes
|
||||
#
|
||||
# To use this, get UnicodeData.txt, Scripts.txt, PropList.txt,
|
||||
# PropertyAliases.txt, PropertyValueAliases.txt, and
|
||||
# DerivedCoreProperties.txt from unicode.org.
|
||||
# PropertyAliases.txt, PropertyValueAliases.txt, DerivedCoreProperties.txt,
|
||||
# and DerivedAge.txt from unicode.org.
|
||||
# (http://unicode.org/Public/UNIDATA/) Then run the following command:
|
||||
# ruby1.9 tool/enc-unicode.rb data_dir > enc/unicode/name2ctype.kwd
|
||||
# This produces the source file for gperf. After this, simply make ruby.
|
||||
|
@ -174,6 +174,32 @@ def parse_aliases(data)
|
|||
kv
|
||||
end
|
||||
|
||||
# According to Unicode6.0.0/ch03.pdf, Section 3.1, "An update version
# never involves any additions to the character repertoire." Versions
# in DerivedAge.txt should always be /\d+\.\d+/
#
# parse_age(data): Reads DerivedAge.txt and, for every version section in
# it, stores that section's codepoints in data under the constant name
# returned by constantize_agename and hands them to make_const. The
# codepoints of the previously finished section are appended to each new
# section, so the lists are cumulative. Returns the version strings in the
# order their sections end in the file.
def parse_age(data)
  current = nil         # version string of the section being read
  last_constname = nil  # constant name of the previously finished section
  cps = []              # codepoints collected for the current section
  ages = []
  IO.foreach(get_file('DerivedAge.txt')) do |line|
    if /^# Total code points: / =~ line
      # This trailer line closes the current version section.
      constname = constantize_agename(current)
      # each version matches all previous versions
      cps.concat(data[last_constname]) if last_constname
      data[constname] = cps
      make_const(constname, cps, "Derived Age #{current}")
      ages << current
      last_constname = constname
      cps = []
    elsif /^(\h+)(?:\.\.(\h+))?\s*;\s*(\d+\.\d+)/ =~ line
      # Data line: "XXXX ; V.V" or "XXXX..YYYY ; V.V". The range separator
      # is a literal "..", so the dots are escaped; an unescaped ".." would
      # accept any two characters between the codepoints.
      current = $3
      first = $1.to_i(16)
      last = $2 ? $2.to_i(16) : first
      cps.concat((first..last).to_a)
    end
  end
  ages
end
|
||||
|
||||
$const_cache = {}
|
||||
# make_const(property, pairs, name): Prints a 'static const' structure for a
|
||||
# given property, group of paired codepoints, and a human-friendly name for
|
||||
|
@ -202,6 +228,10 @@ def normalize_propname(name)
|
|||
name
|
||||
end
|
||||
|
||||
# constantize_agename(name): Turns a version string such as "6.0" into the
# constant name used for that age ("Age_6_0"). Every '.' is replaced, not
# just the first, so the result never contains a dot.
def constantize_agename(name)
  "Age_#{name.tr('.', '_')}"
end
|
||||
|
||||
# get_file(name): Returns the path of +name+ inside the directory passed as
# the first command-line argument (ARGV[0]).
def get_file(name)
|
||||
File.join(ARGV[0], name)
|
||||
end
|
||||
|
@ -224,6 +254,7 @@ end
|
|||
props.concat parse_scripts(data)
|
||||
puts '#endif /* USE_UNICODE_PROPERTIES */'
|
||||
aliases = parse_aliases(data)
|
||||
ages = parse_age(data)
|
||||
define_posix_props(data)
|
||||
POSIX_NAMES.each do |name|
|
||||
make_const(name, data[name], "[[:#{name}:]]")
|
||||
|
@ -235,6 +266,7 @@ __HEREDOC
|
|||
POSIX_NAMES.each{|name|puts" CR_#{name},"}
|
||||
puts "#ifdef USE_UNICODE_PROPERTIES"
|
||||
props.each{|name|puts" CR_#{name},"}
|
||||
ages.each{|name| puts" CR_#{constantize_agename(name)},"}
|
||||
|
||||
puts(<<'__HEREDOC')
|
||||
#endif /* USE_UNICODE_PROPERTIES */
|
||||
|
@ -268,6 +300,12 @@ aliases.each_pair do |k, v|
|
|||
next unless v = name_to_index[v]
|
||||
puts "%-40s %3d" % [k + ',', v]
|
||||
end
|
||||
ages.each do |name|
|
||||
i += 1
|
||||
name = "age=#{name}"
|
||||
name_to_index[name] = i
|
||||
puts "%-40s %3d" % [name + ',', i]
|
||||
end
|
||||
puts(<<'__HEREDOC')
|
||||
#endif /* USE_UNICODE_PROPERTIES */
|
||||
%%
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue