1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* tool/enc-unicode.rb,

enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  Add Age property to regexp. [ruby-core:33019]
  patched by Ammar Ali, tested by Run Paint Run Run

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29717 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2010-11-08 05:32:45 +00:00
parent 294070d86a
commit a0265b0662
8 changed files with 22339 additions and 2687 deletions

View file

@ -1,3 +1,11 @@
Mon Nov 8 13:41:33 2010 NARUSE, Yui <naruse@ruby-lang.org>
* tool/enc-unicode.rb,
enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
Add Age property to regexp. [ruby-core:33019]
patched by Ammar Ali, tested by Run Paint Run Run
Mon Nov 8 12:16:39 2010 Ben Walton <bwalton@artsci.utoronto.ca>
* configure.in: support -h for solaris linker when gcc not used

7
NEWS
View file

@ -83,7 +83,12 @@ with all sufficient information, see the ChangeLog file.
=== Language changes
* Regexps now support Unicode 6.0 (new characters and scripts)
* Regexps now support Unicode 6.0. (new characters and scripts)
* [experimental] Regexps now support Age property.
Unlike Perl, the current implementation follows the
interpretation of UTS #18.
http://www.unicode.org/reports/tr18/
=== Compatibility issues (excluding feature bug fixes)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -798,6 +798,25 @@ class TestRegexp < Test::Unit::TestCase
assert_nothing_raised { 0x7fffffff.chr("utf-8").size }
end
# Exercises the \p{Age=...} regexp property against three sample characters.
# A character is expected to match its own age and every later age, and to
# match none of the ages that precede it.
def test_unicode_age
  # U+261C: expected to match Age 1.1 and Age 6.0 (property name is
  # accepted in both "Age" and "age" spellings).
  age_1_1_char = "\u261c"
  assert_match(/^\p{Age=6.0}$/u, age_1_1_char)
  assert_match(/^\p{Age=1.1}$/u, age_1_1_char)
  # The negated property must reject the same character.
  assert_no_match(/^\P{age=6.0}$/u, age_1_1_char)

  # U+31F6: expected to match from Age 3.2 onwards, but nothing earlier.
  age_3_2_char = "\u31f6"
  assert_match(/^\p{age=6.0}$/u, age_3_2_char)
  assert_match(/^\p{age=3.2}$/u, age_3_2_char)
  assert_no_match(/^\p{age=3.1}$/u, age_3_2_char)
  assert_no_match(/^\p{age=3.0}$/u, age_3_2_char)
  assert_no_match(/^\p{age=1.1}$/u, age_3_2_char)

  # U+2754: expected to match Age 6.0 only among the versions checked here.
  age_6_0_char = "\u2754"
  assert_match(/^\p{age=6.0}$/u, age_6_0_char)
  assert_no_match(/^\p{age=5.0}$/u, age_6_0_char)
  assert_no_match(/^\p{age=4.0}$/u, age_6_0_char)
  assert_no_match(/^\p{age=3.0}$/u, age_6_0_char)
  assert_no_match(/^\p{age=2.0}$/u, age_6_0_char)
  assert_no_match(/^\p{age=1.1}$/u, age_6_0_char)
end
def test_matchdata
a = "haystack".match(/hay/)
b = "haystack".match(/hay/)

View file

@ -4,8 +4,8 @@
# property names and POSIX character classes
#
# To use this, get UnicodeData.txt, Scripts.txt, PropList.txt,
# PropertyAliases.txt, PropertyValueAliases.txt, and
# DerivedCoreProperties.txt from unicode.org.
# PropertyAliases.txt, PropertyValueAliases.txt, DerivedCoreProperties.txt,
# and DerivedAge.txt from unicode.org.
# (http://unicode.org/Public/UNIDATA/) Then run the following command:
# ruby1.9 tool/enc-unicode.rb data_dir > enc/unicode/name2ctype.kwd
# This produces the source file for gperf. After this, simply make ruby.
@ -174,6 +174,32 @@ def parse_aliases(data)
kv
end
# According to Unicode6.0.0/ch03.pdf, Section 3.1, "An update version
# never involves any additions to the character repertoire." Versions
# in DerivedAge.txt should always be /\d+\.\d+/
#
# parse_age(data): Reads DerivedAge.txt from the data directory and emits one
# constant per Unicode version found there.
#
# Data lines have the form "XXXX ; V.V" or "XXXX..YYYY ; V.V" (hexadecimal
# code points, then the version). A "# Total code points: " line is treated
# as the end of the section for the version seen most recently.
#
# data:: Hash of property name => Array of code points. For every version an
#        "Age_X_Y" entry is added, holding that version's own code points
#        followed by the code points of the previously closed version (which
#        in turn already contains all earlier ones).
#
# Returns the Array of version strings (e.g. "1.1", "2.0") in file order.
def parse_age(data)
  current = nil
  last_constname = nil
  cps = []
  ages = []
  # File.foreach (rather than IO.foreach) never interprets the path as a
  # command to run.
  File.foreach(get_file('DerivedAge.txt')) do |line|
    if /^# Total code points: / =~ line
      constname = constantize_agename(current)
      # each version matches all previous versions
      cps.concat(data[last_constname]) if last_constname
      data[constname] = cps
      make_const(constname, cps, "Derived Age #{current}")
      ages << current
      last_constname = constname
      # Start a fresh list; the one just stored in data must not be mutated.
      cps = []
    elsif /^(\h+)(?:\.\.(\h+))?\s*;\s*(\d+\.\d+)/ =~ line
      # The range separator is a literal "..": the dots are escaped so that
      # arbitrary characters between two hex runs are not taken for a range.
      first, last, current = $1.to_i(16), $2, $3
      if last
        cps.concat((first..last.to_i(16)).to_a)
      else
        cps.push(first)
      end
    end
  end
  ages
end
$const_cache = {}
# make_const(property, pairs, name): Prints a 'static const' structure for a
# given property, group of paired codepoints, and a human-friendly name for
@ -202,6 +228,10 @@ def normalize_propname(name)
name
end
# constantize_agename(name): Builds the constant name for a Unicode version
# string by prefixing "Age_" and replacing the first "." with "_"
# (e.g. "6.0" becomes "Age_6_0").
def constantize_agename(name)
  'Age_' + name.sub('.', '_')
end
# get_file(name): Returns the path of the data file +name+ inside the
# directory given as the first command-line argument.
def get_file(name)
  data_dir = ARGV.first
  File.join(data_dir, name)
end
@ -224,6 +254,7 @@ end
props.concat parse_scripts(data)
puts '#endif /* USE_UNICODE_PROPERTIES */'
aliases = parse_aliases(data)
ages = parse_age(data)
define_posix_props(data)
POSIX_NAMES.each do |name|
make_const(name, data[name], "[[:#{name}:]]")
@ -235,6 +266,7 @@ __HEREDOC
POSIX_NAMES.each{|name|puts" CR_#{name},"}
puts "#ifdef USE_UNICODE_PROPERTIES"
props.each{|name|puts" CR_#{name},"}
ages.each{|name| puts" CR_#{constantize_agename(name)},"}
puts(<<'__HEREDOC')
#endif /* USE_UNICODE_PROPERTIES */
@ -268,6 +300,12 @@ aliases.each_pair do |k, v|
next unless v = name_to_index[v]
puts "%-40s %3d" % [k + ',', v]
end
# Emit one "age=X.Y" keyword per Unicode version, continuing the running
# index i, and remember the index assigned to each keyword.
ages.each do |version|
  i += 1
  keyword = "age=#{version}"
  name_to_index[keyword] = i
  puts "%-40s %3d" % [keyword + ',', i]
end
puts(<<'__HEREDOC')
#endif /* USE_UNICODE_PROPERTIES */
%%