mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
unicode_normalize: UNICODE_VERSION constant
* template/unicode_norm_gen.tmpl (UnicodeNormalize): embed the version of Unicode data files used to generate. * test/test_unicode_normalize.rb (TestUnicodeNormalize): use the embedded version to load the test data. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48357 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
8cba9dccf6
commit
79c0b7fcc8
2 changed files with 12 additions and 2 deletions
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
# Constants for input and output directory
|
# Constants for input and output directory
|
||||||
InputDataDir = ARGV[0] || 'enc/unicode/data'
|
InputDataDir = ARGV[0] || 'enc/unicode/data'
|
||||||
OuputDataDir = ARGV[1] || 'lib/unicode_normalize'
|
unicode_version = InputDataDir[/[\d.]+\z/]
|
||||||
|
|
||||||
# convenience methods
|
# convenience methods
|
||||||
class Integer
|
class Integer
|
||||||
|
@ -67,6 +67,12 @@ end
|
||||||
|
|
||||||
# read the file 'CompositionExclusions.txt'
|
# read the file 'CompositionExclusions.txt'
|
||||||
composition_exclusions = vpath.open("#{InputDataDir}/CompositionExclusions.txt") {|f|
|
composition_exclusions = vpath.open("#{InputDataDir}/CompositionExclusions.txt") {|f|
|
||||||
|
base = Regexp.quote(File.basename(f.path, '.*'))
|
||||||
|
ext = Regexp.quote(File.extname(f.path))
|
||||||
|
version = (line = f.gets)[/^# *#{base}-([\d.]+)#{ext}\s*$/, 1] or
|
||||||
|
abort "No file version in #{f.path}: #{line}"
|
||||||
|
(unicode_version ||= version) == version or
|
||||||
|
abort "Unicode version of directory (#{unicode_version}) and file (#{version}) mismatch"
|
||||||
f.grep(/^[A-Z0-9]{4,5}/) {|line| line.hex}
|
f.grep(/^[A-Z0-9]{4,5}/) {|line| line.hex}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -151,6 +157,8 @@ end
|
||||||
# automatically generated by template/unicode_norm_gen.tmpl
|
# automatically generated by template/unicode_norm_gen.tmpl
|
||||||
|
|
||||||
module UnicodeNormalize
|
module UnicodeNormalize
|
||||||
|
UNICODE_VERSION = "<%=unicode_version%>".freeze
|
||||||
|
|
||||||
accents = "" \
|
accents = "" \
|
||||||
"[<% accent_array.each_regexp_chars do |rx|%><%=rx%>" \
|
"[<% accent_array.each_regexp_chars do |rx|%><%=rx%>" \
|
||||||
"<% end%>]" \
|
"<% end%>]" \
|
||||||
|
|
|
@ -3,15 +3,17 @@
|
||||||
# Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
|
# Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
|
||||||
|
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
|
require 'unicode_normalize/normalize'
|
||||||
|
|
||||||
class TestUnicodeNormalize < Test::Unit::TestCase
|
class TestUnicodeNormalize < Test::Unit::TestCase
|
||||||
|
|
||||||
UNICODE_VERSION = '7.0.0'
|
UNICODE_VERSION = UnicodeNormalize::UNICODE_VERSION
|
||||||
|
|
||||||
NormTest = Struct.new :source, :NFC, :NFD, :NFKC, :NFKD, :line
|
NormTest = Struct.new :source, :NFC, :NFD, :NFKC, :NFKD, :line
|
||||||
|
|
||||||
def read_tests
|
def read_tests
|
||||||
IO.readlines(File.expand_path("../enc/unicode/data/#{UNICODE_VERSION}/NormalizationTest.txt", __dir__), encoding: 'utf-8')
|
IO.readlines(File.expand_path("../enc/unicode/data/#{UNICODE_VERSION}/NormalizationTest.txt", __dir__), encoding: 'utf-8')
|
||||||
|
.tap { |lines| assert_include(lines[0], "NormalizationTest-#{UNICODE_VERSION}.txt")}
|
||||||
.collect.with_index { |linedata, linenumber| [linedata, linenumber]}
|
.collect.with_index { |linedata, linenumber| [linedata, linenumber]}
|
||||||
.reject { |line| line[0] =~ /^[\#@]/ }
|
.reject { |line| line[0] =~ /^[\#@]/ }
|
||||||
.collect do |line|
|
.collect do |line|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue