mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
enc-unicode.rb: check Unicode version
* tool/enc-unicode.rb (data_foreach): check the Unicode version in data files, and yield each line. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55685 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
230a91c133
commit
0fd7666d57
1 changed file with 25 additions and 7 deletions
|
@ -15,10 +15,11 @@ if ARGV[0] == "--header"
|
||||||
ARGV.shift
|
ARGV.shift
|
||||||
end
|
end
|
||||||
unless ARGV.size == 1
|
unless ARGV.size == 1
|
||||||
$stderr.puts "Usage: #{$0} data_directory"
|
abort "Usage: #{$0} data_directory"
|
||||||
exit(1)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
$unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/]
|
||||||
|
|
||||||
POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word Alnum ASCII]
|
POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word Alnum ASCII]
|
||||||
|
|
||||||
def pair_codepoints(codepoints)
|
def pair_codepoints(codepoints)
|
||||||
|
@ -137,7 +138,7 @@ def parse_scripts(data, categories)
|
||||||
cps = []
|
cps = []
|
||||||
names = {}
|
names = {}
|
||||||
files.each do |file|
|
files.each do |file|
|
||||||
IO.foreach(get_file(file[:fn])) do |line|
|
data_foreach(file[:fn]) do |line|
|
||||||
if /^# Total code points: / =~ line
|
if /^# Total code points: / =~ line
|
||||||
data[current] = cps
|
data[current] = cps
|
||||||
categories[current] = file[:title]
|
categories[current] = file[:title]
|
||||||
|
@ -158,12 +159,12 @@ end
|
||||||
|
|
||||||
def parse_aliases(data)
|
def parse_aliases(data)
|
||||||
kv = {}
|
kv = {}
|
||||||
IO.foreach(get_file('PropertyAliases.txt')) do |line|
|
data_foreach('PropertyAliases.txt') do |line|
|
||||||
next unless /^(\w+)\s*; (\w+)/ =~ line
|
next unless /^(\w+)\s*; (\w+)/ =~ line
|
||||||
data[$1] = data[$2]
|
data[$1] = data[$2]
|
||||||
kv[normalize_propname($1)] = normalize_propname($2)
|
kv[normalize_propname($1)] = normalize_propname($2)
|
||||||
end
|
end
|
||||||
IO.foreach(get_file('PropertyValueAliases.txt')) do |line|
|
data_foreach('PropertyValueAliases.txt') do |line|
|
||||||
next unless /^(sc|gc)\s*; (\w+)\s*; (\w+)(?:\s*; (\w+))?/ =~ line
|
next unless /^(sc|gc)\s*; (\w+)\s*; (\w+)(?:\s*; (\w+))?/ =~ line
|
||||||
if $1 == 'gc'
|
if $1 == 'gc'
|
||||||
data[$3] = data[$2]
|
data[$3] = data[$2]
|
||||||
|
@ -188,7 +189,7 @@ def parse_age(data)
|
||||||
last_constname = nil
|
last_constname = nil
|
||||||
cps = []
|
cps = []
|
||||||
ages = []
|
ages = []
|
||||||
IO.foreach(get_file('DerivedAge.txt')) do |line|
|
data_foreach('DerivedAge.txt') do |line|
|
||||||
if /^# Total code points: / =~ line
|
if /^# Total code points: / =~ line
|
||||||
constname = constantize_agename(current)
|
constname = constantize_agename(current)
|
||||||
# each version matches all previous versions
|
# each version matches all previous versions
|
||||||
|
@ -211,7 +212,7 @@ def parse_block(data)
|
||||||
last_constname = nil
|
last_constname = nil
|
||||||
cps = []
|
cps = []
|
||||||
blocks = []
|
blocks = []
|
||||||
IO.foreach(get_file('Blocks.txt')) do |line|
|
data_foreach('Blocks.txt') do |line|
|
||||||
if /^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+);\s*(.*)/ =~ line
|
if /^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+);\s*(.*)/ =~ line
|
||||||
cps = ($1.to_i(16)..$2.to_i(16)).to_a
|
cps = ($1.to_i(16)..$2.to_i(16)).to_a
|
||||||
constname = constantize_blockname($3)
|
constname = constantize_blockname($3)
|
||||||
|
@ -276,6 +277,23 @@ def get_file(name)
|
||||||
File.join(ARGV[0], name)
|
File.join(ARGV[0], name)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Yields each line of the Unicode data file +name+ after its first line,
# once that first line has been checked for a Unicode version.
#
# The file is located with get_file(name). Its first line must look like
# "# <basename>-<version>.<ext>" (for example "# Scripts-9.0.0.txt").
# When $unicode_version is not set yet, it is set to the version found;
# otherwise the two must be equal.
#
# name::  data file name, resolved through get_file
# block:: receives every line after the header line
#
# Raises ArgumentError when the first line carries no version header, or
# when its version differs from $unicode_version.
def data_foreach(name, &block)
  fn = get_file(name)
  warn "Reading #{name}"
  # Build the header pattern from the basename only, so that a name with a
  # directory part ("extracted/Foo.txt") is still checked against
  # "# Foo-<version>.txt". Literal parts are escaped so they cannot act as
  # regexp metacharacters.
  stem, dot, ext = File.basename(name).partition('.')
  version_part = dot.empty? ? '' : '-([\d.]+)\.'
  pat = /^# #{Regexp.escape(stem)}#{version_part}#{Regexp.escape(ext)}/
  File.open(fn) do |f|
    header = f.gets
    # header is nil for an empty file; treat that as a missing version.
    match = header && pat.match(header)
    raise ArgumentError, "#{name}: no Unicode version" unless match
    version = match[1]
    if !$unicode_version
      $unicode_version = version
    elsif $unicode_version != version
      raise ArgumentError, "#{name}: Unicode version mismatch: #{version}"
    end
    f.each(&block)
  end
end
|
||||||
|
|
||||||
# Write Data
|
# Write Data
|
||||||
class Unifdef
|
class Unifdef
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue