mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
case-folding.rb: modularize
* enc/unicode/case-folding.rb (CaseFolding): modularize, and add --output-file option. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46267 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
0148bd15e4
commit
1d588b4fde
3 changed files with 2330 additions and 2293 deletions
|
@ -1,4 +1,7 @@
|
||||||
Sat May 31 08:49:52 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sat May 31 08:54:58 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* enc/unicode/case-folding.rb (CaseFolding): modularize, and add
|
||||||
|
--output-file option.
|
||||||
|
|
||||||
* enc/unicode/case-folding.rb: script to convert CaseFolding.txt,
|
* enc/unicode/case-folding.rb: script to convert CaseFolding.txt,
|
||||||
translated from CaseFolding.py.
|
translated from CaseFolding.py.
|
||||||
|
|
|
@ -4,35 +4,44 @@
|
||||||
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
|
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
|
||||||
# $ ruby case-folding.rb CaseFolding.txt > ../enc/unicode/casefold.h
|
# $ ruby case-folding.rb CaseFolding.txt > ../enc/unicode/casefold.h
|
||||||
|
|
||||||
|
class CaseFolding
|
||||||
|
module Util
|
||||||
|
module_function
|
||||||
|
|
||||||
def hex_seq(v)
|
def hex_seq(v)
|
||||||
v.map {|i| "0x%04x" % i}.join(", ")
|
v.map {|i| "0x%04x" % i}.join(", ")
|
||||||
end
|
end
|
||||||
|
|
||||||
def print_table(table, data)
|
def print_table_1(dest, data)
|
||||||
print("static const #{table}[] = {\n")
|
|
||||||
for k, v in data.sort
|
for k, v in data.sort
|
||||||
if Array === k and k.length > 1
|
sk = (Array === k and k.length > 1) ? "{#{hex_seq(k)}}" : ("0x%04x" % k)
|
||||||
sk = "{#{hex_seq(k)}}"
|
dest.print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n")
|
||||||
else
|
|
||||||
sk = "0x%04x" % k
|
|
||||||
end
|
end
|
||||||
print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n")
|
|
||||||
end
|
|
||||||
print("};\n\n")
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def print_case_folding_data(filename)
|
def print_table(dest, type, data)
|
||||||
|
data.each do |n, d|
|
||||||
|
dest.print("static const #{type}_Type #{n}[] = {\n")
|
||||||
|
print_table_1(dest, d)
|
||||||
|
dest.print("};\n\n")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
include Util
|
||||||
|
|
||||||
|
attr_reader :fold, :fold_locale, :unfold, :unfold_locale
|
||||||
|
|
||||||
|
def load(filename)
|
||||||
pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/
|
pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/
|
||||||
|
|
||||||
fold = {}
|
@fold = fold = {}
|
||||||
unfold = [{}, {}, {}]
|
@unfold = unfold = [{}, {}, {}]
|
||||||
turkic = []
|
turkic = []
|
||||||
|
|
||||||
IO.foreach(filename) do |line|
|
IO.foreach(filename) do |line|
|
||||||
next unless res = pattern.match(line)
|
next unless res = pattern.match(line)
|
||||||
ch_from = res[1].to_i(16)
|
ch_from = res[1].to_i(16)
|
||||||
ch_to = []
|
|
||||||
|
|
||||||
if res[2] == 'T'
|
if res[2] == 'T'
|
||||||
# Turkic case folding
|
# Turkic case folding
|
||||||
|
@ -41,10 +50,9 @@ def print_case_folding_data(filename)
|
||||||
end
|
end
|
||||||
|
|
||||||
# store folding data
|
# store folding data
|
||||||
(3..6).each do |i|
|
ch_to = res[3..6].inject([]) do |a, i|
|
||||||
if res[i]
|
break a unless i
|
||||||
ch_to << res[i].to_i(16)
|
a << i.to_i(16)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
fold[ch_from] = ch_to
|
fold[ch_from] = ch_to
|
||||||
|
|
||||||
|
@ -54,54 +62,80 @@ def print_case_folding_data(filename)
|
||||||
end
|
end
|
||||||
|
|
||||||
# move locale dependent data to (un)fold_locale
|
# move locale dependent data to (un)fold_locale
|
||||||
fold_locale = {}
|
@fold_locale = fold_locale = {}
|
||||||
unfold_locale = [{}, {}]
|
@unfold_locale = unfold_locale = [{}, {}]
|
||||||
for ch_from in turkic
|
for ch_from in turkic
|
||||||
key = fold[ch_from]
|
key = fold[ch_from]
|
||||||
i = key.length - 1
|
i = key.length - 1
|
||||||
unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
|
unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
|
||||||
fold_locale[ch_from] = fold.delete(ch_from)
|
fold_locale[ch_from] = fold.delete(ch_from)
|
||||||
end
|
end
|
||||||
|
self
|
||||||
|
end
|
||||||
|
|
||||||
|
def display(dest)
|
||||||
# print the header
|
# print the header
|
||||||
print("/* DO NOT EDIT THIS FILE. */\n")
|
dest.print("/* DO NOT EDIT THIS FILE. */\n")
|
||||||
print("/* Generated by tool/CaseFolding.py */\n\n")
|
dest.print("/* Generated by enc/unicode/case-folding.rb */\n\n")
|
||||||
|
|
||||||
# print folding data
|
# print folding data
|
||||||
|
|
||||||
# CaseFold
|
# CaseFold + CaseFold_Locale
|
||||||
print_table("CaseFold_11_Type CaseFold", fold)
|
name = "CaseFold_11"
|
||||||
|
print_table(dest, name, "CaseFold"=>fold)
|
||||||
# CaseFold_Locale
|
print_table(dest, name, "CaseFold_Locale"=>fold_locale)
|
||||||
print_table("CaseFold_11_Type CaseFold_Locale", fold_locale)
|
|
||||||
|
|
||||||
# print unfolding data
|
# print unfolding data
|
||||||
|
|
||||||
# CaseUnfold_11
|
# CaseUnfold_11 + CaseUnfold_11_Locale
|
||||||
print_table("CaseUnfold_11_Type CaseUnfold_11", unfold[0])
|
name = "CaseUnfold_11"
|
||||||
|
print_table(dest, name, name=>unfold[0])
|
||||||
|
print_table(dest, name, "#{name}_Locale"=>unfold_locale[0])
|
||||||
|
|
||||||
# CaseUnfold_11_Locale
|
# CaseUnfold_12 + CaseUnfold_12_Locale
|
||||||
print_table("CaseUnfold_11_Type CaseUnfold_11_Locale", unfold_locale[0])
|
name = "CaseUnfold_12"
|
||||||
|
print_table(dest, name, name=>unfold[1])
|
||||||
# CaseUnfold_12
|
print_table(dest, name, "#{name}_Locale"=>unfold_locale[1])
|
||||||
print_table("CaseUnfold_12_Type CaseUnfold_12", unfold[1])
|
|
||||||
|
|
||||||
# CaseUnfold_12_Locale
|
|
||||||
print_table("CaseUnfold_12_Type CaseUnfold_12_Locale", unfold_locale[1])
|
|
||||||
|
|
||||||
# CaseUnfold_13
|
# CaseUnfold_13
|
||||||
print_table("CaseUnfold_13_Type CaseUnfold_13", unfold[2])
|
name = "CaseUnfold_13"
|
||||||
|
print_table(dest, name, name=>unfold[2])
|
||||||
|
|
||||||
# table sizes
|
# table sizes
|
||||||
fold_table_size = fold.size + fold_locale.size
|
fold_table_size = fold.size + fold_locale.size
|
||||||
printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2))
|
dest.printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2))
|
||||||
unfold1_table_size = unfold[0].size + unfold_locale[0].size
|
unfold1_table_size = unfold[0].size + unfold_locale[0].size
|
||||||
printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2))
|
dest.printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2))
|
||||||
unfold2_table_size = unfold[1].size + unfold_locale[1].size
|
unfold2_table_size = unfold[1].size + unfold_locale[1].size
|
||||||
printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5))
|
dest.printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5))
|
||||||
unfold3_table_size = unfold[2].size
|
unfold3_table_size = unfold[2].size
|
||||||
printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
|
dest.printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.load(*args)
|
||||||
|
new.load(*args)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if $0 == __FILE__
|
||||||
|
require 'optparse'
|
||||||
|
dest = nil
|
||||||
|
fold_1 = false
|
||||||
|
ARGV.options do |opt|
|
||||||
|
opt.banner << " [INPUT]"
|
||||||
|
opt.on("--output-file=FILE", "-o", "output to the FILE instead of STDOUT") {|output|
|
||||||
|
dest = (output unless output == '-')
|
||||||
|
}
|
||||||
|
opt.parse!
|
||||||
|
abort(opt.to_s) if ARGV.size > 1
|
||||||
|
end
|
||||||
filename = ARGV[0] || 'CaseFolding.txt'
|
filename = ARGV[0] || 'CaseFolding.txt'
|
||||||
print_case_folding_data(filename)
|
data = CaseFolding.load(filename)
|
||||||
|
if dest
|
||||||
|
open(dest, "wb") do |f|
|
||||||
|
data.display(f)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
data.display(STDOUT)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* DO NOT EDIT THIS FILE. */
|
/* DO NOT EDIT THIS FILE. */
|
||||||
/* Generated by tool/CaseFolding.py */
|
/* Generated by enc/unicode/case-folding.rb */
|
||||||
|
|
||||||
static const CaseFold_11_Type CaseFold[] = {
|
static const CaseFold_11_Type CaseFold[] = {
|
||||||
{0x0041, {1, {0x0061}}},
|
{0x0041, {1, {0x0061}}},
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue