mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
case-folding.rb: conversion script
* enc/unicode/case-folding.rb: script to convert CaseFolding.txt, translated from CaseFolding.py. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46266 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
40ec552861
commit
0148bd15e4
2 changed files with 112 additions and 0 deletions
|
@ -1,3 +1,8 @@
|
|||
Sat May 31 08:49:52 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* enc/unicode/case-folding.rb: script to convert CaseFolding.txt,
|
||||
translated from CaseFolding.py.
|
||||
|
||||
Sat May 31 08:31:41 2014 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* test/lib/minitest/unit.rb: Check Tempfile leaks for each test method
|
||||
|
|
107
enc/unicode/case-folding.rb
Executable file
107
enc/unicode/case-folding.rb
Executable file
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/ruby
|
||||
|
||||
# Usage:
|
||||
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
|
||||
# $ ruby enc/unicode/case-folding.rb CaseFolding.txt > enc/unicode/casefold.h
|
||||
|
||||
|
||||
# Renders a sequence of integers as C hexadecimal literals.
#
# v:: an Array (or other object responding to +map+) of Integers.
#
# Returns a String in which every element is written as "0x" followed by
# at least four lowercase hex digits, with elements separated by ", ".
# An empty sequence yields an empty string.
def hex_seq(v)
  literals = v.map do |codepoint|
    "0x%04x" % codepoint
  end
  literals.join(", ")
end
|
||||
|
||||
# Prints one C array initializer to standard output.
#
# table:: String placed between "static const " and "[]" in the emitted
#         declaration (i.e. the C element type followed by the array name).
# data::  Hash whose values are Arrays of Integers. A key is either an
#         Integer or an Array of Integers.
#
# Entries are written in the order produced by sorting the hash. A key that
# is an Array with more than one element is written as a braced list; any
# other key (an Integer, or a one-element Array) is written as a single
# hexadecimal literal. Each value is written as its length followed by a
# braced list of its elements.
def print_table(table, data)
  print("static const #{table}[] = {\n")
  data.sort.each do |k, v|
    sk =
      if k.is_a?(Array) && k.length > 1
        "{#{hex_seq(k)}}"
      else
        # String#% accepts a one-element Array as its argument list, so this
        # branch formats both bare Integers and single-element Array keys.
        "0x%04x" % k
      end
    print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n")
  end
  print("};\n\n")
end
|
||||
|
||||
# Reads a CaseFolding.txt-style file and prints C source for the case
# folding and unfolding tables, plus table-size macros, to standard output.
#
# filename:: path of the input file. Only lines matching
#            "<code>; <C|F|T>; <code> [<code> [<code>]];" are used; every
#            other line is ignored.
#
# Entries with status C or F populate the fold table (source code point to
# a list of one to three target code points) and the unfold tables (target
# list back to the list of sources, grouped by target length). Code points
# that also have a status T (Turkic) line are collected, and afterwards
# their C/F fold and unfold entries are moved into the separate "_Locale"
# tables.
#
# Raises NoMethodError if a T line names a code point that has no C/F entry.
def print_case_folding_data(filename)
  pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/

  fold = {}
  unfold = [{}, {}, {}]
  turkic = []

  # File.foreach rather than IO.foreach: the latter treats a name that
  # starts with "|" as a command to run, and the name comes from ARGV.
  File.foreach(filename) do |line|
    res = pattern.match(line)
    next unless res
    ch_from = res[1].to_i(16)

    if res[2] == 'T'
      # Turkic case folding: only remember the source code point here.
      turkic << ch_from
      next
    end

    # store folding data (capture groups 3..5 hold the one to three targets;
    # groups that did not take part in the match are nil)
    ch_to = res[3..5].compact.map { |hex| hex.to_i(16) }
    fold[ch_from] = ch_to

    # store unfolding data, bucketed by the number of target code points
    (unfold[ch_to.length - 1][ch_to] ||= []) << ch_from
  end

  # move locale dependent data to (un)fold_locale
  fold_locale = {}
  unfold_locale = [{}, {}]
  turkic.each do |ch_from|
    key = fold[ch_from]
    i = key.length - 1
    # Single-target keys are stored as a bare Integer, longer ones as the
    # Array itself.
    unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
    fold_locale[ch_from] = fold.delete(ch_from)
  end

  # print the header
  print("/* DO NOT EDIT THIS FILE. */\n")
  print("/* Generated by enc/unicode/case-folding.rb */\n\n")

  # print folding data

  # CaseFold
  print_table("CaseFold_11_Type CaseFold", fold)

  # CaseFold_Locale
  print_table("CaseFold_11_Type CaseFold_Locale", fold_locale)

  # print unfolding data

  # CaseUnfold_11
  print_table("CaseUnfold_11_Type CaseUnfold_11", unfold[0])

  # CaseUnfold_11_Locale
  print_table("CaseUnfold_11_Type CaseUnfold_11_Locale", unfold_locale[0])

  # CaseUnfold_12
  print_table("CaseUnfold_12_Type CaseUnfold_12", unfold[1])

  # CaseUnfold_12_Locale
  print_table("CaseUnfold_12_Type CaseUnfold_12_Locale", unfold_locale[1])

  # CaseUnfold_13
  print_table("CaseUnfold_13_Type CaseUnfold_13", unfold[2])

  # table sizes: entry counts scaled by a per-table factor; "%d" drops the
  # fractional part of the Float product.
  fold_table_size = fold.size + fold_locale.size
  printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2))
  unfold1_table_size = unfold[0].size + unfold_locale[0].size
  printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2))
  unfold2_table_size = unfold[1].size + unfold_locale[1].size
  printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5))
  unfold3_table_size = unfold[2].size
  printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
end
|
||||
|
||||
# Script entry point: convert the file named by the first command-line
# argument, or 'CaseFolding.txt' when no argument is given.
print_case_folding_data(ARGV.first || 'CaseFolding.txt')
|
Loading…
Reference in a new issue