1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
  optional argument.

* enc/trans/single_byte.trans: use valid_encoding argument for
  transcode_tblgen.

* enc/trans/chinese.trans: ditto.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26941 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2010-03-15 12:25:20 +00:00
parent 21c7d36573
commit a73374bb57
4 changed files with 34 additions and 19 deletions

View file

@ -1,3 +1,13 @@
Mon Mar 15 21:22:49 2010 Tanaka Akira <akr@fsij.org>
* tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
optional argument.
* enc/trans/single_byte.trans: use valid_encoding argument for
  transcode_tblgen.
* enc/trans/chinese.trans: ditto.
Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada <nobu@ruby-lang.org> Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
* random.c (default_rand): removed initial buffer. * random.c (default_rand): removed initial buffer.

View file

@ -1,16 +1,18 @@
#include "transcode_data.h" #include "transcode_data.h"
<% <%
set_valid_byte_pattern 'GB2312', 'EUC-KR' gb2312_valid_byte_pattern = ValidEncoding['EUC-KR']
set_valid_byte_pattern 'GB12345', 'EUC-KR' gb12345_valid_byte_pattern = ValidEncoding['EUC-KR']
transcode_tblgen "GB2312", "UTF-8", transcode_tblgen "GB2312", "UTF-8",
[["{00-7f}", :nomap]] + [["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS") citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS"),
gb2312_valid_byte_pattern
transcode_tblgen "GB12345", "UTF-8", transcode_tblgen "GB12345", "UTF-8",
[["{00-7f}", :nomap]] + [["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS") citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS"),
gb12345_valid_byte_pattern
transcode_tblgen "UTF-8", "GB2312", transcode_tblgen "UTF-8", "GB2312",
[["{00-7f}", :nomap]] + [["{00-7f}", :nomap]] +

View file

@ -22,9 +22,8 @@
require(name.downcase + "-tbl") require(name.downcase + "-tbl")
control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : [] control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL") tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
set_valid_byte_pattern(name, '1byte')
code = '' code = ''
code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }]) code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }], '{00-ff}')
code << "\n" code << "\n"
code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
code code

View file

@ -165,7 +165,7 @@ class ActionMap
end end
def self.build_tree(rects) def self.build_tree(rects)
expand("", rects) {|actions| expand("", rects) {|prefix, actions|
unambiguous_action(actions) unambiguous_action(actions)
} }
end end
@ -186,12 +186,12 @@ class ActionMap
all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] } all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] }
} }
tree = expand("", all_rects) {|actions| tree = expand("", all_rects) {|prefix, actions|
args = Array.new(rects_list.length) { [] } args = Array.new(rects_list.length) { [] }
actions.each {|i, action| actions.each {|i, action|
args[i] << action args[i] << action
} }
yield(args) yield(prefix, *args)
} }
self.new("", tree) self.new("", tree)
@ -213,7 +213,7 @@ class ActionMap
end end
if has_empty if has_empty
actions = rects.map {|min, max, action| action }.uniq actions = rects.map {|min, max, action| action }.uniq
act = block.call(actions) act = block.call(prefix, actions)
tree = Action.new(act) tree = Action.new(act)
else else
tree = [] tree = []
@ -649,18 +649,22 @@ def encode_utf8(map)
r r
end end
def transcode_compile_tree(name, from, map) def transcode_compile_tree(name, from, map, valid_encoding=nil)
map = encode_utf8(map) map = encode_utf8(map)
h = {} h = {}
map.each {|k, v| map.each {|k, v|
h[k] = v unless h[k] # use first mapping h[k] = v unless h[k] # use first mapping
} }
if valid_encoding = ValidEncoding[from] valid_encoding = ValidEncoding[from] if valid_encoding == nil
if valid_encoding
rects = ActionMap.parse_to_rects(h) rects = ActionMap.parse_to_rects(h)
undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef) undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef)
am = ActionMap.merge(rects, undef_rects) {|a1, a2| am = ActionMap.merge(rects, undef_rects) {|prefix, as1, as2|
a1 = a1.empty? ? nil : ActionMap.unambiguous_action(a1) a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1)
a2 = a2.empty? ? nil : ActionMap.unambiguous_action(a2) a2 = as2.empty? ? nil : ActionMap.unambiguous_action(as2)
if !a2
raise "invalid mapping: #{prefix}"
end
a1 || a2 a1 || a2
} }
else else
@ -675,7 +679,7 @@ end
TRANSCODERS = [] TRANSCODERS = []
TRANSCODE_GENERATED_TRANSCODER_CODE = '' TRANSCODE_GENERATED_TRANSCODER_CODE = ''
def transcode_tbl_only(from, to, map) def transcode_tbl_only(from, to, map, valid_encoding=nil)
if VERBOSE_MODE if VERBOSE_MODE
if from.empty? || to.empty? if from.empty? || to.empty?
STDERR.puts "converter for #{from.empty? ? to : from}" STDERR.puts "converter for #{from.empty? ? to : from}"
@ -692,12 +696,12 @@ def transcode_tbl_only(from, to, map)
else else
tree_name = "from_#{id_from}_to_#{id_to}" tree_name = "from_#{id_from}_to_#{id_to}"
end end
real_tree_name, max_input = transcode_compile_tree(tree_name, from, map) real_tree_name, max_input = transcode_compile_tree(tree_name, from, map, valid_encoding)
return map, tree_name, real_tree_name, max_input return map, tree_name, real_tree_name, max_input
end end
def transcode_tblgen(from, to, map) def transcode_tblgen(from, to, map, valid_encoding=nil)
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map) map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
transcoder_name = "rb_#{tree_name}" transcoder_name = "rb_#{tree_name}"
TRANSCODERS << transcoder_name TRANSCODERS << transcoder_name
input_unit_length = UnitLength[from] input_unit_length = UnitLength[from]