From a73374bb5716287b82879e6ced116315aac77f7d Mon Sep 17 00:00:00 2001 From: akr Date: Mon, 15 Mar 2010 12:25:20 +0000 Subject: [PATCH] * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding optional argument. * enc/trans/single_byte.trans use valid_encoding argument for transcode_tblgen. * enc/trans/chinese.trans: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26941 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 10 ++++++++++ enc/trans/chinese.trans | 10 ++++++---- enc/trans/single_byte.trans | 3 +-- tool/transcode-tblgen.rb | 30 +++++++++++++++++------------- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5d60415fd4..1ab1b159bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Mon Mar 15 21:22:49 2010 Tanaka Akira + + * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding + optional argument. + + * enc/trans/single_byte.trans use valid_encoding argument for + transcode_tblgen. + + * enc/trans/chinese.trans: ditto. + Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada * random.c (default_rand): removed initial buffer. diff --git a/enc/trans/chinese.trans b/enc/trans/chinese.trans index 1db6565254..3689696018 100644 --- a/enc/trans/chinese.trans +++ b/enc/trans/chinese.trans @@ -1,16 +1,18 @@ #include "transcode_data.h" <% - set_valid_byte_pattern 'GB2312', 'EUC-KR' - set_valid_byte_pattern 'GB12345', 'EUC-KR' + gb2312_valid_byte_pattern = ValidEncoding['EUC-KR'] + gb12345_valid_byte_pattern = ValidEncoding['EUC-KR'] transcode_tblgen "GB2312", "UTF-8", [["{00-7f}", :nomap]] + - citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS") + citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS"), + gb2312_valid_byte_pattern transcode_tblgen "GB12345", "UTF-8", [["{00-7f}", :nomap]] + - citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS") + citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS"), + gb12345_valid_byte_pattern transcode_tblgen "UTF-8", "GB2312", [["{00-7f}", :nomap]] + diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans index db47074570..55d8430932 100644 --- a/enc/trans/single_byte.trans +++ b/enc/trans/single_byte.trans @@ -22,9 +22,8 @@ require(name.downcase + "-tbl") control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : [] tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL") - set_valid_byte_pattern(name, '1byte') code = '' - code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }]) + code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }], '{00-ff}') code << "\n" code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) code diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index 48407ac3df..34d5e8e77b 100755 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -165,7 +165,7 @@ class ActionMap end def self.build_tree(rects) - expand("", rects) {|actions| + expand("", rects) {|prefix, actions| unambiguous_action(actions) } end @@ -186,12 +186,12 @@ class ActionMap all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] } } - tree = expand("", all_rects) {|actions| + tree = expand("", all_rects) {|prefix, actions| args = Array.new(rects_list.length) { [] } actions.each {|i, action| args[i] << action } - yield(args) + yield(prefix, *args) } self.new("", tree) @@ -213,7 +213,7 @@ class ActionMap end if has_empty actions = rects.map {|min, max, action| action }.uniq - act = block.call(actions) + act = block.call(prefix, actions) tree = Action.new(act) else tree = [] @@ -649,18 +649,22 @@ def encode_utf8(map) r end -def transcode_compile_tree(name, from, map) +def transcode_compile_tree(name, from, map, valid_encoding=nil) map = encode_utf8(map) h = {} map.each {|k, v| h[k] = v unless h[k] # use first mapping } - if valid_encoding = ValidEncoding[from] + valid_encoding = ValidEncoding[from] if valid_encoding == nil + if valid_encoding rects = ActionMap.parse_to_rects(h) undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef) - am = ActionMap.merge(rects, undef_rects) {|a1, a2| - a1 = a1.empty? ? nil : ActionMap.unambiguous_action(a1) - a2 = a2.empty? ? nil : ActionMap.unambiguous_action(a2) + am = ActionMap.merge(rects, undef_rects) {|prefix, as1, as2| + a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1) + a2 = as2.empty? ? nil : ActionMap.unambiguous_action(as2) + if !a2 + raise "invalid mapping: #{prefix}" + end a1 || a2 } else @@ -675,7 +679,7 @@ end TRANSCODERS = [] TRANSCODE_GENERATED_TRANSCODER_CODE = '' -def transcode_tbl_only(from, to, map) +def transcode_tbl_only(from, to, map, valid_encoding=nil) if VERBOSE_MODE if from.empty? || to.empty? STDERR.puts "converter for #{from.empty? ? to : from}" @@ -692,12 +696,12 @@ def transcode_tbl_only(from, to, map) else tree_name = "from_#{id_from}_to_#{id_to}" end - real_tree_name, max_input = transcode_compile_tree(tree_name, from, map) + real_tree_name, max_input = transcode_compile_tree(tree_name, from, map, valid_encoding) return map, tree_name, real_tree_name, max_input end -def transcode_tblgen(from, to, map) - map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map) +def transcode_tblgen(from, to, map, valid_encoding=nil) + map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding) transcoder_name = "rb_#{tree_name}" TRANSCODERS << transcoder_name input_unit_length = UnitLength[from]