mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
optional argument. * enc/trans/single_byte.trans: use valid_encoding argument for transcode_tblgen. * enc/trans/chinese.trans: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26941 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
21c7d36573
commit
a73374bb57
4 changed files with 34 additions and 19 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
||||||
|
Mon Mar 15 21:22:49 2010 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
|
||||||
|
optional argument.
|
||||||
|
|
||||||
|
* enc/trans/single_byte.trans use valid_encoding argument for
|
||||||
|
transcode_tblgen.
|
||||||
|
|
||||||
|
* enc/trans/chinese.trans: ditto.
|
||||||
|
|
||||||
Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* random.c (default_rand): removed initial buffer.
|
* random.c (default_rand): removed initial buffer.
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
#include "transcode_data.h"
|
#include "transcode_data.h"
|
||||||
|
|
||||||
<%
|
<%
|
||||||
set_valid_byte_pattern 'GB2312', 'EUC-KR'
|
gb2312_valid_byte_pattern = ValidEncoding['EUC-KR']
|
||||||
set_valid_byte_pattern 'GB12345', 'EUC-KR'
|
gb12345_valid_byte_pattern = ValidEncoding['EUC-KR']
|
||||||
|
|
||||||
transcode_tblgen "GB2312", "UTF-8",
|
transcode_tblgen "GB2312", "UTF-8",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS")
|
citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS"),
|
||||||
|
gb2312_valid_byte_pattern
|
||||||
|
|
||||||
transcode_tblgen "GB12345", "UTF-8",
|
transcode_tblgen "GB12345", "UTF-8",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS")
|
citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS"),
|
||||||
|
gb12345_valid_byte_pattern
|
||||||
|
|
||||||
transcode_tblgen "UTF-8", "GB2312",
|
transcode_tblgen "UTF-8", "GB2312",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
|
|
|
@ -22,9 +22,8 @@
|
||||||
require(name.downcase + "-tbl")
|
require(name.downcase + "-tbl")
|
||||||
control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
|
control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
|
||||||
tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
|
tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
|
||||||
set_valid_byte_pattern(name, '1byte')
|
|
||||||
code = ''
|
code = ''
|
||||||
code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }])
|
code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }], '{00-ff}')
|
||||||
code << "\n"
|
code << "\n"
|
||||||
code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
|
code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
|
||||||
code
|
code
|
||||||
|
|
|
@ -165,7 +165,7 @@ class ActionMap
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.build_tree(rects)
|
def self.build_tree(rects)
|
||||||
expand("", rects) {|actions|
|
expand("", rects) {|prefix, actions|
|
||||||
unambiguous_action(actions)
|
unambiguous_action(actions)
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
@ -186,12 +186,12 @@ class ActionMap
|
||||||
all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] }
|
all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] }
|
||||||
}
|
}
|
||||||
|
|
||||||
tree = expand("", all_rects) {|actions|
|
tree = expand("", all_rects) {|prefix, actions|
|
||||||
args = Array.new(rects_list.length) { [] }
|
args = Array.new(rects_list.length) { [] }
|
||||||
actions.each {|i, action|
|
actions.each {|i, action|
|
||||||
args[i] << action
|
args[i] << action
|
||||||
}
|
}
|
||||||
yield(args)
|
yield(prefix, *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
self.new("", tree)
|
self.new("", tree)
|
||||||
|
@ -213,7 +213,7 @@ class ActionMap
|
||||||
end
|
end
|
||||||
if has_empty
|
if has_empty
|
||||||
actions = rects.map {|min, max, action| action }.uniq
|
actions = rects.map {|min, max, action| action }.uniq
|
||||||
act = block.call(actions)
|
act = block.call(prefix, actions)
|
||||||
tree = Action.new(act)
|
tree = Action.new(act)
|
||||||
else
|
else
|
||||||
tree = []
|
tree = []
|
||||||
|
@ -649,18 +649,22 @@ def encode_utf8(map)
|
||||||
r
|
r
|
||||||
end
|
end
|
||||||
|
|
||||||
def transcode_compile_tree(name, from, map)
|
def transcode_compile_tree(name, from, map, valid_encoding=nil)
|
||||||
map = encode_utf8(map)
|
map = encode_utf8(map)
|
||||||
h = {}
|
h = {}
|
||||||
map.each {|k, v|
|
map.each {|k, v|
|
||||||
h[k] = v unless h[k] # use first mapping
|
h[k] = v unless h[k] # use first mapping
|
||||||
}
|
}
|
||||||
if valid_encoding = ValidEncoding[from]
|
valid_encoding = ValidEncoding[from] if valid_encoding == nil
|
||||||
|
if valid_encoding
|
||||||
rects = ActionMap.parse_to_rects(h)
|
rects = ActionMap.parse_to_rects(h)
|
||||||
undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef)
|
undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef)
|
||||||
am = ActionMap.merge(rects, undef_rects) {|a1, a2|
|
am = ActionMap.merge(rects, undef_rects) {|prefix, as1, as2|
|
||||||
a1 = a1.empty? ? nil : ActionMap.unambiguous_action(a1)
|
a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1)
|
||||||
a2 = a2.empty? ? nil : ActionMap.unambiguous_action(a2)
|
a2 = as2.empty? ? nil : ActionMap.unambiguous_action(as2)
|
||||||
|
if !a2
|
||||||
|
raise "invalid mapping: #{prefix}"
|
||||||
|
end
|
||||||
a1 || a2
|
a1 || a2
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -675,7 +679,7 @@ end
|
||||||
TRANSCODERS = []
|
TRANSCODERS = []
|
||||||
TRANSCODE_GENERATED_TRANSCODER_CODE = ''
|
TRANSCODE_GENERATED_TRANSCODER_CODE = ''
|
||||||
|
|
||||||
def transcode_tbl_only(from, to, map)
|
def transcode_tbl_only(from, to, map, valid_encoding=nil)
|
||||||
if VERBOSE_MODE
|
if VERBOSE_MODE
|
||||||
if from.empty? || to.empty?
|
if from.empty? || to.empty?
|
||||||
STDERR.puts "converter for #{from.empty? ? to : from}"
|
STDERR.puts "converter for #{from.empty? ? to : from}"
|
||||||
|
@ -692,12 +696,12 @@ def transcode_tbl_only(from, to, map)
|
||||||
else
|
else
|
||||||
tree_name = "from_#{id_from}_to_#{id_to}"
|
tree_name = "from_#{id_from}_to_#{id_to}"
|
||||||
end
|
end
|
||||||
real_tree_name, max_input = transcode_compile_tree(tree_name, from, map)
|
real_tree_name, max_input = transcode_compile_tree(tree_name, from, map, valid_encoding)
|
||||||
return map, tree_name, real_tree_name, max_input
|
return map, tree_name, real_tree_name, max_input
|
||||||
end
|
end
|
||||||
|
|
||||||
def transcode_tblgen(from, to, map)
|
def transcode_tblgen(from, to, map, valid_encoding=nil)
|
||||||
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map)
|
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
|
||||||
transcoder_name = "rb_#{tree_name}"
|
transcoder_name = "rb_#{tree_name}"
|
||||||
TRANSCODERS << transcoder_name
|
TRANSCODERS << transcoder_name
|
||||||
input_unit_length = UnitLength[from]
|
input_unit_length = UnitLength[from]
|
||||||
|
|
Loading…
Reference in a new issue