diff --git a/ChangeLog b/ChangeLog index f77871d28f..71e06cb2e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sun Mar 14 02:40:38 2010 Tanaka Akira + + * tool/transcode-tblgen.rb: reject ambiguous mapping. + + * enc/trans/single_byte.trans: remove ambiguous mapping such as + \xD6 -> U+05F2 and \xD6\xC7 -> U+FB1F in Windows-1255 + Sat Mar 13 23:48:27 2010 Yukihiro Matsumoto * file.c (file_expand_path): should not just copy the encoding diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans index 0d42740d30..db47074570 100644 --- a/enc/trans/single_byte.trans +++ b/enc/trans/single_byte.trans @@ -24,7 +24,7 @@ tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL") set_valid_byte_pattern(name, '1byte') code = '' - code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs]) + code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }]) code << "\n" code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) code diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index dbd03e3028..14a09dbadf 100755 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -133,6 +133,12 @@ class StrSet } end + def has_nonempty? + @pat.any? {|seq| + !seq.empty? + } + end + def first_bytes result = {} @pat.each {|seq| @@ -228,6 +234,18 @@ class ActionMap @map.keys.map {|k| k.max_length }.max end + def check_conflict + has_empty = false + has_nonempty = false + @map.each {|ss, action| + has_empty = true if ss.emptyable? + has_nonempty = true if ss.has_nonempty? + } + if has_empty && has_nonempty + raise "conflict between empty and nonempty sequence" + end + end + def empty_action @map.each {|ss, action| return action if ss.emptyable? @@ -440,6 +458,7 @@ End table = Array.new(0x100, :invalid) each_firstbyte(valid_encoding) {|byte, rest, rest_valid_encoding| + rest.check_conflict if a = rest.empty_action table[byte] = a else