mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* tool/transcode-tblgen.rb (transcode_compile_tree): make
valid_encoding mandatory unless from_encoding is registered in ValidEncoding. (transcode_tbl_only): ditto. (transcode_tblgen): ditto. (ValidEncoding): new function. * enc/trans/escape.trans: specify valid_encoding. * enc/trans/emoji_sjis_docomo.trans: ditto. * enc/trans/emoji.trans: ditto. * enc/trans/emoji_iso2022_kddi.trans: ditto. * enc/trans/big5.trans: ditto. * enc/trans/emoji_sjis_softbank.trans: ditto. * enc/trans/emoji_sjis_kddi.trans: ditto. * enc/trans/chinese.trans: use ValidEncoding() instead of ValidEncoding[]. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26995 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
f623979f65
commit
49d993729f
10 changed files with 63 additions and 21 deletions
26
ChangeLog
26
ChangeLog
|
@ -1,3 +1,29 @@
|
||||||
|
Sun Mar 21 12:32:39 2010 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* tool/transcode-tblgen.rb (transcode_compile_tree): make
|
||||||
|
valid_encoding mandatory unless from_encoding is registered in
|
||||||
|
ValidEncoding.
|
||||||
|
(transcode_tbl_only): ditto.
|
||||||
|
(transcode_tblgen): ditto.
|
||||||
|
(ValidEncoding): new function.
|
||||||
|
|
||||||
|
* enc/trans/escape.trans: specify valid_encoding.
|
||||||
|
|
||||||
|
* enc/trans/emoji_sjis_docomo.trans: ditto.
|
||||||
|
|
||||||
|
* enc/trans/emoji.trans: ditto.
|
||||||
|
|
||||||
|
* enc/trans/emoji_iso2022_kddi.trans: ditto.
|
||||||
|
|
||||||
|
* enc/trans/big5.trans: ditto.
|
||||||
|
|
||||||
|
* enc/trans/emoji_sjis_softbank.trans: ditto.
|
||||||
|
|
||||||
|
* enc/trans/emoji_sjis_kddi.trans: ditto.
|
||||||
|
|
||||||
|
* enc/trans/chinese.trans: use ValidEncoding() instead of
|
||||||
|
ValidEncoding[].
|
||||||
|
|
||||||
Sun Mar 21 09:43:01 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sun Mar 21 09:43:01 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* io.c (IO_[CRW]BUF_CAPA_MIN): replaced magic numbers.
|
* io.c (IO_[CRW]BUF_CAPA_MIN): replaced magic numbers.
|
||||||
|
|
|
@ -8,10 +8,10 @@
|
||||||
transcode_tblgen "Big5", "UTF-8", [["{00-7f}", :nomap], *BIG5_TO_UCS_TBL]
|
transcode_tblgen "Big5", "UTF-8", [["{00-7f}", :nomap], *BIG5_TO_UCS_TBL]
|
||||||
transcode_tblgen "UTF-8", "Big5", [["{00-7f}", :nomap], *BIG5_TO_UCS_TBL.map {|a,b| [b,a] }]
|
transcode_tblgen "UTF-8", "Big5", [["{00-7f}", :nomap], *BIG5_TO_UCS_TBL.map {|a,b| [b,a] }]
|
||||||
|
|
||||||
transcode_tblgen "Big5-HKSCS", "UTF-8", [["{00-7f}", :nomap], *BIG5_HKSCS_TO_UCS_TBL]
|
transcode_tblgen "Big5-HKSCS", "UTF-8", [["{00-7f}", :nomap], *BIG5_HKSCS_TO_UCS_TBL], ValidEncoding('Big5')
|
||||||
transcode_tblgen "UTF-8", "Big5-HKSCS", [["{00-7f}", :nomap], *BIG5_HKSCS_TO_UCS_TBL.map {|a,b| [b,a] }]
|
transcode_tblgen "UTF-8", "Big5-HKSCS", [["{00-7f}", :nomap], *BIG5_HKSCS_TO_UCS_TBL.map {|a,b| [b,a] }]
|
||||||
|
|
||||||
transcode_tblgen "Big5-UAO", "UTF-8", [["{00-7f}", :nomap], *BIG5_UAO_TO_UCS_TBL]
|
transcode_tblgen "Big5-UAO", "UTF-8", [["{00-7f}", :nomap], *BIG5_UAO_TO_UCS_TBL], ValidEncoding('Big5')
|
||||||
transcode_tblgen "UTF-8", "Big5-UAO", [["{00-7f}", :nomap], *BIG5_UAO_TO_UCS_TBL.map {|a,b| [b,a] }]
|
transcode_tblgen "UTF-8", "Big5-UAO", [["{00-7f}", :nomap], *BIG5_UAO_TO_UCS_TBL.map {|a,b| [b,a] }]
|
||||||
|
|
||||||
%>
|
%>
|
||||||
|
@ -22,4 +22,4 @@ void
|
||||||
Init_big5(void)
|
Init_big5(void)
|
||||||
{
|
{
|
||||||
<%= transcode_register_code %>
|
<%= transcode_register_code %>
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
#include "transcode_data.h"
|
#include "transcode_data.h"
|
||||||
|
|
||||||
<%
|
<%
|
||||||
gb2312_valid_byte_pattern = ValidEncoding['EUC-KR']
|
gb2312_valid_byte_pattern = ValidEncoding('EUC-KR')
|
||||||
gb12345_valid_byte_pattern = ValidEncoding['EUC-KR']
|
gb12345_valid_byte_pattern = ValidEncoding('EUC-KR')
|
||||||
|
|
||||||
transcode_tblgen "GB2312", "UTF-8",
|
transcode_tblgen "GB2312", "UTF-8",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
to_encoding = (to_company == "Unicode") ? "UTF-8" : "UTF8-"+to_company
|
to_encoding = (to_company == "Unicode") ? "UTF-8" : "UTF8-"+to_company
|
||||||
|
|
||||||
strict_mapping = EMOJI_EXCHANGE_TBL[from_encoding][to_encoding]
|
strict_mapping = EMOJI_EXCHANGE_TBL[from_encoding][to_encoding]
|
||||||
transcode_tblgen(from_encoding, to_encoding, strict_mapping + nomap_table)
|
transcode_tblgen(from_encoding, to_encoding, strict_mapping + nomap_table, nil)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
%>
|
%>
|
||||||
|
|
|
@ -32,7 +32,8 @@
|
||||||
CP932VDC:NEC_IBM/UCS
|
CP932VDC:NEC_IBM/UCS
|
||||||
CP932VDC:IBM/UCS
|
CP932VDC:IBM/UCS
|
||||||
CP932UDA/UCS
|
CP932UDA/UCS
|
||||||
JISX0208:MS/UCS).join(","))
|
JISX0208:MS/UCS).join(",")),
|
||||||
|
nil
|
||||||
|
|
||||||
# mapping from UTF8-KDDI to stateless-ISO-2022-JP-KDDI
|
# mapping from UTF8-KDDI to stateless-ISO-2022-JP-KDDI
|
||||||
transcode_tblgen "UTF8-KDDI", "stateless-ISO-2022-JP-KDDI",
|
transcode_tblgen "UTF8-KDDI", "stateless-ISO-2022-JP-KDDI",
|
||||||
|
@ -43,7 +44,8 @@
|
||||||
UCS/CP932VDC:NEC_IBM
|
UCS/CP932VDC:NEC_IBM
|
||||||
UCS/CP932VDC:IBM
|
UCS/CP932VDC:IBM
|
||||||
UCS/CP932UDA
|
UCS/CP932UDA
|
||||||
UCS/JISX0208:MS).join(","))
|
UCS/JISX0208:MS).join(",")),
|
||||||
|
ValidEncoding('UTF-8')
|
||||||
%>
|
%>
|
||||||
|
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
|
@ -9,7 +9,8 @@
|
||||||
CP932VDC:NEC_IBM/UCS
|
CP932VDC:NEC_IBM/UCS
|
||||||
CP932VDC:IBM/UCS
|
CP932VDC:IBM/UCS
|
||||||
CP932UDA/UCS
|
CP932UDA/UCS
|
||||||
JISX0208:MS/UCS).join(","))
|
JISX0208:MS/UCS).join(",")),
|
||||||
|
ValidEncoding('Shift_JIS')
|
||||||
|
|
||||||
transcode_tblgen "UTF8-DOCOMO", "SJIS-DOCOMO",
|
transcode_tblgen "UTF8-DOCOMO", "SJIS-DOCOMO",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
|
@ -19,7 +20,8 @@
|
||||||
UCS/CP932VDC:NEC_IBM
|
UCS/CP932VDC:NEC_IBM
|
||||||
UCS/CP932VDC:IBM
|
UCS/CP932VDC:IBM
|
||||||
UCS/CP932UDA
|
UCS/CP932UDA
|
||||||
UCS/JISX0208:MS).join(","))
|
UCS/JISX0208:MS).join(",")),
|
||||||
|
ValidEncoding('UTF-8')
|
||||||
%>
|
%>
|
||||||
|
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
|
@ -9,7 +9,8 @@
|
||||||
CP932VDC:NEC_IBM/UCS
|
CP932VDC:NEC_IBM/UCS
|
||||||
CP932VDC:IBM/UCS
|
CP932VDC:IBM/UCS
|
||||||
CP932UDA/UCS
|
CP932UDA/UCS
|
||||||
JISX0208:MS/UCS).join(","))
|
JISX0208:MS/UCS).join(",")),
|
||||||
|
ValidEncoding('Shift_JIS')
|
||||||
|
|
||||||
transcode_tblgen "UTF8-KDDI", "SJIS-KDDI",
|
transcode_tblgen "UTF8-KDDI", "SJIS-KDDI",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
|
@ -20,7 +21,8 @@
|
||||||
UCS/CP932VDC:NEC_IBM
|
UCS/CP932VDC:NEC_IBM
|
||||||
UCS/CP932VDC:IBM
|
UCS/CP932VDC:IBM
|
||||||
UCS/CP932UDA
|
UCS/CP932UDA
|
||||||
UCS/JISX0208:MS).join(","))
|
UCS/JISX0208:MS).join(",")),
|
||||||
|
ValidEncoding('UTF-8')
|
||||||
%>
|
%>
|
||||||
|
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
|
@ -9,7 +9,8 @@
|
||||||
CP932VDC:NEC_IBM/UCS
|
CP932VDC:NEC_IBM/UCS
|
||||||
CP932VDC:IBM/UCS
|
CP932VDC:IBM/UCS
|
||||||
CP932UDA/UCS
|
CP932UDA/UCS
|
||||||
JISX0208:MS/UCS).join(","))
|
JISX0208:MS/UCS).join(",")),
|
||||||
|
ValidEncoding('Shift_JIS')
|
||||||
|
|
||||||
transcode_tblgen "UTF8-SoftBank", "SJIS-SoftBank",
|
transcode_tblgen "UTF8-SoftBank", "SJIS-SoftBank",
|
||||||
[["{00-7f}", :nomap]] +
|
[["{00-7f}", :nomap]] +
|
||||||
|
@ -19,7 +20,8 @@
|
||||||
UCS/CP932VDC:NEC_IBM
|
UCS/CP932VDC:NEC_IBM
|
||||||
UCS/CP932VDC:IBM
|
UCS/CP932VDC:IBM
|
||||||
UCS/CP932UDA
|
UCS/CP932UDA
|
||||||
UCS/JISX0208:MS).join(","))
|
UCS/JISX0208:MS).join(",")),
|
||||||
|
ValidEncoding('UTF-8')
|
||||||
%>
|
%>
|
||||||
|
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
|
@ -8,14 +8,14 @@
|
||||||
transcode_tblgen("", "amp_escape", [
|
transcode_tblgen("", "amp_escape", [
|
||||||
["{00-25,27-FF}", :nomap],
|
["{00-25,27-FF}", :nomap],
|
||||||
["26", hexstr("&")]
|
["26", hexstr("&")]
|
||||||
])
|
], nil)
|
||||||
|
|
||||||
transcode_tblgen("", "xml_text_escape", [
|
transcode_tblgen("", "xml_text_escape", [
|
||||||
["{00-25,27-3B,3D,3F-FF}", :nomap],
|
["{00-25,27-3B,3D,3F-FF}", :nomap],
|
||||||
["26", hexstr("&")],
|
["26", hexstr("&")],
|
||||||
["3C", hexstr("<")],
|
["3C", hexstr("<")],
|
||||||
["3E", hexstr(">")]
|
["3E", hexstr(">")]
|
||||||
])
|
], nil)
|
||||||
|
|
||||||
transcode_tblgen("", "xml_attr_content_escape", [
|
transcode_tblgen("", "xml_attr_content_escape", [
|
||||||
["{00-21,23-25,27-3B,3D,3F-FF}", :nomap],
|
["{00-21,23-25,27-3B,3D,3F-FF}", :nomap],
|
||||||
|
@ -23,7 +23,7 @@
|
||||||
["26", hexstr("&")],
|
["26", hexstr("&")],
|
||||||
["3C", hexstr("<")],
|
["3C", hexstr("<")],
|
||||||
["3E", hexstr(">")]
|
["3E", hexstr(">")]
|
||||||
])
|
], nil)
|
||||||
|
|
||||||
map_xml_attr_quote = {}
|
map_xml_attr_quote = {}
|
||||||
map_xml_attr_quote["{00-FF}"] = :func_so
|
map_xml_attr_quote["{00-FF}"] = :func_so
|
||||||
|
|
|
@ -758,13 +758,17 @@ def encode_utf8(map)
|
||||||
r
|
r
|
||||||
end
|
end
|
||||||
|
|
||||||
def transcode_compile_tree(name, from, map, valid_encoding=nil)
|
UnspecifiedValidEncoding = Object.new
|
||||||
|
|
||||||
|
def transcode_compile_tree(name, from, map, valid_encoding)
|
||||||
map = encode_utf8(map)
|
map = encode_utf8(map)
|
||||||
h = {}
|
h = {}
|
||||||
map.each {|k, v|
|
map.each {|k, v|
|
||||||
h[k] = v unless h[k] # use first mapping
|
h[k] = v unless h[k] # use first mapping
|
||||||
}
|
}
|
||||||
valid_encoding = ValidEncoding[from] if valid_encoding == nil
|
if valid_encoding.equal? UnspecifiedValidEncoding
|
||||||
|
valid_encoding = ValidEncoding.fetch(from)
|
||||||
|
end
|
||||||
if valid_encoding
|
if valid_encoding
|
||||||
am = ActionMap.merge2(h, {valid_encoding => :undef}) {|prefix, as1, as2|
|
am = ActionMap.merge2(h, {valid_encoding => :undef}) {|prefix, as1, as2|
|
||||||
a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1)
|
a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1)
|
||||||
|
@ -787,7 +791,7 @@ end
|
||||||
TRANSCODERS = []
|
TRANSCODERS = []
|
||||||
TRANSCODE_GENERATED_TRANSCODER_CODE = ''
|
TRANSCODE_GENERATED_TRANSCODER_CODE = ''
|
||||||
|
|
||||||
def transcode_tbl_only(from, to, map, valid_encoding=nil)
|
def transcode_tbl_only(from, to, map, valid_encoding=UnspecifiedValidEncoding)
|
||||||
if VERBOSE_MODE
|
if VERBOSE_MODE
|
||||||
if from.empty? || to.empty?
|
if from.empty? || to.empty?
|
||||||
STDERR.puts "converter for #{from.empty? ? to : from}"
|
STDERR.puts "converter for #{from.empty? ? to : from}"
|
||||||
|
@ -808,7 +812,7 @@ def transcode_tbl_only(from, to, map, valid_encoding=nil)
|
||||||
return map, tree_name, real_tree_name, max_input
|
return map, tree_name, real_tree_name, max_input
|
||||||
end
|
end
|
||||||
|
|
||||||
def transcode_tblgen(from, to, map, valid_encoding=nil)
|
def transcode_tblgen(from, to, map, valid_encoding=UnspecifiedValidEncoding)
|
||||||
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
|
map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
|
||||||
transcoder_name = "rb_#{tree_name}"
|
transcoder_name = "rb_#{tree_name}"
|
||||||
TRANSCODERS << transcoder_name
|
TRANSCODERS << transcoder_name
|
||||||
|
@ -912,6 +916,10 @@ ValidEncoding = {
|
||||||
{81-fe}{30-39}{81-fe}{30-39}',
|
{81-fe}{30-39}{81-fe}{30-39}',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def ValidEncoding(enc)
|
||||||
|
ValidEncoding.fetch(enc)
|
||||||
|
end
|
||||||
|
|
||||||
def set_valid_byte_pattern(encoding, pattern_or_label)
|
def set_valid_byte_pattern(encoding, pattern_or_label)
|
||||||
pattern =
|
pattern =
|
||||||
if ValidEncoding[pattern_or_label]
|
if ValidEncoding[pattern_or_label]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue