1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* tool/transcode-tblgen.rb (transcode_generated_code): defined for
  generating table at once.
  (transcode_tblgen): returns an empty string.
  (transcode_generate_node): ditto.

* enc/trans/newline.trans: use transcode_generated_code.

* enc/trans/iso2022.trans: ditto.

* enc/trans/single_byte.trans: ditto.

* enc/trans/utf_16_32.trans: ditto.

* enc/trans/japanese.trans: ditto.

* enc/trans/korean.trans: ditto.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19006 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-31 17:35:00 +00:00
parent 6b2efe107f
commit 8841969485
8 changed files with 152 additions and 125 deletions

View file

@ -1,3 +1,22 @@
Mon Sep 1 02:31:16 2008 Tanaka Akira <akr@fsij.org>
* tool/transcode-tblgen.rb (transcode_generated_code): defined for
generating table at once.
(transcode_tblgen): returns an empty string.
(transcode_generate_node): ditto.
* enc/trans/newline.trans: use transcode_generated_code.
* enc/trans/iso2022.trans: ditto.
* enc/trans/single_byte.trans: ditto.
* enc/trans/utf_16_32.trans: ditto.
* enc/trans/japanese.trans: ditto.
* enc/trans/korean.trans: ditto.
Mon Sep 1 02:10:03 2008 Tanaka Akira <akr@fsij.org>
* tool/transcode-tblgen.rb (citrus_decode_mapsrc): print logging

View file

@ -10,10 +10,22 @@
map_jisx0208_rest = {}
map_jisx0208_rest["{21-7e}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp")
transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest")
map_eucjp = {
"{0e,0f,1b}" => :undef,
"{00-0d,10-1a,1c-7f}" => :func_so,
"{a1-fe}{a1-fe}" => :func_so,
"8e{a1-fe}" => :undef,
"8f{a1-fe}{a1-fe}" => :undef,
}
transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp")
%>
<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") %>
<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") %>
<%= transcode_generated_code %>
static VALUE
fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
@ -65,18 +77,6 @@ rb_ISO_2022_JP_to_EUC_JP = {
NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
};
<%
map_eucjp = {
"{0e,0f,1b}" => :undef,
"{00-0d,10-1a,1c-7f}" => :func_so,
"{a1-fe}{a1-fe}" => :func_so,
"8e{a1-fe}" => :undef,
"8f{a1-fe}{a1-fe}" => :undef,
}
%>
<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %>
static int
fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o)
{

View file

@ -1,64 +1,68 @@
#include "transcode_data.h"
<%= transcode_tblgen "Shift_JIS", "UTF-8", [
<%
transcode_tblgen "Shift_JIS", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("mskanji", 2, "JISX0208:1990/UCS"),
] %>
<%= transcode_tblgen "Windows-31J", "UTF-8", [
]
transcode_tblgen "Windows-31J", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("mskanji", 2,
"JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,CP932VDC:IBM/UCS,CP932UDA/UCS,JISX0208:MS/UCS"),
] %>
]
<%= transcode_tblgen "UTF-8", "Shift_JIS", [
transcode_tblgen "UTF-8", "Shift_JIS", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("mskanji", 2, "UCS/JISX0208:1990"),
] %>
<%= transcode_tblgen "UTF-8", "Windows-31J", [
]
transcode_tblgen "UTF-8", "Windows-31J", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("mskanji", 1, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("mskanji", 2,
"UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/CP932VDC:IBM,UCS/CP932UDA,UCS/JISX0208:MS"),
] %>
]
<%= transcode_tblgen "EUC-JP", "UTF-8", [
transcode_tblgen "EUC-JP", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS"),
*citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("euc", 0x8000, "JISX0212/UCS"),
] %>
<%= transcode_tblgen "EUC-JP-MS", "UTF-8", [
]
transcode_tblgen "EUC-JP-MS", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,JISX0208UDC/UCS,JISX0208:MS/UCS"),
*citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"),
*citrus_decode_mapsrc("euc", 0x8000, "JISX0212VDC:IBM/UCS,JISX0212UDC/UCS,JISX0212:MS/UCS"),
] %>
<%= transcode_tblgen "CP51932", "UTF-8", [
]
transcode_tblgen "CP51932", "UTF-8", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS"),
*citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS"),
] %>
]
<%= transcode_tblgen "UTF-8", "EUC-JP", [
transcode_tblgen "UTF-8", "EUC-JP", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990"),
*citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212"),
] %>
<%= transcode_tblgen "UTF-8", "EUC-JP-MS", [
]
transcode_tblgen "UTF-8", "EUC-JP-MS", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/JISX0208UDC,UCS/JISX0208:MS"),
*citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"),
*citrus_decode_mapsrc("euc", 0x8000, "UCS/JISX0212VDC:IBM,UCS/JISX0212UDC,UCS/JISX0212:MS"),
] %>
<%= transcode_tblgen "UTF-8", "CP51932", [
]
transcode_tblgen "UTF-8", "CP51932", [
["{00-7f}", :nomap],
*citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM,UCS/JISX0208:MS"),
*citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA"),
] %>
]
%>
<%= transcode_generated_code %>
void
Init_japanese(void)

View file

@ -3,12 +3,14 @@
<%
require "euckr-tbl"
require "cp949-tbl"
transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL]
transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL]
transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL]
transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL]
%>
<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %>
<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %>
<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %>
<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %>
<%= transcode_generated_code %>
void
Init_korean(void)

View file

@ -3,9 +3,23 @@
<%
map_normalize = {}
map_normalize["{00-ff}"] = :func_so
transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline")
map_crlf = {}
map_crlf["{00-09,0b-ff}"] = :nomap
map_crlf["0a"] = "0d0a"
transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline")
map_cr = {}
map_cr["{00-09,0b-ff}"] = :nomap
map_cr["0a"] = "0d"
transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
%>
<%= transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") %>
<%= transcode_generated_code %>
static int
fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
@ -48,14 +62,6 @@ rb_universal_newline = {
NULL, NULL, NULL, fun_so_universal_newline
};
<%
map_crlf = {}
map_crlf["{00-09,0b-ff}"] = :nomap
map_crlf["0a"] = "0d0a"
%>
<%= transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") %>
static const rb_transcoder
rb_crlf_newline = {
"", "crlf_newline", &crlf_newline,
@ -66,14 +72,6 @@ rb_crlf_newline = {
NULL, NULL, NULL, NULL
};
<%
map_cr = {}
map_cr["{00-09,0b-ff}"] = :nomap
map_cr["0a"] = "0d"
%>
<%= transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") %>
static const rb_transcoder
rb_cr_newline = {
"", "cr_newline", &cr_newline,

View file

@ -20,14 +20,11 @@
require 'iso-8859-14-tbl'
require 'iso-8859-15-tbl'
%>
transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map
transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map
<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %>
<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %>
<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %>
<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %>
<%
def transcode_tblgen_iso8859(name, tbl_to_ucs)
tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs
name_ident = name.tr('-','_')
@ -37,22 +34,24 @@
code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
code
end
transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
%>
<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %>
<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %>
<%= transcode_generated_code %>
void
Init_single_byte(void)

View file

@ -1,5 +1,42 @@
#include "transcode_data.h"
<%
map = {}
map["{00-ff}{00-d7,e0-ff}0000"] = :func_so
map["{00-ff}{00-ff}{01-10}00"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE")
map = {}
map["{00-d7,e0-ff}{00-ff}"] = :func_so
map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE")
map = {}
map["{00-7f}"] = :func_so
map["{c2-df}{80-bf}"] = :func_so
map["e0{a0-bf}{80-bf}"] = :func_so
map["{e1-ec}{80-bf}{80-bf}"] = :func_so
map["ed{80-9f}{80-bf}"] = :func_so
map["{ee-ef}{80-bf}{80-bf}"] = :func_so
map["f0{90-bf}{80-bf}{80-bf}"] = :func_so
map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so
map["f4{80-8f}{80-bf}{80-bf}"] = :func_so
am = ActionMap.parse(map)
transcode_generate_node(am, "to_UTF_16BE")
map = {}
map["{00-ff}{00-d7,e0-ff}"] = :func_so
map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE")
map = {}
map["0000{00-d7,e0-ff}{00-ff}"] = :func_so
map["00{01-10}{00-ff}{00-ff}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
%>
<%= transcode_generated_code %>
static int
fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
{
@ -222,13 +259,6 @@ fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
return 4;
}
<%=
map = {}
map["{00-d7,e0-ff}{00-ff}"] = :func_so
map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE")
%>
static const rb_transcoder
rb_from_UTF_16BE = {
"UTF-16BE", "UTF-8", &from_UTF_16BE,
@ -239,21 +269,6 @@ rb_from_UTF_16BE = {
NULL, NULL, NULL, &fun_so_from_utf_16be
};
<%=
map = {}
map["{00-7f}"] = :func_so
map["{c2-df}{80-bf}"] = :func_so
map["e0{a0-bf}{80-bf}"] = :func_so
map["{e1-ec}{80-bf}{80-bf}"] = :func_so
map["ed{80-9f}{80-bf}"] = :func_so
map["{ee-ef}{80-bf}{80-bf}"] = :func_so
map["f0{90-bf}{80-bf}{80-bf}"] = :func_so
map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so
map["f4{80-8f}{80-bf}{80-bf}"] = :func_so
am = ActionMap.parse(map)
transcode_generate_node(am, "to_UTF_16BE")
%>
static const rb_transcoder
rb_to_UTF_16BE = {
"UTF-8", "UTF-16BE", &to_UTF_16BE,
@ -264,13 +279,6 @@ rb_to_UTF_16BE = {
NULL, NULL, NULL, &fun_so_to_utf_16be
};
<%=
map = {}
map["{00-ff}{00-d7,e0-ff}"] = :func_so
map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE")
%>
static const rb_transcoder
rb_from_UTF_16LE = {
"UTF-16LE", "UTF-8", &from_UTF_16LE,
@ -291,13 +299,6 @@ rb_to_UTF_16LE = {
NULL, NULL, NULL, &fun_so_to_utf_16le
};
<%=
map = {}
map["0000{00-d7,e0-ff}{00-ff}"] = :func_so
map["00{01-10}{00-ff}{00-ff}"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE")
%>
static const rb_transcoder
rb_from_UTF_32BE = {
"UTF-32BE", "UTF-8", &from_UTF_32BE,
@ -318,13 +319,6 @@ rb_to_UTF_32BE = {
NULL, NULL, NULL, &fun_so_to_utf_32be
};
<%=
map = {}
map["{00-ff}{00-d7,e0-ff}0000"] = :func_so
map["{00-ff}{00-ff}{01-10}00"] = :func_so
transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE")
%>
static const rb_transcoder
rb_from_UTF_32LE = {
"UTF-32LE", "UTF-8", &from_UTF_32LE,

View file

@ -389,6 +389,12 @@ End
code << generate_lookup_node(name_hint, table)
name_hint
end
# Convenience wrapper around generate_node: supplies a fresh, empty string
# as the output buffer and hands both results back together.
#
# name_hint, valid_encoding:: forwarded unchanged to generate_node.
#
# Returns a two-element array: the value generate_node returned, followed
# by the buffer string (callers destructure it as `name, code = ...`).
def gennode(name_hint=nil, valid_encoding=nil)
  buffer = ''
  node_name = generate_node(buffer, name_hint, valid_encoding)
  [node_name, buffer]
end
end
def citrus_mskanji_cstomb(csid, index)
@ -529,12 +535,12 @@ def transcode_compile_tree(name, from, map)
valid_encoding = nil
end
code = ''
defined_name = am.generate_node(code, name, valid_encoding)
defined_name, code = am.gennode(name, valid_encoding)
return defined_name, code, max_input
end
TRANSCODERS = []
TRANSCODE_GENERATED_CODE = ''
def transcode_tblgen(from, to, map)
STDERR.puts "converter from #{from} to #{to}" if VERBOSE_MODE
@ -565,14 +571,19 @@ static const rb_transcoder
NULL, NULL, NULL
};
End
tree_code + "\n" + transcoder_code
TRANSCODE_GENERATED_CODE << tree_code + "\n" + transcoder_code
''
end
def transcode_generate_node(am, name_hint=nil)
STDERR.puts "converter for #{name_hint}" if VERBOSE_MODE
code = ''
am.generate_node(code, name_hint)
code
name, code = am.gennode(name_hint)
TRANSCODE_GENERATED_CODE << code
''
end
# Accessor for the TRANSCODE_GENERATED_CODE string constant.
# Returns the constant object itself, not a copy.
def transcode_generated_code
TRANSCODE_GENERATED_CODE
end
def transcode_register_code