mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/trans/single_byte.trans: refactoring to make it easier
  to add more transcodings (with Yoshihiro Kambayashi)

* enc/trans/iso-8859-1-tbl.rb: new file to avoid having to
  treat ISO-8859-1 as special

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@20054 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent 5cdd7f52cc
commit 6fd14ccae5
3 changed files with 143 additions and 54 deletions
|
@ -1,3 +1,11 @@
|
|||
Thu Oct 30 14:45:45 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/trans/single_byte.trans: refactoring to make it easier
|
||||
to add more transcodings (with Yoshihiro Kambayashi)
|
||||
|
||||
* enc/trans/iso-8859-1-tbl.rb: new file to avoid having to
|
||||
treat ISO-8859-1 as special
|
||||
|
||||
Thu Oct 30 10:34:20 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* ruby.c (moreswitches): splits option string and passes arguments.
|
||||
|
|
98
enc/trans/iso-8859-1-tbl.rb
Normal file
98
enc/trans/iso-8859-1-tbl.rb
Normal file
|
@ -0,0 +1,98 @@
|
|||
ISO_8859_1_TO_UCS_TBL = [
|
||||
["A0",0xA0],
|
||||
["A1",0xA1],
|
||||
["A2",0xA2],
|
||||
["A3",0xA3],
|
||||
["A4",0xA4],
|
||||
["A5",0xA5],
|
||||
["A6",0xA6],
|
||||
["A7",0xA7],
|
||||
["A8",0xA8],
|
||||
["A9",0xA9],
|
||||
["AA",0xAA],
|
||||
["AB",0xAB],
|
||||
["AC",0xAC],
|
||||
["AD",0xAD],
|
||||
["AE",0xAE],
|
||||
["AF",0xAF],
|
||||
["B0",0xB0],
|
||||
["B1",0xB1],
|
||||
["B2",0xB2],
|
||||
["B3",0xB3],
|
||||
["B4",0xB4],
|
||||
["B5",0xB5],
|
||||
["B6",0xB6],
|
||||
["B7",0xB7],
|
||||
["B8",0xB8],
|
||||
["B9",0xB9],
|
||||
["BA",0xBA],
|
||||
["BB",0xBB],
|
||||
["BC",0xBC],
|
||||
["BD",0xBD],
|
||||
["BE",0xBE],
|
||||
["BF",0xBF],
|
||||
["C0",0xC0],
|
||||
["C1",0xC1],
|
||||
["C2",0xC2],
|
||||
["C3",0xC3],
|
||||
["C4",0xC4],
|
||||
["C5",0xC5],
|
||||
["C6",0xC6],
|
||||
["C7",0xC7],
|
||||
["C8",0xC8],
|
||||
["C9",0xC9],
|
||||
["CA",0xCA],
|
||||
["CB",0xCB],
|
||||
["CC",0xCC],
|
||||
["CD",0xCD],
|
||||
["CE",0xCE],
|
||||
["CF",0xCF],
|
||||
["D0",0xD0],
|
||||
["D1",0xD1],
|
||||
["D2",0xD2],
|
||||
["D3",0xD3],
|
||||
["D4",0xD4],
|
||||
["D5",0xD5],
|
||||
["D6",0xD6],
|
||||
["D7",0xD7],
|
||||
["D8",0xD8],
|
||||
["D9",0xD9],
|
||||
["DA",0xDA],
|
||||
["DB",0xDB],
|
||||
["DC",0xDC],
|
||||
["DD",0xDD],
|
||||
["DE",0xDE],
|
||||
["DF",0xDF],
|
||||
["E0",0xE0],
|
||||
["E1",0xE1],
|
||||
["E2",0xE2],
|
||||
["E3",0xE3],
|
||||
["E4",0xE4],
|
||||
["E5",0xE5],
|
||||
["E6",0xE6],
|
||||
["E7",0xE7],
|
||||
["E8",0xE8],
|
||||
["E9",0xE9],
|
||||
["EA",0xEA],
|
||||
["EB",0xEB],
|
||||
["EC",0xEC],
|
||||
["ED",0xED],
|
||||
["EE",0xEE],
|
||||
["EF",0xEF],
|
||||
["F0",0xF0],
|
||||
["F1",0xF1],
|
||||
["F2",0xF2],
|
||||
["F3",0xF3],
|
||||
["F4",0xF4],
|
||||
["F5",0xF5],
|
||||
["F6",0xF6],
|
||||
["F7",0xF7],
|
||||
["F8",0xF8],
|
||||
["F9",0xF9],
|
||||
["FA",0xFA],
|
||||
["FB",0xFB],
|
||||
["FC",0xFC],
|
||||
["FD",0xFD],
|
||||
["FE",0xFE],
|
||||
["FF",0xFF],
|
||||
]
|
|
@ -3,38 +3,25 @@
|
|||
<%
|
||||
us_ascii_map = [["{00-7f}", :nomap]]
|
||||
|
||||
ISO_8859_1_TO_UCS_TBL = (0x80..0xff).map {|c| ["%02X" % c, c] }
|
||||
CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] }
|
||||
|
||||
require 'iso-8859-2-tbl'
|
||||
require 'iso-8859-3-tbl'
|
||||
require 'iso-8859-4-tbl'
|
||||
require 'iso-8859-5-tbl'
|
||||
require 'iso-8859-6-tbl'
|
||||
require 'iso-8859-7-tbl'
|
||||
require 'iso-8859-8-tbl'
|
||||
require 'iso-8859-9-tbl'
|
||||
require 'iso-8859-10-tbl'
|
||||
require 'iso-8859-11-tbl'
|
||||
require 'iso-8859-13-tbl'
|
||||
require 'iso-8859-14-tbl'
|
||||
require 'iso-8859-15-tbl'
|
||||
require 'windows-874-tbl'
|
||||
require 'windows-1250-tbl'
|
||||
require 'windows-1251-tbl'
|
||||
require 'windows-1252-tbl'
|
||||
require 'windows-1253-tbl'
|
||||
require 'windows-1254-tbl'
|
||||
require 'windows-1255-tbl'
|
||||
require 'windows-1256-tbl'
|
||||
require 'windows-1257-tbl'
|
||||
|
||||
transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
|
||||
transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
|
||||
transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map
|
||||
transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map
|
||||
|
||||
def transcode_tblgen_singlebyte(name, tbl_to_ucs)
|
||||
CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] }
|
||||
|
||||
# Generate transcoding tables for single byte encoding from
|
||||
# encoding name using table file.
|
||||
#
|
||||
# Conventions:
|
||||
# name: encoding name as string, UPPER case, hyphens (e.g. 'ISO-8859-3')
|
||||
# file name: lower case, hyphens, -tbl.rb suffix (e.g. iso-8859-3-tbl.rb)
|
||||
# variable name: UPPER case, underscores, _TO_UCS_TBL suffix (e.g. ISO_8859_3_TO_UCS_TBL)
|
||||
# If the name starts with "ISO-8859", the C1 control code area is added automatically.
|
||||
def transcode_tblgen_singlebyte (name)
|
||||
require(name.downcase + "-tbl")
|
||||
control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
|
||||
tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
|
||||
set_valid_byte_pattern(name, '1byte')
|
||||
code = ''
|
||||
code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs])
|
||||
|
@ -43,33 +30,29 @@
|
|||
code
|
||||
end
|
||||
|
||||
def transcode_tblgen_iso8859(name, tbl_to_ucs)
|
||||
transcode_tblgen_singlebyte(name, CONTROL1_TO_UCS_TBL + tbl_to_ucs)
|
||||
end
|
||||
|
||||
transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-874", WINDOWS_874_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1250", WINDOWS_1250_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1251", WINDOWS_1251_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1252", WINDOWS_1252_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1253", WINDOWS_1253_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1254", WINDOWS_1254_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1255", WINDOWS_1255_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1256", WINDOWS_1256_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte("WINDOWS-1257", WINDOWS_1257_TO_UCS_TBL)
|
||||
transcode_tblgen_singlebyte "ISO-8859-1"
|
||||
transcode_tblgen_singlebyte "ISO-8859-2"
|
||||
transcode_tblgen_singlebyte "ISO-8859-3"
|
||||
transcode_tblgen_singlebyte "ISO-8859-4"
|
||||
transcode_tblgen_singlebyte "ISO-8859-5"
|
||||
transcode_tblgen_singlebyte "ISO-8859-6"
|
||||
transcode_tblgen_singlebyte "ISO-8859-7"
|
||||
transcode_tblgen_singlebyte "ISO-8859-8"
|
||||
transcode_tblgen_singlebyte "ISO-8859-9"
|
||||
transcode_tblgen_singlebyte "ISO-8859-10"
|
||||
transcode_tblgen_singlebyte "ISO-8859-11"
|
||||
transcode_tblgen_singlebyte "ISO-8859-13"
|
||||
transcode_tblgen_singlebyte "ISO-8859-14"
|
||||
transcode_tblgen_singlebyte "ISO-8859-15"
|
||||
transcode_tblgen_singlebyte "WINDOWS-874"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1250"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1251"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1252"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1253"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1254"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1255"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1256"
|
||||
transcode_tblgen_singlebyte "WINDOWS-1257"
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
|
Loading…
Reference in a new issue