1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* enc/euc_jp.c: added EUC-JP-2004 and its alias EUC-JISX0213.

[ruby-dev:45571] [Feature #6349]
  Requested by Kyouhei Yanagita <yanagi@shakenbu.org>.

* enc/trans/japanese_euc.trans: ditto.

* enc/trans/JIS/JISX0213-[12]%UCS@{BMP,SIP}.src: JIS X 0213:2004 ->
  Unicode mapping table from NetBSD.

* enc/trans/JIS/UCS@{BMP,SIP}%JISX0213-[12].src: Unicode -> JIS X
  0213:2004 mapping table from NetBSD.

* tool/transcode-tblgen.rb: added SIP support.

* test/ruby/test_transcode.rb: tests of above changes.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@35460 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
usa 2012-04-24 11:14:18 +00:00
parent 66d247bcb5
commit 756ffef448
13 changed files with 9084 additions and 4 deletions

View file

@ -1,3 +1,21 @@
Tue Apr 24 19:59:31 2012 NAKAMURA Usaku <usa@ruby-lang.org>
* enc/euc_jp.c: added EUC-JP-2004 and its alias EUC-JISX0213.
[ruby-dev:45571] [Feature #6349]
Requested by Kyouhei Yanagita <yanagi@shakenbu.org>.
* enc/trans/japanese_euc.trans: ditto.
* enc/trans/JIS/JISX0213-[12]%UCS@{BMP,SIP}.src: JIS X 0213:2004 ->
Unicode mapping table from NetBSD.
* enc/trans/JIS/UCS@{BMP,SIP}%JISX0213-[12].src: Unicode -> JIS X
0213:2004 mapping table from NetBSD.
* tool/transcode-tblgen.rb: added SIP support.
* test/ruby/test_transcode.rb: tests of above changes.
Tue Apr 24 18:12:13 2012 Koichi Sasada <ko1@atdot.net>
* compile.c: fix to output warning when the same literals

View file

@ -635,3 +635,10 @@ ENC_ALIAS("euc-jp-ms", "eucJP-ms")
* Link: http://msyk.at.webry.info/200511/article_2.html
*/
ENC_REPLICATE("CP51932", "EUC-JP")
/*
* Name: EUC-JP-2004
* Link: http://ja.wikipedia.org/wiki/EUC-JP-2004
*/
ENC_REPLICATE("EUC-JP-2004", "EUC-JP") /* defined in JIS X 0213:2004 */
ENC_ALIAS("EUC-JISX0213", "EUC-JP-2004") /* defined in JIS X 0213:2000; obsoleted in JIS X 0213:2004 */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,60 @@
# $NetBSD: JISX0213-1%UCS@SIP.src,v 1.1 2007/03/05 16:58:33 tnozaki Exp $
TYPE ROWCOL
NAME "JISX0213-1/UCS:SIP"
SRC_ZONE 0x21-0x7E / 0x21-0x7E / 8
OOB_MODE INVALID
DST_INVALID 0xFFFE
DST_UNIT_BITS 16
BEGIN_MAP
## JIS X 0213:2004 vs Unicode mapping table
##
## Date: 22 May 2006
## License:
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
## Copyright (C) 2001 I'O, All Rights Reserved.
## Copyright (C) 2006 Project X0213, All Rights Reserved.
## You can use, modify, distribute this table freely.
## Note:
## 3-XXXX JIS X 0213:2004 plane 1 (GL encoding)
## 4-XXXX JIS X 0213:2000 plane 2 (GL encoding)
## [1983] JIS codepoint defined by JIS X 0208-1983
## [1990] JIS codepoint defined by JIS X 0208-1990
## [2000] JIS codepoint defined by JIS X 0213:2000
## [2004] JIS codepoint defined by JIS X 0213:2004
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
## Fullwidth UCS fullwidth form (U+Fxxx)
## Windows Windows (CP932) mapping
## Some 0213 characters can't be represented by one UCS character.
## In this table, such characters are described as 'U+xxxx+xxxx'.
##
## JIS Unicode Name Note
0x2E22 = 0x000B
0x2F42 = 0x123D
0x2F4C = 0x131B
0x2F60 = 0x146E
0x2F7B = 0x18BD
0x4F54 = 0x0B9F
0x4F63 = 0x16B4
0x4F6E = 0x1E34
0x753A = 0x31C4
0x7572 = 0x35C4
0x7629 = 0x373F
0x7632 = 0x3763
0x7660 = 0x3CFE
0x776C = 0x47F1
0x787E = 0x548E
0x7929 = 0x550E
0x7947 = 0x5771
0x7954 = 0x59C4
0x796E = 0x5DA1
0x7A5D = 0x6AFF
0x7B33 = 0x6E40
0x7B49 = 0x70F4
0x7B6C = 0x7684
0x7C49 = 0x8277
0x7C51 = 0x83CD
0x7E66 = 0xA190
END_MAP

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,311 @@
# $NetBSD: JISX0213-2%UCS@SIP.src,v 1.1 2007/03/05 16:58:33 tnozaki Exp $
TYPE ROWCOL
NAME "JISX0213-2/UCS:SIP"
SRC_ZONE 0x21-0x7E / 0x21-0x7E / 8
OOB_MODE INVALID
DST_INVALID 0xFFFE
DST_UNIT_BITS 16
BEGIN_MAP
## JIS X 0213:2004 vs Unicode mapping table
##
## Date: 22 May 2006
## License:
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
## Copyright (C) 2001 I'O, All Rights Reserved.
## Copyright (C) 2006 Project X0213, All Rights Reserved.
## You can use, modify, distribute this table freely.
## Note:
## 3-XXXX JIS X 0213:2004 plane 1 (GL encoding)
## 4-XXXX JIS X 0213:2000 plane 2 (GL encoding)
## [1983] JIS codepoint defined by JIS X 0208-1983
## [1990] JIS codepoint defined by JIS X 0208-1990
## [2000] JIS codepoint defined by JIS X 0213:2000
## [2004] JIS codepoint defined by JIS X 0213:2004
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
## Fullwidth UCS fullwidth form (U+Fxxx)
## Windows Windows (CP932) mapping
## Some 0213 characters can't be represented by one UCS character.
## In this table, such characters are described as 'U+xxxx+xxxx'.
##
## JIS Unicode Name Note
0x2121 = 0x0089
0x212B = 0x00A2
0x212E = 0x00A4
0x2136 = 0x01A2
0x2146 = 0x0213
0x2170 = 0x032B
0x2177 = 0x0381
0x2179 = 0x0371
0x2322 = 0x03F9
0x2325 = 0x044A
0x2327 = 0x0509
0x2331 = 0x05D6
0x2332 = 0x0628
0x2338 = 0x074F
0x233F = 0x0807
0x2341 = 0x083A
0x234A = 0x08B9
0x2352 = 0x097C
0x2353 = 0x099D
0x2359 = 0x0AD3
0x235C = 0x0B1D
0x2377 = 0x0D45
0x242A = 0x0DE1
0x2431 = 0x0E95
0x2432 = 0x0E6D
0x243A = 0x0E64
0x243D = 0x0F5F
0x2459 = 0x1201
0x245C = 0x1255
0x245E = 0x127B
0x2463 = 0x1274
0x246A = 0x12E4
0x246B = 0x12D7
0x2472 = 0x12FD
0x2474 = 0x1336
0x2475 = 0x1344
0x2525 = 0x13C4
0x2532 = 0x146D
0x253E = 0x15D7
0x2544 = 0x6C29
0x2547 = 0x1647
0x2555 = 0x1706
0x2556 = 0x1742
0x257E = 0x19C3
0x2830 = 0x1C56
0x2837 = 0x1D2D
0x2838 = 0x1D45
0x283A = 0x1D78
0x283B = 0x1D62
0x283F = 0x1DA1
0x2840 = 0x1D9C
0x2845 = 0x1D92
0x2848 = 0x1DB7
0x284A = 0x1DE0
0x284B = 0x1E33
0x285B = 0x1F1E
0x2866 = 0x1F76
0x286C = 0x1FFA
0x2C22 = 0x217B
0x2C2B = 0x231E
0x2C30 = 0x23AD
0x2C50 = 0x26F3
0x2C65 = 0x285B
0x2C6D = 0x28AB
0x2C72 = 0x298F
0x2D24 = 0x2AB8
0x2D29 = 0x2B4F
0x2D2A = 0x2B50
0x2D32 = 0x2B46
0x2D34 = 0x2C1D
0x2D35 = 0x2BA6
0x2D39 = 0x2C24
0x2D56 = 0x2DE1
0x2D7D = 0x31C3
0x2E23 = 0x31F5
0x2E24 = 0x31B6
0x2E3A = 0x3372
0x2E3C = 0x33D3
0x2E3D = 0x33D2
0x2E42 = 0x33D0
0x2E43 = 0x33E4
0x2E44 = 0x33D5
0x2E47 = 0x33DA
0x2E49 = 0x33DF
0x2E55 = 0x344A
0x2E56 = 0x3451
0x2E57 = 0x344B
0x2E5B = 0x3465
0x2E77 = 0x34E4
0x2E78 = 0x355A
0x2F2A = 0x3594
0x2F3F = 0x3639
0x2F40 = 0x3647
0x2F42 = 0x3638
0x2F43 = 0x363A
0x2F4E = 0x371C
0x2F59 = 0x370C
0x2F61 = 0x3764
0x2F69 = 0x37FF
0x2F6A = 0x37E7
0x2F70 = 0x3824
0x2F75 = 0x383D
0x6E23 = 0x3A98
0x6E34 = 0x3C7F
0x6E49 = 0x3D00
0x6E5C = 0x3D40
0x6E5E = 0x3DFA
0x6E5F = 0x3DF9
0x6E60 = 0x3DD3
0x6F32 = 0x3F7E
0x6F47 = 0x4096
0x6F4D = 0x4103
0x6F61 = 0x41C6
0x6F64 = 0x41FE
0x7022 = 0x43BC
0x7033 = 0x4629
0x7039 = 0x46A5
0x7053 = 0x4896
0x707B = 0x4A4D
0x712E = 0x4B56
0x7130 = 0x4B6F
0x7135 = 0x4C16
0x7144 = 0x4D14
0x715D = 0x4E0E
0x7161 = 0x4E37
0x7166 = 0x4E6A
0x7169 = 0x4E8B
0x7175 = 0x504A
0x7177 = 0x5055
0x717A = 0x5122
0x7221 = 0x51A9
0x7223 = 0x51E5
0x7224 = 0x51CD
0x7228 = 0x521E
0x722C = 0x524C
0x723D = 0x542E
0x7248 = 0x54D9
0x725B = 0x55A7
0x7275 = 0x57A9
0x7276 = 0x57B4
0x7332 = 0x59D4
0x733D = 0x5AE4
0x733E = 0x5AE3
0x7340 = 0x5AF1
0x7352 = 0x5BB2
0x735D = 0x5C4B
0x735E = 0x5C64
0x7373 = 0x5E2E
0x7374 = 0x5E56
0x7375 = 0x5E65
0x7377 = 0x5E62
0x737B = 0x5ED8
0x737D = 0x5EC2
0x7422 = 0x5EE8
0x7424 = 0x5F23
0x7427 = 0x5F5C
0x742E = 0x5FE0
0x742F = 0x5FD4
0x7434 = 0x600C
0x7435 = 0x5FFB
0x743D = 0x6017
0x7442 = 0x6060
0x744F = 0x60ED
0x7469 = 0x6270
0x746B = 0x6286
0x7472 = 0x634C
0x7475 = 0x3D0E
0x7479 = 0x6402
0x7535 = 0x667E
0x753A = 0x66B0
0x7546 = 0x671D
0x7556 = 0x68DD
0x7558 = 0x68EA
0x755A = 0x6951
0x755D = 0x696F
0x755F = 0x69DD
0x7563 = 0x6A1E
0x756A = 0x6A58
0x7570 = 0x6A8C
0x7573 = 0x6AB7
0x7644 = 0x6C73
0x764E = 0x6CDD
0x765D = 0x6E65
0x7675 = 0x6F94
0x767E = 0x6FF8
0x7721 = 0x6FF6
0x7722 = 0x6FF7
0x7733 = 0x710D
0x7736 = 0x7139
0x7764 = 0x73DB
0x7765 = 0x73DA
0x776B = 0x73FE
0x776E = 0x7410
0x7773 = 0x7449
0x7829 = 0x7615
0x782A = 0x7614
0x782C = 0x7631
0x7834 = 0x7693
0x783C = 0x770E
0x783E = 0x7723
0x7842 = 0x7752
0x7856 = 0x7985
0x7863 = 0x7A84
0x7877 = 0x7BB3
0x7879 = 0x7BBE
0x787A = 0x7BC7
0x7925 = 0x7CB8
0x792F = 0x7DA0
0x7932 = 0x7E10
0x7939 = 0x7FB7
0x7942 = 0x808A
0x7948 = 0x80BB
0x7959 = 0x8282
0x795E = 0x82F3
0x7966 = 0x840C
0x796B = 0x8455
0x797A = 0x856B
0x797E = 0x85C8
0x7A21 = 0x85C9
0x7A2C = 0x86D7
0x7A2F = 0x86FA
0x7A4F = 0x8949
0x7A50 = 0x8946
0x7A57 = 0x896B
0x7A65 = 0x8987
0x7A66 = 0x8988
0x7A71 = 0x89BA
0x7A72 = 0x89BB
0x7A7E = 0x8A1E
0x7B21 = 0x8A29
0x7B2C = 0x8A71
0x7B2D = 0x8A43
0x7B36 = 0x8A99
0x7B37 = 0x8ACD
0x7B3D = 0x8AE4
0x7B3E = 0x8ADD
0x7B4E = 0x8BC1
0x7B4F = 0x8BEF
0x7B57 = 0x8D10
0x7B5A = 0x8D71
0x7B5C = 0x8DFB
0x7B5D = 0x8E1F
0x7B61 = 0x8E36
0x7B65 = 0x8E89
0x7B67 = 0x8EEB
0x7B69 = 0x8F32
0x7B71 = 0x8FF8
0x7C22 = 0x92A0
0x7C23 = 0x92B1
0x7C38 = 0x9490
0x7C42 = 0x95CF
0x7C4C = 0x967F
0x7C56 = 0x96F0
0x7C59 = 0x9719
0x7C5D = 0x9750
0x7C76 = 0x98C6
0x7D2C = 0x9A72
0x7D4B = 0x9DDB
0x7D4C = 0x9E3D
0x7D59 = 0x9E15
0x7D5B = 0x9E8A
0x7D5D = 0x9E49
0x7D67 = 0x9EC4
0x7D6D = 0x9EE9
0x7D70 = 0x9EDB
0x7E25 = 0x9FCE
0x7E29 = 0xA02F
0x7E2B = 0xA01A
0x7E32 = 0xA0F9
0x7E35 = 0xA082
0x7E53 = 0x2218
0x7E58 = 0xA38C
0x7E5A = 0xA437
0x7E6E = 0xA5F1
0x7E70 = 0xA602
0x7E72 = 0xA61A
0x7E76 = 0xA6B2
END_MAP

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,56 @@
# $NetBSD: UCS@SIP%JISX0213-1.src,v 1.1 2007/03/05 16:58:34 tnozaki Exp $
TYPE ROWCOL
NAME "UCS:SIP/JISX0213-1"
SRC_ZONE 0x000B - 0xA190
OOB_MODE INVALID
DST_INVALID 0xFFFF
DST_UNIT_BITS 16
BEGIN_MAP
## Shift_JIS-2004 (JIS X 0213:2004) vs Unicode mapping table
##
## Date: 12 Feb 2005 10:15:00 GMT
## License:
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
## Copyright (C) 2001 I'O, All Rights Reserved.
## You can use, modify, distribute this table freely.
## Note:
## [1983] JIS codepoint defined by JIS X 0208-1983
## [1990] JIS codepoint defined by JIS X 0208-1990
## [2000] JIS codepoint defined by JIS X 0213:2000
## [2004] JIS codepoint defined by JIS X 0213:2004
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
## Fullwidth UCS fullwidth form (U+Fxxx)
## Windows Windows (CP932) mapping
## Some 0213 characters can't be represented by one UCS character.
## In this table, such characters are described as 'U+xxxx+xxxx'.
##
0x000B = 0x2E22
0x0B9F = 0x4F54
0x123D = 0x2F42
0x131B = 0x2F4C
0x146E = 0x2F60
0x16B4 = 0x4F63
0x18BD = 0x2F7B
0x1E34 = 0x4F6E
0x31C4 = 0x753A
0x35C4 = 0x7572
0x373F = 0x7629
0x3763 = 0x7632
0x3CFE = 0x7660
0x47F1 = 0x776C
0x548E = 0x787E
0x550E = 0x7929
0x5771 = 0x7947
0x59C4 = 0x7954
0x5DA1 = 0x796E
0x6AFF = 0x7A5D
0x6E40 = 0x7B33
0x70F4 = 0x7B49
0x7684 = 0x7B6C
0x8277 = 0x7C49
0x83CD = 0x7C51
0xA190 = 0x7E66
END_MAP

View file

@ -0,0 +1,307 @@
# $NetBSD: UCS@SIP%JISX0213-2.src,v 1.1 2007/03/05 16:58:34 tnozaki Exp $
TYPE ROWCOL
NAME "UCS:SIP/JISX0213-2"
SRC_ZONE 0x0089 - 0xA6B2
OOB_MODE INVALID
DST_INVALID 0xFFFF
DST_UNIT_BITS 16
BEGIN_MAP
## Shift_JIS-2004 (JIS X 0213:2004) vs Unicode mapping table
##
## Date: 12 Feb 2005 10:15:00 GMT
## License:
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
## Copyright (C) 2001 I'O, All Rights Reserved.
## You can use, modify, distribute this table freely.
## Note:
## [1983] JIS codepoint defined by JIS X 0208-1983
## [1990] JIS codepoint defined by JIS X 0208-1990
## [2000] JIS codepoint defined by JIS X 0213:2000
## [2004] JIS codepoint defined by JIS X 0213:2004
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
## Fullwidth UCS fullwidth form (U+Fxxx)
## Windows Windows (CP932) mapping
## Some 0213 characters can't be represented by one UCS character.
## In this table, such characters are described as 'U+xxxx+xxxx'.
##
0x0089 = 0x2121
0x00A2 = 0x212B
0x00A4 = 0x212E
0x01A2 = 0x2136
0x0213 = 0x2146
0x032B = 0x2170
0x0371 = 0x2179
0x0381 = 0x2177
0x03F9 = 0x2322
0x044A = 0x2325
0x0509 = 0x2327
0x05D6 = 0x2331
0x0628 = 0x2332
0x074F = 0x2338
0x0807 = 0x233F
0x083A = 0x2341
0x08B9 = 0x234A
0x097C = 0x2352
0x099D = 0x2353
0x0AD3 = 0x2359
0x0B1D = 0x235C
0x0D45 = 0x2377
0x0DE1 = 0x242A
0x0E64 = 0x243A
0x0E6D = 0x2432
0x0E95 = 0x2431
0x0F5F = 0x243D
0x1201 = 0x2459
0x1255 = 0x245C
0x1274 = 0x2463
0x127B = 0x245E
0x12D7 = 0x246B
0x12E4 = 0x246A
0x12FD = 0x2472
0x1336 = 0x2474
0x1344 = 0x2475
0x13C4 = 0x2525
0x146D = 0x2532
0x15D7 = 0x253E
0x1647 = 0x2547
0x1706 = 0x2555
0x1742 = 0x2556
0x19C3 = 0x257E
0x1C56 = 0x2830
0x1D2D = 0x2837
0x1D45 = 0x2838
0x1D62 = 0x283B
0x1D78 = 0x283A
0x1D92 = 0x2845
0x1D9C = 0x2840
0x1DA1 = 0x283F
0x1DB7 = 0x2848
0x1DE0 = 0x284A
0x1E33 = 0x284B
0x1F1E = 0x285B
0x1F76 = 0x2866
0x1FFA = 0x286C
0x217B = 0x2C22
0x2218 = 0x7E53
0x231E = 0x2C2B
0x23AD = 0x2C30
0x26F3 = 0x2C50
0x285B = 0x2C65
0x28AB = 0x2C6D
0x298F = 0x2C72
0x2AB8 = 0x2D24
0x2B46 = 0x2D32
0x2B4F = 0x2D29
0x2B50 = 0x2D2A
0x2BA6 = 0x2D35
0x2C1D = 0x2D34
0x2C24 = 0x2D39
0x2DE1 = 0x2D56
0x31B6 = 0x2E24
0x31C3 = 0x2D7D
0x31F5 = 0x2E23
0x3372 = 0x2E3A
0x33D0 = 0x2E42
0x33D2 = 0x2E3D
0x33D3 = 0x2E3C
0x33D5 = 0x2E44
0x33DA = 0x2E47
0x33DF = 0x2E49
0x33E4 = 0x2E43
0x344A = 0x2E55
0x344B = 0x2E57
0x3451 = 0x2E56
0x3465 = 0x2E5B
0x34E4 = 0x2E77
0x355A = 0x2E78
0x3594 = 0x2F2A
0x3638 = 0x2F42
0x3639 = 0x2F3F
0x363A = 0x2F43
0x3647 = 0x2F40
0x370C = 0x2F59
0x371C = 0x2F4E
0x3764 = 0x2F61
0x37E7 = 0x2F6A
0x37FF = 0x2F69
0x3824 = 0x2F70
0x383D = 0x2F75
0x3A98 = 0x6E23
0x3C7F = 0x6E34
0x3D00 = 0x6E49
0x3D0E = 0x7475
0x3D40 = 0x6E5C
0x3DD3 = 0x6E60
0x3DF9 = 0x6E5F
0x3DFA = 0x6E5E
0x3F7E = 0x6F32
0x4096 = 0x6F47
0x4103 = 0x6F4D
0x41C6 = 0x6F61
0x41FE = 0x6F64
0x43BC = 0x7022
0x4629 = 0x7033
0x46A5 = 0x7039
0x4896 = 0x7053
0x4A4D = 0x707B
0x4B56 = 0x712E
0x4B6F = 0x7130
0x4C16 = 0x7135
0x4D14 = 0x7144
0x4E0E = 0x715D
0x4E37 = 0x7161
0x4E6A = 0x7166
0x4E8B = 0x7169
0x504A = 0x7175
0x5055 = 0x7177
0x5122 = 0x717A
0x51A9 = 0x7221
0x51CD = 0x7224
0x51E5 = 0x7223
0x521E = 0x7228
0x524C = 0x722C
0x542E = 0x723D
0x54D9 = 0x7248
0x55A7 = 0x725B
0x57A9 = 0x7275
0x57B4 = 0x7276
0x59D4 = 0x7332
0x5AE3 = 0x733E
0x5AE4 = 0x733D
0x5AF1 = 0x7340
0x5BB2 = 0x7352
0x5C4B = 0x735D
0x5C64 = 0x735E
0x5E2E = 0x7373
0x5E56 = 0x7374
0x5E62 = 0x7377
0x5E65 = 0x7375
0x5EC2 = 0x737D
0x5ED8 = 0x737B
0x5EE8 = 0x7422
0x5F23 = 0x7424
0x5F5C = 0x7427
0x5FD4 = 0x742F
0x5FE0 = 0x742E
0x5FFB = 0x7435
0x600C = 0x7434
0x6017 = 0x743D
0x6060 = 0x7442
0x60ED = 0x744F
0x6270 = 0x7469
0x6286 = 0x746B
0x634C = 0x7472
0x6402 = 0x7479
0x667E = 0x7535
0x66B0 = 0x753A
0x671D = 0x7546
0x68DD = 0x7556
0x68EA = 0x7558
0x6951 = 0x755A
0x696F = 0x755D
0x69DD = 0x755F
0x6A1E = 0x7563
0x6A58 = 0x756A
0x6A8C = 0x7570
0x6AB7 = 0x7573
0x6C29 = 0x2544
0x6C73 = 0x7644
0x6CDD = 0x764E
0x6E65 = 0x765D
0x6F94 = 0x7675
0x6FF6 = 0x7721
0x6FF7 = 0x7722
0x6FF8 = 0x767E
0x710D = 0x7733
0x7139 = 0x7736
0x73DA = 0x7765
0x73DB = 0x7764
0x73FE = 0x776B
0x7410 = 0x776E
0x7449 = 0x7773
0x7614 = 0x782A
0x7615 = 0x7829
0x7631 = 0x782C
0x7693 = 0x7834
0x770E = 0x783C
0x7723 = 0x783E
0x7752 = 0x7842
0x7985 = 0x7856
0x7A84 = 0x7863
0x7BB3 = 0x7877
0x7BBE = 0x7879
0x7BC7 = 0x787A
0x7CB8 = 0x7925
0x7DA0 = 0x792F
0x7E10 = 0x7932
0x7FB7 = 0x7939
0x808A = 0x7942
0x80BB = 0x7948
0x8282 = 0x7959
0x82F3 = 0x795E
0x840C = 0x7966
0x8455 = 0x796B
0x856B = 0x797A
0x85C8 = 0x797E
0x85C9 = 0x7A21
0x86D7 = 0x7A2C
0x86FA = 0x7A2F
0x8946 = 0x7A50
0x8949 = 0x7A4F
0x896B = 0x7A57
0x8987 = 0x7A65
0x8988 = 0x7A66
0x89BA = 0x7A71
0x89BB = 0x7A72
0x8A1E = 0x7A7E
0x8A29 = 0x7B21
0x8A43 = 0x7B2D
0x8A71 = 0x7B2C
0x8A99 = 0x7B36
0x8ACD = 0x7B37
0x8ADD = 0x7B3E
0x8AE4 = 0x7B3D
0x8BC1 = 0x7B4E
0x8BEF = 0x7B4F
0x8D10 = 0x7B57
0x8D71 = 0x7B5A
0x8DFB = 0x7B5C
0x8E1F = 0x7B5D
0x8E36 = 0x7B61
0x8E89 = 0x7B65
0x8EEB = 0x7B67
0x8F32 = 0x7B69
0x8FF8 = 0x7B71
0x92A0 = 0x7C22
0x92B1 = 0x7C23
0x9490 = 0x7C38
0x95CF = 0x7C42
0x967F = 0x7C4C
0x96F0 = 0x7C56
0x9719 = 0x7C59
0x9750 = 0x7C5D
0x98C6 = 0x7C76
0x9A72 = 0x7D2C
0x9DDB = 0x7D4B
0x9E15 = 0x7D59
0x9E3D = 0x7D4C
0x9E49 = 0x7D5D
0x9E8A = 0x7D5B
0x9EC4 = 0x7D67
0x9EDB = 0x7D70
0x9EE9 = 0x7D6D
0x9FCE = 0x7E25
0xA01A = 0x7E2B
0xA02F = 0x7E29
0xA082 = 0x7E35
0xA0F9 = 0x7E32
0xA38C = 0x7E58
0xA437 = 0x7E5A
0xA5F1 = 0x7E6E
0xA602 = 0x7E70
0xA61A = 0x7E72
0xA6B2 = 0x7E76
END_MAP

View file

@ -18,6 +18,12 @@
citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS") +
citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS")
transcode_tblgen "EUC-JP-2004", "UTF-8",
[["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS,JISX0213-1/UCS@BMP,JISX0213-1/UCS@SIP") +
citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS") +
citrus_decode_mapsrc("euc", 0x8000, "JISX0213-2/UCS@BMP,JISX0213-2/UCS@SIP")
transcode_tblgen "UTF-8", "EUC-JP",
[["{00-7f}", :nomap]] +
@ -35,6 +41,12 @@
[["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:MS,UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM") +
citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA")
transcode_tblgen "UTF-8", "EUC-JP-2004",
[["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990,UCS@BMP/JISX0213-1,UCS@SIP/JISX0213-1") +
citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA") +
citrus_decode_mapsrc("euc", 0x8000, "UCS@BMP/JISX0213-2,UCS@SIP/JISX0213-2")
%>
<%= transcode_generated_code %>

View file

@ -65,8 +65,11 @@ class TestTranscode < Test::Unit::TestCase
"\x82\xdc\x82\xc2\x82\xe0\x82\xc6 \x82\xe4\x82\xab\x82\xd0\x82\xeb", 'shift_jis') # まつもと ゆきひろ
check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D",
"\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp')
check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D",
"\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp-2004')
check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘
check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp')
check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp-2004')
check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-1') # Dürst
check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-2')
check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-3')
@ -83,6 +86,7 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u0643\u062A\u0628", "\xE3\xCA\xC8", 'iso-8859-6') # كتب
check_both_ways("\u65E5\u8A18", "\x93\xFA\x8BL", 'shift_jis') # 日記
check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp')
check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp-2004')
check_both_ways("\uC560\uC778\uAD6C\uD568\u0020\u6734\uC9C0\uC778",
"\xBE\xD6\xC0\xCE\xB1\xB8\xC7\xD4\x20\xDA\xD3\xC1\xF6\xC0\xCE", 'euc-kr') # 애인구함 朴지인
check_both_ways("\uC544\uD58F\uD58F\u0020\uB620\uBC29\uD6BD\uB2D8\u0020\uC0AC\uB791\uD716",
@ -1154,10 +1158,16 @@ class TestTranscode < Test::Unit::TestCase
assert_equal("\uFFFD!",
"\xff!".encode("utf-8", "euc-jp", :invalid=>:replace))
assert_equal("\uFFFD!",
"\xff!".encode("utf-8", "euc-jp-2004", :invalid=>:replace))
assert_equal("\uFFFD!",
"\xa1!".encode("utf-8", "euc-jp", :invalid=>:replace))
assert_equal("\uFFFD!",
"\xa1!".encode("utf-8", "euc-jp-2004", :invalid=>:replace))
assert_equal("\uFFFD!",
"\x8f\xa1!".encode("utf-8", "euc-jp", :invalid=>:replace))
assert_equal("\uFFFD!",
"\x8f\xa1!".encode("utf-8", "euc-jp-2004", :invalid=>:replace))
assert_equal("?",
"\xdc\x00".encode("EUC-JP", "UTF-16BE", :invalid=>:replace), "[ruby-dev:35776]")
@ -1174,6 +1184,7 @@ class TestTranscode < Test::Unit::TestCase
# Checks that, with :invalid=>:replace and :replace=>"<x>", the byte \x80 in
# the source string is rendered as "<x>" when transcoding to us-ascii, for
# both the euc-jp and euc-jp-2004 source encodings.
def test_invalid_replace_string
assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jp", :invalid=>:replace, :replace=>"<x>"))
assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jp-2004", :invalid=>:replace, :replace=>"<x>"))
end
def test_undef_replace
@ -1288,6 +1299,64 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jp') # 神林義博
end
# Exercises the 'euc-jp-2004' converter on a sample of code points: two-byte
# sequences, code points above U+FFFF (written as \u{...}), and one
# three-byte sequence that starts with 0x8F.
# NOTE(review): check_both_ways is defined elsewhere in this file; presumably
# it verifies the conversion in both directions -- confirm against its
# definition.
def test_euc_jp_2004
check_both_ways("\u3000", "\xA1\xA1", 'euc-jp-2004') # full-width space
check_both_ways("\u00D7", "\xA1\xDF", 'euc-jp-2004') # ×
check_both_ways("\u00F7", "\xA1\xE0", 'euc-jp-2004') # ÷
check_both_ways("\u25C7", "\xA1\xFE", 'euc-jp-2004') # ◇
check_both_ways("\u25C6", "\xA2\xA1", 'euc-jp-2004') # ◆
check_both_ways("\uFF07", "\xA2\xAF", 'euc-jp-2004') # ＇
check_both_ways("\u309F", "\xA2\xB9", 'euc-jp-2004') # ゟ
check_both_ways("\u2284", "\xA2\xC2", 'euc-jp-2004') # ⊄
check_both_ways("\u2306", "\xA2\xC9", 'euc-jp-2004') # ⌆
check_both_ways("\u2295", "\xA2\xD1", 'euc-jp-2004') # ⊕
check_both_ways("\u3017", "\xA2\xDB", 'euc-jp-2004') # 〗
check_both_ways("\u2262", "\xA2\xEB", 'euc-jp-2004') # ≢
check_both_ways("\u2194", "\xA2\xF1", 'euc-jp-2004') # ↔
check_both_ways("\u266E", "\xA2\xFA", 'euc-jp-2004') # ♮
check_both_ways("\u2669", "\xA2\xFD", 'euc-jp-2004') # ♩
check_both_ways("\u25EF", "\xA2\xFE", 'euc-jp-2004') # ◯
check_both_ways("\u2935", "\xA3\xAF", 'euc-jp-2004') # ⤵
check_both_ways("\u29BF", "\xA3\xBA", 'euc-jp-2004') # ⦿
check_both_ways("\u2022", "\xA3\xC0", 'euc-jp-2004') # •
check_both_ways("\u2213", "\xA3\xDB", 'euc-jp-2004') # ∓
check_both_ways("\u2127", "\xA3\xE0", 'euc-jp-2004') # ℧
check_both_ways("\u30A0", "\xA3\xFB", 'euc-jp-2004') # ゠
check_both_ways("\uFF54", "\xA3\xF4", 'euc-jp-2004') # ｔ
# "\xA5\xF7" is expected to have no UTF-8 counterpart: converting it must raise.
assert_raise(Encoding::UndefinedConversionError) { "\xA5\xF7".encode("utf-8", 'euc-jp-2004') }
check_both_ways("\u2664", "\xA6\xB9", 'euc-jp-2004') # ♤
check_both_ways("\u2663", "\xA6\xC0", 'euc-jp-2004') # ♣
check_both_ways("\u03C2", "\xA6\xD9", 'euc-jp-2004') # ς
check_both_ways("\u23BE", "\xA7\xC2", 'euc-jp-2004') # ⎾
check_both_ways("\u23CC", "\xA7\xD0", 'euc-jp-2004') # ⏌
check_both_ways("\u30F7", "\xA7\xF2", 'euc-jp-2004') # ヷ
check_both_ways("\u3251", "\xA8\xC1", 'euc-jp-2004') # ㉑
check_both_ways("\u{20B9F}", "\xCF\xD4", 'euc-jp-2004') # 𠮑
check_both_ways("\u541E", "\xCF\xFE", 'euc-jp-2004') # 吞
check_both_ways("\u6A97", "\xDD\xA1", 'euc-jp-2004') # 檗
check_both_ways("\u6BEF", "\xDD\xDF", 'euc-jp-2004') # 毯
check_both_ways("\u9EBE", "\xDD\xE0", 'euc-jp-2004') # 麾
check_both_ways("\u6CBE", "\xDD\xFE", 'euc-jp-2004') # 沾
check_both_ways("\u6CBA", "\xDE\xA1", 'euc-jp-2004') # 沺
check_both_ways("\u6ECC", "\xDE\xFE", 'euc-jp-2004') # 滌
check_both_ways("\u6F3E", "\xDF\xA1", 'euc-jp-2004') # 漾
check_both_ways("\u70DD", "\xDF\xDF", 'euc-jp-2004') # 烝
check_both_ways("\u70D9", "\xDF\xE0", 'euc-jp-2004') # 烙
check_both_ways("\u71FC", "\xDF\xFE", 'euc-jp-2004') # 燼
check_both_ways("\u71F9", "\xE0\xA1", 'euc-jp-2004') # 燹
check_both_ways("\u73F1", "\xE0\xFE", 'euc-jp-2004') # 珱
check_both_ways("\u5653", "\xF4\xA7", 'euc-jp-2004') # 噓
#check_both_ways("\u9ADC", "\xFC\xE3", 'euc-jp') # 髜 (IBM extended)
check_both_ways("\u9DD7", "\xFE\xE5", 'euc-jp-2004') # 鷗
# Code points above U+FFFF; the second one uses a three-byte sequence led by 0x8F.
check_both_ways("\u{2000B}", "\xAE\xA2", 'euc-jp-2004') # 𠀋
check_both_ways("\u{2A6B2}", "\x8F\xFE\xF6", 'euc-jp-2004') # 𪚲
check_both_ways("\u677E\u672C\u884C\u5F18", "\xBE\xBE\xCB\xDC\xB9\xD4\xB9\xB0", 'euc-jp-2004') # 松本行弘
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC0\xC4\xBB\xB3\xB3\xD8\xB1\xA1\xC2\xE7\xB3\xD8", 'euc-jp-2004') # 青山学院大学
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jp-2004') # 神林義博
end
def test_eucjp_ms
check_both_ways("\u2116", "\xAD\xE2", 'eucJP-ms') # NUMERO SIGN
check_both_ways("\u221A", "\xA2\xE5", 'eucJP-ms') # SQUARE ROOT

View file

@ -704,14 +704,20 @@ def citrus_decode_mapsrc(ces, csid, mapsrcs)
mapsrcs.split(',').each do |mapsrc|
path = [$srcdir]
mode = nil
if mapsrc.rindex('UCS', 0)
if mapsrc.rindex(/UCS(?:@[A-Z]+)?/, 0)
mode = :from_ucs
from = mapsrc[4..-1]
from = mapsrc[$&.size+1..-1]
path << SUBDIR.find{|x| from.rindex(x, 0) }
else
mode = :to_ucs
path << SUBDIR.find{|x| mapsrc.rindex(x, 0) }
end
if /\bUCS@(BMP|SMP|SIP|TIP|SSP)\b/ =~ mapsrc
plane = {"BMP"=>0, "SMP"=>1, "SIP"=>2, "TIP"=>3, "SSP"=>14}[$1]
else
plane = 0
end
plane <<= 16
path << mapsrc.gsub(':', '@')
path = File.join(*path)
path << ".src"
@ -730,14 +736,14 @@ def citrus_decode_mapsrc(ces, csid, mapsrcs)
when /0x(\w+)\s*-\s*0x(\w+)\s*=\s*INVALID/
# Citrus OOB_MODE
when /(0x\w+)\s*=\s*(0x\w+)/
table.push << [$1.hex, citrus_cstomb(ces, csid, $2.hex)]
table.push << [plane | $1.hex, citrus_cstomb(ces, csid, $2.hex)]
else
raise "unknown notation '%s'"% l
end
when :to_ucs
case l
when /(0x\w+)\s*=\s*(0x\w+)/
table.push << [citrus_cstomb(ces, csid, $1.hex), $2.hex]
table.push << [citrus_cstomb(ces, csid, $1.hex), plane | $2.hex]
else
raise "unknown notation '%s'"% l
end
@ -919,6 +925,10 @@ ValidEncoding = {
'CP51932' => '{00-7f}
{a1-fe}{a1-fe}
8e{a1-fe}',
'EUC-JP-2004' => '{00-7f}
{a1-fe}{a1-fe}
8e{a1-fe}
8f{a1-fe}{a1-fe}',
'Shift_JIS' => '{00-7f}
{81-9f,e0-fc}{40-7e,80-fc}
{a1-df}',