mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Sun Mar 16 18:07:07 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* enc/trans/utf_16_32.c: bug fix (some invalid UTF-8 sequences were legal)
* test/ruby/test_transcode.rb: test for above bug
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15786 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
9e202f9398
commit
2e7815dd80
3 changed files with 27 additions and 16 deletions
|
@ -1,3 +1,10 @@
|
||||||
|
Sun Mar 16 18:07:07 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||||
|
|
||||||
|
* enc/trans/utf_16_32.c: bug fix (some invalid UTF-8 sequences
|
||||||
|
were legal)
|
||||||
|
|
||||||
|
* test/ruby/test_transcode.rb: test for above bug
|
||||||
|
|
||||||
Sun Mar 16 17:28:07 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
Sun Mar 16 17:28:07 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
* common.mk (LIBRUBY_SO): add dependency to $(BUILTIN_ENCOBJS).
|
* common.mk (LIBRUBY_SO): add dependency to $(BUILTIN_ENCOBJS).
|
||||||
|
|
|
@ -211,7 +211,7 @@ from_UTF_16BE_00_offsets[256] = {
|
||||||
static const struct byte_lookup* const
|
static const struct byte_lookup* const
|
||||||
from_UTF_16BE_00_infos[1] = {
|
from_UTF_16BE_00_infos[1] = {
|
||||||
/* used by from_UTF_16BE_00 */
|
/* used by from_UTF_16BE_00 */
|
||||||
/* used by to_UTF_32BE_82 */
|
/* used by to_UTF_32BE_C2 */
|
||||||
FUNso,
|
FUNso,
|
||||||
};
|
};
|
||||||
static const BYTE_LOOKUP
|
static const BYTE_LOOKUP
|
||||||
|
@ -324,8 +324,8 @@ rb_from_UTF_16BE = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char
|
static const unsigned char
|
||||||
to_UTF_32BE_82_offsets[64] = {
|
to_UTF_32BE_C2_offsets[64] = {
|
||||||
/* used by to_UTF_32BE_82 */
|
/* used by to_UTF_32BE_C2 */
|
||||||
/* used by to_UTF_32BE_E1 */
|
/* used by to_UTF_32BE_E1 */
|
||||||
/* used by to_UTF_32BE_F1 */
|
/* used by to_UTF_32BE_F1 */
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
@ -334,7 +334,7 @@ to_UTF_32BE_82_offsets[64] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
};
|
};
|
||||||
static const BYTE_LOOKUP
|
static const BYTE_LOOKUP
|
||||||
to_UTF_32BE_82 = {
|
to_UTF_32BE_C2 = {
|
||||||
/* used as to_UTF_32BE */
|
/* used as to_UTF_32BE */
|
||||||
/* used as to_UTF_16BE */
|
/* used as to_UTF_16BE */
|
||||||
/* used as to_UTF_32BE_E0 */
|
/* used as to_UTF_32BE_E0 */
|
||||||
|
@ -363,7 +363,7 @@ to_UTF_32BE_82 = {
|
||||||
/* used as to_UTF_32LE_F1_80 */
|
/* used as to_UTF_32LE_F1_80 */
|
||||||
/* used as to_UTF_16LE_F4_80 */
|
/* used as to_UTF_16LE_F4_80 */
|
||||||
/* used as to_UTF_32LE_F4_80 */
|
/* used as to_UTF_32LE_F4_80 */
|
||||||
to_UTF_32BE_82_offsets,
|
to_UTF_32BE_C2_offsets,
|
||||||
from_UTF_16BE_00_infos
|
from_UTF_16BE_00_infos
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -378,7 +378,7 @@ to_UTF_32BE_E0_offsets[64] = {
|
||||||
};
|
};
|
||||||
static const struct byte_lookup* const
|
static const struct byte_lookup* const
|
||||||
to_UTF_32BE_E0_infos[2] = {
|
to_UTF_32BE_E0_infos[2] = {
|
||||||
INVALID, &to_UTF_32BE_82,
|
INVALID, &to_UTF_32BE_C2,
|
||||||
};
|
};
|
||||||
static const BYTE_LOOKUP
|
static const BYTE_LOOKUP
|
||||||
to_UTF_32BE_E0 = {
|
to_UTF_32BE_E0 = {
|
||||||
|
@ -392,7 +392,7 @@ to_UTF_32BE_E0 = {
|
||||||
|
|
||||||
static const struct byte_lookup* const
|
static const struct byte_lookup* const
|
||||||
to_UTF_32BE_E1_infos[1] = {
|
to_UTF_32BE_E1_infos[1] = {
|
||||||
&to_UTF_32BE_82,
|
&to_UTF_32BE_C2,
|
||||||
};
|
};
|
||||||
static const BYTE_LOOKUP
|
static const BYTE_LOOKUP
|
||||||
to_UTF_32BE_E1 = {
|
to_UTF_32BE_E1 = {
|
||||||
|
@ -412,13 +412,13 @@ to_UTF_32BE_E1 = {
|
||||||
/* used as to_UTF_32LE_F1 */
|
/* used as to_UTF_32LE_F1 */
|
||||||
/* used as to_UTF_16LE_F4 */
|
/* used as to_UTF_16LE_F4 */
|
||||||
/* used as to_UTF_32LE_F4 */
|
/* used as to_UTF_32LE_F4 */
|
||||||
to_UTF_32BE_82_offsets,
|
to_UTF_32BE_C2_offsets,
|
||||||
to_UTF_32BE_E1_infos
|
to_UTF_32BE_E1_infos
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct byte_lookup* const
|
static const struct byte_lookup* const
|
||||||
to_UTF_32BE_ED_infos[2] = {
|
to_UTF_32BE_ED_infos[2] = {
|
||||||
&to_UTF_32BE_82, INVALID,
|
&to_UTF_32BE_C2, INVALID,
|
||||||
};
|
};
|
||||||
static const BYTE_LOOKUP
|
static const BYTE_LOOKUP
|
||||||
to_UTF_32BE_ED = {
|
to_UTF_32BE_ED = {
|
||||||
|
@ -463,7 +463,7 @@ to_UTF_32BE_F1 = {
|
||||||
/* used as to_UTF_16BE */
|
/* used as to_UTF_16BE */
|
||||||
/* used as to_UTF_16LE */
|
/* used as to_UTF_16LE */
|
||||||
/* used as to_UTF_32LE */
|
/* used as to_UTF_32LE */
|
||||||
to_UTF_32BE_82_offsets,
|
to_UTF_32BE_C2_offsets,
|
||||||
to_UTF_32BE_F1_infos
|
to_UTF_32BE_F1_infos
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -491,18 +491,18 @@ to_UTF_32BE_offsets[256] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
||||||
3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
|
3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
|
||||||
6, 7, 7, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
6, 7, 7, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
};
|
};
|
||||||
static const struct byte_lookup* const
|
static const struct byte_lookup* const
|
||||||
to_UTF_32BE_infos[9] = {
|
to_UTF_32BE_infos[9] = {
|
||||||
FUNso, INVALID, &to_UTF_32BE_82, &to_UTF_32BE_E0,
|
FUNso, INVALID, &to_UTF_32BE_C2, &to_UTF_32BE_E0,
|
||||||
&to_UTF_32BE_E1, &to_UTF_32BE_ED, &to_UTF_32BE_F0, &to_UTF_32BE_F1,
|
&to_UTF_32BE_E1, &to_UTF_32BE_ED, &to_UTF_32BE_F0, &to_UTF_32BE_F1,
|
||||||
&to_UTF_32BE_F4,
|
&to_UTF_32BE_F4,
|
||||||
};
|
};
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
class TestTranscode < Test::Unit::TestCase
|
class TestTranscode < Test::Unit::TestCase
|
||||||
def setup # trick to create all the necessary encodings
|
def setup_really_needed? # trick to create all the necessary encodings
|
||||||
all_encodings = [ 'ISO-8859-1', 'ISO-8859-2',
|
all_encodings = [ 'ISO-8859-1', 'ISO-8859-2',
|
||||||
'ISO-8859-3', 'ISO-8859-4',
|
'ISO-8859-3', 'ISO-8859-4',
|
||||||
'ISO-8859-5', 'ISO-8859-6',
|
'ISO-8859-5', 'ISO-8859-6',
|
||||||
|
@ -248,5 +248,9 @@ class TestTranscode < Test::Unit::TestCase
|
||||||
"\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
"\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||||
assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'),
|
assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'),
|
||||||
"\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
"\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||||
|
assert_equal("\x00\x42".force_encoding('UTF-16BE'),
|
||||||
|
"\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||||
|
assert_equal(''.force_encoding('UTF-16BE'),
|
||||||
|
"\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue