From 2e7815dd8000dc4d3ef7f2443bf5fd045812ee9e Mon Sep 17 00:00:00 2001 From: duerst Date: Sun, 16 Mar 2008 09:09:53 +0000 Subject: [PATCH] Sun Mar 16 18:07:07 2008 Martin Duerst * enc/trans/utf_16_32.c: bug fix (some invalid UTF-8 sequences were legal) * test/ruby/test_transcode.rb: test for above bug git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15786 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++++++ enc/trans/utf_16_32.c | 30 +++++++++++++++--------------- test/ruby/test_transcode.rb | 6 +++++- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index c70278cfc7..b415022be8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sun Mar 16 18:07:07 2008 Martin Duerst + + * enc/trans/utf_16_32.c: bug fix (some invalid UTF-8 sequences + were legal) + + * test/ruby/test_transcode.rb: test for above bug + Sun Mar 16 17:28:07 2008 NARUSE, Yui * common.mk (LIBRUBY_SO): add dependency to $(BUILTIN_ENCOBJS). diff --git a/enc/trans/utf_16_32.c b/enc/trans/utf_16_32.c index b31adbb7ba..045dfcdc0a 100644 --- a/enc/trans/utf_16_32.c +++ b/enc/trans/utf_16_32.c @@ -211,7 +211,7 @@ from_UTF_16BE_00_offsets[256] = { static const struct byte_lookup* const from_UTF_16BE_00_infos[1] = { /* used by from_UTF_16BE_00 */ - /* used by to_UTF_32BE_82 */ + /* used by to_UTF_32BE_C2 */ FUNso, }; static const BYTE_LOOKUP @@ -324,8 +324,8 @@ rb_from_UTF_16BE = { }; static const unsigned char -to_UTF_32BE_82_offsets[64] = { - /* used by to_UTF_32BE_82 */ +to_UTF_32BE_C2_offsets[64] = { + /* used by to_UTF_32BE_C2 */ /* used by to_UTF_32BE_E1 */ /* used by to_UTF_32BE_F1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -334,7 +334,7 @@ to_UTF_32BE_82_offsets[64] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static const BYTE_LOOKUP -to_UTF_32BE_82 = { +to_UTF_32BE_C2 = { /* used as to_UTF_32BE */ /* used as to_UTF_16BE */ /* used as to_UTF_32BE_E0 */ @@ -363,7 +363,7 @@ to_UTF_32BE_82 = { /* used as to_UTF_32LE_F1_80 */ /* used as to_UTF_16LE_F4_80 */ /* used as to_UTF_32LE_F4_80 */ - to_UTF_32BE_82_offsets, + to_UTF_32BE_C2_offsets, from_UTF_16BE_00_infos }; @@ -378,7 +378,7 @@ to_UTF_32BE_E0_offsets[64] = { }; static const struct byte_lookup* const to_UTF_32BE_E0_infos[2] = { - INVALID, &to_UTF_32BE_82, + INVALID, &to_UTF_32BE_C2, }; static const BYTE_LOOKUP to_UTF_32BE_E0 = { @@ -392,7 +392,7 @@ to_UTF_32BE_E0 = { static const struct byte_lookup* const to_UTF_32BE_E1_infos[1] = { - &to_UTF_32BE_82, + &to_UTF_32BE_C2, }; static const BYTE_LOOKUP to_UTF_32BE_E1 = { @@ -412,13 +412,13 @@ to_UTF_32BE_E1 = { /* used as to_UTF_32LE_F1 */ /* used as to_UTF_16LE_F4 */ /* used as to_UTF_32LE_F4 */ - to_UTF_32BE_82_offsets, + to_UTF_32BE_C2_offsets, to_UTF_32BE_E1_infos }; static const struct byte_lookup* const to_UTF_32BE_ED_infos[2] = { - &to_UTF_32BE_82, INVALID, + &to_UTF_32BE_C2, INVALID, }; static const BYTE_LOOKUP to_UTF_32BE_ED = { @@ -463,7 +463,7 @@ to_UTF_32BE_F1 = { /* used as to_UTF_16BE */ /* used as to_UTF_16LE */ /* used as to_UTF_32LE */ - to_UTF_32BE_82_offsets, + to_UTF_32BE_C2_offsets, to_UTF_32BE_F1_infos }; @@ -491,18 +491,18 @@ to_UTF_32BE_offsets[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 6, 7, 7, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; static const struct byte_lookup* const to_UTF_32BE_infos[9] = { - FUNso, INVALID, &to_UTF_32BE_82, &to_UTF_32BE_E0, + FUNso, INVALID, &to_UTF_32BE_C2, &to_UTF_32BE_E0, &to_UTF_32BE_E1, &to_UTF_32BE_ED, &to_UTF_32BE_F0, &to_UTF_32BE_F1, &to_UTF_32BE_F4, }; diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 5a704fd364..e4bad8e38a 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -3,7 +3,7 @@ require 'test/unit' class TestTranscode < Test::Unit::TestCase - def setup # trick to create all the necessary encodings + def setup_really_needed? # trick to create all the necessary encodings all_encodings = [ 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', @@ -248,5 +248,9 @@ class TestTranscode < Test::Unit::TestCase "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'), "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + assert_equal("\x00\x42".force_encoding('UTF-16BE'), + "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + assert_equal(''.force_encoding('UTF-16BE'), + "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) end end