mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
merge revision(s) 44604,44605,44606: [Backport #9415]
test_m17n.rb: split tests for inspect * test/ruby/test_m17n.rb (test_utf_16_32_inspect): split tests for each encoding. * string.c (get_actual_encoding): get the actual encoding according to the BOM, if one exists. * string.c (rb_str_inspect): use the actual encoding instead of the pseudo encodings UTF-{16,32}. [ruby-core:59757] [Bug #8940] * string.c (get_encoding): respect the BOM on pseudo encodings. [ruby-dev:47895] [Bug #9415] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@45074 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
4423b56753
commit
862b86f2e4
5 changed files with 88 additions and 37 deletions
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
||||||
|
Fri Feb 21 16:47:20 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (get_encoding): respect BOM on pseudo encodings.
|
||||||
|
[ruby-dev:47895] [Bug #9415]
|
||||||
|
|
||||||
|
Fri Feb 21 16:47:20 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (get_actual_encoding): get actual encoding according to
|
||||||
|
the BOM if exists.
|
||||||
|
|
||||||
|
* string.c (rb_str_inspect): use according encoding, instead of
|
||||||
|
pseudo encodings, UTF-{16,32}. [ruby-core:59757] [Bug #8940]
|
||||||
|
|
||||||
Fri Feb 21 13:39:21 2014 Charlie Somerville <charliesome@ruby-lang.org>
|
Fri Feb 21 13:39:21 2014 Charlie Somerville <charliesome@ruby-lang.org>
|
||||||
|
|
||||||
* compile.c (iseq_build_from_ary_body): Use :blockptr instead of :block
|
* compile.c (iseq_build_from_ary_body): Use :blockptr instead of :block
|
||||||
|
|
|
@ -598,6 +598,12 @@ rb_enc_from_index(int index)
|
||||||
return enc_table.list[index].enc;
|
return enc_table.list[index].enc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rb_encoding *
|
||||||
|
rb_enc_get_from_index(int index)
|
||||||
|
{
|
||||||
|
return must_encindex(index);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
rb_enc_registered(const char *name)
|
rb_enc_registered(const char *name)
|
||||||
{
|
{
|
||||||
|
|
69
string.c
69
string.c
|
@ -121,7 +121,45 @@ VALUE rb_cSymbol;
|
||||||
#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
|
#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
|
||||||
#define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
|
#define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
|
||||||
|
|
||||||
#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
|
#define STR_ENC_GET(str) get_encoding(str)
|
||||||
|
|
||||||
|
rb_encoding *rb_enc_get_from_index(int index);
|
||||||
|
|
||||||
|
static rb_encoding *
|
||||||
|
get_actual_encoding(const int encidx, VALUE str)
|
||||||
|
{
|
||||||
|
const unsigned char *q;
|
||||||
|
|
||||||
|
switch (encidx) {
|
||||||
|
case ENCINDEX_UTF_16:
|
||||||
|
if (RSTRING_LEN(str) < 2) break;
|
||||||
|
q = (const unsigned char *)RSTRING_PTR(str);
|
||||||
|
if (q[0] == 0xFE && q[1] == 0xFF) {
|
||||||
|
return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
|
||||||
|
}
|
||||||
|
if (q[0] == 0xFF && q[1] == 0xFE) {
|
||||||
|
return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
|
||||||
|
}
|
||||||
|
return rb_ascii8bit_encoding();
|
||||||
|
case ENCINDEX_UTF_32:
|
||||||
|
if (RSTRING_LEN(str) < 4) break;
|
||||||
|
q = (const unsigned char *)RSTRING_PTR(str);
|
||||||
|
if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
|
||||||
|
return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
|
||||||
|
}
|
||||||
|
if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
|
||||||
|
return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
|
||||||
|
}
|
||||||
|
return rb_ascii8bit_encoding();
|
||||||
|
}
|
||||||
|
return rb_enc_from_index(encidx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static rb_encoding *
|
||||||
|
get_encoding(VALUE str)
|
||||||
|
{
|
||||||
|
return get_actual_encoding(ENCODING_GET(str), str);
|
||||||
|
}
|
||||||
|
|
||||||
static int fstring_cmp(VALUE a, VALUE b);
|
static int fstring_cmp(VALUE a, VALUE b);
|
||||||
|
|
||||||
|
@ -4750,8 +4788,8 @@ rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
|
||||||
VALUE
|
VALUE
|
||||||
rb_str_inspect(VALUE str)
|
rb_str_inspect(VALUE str)
|
||||||
{
|
{
|
||||||
rb_encoding *enc = STR_ENC_GET(str);
|
int encidx = ENCODING_GET(str);
|
||||||
int encidx = rb_enc_to_index(enc);
|
rb_encoding *enc = rb_enc_from_index(encidx), *actenc;
|
||||||
const char *p, *pend, *prev;
|
const char *p, *pend, *prev;
|
||||||
char buf[CHAR_ESC_LEN + 1];
|
char buf[CHAR_ESC_LEN + 1];
|
||||||
VALUE result = rb_str_buf_new(0);
|
VALUE result = rb_str_buf_new(0);
|
||||||
|
@ -4766,27 +4804,10 @@ rb_str_inspect(VALUE str)
|
||||||
|
|
||||||
p = RSTRING_PTR(str); pend = RSTRING_END(str);
|
p = RSTRING_PTR(str); pend = RSTRING_END(str);
|
||||||
prev = p;
|
prev = p;
|
||||||
if (encidx == ENCINDEX_UTF_16 && p + 2 <= pend) {
|
actenc = get_actual_encoding(encidx, str);
|
||||||
const unsigned char *q = (const unsigned char *)p;
|
if (actenc != enc) {
|
||||||
if (q[0] == 0xFE && q[1] == 0xFF)
|
enc = actenc;
|
||||||
enc = rb_enc_from_index(ENCINDEX_UTF_16BE);
|
if (unicode_p) unicode_p = rb_enc_unicode_p(enc);
|
||||||
else if (q[0] == 0xFF && q[1] == 0xFE)
|
|
||||||
enc = rb_enc_from_index(ENCINDEX_UTF_16LE);
|
|
||||||
else {
|
|
||||||
enc = rb_ascii8bit_encoding();
|
|
||||||
unicode_p = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (encidx == ENCINDEX_UTF_32 && p + 4 <= pend) {
|
|
||||||
const unsigned char *q = (const unsigned char *)p;
|
|
||||||
if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
|
|
||||||
enc = rb_enc_from_index(ENCINDEX_UTF_32BE);
|
|
||||||
else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
|
|
||||||
enc = rb_enc_from_index(ENCINDEX_UTF_32LE);
|
|
||||||
else {
|
|
||||||
enc = rb_ascii8bit_encoding();
|
|
||||||
unicode_p = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
while (p < pend) {
|
while (p < pend) {
|
||||||
unsigned int c, cc;
|
unsigned int c, cc;
|
||||||
|
|
|
@ -226,24 +226,35 @@ class TestM17N < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_utf_16_32_inspect
|
STR_WITHOUT_BOM = "\u3042".freeze
|
||||||
str = "\u3042"
|
STR_WITH_BOM = "\uFEFF\u3042".freeze
|
||||||
%w/UTF-16 UTF-32/.each do |enc|
|
bug8940 = '[ruby-core:59757] [Bug #8940]'
|
||||||
%w/BE LE/.each do |endian|
|
bug9415 = '[ruby-dev:47895] [Bug #9415]'
|
||||||
s = str.encode(enc + endian)
|
%w/UTF-16 UTF-32/.each do |enc|
|
||||||
|
%w/BE LE/.each do |endian|
|
||||||
|
bom = "\uFEFF".encode("#{enc}#{endian}").force_encoding(enc)
|
||||||
|
|
||||||
|
define_method("test_utf_16_32_inspect(#{enc}#{endian})") do
|
||||||
|
s = STR_WITHOUT_BOM.encode(enc + endian)
|
||||||
# When a UTF-16/32 string doesn't have a BOM,
|
# When a UTF-16/32 string doesn't have a BOM,
|
||||||
# inspect as a dummy encoding string.
|
# inspect as a dummy encoding string.
|
||||||
assert_equal(s.dup.force_encoding("ISO-2022-JP").inspect,
|
assert_equal(s.dup.force_encoding("ISO-2022-JP").inspect,
|
||||||
s.dup.force_encoding(enc).inspect)
|
s.dup.force_encoding(enc).inspect)
|
||||||
|
assert_normal_exit("#{bom.b.dump}.force_encoding('#{enc}').inspect", bug8940)
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
str = "\uFEFF\u3042"
|
define_method("test_utf_16_32_codepoints(#{enc}#{endian})") do
|
||||||
%w/UTF-16 UTF-32/.each do |enc|
|
assert_equal([0xFEFF], bom.codepoints, bug9415)
|
||||||
%w/BE LE/.each do |endian|
|
end
|
||||||
s = str.encode(enc + endian)
|
|
||||||
# When a UTF-16/32 string doesn't have a BOM,
|
define_method("test_utf_16_32_ord(#{enc}#{endian})") do
|
||||||
# inspect as a dummy encoding string.
|
assert_equal(0xFEFF, bom.ord, bug9415)
|
||||||
|
end
|
||||||
|
|
||||||
|
define_method("test_utf_16_32_inspect(#{enc}#{endian}-BOM)") do
|
||||||
|
s = STR_WITH_BOM.encode(enc + endian)
|
||||||
|
# When a UTF-16/32 string has a BOM,
|
||||||
|
# inspect as a particular encoding string.
|
||||||
assert_equal(s.inspect,
|
assert_equal(s.inspect,
|
||||||
s.dup.force_encoding(enc).inspect)
|
s.dup.force_encoding(enc).inspect)
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#define RUBY_VERSION "2.1.1"
|
#define RUBY_VERSION "2.1.1"
|
||||||
#define RUBY_RELEASE_DATE "2014-02-21"
|
#define RUBY_RELEASE_DATE "2014-02-21"
|
||||||
#define RUBY_PATCHLEVEL 40
|
#define RUBY_PATCHLEVEL 41
|
||||||
|
|
||||||
#define RUBY_RELEASE_YEAR 2014
|
#define RUBY_RELEASE_YEAR 2014
|
||||||
#define RUBY_RELEASE_MONTH 2
|
#define RUBY_RELEASE_MONTH 2
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue