mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* numeric.c (rb_enc_uint_chr): raise RangeError when added codepoint
is invalid. [Feature #5855] [Bug #5863] [Bug #5864] * string.c (rb_str_concat): ditto. * string.c (rb_str_concat): set encoding as ASCII-8BIT when the string is US-ASCII and the argument is an integer greater than 127. * regenc.c (onigenc_mb2_code_to_mbclen): rearrange error code. * enc/euc_jp.c (code_to_mbclen): ditto. * enc/shift_jis.c (code_to_mbclen): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
5393622dad
commit
800f04c6a5
9 changed files with 79 additions and 20 deletions
16
ChangeLog
16
ChangeLog
|
@ -1,3 +1,19 @@
|
||||||
|
Mon Jan 9 01:12:35 2012 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* numeric.c (rb_enc_uint_chr): raise RangeError when added codepoint
|
||||||
|
is invalid. [Feature #5855] [Bug #5863] [Bug #5864]
|
||||||
|
|
||||||
|
* string.c (rb_str_concat): ditto.
|
||||||
|
|
||||||
|
* string.c (rb_str_concat): set encoding as ASCII-8BIT when the string
|
||||||
|
is US-ASCII and the argument is an integer greater than 127.
|
||||||
|
|
||||||
|
* regenc.c (onigenc_mb2_code_to_mbclen): rearrange error code.
|
||||||
|
|
||||||
|
* enc/euc_jp.c (code_to_mbclen): ditto.
|
||||||
|
|
||||||
|
* enc/shift_jis.c (code_to_mbclen): ditto.
|
||||||
|
|
||||||
Sun Jan 8 20:31:45 2012 Narihiro Nakamura <narihiro@netlab.jp>
|
Sun Jan 8 20:31:45 2012 Narihiro Nakamura <narihiro@netlab.jp>
|
||||||
|
|
||||||
* gc.c : consider header bytes which are used by malloc.
|
* gc.c : consider header bytes which are used by malloc.
|
||||||
|
|
|
@ -154,9 +154,10 @@ static int
|
||||||
code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
|
code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
|
||||||
{
|
{
|
||||||
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
|
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
|
||||||
else if (code > 0xffffff) return 0;
|
else if (code > 0xffffff)
|
||||||
else if ((code & 0xff0000) >= 0x800000) return 3;
|
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
|
||||||
else if ((code & 0xff00) >= 0x8000) return 2;
|
else if (code & 0x800000) return 3;
|
||||||
|
else if (code & 0x8000) return 2;
|
||||||
else
|
else
|
||||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,13 +135,13 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
|
||||||
if (EncLen_SJIS[(int )code] == 1)
|
if (EncLen_SJIS[(int )code] == 1)
|
||||||
return 1;
|
return 1;
|
||||||
else
|
else
|
||||||
return 0;
|
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||||
}
|
}
|
||||||
else if (code <= 0xffff) {
|
else if (code <= 0xffff) {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static OnigCodePoint
|
static OnigCodePoint
|
||||||
|
|
11
numeric.c
11
numeric.c
|
@ -2281,11 +2281,20 @@ rb_enc_uint_chr(unsigned int code, rb_encoding *enc)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
VALUE str;
|
VALUE str;
|
||||||
if ((n = rb_enc_codelen(code, enc)) <= 0) {
|
switch (n = rb_enc_codelen(code, enc)) {
|
||||||
|
case ONIGERR_INVALID_CODE_POINT_VALUE:
|
||||||
|
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
|
||||||
|
break;
|
||||||
|
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||||
|
case 0:
|
||||||
rb_raise(rb_eRangeError, "%u out of char range", code);
|
rb_raise(rb_eRangeError, "%u out of char range", code);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
str = rb_enc_str_new(0, n, enc);
|
str = rb_enc_str_new(0, n, enc);
|
||||||
rb_enc_mbcput(code, RSTRING_PTR(str), enc);
|
rb_enc_mbcput(code, RSTRING_PTR(str), enc);
|
||||||
|
if (rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_END(str), enc) != n) {
|
||||||
|
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
|
||||||
|
}
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
5
regenc.c
5
regenc.c
|
@ -732,8 +732,9 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
|
||||||
extern int
|
extern int
|
||||||
onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
|
onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
|
||||||
{
|
{
|
||||||
if ((code & 0xff00) != 0) return 2;
|
if (code <= 0xff) return 1;
|
||||||
else return 1;
|
if (code <= 0xffff) return 2;
|
||||||
|
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int
|
extern int
|
||||||
|
|
43
string.c
43
string.c
|
@ -2074,10 +2074,11 @@ rb_str_append(VALUE str, VALUE str2)
|
||||||
VALUE
|
VALUE
|
||||||
rb_str_concat(VALUE str1, VALUE str2)
|
rb_str_concat(VALUE str1, VALUE str2)
|
||||||
{
|
{
|
||||||
unsigned int lc;
|
unsigned int code;
|
||||||
|
rb_encoding *enc = STR_ENC_GET(str1);
|
||||||
|
|
||||||
if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
|
if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
|
||||||
if (rb_num_to_uint(str2, &lc) == 0) {
|
if (rb_num_to_uint(str2, &code) == 0) {
|
||||||
}
|
}
|
||||||
else if (FIXNUM_P(str2)) {
|
else if (FIXNUM_P(str2)) {
|
||||||
rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
|
rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
|
||||||
|
@ -2089,22 +2090,46 @@ rb_str_concat(VALUE str1, VALUE str2)
|
||||||
else {
|
else {
|
||||||
return rb_str_append(str1, str2);
|
return rb_str_append(str1, str2);
|
||||||
}
|
}
|
||||||
{
|
|
||||||
rb_encoding *enc = STR_ENC_GET(str1);
|
if (enc == rb_usascii_encoding()) {
|
||||||
|
/* US-ASCII automatically extended to ASCII-8BIT */
|
||||||
|
char buf[1] = {(char)code};
|
||||||
|
if (code > 0xFF) {
|
||||||
|
rb_raise(rb_eRangeError, "%u out of char range", code);
|
||||||
|
}
|
||||||
|
rb_str_cat(str1, buf, 1);
|
||||||
|
if (code > 127) {
|
||||||
|
rb_enc_associate(str1, rb_ascii8bit_encoding());
|
||||||
|
ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
long pos = RSTRING_LEN(str1);
|
long pos = RSTRING_LEN(str1);
|
||||||
int cr = ENC_CODERANGE(str1);
|
int cr = ENC_CODERANGE(str1);
|
||||||
int len;
|
int len;
|
||||||
|
char *buf;
|
||||||
|
|
||||||
if ((len = rb_enc_codelen(lc, enc)) <= 0) {
|
switch (len = rb_enc_codelen(code, enc)) {
|
||||||
rb_raise(rb_eRangeError, "%u invalid char", lc);
|
case ONIGERR_INVALID_CODE_POINT_VALUE:
|
||||||
|
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
|
||||||
|
break;
|
||||||
|
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||||
|
case 0:
|
||||||
|
rb_raise(rb_eRangeError, "%u out of char range", code);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
buf = ALLOCA_N(char, len + 1);
|
||||||
|
rb_enc_mbcput(code, buf, enc);
|
||||||
|
if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
|
||||||
|
rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
|
||||||
}
|
}
|
||||||
rb_str_resize(str1, pos+len);
|
rb_str_resize(str1, pos+len);
|
||||||
rb_enc_mbcput(lc, RSTRING_PTR(str1)+pos, enc);
|
strncpy(RSTRING_PTR(str1) + pos, buf, len);
|
||||||
if (cr == ENC_CODERANGE_7BIT && lc > 127)
|
if (cr == ENC_CODERANGE_7BIT && code > 127)
|
||||||
cr = ENC_CODERANGE_VALID;
|
cr = ENC_CODERANGE_VALID;
|
||||||
ENC_CODERANGE_SET(str1, cr);
|
ENC_CODERANGE_SET(str1, cr);
|
||||||
return str1;
|
|
||||||
}
|
}
|
||||||
|
return str1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -22,6 +22,6 @@ class TestShiftJIS < Test::Unit::TestCase
|
||||||
s = "あいうえお"
|
s = "あいうえお"
|
||||||
s << 0x82a9
|
s << 0x82a9
|
||||||
assert_equal("あいうえおか", s)
|
assert_equal("あいうえおか", s)
|
||||||
assert_raise(ArgumentError) { s << 0x82 }
|
assert_raise(RangeError) { s << 0x82 }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1161,6 +1161,7 @@ class TestM17N < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_str_concat
|
def test_str_concat
|
||||||
assert_equal(1, "".concat(0xA2).size)
|
assert_equal(1, "".concat(0xA2).size)
|
||||||
|
assert_equal(Encoding::ASCII_8BIT, "".force_encoding("US-ASCII").concat(0xA2).encoding)
|
||||||
assert_equal("A\x84\x31\xA4\x39".force_encoding("GB18030"),
|
assert_equal("A\x84\x31\xA4\x39".force_encoding("GB18030"),
|
||||||
"A".force_encoding("GB18030") << 0x8431A439)
|
"A".force_encoding("GB18030") << 0x8431A439)
|
||||||
end
|
end
|
||||||
|
@ -1220,6 +1221,14 @@ class TestM17N < Test::Unit::TestCase
|
||||||
2206368128.chr(Encoding::UTF_8)
|
2206368128.chr(Encoding::UTF_8)
|
||||||
}
|
}
|
||||||
assert_not_match(/-\d+ out of char range/, e.message)
|
assert_not_match(/-\d+ out of char range/, e.message)
|
||||||
|
|
||||||
|
assert_raise(RangeError){ 0x80.chr("US-ASCII") }
|
||||||
|
assert_raise(RangeError){ 0x80.chr("SHIFT_JIS") }
|
||||||
|
assert_raise(RangeError){ 0xE0.chr("SHIFT_JIS") }
|
||||||
|
assert_raise(RangeError){ 0x100.chr("SHIFT_JIS") }
|
||||||
|
assert_raise(RangeError){ 0xA0.chr("EUC-JP") }
|
||||||
|
assert_raise(RangeError){ 0x100.chr("EUC-JP") }
|
||||||
|
assert_raise(RangeError){ 0xA1A0.chr("EUC-JP") }
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_marshal
|
def test_marshal
|
||||||
|
|
|
@ -808,8 +808,6 @@ class TestRegexp < Test::Unit::TestCase
|
||||||
#assert_match(/^(\ufb05)\1\1$/i, "\ufb05\ufb06st") # this must be bug...
|
#assert_match(/^(\ufb05)\1\1$/i, "\ufb05\ufb06st") # this must be bug...
|
||||||
assert_match(/^\ufb05{3}$/i, "\ufb05\ufb06st")
|
assert_match(/^\ufb05{3}$/i, "\ufb05\ufb06st")
|
||||||
assert_match(/^\u03b9\u0308\u0301$/i, "\u0390")
|
assert_match(/^\u03b9\u0308\u0301$/i, "\u0390")
|
||||||
assert_nothing_raised { 0x03ffffff.chr("utf-8").size }
|
|
||||||
assert_nothing_raised { 0x7fffffff.chr("utf-8").size }
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_unicode_age
|
def test_unicode_age
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue