mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
fix UTF-32 valid_encoding?
* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely. [ruby-core:79966] [Bug #13292]
* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.
* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid Unicode codepoints.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57816 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e65c9bd177
commit
4171ed6c21
4 changed files with 101 additions and 6 deletions
|
@ -30,11 +30,23 @@
|
|||
#include "regenc.h"
|
||||
#include "iso_8859.h"
|
||||
|
||||
static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
|
||||
/*
 * Classify the bytes in [p, e) as one UTF-32BE character.
 *
 * Result, as built by the ONIGENC_CONSTRUCT_MBCLEN_* macros:
 *   - e before p            -> INVALID
 *   - fewer than 4 bytes    -> NEEDMORE(number of bytes still missing)
 *   - 4 or more bytes       -> decode with utf32be_mbc_to_code(); INVALID when
 *                              UNICODE_VALID_CODEPOINT_P rejects the value,
 *                              otherwise CHARFOUND(4)
 */
static int
|
||||
/* NOTE(review): the next two signature lines look like the pre-change
 * version kept by the diff view (all arguments ARG_UNUSED); the pair after
 * them is the replacement -- confirm against the repository. */
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
|
||||
OnigEncoding enc)
|
||||
{
|
||||
/* NOTE(review): presumably the removed pre-change body (unconditional
 * length 4, no validation) -- confirm. */
return 4;
|
||||
/* End pointer precedes the start pointer: nothing can be decoded. */
if (e < p) {
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||
}
|
||||
/* Truncated input: report how many more bytes are needed to reach 4. */
else if (e-p < 4) {
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
|
||||
}
|
||||
else {
|
||||
/* At least 4 bytes are available: decode, then validate the code point. */
OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
|
||||
if (!UNICODE_VALID_CODEPOINT_P(c))
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -30,11 +30,23 @@
|
|||
#include "regenc.h"
|
||||
#include "iso_8859.h"
|
||||
|
||||
static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
|
||||
/*
 * Classify the bytes in [p, e) as one UTF-32LE character.
 *
 * Result, as built by the ONIGENC_CONSTRUCT_MBCLEN_* macros:
 *   - e before p            -> INVALID
 *   - fewer than 4 bytes    -> NEEDMORE(number of bytes still missing)
 *   - 4 or more bytes       -> decode with utf32le_mbc_to_code(); INVALID when
 *                              UNICODE_VALID_CODEPOINT_P rejects the value,
 *                              otherwise CHARFOUND(4)
 */
static int
|
||||
/* NOTE(review): the next two signature lines look like the pre-change
 * version kept by the diff view (all arguments ARG_UNUSED); the pair after
 * them is the replacement -- confirm against the repository. */
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
|
||||
OnigEncoding enc)
|
||||
{
|
||||
/* NOTE(review): presumably the removed pre-change body (unconditional
 * length 4, no validation) -- confirm. */
return 4;
|
||||
/* End pointer precedes the start pointer: nothing can be decoded. */
if (e < p) {
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||
}
|
||||
/* Truncated input: report how many more bytes are needed to reach 4. */
else if (e-p < 4) {
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
|
||||
}
|
||||
else {
|
||||
/* At least 4 bytes are available: decode, then validate the code point. */
OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
|
||||
if (!UNICODE_VALID_CODEPOINT_P(c))
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
3
regenc.h
3
regenc.h
|
@ -186,6 +186,9 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigA
|
|||
/*
 * Surrogate tests on a single byte value (only the low 8 bits of c are
 * examined by the masks):
 *   UTF16_IS_SURROGATE_FIRST  - low byte in 0xd8..0xdb
 *   UTF16_IS_SURROGATE_SECOND - low byte in 0xdc..0xdf
 *   UTF16_IS_SURROGATE        - low byte in 0xd8..0xdf
 * NOTE(review): presumably c is the high-order byte of a 16-bit unit --
 * confirm against callers.
 */
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||
/*
 * True when c is a valid Unicode code point: at most 0x10ffff and, for
 * values below 0x10000, not one whose high byte (c >> 8) satisfies
 * UTF16_IS_SURROGATE, i.e. not in 0xd800..0xdfff.
 * The argument is expanded several times, so it should have no side effects.
 */
#define UNICODE_VALID_CODEPOINT_P(c) ( \
|
||||
((c) <= 0x10ffff) && \
|
||||
!((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
|
||||
|
||||
/*
 * Look up c in OnigEncISO_8859_1_ToLowerCaseTable.
 * The argument is used directly as the array index (not parenthesized).
 */
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToLowerCaseTable[c]
|
||||
|
|
|
@ -90,5 +90,73 @@ EOT
|
|||
assert_equal(sl, "a".ord.chr("utf-32le"))
|
||||
assert_equal(sb, "a".ord.chr("utf-32be"))
|
||||
end
|
||||
|
||||
# Checks String#valid_encoding? on byte strings tagged as UTF-32BE with
# force_encoding. The first list must be reported valid; the second list
# must be reported invalid.
def test_utf32be_valid_encoding
|
||||
all_assertions do |a|
|
||||
# Valid: U+0000, "a", U+3040, U+D7FF, U+E000, U+FFFF, U+10FFFF
# (the values just outside the surrogate range and the upper limit).
[
|
||||
"\x00\x00\x00\x00",
|
||||
"\x00\x00\x00a",
|
||||
"\x00\x00\x30\x40",
|
||||
"\x00\x00\xd7\xff",
|
||||
"\x00\x00\xe0\x00",
|
||||
"\x00\x00\xff\xff",
|
||||
"\x00\x10\xff\xff",
|
||||
].each {|s|
|
||||
s.force_encoding("utf-32be")
|
||||
a.for(s) {
|
||||
assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||
}
|
||||
}
|
||||
# Invalid: strings of 1-3 bytes, the surrogate boundaries
# U+D800 / U+DBFF / U+DC00 / U+DFFF, and 0x110000 (above U+10FFFF).
[
|
||||
"a",
|
||||
"\x00a",
|
||||
"\x00\x00a",
|
||||
"\x00\x00\xd8\x00",
|
||||
"\x00\x00\xdb\xff",
|
||||
"\x00\x00\xdc\x00",
|
||||
"\x00\x00\xdf\xff",
|
||||
"\x00\x11\x00\x00",
|
||||
].each {|s|
|
||||
s.force_encoding("utf-32be")
|
||||
a.for(s) {
|
||||
assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||
}
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
# Checks String#valid_encoding? on byte strings tagged as UTF-32LE with
# force_encoding. The first list must be reported valid; the second list
# must be reported invalid.
def test_utf32le_valid_encoding
|
||||
all_assertions do |a|
|
||||
# Valid (little-endian byte order): U+0000, "a", U+3040, U+D7FF, U+E000,
# U+FFFF, U+10FFFF.
[
|
||||
"\x00\x00\x00\x00",
|
||||
"a\x00\x00\x00",
|
||||
"\x40\x30\x00\x00",
|
||||
"\xff\xd7\x00\x00",
|
||||
"\x00\xe0\x00\x00",
|
||||
"\xff\xff\x00\x00",
|
||||
"\xff\xff\x10\x00",
|
||||
].each {|s|
|
||||
s.force_encoding("utf-32le")
|
||||
a.for(s) {
|
||||
assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||
}
|
||||
}
|
||||
# Invalid: strings of 1-3 bytes, the surrogate boundaries
# U+D800 / U+DBFF / U+DC00 / U+DFFF, and 0x110000 (above U+10FFFF).
[
|
||||
"a",
|
||||
"a\x00",
|
||||
"a\x00\x00",
|
||||
"\x00\xd8\x00\x00",
|
||||
"\xff\xdb\x00\x00",
|
||||
"\x00\xdc\x00\x00",
|
||||
"\xff\xdf\x00\x00",
|
||||
"\x00\x00\x11\x00",
|
||||
].each {|s|
|
||||
s.force_encoding("utf-32le")
|
||||
a.for(s) {
|
||||
assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||
}
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue