mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
re.c: fix for wide character encodings
* re.c (rb_memsearch): should match only char boundaries in wide character encodings. [ruby-core:70220] [Bug #11413] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51470 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e60d35e302
commit
2c8986d23b
4 changed files with 47 additions and 12 deletions
|
@ -1,3 +1,8 @@
|
|||
Mon Aug 3 10:08:33 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* re.c (rb_memsearch): should match only char boundaries in wide
|
||||
character encodings. [ruby-core:70220] [Bug #11413]
|
||||
|
||||
Sun Aug 2 07:01:17 2015 Eric Wong <e@80x24.org>
|
||||
|
||||
* ext/openssl/lib/openssl/buffering.rb (gets):
|
||||
|
|
44
re.c
44
re.c
|
@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, l
|
|||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Search for the byte sequence xs (length m) inside ys (length n) for an
 * encoding whose minimum character width is 2 bytes.
 *
 * Only offsets that are multiples of the character size are tested, so a
 * match can never start in the middle of a character.
 *
 * Returns the byte offset of the first match, or -1 when there is none.
 * The code dereferences xs[0] and compares m-1 further bytes, so it
 * relies on m >= 1 and on xs/ys being readable for m/n bytes.
 */
static inline long
rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    const unsigned char first = *xs;
    const unsigned char *y = ys;
    enum {char_size = 2};

    /*
     * After `n -= m`, n is the number of bytes between y and the last
     * offset at which a full pattern still fits.  n == 0 is that last
     * offset itself and must be tested too, hence `>=` rather than `>`.
     */
    for (n -= m; n >= 0; n -= char_size, y += char_size) {
        /* cheap first-byte check before the full comparison */
        if (first == *y && memcmp(xs + 1, y + 1, m - 1) == 0)
            return y - ys;
    }
    return -1;
}
|
||||
|
||||
/*
 * Search for the byte sequence xs (length m) inside ys (length n) for an
 * encoding whose minimum character width is 4 bytes.
 *
 * Only offsets that are multiples of the character size are tested, so a
 * match can never start in the middle of a character.
 *
 * Returns the byte offset of the first match, or -1 when there is none.
 * The code dereferences xs[0] and compares m-1 further bytes, so it
 * relies on m >= 1 and on xs/ys being readable for m/n bytes.
 */
static inline long
rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    const unsigned char first = *xs;
    const unsigned char *y = ys;
    enum {char_size = 4};

    /*
     * After `n -= m`, n is the number of bytes between y and the last
     * offset at which a full pattern still fits.  n == 0 is that last
     * offset itself and must be tested too, hence `>=` rather than `>`.
     */
    for (n -= m; n >= 0; n -= char_size, y += char_size) {
        /* cheap first-byte check before the full comparison */
        if (first == *y && memcmp(xs + 1, y + 1, m - 1) == 0)
            return y - ys;
    }
    return -1;
}
|
||||
|
||||
long
|
||||
rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
|
||||
{
|
||||
|
@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
|
|||
else
|
||||
return -1;
|
||||
}
|
||||
else if (m <= SIZEOF_VALUE) {
|
||||
return rb_memsearch_ss(x0, m, y0, n);
|
||||
else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
|
||||
if (m <= SIZEOF_VALUE) {
|
||||
return rb_memsearch_ss(x0, m, y0, n);
|
||||
}
|
||||
else if (enc == rb_utf8_encoding()){
|
||||
return rb_memsearch_qs_utf8(x0, m, y0, n);
|
||||
}
|
||||
}
|
||||
else if (enc == rb_utf8_encoding()){
|
||||
return rb_memsearch_qs_utf8(x0, m, y0, n);
|
||||
else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
|
||||
return rb_memsearch_wchar(x0, m, y0, n);
|
||||
}
|
||||
else {
|
||||
return rb_memsearch_qs(x0, m, y0, n);
|
||||
else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
|
||||
return rb_memsearch_qchar(x0, m, y0, n);
|
||||
}
|
||||
return rb_memsearch_qs(x0, m, y0, n);
|
||||
}
|
||||
|
||||
#define REG_LITERAL FL_USER5
|
||||
|
|
7
string.c
7
string.c
|
@ -6544,15 +6544,10 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
|
|||
}
|
||||
|
||||
enc = STR_ENC_GET(str);
|
||||
if (NIL_P(spat)) {
|
||||
if (!NIL_P(rb_fs)) {
|
||||
spat = rb_fs;
|
||||
goto fs_set;
|
||||
}
|
||||
if (NIL_P(spat) && NIL_P(spat = rb_fs)) {
|
||||
split_type = awk;
|
||||
}
|
||||
else {
|
||||
fs_set:
|
||||
spat = get_pat_quoted(spat, 0);
|
||||
if (BUILTIN_TYPE(spat) == T_STRING) {
|
||||
rb_encoding *enc2 = STR_ENC_GET(spat);
|
||||
|
|
|
@ -1236,6 +1236,9 @@ class TestM17N < Test::Unit::TestCase
|
|||
each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected|
|
||||
assert_equal(expected, str.split(sep, -1))
|
||||
end
|
||||
each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected|
|
||||
assert_equal(expected, str.split(sep, -1))
|
||||
end
|
||||
end
|
||||
|
||||
def test_nonascii_method_name
|
||||
|
|
Loading…
Reference in a new issue