mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8 characters that have U+00FE or invalid characters.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7398 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
5fb312bd18
commit
09542e3269
3 changed files with 26 additions and 11 deletions
|
@ -1,3 +1,8 @@
|
||||||
|
Sun Nov 28 12:08:15 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
||||||
|
|
||||||
|
* regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8
|
||||||
|
characters that have U+00FE or invalid characters.
|
||||||
|
|
||||||
Sun Nov 28 12:07:04 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
Sun Nov 28 12:07:04 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
||||||
|
|
||||||
* regexec.c, test/ruby/test_regexp.rb: fixed segmentation fault
|
* regexec.c, test/ruby/test_regexp.rb: fixed segmentation fault
|
||||||
|
|
20
regparse.c
20
regparse.c
|
@ -3631,6 +3631,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
||||||
case CCS_RANGE:
|
case CCS_RANGE:
|
||||||
if (intype == *type) {
|
if (intype == *type) {
|
||||||
if (intype == CCV_SB) {
|
if (intype == CCV_SB) {
|
||||||
|
if (*vs > 0xff || v > 0xff)
|
||||||
|
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||||
|
|
||||||
if (*vs > v) {
|
if (*vs > v) {
|
||||||
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
|
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
|
||||||
goto ccs_range_end;
|
goto ccs_range_end;
|
||||||
|
@ -3646,14 +3649,8 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
#if 0
|
#if 0
|
||||||
if (intype == CCV_CODE_POINT && *type == CCV_SB &&
|
|
||||||
ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) {
|
|
||||||
bitset_set_range(cc->bs, (int )*vs, 0x7f);
|
|
||||||
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v);
|
|
||||||
if (r < 0) return r;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (intype == CCV_CODE_POINT && *type == CCV_SB) {
|
if (intype == CCV_CODE_POINT && *type == CCV_SB) {
|
||||||
|
#endif
|
||||||
if (*vs > v) {
|
if (*vs > v) {
|
||||||
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
|
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
|
||||||
goto ccs_range_end;
|
goto ccs_range_end;
|
||||||
|
@ -3663,10 +3660,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
||||||
bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
|
bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
|
||||||
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
|
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
|
||||||
if (r < 0) return r;
|
if (r < 0) return r;
|
||||||
|
#if 0
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
else
|
else
|
||||||
return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
|
return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
ccs_range_end:
|
ccs_range_end:
|
||||||
*state = CCS_COMPLETE;
|
*state = CCS_COMPLETE;
|
||||||
|
@ -3826,7 +3824,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TK_CODE_POINT:
|
case TK_CODE_POINT:
|
||||||
v = tok->u.code;
|
v = (OnigCodePoint )tok->u.code;
|
||||||
in_israw = 1;
|
in_israw = 1;
|
||||||
val_entry:
|
val_entry:
|
||||||
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
|
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
|
||||||
|
@ -3952,7 +3950,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
||||||
case TK_CC_AND: /* && */
|
case TK_CC_AND: /* && */
|
||||||
{
|
{
|
||||||
if (state == CCS_VALUE) {
|
if (state == CCS_VALUE) {
|
||||||
r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
|
r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
|
||||||
&val_type, &state, env);
|
&val_type, &state, env);
|
||||||
if (r != 0) goto err;
|
if (r != 0) goto err;
|
||||||
}
|
}
|
||||||
|
@ -3992,7 +3990,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state == CCS_VALUE) {
|
if (state == CCS_VALUE) {
|
||||||
r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
|
r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
|
||||||
&val_type, &state, env);
|
&val_type, &state, env);
|
||||||
if (r != 0) goto err;
|
if (r != 0) goto err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,4 +12,16 @@ class TestRegexp < Test::Unit::TestCase
|
||||||
def test_ruby_dev_24887
|
def test_ruby_dev_24887
|
||||||
assert_equal("a".gsub(/a\Z/, ""), "")
|
assert_equal("a".gsub(/a\Z/, ""), "")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_yoshidam_net_20041111_1
|
||||||
|
s = "[\xC2\xA0-\xC3\xBE]"
|
||||||
|
assert_match(Regexp.new(s, nil, "u"), "\xC3\xBE")
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_yoshidam_net_20041111_2
|
||||||
|
assert_raise(RegexpError) do
|
||||||
|
s = "[\xFF-\xFF]"
|
||||||
|
Regexp.new(s, nil, "u")
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue