mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	* regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8 characters that have U+00FE or invalid characters.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7398 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									5fb312bd18
								
							
						
					
					
						commit
						09542e3269
					
				
					 3 changed files with 26 additions and 11 deletions
				
			
		| 
						 | 
				
			
			@ -1,3 +1,8 @@
 | 
			
		|||
Sun Nov 28 12:08:15 2004  Kazuo Saito  <ksaito@uranus.dti.ne.jp>
 | 
			
		||||
 | 
			
		||||
	* regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8
 | 
			
		||||
	characters that have U+00FE or invalid characters.
 | 
			
		||||
 | 
			
		||||
Sun Nov 28 12:07:04 2004  Kazuo Saito  <ksaito@uranus.dti.ne.jp>
 | 
			
		||||
 | 
			
		||||
	* regexec.c, test/ruby/test_regexp.rb: fixed segmentation fault
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										20
									
								
								regparse.c
									
										
									
									
									
								
							
							
						
						
									
										20
									
								
								regparse.c
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -3631,6 +3631,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
 | 
			
		|||
  case CCS_RANGE:
 | 
			
		||||
    if (intype == *type) {
 | 
			
		||||
      if (intype == CCV_SB) {
 | 
			
		||||
        if (*vs > 0xff || v > 0xff)
 | 
			
		||||
          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
 | 
			
		||||
	if (*vs > v) {
 | 
			
		||||
	  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
 | 
			
		||||
	    goto ccs_range_end;
 | 
			
		||||
| 
						 | 
				
			
			@ -3646,14 +3649,8 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
 | 
			
		|||
    }
 | 
			
		||||
    else {
 | 
			
		||||
#if 0
 | 
			
		||||
      if (intype == CCV_CODE_POINT && *type == CCV_SB &&
 | 
			
		||||
	  ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) {
 | 
			
		||||
	bitset_set_range(cc->bs, (int )*vs, 0x7f);
 | 
			
		||||
	r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v);
 | 
			
		||||
	if (r < 0) return r;
 | 
			
		||||
      }
 | 
			
		||||
#else
 | 
			
		||||
      if (intype == CCV_CODE_POINT && *type == CCV_SB) {
 | 
			
		||||
#endif
 | 
			
		||||
	if (*vs > v) {
 | 
			
		||||
	  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
 | 
			
		||||
	    goto ccs_range_end;
 | 
			
		||||
| 
						 | 
				
			
			@ -3663,10 +3660,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
 | 
			
		|||
	bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
 | 
			
		||||
	r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
 | 
			
		||||
	if (r < 0) return r;
 | 
			
		||||
#if 0
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
      else
 | 
			
		||||
	return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
  ccs_range_end:
 | 
			
		||||
    *state = CCS_COMPLETE;
 | 
			
		||||
| 
						 | 
				
			
			@ -3826,7 +3824,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 | 
			
		|||
      break;
 | 
			
		||||
 | 
			
		||||
    case TK_CODE_POINT:
 | 
			
		||||
      v = tok->u.code;
 | 
			
		||||
      v = (OnigCodePoint )tok->u.code;
 | 
			
		||||
      in_israw = 1;
 | 
			
		||||
    val_entry:
 | 
			
		||||
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
 | 
			
		||||
| 
						 | 
				
			
			@ -3952,7 +3950,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 | 
			
		|||
    case TK_CC_AND: /* && */
 | 
			
		||||
      {
 | 
			
		||||
	if (state == CCS_VALUE) {
 | 
			
		||||
	  r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
 | 
			
		||||
	  r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
 | 
			
		||||
			     &val_type, &state, env);
 | 
			
		||||
	  if (r != 0) goto err;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -3992,7 +3990,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 | 
			
		|||
  }
 | 
			
		||||
 | 
			
		||||
  if (state == CCS_VALUE) {
 | 
			
		||||
    r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
 | 
			
		||||
    r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
 | 
			
		||||
		       &val_type, &state, env);
 | 
			
		||||
    if (r != 0) goto err;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,4 +12,16 @@ class TestRegexp < Test::Unit::TestCase
 | 
			
		|||
  def test_ruby_dev_24887
 | 
			
		||||
    assert_equal("a".gsub(/a\Z/, ""), "")
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_yoshidam_net_20041111_1
 | 
			
		||||
    s = "[\xC2\xA0-\xC3\xBE]"
 | 
			
		||||
    assert_match(Regexp.new(s, nil, "u"), "\xC3\xBE")
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_yoshidam_net_20041111_2
 | 
			
		||||
    assert_raise(RegexpError) do
 | 
			
		||||
      s = "[\xFF-\xFF]"
 | 
			
		||||
      Regexp.new(s, nil, "u")
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue