mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	merge revision(s) 57816,57817: [Backport #13292]
fix UTF-32 valid_encoding?

* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely.
  [ruby-core:79966] [Bug #13292]

* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.

* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid Unicode
  codepoints.

fix UTF-32 valid_encoding?

* test/ruby/test_io_m17n.rb (TestIO_M17N#test_puts_widechar): do not
  use invalid codepoint.  [ruby-core:79966] [Bug #13292]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_3@58183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									ff6e8710ee
								
							
						
					
					
						commit
						909331e26e
					
				
					 6 changed files with 104 additions and 9 deletions
				
			
		| 
						 | 
				
			
			@ -29,11 +29,23 @@
 | 
			
		|||
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 | 
			
		||||
static int
 | 
			
		||||
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
 | 
			
		||||
		    OnigEncoding enc ARG_UNUSED)
 | 
			
		||||
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
 | 
			
		||||
		    OnigEncoding enc)
 | 
			
		||||
{
 | 
			
		||||
  return 4;
 | 
			
		||||
  if (e < p) {
 | 
			
		||||
    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 | 
			
		||||
  }
 | 
			
		||||
  else if (e-p < 4) {
 | 
			
		||||
    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
 | 
			
		||||
    if (!UNICODE_VALID_CODEPOINT_P(c))
 | 
			
		||||
      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 | 
			
		||||
    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,11 +29,23 @@
 | 
			
		|||
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 | 
			
		||||
static int
 | 
			
		||||
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
 | 
			
		||||
		    OnigEncoding enc ARG_UNUSED)
 | 
			
		||||
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
 | 
			
		||||
		    OnigEncoding enc)
 | 
			
		||||
{
 | 
			
		||||
  return 4;
 | 
			
		||||
  if (e < p) {
 | 
			
		||||
    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 | 
			
		||||
  }
 | 
			
		||||
  else if (e-p < 4) {
 | 
			
		||||
    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
 | 
			
		||||
    if (!UNICODE_VALID_CODEPOINT_P(c))
 | 
			
		||||
      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 | 
			
		||||
    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										3
									
								
								regenc.h
									
										
									
									
									
								
							
							
						
						
									
										3
									
								
								regenc.h
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -169,6 +169,9 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O
 | 
			
		|||
#define UTF16_IS_SURROGATE_FIRST(c)    (((c) & 0xfc) == 0xd8)
 | 
			
		||||
#define UTF16_IS_SURROGATE_SECOND(c)   (((c) & 0xfc) == 0xdc)
 | 
			
		||||
#define UTF16_IS_SURROGATE(c)          (((c) & 0xf8) == 0xd8)
 | 
			
		||||
#define UNICODE_VALID_CODEPOINT_P(c) ( \
 | 
			
		||||
	((c) <= 0x10ffff) && \
 | 
			
		||||
	!((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
 | 
			
		||||
 | 
			
		||||
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
 | 
			
		||||
  OnigEncISO_8859_1_ToLowerCaseTable[c]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -90,5 +90,73 @@ EOT
 | 
			
		|||
    assert_equal(sl, "a".ord.chr("utf-32le"))
 | 
			
		||||
    assert_equal(sb, "a".ord.chr("utf-32be"))
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_utf32be_valid_encoding
 | 
			
		||||
    all_assertions do |a|
 | 
			
		||||
      [
 | 
			
		||||
        "\x00\x00\x00\x00",
 | 
			
		||||
        "\x00\x00\x00a",
 | 
			
		||||
        "\x00\x00\x30\x40",
 | 
			
		||||
        "\x00\x00\xd7\xff",
 | 
			
		||||
        "\x00\x00\xe0\x00",
 | 
			
		||||
        "\x00\x00\xff\xff",
 | 
			
		||||
        "\x00\x10\xff\xff",
 | 
			
		||||
      ].each {|s|
 | 
			
		||||
        s.force_encoding("utf-32be")
 | 
			
		||||
        a.for(s) {
 | 
			
		||||
          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      [
 | 
			
		||||
        "a",
 | 
			
		||||
        "\x00a",
 | 
			
		||||
        "\x00\x00a",
 | 
			
		||||
        "\x00\x00\xd8\x00",
 | 
			
		||||
        "\x00\x00\xdb\xff",
 | 
			
		||||
        "\x00\x00\xdc\x00",
 | 
			
		||||
        "\x00\x00\xdf\xff",
 | 
			
		||||
        "\x00\x11\x00\x00",
 | 
			
		||||
      ].each {|s|
 | 
			
		||||
        s.force_encoding("utf-32be")
 | 
			
		||||
        a.for(s) {
 | 
			
		||||
          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_utf32le_valid_encoding
 | 
			
		||||
    all_assertions do |a|
 | 
			
		||||
      [
 | 
			
		||||
        "\x00\x00\x00\x00",
 | 
			
		||||
        "a\x00\x00\x00",
 | 
			
		||||
        "\x40\x30\x00\x00",
 | 
			
		||||
        "\xff\xd7\x00\x00",
 | 
			
		||||
        "\x00\xe0\x00\x00",
 | 
			
		||||
        "\xff\xff\x00\x00",
 | 
			
		||||
        "\xff\xff\x10\x00",
 | 
			
		||||
      ].each {|s|
 | 
			
		||||
        s.force_encoding("utf-32le")
 | 
			
		||||
        a.for(s) {
 | 
			
		||||
          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      [
 | 
			
		||||
        "a",
 | 
			
		||||
        "a\x00",
 | 
			
		||||
        "a\x00\x00",
 | 
			
		||||
        "\x00\xd8\x00\x00",
 | 
			
		||||
        "\xff\xdb\x00\x00",
 | 
			
		||||
        "\x00\xdc\x00\x00",
 | 
			
		||||
        "\xff\xdf\x00\x00",
 | 
			
		||||
        "\x00\x00\x11\x00",
 | 
			
		||||
      ].each {|s|
 | 
			
		||||
        s.force_encoding("utf-32le")
 | 
			
		||||
        a.for(s) {
 | 
			
		||||
          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2224,7 +2224,7 @@ EOT
 | 
			
		|||
           w.binmode
 | 
			
		||||
           w.puts(0x010a.chr(Encoding::UTF_32BE))
 | 
			
		||||
           w.puts(0x010a.chr(Encoding::UTF_16BE))
 | 
			
		||||
           w.puts(0x0a010000.chr(Encoding::UTF_32LE))
 | 
			
		||||
           w.puts(0x0a01.chr(Encoding::UTF_32LE))
 | 
			
		||||
           w.puts(0x0a01.chr(Encoding::UTF_16LE))
 | 
			
		||||
           w.close
 | 
			
		||||
         end,
 | 
			
		||||
| 
						 | 
				
			
			@ -2232,7 +2232,7 @@ EOT
 | 
			
		|||
           r.binmode
 | 
			
		||||
           assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
 | 
			
		||||
           assert_equal("\x01\x0a\n", r.read(3), bug)
 | 
			
		||||
           assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
 | 
			
		||||
           assert_equal("\x01\x0a\x00\x00\n", r.read(5), bug)
 | 
			
		||||
           assert_equal("\x01\x0a\n", r.read(3), bug)
 | 
			
		||||
           assert_equal("", r.read, bug)
 | 
			
		||||
           r.close
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
#define RUBY_VERSION "2.3.3"
 | 
			
		||||
#define RUBY_RELEASE_DATE "2017-03-28"
 | 
			
		||||
#define RUBY_PATCHLEVEL 292
 | 
			
		||||
#define RUBY_PATCHLEVEL 293
 | 
			
		||||
 | 
			
		||||
#define RUBY_RELEASE_YEAR 2017
 | 
			
		||||
#define RUBY_RELEASE_MONTH 3
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue