The UTF LE BOM is fixed for at least the first 2 bytes

* io.c (io_strip_bom): if the first 2 bytes are 0xFF 0xFE, the encoding must be a little-endian UTF, either UTF-16LE or UTF-32LE. [Bug #16099]
2022-11-09 12:17:21 -05:00 · 2019-08-13 23:23:43 +09:00 · 2019-08-13 23:23:43 +09:00 · 5b1bf8dd2d
commit 5b1bf8dd2d
parent 79f9c626b6
3 changed files with 5 additions and 8 deletions
--- a/io.c
+++ b/io.c
@ -6136,12 +6136,9 @@ io_strip_bom(VALUE io)
 		    return ENCINDEX_UTF_32LE;
 		}
 		rb_io_ungetbyte(io, b4);
-		rb_io_ungetbyte(io, b3);
-	    }
-	    else {
-		rb_io_ungetbyte(io, b3);
-		return ENCINDEX_UTF_16LE;
 	    }
+            rb_io_ungetbyte(io, b3);
+            return ENCINDEX_UTF_16LE;
 	}
 	rb_io_ungetbyte(io, b2);
 	break;
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase
  end

  def test_bom_32le
-    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+    assert_bom(["\xFF", "\xFE\0\0"], __method__)
  end

  def test_truncate_wbuf
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@ -2084,8 +2084,8 @@ EOT
    define_method("test_strip_bom:#{name}") do
      path = "#{name}-bom.txt"
      with_tmpdir {
-        text = "\uFEFFa"
-        stripped = "a"
+        text = "\uFEFF\u0100a"
+        stripped = "\u0100a"
        content = text.encode(name)
        generate_file(path, content)
        result = File.read(path, mode: 'rb:BOM|UTF-8')