1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Little-endian UTF is determined by at least the first 2 bytes

* io.c (io_strip_bom): if the first 2 bytes are 0xFF 0xFE, the
encoding should be a little-endian UTF, either UTF-16LE or UTF-32LE.  [Bug #16099]
This commit is contained in:
Nobuyoshi Nakada 2019-08-13 23:23:43 +09:00
parent 79f9c626b6
commit 5b1bf8dd2d
No known key found for this signature in database
GPG key ID: 4BC7D6DF58D8DF60
3 changed files with 5 additions and 8 deletions

7
io.c
View file

@ -6136,12 +6136,9 @@ io_strip_bom(VALUE io)
return ENCINDEX_UTF_32LE;
}
rb_io_ungetbyte(io, b4);
rb_io_ungetbyte(io, b3);
}
else {
rb_io_ungetbyte(io, b3);
return ENCINDEX_UTF_16LE;
}
rb_io_ungetbyte(io, b3);
return ENCINDEX_UTF_16LE;
}
rb_io_ungetbyte(io, b2);
break;

View file

@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase
end
def test_bom_32le
assert_bom(["\xFF\xFE\0", "\0"], __method__)
assert_bom(["\xFF", "\xFE\0\0"], __method__)
end
def test_truncate_wbuf

View file

@ -2084,8 +2084,8 @@ EOT
define_method("test_strip_bom:#{name}") do
path = "#{name}-bom.txt"
with_tmpdir {
text = "\uFEFFa"
stripped = "a"
text = "\uFEFF\u0100a"
stripped = "\u0100a"
content = text.encode(name)
generate_file(path, content)
result = File.read(path, mode: 'rb:BOM|UTF-8')