mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
IO#set_encoding_by_bom
* io.c (rb_io_set_encoding_by_bom): add IO#set_encoding_by_bom, which sets the external encoding from the BOM if one exists. [Bug #15210]
This commit is contained in:
parent
bdc8b3789a
commit
e717d6faa8
3 changed files with 56 additions and 3 deletions
7
NEWS
7
NEWS
|
@ -86,6 +86,13 @@ GC::
|
|||
Details on the algorithm and caveats can be found here:
|
||||
https://bugs.ruby-lang.org/issues/15626
|
||||
|
||||
IO::
|
||||
|
||||
New method::
|
||||
|
||||
* Added IO#set_encoding_by_bom to check the BOM and set the external
|
||||
encoding. [Bug #15210]
|
||||
|
||||
Integer::
|
||||
|
||||
Modified method::
|
||||
|
|
44
io.c
44
io.c
|
@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * io_set_encoding_by_bom: strip a leading BOM from +io+ (via io_strip_bom)
 * and, when one was found, install the matching encoding on the stream.
 *
 * Returns the encoding looked up from the index reported by io_strip_bom,
 * or NULL when that index is 0.  In the NULL case the only state touched
 * here is fptr->encs.enc2, which is reset to NULL.
 *
 * NOTE(review): this span is a rendered diff hunk with the +/- markers
 * lost, so pre-change and post-change lines are interleaved.  Judging by
 * the hunk header (20 old lines, 23 new), the `static void` line and the
 * first io_encoding_set(...) call (the one that nests rb_enc_from_index)
 * appear to be the removed lines -- confirm against the real io.c.
 */
static void
|
||||
static rb_encoding *
|
||||
io_set_encoding_by_bom(VALUE io)
|
||||
{
|
||||
/* presumably an encoding index, with 0 meaning "no BOM found" -- verify in io_strip_bom */
int idx = io_strip_bom(io);
|
||||
rb_io_t *fptr;
|
||||
/* stays NULL unless a BOM encoding is resolved below */
rb_encoding *extenc = NULL;
|
||||
|
||||
GetOpenFile(io, fptr);
|
||||
if (idx) {
|
||||
io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
|
||||
rb_io_internal_encoding(io), Qnil);
|
||||
/* keep the resolved encoding so it can be handed back to the caller */
extenc = rb_enc_from_index(idx);
|
||||
/* external encoding comes from the BOM; the internal encoding is whatever io already reports */
io_encoding_set(fptr, rb_enc_from_encoding(extenc),
|
||||
rb_io_internal_encoding(io), Qnil);
|
||||
}
|
||||
else {
|
||||
/* no BOM: clear enc2 and leave the rest of fptr->encs untouched */
fptr->encs.enc2 = NULL;
|
||||
}
|
||||
return extenc;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
|
@ -8306,6 +8309,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
|
|||
return io;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ios.set_encoding_by_bom -> encoding or nil
|
||||
*
|
||||
* Checks if +ios+ starts with a BOM, and then consumes it and sets
|
||||
* the external encoding. Returns the result encoding if found, or
|
||||
* nil. If +ios+ is not binmode or its encoding has been set
|
||||
* already, an exception will be raised.
|
||||
*
|
||||
* File.write("bom.txt", "\u{FEFF}abc")
|
||||
* ios = File.open("bom.txt", "rb")
|
||||
* ios.set_encoding_by_bom #=> #<Encoding:UTF-8>
|
||||
*
|
||||
* File.write("nobom.txt", "abc")
|
||||
* ios = File.open("nobom.txt", "rb")
|
||||
* ios.set_encoding_by_bom #=> nil
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_io_set_encoding_by_bom(VALUE io)
|
||||
{
|
||||
rb_io_t *fptr;
|
||||
|
||||
GetOpenFile(io, fptr);
|
||||
if (!(fptr->mode & FMODE_BINMODE)) {
|
||||
rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
|
||||
}
|
||||
if (fptr->encs.enc2) {
|
||||
rb_raise(rb_eArgError, "encoding conversion is set");
|
||||
}
|
||||
if (!io_set_encoding_by_bom(io)) return Qnil;
|
||||
return rb_enc_from_encoding(fptr->encs.enc);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* File.new(filename, mode="r" [, opt]) -> file
|
||||
|
@ -13319,6 +13356,7 @@ Init_IO(void)
|
|||
rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
|
||||
rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
|
||||
rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
|
||||
rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0);
|
||||
|
||||
rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0);
|
||||
rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1);
|
||||
|
|
|
@ -2097,6 +2097,10 @@ EOT
|
|||
assert_equal(Encoding::UTF_8, result.encoding, message)
|
||||
assert_equal(stripped, result, message)
|
||||
end
|
||||
|
||||
File.open(path, "rb") {|f|
|
||||
assert_equal(Encoding.find(name), f.set_encoding_by_bom)
|
||||
}
|
||||
}
|
||||
end
|
||||
end
|
||||
|
@ -2139,6 +2143,10 @@ EOT
|
|||
assert_equal(stripped, result, bug8323)
|
||||
result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
|
||||
assert_equal(stripped, result, bug8323)
|
||||
|
||||
File.open(path, "rb") {|f|
|
||||
assert_nil(f.set_encoding_by_bom)
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue