1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Support Encoding::Converter newline: :lf and :lf_newline options

Previously, newline: :lf was accepted but ignored.  The code that
should have handled it was commented out and did not work, so,
unlike every other unsupported value, newline: :lf did not raise
an error.

This adds support for newline: :lf and :lf_newline, for consistency
with newline: :cr and :cr_newline.  This is basically the same as
universal_newline, except that it only affects writing and not
reading due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK.

Add tests for the File.open :newline option while here.

Fixes [Bug #12436]
This commit is contained in:
Jeremy Evans 2021-06-18 16:05:15 -07:00
parent 1a2f99275b
commit 6f3857f6a7
Notes: git 2022-08-20 12:24:04 +09:00
5 changed files with 104 additions and 8 deletions

View file

@ -17,10 +17,16 @@
map_cr["0a"] = "0d" map_cr["0a"] = "0d"
transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
map_normalize = {}
map_normalize["{00-ff}"] = :func_so
transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline")
%> %>
<%= transcode_generated_code %> <%= transcode_generated_code %>
#define lf_newline universal_newline
#define STATE (sp[0]) #define STATE (sp[0])
#define NORMAL 0 #define NORMAL 0
#define JUST_AFTER_CR 1 #define JUST_AFTER_CR 1
@ -126,10 +132,24 @@ rb_cr_newline = {
0, 0, 0, 0 0, 0, 0, 0
}; };
/*
 * Transcoder descriptor for the "lf_newline" decorator; registered in
 * Init_newline() with rb_register_transcoder().
 *
 * It does not define callbacks of its own: the state init/fini slots,
 * the fun_so slot and the finish slot all point at the universal_newline
 * functions.
 */
static const rb_transcoder
rb_lf_newline = {
/* NOTE(review): presumably source encoding (empty), destination name and
 * conversion table, mirroring the sibling newline transcoders -- confirm
 * against the rb_transcoder declaration. */
"", "lf_newline", lf_newline,
TRANSCODE_TABLE_INFO,
1, /* input_unit_length */
1, /* max_input */
2, /* max_output */
asciicompat_converter, /* asciicompat_type */
/* universal_newline_init is passed for both state_init and state_fini. */
2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
0, 0, 0, fun_so_universal_newline,
universal_newline_finish
};
void void
Init_newline(void) Init_newline(void)
{ {
rb_register_transcoder(&rb_universal_newline); rb_register_transcoder(&rb_universal_newline);
rb_register_transcoder(&rb_crlf_newline); rb_register_transcoder(&rb_crlf_newline);
rb_register_transcoder(&rb_cr_newline); rb_register_transcoder(&rb_cr_newline);
rb_register_transcoder(&rb_lf_newline);
} }

View file

@ -476,16 +476,16 @@ enum ruby_econv_flag_type {
RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
/** Decorators are there. */ /** Decorators are there. */
RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,
/** Newline converters are there. */ /** Newline converters are there. */
RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,
/** (Unclear; seems unused). */ /** (Unclear; seems unused). */
RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
/** (Unclear; seems unused). */ /** (Unclear; seems unused). */
RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,
/** Universal newline mode. */ /** Universal newline mode. */
RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
@ -496,11 +496,14 @@ enum ruby_econv_flag_type {
/** CRLF to CR conversion shall happen. */ /** CRLF to CR conversion shall happen. */
RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
/** CRLF and CR to LF conversion shall happen (when writing). */
RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,
/** Texts shall be XML-escaped. */ /** Texts shall be XML-escaped. */
RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,
/** Texts shall be AttrValue escaped */ /** Texts shall be AttrValue escaped */
RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,
/** (Unclear; seems unused). */ /** (Unclear; seems unused). */
RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
@ -529,6 +532,7 @@ enum ruby_econv_flag_type {
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ #define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ #define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ #define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ #define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ #define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ #define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
@ -543,10 +547,10 @@ enum ruby_econv_flag_type {
*/ */
/** Indicates the input is a part of much larger one. */ /** Indicates the input is a part of much larger one. */
RUBY_ECONV_PARTIAL_INPUT = 0x00010000, RUBY_ECONV_PARTIAL_INPUT = 0x00020000,
/** Instructs the converter to stop after output. */ /** Instructs the converter to stop after output. */
RUBY_ECONV_AFTER_OUTPUT = 0x00020000, RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ #define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ #define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */

View file

@ -460,6 +460,48 @@ class TestFile < Test::Unit::TestCase
end end
end end
# Exercises the :newline option of File.open with :lf, :universal, :crlf
# and :cr, first for text-mode writes ("wt") and then for text-mode reads
# ("rt") of files containing LF, CRLF and CR line endings.
def test_file_open_newline_option
Dir.mktmpdir(__method__.to_s) do |tmpdir|
path = File.join(tmpdir, "foo")
# Writer: emit "a\n" via write and "b" via puts with the given newline
# option, then return the raw bytes stored in the file.
test = lambda do |newline|
File.open(path, "wt", newline: newline) do |f|
f.write "a\n"
f.puts "b"
end
File.binread(path)
end
assert_equal("a\nb\n", test.(:lf))
assert_equal("a\nb\n", test.(:universal))
assert_equal("a\r\nb\r\n", test.(:crlf))
assert_equal("a\rb\r", test.(:cr))
# Reader: from here on `test` is rebound to read the whole file in text
# mode with the given newline option.
test = lambda do |newline|
File.open(path, "rt", newline: newline) do |f|
f.read
end
end
# LF-terminated input: every option is expected to return it unchanged.
File.binwrite(path, "a\nb\n")
assert_equal("a\nb\n", test.(:lf))
assert_equal("a\nb\n", test.(:universal))
assert_equal("a\nb\n", test.(:crlf))
assert_equal("a\nb\n", test.(:cr))
# CRLF-terminated input: :universal yields LF; :lf and :cr leave the
# bytes untouched; :crlf may go either way (see the comment below).
File.binwrite(path, "a\r\nb\r\n")
assert_equal("a\r\nb\r\n", test.(:lf))
assert_equal("a\nb\n", test.(:universal))
# Work on both Windows and non-Windows
assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf))
assert_equal("a\r\nb\r\n", test.(:cr))
# CR-terminated input: only :universal is expected to convert to LF.
File.binwrite(path, "a\rb\r")
assert_equal("a\rb\r", test.(:lf))
assert_equal("a\nb\n", test.(:universal))
assert_equal("a\rb\r", test.(:crlf))
assert_equal("a\rb\r", test.(:cr))
end
end
def test_open_nul def test_open_nul
Dir.mktmpdir(__method__.to_s) do |tmpdir| Dir.mktmpdir(__method__.to_s) do |tmpdir|
path = File.join(tmpdir, "foo") path = File.join(tmpdir, "foo")

View file

@ -2305,5 +2305,7 @@ class TestTranscode < Test::Unit::TestCase
assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr)) assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr))
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true))
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf))
assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true))
assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
end end
end end

View file

@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr;
static VALUE sym_universal_newline; static VALUE sym_universal_newline;
static VALUE sym_crlf_newline; static VALUE sym_crlf_newline;
static VALUE sym_cr_newline; static VALUE sym_cr_newline;
static VALUE sym_lf_newline;
#ifdef ENABLE_ECONV_NEWLINE_OPTION #ifdef ENABLE_ECONV_NEWLINE_OPTION
static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf; static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
#endif #endif
@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret)
case ECONV_UNIVERSAL_NEWLINE_DECORATOR: case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
case ECONV_CRLF_NEWLINE_DECORATOR: case ECONV_CRLF_NEWLINE_DECORATOR:
case ECONV_CR_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR:
case ECONV_LF_NEWLINE_DECORATOR:
case 0: case 0:
break; break;
default: default:
@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret)
decorators_ret[num_decorators++] = "crlf_newline"; decorators_ret[num_decorators++] = "crlf_newline";
if (ecflags & ECONV_CR_NEWLINE_DECORATOR) if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "cr_newline"; decorators_ret[num_decorators++] = "cr_newline";
if (ecflags & ECONV_LF_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "lf_newline";
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "universal_newline"; decorators_ret[num_decorators++] = "universal_newline";
@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec)
case ECONV_CR_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR:
dname = "cr_newline"; dname = "cr_newline";
break; break;
case ECONV_LF_NEWLINE_DECORATOR:
dname = "lf_newline";
break;
} }
if (dname) { if (dname) {
@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "cr_newline"); rb_str_cat2(mesg, "cr_newline");
} }
if (ecflags & ECONV_LF_NEWLINE_DECORATOR) {
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "lf_newline");
}
if (ecflags & ECONV_XML_TEXT_DECORATOR) { if (ecflags & ECONV_XML_TEXT_DECORATOR) {
rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "xml_text"); rb_str_cat2(mesg, "xml_text");
@ -2515,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags)
ecflags |= ECONV_CR_NEWLINE_DECORATOR; ecflags |= ECONV_CR_NEWLINE_DECORATOR;
} }
else if (v == sym_lf) { else if (v == sym_lf) {
/* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */ ecflags |= ECONV_LF_NEWLINE_DECORATOR;
} }
else if (SYMBOL_P(v)) { else if (SYMBOL_P(v)) {
rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE, rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags)
setflags |= ECONV_CR_NEWLINE_DECORATOR; setflags |= ECONV_CR_NEWLINE_DECORATOR;
newlineflag |= !NIL_P(v); newlineflag |= !NIL_P(v);
v = rb_hash_aref(opt, sym_lf_newline);
if (RTEST(v))
setflags |= ECONV_LF_NEWLINE_DECORATOR;
newlineflag |= !NIL_P(v);
switch (newlineflag) { switch (newlineflag) {
case 1: case 1:
ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
@ -3281,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* :undef => :replace # replace undefined conversion * :undef => :replace # replace undefined conversion
* :replace => string # replacement string ("?" or "\uFFFD" if not specified) * :replace => string # replacement string ("?" or "\uFFFD" if not specified)
* :newline => :universal # decorator for converting CRLF and CR to LF * :newline => :universal # decorator for converting CRLF and CR to LF
* :newline => :lf # decorator for converting CRLF and CR to LF when writing
* :newline => :crlf # decorator for converting LF to CRLF * :newline => :crlf # decorator for converting LF to CRLF
* :newline => :cr # decorator for converting LF to CR * :newline => :cr # decorator for converting LF to CR
* :universal_newline => true # decorator for converting CRLF and CR to LF * :universal_newline => true # decorator for converting CRLF and CR to LF
* :crlf_newline => true # decorator for converting LF to CRLF * :crlf_newline => true # decorator for converting LF to CRLF
* :cr_newline => true # decorator for converting LF to CR * :cr_newline => true # decorator for converting LF to CR
* :lf_newline => true # decorator for converting CRLF and CR to LF when writing
* :xml => :text # escape as XML CharData. * :xml => :text # escape as XML CharData.
* :xml => :attr # escape as XML AttValue * :xml => :attr # escape as XML AttValue
* integer form: * integer form:
@ -3293,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* Encoding::Converter::UNDEF_REPLACE * Encoding::Converter::UNDEF_REPLACE
* Encoding::Converter::UNDEF_HEX_CHARREF * Encoding::Converter::UNDEF_HEX_CHARREF
* Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
* Encoding::Converter::LF_NEWLINE_DECORATOR
* Encoding::Converter::CRLF_NEWLINE_DECORATOR * Encoding::Converter::CRLF_NEWLINE_DECORATOR
* Encoding::Converter::CR_NEWLINE_DECORATOR * Encoding::Converter::CR_NEWLINE_DECORATOR
* Encoding::Converter::XML_TEXT_DECORATOR * Encoding::Converter::XML_TEXT_DECORATOR
@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* Convert LF to CRLF. * Convert LF to CRLF.
* [:cr_newline => true] * [:cr_newline => true]
* Convert LF to CR. * Convert LF to CR.
* [:lf_newline => true]
* Convert CRLF and CR to LF (when writing).
* [:xml => :text] * [:xml => :text]
* Escape as XML CharData. * Escape as XML CharData.
* This form can be used as an HTML 4.0 #PCDATA. * This form can be used as an HTML 4.0 #PCDATA.
@ -4437,6 +4458,7 @@ Init_transcode(void)
sym_universal_newline = ID2SYM(rb_intern_const("universal_newline")); sym_universal_newline = ID2SYM(rb_intern_const("universal_newline"));
sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline")); sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline"));
sym_cr_newline = ID2SYM(rb_intern_const("cr_newline")); sym_cr_newline = ID2SYM(rb_intern_const("cr_newline"));
sym_lf_newline = ID2SYM(rb_intern("lf_newline"));
sym_partial_input = ID2SYM(rb_intern_const("partial_input")); sym_partial_input = ID2SYM(rb_intern_const("partial_input"));
#ifdef ENABLE_ECONV_NEWLINE_OPTION #ifdef ENABLE_ECONV_NEWLINE_OPTION
@ -4533,6 +4555,12 @@ InitVM_transcode(void)
*/ */
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR)); rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
/* Document-const: LF_NEWLINE_DECORATOR
*
* Decorator for converting CRLF and CR to LF when writing
*/
rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR));
/* Document-const: CRLF_NEWLINE_DECORATOR /* Document-const: CRLF_NEWLINE_DECORATOR
* *
* Decorator for converting LF to CRLF * Decorator for converting LF to CRLF