From 6f3857f6a7b3cd6bd7e62e4efdbb1b841544e053 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 18 Jun 2021 16:05:15 -0700 Subject: [PATCH] Support Encoding::Converter newline: :lf and :lf_newline options Previously, newline: :lf was accepted but ignored. Where it should have been used was commented out code that didn't work, but unlike all other invalid values, using newline: :lf did not raise an error. This adds support for newline: :lf and :lf_newline, for consistency with newline: :cr and :cr_newline. This is basically the same as universal_newline, except that it only affects writing and not reading due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK. Add tests for the File.open :newline option while here. Fixes [Bug #12436] --- enc/trans/newline.trans | 20 +++++++++++ include/ruby/internal/encoding/transcode.h | 18 ++++++---- test/ruby/test_file.rb | 42 ++++++++++++++++++++++ test/ruby/test_transcode.rb | 2 ++ transcode.c | 30 +++++++++++++++- 5 files changed, 104 insertions(+), 8 deletions(-) diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 9e763407f9..95e082f5bd 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -17,10 +17,16 @@ map_cr["0a"] = "0d" transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") + + map_normalize = {} + map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline") %> <%= transcode_generated_code %> +#define lf_newline universal_newline #define STATE (sp[0]) #define NORMAL 0 #define JUST_AFTER_CR 1 @@ -126,10 +132,24 @@ rb_cr_newline = { 0, 0, 0, 0 }; +static const rb_transcoder +rb_lf_newline = { + "", "lf_newline", lf_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + 0, 0, 0, fun_so_universal_newline, + universal_newline_finish +}; + void Init_newline(void) { rb_register_transcoder(&rb_universal_newline); rb_register_transcoder(&rb_crlf_newline); rb_register_transcoder(&rb_cr_newline); + rb_register_transcoder(&rb_lf_newline); } diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h index 60c96a41c9..7f26d2eae9 100644 --- a/include/ruby/internal/encoding/transcode.h +++ b/include/ruby/internal/encoding/transcode.h @@ -476,16 +476,16 @@ enum ruby_econv_flag_type { RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, /** Decorators are there. */ - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, /** Newline converters are there. */ - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, /** (Unclear; seems unused). */ RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, /** Universal newline mode. */ RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, @@ -496,11 +496,14 @@ enum ruby_econv_flag_type { /** CRLF to CR conversion shall happen. */ RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + /** Texts shall be XML-escaped. */ - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, /** Texts shall be AttrValue escaped */ - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, /** (Unclear; seems unused). */ RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, @@ -529,6 +532,7 @@ enum ruby_econv_flag_type { #define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ #define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ #define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ #define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ #define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ #define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ @@ -543,10 +547,10 @@ enum ruby_econv_flag_type { */ /** Indicates the input is a part of much larger one. */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, /** Instructs the converter to stop after output. */ - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, #define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ #define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 905416911a..669b004b83 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -460,6 +460,48 @@ class TestFile < Test::Unit::TestCase end end + def test_file_open_newline_option + Dir.mktmpdir(__method__.to_s) do |tmpdir| + path = File.join(tmpdir, "foo") + test = lambda do |newline| + File.open(path, "wt", newline: newline) do |f| + f.write "a\n" + f.puts "b" + end + File.binread(path) + end + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\r\nb\r\n", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + + test = lambda do |newline| + File.open(path, "rt", newline: newline) do |f| + f.read + end + end + + File.binwrite(path, "a\nb\n") + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\nb\n", test.(:crlf)) + assert_equal("a\nb\n", test.(:cr)) + + File.binwrite(path, "a\r\nb\r\n") + assert_equal("a\r\nb\r\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + # Work on both Windows and non-Windows + assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf)) + assert_equal("a\r\nb\r\n", test.(:cr)) + + File.binwrite(path, "a\rb\r") + assert_equal("a\rb\r", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\rb\r", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + end + end + def test_open_nul Dir.mktmpdir(__method__.to_s) do |tmpdir| path = File.join(tmpdir, "foo") diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index c8b0034e06..73737be0ad 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2305,5 +2305,7 @@ class TestTranscode < Test::Unit::TestCase assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf)) + assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true)) + assert_equal("A\nB\nC", s.encode(usascii, newline: :lf)) end end diff --git a/transcode.c b/transcode.c index 5fafad398f..535e436b03 100644 --- a/transcode.c +++ b/transcode.c @@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr; static VALUE sym_universal_newline; static VALUE sym_crlf_newline; static VALUE sym_cr_newline; +static VALUE sym_lf_newline; #ifdef ENABLE_ECONV_NEWLINE_OPTION static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf; #endif @@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret) case ECONV_UNIVERSAL_NEWLINE_DECORATOR: case ECONV_CRLF_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR: + case ECONV_LF_NEWLINE_DECORATOR: case 0: break; default: @@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret) decorators_ret[num_decorators++] = "crlf_newline"; if (ecflags & ECONV_CR_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "cr_newline"; + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "lf_newline"; if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "universal_newline"; @@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec) case ECONV_CR_NEWLINE_DECORATOR: dname = "cr_newline"; break; + case ECONV_LF_NEWLINE_DECORATOR: + dname = "lf_newline"; + break; } if (dname) { @@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg) rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "cr_newline"); } + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "lf_newline"); + } if (ecflags & ECONV_XML_TEXT_DECORATOR) { rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "xml_text"); @@ -2515,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags) ecflags |= ECONV_CR_NEWLINE_DECORATOR; } else if (v == sym_lf) { - /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */ + ecflags |= ECONV_LF_NEWLINE_DECORATOR; } else if (SYMBOL_P(v)) { rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE, @@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags) setflags |= ECONV_CR_NEWLINE_DECORATOR; newlineflag |= !NIL_P(v); + v = rb_hash_aref(opt, sym_lf_newline); + if (RTEST(v)) + setflags |= ECONV_LF_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); + switch (newlineflag) { case 1: ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; @@ -3281,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * :undef => :replace # replace undefined conversion * :replace => string # replacement string ("?" or "\uFFFD" if not specified) * :newline => :universal # decorator for converting CRLF and CR to LF + * :newline => :lf # decorator for converting CRLF and CR to LF when writing * :newline => :crlf # decorator for converting LF to CRLF * :newline => :cr # decorator for converting LF to CR * :universal_newline => true # decorator for converting CRLF and CR to LF * :crlf_newline => true # decorator for converting LF to CRLF * :cr_newline => true # decorator for converting LF to CR + * :lf_newline => true # decorator for converting CRLF and CR to LF when writing * :xml => :text # escape as XML CharData. * :xml => :attr # escape as XML AttValue * integer form: @@ -3293,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Encoding::Converter::UNDEF_REPLACE * Encoding::Converter::UNDEF_HEX_CHARREF * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR + * Encoding::Converter::LF_NEWLINE_DECORATOR * Encoding::Converter::CRLF_NEWLINE_DECORATOR * Encoding::Converter::CR_NEWLINE_DECORATOR * Encoding::Converter::XML_TEXT_DECORATOR @@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Convert LF to CRLF. * [:cr_newline => true] * Convert LF to CR. + * [:lf_newline => true] + * Convert CRLF and CR to LF (when writing). * [:xml => :text] * Escape as XML CharData. * This form can be used as an HTML 4.0 #PCDATA. @@ -4437,6 +4458,7 @@ Init_transcode(void) sym_universal_newline = ID2SYM(rb_intern_const("universal_newline")); sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline")); sym_cr_newline = ID2SYM(rb_intern_const("cr_newline")); + sym_lf_newline = ID2SYM(rb_intern("lf_newline")); sym_partial_input = ID2SYM(rb_intern_const("partial_input")); #ifdef ENABLE_ECONV_NEWLINE_OPTION @@ -4533,6 +4555,12 @@ InitVM_transcode(void) */ rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR)); + /* Document-const: LF_NEWLINE_DECORATOR + * + * Decorator for converting CRLF and CR to LF when writing + */ + rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR)); + /* Document-const: CRLF_NEWLINE_DECORATOR * * Decorator for converting LF to CRLF