mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Support Encoding::Converter newline: :lf and :lf_newline options
Previously, newline: :lf was accepted but ignored. The spot where it should have been handled contained only commented-out code that didn't work, yet, unlike all other invalid values, newline: :lf did not raise an error. This adds support for newline: :lf and :lf_newline, for consistency with newline: :cr and :cr_newline. This is basically the same as universal_newline, except that it only affects writing and not reading, due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK. Add tests for the File.open :newline option while here. Fixes [Bug #12436]
This commit is contained in:
parent
1a2f99275b
commit
6f3857f6a7
Notes:
git
2022-08-20 12:24:04 +09:00
5 changed files with 104 additions and 8 deletions
|
@ -17,10 +17,16 @@
|
|||
map_cr["0a"] = "0d"
|
||||
|
||||
transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
|
||||
|
||||
map_normalize = {}
|
||||
map_normalize["{00-ff}"] = :func_so
|
||||
|
||||
transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline")
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
||||
#define lf_newline universal_newline
|
||||
#define STATE (sp[0])
|
||||
#define NORMAL 0
|
||||
#define JUST_AFTER_CR 1
|
||||
|
@ -126,10 +132,24 @@ rb_cr_newline = {
|
|||
0, 0, 0, 0
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_lf_newline = {
|
||||
"", "lf_newline", lf_newline,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
1, /* max_input */
|
||||
2, /* max_output */
|
||||
asciicompat_converter, /* asciicompat_type */
|
||||
2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
|
||||
0, 0, 0, fun_so_universal_newline,
|
||||
universal_newline_finish
|
||||
};
|
||||
|
||||
void
|
||||
Init_newline(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_universal_newline);
|
||||
rb_register_transcoder(&rb_crlf_newline);
|
||||
rb_register_transcoder(&rb_cr_newline);
|
||||
rb_register_transcoder(&rb_lf_newline);
|
||||
}
|
||||
|
|
|
@ -476,16 +476,16 @@ enum ruby_econv_flag_type {
|
|||
RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
|
||||
|
||||
/** Decorators are there. */
|
||||
RUBY_ECONV_DECORATOR_MASK = 0x0000ff00,
|
||||
RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,
|
||||
|
||||
/** Newline converters are there. */
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00,
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,
|
||||
|
||||
/** (Unclear; seems unused). */
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
|
||||
|
||||
/** (Unclear; seems unused). */
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000,
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,
|
||||
|
||||
/** Universal newline mode. */
|
||||
RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
|
||||
|
@ -496,11 +496,14 @@ enum ruby_econv_flag_type {
|
|||
/** LF to CR conversion shall happen. */
|
||||
RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
|
||||
|
||||
/** CRLF and CR to LF conversion shall happen (when writing). */
|
||||
RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,
|
||||
|
||||
/** Texts shall be XML-escaped. */
|
||||
RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000,
|
||||
RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,
|
||||
|
||||
/** Texts shall be AttrValue escaped */
|
||||
RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000,
|
||||
RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,
|
||||
|
||||
/** (Unclear; seems unused). */
|
||||
RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
|
||||
|
@ -529,6 +532,7 @@ enum ruby_econv_flag_type {
|
|||
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
|
||||
#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
|
||||
#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
|
||||
#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
|
||||
#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
|
||||
#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
|
||||
#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
|
||||
|
@ -543,10 +547,10 @@ enum ruby_econv_flag_type {
|
|||
*/
|
||||
|
||||
/** Indicates the input is a part of much larger one. */
|
||||
RUBY_ECONV_PARTIAL_INPUT = 0x00010000,
|
||||
RUBY_ECONV_PARTIAL_INPUT = 0x00020000,
|
||||
|
||||
/** Instructs the converter to stop after output. */
|
||||
RUBY_ECONV_AFTER_OUTPUT = 0x00020000,
|
||||
RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
|
||||
#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
|
||||
#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */
|
||||
|
||||
|
|
|
@ -460,6 +460,48 @@ class TestFile < Test::Unit::TestCase
|
|||
end
|
||||
end
|
||||
|
||||
# Exercises the :newline option of File.open (:lf, :universal, :crlf, :cr),
# first for text-mode writes and then for text-mode reads of LF-, CRLF- and
# CR-terminated files.
def test_file_open_newline_option
  Dir.mktmpdir(__method__.to_s) do |tmpdir|
    path = File.join(tmpdir, "foo")

    # Writes two lines through a text-mode handle opened with the given
    # :newline value and returns the raw bytes that landed in the file.
    written_with = ->(newline) {
      File.open(path, "wt", newline: newline) do |io|
        io.write "a\n"
        io.puts "b"
      end
      File.binread(path)
    }
    assert_equal("a\nb\n", written_with.(:lf))
    assert_equal("a\nb\n", written_with.(:universal))
    assert_equal("a\r\nb\r\n", written_with.(:crlf))
    assert_equal("a\rb\r", written_with.(:cr))

    # Reads the whole file through a text-mode handle opened with the given
    # :newline value.
    read_with = ->(newline) { File.open(path, "rt", newline: newline, &:read) }

    # LF-terminated input
    File.binwrite(path, "a\nb\n")
    assert_equal("a\nb\n", read_with.(:lf))
    assert_equal("a\nb\n", read_with.(:universal))
    assert_equal("a\nb\n", read_with.(:crlf))
    assert_equal("a\nb\n", read_with.(:cr))

    # CRLF-terminated input
    File.binwrite(path, "a\r\nb\r\n")
    assert_equal("a\r\nb\r\n", read_with.(:lf))
    assert_equal("a\nb\n", read_with.(:universal))
    # Work on both Windows and non-Windows
    assert_include(["a\r\nb\r\n", "a\nb\n"], read_with.(:crlf))
    assert_equal("a\r\nb\r\n", read_with.(:cr))

    # CR-terminated input
    File.binwrite(path, "a\rb\r")
    assert_equal("a\rb\r", read_with.(:lf))
    assert_equal("a\nb\n", read_with.(:universal))
    assert_equal("a\rb\r", read_with.(:crlf))
    assert_equal("a\rb\r", read_with.(:cr))
  end
end
|
||||
|
||||
def test_open_nul
|
||||
Dir.mktmpdir(__method__.to_s) do |tmpdir|
|
||||
path = File.join(tmpdir, "foo")
|
||||
|
|
|
@ -2305,5 +2305,7 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr))
|
||||
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true))
|
||||
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf))
|
||||
assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true))
|
||||
assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
|
||||
end
|
||||
end
|
||||
|
|
30
transcode.c
30
transcode.c
|
@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr;
|
|||
static VALUE sym_universal_newline;
|
||||
static VALUE sym_crlf_newline;
|
||||
static VALUE sym_cr_newline;
|
||||
static VALUE sym_lf_newline;
|
||||
#ifdef ENABLE_ECONV_NEWLINE_OPTION
|
||||
static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
|
||||
#endif
|
||||
|
@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret)
|
|||
case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
|
||||
case ECONV_CRLF_NEWLINE_DECORATOR:
|
||||
case ECONV_CR_NEWLINE_DECORATOR:
|
||||
case ECONV_LF_NEWLINE_DECORATOR:
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
|
@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret)
|
|||
decorators_ret[num_decorators++] = "crlf_newline";
|
||||
if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "cr_newline";
|
||||
if (ecflags & ECONV_LF_NEWLINE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "lf_newline";
|
||||
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "universal_newline";
|
||||
|
||||
|
@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec)
|
|||
case ECONV_CR_NEWLINE_DECORATOR:
|
||||
dname = "cr_newline";
|
||||
break;
|
||||
case ECONV_LF_NEWLINE_DECORATOR:
|
||||
dname = "lf_newline";
|
||||
break;
|
||||
}
|
||||
|
||||
if (dname) {
|
||||
|
@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
|
|||
rb_str_cat2(mesg, pre); pre = ",";
|
||||
rb_str_cat2(mesg, "cr_newline");
|
||||
}
|
||||
if (ecflags & ECONV_LF_NEWLINE_DECORATOR) {
|
||||
rb_str_cat2(mesg, pre); pre = ",";
|
||||
rb_str_cat2(mesg, "lf_newline");
|
||||
}
|
||||
if (ecflags & ECONV_XML_TEXT_DECORATOR) {
|
||||
rb_str_cat2(mesg, pre); pre = ",";
|
||||
rb_str_cat2(mesg, "xml_text");
|
||||
|
@ -2515,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags)
|
|||
ecflags |= ECONV_CR_NEWLINE_DECORATOR;
|
||||
}
|
||||
else if (v == sym_lf) {
|
||||
/* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */
|
||||
ecflags |= ECONV_LF_NEWLINE_DECORATOR;
|
||||
}
|
||||
else if (SYMBOL_P(v)) {
|
||||
rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
|
||||
|
@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags)
|
|||
setflags |= ECONV_CR_NEWLINE_DECORATOR;
|
||||
newlineflag |= !NIL_P(v);
|
||||
|
||||
v = rb_hash_aref(opt, sym_lf_newline);
|
||||
if (RTEST(v))
|
||||
setflags |= ECONV_LF_NEWLINE_DECORATOR;
|
||||
newlineflag |= !NIL_P(v);
|
||||
|
||||
switch (newlineflag) {
|
||||
case 1:
|
||||
ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
|
||||
|
@ -3281,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
|
|||
* :undef => :replace # replace undefined conversion
|
||||
* :replace => string # replacement string ("?" or "\uFFFD" if not specified)
|
||||
* :newline => :universal # decorator for converting CRLF and CR to LF
|
||||
* :newline => :lf # decorator for converting CRLF and CR to LF when writing
|
||||
* :newline => :crlf # decorator for converting LF to CRLF
|
||||
* :newline => :cr # decorator for converting LF to CR
|
||||
* :universal_newline => true # decorator for converting CRLF and CR to LF
|
||||
* :crlf_newline => true # decorator for converting LF to CRLF
|
||||
* :cr_newline => true # decorator for converting LF to CR
|
||||
* :lf_newline => true # decorator for converting CRLF and CR to LF when writing
|
||||
* :xml => :text # escape as XML CharData.
|
||||
* :xml => :attr # escape as XML AttValue
|
||||
* integer form:
|
||||
|
@ -3293,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
|
|||
* Encoding::Converter::UNDEF_REPLACE
|
||||
* Encoding::Converter::UNDEF_HEX_CHARREF
|
||||
* Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
|
||||
* Encoding::Converter::LF_NEWLINE_DECORATOR
|
||||
* Encoding::Converter::CRLF_NEWLINE_DECORATOR
|
||||
* Encoding::Converter::CR_NEWLINE_DECORATOR
|
||||
* Encoding::Converter::XML_TEXT_DECORATOR
|
||||
|
@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
|
|||
* Convert LF to CRLF.
|
||||
* [:cr_newline => true]
|
||||
* Convert LF to CR.
|
||||
* [:lf_newline => true]
|
||||
* Convert CRLF and CR to LF (when writing).
|
||||
* [:xml => :text]
|
||||
* Escape as XML CharData.
|
||||
* This form can be used as an HTML 4.0 #PCDATA.
|
||||
|
@ -4437,6 +4458,7 @@ Init_transcode(void)
|
|||
sym_universal_newline = ID2SYM(rb_intern_const("universal_newline"));
|
||||
sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline"));
|
||||
sym_cr_newline = ID2SYM(rb_intern_const("cr_newline"));
|
||||
sym_lf_newline = ID2SYM(rb_intern("lf_newline"));
|
||||
sym_partial_input = ID2SYM(rb_intern_const("partial_input"));
|
||||
|
||||
#ifdef ENABLE_ECONV_NEWLINE_OPTION
|
||||
|
@ -4533,6 +4555,12 @@ InitVM_transcode(void)
|
|||
*/
|
||||
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
|
||||
|
||||
/* Document-const: LF_NEWLINE_DECORATOR
|
||||
*
|
||||
* Decorator for converting CRLF and CR to LF when writing
|
||||
*/
|
||||
rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR));
|
||||
|
||||
/* Document-const: CRLF_NEWLINE_DECORATOR
|
||||
*
|
||||
* Decorator for converting LF to CRLF
|
||||
|
|
Loading…
Reference in a new issue