1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* transcode.c (rb_econv_open): disable newline conversion for
  ASCII-incompatible encodings.
  (str_transcode0): no need to disable newline conversion here.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18870 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-26 12:55:14 +00:00
parent 18b097b73b
commit 19ab08653e
4 changed files with 92 additions and 16 deletions

View file

@ -1,3 +1,9 @@
Tue Aug 26 21:53:56 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (rb_econv_open): disable newline conversion for ASCII
incompatible encodings.
(str_transcode0): don't need disable newline conversion here.
Tue Aug 26 21:44:39 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (rb_econv_binmode): binmode is effective only once.

View file

@ -297,12 +297,18 @@ void rb_econv_binmode(rb_econv_t *ec);
#define ECONV_UNDEF_IGNORE 0x0010
#define ECONV_UNDEF_REPLACE 0x0020
/* effective only if output is ascii compatible */
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x0100
/* effective only if input is ascii compatible */
#define ECONV_CRLF_NEWLINE_ENCODER 0x0200
#define ECONV_CR_NEWLINE_ENCODER 0x0400
/* end of flags for rb_econv_open */
/* flags for rb_econv_convert */
#define ECONV_PARTIAL_INPUT 0x10000
#define ECONV_OUTPUT_FOLLOWED_BY_INPUT 0x20000
/* end of flags for rb_econv_convert */
#endif /* RUBY_ENCODING_H */

View file

@ -1154,8 +1154,18 @@ EOT
}
end
# One-element cache for system_newline: empty until the first probe,
# afterwards it holds the string that was read back from the probe file.
SYSTEM_NEWLINE = []
# Returns the byte sequence that a text-mode write of "\n" produces on disk.
# The value is determined once by writing a probe file and is then served
# from SYSTEM_NEWLINE on later calls.
def system_newline
# NOTE(review): in this flattened diff view the next line looks like the
# pre-change body that the commit removes. Read as plain Ruby it is an
# expression whose value is discarded -- confirm against the real file.
File::BINARY == 0 ? "\n" : "\r\n"
# Fast path: reuse the cached result when the probe has already run.
return SYSTEM_NEWLINE.first if !SYSTEM_NEWLINE.empty?
with_tmpdir {
# Write a single "\n" through a text-mode ("wt") handle ...
open("newline", "wt") {|f|
f.print "\n"
}
# ... then read the file back in binary mode ("rb") and cache what is found.
open("newline", "rb") {|f|
SYSTEM_NEWLINE << f.read
}
}
SYSTEM_NEWLINE.first
end
def test_textmode_encode_newline
@ -1170,6 +1180,41 @@ EOT
}
end
# Writes two lines through a text-mode handle whose external encoding is
# EUC-JP, then reads the file back as raw bytes and checks both the
# transcoded characters and the line terminator reported by system_newline.
def test_textmode_encode_newline_enc
  with_tmpdir do
    open("t.txt", "wt:euc-jp") do |io|
      io.puts "abc\u3042"
      io.puts "def\u3044"
    end
    raw = File.read("t.txt", :mode=>"rb:ascii-8bit")
    nl = system_newline
    # \xA4\xA2 and \xA4\xA4 are the bytes expected for U+3042 and U+3044.
    assert_equal("abc\xA4\xA2#{nl}def\xA4\xA4#{nl}", raw)
  end
end
# Reads a CRLF-terminated UTF-8 file in text mode with UTF-16BE as the
# internal encoding and expects the CRLF pairs to come through unchanged.
def test_textmode_read_ascii_incompat_internal
  with_tmpdir do
    generate_file("t.utf8.crlf", "a\r\nb\r\n")
    open("t.utf8.crlf", "rt:utf-8:utf-16be") do |io|
      actual = io.read
      # Text mode has no effect when the internal encoding is
      # ASCII-incompatible, so "\r\n" must survive as-is.
      expected = "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
      assert_equal(expected, actual)
    end
  end
end
# Writes a UTF-16BE string through a text-mode handle (external UTF-8,
# internal UTF-16BE) and expects plain "\n" terminators in the raw file.
def test_textmode_write_ascii_incompat_internal
  with_tmpdir do
    open("t.utf8.lf", "wt:utf-8:utf-16be") do |io|
      io.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
    end
    raw = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit")
    # Text mode has no effect when the internal encoding is
    # ASCII-incompatible, so no newline conversion is expected.
    assert_equal("a\nb\n", raw)
  end
end
def test_binary
with_tmpdir {
src = "a\nb\rc\r\nd\n"
@ -1180,7 +1225,7 @@ EOT
open("t.txt", "r", :binmode=>true) {|f|
assert_equal(src, f.read)
}
if File::BINARY == 0
if system_newline == "\n"
open("t.txt", "r") {|f|
assert_equal(src, f.read)
}

View file

@ -748,6 +748,26 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
int num_additional;
static rb_econv_t *ec;
int flags = opts ? opts->flags : 0;
int universal_newline_decoder_added = 0;
rb_encoding *senc, *denc;
int sidx, didx;
senc = NULL;
if (*from) {
sidx = rb_enc_find_index(from);
if (0 <= sidx) {
senc = rb_enc_from_index(sidx);
}
}
denc = NULL;
if (*to) {
didx = rb_enc_find_index(to);
if (0 <= didx) {
denc = rb_enc_from_index(didx);
}
}
if (*from == '\0' && *to == '\0') {
num_trans = 0;
@ -763,7 +783,8 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
}
num_additional = 0;
if (flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
if ((!*from || (senc && rb_enc_asciicompat(senc))) &&
(flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER))) {
const char *name = (flags & ECONV_CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline";
transcoder_entry_t *e = get_transcoder_entry("", name);
if (flags & ECONV_CRLF_NEWLINE_ENCODER)
@ -779,8 +800,12 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
num_trans++;
num_additional++;
}
else {
flags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER);
}
if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
if ((!*to || (denc && rb_enc_asciicompat(denc))) &&
(flags & ECONV_UNIVERSAL_NEWLINE_DECODER)) {
transcoder_entry_t *e = get_transcoder_entry("universal_newline", "");
if (!e) {
xfree(entries);
@ -788,6 +813,10 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
}
entries[num_trans++] = e;
num_additional++;
universal_newline_decoder_added = 1;
}
else {
flags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER;
}
ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
@ -799,6 +828,7 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
ec->opts.flags = 0;
else
ec->opts = *opts;
ec->opts.flags = flags;
ec->source_encoding_name = from;
ec->destination_encoding_name = to;
@ -806,7 +836,7 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
ec->last_tc = NULL;
ec->last_trans_index = -1;
}
else if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
else if (universal_newline_decoder_added) {
ec->last_tc = ec->elems[ec->num_trans-2].tc;
ec->last_trans_index = ec->num_trans-2;
}
@ -1886,17 +1916,6 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, rb_econv_option_t *ecopts_arg
else
rb_econv_opts(Qnil, &ecopts);
/* disable newline conversion for ascii incompatible encoding.
* xxx: convert newline in ascii-compatible encoding?
* ex. UTF-16BE -> UTF-8 -> newline conversion -> UTF-8 -> UTF-16BE.
*/
if (!from_enc || !rb_enc_asciicompat(from_enc)) {
ecopts.flags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER);
}
if (!to_enc || !rb_enc_asciicompat(to_enc)) {
ecopts.flags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER;
}
if ((ecopts.flags & (ECONV_UNIVERSAL_NEWLINE_DECODER|
ECONV_CRLF_NEWLINE_ENCODER|
ECONV_CR_NEWLINE_ENCODER)) == 0) {