mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode.c (rb_econv_open): disable newline conversion for ASCII
incompatible encodings. (str_transcode0): don't need to disable newline conversion here. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18870 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
18b097b73b
commit
19ab08653e
4 changed files with 92 additions and 16 deletions
|
@ -1,3 +1,9 @@
|
|||
Tue Aug 26 21:53:56 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (rb_econv_open): disable newline conversion for ASCII
|
||||
incompatible encodings.
|
||||
(str_transcode0): don't need to disable newline conversion here.
|
||||
|
||||
Tue Aug 26 21:44:39 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (rb_econv_binmode): binmode is effective only once.
|
||||
|
|
|
@ -297,12 +297,18 @@ void rb_econv_binmode(rb_econv_t *ec);
|
|||
#define ECONV_UNDEF_IGNORE 0x0010
|
||||
#define ECONV_UNDEF_REPLACE 0x0020
|
||||
|
||||
/* effective only if output is ascii compatible */
|
||||
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x0100
|
||||
|
||||
/* effective only if input is ascii compatible */
|
||||
#define ECONV_CRLF_NEWLINE_ENCODER 0x0200
|
||||
#define ECONV_CR_NEWLINE_ENCODER 0x0400
|
||||
|
||||
/* end of flags for rb_econv_open */
|
||||
|
||||
/* flags for rb_econv_convert */
|
||||
#define ECONV_PARTIAL_INPUT 0x10000
|
||||
#define ECONV_OUTPUT_FOLLOWED_BY_INPUT 0x20000
|
||||
/* end of flags for rb_econv_convert */
|
||||
|
||||
#endif /* RUBY_ENCODING_H */
|
||||
|
|
|
@ -1154,8 +1154,18 @@ EOT
|
|||
}
|
||||
end
|
||||
|
||||
SYSTEM_NEWLINE = []
|
||||
def system_newline
|
||||
File::BINARY == 0 ? "\n" : "\r\n"
|
||||
return SYSTEM_NEWLINE.first if !SYSTEM_NEWLINE.empty?
|
||||
with_tmpdir {
|
||||
open("newline", "wt") {|f|
|
||||
f.print "\n"
|
||||
}
|
||||
open("newline", "rb") {|f|
|
||||
SYSTEM_NEWLINE << f.read
|
||||
}
|
||||
}
|
||||
SYSTEM_NEWLINE.first
|
||||
end
|
||||
|
||||
def test_textmode_encode_newline
|
||||
|
@ -1170,6 +1180,41 @@ EOT
|
|||
}
|
||||
end
|
||||
|
||||
def test_textmode_encode_newline_enc
|
||||
with_tmpdir {
|
||||
open("t.txt", "wt:euc-jp") {|f|
|
||||
f.puts "abc\u3042"
|
||||
f.puts "def\u3044"
|
||||
}
|
||||
content = File.read("t.txt", :mode=>"rb:ascii-8bit")
|
||||
nl = system_newline
|
||||
assert_equal("abc\xA4\xA2#{nl}def\xA4\xA4#{nl}", content)
|
||||
}
|
||||
end
|
||||
|
||||
def test_textmode_read_ascii_incompat_internal
|
||||
with_tmpdir {
|
||||
generate_file("t.utf8.crlf", "a\r\nb\r\n")
|
||||
open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f|
|
||||
content = f.read
|
||||
# textmode has no effect when the internal encoding is ASCII-incompatible.
|
||||
assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"),
|
||||
content)
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
def test_textmode_write_ascii_incompat_internal
|
||||
with_tmpdir {
|
||||
open("t.utf8.lf", "wt:utf-8:utf-16be") {|f|
|
||||
f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
|
||||
}
|
||||
content = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit")
|
||||
# textmode has no effect when the internal encoding is ASCII-incompatible.
|
||||
assert_equal("a\nb\n", content)
|
||||
}
|
||||
end
|
||||
|
||||
def test_binary
|
||||
with_tmpdir {
|
||||
src = "a\nb\rc\r\nd\n"
|
||||
|
@ -1180,7 +1225,7 @@ EOT
|
|||
open("t.txt", "r", :binmode=>true) {|f|
|
||||
assert_equal(src, f.read)
|
||||
}
|
||||
if File::BINARY == 0
|
||||
if system_newline == "\n"
|
||||
open("t.txt", "r") {|f|
|
||||
assert_equal(src, f.read)
|
||||
}
|
||||
|
|
47
transcode.c
47
transcode.c
|
@ -748,6 +748,26 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
|
|||
int num_additional;
|
||||
static rb_econv_t *ec;
|
||||
int flags = opts ? opts->flags : 0;
|
||||
int universal_newline_decoder_added = 0;
|
||||
|
||||
rb_encoding *senc, *denc;
|
||||
int sidx, didx;
|
||||
|
||||
senc = NULL;
|
||||
if (*from) {
|
||||
sidx = rb_enc_find_index(from);
|
||||
if (0 <= sidx) {
|
||||
senc = rb_enc_from_index(sidx);
|
||||
}
|
||||
}
|
||||
|
||||
denc = NULL;
|
||||
if (*to) {
|
||||
didx = rb_enc_find_index(to);
|
||||
if (0 <= didx) {
|
||||
denc = rb_enc_from_index(didx);
|
||||
}
|
||||
}
|
||||
|
||||
if (*from == '\0' && *to == '\0') {
|
||||
num_trans = 0;
|
||||
|
@ -763,7 +783,8 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
|
|||
}
|
||||
|
||||
num_additional = 0;
|
||||
if (flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
|
||||
if ((!*from || (senc && rb_enc_asciicompat(senc))) &&
|
||||
(flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER))) {
|
||||
const char *name = (flags & ECONV_CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline";
|
||||
transcoder_entry_t *e = get_transcoder_entry("", name);
|
||||
if (flags & ECONV_CRLF_NEWLINE_ENCODER)
|
||||
|
@ -779,8 +800,12 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
|
|||
num_trans++;
|
||||
num_additional++;
|
||||
}
|
||||
else {
|
||||
flags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER);
|
||||
}
|
||||
|
||||
if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
|
||||
if ((!*to || (denc && rb_enc_asciicompat(denc))) &&
|
||||
(flags & ECONV_UNIVERSAL_NEWLINE_DECODER)) {
|
||||
transcoder_entry_t *e = get_transcoder_entry("universal_newline", "");
|
||||
if (!e) {
|
||||
xfree(entries);
|
||||
|
@ -788,6 +813,10 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
|
|||
}
|
||||
entries[num_trans++] = e;
|
||||
num_additional++;
|
||||
universal_newline_decoder_added = 1;
|
||||
}
|
||||
else {
|
||||
flags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER;
|
||||
}
|
||||
|
||||
ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
|
||||
|
@ -799,6 +828,7 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
|
|||
ec->opts.flags = 0;
|
||||
else
|
||||
ec->opts = *opts;
|
||||
ec->opts.flags = flags;
|
||||
ec->source_encoding_name = from;
|
||||
ec->destination_encoding_name = to;
|
||||
|
||||
|
@ -806,7 +836,7 @@ rb_econv_open(const char *from, const char *to, rb_econv_option_t *opts)
|
|||
ec->last_tc = NULL;
|
||||
ec->last_trans_index = -1;
|
||||
}
|
||||
else if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
|
||||
else if (universal_newline_decoder_added) {
|
||||
ec->last_tc = ec->elems[ec->num_trans-2].tc;
|
||||
ec->last_trans_index = ec->num_trans-2;
|
||||
}
|
||||
|
@ -1886,17 +1916,6 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, rb_econv_option_t *ecopts_arg
|
|||
else
|
||||
rb_econv_opts(Qnil, &ecopts);
|
||||
|
||||
/* disable newline conversion for ascii incompatible encoding.
|
||||
* xxx: convert newline in ascii-compatible encoding?
|
||||
* ex. UTF-16BE -> UTF-8 -> newline conversion -> UTF-8 -> UTF-16BE.
|
||||
*/
|
||||
if (!from_enc || !rb_enc_asciicompat(from_enc)) {
|
||||
ecopts.flags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER);
|
||||
}
|
||||
if (!to_enc || !rb_enc_asciicompat(to_enc)) {
|
||||
ecopts.flags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER;
|
||||
}
|
||||
|
||||
if ((ecopts.flags & (ECONV_UNIVERSAL_NEWLINE_DECODER|
|
||||
ECONV_CRLF_NEWLINE_ENCODER|
|
||||
ECONV_CR_NEWLINE_ENCODER)) == 0) {
|
||||
|
|
Loading…
Reference in a new issue