mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* include/ruby/io.h (FMODE_TEXTMODE): defined.
* include/ruby/encoding.h (rb_econv_t): new field: flags. (rb_econv_binmode): declared. * io.c (io_unread): text mode hack removed. (NEED_NEWLINE_DECODER): defined. (NEED_NEWLINE_ENCODER): defined. (NEED_READCONV): defined. (NEED_WRITECONV): defined. (TEXTMODE_NEWLINE_ENCODER): defined for windows. (make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for text mode. (io_fwrite): use NEED_WRITECONV. character code conversion is disabled if fptr->writeconv_stateless is nil. (make_readconv): setup converter with ECONV_UNIVERSAL_NEWLINE_DECODER for text mode. (read_all): use NEED_READCONV. (appendline): use NEED_READCONV. (rb_io_getline_1): use NEED_READCONV. (io_getc): use NEED_READCONV. (rb_io_ungetc): use NEED_READCONV. (rb_io_binmode): OS-level text mode test removed. call rb_econv_binmode. (rb_io_binmode_m): call rb_io_binmode with write_io as well. (rb_io_flags_mode): return mode string including "t". (rb_io_mode_flags): detect "t" for text mode. (rb_sysopen): always specify O_BINARY. * transcode.c (rb_econv_open_by_transcoder_entries): initialize flags. (rb_econv_open): if source and destination encoding is both empty string, open newline converter. last_tc will be NULL in this case. (rb_econv_encoding_to_insert_output): last_tc may be NULL now. (rb_econv_string): ditto. (output_replacement_character): ditto. (transcode_loop): ditto. (econv_init): ditto. (econv_inspect): ditto. (rb_econv_binmode): new function. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18780 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
0dd677da4e
commit
c0d3881e0e
8 changed files with 420 additions and 80 deletions
43
ChangeLog
43
ChangeLog
|
@ -1,3 +1,46 @@
|
|||
Sat Aug 23 01:42:22 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/io.h (FMODE_TEXTMODE): defined.
|
||||
|
||||
* include/ruby/encoding.h (rb_econv_t): new field: flags.
|
||||
(rb_econv_binmode): declared.
|
||||
|
||||
* io.c (io_unread): text mode hack removed.
|
||||
(NEED_NEWLINE_DECODER): defined.
|
||||
(NEED_NEWLINE_ENCODER): defined.
|
||||
(NEED_READCONV): defined.
|
||||
(NEED_WRITECONV): defined.
|
||||
(TEXTMODE_NEWLINE_ENCODER): defined for windows.
|
||||
(make_writeconv): setup converter with TEXTMODE_NEWLINE_ENCODER for
|
||||
text mode.
|
||||
(io_fwrite): use NEED_WRITECONV. character code conversion is
|
||||
disabled if fptr->writeconv_stateless is nil.
|
||||
(make_readconv): setup converter with
|
||||
ECONV_UNIVERSAL_NEWLINE_DECODER for text mode.
|
||||
(read_all): use NEED_READCONV.
|
||||
(appendline): use NEED_READCONV.
|
||||
(rb_io_getline_1): use NEED_READCONV.
|
||||
(io_getc): use NEED_READCONV.
|
||||
(rb_io_ungetc): use NEED_READCONV.
|
||||
(rb_io_binmode): OS-level text mode test removed. call
|
||||
rb_econv_binmode.
|
||||
(rb_io_binmode_m): call rb_io_binmode with write_io as well.
|
||||
(rb_io_flags_mode): return mode string including "t".
|
||||
(rb_io_mode_flags): detect "t" for text mode.
|
||||
(rb_sysopen): always specify O_BINARY.
|
||||
|
||||
* transcode.c (rb_econv_open_by_transcoder_entries): initialize flags.
|
||||
(rb_econv_open): if source and destination encoding is
|
||||
both empty string, open newline converter. last_tc will be NULL in
|
||||
this case.
|
||||
(rb_econv_encoding_to_insert_output): last_tc may be NULL now.
|
||||
(rb_econv_string): ditto.
|
||||
(output_replacement_character): ditto.
|
||||
(transcode_loop): ditto.
|
||||
(econv_init): ditto.
|
||||
(econv_inspect): ditto.
|
||||
(rb_econv_binmode): new function.
|
||||
|
||||
Fri Aug 22 21:18:40 2008 Tadayoshi Funaba <tadf@dotrb.org>
|
||||
|
||||
* complex.c (nucomp_div): now behaves as quo.
|
||||
|
|
|
@ -44,7 +44,7 @@ rb_universal_newline = {
|
|||
1, /* input_unit_length */
|
||||
1, /* max_input */
|
||||
1, /* max_output */
|
||||
stateless_converter, /* stateful_type */
|
||||
stateful_decoder, /* stateful_type */
|
||||
NULL, NULL, NULL, fun_so_universal_newline
|
||||
};
|
||||
|
||||
|
|
|
@ -217,6 +217,7 @@ typedef struct {
|
|||
} rb_econv_elem_t;
|
||||
|
||||
typedef struct {
|
||||
int flags;
|
||||
const char *source_encoding_name;
|
||||
const char *destination_encoding_name;
|
||||
|
||||
|
@ -273,6 +274,8 @@ const char *rb_econv_stateless_encoding(const char *stateful_enc);
|
|||
|
||||
VALUE rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags);
|
||||
|
||||
void rb_econv_binmode(rb_econv_t *ec);
|
||||
|
||||
/* flags for rb_econv_open */
|
||||
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x100
|
||||
#define ECONV_CRLF_NEWLINE_ENCODER 0x200
|
||||
|
|
|
@ -85,6 +85,7 @@ typedef struct rb_io_t {
|
|||
#define FMODE_WSPLIT 0x0200
|
||||
#define FMODE_WSPLIT_INITIALIZED 0x0400
|
||||
#define FMODE_TRUNC 0x0800
|
||||
#define FMODE_TEXTMODE 0x1000
|
||||
/* #define FMODE_PREP 0x10000 */
|
||||
|
||||
#define GetOpenFile(obj,fp) rb_io_check_closed((fp) = RFILE(rb_io_taint_check(obj))->fptr)
|
||||
|
|
139
io.c
139
io.c
|
@ -300,17 +300,6 @@ io_unread(rb_io_t *fptr)
|
|||
if (fptr->rbuf_len == 0 || fptr->mode & FMODE_DUPLEX)
|
||||
return;
|
||||
/* xxx: target position may be negative if buffer is filled by ungetc */
|
||||
#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
|
||||
if (!(fptr->mode & FMODE_BINMODE)) {
|
||||
int len = fptr->rbuf_len;
|
||||
while (fptr->rbuf_len-- > 0) {
|
||||
if (fptr->rbuf[fptr->rbuf_len] == '\n')
|
||||
++len;
|
||||
}
|
||||
r = lseek(fptr->fd, -len, SEEK_CUR);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
r = lseek(fptr->fd, -fptr->rbuf_len, SEEK_CUR);
|
||||
if (r < 0) {
|
||||
if (errno == ESPIPE)
|
||||
|
@ -681,12 +670,41 @@ rb_io_wait_writable(int f)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Predicates telling whether an rb_io_t needs a newline and/or encoding
 * converter.  Every macro takes a pointer to the rb_io_t; the argument is
 * parenthesized so that expressions such as `&io` or `p + 1` expand safely.
 *
 * xxx: better way to determine the newline of the platform?
 */
#if defined(O_BINARY) && O_BINARY != 0
/* Windows: anything not explicitly in binary mode converts newlines in
 * both directions. */
# define NEED_NEWLINE_DECODER(fptr) (!((fptr)->mode & FMODE_BINMODE))
# define NEED_NEWLINE_ENCODER(fptr) (!((fptr)->mode & FMODE_BINMODE))
# define TEXTMODE_NEWLINE_ENCODER ECONV_CRLF_NEWLINE_ENCODER
#else
/* Unix: newlines are decoded only when text mode was requested and are
 * never encoded on write; TEXTMODE_NEWLINE_ENCODER is left undefined. */
# define NEED_NEWLINE_DECODER(fptr) ((fptr)->mode & FMODE_TEXTMODE)
# define NEED_NEWLINE_ENCODER(fptr) 0
#endif
/* A read converter is needed when enc2 is set or newlines must be decoded. */
#define NEED_READCONV(fptr) ((fptr)->enc2 != NULL || NEED_NEWLINE_DECODER(fptr))
/* A write converter is needed when enc is set or newlines must be encoded. */
#define NEED_WRITECONV(fptr) ((fptr)->enc != NULL || NEED_NEWLINE_ENCODER(fptr))
|
||||
|
||||
static void
|
||||
make_writeconv(rb_io_t *fptr)
|
||||
{
|
||||
if (!fptr->writeconv_initialized) {
|
||||
const char *senc, *denc;
|
||||
rb_encoding *enc;
|
||||
int ecflags;
|
||||
|
||||
fptr->writeconv_initialized = 1;
|
||||
|
||||
ecflags = 0;
|
||||
#ifdef TEXTMODE_NEWLINE_ENCODER
|
||||
if (NEED_NEWLINE_ENCODER(fptr))
|
||||
ecflags |= TEXTMODE_NEWLINE_ENCODER;
|
||||
|
||||
if (!fptr->enc) {
|
||||
fptr->writeconv = rb_econv_open("", "", ecflags);
|
||||
fptr->writeconv_stateless = Qnil;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
enc = fptr->enc2 ? fptr->enc2 : fptr->enc;
|
||||
senc = rb_econv_stateless_encoding(enc->name);
|
||||
|
@ -699,14 +717,13 @@ make_writeconv(rb_io_t *fptr)
|
|||
fptr->writeconv_stateless = Qnil;
|
||||
}
|
||||
if (senc) {
|
||||
fptr->writeconv = rb_econv_open(senc, denc, 0);
|
||||
fptr->writeconv = rb_econv_open(senc, denc, ecflags);
|
||||
if (!fptr->writeconv)
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", senc, denc);
|
||||
}
|
||||
else {
|
||||
fptr->writeconv = NULL;
|
||||
}
|
||||
fptr->writeconv_initialized = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -716,14 +733,12 @@ io_fwrite(VALUE str, rb_io_t *fptr)
|
|||
{
|
||||
long len, n, r, l, offset = 0;
|
||||
|
||||
/*
|
||||
* If an external encoding was specified and it differs from
|
||||
* the strings encoding then we must transcode before writing.
|
||||
*/
|
||||
if (fptr->enc) {
|
||||
if (NEED_WRITECONV(fptr)) {
|
||||
make_writeconv(fptr);
|
||||
if (fptr->writeconv) {
|
||||
str = rb_str_transcode(str, fptr->writeconv_stateless);
|
||||
if (!NIL_P(fptr->writeconv_stateless)) {
|
||||
str = rb_str_transcode(str, fptr->writeconv_stateless);
|
||||
}
|
||||
str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
|
||||
}
|
||||
else {
|
||||
|
@ -1411,9 +1426,20 @@ static void
|
|||
make_readconv(rb_io_t *fptr)
|
||||
{
|
||||
if (!fptr->readconv) {
|
||||
fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
|
||||
int ecflags = 0;
|
||||
const char *sname, *dname;
|
||||
if (NEED_NEWLINE_DECODER(fptr))
|
||||
ecflags |= ECONV_UNIVERSAL_NEWLINE_DECODER;
|
||||
if (fptr->enc2) {
|
||||
sname = fptr->enc2->name;
|
||||
dname = fptr->enc->name;
|
||||
}
|
||||
else {
|
||||
sname = dname = "";
|
||||
}
|
||||
fptr->readconv = rb_econv_open(sname, dname, ecflags);
|
||||
if (!fptr->readconv)
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc2->name, fptr->enc->name);
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", sname, dname);
|
||||
fptr->crbuf_off = 0;
|
||||
fptr->crbuf_len = 0;
|
||||
fptr->crbuf_capa = 1024;
|
||||
|
@ -1519,7 +1545,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str)
|
|||
rb_encoding *enc;
|
||||
int cr;
|
||||
|
||||
if (fptr->enc2) {
|
||||
if (NEED_READCONV(fptr)) {
|
||||
VALUE str = rb_str_new(NULL, 0);
|
||||
make_readconv(fptr);
|
||||
while (1) {
|
||||
|
@ -1873,7 +1899,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
|
|||
VALUE str = *strp;
|
||||
long limit = *lp;
|
||||
|
||||
if (fptr->enc2) {
|
||||
if (NEED_READCONV(fptr)) {
|
||||
make_readconv(fptr);
|
||||
while (1) {
|
||||
const char *p, *e;
|
||||
|
@ -2084,7 +2110,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
|
|||
else if (limit == 0) {
|
||||
return rb_enc_str_new(0, 0, io_read_encoding(fptr));
|
||||
}
|
||||
else if (rs == rb_default_rs && limit < 0 && !fptr->enc2 &&
|
||||
else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) &&
|
||||
rb_enc_asciicompat(enc = io_read_encoding(fptr))) {
|
||||
return rb_io_getline_fast(fptr, enc);
|
||||
}
|
||||
|
@ -2409,18 +2435,19 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
|
|||
int r, n, cr = 0;
|
||||
VALUE str;
|
||||
|
||||
if (fptr->enc2) {
|
||||
if (NEED_READCONV(fptr)) {
|
||||
VALUE str = Qnil;
|
||||
|
||||
if (!fptr->readconv) {
|
||||
make_readconv(fptr);
|
||||
}
|
||||
make_readconv(fptr);
|
||||
|
||||
while (1) {
|
||||
if (fptr->crbuf_len) {
|
||||
r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off,
|
||||
fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
|
||||
fptr->enc);
|
||||
if (fptr->enc)
|
||||
r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off,
|
||||
fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len,
|
||||
fptr->enc);
|
||||
else
|
||||
r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
|
||||
if (!MBCLEN_NEEDMORE_P(r))
|
||||
break;
|
||||
if (fptr->crbuf_len == fptr->crbuf_capa) {
|
||||
|
@ -2776,7 +2803,7 @@ rb_io_ungetc(VALUE io, VALUE c)
|
|||
else {
|
||||
SafeStringValue(c);
|
||||
}
|
||||
if (fptr->enc2) {
|
||||
if (NEED_READCONV(fptr)) {
|
||||
make_readconv(fptr);
|
||||
len = RSTRING_LEN(c);
|
||||
if (fptr->crbuf_capa - fptr->crbuf_len < len)
|
||||
|
@ -3462,14 +3489,12 @@ rb_io_binmode(VALUE io)
|
|||
rb_io_t *fptr;
|
||||
|
||||
GetOpenFile(io, fptr);
|
||||
#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
|
||||
if (!(fptr->mode & FMODE_BINMODE) && READ_DATA_BUFFERED(fptr)) {
|
||||
rb_raise(rb_eIOError, "buffer already filled with text-mode content");
|
||||
}
|
||||
if (0 <= fptr->fd && setmode(fptr->fd, O_BINARY) == -1)
|
||||
rb_sys_fail(fptr->path);
|
||||
#endif
|
||||
if (fptr->readconv)
|
||||
rb_econv_binmode(fptr->readconv);
|
||||
if (fptr->writeconv)
|
||||
rb_econv_binmode(fptr->writeconv);
|
||||
fptr->mode |= FMODE_BINMODE;
|
||||
fptr->mode &= ~FMODE_TEXTMODE;
|
||||
return io;
|
||||
}
|
||||
|
||||
|
@ -3485,17 +3510,13 @@ rb_io_binmode(VALUE io)
|
|||
static VALUE
|
||||
rb_io_binmode_m(VALUE io)
|
||||
{
|
||||
#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
|
||||
VALUE write_io;
|
||||
#endif
|
||||
|
||||
rb_io_binmode(io);
|
||||
|
||||
#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__)
|
||||
write_io = GetWriteIO(io);
|
||||
if (write_io != io)
|
||||
rb_io_binmode(write_io);
|
||||
#endif
|
||||
return io;
|
||||
}
|
||||
|
||||
|
@ -3516,27 +3537,24 @@ rb_io_binmode_p(VALUE io)
|
|||
static const char*
|
||||
rb_io_flags_mode(int flags)
|
||||
{
|
||||
#ifdef O_BINARY
|
||||
# define MODE_BINMODE(a,b) ((flags & FMODE_BINMODE) ? (b) : (a))
|
||||
#else
|
||||
# define MODE_BINMODE(a,b) (a)
|
||||
#endif
|
||||
# define MODE_BTMODE(a,b,c) ((flags & FMODE_BINMODE) ? (b) : \
|
||||
(flags & FMODE_TEXTMODE) ? (c) : (a))
|
||||
if (flags & FMODE_APPEND) {
|
||||
if ((flags & FMODE_READWRITE) == FMODE_READWRITE) {
|
||||
return MODE_BINMODE("a+", "ab+");
|
||||
return MODE_BTMODE("a+", "ab+", "at+");
|
||||
}
|
||||
return MODE_BINMODE("a", "ab");
|
||||
return MODE_BTMODE("a", "ab", "at");
|
||||
}
|
||||
switch (flags & FMODE_READWRITE) {
|
||||
case FMODE_READABLE:
|
||||
return MODE_BINMODE("r", "rb");
|
||||
return MODE_BTMODE("r", "rb", "rt");
|
||||
case FMODE_WRITABLE:
|
||||
return MODE_BINMODE("w", "wb");
|
||||
return MODE_BTMODE("w", "wb", "wt");
|
||||
case FMODE_READWRITE:
|
||||
if (flags & FMODE_CREATE) {
|
||||
return MODE_BINMODE("w+", "wb+");
|
||||
return MODE_BTMODE("w+", "wb+", "wt+");
|
||||
}
|
||||
return MODE_BINMODE("r+", "rb+");
|
||||
return MODE_BTMODE("r+", "rb+", "rt+");
|
||||
}
|
||||
rb_raise(rb_eArgError, "invalid access modenum 0x%x", flags);
|
||||
return NULL; /* not reached */
|
||||
|
@ -3568,16 +3586,23 @@ rb_io_mode_flags(const char *mode)
|
|||
case 'b':
|
||||
flags |= FMODE_BINMODE;
|
||||
break;
|
||||
case 't':
|
||||
flags |= FMODE_TEXTMODE;
|
||||
break;
|
||||
case '+':
|
||||
flags |= FMODE_READWRITE;
|
||||
break;
|
||||
default:
|
||||
goto error;
|
||||
case ':':
|
||||
return flags;
|
||||
goto finished;
|
||||
}
|
||||
}
|
||||
|
||||
finished:
|
||||
if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE))
|
||||
goto error;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
|
@ -3887,6 +3912,10 @@ rb_sysopen(char *fname, int flags, mode_t mode)
|
|||
{
|
||||
int fd;
|
||||
|
||||
#ifdef O_BINARY
|
||||
flags |= O_BINARY;
|
||||
#endif
|
||||
|
||||
fd = rb_sysopen_internal(fname, flags, mode);
|
||||
if (fd < 0) {
|
||||
if (errno == EMFILE || errno == ENFILE) {
|
||||
|
|
|
@ -305,16 +305,37 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
|
||||
end
|
||||
|
||||
# Universal newline decoding with empty source/destination encoding names:
# input is fed chunk by chunk (including a CRLF split across two chunks) and
# the accumulated output must contain only "\n" line endings.
def test_universal_newline2
  converter = Encoding::Converter.new("", "", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
  input = ""
  args = ["", input, converter, nil, 50, Encoding::Converter::PARTIAL_INPUT]
  steps = [
    ["abc\r\ndef", "abc\ndef"],
    ["ghi\njkl",   "abc\ndefghi\njkl"],
    ["mno\rpqr",   "abc\ndefghi\njklmno\npqr"],
    ["stu\r",      "abc\ndefghi\njklmno\npqrstu\n"],
    ["\nvwx",      "abc\ndefghi\njklmno\npqrstu\nvwx"],
    ["\nyz",       "abc\ndefghi\njklmno\npqrstu\nvwx\nyz"],
  ]
  steps.each do |chunk, output_so_far|
    input << chunk
    check_ec(output_so_far, "", :source_buffer_empty, *args)
  end
end
|
||||
|
||||
# CRLF newline encoder combined with a UTF-8 to EUC-JP conversion.
def test_crlf_newline
  flags = Encoding::Converter::CRLF_NEWLINE_ENCODER
  converter = Encoding::Converter.new("UTF-8", "EUC-JP", flags)
  assert_econv("abc\r\ndef", :finished, 50, converter, "abc\ndef", "")
end
|
||||
|
||||
# CRLF newline encoder on its own (empty source/destination encoding names).
def test_crlf_newline2
  flags = Encoding::Converter::CRLF_NEWLINE_ENCODER
  converter = Encoding::Converter.new("", "", flags)
  assert_econv("abc\r\ndef", :finished, 50, converter, "abc\ndef", "")
end
|
||||
|
||||
# CR newline encoder combined with a UTF-8 to EUC-JP conversion.
def test_cr_newline
  flags = Encoding::Converter::CR_NEWLINE_ENCODER
  converter = Encoding::Converter.new("UTF-8", "EUC-JP", flags)
  assert_econv("abc\rdef", :finished, 50, converter, "abc\ndef", "")
end
|
||||
|
||||
# CR newline encoder on its own (empty source/destination encoding names).
def test_cr_newline2
  flags = Encoding::Converter::CR_NEWLINE_ENCODER
  converter = Encoding::Converter.new("", "", flags)
  assert_econv("abc\rdef", :finished, 50, converter, "abc\ndef", "")
end
|
||||
|
||||
def test_output_followed_by_input
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
|
|
|
@ -979,5 +979,179 @@ EOT
|
|||
}
|
||||
end
|
||||
|
||||
# File.read in text mode ("rt"), with and without an encoding conversion,
# must return "\n" line endings for CRLF, CR and LF input files alike.
def test_textmode_decode_universal_newline_read
  with_tmpdir {
    generate_file("t.crlf", "a\r\nb\r\nc\r\n")
    assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
    assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))

    generate_file("t.cr", "a\rb\rc\r")
    assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
    assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))

    # Read back the LF fixture that was just generated; the previous version
    # re-read "t.cr" here, so the LF case was never exercised.
    generate_file("t.lf", "a\nb\nc\n")
    assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
    assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
  }
end
|
||||
|
||||
# getc on a file opened with "rt" must yield "\n" for CRLF, CR and LF line
# endings, and nil once the file is exhausted.
def test_textmode_decode_universal_newline_getc
  fixtures = [
    ["t.crlf", "a\r\nb\r\nc\r\n"],
    ["t.cr",   "a\rb\rc\r"],
    ["t.lf",   "a\nb\nc\n"],
  ]
  with_tmpdir {
    fixtures.each do |name, content|
      generate_file(name, content)
      open(name, "rt") {|f|
        ["a", "\n", "b", "\n", "c", "\n", nil].each do |expected|
          assert_equal(expected, f.getc)
        end
      }
    end
  }
end
|
||||
|
||||
# gets on a file opened with "rt" must return "\n"-terminated lines for
# CRLF, CR and LF line endings, and nil at end of file.
def test_textmode_decode_universal_newline_gets
  fixtures = [
    ["t.crlf", "a\r\nb\r\nc\r\n"],
    ["t.cr",   "a\rb\rc\r"],
    ["t.lf",   "a\nb\nc\n"],
  ]
  with_tmpdir {
    fixtures.each do |name, content|
      generate_file(name, content)
      open(name, "rt") {|f|
        ["a\n", "b\n", "c\n", nil].each do |expected|
          assert_equal(expected, f.gets)
        end
      }
    end
  }
end
|
||||
|
||||
# Text-mode reads of UTF-16BE/LE files transcoded to UTF-8 must produce "\n"
# line endings for CRLF, CR and LF input.
def test_textmode_decode_universal_newline_utf16
  cases = [
    ["t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n", "rt:utf-16be:utf-8"],
    ["t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0", "rt:utf-16le:utf-8"],
    ["t.utf16be.cr",   "\0a\0\r\0b\0\r\0c\0\r",             "rt:utf-16be:utf-8"],
    ["t.utf16le.cr",   "a\0\r\0b\0\r\0c\0\r\0",             "rt:utf-16le:utf-8"],
    ["t.utf16be.lf",   "\0a\0\n\0b\0\n\0c\0\n",             "rt:utf-16be:utf-8"],
    ["t.utf16le.lf",   "a\0\n\0b\0\n\0c\0\n\0",             "rt:utf-16le:utf-8"],
  ]
  with_tmpdir {
    cases.each do |name, bytes, open_mode|
      generate_file(name, bytes)
      assert_equal("a\nb\nc\n", File.read(name, mode:open_mode))
    end
  }
end
|
||||
|
||||
# Newline sequence the tests expect in text-mode output: "\n" when
# File::BINARY is 0, "\r\n" otherwise.
def system_newline
  return "\n" if File::BINARY == 0
  "\r\n"
end
|
||||
|
||||
# Lines written through a "wt" stream must end with the system newline when
# the file is read back in binary mode.
def test_textmode_encode_newline
  with_tmpdir {
    open("t.txt", "wt") {|f|
      ["abc", "def"].each {|line| f.puts line }
    }
    written = File.read("t.txt", :mode=>"rb")
    eol = system_newline
    assert_equal("abc" + eol + "def" + eol, written)
  }
end
|
||||
|
||||
# Reading with "rb" returns the bytes untouched; plain "r" does too when
# File::BINARY is 0.
def test_binary
  with_tmpdir {
    src = "a\nb\rc\r\nd\n"
    generate_file("t.txt", src)
    modes = ["rb"]
    modes << "r" if File::BINARY == 0
    modes.each do |open_mode|
      open("t.txt", open_mode) {|f|
        assert_equal(src, f.read)
      }
    end
  }
end
|
||||
|
||||
# Switching a text-mode ("rt") stream to binmode part-way through: the
# characters read before the switch have decoded newlines, the ones read
# afterwards are the raw bytes.
def test_binmode
  with_tmpdir {
    src = "a\r\nb\r\nc\r\n"
    generate_file("t.txt", src)
    open("t.txt", "rt") {|f|
      ["a", "\n"].each {|expected| assert_equal(expected, f.getc) }
      f.binmode
      ["\n", "b", "\r", "\n", "c", "\r", "\n", nil].each {|expected|
        assert_equal(expected, f.getc)
      }
    }
  }
end
|
||||
|
||||
# Same scenario as test_binmode, but the stream is opened with an encoding
# conversion as well ("rt:euc-jp:utf-8").
def test_binmode2
  with_tmpdir {
    src = "a\r\nb\r\nc\r\n"
    generate_file("t.txt", src)
    open("t.txt", "rt:euc-jp:utf-8") {|f|
      ["a", "\n"].each {|expected| assert_equal(expected, f.getc) }
      f.binmode
      ["\n", "b", "\r", "\n", "c", "\r", "\n", nil].each {|expected|
        assert_equal(expected, f.getc)
      }
    }
  }
end
|
||||
|
||||
end
|
||||
|
||||
|
|
117
transcode.c
117
transcode.c
|
@ -680,6 +680,7 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
|
|||
}
|
||||
|
||||
ec = ALLOC(rb_econv_t);
|
||||
ec->flags = 0;
|
||||
ec->source_encoding_name = NULL;
|
||||
ec->destination_encoding_name = NULL;
|
||||
ec->in_buf_start = NULL;
|
||||
|
@ -741,7 +742,13 @@ rb_econv_open(const char *from, const char *to, int flags)
|
|||
int num_trans;
|
||||
static rb_econv_t *ec;
|
||||
|
||||
num_trans = transcode_search_path(from, to, trans_open_i, (void *)&entries);
|
||||
if (*from == '\0' && *to == '\0') {
|
||||
num_trans = 0;
|
||||
entries = ALLOC_N(transcoder_entry_t *, 1+2);
|
||||
}
|
||||
else {
|
||||
num_trans = transcode_search_path(from, to, trans_open_i, (void *)&entries);
|
||||
}
|
||||
|
||||
if (num_trans < 0 || !entries) {
|
||||
xfree(entries);
|
||||
|
@ -751,6 +758,10 @@ rb_econv_open(const char *from, const char *to, int flags)
|
|||
if (flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
|
||||
const char *name = (flags & ECONV_CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline";
|
||||
transcoder_entry_t *e = get_transcoder_entry("", name);
|
||||
if (flags & ECONV_CRLF_NEWLINE_ENCODER)
|
||||
flags &= ~ECONV_CR_NEWLINE_ENCODER;
|
||||
else
|
||||
flags &= ~ECONV_CRLF_NEWLINE_ENCODER;
|
||||
if (!e) {
|
||||
xfree(entries);
|
||||
return NULL;
|
||||
|
@ -774,12 +785,19 @@ rb_econv_open(const char *from, const char *to, int flags)
|
|||
if (!ec)
|
||||
rb_raise(rb_eArgError, "encoding conversion not supported (from %s to %s)", from, to);
|
||||
|
||||
ec->flags = flags;
|
||||
ec->source_encoding_name = from;
|
||||
ec->destination_encoding_name = to;
|
||||
|
||||
if (flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
|
||||
ec->last_tc = ec->elems[ec->num_trans-2].tc;
|
||||
ec->last_trans_index = ec->num_trans-2;
|
||||
if (ec->num_trans == 1) {
|
||||
ec->last_tc = NULL;
|
||||
ec->last_trans_index = -1;
|
||||
}
|
||||
else {
|
||||
ec->last_tc = ec->elems[ec->num_trans-2].tc;
|
||||
ec->last_trans_index = ec->num_trans-2;
|
||||
}
|
||||
}
|
||||
|
||||
return ec;
|
||||
|
@ -1037,7 +1055,12 @@ const char *
|
|||
rb_econv_encoding_to_insert_output(rb_econv_t *ec)
|
||||
{
|
||||
rb_transcoding *tc = ec->last_tc;
|
||||
const rb_transcoder *tr = tc->transcoder;
|
||||
const rb_transcoder *tr;
|
||||
|
||||
if (tc == NULL)
|
||||
return "";
|
||||
|
||||
tr = tc->transcoder;
|
||||
|
||||
if (tr->stateful_type == stateful_encoder)
|
||||
return tr->from_encoding;
|
||||
|
@ -1103,7 +1126,6 @@ rb_econv_insert_output(rb_econv_t *ec,
|
|||
size_t insert_len;
|
||||
|
||||
rb_transcoding *tc;
|
||||
const rb_transcoder *tr;
|
||||
|
||||
unsigned char **buf_start_p;
|
||||
unsigned char **data_start_p;
|
||||
|
@ -1125,11 +1147,16 @@ rb_econv_insert_output(rb_econv_t *ec,
|
|||
return -1;
|
||||
}
|
||||
|
||||
tc = ec->last_tc;
|
||||
tr = tc->transcoder;
|
||||
|
||||
need = insert_len;
|
||||
if (tr->stateful_type == stateful_encoder) {
|
||||
|
||||
tc = ec->last_tc;
|
||||
if (!tc) {
|
||||
buf_start_p = &ec->in_buf_start;
|
||||
data_start_p = &ec->in_data_start;
|
||||
data_end_p = &ec->in_data_end;
|
||||
buf_end_p = &ec->in_buf_end;
|
||||
}
|
||||
else if (tc->transcoder->stateful_type == stateful_encoder) {
|
||||
need += tc->readagain_len;
|
||||
if (need < insert_len)
|
||||
goto fail;
|
||||
|
@ -1179,7 +1206,7 @@ rb_econv_insert_output(rb_econv_t *ec,
|
|||
}
|
||||
}
|
||||
|
||||
if (tr->stateful_type == stateful_encoder) {
|
||||
if (tc && tc->transcoder->stateful_type == stateful_encoder) {
|
||||
memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
|
||||
*data_end_p += tc->readagain_len;
|
||||
tc->readagain_len = 0;
|
||||
|
@ -1267,15 +1294,20 @@ rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int fl
|
|||
unsigned const char *ss, *sp, *se;
|
||||
unsigned char *ds, *dp, *de;
|
||||
rb_econv_result_t res;
|
||||
int max_output;
|
||||
|
||||
if (NIL_P(dst)) {
|
||||
dst = rb_str_buf_new(len);
|
||||
}
|
||||
|
||||
if (ec->last_tc)
|
||||
max_output = ec->last_tc->transcoder->max_output;
|
||||
else
|
||||
max_output = 1;
|
||||
|
||||
res = econv_destination_buffer_full;
|
||||
while (res == econv_destination_buffer_full) {
|
||||
long dlen = RSTRING_LEN(dst);
|
||||
int max_output = ec->last_tc->transcoder->max_output;
|
||||
if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
|
||||
unsigned long new_capa = (unsigned long)dlen + len + max_output;
|
||||
if (LONG_MAX < new_capa)
|
||||
|
@ -1297,6 +1329,27 @@ rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int fl
|
|||
return dst;
|
||||
}
|
||||
|
||||
void
|
||||
rb_econv_binmode(rb_econv_t *ec)
|
||||
{
|
||||
if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
|
||||
int i = ec->num_trans-1;
|
||||
rb_transcoding_close(ec->elems[i].tc);
|
||||
xfree(ec->elems[i].out_buf_start);
|
||||
ec->elems[i].tc = NULL;
|
||||
ec->elems[i].out_buf_start = NULL;
|
||||
ec->elems[i].out_data_start = NULL;
|
||||
ec->elems[i].out_data_end = NULL;
|
||||
ec->elems[i].out_buf_end = NULL;
|
||||
ec->num_trans--;
|
||||
}
|
||||
if (ec->flags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) {
|
||||
rb_transcoding_close(ec->elems[0].tc);
|
||||
xfree(ec->elems[0].out_buf_start);
|
||||
MEMMOVE(&ec->elems[0], &ec->elems[1], rb_econv_elem_t, ec->num_trans-1);
|
||||
ec->num_trans--;
|
||||
}
|
||||
}
|
||||
|
||||
static VALUE
|
||||
make_econv_exception(rb_econv_t *ec)
|
||||
|
@ -1358,7 +1411,7 @@ more_output_buffer(
|
|||
static int
|
||||
output_replacement_character(rb_econv_t *ec)
|
||||
{
|
||||
rb_transcoding *tc = ec->last_tc;
|
||||
rb_transcoding *tc;
|
||||
const rb_transcoder *tr;
|
||||
rb_encoding *enc;
|
||||
const unsigned char *replacement;
|
||||
|
@ -1366,10 +1419,17 @@ output_replacement_character(rb_econv_t *ec)
|
|||
int len;
|
||||
int ret;
|
||||
|
||||
tr = tc->transcoder;
|
||||
enc = rb_enc_find(tr->to_encoding);
|
||||
|
||||
replacement = (const unsigned char *)get_replacement_character(enc, &len, &repl_enc);
|
||||
tc = ec->last_tc;
|
||||
if (tc) {
|
||||
tr = tc->transcoder;
|
||||
enc = rb_enc_find(tr->to_encoding);
|
||||
replacement = (const unsigned char *)get_replacement_character(enc, &len, &repl_enc);
|
||||
}
|
||||
else {
|
||||
replacement = (unsigned char *)"?";
|
||||
len = 1;
|
||||
repl_enc = "";
|
||||
}
|
||||
|
||||
ret = rb_econv_insert_output(ec, replacement, len, repl_enc);
|
||||
if (ret == -1)
|
||||
|
@ -1400,7 +1460,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
|
||||
|
||||
last_tc = ec->last_tc;
|
||||
max_output = last_tc->transcoder->max_output;
|
||||
max_output = last_tc ? last_tc->transcoder->max_output : 1;
|
||||
|
||||
resume:
|
||||
ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
|
@ -1465,7 +1525,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
|
||||
|
||||
last_tc = ec->last_tc;
|
||||
max_output = ec->elems[ec->num_trans-1].tc->transcoder->max_output;
|
||||
max_output = last_tc ? last_tc->transcoder->max_output : 1;
|
||||
|
||||
ret = econv_source_buffer_empty;
|
||||
ptr = *in_pos;
|
||||
|
@ -1835,8 +1895,14 @@ econv_init(int argc, VALUE *argv, VALUE self)
|
|||
ec->source_encoding = senc;
|
||||
ec->destination_encoding = denc;
|
||||
|
||||
ec->source_encoding_name = ec->elems[0].tc->transcoder->from_encoding;
|
||||
ec->destination_encoding_name = ec->last_tc->transcoder->to_encoding;
|
||||
if (ec->last_tc) {
|
||||
ec->source_encoding_name = ec->elems[0].tc->transcoder->from_encoding;
|
||||
ec->destination_encoding_name = ec->last_tc->transcoder->to_encoding;
|
||||
}
|
||||
else {
|
||||
ec->source_encoding_name = "";
|
||||
ec->destination_encoding_name = "";
|
||||
}
|
||||
|
||||
DATA_PTR(self) = ec;
|
||||
|
||||
|
@ -1851,10 +1917,13 @@ econv_inspect(VALUE self)
|
|||
|
||||
if (!ec)
|
||||
return rb_sprintf("#<%s: uninitialized>", cname);
|
||||
else
|
||||
return rb_sprintf("#<%s: %s to %s>", cname,
|
||||
ec->source_encoding_name,
|
||||
ec->destination_encoding_name);
|
||||
else {
|
||||
const char *sname = ec->source_encoding_name;
|
||||
const char *dname = ec->destination_encoding_name;
|
||||
if (*sname == '\0') sname = "(none)";
|
||||
if (*dname == '\0') dname = "(none)";
|
||||
return rb_sprintf("#<%s: %s to %s>", cname, sname, dname);
|
||||
}
|
||||
}
|
||||
|
||||
#define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free)
|
||||
|
|
Loading…
Reference in a new issue