1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Introduce NEED_READCONV and NEED_WRITECONV to replace universal newline decorator

Use CRLF only when required to improve file reading and writing under Windows.
Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@33937 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
luislavena 2011-12-04 01:10:06 +00:00
parent 4a1cfe70dc
commit f9a6a1dd0c
6 changed files with 214 additions and 19 deletions

View file

@ -1,3 +1,31 @@
Sun Dec 4 10:15:00 2011 Luis Lavena <luislavena@gmail.com>
* ext/zlib/zlib.c (rb_gzreader_initialize): use binary mode by default
under Windows. Patch by Hiroshi Shirosaki. [ruby-core:40706]
[Feature #5562]
* include/ruby/encoding.h (void rb_econv_binmode): define NEWLINE
decorator.
* io.c (rb_cloexec_fcntl_dupfd): Introduce NEED_READCONV and
NEED_WRITECONV to replace universal newline decorator by CRLF only
when required to improve file reading and writing under Windows.
Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562]
* io.c (do_writeconv): adjust binary mode if required.
* io.c (read_all, appendline, swallow, rb_io_getline_1): ditto.
* io.c (io_getc, rb_io_each_codepoint, rb_io_ungetc): ditto.
* io.c (rb_io_binmode, rb_io_ascii8bit_binmode): ditto.
* io.c (rb_io_extract_modeenc, rb_sysopen): ditto.
* io.c (pipe_open, prep_stdio, io_encoding_set): ditto.
* io.c (rb_io_s_pipe, copy_stream_body): ditto.
* test/ruby/test_io_m17n.rb (EOT): add test for pipe and stdin in
binary mode.
* win32/win32.c (init_stdhandle): remove O_BINARY from stdhandle
initialization.
* win32/win32.c (rb_w32_write): use FTEXT mode accordingly.
Sat Dec 3 20:49:16 2011 Yusuke Endoh <mame@tsg.ne.jp>
* variable.c (set_const_visibility): print a warning when no argument

View file

@ -10,6 +10,7 @@
#include <zlib.h>
#include <time.h>
#include <ruby/io.h>
#include <fcntl.h>
#ifdef HAVE_VALGRIND_MEMCHECK_H
# include <valgrind/memcheck.h>
@ -3425,6 +3426,13 @@ rb_gzreader_initialize(int argc, VALUE *argv, VALUE obj)
Data_Get_Struct(obj, struct gzfile, gz);
rb_scan_args(argc, argv, "1:", &io, &opt);
#ifdef O_BINARY
if (BUILTIN_TYPE(io) == T_FILE) {
rb_io_t *fptr;
GetOpenFile(io, fptr);
setmode(fptr->fd, O_BINARY);
}
#endif
/* this is an undocumented feature of zlib */
err = inflateInit2(&gz->z.stream, -MAX_WBITS);

View file

@ -318,7 +318,7 @@ void rb_econv_binmode(rb_econv_t *ec);
#define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_UNIVERSAL_NEWLINE_DECORATOR
#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_CRLF_NEWLINE_DECORATOR
#else
#define ECONV_DEFAULT_NEWLINE_DECORATOR 0
#endif

133
io.c
View file

@ -380,12 +380,66 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd)
/* Windows */
# define DEFAULT_TEXTMODE FMODE_TEXTMODE
# define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR
/*
* The CRLF newline decorator is the default newline decorator.
* If only CRLF newline conversion is needed, we use the binary IO path
* together with the OS's text mode to improve IO performance.
* If encoding conversion is needed or the user sets text mode, we use the
* encoding-conversion IO path and the universal newline decorator by default.
*/
#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || (fptr)->encs.ecflags & ~ECONV_CRLF_NEWLINE_DECORATOR)
#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || ((fptr)->encs.ecflags & ((ECONV_DECORATOR_MASK & ~ECONV_CRLF_NEWLINE_DECORATOR)|ECONV_STATEFUL_DECORATOR_MASK)))
#define SET_BINARY_MODE(fptr) setmode((fptr)->fd, O_BINARY)
/*
* Select the descriptor's translation mode before a read that bypasses
* the encoding converter.
* Does nothing unless NEED_NEWLINE_DECORATOR_ON_READ(fptr) holds. When it
* does, the fd is put into O_BINARY if fptr is readable and no newline
* decorator flag is set in encs.ecflags; otherwise it is put into O_TEXT.
*/
#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) do {\
if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {\
if (((fptr)->mode & FMODE_READABLE) &&\
!((fptr)->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {\
setmode((fptr)->fd, O_BINARY);\
}\
else {\
setmode((fptr)->fd, O_TEXT);\
}\
}\
} while(0)
/*
* If enc2 is non-NULL and ecflags carries the default newline decorator,
* additionally turn on the universal newline decorator.
* NOTE: ecflags is modified in place, so the argument must be an lvalue.
*/
#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) do {\
if ((enc2) && ((ecflags) & ECONV_DEFAULT_NEWLINE_DECORATOR)) {\
(ecflags) |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;\
}\
} while(0)
/*
* Switch fptr's descriptor to binary mode, first rewinding over any data
* still sitting in the read buffer.
*
* We use io_seek to move the cursor position back when changing mode from
* text to binary, but stdin and pipes cannot seek back. Reads from stdin
* and pipes should use encoding conversion so that they keep working
* properly across a mode change.
*
* When the read buffer is non-empty and fptr is not marked FMODE_DUPLEX:
* - if the seek fails with ESPIPE, fptr is marked FMODE_DUPLEX and the
* buffered data is kept;
* - if the seek fails with any other errno, nothing else is changed;
* - otherwise the read buffer is discarded (off and len reset to 0).
* In every case the descriptor is finally set to O_BINARY.
*/
#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) do {\
if ((fptr)->rbuf.len > 0 && !((fptr)->mode & FMODE_DUPLEX)) {\
off_t r;\
errno = 0;\
r = io_seek((fptr), -(fptr)->rbuf.len, SEEK_CUR);\
if (r < 0 && errno) {\
if (errno == ESPIPE)\
(fptr)->mode |= FMODE_DUPLEX;\
}\
else {\
(fptr)->rbuf.off = 0;\
(fptr)->rbuf.len = 0;\
}\
}\
setmode((fptr)->fd, O_BINARY);\
} while(0)
#else
/* Unix */
# define DEFAULT_TEXTMODE 0
#endif
#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr))
#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK)))
#define SET_BINARY_MODE(fptr) 0
#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) 0
#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) 0
#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) 0
#endif
#if !defined HAVE_SHUTDOWN && !defined shutdown
#define shutdown(a,b) 0
@ -1051,6 +1105,7 @@ do_writeconv(VALUE str, rb_io_t *fptr)
{
if (NEED_WRITECONV(fptr)) {
VALUE common_encoding = Qnil;
SET_BINARY_MODE(fptr);
make_writeconv(fptr);
@ -1080,6 +1135,20 @@ do_writeconv(VALUE str, rb_io_t *fptr)
str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT);
}
}
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
#define fmode (fptr->mode)
else if (MODE_BTMODE(DEFAULT_TEXTMODE,0,1)) {
if ((fptr->mode & FMODE_READABLE) &&
!(fptr->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {
setmode(fptr->fd, O_BINARY);
}
if (!rb_enc_asciicompat(rb_enc_get(str))) {
rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s",
rb_enc_name(rb_enc_get(str)));
}
}
#undef fmode
#endif
return str;
}
@ -1992,6 +2061,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str)
int cr;
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
io_setstrbuf(&str,0);
make_readconv(fptr, 0);
while (1) {
@ -2013,6 +2083,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str)
}
}
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
bytes = 0;
pos = 0;
@ -2438,6 +2509,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
long limit = *lp;
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
make_readconv(fptr, 0);
do {
const char *p, *e;
@ -2480,6 +2552,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
return EOF;
}
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
@ -2518,6 +2591,7 @@ swallow(rb_io_t *fptr, int term)
if (NEED_READCONV(fptr)) {
rb_encoding *enc = io_read_encoding(fptr);
int needconv = rb_enc_mbminlen(enc) != 1;
SET_BINARY_MODE(fptr);
make_readconv(fptr, 0);
do {
size_t cnt;
@ -2541,6 +2615,7 @@ swallow(rb_io_t *fptr, int term)
return FALSE;
}
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
do {
size_t cnt;
while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) {
@ -2677,6 +2752,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
}
else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) &&
rb_enc_asciicompat(enc = io_read_encoding(fptr))) {
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
return rb_io_getline_fast(fptr, enc, io);
}
else {
@ -2686,6 +2762,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
int rspara = 0;
int extra_limit = 16;
SET_BINARY_MODE(fptr);
enc = io_read_encoding(fptr);
if (!NIL_P(rs)) {
@ -3034,6 +3111,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
VALUE str = Qnil;
rb_encoding *read_enc = io_read_encoding(fptr);
SET_BINARY_MODE(fptr);
make_readconv(fptr, 0);
while (1) {
@ -3078,6 +3156,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc)
return str;
}
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
if (io_fillbuf(fptr) < 0) {
return Qnil;
}
@ -3192,6 +3271,7 @@ rb_io_each_codepoint(VALUE io)
READ_CHECK(fptr);
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
for (;;) {
make_readconv(fptr, 0);
for (;;) {
@ -3232,6 +3312,7 @@ rb_io_each_codepoint(VALUE io)
rb_yield(UINT2NUM(c));
}
}
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
enc = io_input_encoding(fptr);
for (;;) {
if (io_fillbuf(fptr) < 0) {
@ -3435,6 +3516,7 @@ rb_io_ungetc(VALUE io, VALUE c)
SafeStringValue(c);
}
if (NEED_READCONV(fptr)) {
SET_BINARY_MODE(fptr);
len = RSTRING_LEN(c);
#if SIZEOF_LONG > SIZEOF_INT
if (len > INT_MAX)
@ -3454,6 +3536,7 @@ rb_io_ungetc(VALUE io, VALUE c)
MEMMOVE(fptr->cbuf.ptr+fptr->cbuf.off, RSTRING_PTR(c), char, len);
}
else {
NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr);
io_ungetbyte(c, fptr);
}
return Qnil;
@ -4162,6 +4245,14 @@ rb_io_binmode(VALUE io)
fptr->mode |= FMODE_BINMODE;
fptr->mode &= ~FMODE_TEXTMODE;
fptr->writeconv_pre_ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
#ifdef O_BINARY
if (!fptr->readconv) {
SET_BINARY_MODE_WITH_SEEK_CUR(fptr);
}
else {
setmode(fptr->fd, O_BINARY);
}
#endif
return io;
}
@ -4181,6 +4272,7 @@ rb_io_ascii8bit_binmode(VALUE io)
}
fptr->mode |= FMODE_BINMODE;
fptr->mode &= ~FMODE_TEXTMODE;
SET_BINARY_MODE_WITH_SEEK_CUR(fptr);
fptr->encs.enc = rb_ascii8bit_encoding();
fptr->encs.enc2 = NULL;
@ -4703,6 +4795,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
#endif
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
else {
@ -4743,13 +4836,14 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE,
0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
#endif
ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags);
if (rb_io_extract_encoding_option(opthash, &enc, &enc2, &fmode)) {
if (has_enc) {
rb_raise(rb_eArgError, "encoding specified twice");
}
}
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags);
}
validate_enc_binmode(&fmode, ecflags, enc, enc2);
@ -4794,9 +4888,6 @@ rb_sysopen(VALUE fname, int oflags, mode_t perm)
int fd;
struct sysopen_struct data;
#ifdef O_BINARY
oflags |= O_BINARY;
#endif
data.fname = rb_str_encode_ospath(fname);
data.oflags = oflags;
data.perm = perm;
@ -5482,6 +5573,11 @@ pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *modestr, int fmode,
fptr->mode = fmode | FMODE_SYNC|FMODE_DUPLEX;
if (convconfig) {
fptr->encs = *convconfig;
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) {
fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
}
#endif
}
else {
if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {
@ -6673,6 +6769,9 @@ prep_stdio(FILE *f, int fmode, VALUE klass, const char *path)
fptr->encs.ecflags |= ECONV_DEFAULT_NEWLINE_DECORATOR;
#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE
fptr->encs.ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE;
if (fmode & FMODE_READABLE) {
fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
}
#endif
fptr->stdio_file = f;
@ -8527,22 +8626,26 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
}
else
enc = rb_to_encoding(v2);
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
else {
if (NIL_P(v1)) {
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
else {
tmp = rb_check_string_type(v1);
if (!NIL_P(tmp) && rb_enc_asciicompat(rb_enc_get(tmp))) {
parse_mode_enc(RSTRING_PTR(tmp), &enc, &enc2, NULL);
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
else {
rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2);
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecopts = Qnil;
}
}
@ -8661,13 +8764,22 @@ rb_io_s_pipe(int argc, VALUE *argv, VALUE klass)
extract_binmode(opt, &fmode);
#if DEFAULT_TEXTMODE
if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE))
if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) {
fptr->mode &= ~FMODE_TEXTMODE;
setmode(fptr->fd, O_BINARY);
}
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) {
fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
}
#endif
#endif
fptr->mode |= fmode;
#if DEFAULT_TEXTMODE
if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE))
if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) {
fptr2->mode &= ~FMODE_TEXTMODE;
setmode(fptr2->fd, O_BINARY);
}
#endif
fptr2->mode |= fmode;
@ -9590,6 +9702,13 @@ copy_stream_body(VALUE arg)
}
stp->dst_fd = dst_fd;
#ifdef O_BINARY
if (src_fptr)
SET_BINARY_MODE_WITH_SEEK_CUR(src_fptr);
if (dst_fptr)
setmode(dst_fd, O_BINARY);
#endif
if (stp->src_offset == (off_t)-1 && src_fptr && src_fptr->rbuf.len) {
size_t len = src_fptr->rbuf.len;
VALUE str;

View file

@ -2174,4 +2174,52 @@ EOT
end
end
end
# Windows only: reading a pipe in text mode yields LF for a CRLF pair;
# after IO#binmode the remaining bytes must come back untranslated.
def test_binmode_with_pipe
  with_pipe do |r, w|
    w.binmode.write "a\r\nb\r\nc\r\n"
    w.close

    # Text mode: the first CRLF is read back as a single "\n".
    ["a", "\n"].each { |expected| assert_equal(expected, r.getc) }

    r.binmode

    # Binary mode: "\r" and "\n" arrive separately, then nil at EOF.
    ["b", "\r", "\n", "c", "\r", "\n", nil].each do |expected|
      assert_equal(expected, r.getc)
    end
    r.close
  end
end if /mswin|mingw/ =~ RUBY_PLATFORM
# Windows only: a child Ruby reads its STDIN (a pipe) in text mode for the
# first two characters, then calls STDIN.binmode and reads seven more.
# The output must show CRLF translated only before the mode switch.
def test_stdin_binmode
  with_pipe do |in_r, in_w|
    with_pipe do |out_r, out_w|
      # Script run by the child; each line echoes one character from STDIN.
      child_script = <<-'End'
STDOUT.binmode
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
STDIN.binmode
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
STDOUT.write STDIN.getc
      End
      pid = Process.spawn({}, EnvUtil.rubybin, '-e', child_script, in: in_r, out: out_w)

      # Close the pipe ends that were handed to the child.
      in_r.close
      out_w.close

      in_w.binmode.write "a\r\nb\r\nc\r\n"
      in_w.close
      Process.wait pid

      assert_equal "a\nb\r\nc\r\n", out_r.binmode.read
      out_r.close
    end
  end
end if /mswin|mingw/ =~ RUBY_PLATFORM
end

View file

@ -2252,28 +2252,19 @@ init_stdhandle(void)
int keep = 0;
#define open_null(fd) \
(((nullfd < 0) ? \
(nullfd = open("NUL", O_RDWR|O_BINARY)) : 0), \
(nullfd = open("NUL", O_RDWR)) : 0), \
((nullfd == (fd)) ? (keep = 1) : dup2(nullfd, fd)), \
(fd))
if (fileno(stdin) < 0) {
stdin->_file = open_null(0);
}
else {
setmode(fileno(stdin), O_BINARY);
}
if (fileno(stdout) < 0) {
stdout->_file = open_null(1);
}
else {
setmode(fileno(stdout), O_BINARY);
}
if (fileno(stderr) < 0) {
stderr->_file = open_null(2);
}
else {
setmode(fileno(stderr), O_BINARY);
}
if (nullfd >= 0 && !keep) close(nullfd);
setvbuf(stderr, NULL, _IONBF, 0);
}
@ -5614,7 +5605,8 @@ rb_w32_write(int fd, const void *buf, size_t size)
return -1;
}
if (_osfile(fd) & FTEXT) {
if ((_osfile(fd) & FTEXT) &&
(!(_osfile(fd) & FPIPE) || fd == fileno(stdout) || fd == fileno(stderr))) {
return _write(fd, buf, size);
}