1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* string.c (rb_str_each_line): use memchr(3) for faster newline search.

* io.c (appendline): remove unused arguments (rsptr and rslen).

* io.c (rb_io_getline_fast): make much simpler (and faster).

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15199 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2008-01-23 18:43:51 +00:00
parent fe068da879
commit 9580a9ca91
4 changed files with 85 additions and 46 deletions

View file

@ -1,3 +1,12 @@
Thu Jan 24 03:23:44 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (rb_str_each_line): use memchr(3) for faster newline
search.
* io.c (appendline): remove unused arguments
* io.c (rb_io_getline_fast): make much simpler (and faster).
Thu Jan 24 02:13:07 2008 Yusuke Endoh <mame@tsg.ne.jp>
* insns.def (expandarray): fix stack inc.

View file

@ -134,7 +134,7 @@ int rb_enc_codelen(int code, rb_encoding *enc);
#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
/* ptr, ptr, encoding -> newline_or_not */
#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,p,end)
#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)p,(UChar*)end)
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)

80
io.c
View file

@ -1686,21 +1686,19 @@ rscheck(const char *rsptr, long rslen, VALUE rs)
}
static int
appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE *strp, long *lp)
appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp)
{
VALUE str = *strp;
int c = EOF;
long limit = *lp;
if (rsptr == 0)
rslen = 1;
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
const char *p = READ_DATA_PENDING_PTR(fptr);
const char *e;
long last = 0, len = (c != EOF);
rb_encoding *enc = io_read_encoding(fptr);
if (limit > 0 && pending > limit) pending = limit;
e = memchr(p, delim, pending);
@ -1720,7 +1718,7 @@ appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE *strp,
if (limit > 0 && limit == pending) {
char *p = fptr->rbuf+fptr->rbuf_off;
char *pp = p + limit;
char *pl = rb_enc_left_char_head(p, pp, io_read_encoding(fptr));
char *pl = rb_enc_left_char_head(p, pp, enc);
if (pl < pp) {
int diff = pp - pl;
@ -1790,27 +1788,53 @@ swallow(rb_io_t *fptr, int term)
}
static VALUE
rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
rb_io_getline_fast(rb_io_t *fptr)
{
VALUE str = Qnil;
int c, nolimit = 0;
int len = 0;
rb_encoding *enc = io_read_encoding(fptr);
for (;;) {
c = appendline(fptr, delim, 0, 0, &str, &limit);
if (c == EOF || c == delim) break;
if (limit == 0) {
nolimit = 1;
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
const char *p = READ_DATA_PENDING_PTR(fptr);
const char *e;
e = memchr(p, '\n', pending);
if (e) {
const char *p0 = rb_enc_left_char_head(p, e, enc);
const char *pend = rb_enc_left_char_head(p, p+pending, enc);
if (rb_enc_is_newline(p0, pend, enc)) {
pending = p0 - p + rb_enc_mbclen(p0, pend, enc);
}
else {
e = 0;
}
}
if (NIL_P(str)) {
str = rb_str_new(p, pending);
fptr->rbuf_off += pending;
fptr->rbuf_len -= pending;
}
else {
rb_str_resize(str, len + pending);
read_buffered_data(RSTRING_PTR(str)+len, pending, fptr);
}
len += pending;
if (e) break;
}
rb_thread_wait_fd(fptr->fd);
rb_io_check_closed(fptr);
if (io_fillbuf(fptr) < 0) {
if (NIL_P(str)) return Qnil;
break;
}
}
if (!NIL_P(str)) {
str = io_enc_str(str, fptr);
if (!nolimit) {
fptr->lineno++;
lineno = INT2FIX(fptr->lineno);
}
}
str = io_enc_str(str, fptr);
fptr->lineno++;
lineno = INT2FIX(fptr->lineno);
return str;
}
@ -1838,11 +1862,12 @@ prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io)
}
}
}
GetOpenFile(io, fptr);
if (!NIL_P(rs)) {
rb_encoding *enc_rs = rb_enc_get(rs);
rb_encoding *enc_io = io_read_encoding(fptr);
if (!NIL_P(rs) && rs != rb_default_rs) {
rb_encoding *enc_rs, *enc_io;
GetOpenFile(io, fptr);
enc_rs = rb_enc_get(rs);
enc_io = io_read_encoding(fptr);
if (enc_io != enc_rs &&
(rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
!rb_enc_asciicompat(enc_io))) {
@ -1876,8 +1901,8 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
else if (limit == 0) {
return rb_enc_str_new(0, 0, io_read_encoding(fptr));
}
else if (rs == rb_default_rs) {
return rb_io_getline_fast(fptr, '\n', limit);
else if (rs == rb_default_rs && limit < 0) {
return rb_io_getline_fast(fptr);
}
else {
int c, newline;
@ -1893,15 +1918,12 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
swallow(fptr, '\n');
rs = 0;
}
else if (rslen == 1) {
return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit);
}
else {
rsptr = RSTRING_PTR(rs);
}
newline = rsptr[rslen - 1];
while ((c = appendline(fptr, newline, rsptr, rslen, &str, &limit)) != EOF) {
while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
if (c == newline) {
const char *s, *p, *pp;
@ -1954,7 +1976,7 @@ rb_io_gets(VALUE io)
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
return rb_io_getline_fast(fptr, '\n', 0);
return rb_io_getline_fast(fptr);
}
/*

View file

@ -4470,9 +4470,8 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
rb_encoding *enc;
VALUE rs;
int newline;
char *p = RSTRING_PTR(str), *pend = p + RSTRING_LEN(str), *s = p;
char *ptr = p;
long len = RSTRING_LEN(str), rslen;
char *p, *pend, *s, *ptr;
long len, rslen;
VALUE line;
int n;
@ -4480,29 +4479,39 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
rs = rb_rs;
}
RETURN_ENUMERATOR(str, argc, argv);
if (NIL_P(rs)) {
rb_yield(str);
return str;
}
str = rb_str_new4(str);
ptr = p = s = RSTRING_PTR(str);
pend = p + RSTRING_LEN(str);
len = RSTRING_LEN(str);
StringValue(rs);
enc = rb_enc_check(str, rs);
if (rs == rb_default_rs) {
enc = rb_enc_get(str);
while (p < pend) {
n = rb_enc_mbclen(p, pend, enc);
if (rb_enc_is_newline(p, pend, enc)) {
line = rb_str_new5(str, s, p - s + n);
OBJ_INFECT(line, str);
rb_enc_copy(line, str);
rb_yield(line);
str_mod_check(str, ptr, len);
s = p + n;
char *p0;
p = memchr(p, '\n', pend - p);
if (!p) break;
p0 = rb_enc_left_char_head(s, p, enc);
if (!rb_enc_is_newline(p0, pend, enc)) {
p++;
continue;
}
p += n;
p = p0 + rb_enc_mbclen(s, p0, enc);
line = rb_str_new5(str, s, p - s);
OBJ_INFECT(line, str);
rb_enc_copy(line, str);
rb_yield(line);
str_mod_check(str, ptr, len);
s = p;
}
goto finish;
}
enc = rb_enc_check(str, rs);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
newline = '\n';
@ -4535,8 +4544,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
finish:
if (s != pend) {
if (p > pend) p = pend;
line = rb_str_new5(str, s, p - s);
line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_copy(line, str);
rb_yield(line);