mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* string.c (str_nth_len, str_utf8_nth): return the remaining length as well.
* string.c (rb_str_substr): avoid always measuring the length, to improve performance for huge strings. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30635 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
18e3c52bc4
commit
b0eaf0f621
2 changed files with 41 additions and 16 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,10 @@
|
|||
Sun Jan 23 08:00:09 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* string.c (str_nth_len, str_utf8_nth): return the rest length together.
|
||||
|
||||
* string.c (rb_str_substr): get rid of measure the length always
|
||||
to improve performance for huge string.
|
||||
|
||||
Sun Jan 23 00:40:10 2011 KOSAKI Motohiro <kosaki.motohiro@gmail.com>
|
||||
|
||||
* test/test_syslog.rb: Fix to make a lot of test failure if
|
||||
|
@ -9,7 +16,7 @@ Sat Jan 22 11:49:55 2011 Aaron Patterson <aaron@tenderlovemaking.com>
|
|||
when multiple merge keys are specified.
|
||||
|
||||
* test/psych/test_merge_keys.rb: tests for multi-merge key support
|
||||
|
||||
|
||||
Sat Jan 22 11:33:04 2011 Aaron Patterson <aaron@tenderlovemaking.com>
|
||||
|
||||
* ext/psych/lib/psych/visitors/to_ruby.rb: merge keys are actually
|
||||
|
@ -29,7 +36,7 @@ Sat Jan 22 10:12:30 2011 Aaron Patterson <aaron@tenderlovemaking.com>
|
|||
|
||||
* ext/psych/parser.c (parse): fix assertion error when reusing a
|
||||
parser after an exception has been raised
|
||||
|
||||
|
||||
* test/psych/test_parser.rb: test for assertion error
|
||||
|
||||
Sat Jan 22 04:09:22 2011 Aaron Patterson <aaron@tenderlovemaking.com>
|
||||
|
|
46
string.c
46
string.c
|
@ -1420,9 +1420,10 @@ rb_str_s_try_convert(VALUE dummy, VALUE str)
|
|||
return rb_check_string_type(str);
|
||||
}
|
||||
|
||||
char*
|
||||
rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
|
||||
static char*
|
||||
str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
|
||||
{
|
||||
long nth = *nthp;
|
||||
if (rb_enc_mbmaxlen(enc) == 1) {
|
||||
p += nth;
|
||||
}
|
||||
|
@ -1435,12 +1436,16 @@ rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
|
|||
|
||||
while (p < e && 0 < nth) {
|
||||
e2 = p + nth;
|
||||
if (e < e2)
|
||||
if (e < e2) {
|
||||
*nthp = nth;
|
||||
return (char *)e;
|
||||
}
|
||||
if (ISASCII(*p)) {
|
||||
p2 = search_nonascii(p, e2);
|
||||
if (!p2)
|
||||
if (!p2) {
|
||||
*nthp = nth;
|
||||
return (char *)e2;
|
||||
}
|
||||
nth -= p2 - p;
|
||||
p = p2;
|
||||
}
|
||||
|
@ -1448,26 +1453,35 @@ rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
|
|||
p += n;
|
||||
nth--;
|
||||
}
|
||||
if (nth != 0)
|
||||
*nthp = nth;
|
||||
if (nth != 0) {
|
||||
return (char *)e;
|
||||
}
|
||||
return (char *)p;
|
||||
}
|
||||
else {
|
||||
while (p<e && nth--) {
|
||||
while (p < e && nth--) {
|
||||
p += rb_enc_mbclen(p, e, enc);
|
||||
}
|
||||
}
|
||||
if (p > e) p = e;
|
||||
*nthp = nth;
|
||||
return (char*)p;
|
||||
}
|
||||
|
||||
char*
|
||||
rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
|
||||
{
|
||||
return str_nth_len(p, e, &nth, enc);
|
||||
}
|
||||
|
||||
static char*
|
||||
str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
|
||||
{
|
||||
if (singlebyte)
|
||||
p += nth;
|
||||
else {
|
||||
p = rb_enc_nth(p, e, nth, enc);
|
||||
p = str_nth_len(p, e, &nth, enc);
|
||||
}
|
||||
if (!p) return 0;
|
||||
if (p > e) p = e;
|
||||
|
@ -1492,8 +1506,9 @@ rb_str_offset(VALUE str, long pos)
|
|||
|
||||
#ifdef NONASCII_MASK
|
||||
static char *
|
||||
str_utf8_nth(const char *p, const char *e, long nth)
|
||||
str_utf8_nth(const char *p, const char *e, long *nthp)
|
||||
{
|
||||
long nth = *nthp;
|
||||
if ((int)SIZEOF_VALUE < e - p && (int)SIZEOF_VALUE * 2 < nth) {
|
||||
const VALUE *s, *t;
|
||||
const VALUE lowbits = sizeof(VALUE) - 1;
|
||||
|
@ -1516,13 +1531,14 @@ str_utf8_nth(const char *p, const char *e, long nth)
|
|||
}
|
||||
p++;
|
||||
}
|
||||
*nthp = nth;
|
||||
return (char *)p;
|
||||
}
|
||||
|
||||
static long
|
||||
str_utf8_offset(const char *p, const char *e, long nth)
|
||||
{
|
||||
const char *pp = str_utf8_nth(p, e, nth);
|
||||
const char *pp = str_utf8_nth(p, e, &nth);
|
||||
return pp - p;
|
||||
}
|
||||
#endif
|
||||
|
@ -1603,16 +1619,18 @@ rb_str_substr(VALUE str, long beg, long len)
|
|||
if (beg < 0) return Qnil;
|
||||
}
|
||||
}
|
||||
else if (beg > 0 && beg > str_strlen(str, enc)) {
|
||||
else if (beg > 0 && beg > RSTRING_LEN(str)) {
|
||||
return Qnil;
|
||||
}
|
||||
if (len == 0) {
|
||||
if (beg > str_strlen(str, enc)) return Qnil;
|
||||
p = 0;
|
||||
}
|
||||
#ifdef NONASCII_MASK
|
||||
else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
|
||||
enc == rb_utf8_encoding()) {
|
||||
p = str_utf8_nth(s, e, beg);
|
||||
p = str_utf8_nth(s, e, &beg);
|
||||
if (beg > 0) return Qnil;
|
||||
len = str_utf8_offset(p, e, len);
|
||||
}
|
||||
#endif
|
||||
|
@ -1621,15 +1639,15 @@ rb_str_substr(VALUE str, long beg, long len)
|
|||
|
||||
p = s + beg * char_sz;
|
||||
if (p > e) {
|
||||
p = e;
|
||||
len = 0;
|
||||
return Qnil;
|
||||
}
|
||||
else if (len * char_sz > e - p)
|
||||
len = e - p;
|
||||
else
|
||||
len *= char_sz;
|
||||
}
|
||||
else if ((p = str_nth(s, e, beg, enc, 0)) == e) {
|
||||
else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
|
||||
if (beg > 0) return Qnil;
|
||||
len = 0;
|
||||
}
|
||||
else {
|
||||
|
|
Loading…
Add table
Reference in a new issue