1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
  defined for UTF-8 optimization.

* string.c (str_strlen): use is_utf8_lead_byte and
  count_utf8_lead_bytes_with_ulong.

* string.c (str_utf8_nth): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15695 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2008-03-05 13:54:36 +00:00
parent e34480c330
commit fff981ee6d
2 changed files with 45 additions and 35 deletions

View file

@ -1,3 +1,13 @@
Wed Mar 05 22:49:20 2008 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
defined for UTF-8 optimization.
* string.c (str_strlen): use is_utf8_lead_byte and
count_utf8_lead_bytes_with_ulong.
* string.c (str_utf8_nth): ditto.
Wed Mar 5 17:53:01 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> Wed Mar 5 17:53:01 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* file.c (rb_file_flock): returns false on EAGAIN if non-blocking. * file.c (rb_file_flock): returns false on EAGAIN if non-blocking.

View file

@ -755,6 +755,24 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
return c; return c;
} }
#ifdef NONASCII_MASK
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
/*
 * Count the bytes in the word at *s that are NOT UTF-8 continuation bytes
 * (i.e. bytes whose top two bits are not 10), processing the whole
 * unsigned long at once instead of byte by byte.
 *
 * s: pointer to one readable, suitably aligned unsigned long.
 * Returns a value in the range 0..sizeof(unsigned long).
 *
 * The result does not depend on byte order, because only the number of
 * matching bytes is reported, not their positions.
 */
static inline long
count_utf8_lead_bytes_with_ulong(const unsigned long *s)
{
    /*
     * 0x01 replicated into every byte of an unsigned long (0x0101...01).
     * This is the same value as NONASCII_MASK >> 7 when NONASCII_MASK is
     * 0x80 replicated into every byte; the previous mask,
     * NONASCII_MASK >> 3, selected bit 4 of each byte and made the
     * function return 0 for every input.
     */
    const unsigned long byte_lsb_mask = ~0UL / 0xFF;
    unsigned long d = *s;

    /* Bit 6 of each byte becomes (bit6 | !bit7): set unless byte is 10xxxxxx. */
    d |= ~(d>>1);
    /* Move that per-byte flag down to bit 0 of its byte... */
    d >>= 6;
    /* ...and discard every other bit. */
    d &= byte_lsb_mask;

    /*
     * Horizontal add of the per-byte flags into the lowest byte.  Each
     * partial sum is at most 8, so no carry crosses a byte boundary.
     */
    d += (d>>8);
    d += (d>>16);
    /*
     * Fold the upper 32 bits in.  Written as two 16-bit shifts so the
     * expression stays well defined (and adds 0) when unsigned long is
     * only 32 bits wide.
     */
    d += (d>>16>>16);

    return (long)(d&0xF);
}
#endif
static long static long
str_strlen(VALUE str, rb_encoding *enc) str_strlen(VALUE str, rb_encoding *enc)
{ {
@ -774,26 +792,19 @@ str_strlen(VALUE str, rb_encoding *enc)
const VALUE lowbits = sizeof(unsigned long) - 1; const VALUE lowbits = sizeof(unsigned long) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
t = (const unsigned long*)(~lowbits & (VALUE)e); t = (const unsigned long*)(~lowbits & (VALUE)e);
for (len=0; p<(const char *)s; p++) { while (p < (const char *)s) {
if (((*p)&0xC0) != 0x80) len++; if (is_utf8_lead_byte(*p)) len++;
p++;
} }
while (s < t) { while (s < t) {
unsigned long d = *s; len += count_utf8_lead_bytes_with_ulong(s);
d = ~d | (d<<1);
d &= NONASCII_MASK;
d >>= 7;
d += (d>>8);
d += (d>>16);
#if NONASCII_MASK == 0x8080808080808080UL
d = d + (d>>32);
#endif
len += (long)(d&0xF);
s++; s++;
} }
p = (const char *)t; p = (const char *)s;
} }
for (; p<e; p++) { while (p < e) {
if (((*p)&0xC0) != 0x80) len++; if (is_utf8_lead_byte(*p)) len++;
p++;
} }
return len; return len;
} }
@ -1162,33 +1173,22 @@ str_utf8_nth(const char *p, const char *e, int nth)
const VALUE lowbits = sizeof(unsigned long) - 1; const VALUE lowbits = sizeof(unsigned long) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
t = (const unsigned long*)(~lowbits & (VALUE)e); t = (const unsigned long*)(~lowbits & (VALUE)e);
for (; p<(const char *)s && 0<nth; p++) { while (p < (const char *)s) {
if (((*p)&0xC0) != 0x80) nth--; if (is_utf8_lead_byte(*p)) nth--;
p++;
} }
while (s < t) { while (s < t) {
unsigned long d = *s++; nth -= count_utf8_lead_bytes_with_ulong(s);
d = ~d | (d<<1); if (nth < sizeof(long)) break;
d &= NONASCII_MASK; s++;
d >>= 7;
d += (d>>8);
d += (d>>16);
#if NONASCII_MASK == 0x8080808080808080UL
d += (d>>32);
#endif
nth -= (long)(d&0xF);
if (nth < 8) {
t = s;
break;
} }
} p = (char *)s;
p = (char *)t;
} }
if (0 < nth) { if (0 < nth) {
while (p < e) { while (p < e) {
if (((*p)&0xC0) != 0x80) { if (is_utf8_lead_byte(*p)) {
nth--; nth--;
if (nth < 0) if (nth < 0) break;
break;
} }
p++; p++;
} }