mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
defined for UTF-8 optimization. * string.c (str_strlen): use is_utf8_lead_byte and count_utf8_lead_bytes_with_ulong. * string.c (str_utf8_nth): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15695 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e34480c330
commit
fff981ee6d
2 changed files with 45 additions and 35 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
||||||
|
Wed Mar 05 22:49:20 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
|
||||||
|
defined for UTF-8 optimization.
|
||||||
|
|
||||||
|
* string.c (str_strlen): use is_utf8_lead_byte and
|
||||||
|
count_utf8_lead_bytes_with_ulong.
|
||||||
|
|
||||||
|
* string.c (str_utf8_nth): ditto.
|
||||||
|
|
||||||
Wed Mar 5 17:53:01 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Wed Mar 5 17:53:01 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* file.c (rb_file_flock): returns false on EAGAIN if non-blocking.
|
* file.c (rb_file_flock): returns false on EAGAIN if non-blocking.
|
||||||
|
|
70
string.c
70
string.c
|
@ -755,6 +755,24 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef NONASCII_MASK
/*
 * True when byte c is not a UTF-8 continuation byte (10xxxxxx), i.e. when
 * it is counted as the start of a character.
 */
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)

/*
 * Count the bytes of the word at *s for which is_utf8_lead_byte() holds,
 * handling all bytes of the word at once.
 *
 * s must point to a readable unsigned long.  Returns a value in the range
 * 0..sizeof(unsigned long).
 *
 * NOTE(review): assumes NONASCII_MASK has 0x80 set in every byte of an
 * unsigned long (its definition is outside this block), so that
 * NONASCII_MASK >> 7 has 0x01 in every byte.
 */
static inline long
count_utf8_lead_bytes_with_ulong(const unsigned long *s)
{
    unsigned long d = *s;

    /* bit 6 of each byte becomes (bit6 | !bit7): set unless byte is 10xxxxxx */
    d |= ~(d>>1);
    /* move that flag down to bit 0 of its byte */
    d >>= 6;
    /* keep only the flag bit of each byte; the shift dragged in bits from
     * the neighbouring byte, which must be discarded */
    d &= NONASCII_MASK >> 7;
    /* every byte is now 0 or 1: fold them so the lowest byte holds the sum */
    d += (d>>8);
    d += (d>>16);
#if NONASCII_MASK == 0x8080808080808080UL
    d += (d>>32);
#endif
    /* the sum is at most 8, so it fits in the low four bits */
    return (long)(d&0xF);
}
#endif
|
||||||
|
|
||||||
static long
|
static long
|
||||||
str_strlen(VALUE str, rb_encoding *enc)
|
str_strlen(VALUE str, rb_encoding *enc)
|
||||||
{
|
{
|
||||||
|
@ -774,26 +792,19 @@ str_strlen(VALUE str, rb_encoding *enc)
|
||||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
const VALUE lowbits = sizeof(unsigned long) - 1;
|
||||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
||||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
||||||
for (len=0; p<(const char *)s; p++) {
|
while (p < (const char *)s) {
|
||||||
if (((*p)&0xC0) != 0x80) len++;
|
if (is_utf8_lead_byte(*p)) len++;
|
||||||
|
p++;
|
||||||
}
|
}
|
||||||
while (s < t) {
|
while (s < t) {
|
||||||
unsigned long d = *s;
|
len += count_utf8_lead_bytes_with_ulong(s);
|
||||||
d = ~d | (d<<1);
|
|
||||||
d &= NONASCII_MASK;
|
|
||||||
d >>= 7;
|
|
||||||
d += (d>>8);
|
|
||||||
d += (d>>16);
|
|
||||||
#if NONASCII_MASK == 0x8080808080808080UL
|
|
||||||
d = d + (d>>32);
|
|
||||||
#endif
|
|
||||||
len += (long)(d&0xF);
|
|
||||||
s++;
|
s++;
|
||||||
}
|
}
|
||||||
p = (const char *)t;
|
p = (const char *)s;
|
||||||
}
|
}
|
||||||
for (; p<e; p++) {
|
while (p < e) {
|
||||||
if (((*p)&0xC0) != 0x80) len++;
|
if (is_utf8_lead_byte(*p)) len++;
|
||||||
|
p++;
|
||||||
}
|
}
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
@ -1162,33 +1173,22 @@ str_utf8_nth(const char *p, const char *e, int nth)
|
||||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
const VALUE lowbits = sizeof(unsigned long) - 1;
|
||||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
||||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
||||||
for (; p<(const char *)s && 0<nth; p++) {
|
while (p < (const char *)s) {
|
||||||
if (((*p)&0xC0) != 0x80) nth--;
|
if (is_utf8_lead_byte(*p)) nth--;
|
||||||
|
p++;
|
||||||
}
|
}
|
||||||
while (s < t) {
|
while (s < t) {
|
||||||
unsigned long d = *s++;
|
nth -= count_utf8_lead_bytes_with_ulong(s);
|
||||||
d = ~d | (d<<1);
|
if (nth < sizeof(long)) break;
|
||||||
d &= NONASCII_MASK;
|
s++;
|
||||||
d >>= 7;
|
|
||||||
d += (d>>8);
|
|
||||||
d += (d>>16);
|
|
||||||
#if NONASCII_MASK == 0x8080808080808080UL
|
|
||||||
d += (d>>32);
|
|
||||||
#endif
|
|
||||||
nth -= (long)(d&0xF);
|
|
||||||
if (nth < 8) {
|
|
||||||
t = s;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
p = (char *)s;
|
||||||
p = (char *)t;
|
|
||||||
}
|
}
|
||||||
if (0 < nth) {
|
if (0 < nth) {
|
||||||
while (p < e) {
|
while (p < e) {
|
||||||
if (((*p)&0xC0) != 0x80) {
|
if (is_utf8_lead_byte(*p)) {
|
||||||
nth--;
|
nth--;
|
||||||
if (nth < 0)
|
if (nth < 0) break;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue