1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* string.c (search_nonascii): Use VALUE instead of unsigned long

because VALUE is pointer-sized and so can be the fastest unsigned integer type.
  On LLP64 platforms unsigned long is only 32 bits wide, so it isn't the fastest.
* string.c (str_strlen): ditto.
* string.c (str_utf8_nth): ditto; the trailing byte-wise scan is also reworked.
* string.c (count_utf8_lead_bytes_with_ulong): ditto.

* string.c (count_utf8_lead_bytes_with_word): renamed from count_utf8_lead_bytes_with_ulong.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15731 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2008-03-08 09:05:34 +00:00
parent b7d7519b18
commit a39feece85
4 changed files with 49 additions and 38 deletions

View file

@ -1,3 +1,14 @@
Sat Mar 8 06:53:48 2008 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (search_nonascii): Use VALUE instead of unsigned long
because VALUE can be the fastest unsigned integer type.
On LLP64 unsigned long isn't the fastest.
* string.c (str_strlen): ditto.
* string.c (str_utf8_nth): ditto.
* string.c (count_utf8_lead_bytes_with_ulong): ditto.
* string.c (count_utf8_lead_bytes_with_word): renamed from count_utf8_lead_bytes_with_ulong.
Fri Mar 7 21:27:43 2008 Yusuke Endoh <mame@tsg.ne.jp> Fri Mar 7 21:27:43 2008 Yusuke Endoh <mame@tsg.ne.jp>
* bignum.c: fix indent. * bignum.c: fix indent.

View file

@ -118,22 +118,22 @@ VALUE rb_fs;
static inline const char * static inline const char *
search_nonascii(const char *p, const char *e) search_nonascii(const char *p, const char *e)
{ {
#if ULONG_MAX == 18446744073709551615UL #if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080UL # define NONASCII_MASK 0x8080808080808080LL
#elif ULONG_MAX == 4294967295UL #elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL # define NONASCII_MASK 0x80808080UL
#endif #endif
#ifdef NONASCII_MASK #ifdef NONASCII_MASK
if (sizeof(long) * 2 < e - p) { if (sizeof(VALUE) * 2 < e - p) {
const unsigned long *s, *t; const VALUE *s, *t;
const VALUE lowbits = sizeof(unsigned long) - 1; const VALUE lowbits = sizeof(VALUE) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
while (p < (const char *)s) { while (p < (const char *)s) {
if (!ISASCII(*p)) if (!ISASCII(*p))
return p; return p;
p++; p++;
} }
t = (const unsigned long*)(~lowbits & (VALUE)e); t = (const VALUE*)(~lowbits & (VALUE)e);
while (s < t) { while (s < t) {
if (*s & NONASCII_MASK) { if (*s & NONASCII_MASK) {
t = s; t = s;
@ -757,19 +757,19 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
#ifdef NONASCII_MASK #ifdef NONASCII_MASK
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80) #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
static inline const long static inline const VALUE
count_utf8_lead_bytes_with_ulong(const unsigned long *s) count_utf8_lead_bytes_with_word(const VALUE *s)
{ {
unsigned long d = *s; VALUE d = *s;
d |= ~(d>>1); d |= ~(d>>1);
d >>= 6; d >>= 6;
d &= NONASCII_MASK >> 7; d &= NONASCII_MASK >> 7;
d += (d>>8); d += (d>>8);
d += (d>>16); d += (d>>16);
#if NONASCII_MASK == 0x8080808080808080UL #if SIZEOF_VALUE == 8
d += (d>>32); d += (d>>32);
#endif #endif
return (long)(d&0xF); return (d&0xF);
} }
#endif #endif
@ -786,18 +786,18 @@ str_strlen(VALUE str, rb_encoding *enc)
#ifdef NONASCII_MASK #ifdef NONASCII_MASK
if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
enc == rb_utf8_encoding()) { enc == rb_utf8_encoding()) {
long len = 0; VALUE len = 0;
if (sizeof(long) * 2 < e - p) { if (sizeof(VALUE) * 2 < e - p) {
const unsigned long *s, *t; const VALUE *s, *t;
const VALUE lowbits = sizeof(unsigned long) - 1; const VALUE lowbits = sizeof(VALUE) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
t = (const unsigned long*)(~lowbits & (VALUE)e); t = (const VALUE*)(~lowbits & (VALUE)e);
while (p < (const char *)s) { while (p < (const char *)s) {
if (is_utf8_lead_byte(*p)) len++; if (is_utf8_lead_byte(*p)) len++;
p++; p++;
} }
while (s < t) { while (s < t) {
len += count_utf8_lead_bytes_with_ulong(s); len += count_utf8_lead_bytes_with_word(s);
s++; s++;
} }
p = (const char *)s; p = (const char *)s;
@ -806,7 +806,7 @@ str_strlen(VALUE str, rb_encoding *enc)
if (is_utf8_lead_byte(*p)) len++; if (is_utf8_lead_byte(*p)) len++;
p++; p++;
} }
return len; return (long)len;
} }
#endif #endif
n = rb_enc_strlen_cr(p, e, enc, &cr); n = rb_enc_strlen_cr(p, e, enc, &cr);
@ -1168,29 +1168,27 @@ str_offset(const char *p, const char *e, int nth, rb_encoding *enc, int singleby
static char * static char *
str_utf8_nth(const char *p, const char *e, int nth) str_utf8_nth(const char *p, const char *e, int nth)
{ {
if (sizeof(long) * 2 < nth) { if (sizeof(VALUE) * 2 < nth) {
const unsigned long *s, *t; const VALUE *s, *t;
const VALUE lowbits = sizeof(unsigned long) - 1; const VALUE lowbits = sizeof(VALUE) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
t = (const unsigned long*)(~lowbits & (VALUE)e); t = (const VALUE*)(~lowbits & (VALUE)e);
while (p < (const char *)s) { while (p < (const char *)s) {
if (is_utf8_lead_byte(*p)) nth--; if (is_utf8_lead_byte(*p)) nth--;
p++; p++;
} }
do { do {
nth -= count_utf8_lead_bytes_with_ulong(s); nth -= count_utf8_lead_bytes_with_word(s);
s++; s++;
} while (s < t && sizeof(long) <= nth); } while (s < t && sizeof(VALUE) <= nth);
p = (char *)s; p = (char *)s;
} }
if (0 < nth) { while (p < e) {
while (p < e) { if (is_utf8_lead_byte(*p)) {
if (is_utf8_lead_byte(*p)) { if (nth == 0) break;
nth--; nth--;
if (nth < 0) break;
}
p++;
} }
p++;
} }
return (char *)p; return (char *)p;
} }

View file

@ -819,6 +819,8 @@ class TestM17N < Test::Unit::TestCase
assert_equal("\u{3044}", s[27]) assert_equal("\u{3044}", s[27])
assert_equal("\u{3046}", s[28]) assert_equal("\u{3046}", s[28])
assert_equal("\u{3048}", s[29]) assert_equal("\u{3048}", s[29])
s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}"
assert_equal("\u{3044}", s[24])
end end
def test_str_aref_len def test_str_aref_len

View file

@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0" #define RUBY_VERSION "1.9.0"
#define RUBY_RELEASE_DATE "2008-03-07" #define RUBY_RELEASE_DATE "2008-03-08"
#define RUBY_VERSION_CODE 190 #define RUBY_VERSION_CODE 190
#define RUBY_RELEASE_CODE 20080307 #define RUBY_RELEASE_CODE 20080308
#define RUBY_PATCHLEVEL 0 #define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1 #define RUBY_VERSION_MAJOR 1
@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0 #define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_YEAR 2008
#define RUBY_RELEASE_MONTH 3 #define RUBY_RELEASE_MONTH 3
#define RUBY_RELEASE_DAY 7 #define RUBY_RELEASE_DAY 8
#ifdef RUBY_EXTERN #ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[]; RUBY_EXTERN const char ruby_version[];