mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* string.c (search_nonascii): Use VALUE instead of unsigned long,
  because VALUE can be the fastest unsigned integer type. On LLP64,
  unsigned long isn't the fastest.
* string.c (str_strlen): ditto.
* string.c (str_utf8_nth): ditto.
* string.c (count_utf8_lead_bytes_with_ulong): ditto.
* string.c (count_utf8_lead_bytes_with_word): renamed from
  count_utf8_lead_bytes_with_ulong.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15731 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
b7d7519b18
commit
a39feece85
4 changed files with 49 additions and 38 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
||||||
|
Sat Mar 8 06:53:48 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (search_nonascii): Use VALUE instead of unsigned long
|
||||||
|
because VALUE can be the fastest unsigned integer type.
|
||||||
|
On LLP64 unsigned long isn't the fastest.
|
||||||
|
* string.c (str_strlen): ditto.
|
||||||
|
* string.c (str_utf8_nth): ditto.
|
||||||
|
* string.c (count_utf8_lead_bytes_with_ulong): ditto.
|
||||||
|
|
||||||
|
* string.c (count_utf8_lead_bytes_with_word): renamed.
|
||||||
|
|
||||||
Fri Mar 7 21:27:43 2008 Yusuke Endoh <mame@tsg.ne.jp>
|
Fri Mar 7 21:27:43 2008 Yusuke Endoh <mame@tsg.ne.jp>
|
||||||
|
|
||||||
* bignum.c: fix indent.
|
* bignum.c: fix indent.
|
||||||
|
|
68
string.c
68
string.c
|
@ -118,22 +118,22 @@ VALUE rb_fs;
|
||||||
static inline const char *
|
static inline const char *
|
||||||
search_nonascii(const char *p, const char *e)
|
search_nonascii(const char *p, const char *e)
|
||||||
{
|
{
|
||||||
#if ULONG_MAX == 18446744073709551615UL
|
#if SIZEOF_VALUE == 8
|
||||||
# define NONASCII_MASK 0x8080808080808080UL
|
# define NONASCII_MASK 0x8080808080808080LL
|
||||||
#elif ULONG_MAX == 4294967295UL
|
#elif SIZEOF_VALUE == 4
|
||||||
# define NONASCII_MASK 0x80808080UL
|
# define NONASCII_MASK 0x80808080UL
|
||||||
#endif
|
#endif
|
||||||
#ifdef NONASCII_MASK
|
#ifdef NONASCII_MASK
|
||||||
if (sizeof(long) * 2 < e - p) {
|
if (sizeof(VALUE) * 2 < e - p) {
|
||||||
const unsigned long *s, *t;
|
const VALUE *s, *t;
|
||||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
const VALUE lowbits = sizeof(VALUE) - 1;
|
||||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
|
||||||
while (p < (const char *)s) {
|
while (p < (const char *)s) {
|
||||||
if (!ISASCII(*p))
|
if (!ISASCII(*p))
|
||||||
return p;
|
return p;
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
t = (const VALUE*)(~lowbits & (VALUE)e);
|
||||||
while (s < t) {
|
while (s < t) {
|
||||||
if (*s & NONASCII_MASK) {
|
if (*s & NONASCII_MASK) {
|
||||||
t = s;
|
t = s;
|
||||||
|
@ -757,19 +757,19 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
|
||||||
|
|
||||||
#ifdef NONASCII_MASK
|
#ifdef NONASCII_MASK
|
||||||
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
|
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
|
||||||
static inline const long
|
static inline const VALUE
|
||||||
count_utf8_lead_bytes_with_ulong(const unsigned long *s)
|
count_utf8_lead_bytes_with_word(const VALUE *s)
|
||||||
{
|
{
|
||||||
unsigned long d = *s;
|
VALUE d = *s;
|
||||||
d |= ~(d>>1);
|
d |= ~(d>>1);
|
||||||
d >>= 6;
|
d >>= 6;
|
||||||
d &= NONASCII_MASK >> 7;
|
d &= NONASCII_MASK >> 7;
|
||||||
d += (d>>8);
|
d += (d>>8);
|
||||||
d += (d>>16);
|
d += (d>>16);
|
||||||
#if NONASCII_MASK == 0x8080808080808080UL
|
#if SIZEOF_VALUE == 8
|
||||||
d += (d>>32);
|
d += (d>>32);
|
||||||
#endif
|
#endif
|
||||||
return (long)(d&0xF);
|
return (d&0xF);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -786,18 +786,18 @@ str_strlen(VALUE str, rb_encoding *enc)
|
||||||
#ifdef NONASCII_MASK
|
#ifdef NONASCII_MASK
|
||||||
if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
|
if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
|
||||||
enc == rb_utf8_encoding()) {
|
enc == rb_utf8_encoding()) {
|
||||||
long len = 0;
|
VALUE len = 0;
|
||||||
if (sizeof(long) * 2 < e - p) {
|
if (sizeof(VALUE) * 2 < e - p) {
|
||||||
const unsigned long *s, *t;
|
const VALUE *s, *t;
|
||||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
const VALUE lowbits = sizeof(VALUE) - 1;
|
||||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
|
||||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
t = (const VALUE*)(~lowbits & (VALUE)e);
|
||||||
while (p < (const char *)s) {
|
while (p < (const char *)s) {
|
||||||
if (is_utf8_lead_byte(*p)) len++;
|
if (is_utf8_lead_byte(*p)) len++;
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
while (s < t) {
|
while (s < t) {
|
||||||
len += count_utf8_lead_bytes_with_ulong(s);
|
len += count_utf8_lead_bytes_with_word(s);
|
||||||
s++;
|
s++;
|
||||||
}
|
}
|
||||||
p = (const char *)s;
|
p = (const char *)s;
|
||||||
|
@ -806,7 +806,7 @@ str_strlen(VALUE str, rb_encoding *enc)
|
||||||
if (is_utf8_lead_byte(*p)) len++;
|
if (is_utf8_lead_byte(*p)) len++;
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
return len;
|
return (long)len;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
n = rb_enc_strlen_cr(p, e, enc, &cr);
|
n = rb_enc_strlen_cr(p, e, enc, &cr);
|
||||||
|
@ -1168,29 +1168,27 @@ str_offset(const char *p, const char *e, int nth, rb_encoding *enc, int singleby
|
||||||
static char *
|
static char *
|
||||||
str_utf8_nth(const char *p, const char *e, int nth)
|
str_utf8_nth(const char *p, const char *e, int nth)
|
||||||
{
|
{
|
||||||
if (sizeof(long) * 2 < nth) {
|
if (sizeof(VALUE) * 2 < nth) {
|
||||||
const unsigned long *s, *t;
|
const VALUE *s, *t;
|
||||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
const VALUE lowbits = sizeof(VALUE) - 1;
|
||||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
|
||||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
t = (const VALUE*)(~lowbits & (VALUE)e);
|
||||||
while (p < (const char *)s) {
|
while (p < (const char *)s) {
|
||||||
if (is_utf8_lead_byte(*p)) nth--;
|
if (is_utf8_lead_byte(*p)) nth--;
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
do {
|
do {
|
||||||
nth -= count_utf8_lead_bytes_with_ulong(s);
|
nth -= count_utf8_lead_bytes_with_word(s);
|
||||||
s++;
|
s++;
|
||||||
} while (s < t && sizeof(long) <= nth);
|
} while (s < t && sizeof(VALUE) <= nth);
|
||||||
p = (char *)s;
|
p = (char *)s;
|
||||||
}
|
}
|
||||||
if (0 < nth) {
|
while (p < e) {
|
||||||
while (p < e) {
|
if (is_utf8_lead_byte(*p)) {
|
||||||
if (is_utf8_lead_byte(*p)) {
|
if (nth == 0) break;
|
||||||
nth--;
|
nth--;
|
||||||
if (nth < 0) break;
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
}
|
}
|
||||||
|
p++;
|
||||||
}
|
}
|
||||||
return (char *)p;
|
return (char *)p;
|
||||||
}
|
}
|
||||||
|
|
|
@ -819,6 +819,8 @@ class TestM17N < Test::Unit::TestCase
|
||||||
assert_equal("\u{3044}", s[27])
|
assert_equal("\u{3044}", s[27])
|
||||||
assert_equal("\u{3046}", s[28])
|
assert_equal("\u{3046}", s[28])
|
||||||
assert_equal("\u{3048}", s[29])
|
assert_equal("\u{3048}", s[29])
|
||||||
|
s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}"
|
||||||
|
assert_equal("\u{3044}", s[24])
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_str_aref_len
|
def test_str_aref_len
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#define RUBY_VERSION "1.9.0"
|
#define RUBY_VERSION "1.9.0"
|
||||||
#define RUBY_RELEASE_DATE "2008-03-07"
|
#define RUBY_RELEASE_DATE "2008-03-08"
|
||||||
#define RUBY_VERSION_CODE 190
|
#define RUBY_VERSION_CODE 190
|
||||||
#define RUBY_RELEASE_CODE 20080307
|
#define RUBY_RELEASE_CODE 20080308
|
||||||
#define RUBY_PATCHLEVEL 0
|
#define RUBY_PATCHLEVEL 0
|
||||||
|
|
||||||
#define RUBY_VERSION_MAJOR 1
|
#define RUBY_VERSION_MAJOR 1
|
||||||
|
@ -9,7 +9,7 @@
|
||||||
#define RUBY_VERSION_TEENY 0
|
#define RUBY_VERSION_TEENY 0
|
||||||
#define RUBY_RELEASE_YEAR 2008
|
#define RUBY_RELEASE_YEAR 2008
|
||||||
#define RUBY_RELEASE_MONTH 3
|
#define RUBY_RELEASE_MONTH 3
|
||||||
#define RUBY_RELEASE_DAY 7
|
#define RUBY_RELEASE_DAY 8
|
||||||
|
|
||||||
#ifdef RUBY_EXTERN
|
#ifdef RUBY_EXTERN
|
||||||
RUBY_EXTERN const char ruby_version[];
|
RUBY_EXTERN const char ruby_version[];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue