mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* string.c (coderange_scan): don't call mbclen functions for ASCII
characters with ASCII compatible encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15129 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
133a6011f7
commit
35a4fb5ef6
2 changed files with 76 additions and 19 deletions
|
@ -1,3 +1,8 @@
|
|||
Sat Jan 19 22:41:39 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* string.c (coderange_scan): don't call mbclen functions for ASCII
|
||||
characters with ASCII compatible encoding.
|
||||
|
||||
Sat Jan 19 21:00:34 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* lib/rdoc/template.rb (RDoc): defined to avoid uninitialized constant
|
||||
|
|
90
string.c
90
string.c
|
@ -115,40 +115,92 @@ single_byte_optimizable(VALUE str)
|
|||
|
||||
VALUE rb_fs;
|
||||
|
||||
static inline const char *
|
||||
search_nonascii(const char *p, const char *e)
|
||||
{
|
||||
#if ULONG_MAX == 18446744073709551615UL
|
||||
# define NONASCII_MASK 0x8080808080808080UL
|
||||
#elif ULONG_MAX == 4294967295UL
|
||||
# define NONASCII_MASK 0x80808080UL
|
||||
#endif
|
||||
#ifdef NONASCII_MASK
|
||||
if (sizeof(long) * 2 < e - p) {
|
||||
const unsigned long *s, *t;
|
||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
||||
while (p < (const char *)s) {
|
||||
if (!ISASCII(*p))
|
||||
return p;
|
||||
p++;
|
||||
}
|
||||
while (s < t) {
|
||||
if (*s & NONASCII_MASK) {
|
||||
t = s;
|
||||
break;
|
||||
}
|
||||
s++;
|
||||
}
|
||||
p = (const char *)t;
|
||||
}
|
||||
#endif
|
||||
while (p < e) {
|
||||
if (!ISASCII(*p))
|
||||
return p;
|
||||
p++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
coderange_scan(const char *p, long len, rb_encoding *enc)
|
||||
{
|
||||
const char *e = p + len;
|
||||
int cr;
|
||||
|
||||
if (rb_enc_to_index(enc) == 0) {
|
||||
/* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
|
||||
while (p < e) {
|
||||
if (!ISASCII((unsigned char)*p)) {
|
||||
return ENC_CODERANGE_VALID;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
return ENC_CODERANGE_7BIT;
|
||||
p = search_nonascii(p, e);
|
||||
return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
|
||||
}
|
||||
|
||||
if (rb_enc_asciicompat(enc)) {
|
||||
p = search_nonascii(p, e);
|
||||
if (!p) {
|
||||
return ENC_CODERANGE_7BIT;
|
||||
}
|
||||
while (p < e) {
|
||||
int ret = rb_enc_precise_mbclen(p, e, enc);
|
||||
int len = MBCLEN_CHARFOUND(ret);
|
||||
if (!len) {
|
||||
return ENC_CODERANGE_BROKEN;
|
||||
}
|
||||
p += len;
|
||||
if (p < e) {
|
||||
p = search_nonascii(p, e);
|
||||
if (!p) {
|
||||
return ENC_CODERANGE_VALID;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (e < p) {
|
||||
return ENC_CODERANGE_BROKEN;
|
||||
}
|
||||
return ENC_CODERANGE_VALID;
|
||||
}
|
||||
|
||||
cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
|
||||
while (p < e) {
|
||||
int ret = rb_enc_precise_mbclen(p, e, enc);
|
||||
int len = MBCLEN_CHARFOUND(ret);
|
||||
|
||||
if (len) {
|
||||
if (len != 1 || !ISASCII((unsigned char)*p)) {
|
||||
cr = ENC_CODERANGE_VALID;
|
||||
}
|
||||
p += len;
|
||||
}
|
||||
else {
|
||||
cr = ENC_CODERANGE_BROKEN;
|
||||
break;
|
||||
if (!len) {
|
||||
return ENC_CODERANGE_BROKEN;
|
||||
}
|
||||
p += len;
|
||||
}
|
||||
return cr;
|
||||
if (e < p) {
|
||||
return ENC_CODERANGE_BROKEN;
|
||||
}
|
||||
return ENC_CODERANGE_VALID;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
Loading…
Add table
Reference in a new issue