1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* string.c (coderange_scan): don't call mbclen functions for ASCII characters in ASCII-compatible encodings.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15129 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-01-19 13:42:50 +00:00
parent 133a6011f7
commit 35a4fb5ef6
2 changed files with 76 additions and 19 deletions

View file

@ -1,3 +1,8 @@
Sat Jan 19 22:41:39 2008 Tanaka Akira <akr@fsij.org>
* string.c (coderange_scan): don't call mbclen functions for ASCII
characters with ASCII compatible encoding.
Sat Jan 19 21:00:34 2008 Tanaka Akira <akr@fsij.org>
* lib/rdoc/template.rb (RDoc): defined to avoid uninitialized constant

View file

@ -115,40 +115,92 @@ single_byte_optimizable(VALUE str)
VALUE rb_fs;
/*
 * Find the first byte in [p, e) for which ISASCII() is false.
 *
 * p: start of the byte range to scan.
 * e: one past the last byte of the range.
 *
 * Returns a pointer to the first such byte, or NULL when every byte in
 * the range passes ISASCII().
 *
 * When unsigned long is 32 or 64 bits wide, NONASCII_MASK is defined with
 * the top bit of every byte set, and ranges longer than two longs are
 * scanned one aligned word at a time before falling back to a byte scan.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if ULONG_MAX == 18446744073709551615UL
# define NONASCII_MASK 0x8080808080808080UL
#elif ULONG_MAX == 4294967295UL
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if (e - p > sizeof(long) * 2) {
        /* NOTE(review): assumes VALUE is an integer type wide enough to
         * hold a pointer -- confirm against its project definition. */
        const VALUE align_mask = sizeof(unsigned long) - 1;
        /* p rounded up, and e rounded down, to a word boundary. */
        const unsigned long *word =
            (const unsigned long *)(((VALUE)p + align_mask) & ~align_mask);
        const unsigned long *word_end =
            (const unsigned long *)((VALUE)e & ~align_mask);

        /* Unaligned head: check byte by byte up to the first boundary. */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p)) {
                return p;
            }
        }

        /* Aligned middle: skip whole words that have no high bit set. */
        while (word < word_end && !(*word & NONASCII_MASK)) {
            word++;
        }

        /* Resume at the offending word, or at the aligned end if none was
         * found; the byte loop below locates the exact byte. */
        p = (const char *)word;
    }
#endif
    for (; p < e; p++) {
        if (!ISASCII(*p)) {
            return p;
        }
    }
    return NULL;
}
/*
 * Classify the len bytes starting at p under encoding enc.
 *
 * Returns:
 *   ENC_CODERANGE_7BIT   - search_nonascii() found no non-ASCII byte
 *                          (only reported for ASCII-8BIT and
 *                          ASCII-compatible encodings);
 *   ENC_CODERANGE_VALID  - every character examined was accepted by
 *                          rb_enc_precise_mbclen();
 *   ENC_CODERANGE_BROKEN - some character was rejected, or scanning
 *                          stepped past the end of the buffer.
 *
 * For ASCII-compatible encodings, runs of ASCII bytes are skipped with
 * search_nonascii() so that rb_enc_precise_mbclen() is only called at
 * non-ASCII bytes.
 */
static int
coderange_scan(const char *p, long len, rb_encoding *enc)
{
    const char *e = p + len;

    if (rb_enc_to_index(enc) == 0) {
        /* enc is ASCII-8BIT.  An ASCII-8BIT string can never be broken. */
        p = search_nonascii(p, e);
        return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
    }

    if (rb_enc_asciicompat(enc)) {
        p = search_nonascii(p, e);
        if (!p) {
            return ENC_CODERANGE_7BIT;
        }
        /* p now points at a non-ASCII byte: validate one character, then
         * jump ahead to the next non-ASCII byte. */
        while (p < e) {
            int ret = rb_enc_precise_mbclen(p, e, enc);
            int clen = MBCLEN_CHARFOUND(ret);

            if (!clen) {
                return ENC_CODERANGE_BROKEN;
            }
            p += clen;
            if (p < e) {
                p = search_nonascii(p, e);
                if (!p) {
                    /* Only ASCII bytes remain after a valid character. */
                    return ENC_CODERANGE_VALID;
                }
            }
        }
        if (e < p) {
            /* The last character claimed more bytes than were available. */
            return ENC_CODERANGE_BROKEN;
        }
        return ENC_CODERANGE_VALID;
    }

    /* Not ASCII-compatible: every character has to be checked. */
    while (p < e) {
        int ret = rb_enc_precise_mbclen(p, e, enc);
        int clen = MBCLEN_CHARFOUND(ret);

        if (!clen) {
            return ENC_CODERANGE_BROKEN;
        }
        p += clen;
    }
    if (e < p) {
        return ENC_CODERANGE_BROKEN;
    }
    return ENC_CODERANGE_VALID;
}
int