1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* string.c (rb_str_squeeze_bang): specialized for 7bit characters in
  ASCII compatible strings.

* string.c (rb_str_count): ditto.

* string.c (tr_trans): preserve 7bit/valid coderange flag.

* string.c (rb_str_squeeze_bang): preserve previous coderange value.

* string.c (rb_str_lstrip_bang): ditto.

* string.c (rb_str_rstrip_bang): ditto.

* encoding.c (rb_default_external_encoding): preserve
  default_external_encoding in a static variable.

* string.c (single_byte_optimizable): check coderange first, to
  reduce the number of calls to rb_enc_from_index().

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19364 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2008-09-15 14:40:00 +00:00
parent e0f064cf3c
commit b121e47b44
3 changed files with 121 additions and 32 deletions

View file

@ -9,6 +9,27 @@ Mon Sep 15 23:28:28 2008 Tanaka Akira <akr@fsij.org>
* io.c (rb_scan_open_args): call FilePathValue before encoding * io.c (rb_scan_open_args): call FilePathValue before encoding
conversion. conversion.
Mon Sep 15 22:11:07 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (rb_str_squeeze_bang): specialized for 7bit characters in
ASCII compatible strings.
* string.c (rb_str_count): ditto.
* string.c (tr_trans): preserve 7bit/valid coderange flag.
* string.c (rb_str_squeeze_bang): preserve previous coderange value.
* string.c (rb_str_lstrip_bang): ditto.
* string.c (rb_str_rstrip_bang): ditto.
* encoding.c (rb_default_external_encoding): preserve
default_external_encoding in a static variable.
* string.c (single_byte_optimizable): check coderange first, to
reduce number of calling rb_enc_from_index().
Mon Sep 15 20:57:00 2008 Yuki Sonoda (Yugui) <yugui@yugui.jp> Mon Sep 15 20:57:00 2008 Yuki Sonoda (Yugui) <yugui@yugui.jp>
* lib/matrix.rb (Matrix#eql?): fixed [ruby-dev:36298]. * lib/matrix.rb (Matrix#eql?): fixed [ruby-dev:36298].

View file

@ -997,11 +997,15 @@ rb_filesystem_encoding(void)
} }
static int default_external_index; static int default_external_index;
static rb_encoding *default_external;
rb_encoding * rb_encoding *
rb_default_external_encoding(void) rb_default_external_encoding(void)
{ {
return rb_enc_from_index(default_external_index); if (!default_external) {
default_external = rb_enc_from_index(default_external_index);
}
return default_external;
} }
VALUE VALUE
@ -1028,6 +1032,7 @@ void
rb_enc_set_default_external(VALUE encoding) rb_enc_set_default_external(VALUE encoding)
{ {
default_external_index = rb_enc_to_index(rb_to_encoding(encoding)); default_external_index = rb_enc_to_index(rb_to_encoding(encoding));
default_external = 0;
} }
/* /*

125
string.c
View file

@ -115,15 +115,16 @@ VALUE rb_cSymbol;
static int static int
single_byte_optimizable(VALUE str) single_byte_optimizable(VALUE str)
{ {
rb_encoding *enc = STR_ENC_GET(str); rb_encoding *enc;
if (rb_enc_mbmaxlen(enc) == 1)
return 1;
/* Conservative. It may be ENC_CODERANGE_UNKNOWN. */ /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
return 1; return 1;
enc = STR_ENC_GET(str);
if (rb_enc_mbmaxlen(enc) == 1)
return 1;
/* Conservative. Possibly single byte. /* Conservative. Possibly single byte.
* "\xa1" in Shift_JIS for example. */ * "\xa1" in Shift_JIS for example. */
return 0; return 0;
@ -4325,6 +4326,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
char *s, *send; char *s, *send;
VALUE hash = 0; VALUE hash = 0;
int singlebyte = single_byte_optimizable(str); int singlebyte = single_byte_optimizable(str);
int cr, cr1, cr2;
StringValue(src); StringValue(src);
StringValue(repl); StringValue(repl);
@ -4333,6 +4335,12 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
return rb_str_delete_bang(1, &src, str); return rb_str_delete_bang(1, &src, str);
} }
cr = ENC_CODERANGE(str);
cr1 = ENC_CODERANGE(src);
cr2 = ENC_CODERANGE(repl);
if (cr != cr1 || cr1 != cr2) {
cr = ENC_CODERANGE_UNKNOWN;
}
e1 = rb_enc_check(str, src); e1 = rb_enc_check(str, src);
e2 = rb_enc_check(str, repl); e2 = rb_enc_check(str, repl);
if (e1 == e2) { if (e1 == e2) {
@ -4517,6 +4525,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
RSTRING(str)->as.heap.aux.capa = max; RSTRING(str)->as.heap.aux.capa = max;
} }
ENC_CODERANGE_SET(str, cr);
if (modify) { if (modify) {
rb_enc_associate(str, enc); rb_enc_associate(str, enc);
return str; return str;
@ -4738,6 +4747,8 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
char *s, *send, *t; char *s, *send, *t;
int save, modify = 0; int save, modify = 0;
int i; int i;
int ascompat, singlebyte = single_byte_optimizable(str);
int cr = ENC_CODERANGE(str);
if (argc == 0) { if (argc == 0) {
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
@ -4757,29 +4768,53 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
if (!s || RSTRING_LEN(str) == 0) return Qnil; if (!s || RSTRING_LEN(str) == 0) return Qnil;
send = RSTRING_END(str); send = RSTRING_END(str);
save = -1; save = -1;
while (s < send) { ascompat = rb_enc_asciicompat(enc);
unsigned int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc);
if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { if (singlebyte) {
if (t != s) rb_enc_mbcput(c, t, enc); while (s < send) {
save = c; unsigned int c = *(unsigned char*)s++;
t += clen; if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c;
}
}
} else {
while (s < send) {
unsigned int c;
int clen;
if (ascompat && (c = *(unsigned char*)s) < 0x80) {
if (c != save || (argc > 0 && !squeez[c])) {
*t++ = save = c;
}
s++;
}
else {
c = rb_enc_codepoint(s, send, enc);
clen = rb_enc_codelen(c, enc);
if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
if (t != s) rb_enc_mbcput(c, t, enc);
save = c;
t += clen;
}
s += clen;
}
} }
s += clen;
} }
*t = '\0'; *t = '\0';
if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) { if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
STR_SET_LEN(str, t - RSTRING_PTR(str)); STR_SET_LEN(str, t - RSTRING_PTR(str));
modify = 1; modify = 1;
} }
ENC_CODERANGE_SET(str, cr);
if (modify) return str; if (modify) return str;
return Qnil; return Qnil;
} }
/* /*
* call-seq: * call-seq:
* str.squeeze([other_str]*) => new_str * str.squeeze([other_str]*) => new_str
* *
@ -4864,6 +4899,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
VALUE del = 0, nodel = 0; VALUE del = 0, nodel = 0;
char *s, *send; char *s, *send;
int i; int i;
int ascompat;
if (argc < 1) { if (argc < 1) {
rb_raise(rb_eArgError, "wrong number of arguments"); rb_raise(rb_eArgError, "wrong number of arguments");
@ -4873,22 +4909,36 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
StringValue(s); StringValue(s);
enc = rb_enc_check(str, s); enc = rb_enc_check(str, s);
tr_setup_table(s, table,i==0, &del, &nodel, enc); tr_setup_table(s, table, i==0, &del, &nodel, enc);
} }
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0); if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
send = RSTRING_END(str); send = RSTRING_END(str);
i = 0; i = 0;
while (s < send) { ascompat = rb_enc_asciicompat(enc);
unsigned int c = rb_enc_codepoint(s, send, enc);
int clen = rb_enc_codelen(c, enc);
if (tr_find(c, table, del, nodel)) { while (s < send) {
i++; unsigned int c;
int clen;
if (ascompat && (c = *(unsigned char*)s) < 0x80) {
clen = 1;
if (table[c]) {
i++;
}
s++;
}
else {
c = rb_enc_codepoint(s, send, enc);
clen = rb_enc_codelen(c, enc);
if (tr_find(c, table, del, nodel)) {
i++;
}
s += clen;
} }
s += clen;
} }
return INT2NUM(i); return INT2NUM(i);
} }
@ -5549,8 +5599,10 @@ rb_str_lstrip_bang(VALUE str)
{ {
rb_encoding *enc; rb_encoding *enc;
char *s, *t, *e; char *s, *t, *e;
int cr = ENC_CODERANGE(str);
rb_str_modify(str); rb_str_modify(str);
ENC_CODERANGE_SET(str, cr);
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil; if (!s || RSTRING_LEN(str) == 0) return Qnil;
@ -5612,27 +5664,38 @@ rb_str_rstrip_bang(VALUE str)
rb_encoding *enc; rb_encoding *enc;
char *s, *t, *e; char *s, *t, *e;
int space_seen = Qfalse; int space_seen = Qfalse;
int cr = ENC_CODERANGE(str);
rb_str_modify(str); rb_str_modify(str);
ENC_CODERANGE_SET(str, cr);
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
if (!s || RSTRING_LEN(str) == 0) return Qnil; if (!s || RSTRING_LEN(str) == 0) return Qnil;
t = e = RSTRING_END(str); t = e = RSTRING_END(str);
while (s < e) {
unsigned int cc = rb_enc_codepoint(s, e, enc);
if (!cc || rb_enc_isspace(cc, enc)) { if (single_byte_optimizable(str)) {
if (!space_seen) t = s; /* remove trailing '\0's */
space_seen = Qtrue; while (s < t && t[-1] == '\0') t--;
/* remove trailing spaces */
while (s < t && rb_enc_isspace(*(t-1), enc)) t--;
} else {
while (s < e) {
unsigned int cc = rb_enc_codepoint(s, e, enc);
if (!cc || rb_enc_isspace(cc, enc)) {
if (!space_seen) t = s;
space_seen = Qtrue;
}
else {
space_seen = Qfalse;
}
s += rb_enc_codelen(cc, enc);
} }
else { if (!space_seen) t = s;
space_seen = Qfalse;
}
s += rb_enc_codelen(cc, enc);
} }
if (!space_seen) t = s;
if (t < e) { if (t < e) {
rb_str_modify(str);
STR_SET_LEN(str, t-RSTRING_PTR(str)); STR_SET_LEN(str, t-RSTRING_PTR(str));
RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
return str; return str;