From b2168c5a2a37be1e9937659c499bb30be1db286e Mon Sep 17 00:00:00 2001 From: akr Date: Sun, 6 Jan 2008 16:38:04 +0000 Subject: [PATCH] * string.c (coderange_scan): optimize ASCII-8BIT string. (rb_enc_str_buf_cat): don't call coderange_scan if possible. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14915 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++++ string.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index ff88c49f1b..80425f8542 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Mon Jan 7 01:36:49 2008 Tanaka Akira + + * string.c (coderange_scan): optimize ASCII-8BIT string. + (rb_enc_str_buf_cat): don't call coderange_scan if possible. + Mon Jan 7 01:05:45 2008 Tanaka Akira * lib/erb.rb (ERB::Revision): cut off locale dependent string in Date diff --git a/string.c b/string.c index 6679ff44fb..c9d0fa3eef 100644 --- a/string.c +++ b/string.c @@ -119,6 +119,17 @@ coderange_scan(const char *p, long len, rb_encoding *enc) const char *e = p + len; int cr; + if (rb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */ + while (p < e) { + if (!ISASCII((unsigned char)*p)) { + return ENC_CODERANGE_VALID; + } + p++; + } + return ENC_CODERANGE_7BIT; + } + cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); @@ -1056,12 +1067,22 @@ rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc) int ptr_a8 = rb_enc_to_index(ptr_enc) == 0; str_cr = ENC_CODERANGE(str); - ptr_cr = coderange_scan(ptr, len, ptr_enc); - if (str_cr == ENC_CODERANGE_UNKNOWN) { - if (str_a8 ? 
!ptr_a8 - : (str_enc != ptr_enc && ptr_cr != ENC_CODERANGE_7BIT)) { - str_cr = rb_enc_str_coderange(str); + if (str_enc == ptr_enc) { + if (str_cr == ENC_CODERANGE_UNKNOWN || + (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) { + ptr_cr = ENC_CODERANGE_UNKNOWN; + } + else { + ptr_cr = coderange_scan(ptr, len, ptr_enc); + } + } + else { + ptr_cr = coderange_scan(ptr, len, ptr_enc); + if (str_cr == ENC_CODERANGE_UNKNOWN) { + if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) { + str_cr = rb_enc_str_coderange(str); + } } }