From f0fc6ec872f043836f01455b16feee6bb9ed9eb9 Mon Sep 17 00:00:00 2001 From: duerst Date: Wed, 8 Jun 2016 12:28:42 +0000 Subject: [PATCH] * string.c: New static function rb_str_ascii_casemap; special-casing :ascii option in rb_str_upcase_bang and rb_str_downcase_bang. * regenc.c: Fix a bug (wrong use of unnecessary slack at end of string). * regenc.h -> include/ruby/oniguruma.h: Move declaration of onigenc_ascii_only_case_map so that it is visible in string.c. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55329 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 10 ++++++++++ include/ruby/oniguruma.h | 8 ++++++++ regenc.c | 4 +--- regenc.h | 1 - string.c | 40 ++++++++++++++++++++++++++++++++-------- 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9d00b68313..23806d8355 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Wed Jun 8 21:28:36 2016 Martin Duerst + + * string.c: New static function rb_str_ascii_casemap; special-casing + :ascii option in rb_str_upcase_bang and rb_str_downcase_bang. + + * regenc.c: Fix a bug (wrong use of unnecessary slack at end of string). + + * regenc.h -> include/ruby/oniguruma.h: Move declaration of + onigenc_ascii_only_case_map so that it is visible in string.c. + Wed Jun 8 20:33:44 2016 Naohisa Goto * include/ruby/intern.h: Remove excess semicolons in PUREFUNC(). diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 51c200a351..523fc7089a 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -229,6 +229,14 @@ ONIG_EXTERN const OnigEncodingType OnigEncodingASCII; #define ONIG_ENCODING_UNDEF ((OnigEncoding )0) +#ifdef ONIG_CASE_MAPPING + /* this declaration needs to be here because it is used in string.c */ + ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP, + const OnigUChar** pp, const OnigUChar* end, + OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc)); +#endif /* ONIG_CASE_MAPPING */ + /* work size */ #define ONIGENC_CODE_TO_MBC_MAXLEN 7 diff --git a/regenc.c b/regenc.c index abc0c029c8..d345b0f8d5 100644 --- a/regenc.c +++ b/regenc.c @@ -968,9 +968,7 @@ onigenc_ascii_only_case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, cons OnigCaseFoldType flags = *flagP; int codepoint_length; - to_end -= 4; /* longest possible length of a single character */ - - while (*pp str or nil @@ -5855,7 +5878,8 @@ rb_str_upcase_bang(int argc, VALUE *argv, VALUE str) str_modify_keep_cr(str); enc = STR_ENC_GET(str); rb_str_check_dummy_enc(enc); - if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) { + if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1) + || (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) { char *s = RSTRING_PTR(str), *send = RSTRING_END(str); while (s < send) { @@ -5914,14 +5938,14 @@ static VALUE rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; - int modify = 0; OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE; flags = check_case_options(argc, argv, flags); str_modify_keep_cr(str); enc = STR_ENC_GET(str); rb_str_check_dummy_enc(enc); - if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT) { + if ((flags&ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc)==1) + || (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) { char *s = RSTRING_PTR(str), *send = RSTRING_END(str); while (s < send) { @@ -5929,17 +5953,17 @@ rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { *s = 'a' + (c - 'A'); - modify = 1; + flags |= ONIGENC_CASE_MODIFIED; } s++; } } - else { + else if (flags&ONIGENC_CASE_ASCII_ONLY) + rb_str_ascii_casemap(str, &flags, enc); + else str_shared_replace(str, rb_str_casemap(str, &flags, enc)); - modify = ONIGENC_CASE_MODIFIED & flags; - } - if (modify) return str; + if (ONIGENC_CASE_MODIFIED&flags) return str; return Qnil; }