mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/unicode.c, test/ruby/enc/test_case_mapping.rb: Implemented :fold
option for String#downcase by using case folding data from regular expression engine, and added a few simple tests. (with Kimihito Matsui) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53747 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
6ab70ad72a
commit
81515b2381
3 changed files with 39 additions and 5 deletions
|
@ -1,3 +1,10 @@
|
|||
Sat Feb 6 14:37:16 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/unicode.c, test/ruby/enc/test_case_mapping.rb: Implemented :fold
|
||||
option for String#downcase by using case folding data from
|
||||
regular expression engine, and added a few simple tests.
|
||||
(with Kimihito Matsui)
|
||||
|
||||
Fri Feb 5 20:08:59 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* test/ruby/enc/test_case_mapping.rb: added tests for :ascii option.
|
||||
|
|
|
@ -628,40 +628,61 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
|||
if (code<='z') { /* ASCII comes first */
|
||||
if (code>='a' && code<='z') {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
MODIFIED;
|
||||
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code==0x0069) /* i -> I WITH DOT ABOVE */
|
||||
code = 0x0130;
|
||||
else
|
||||
code += 'A'-'a';
|
||||
MODIFIED;
|
||||
}
|
||||
}
|
||||
else if (code>='A' && code<='Z') {
|
||||
if (flags&ONIGENC_CASE_DOWNCASE) {
|
||||
MODIFIED;
|
||||
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code==0x0049) /* I -> DOTLESS i */
|
||||
code = 0x0131;
|
||||
else
|
||||
code += 'a'-'A';
|
||||
MODIFIED;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00C0) { /* deal with non-ASCII; nothing relevant below U+00C0 */
|
||||
const CodePointList3 *folded;
|
||||
|
||||
if (code==0x0130) {
|
||||
if (flags&ONIGENC_CASE_DOWNCASE) {
|
||||
MODIFIED;
|
||||
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)
|
||||
code = 0x0069; /* I WITH DOT ABOVE -> i */
|
||||
else { /* make dot above explicit */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, 0x0069, to);
|
||||
code = 0x0307; /* dot above */
|
||||
}
|
||||
MODIFIED;
|
||||
}
|
||||
}
|
||||
/* the following case can be removed once we rely on data,
|
||||
/* the following special case for DOTLESS i -> I
|
||||
* can be removed once we rely on data,
|
||||
* because the mapping is always the same */
|
||||
else if (code==0x0131 && (flags&ONIGENC_CASE_UPCASE)) { /* DOTLESS i -> I */
|
||||
else if (code==0x0131 && (flags&ONIGENC_CASE_UPCASE)) {
|
||||
code = 0x0049; MODIFIED;
|
||||
}
|
||||
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) {
|
||||
if (flags&ONIGENC_CASE_FOLD) {
|
||||
const OnigCodePoint *next = folded->code;
|
||||
int count = OnigCodePointCount(folded->n);
|
||||
MODIFIED;
|
||||
if (count==1)
|
||||
code = *next;
|
||||
else if (count==2) {
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
else { /* count == 3 */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
/* switch from titlecase to lowercase for capitalize */
|
||||
|
|
|
@ -59,6 +59,12 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
|
|||
check_swapcase_properties 'rÉsumÉ dÜrst ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤĬŌŅ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢĲŇŐŃÆŁĨŻÀŤĬŌŅ', :ascii
|
||||
end
|
||||
|
||||
def test_fold_option
|
||||
check_downcase_properties 'ss', 'ß', :fold
|
||||
check_downcase_properties 'fifl', 'ﬁﬂ', :fold
|
||||
check_downcase_properties 'σ', 'ς', :fold
|
||||
end
|
||||
|
||||
def test_turcic
|
||||
check_downcase_properties 'yukihiro matsumoto (matz)', 'Yukihiro MATSUMOTO (MATZ)', :turkic
|
||||
check_upcase_properties 'YUKİHİRO MATSUMOTO (MATZ)', 'Yukihiro Matsumoto (matz)', :turkic
|
||||
|
|
Loading…
Reference in a new issue