mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/unicode/case-folding.rb, casefold.h: Tweaked handling of 6
special cases in CaseUnfold_11_Table.
* enc/unicode.c: Adjustments for above.
* test/ruby/enc/test_case_mapping.rb: Tests for the above: Some tests in
test_titlecase activated; test_greek added. A test in test_cherokee fixed.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54383 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
49f25a1299
commit
78f540019a
5 changed files with 85 additions and 48 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Tue Mar 29 16:53:44 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/unicode/case-folding.rb, casefold.h: Tweaked handling of 6
|
||||
special cases in CaseUnfold_11_Table.
|
||||
|
||||
* enc/unicode.c: Adjustments for above.
|
||||
|
||||
* test/ruby/enc/test_case_mapping.rb: Tests for the above: Some tests in
|
||||
test_titlecase activated; test_greek added. A test in test_cherokee fixed.
|
||||
|
||||
Tue Mar 29 13:31:00 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/unicode.c: Cleaned up some comments.
|
||||
|
|
|
@ -750,12 +750,17 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
|||
}
|
||||
}
|
||||
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */
|
||||
if (flags&OnigCaseFoldFlags(folded->n)) {
|
||||
int count = OnigCodePointCount(folded->n);
|
||||
const OnigCodePoint *next = folded->code;
|
||||
if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
|
||||
MODIFIED;
|
||||
if (count==1)
|
||||
code = *next;
|
||||
if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE)
|
||||
code = folded->code[1];
|
||||
else
|
||||
code = folded->code[0];
|
||||
}
|
||||
else if ((flags&(ONIGENC_CASE_UPCASE))
|
||||
&& (code==0x03B9||code==0x03BC)) { /* GREEK SMALL LETTERs IOTA/MU */
|
||||
MODIFIED;
|
||||
code = folded->code[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -230,38 +230,61 @@ class CaseMapping
|
|||
def flags(from, type, to)
|
||||
# types: CaseFold_11, CaseUnfold_11, CaseUnfold_12, CaseUnfold_13
|
||||
flags = ""
|
||||
flags += '|F' if type=='CaseFold_11'
|
||||
from = Array(from).map {|i| "%04X" % i}.join(" ")
|
||||
to = Array(to).map {|i| "%04X" % i}.join(" ")
|
||||
to = to.split(/ /).first if type=='CaseUnfold_11'
|
||||
item = @mappings[from]
|
||||
if item
|
||||
flags += '|U' if to==item.upper
|
||||
flags += '|D' if to==item.lower
|
||||
specials_index = nil
|
||||
specials = []
|
||||
unless item.upper == item.title
|
||||
if item.code == item.title
|
||||
flags += '|IT'
|
||||
else
|
||||
flags += '|ST'
|
||||
specials << item.title
|
||||
specials_index = nil
|
||||
specials = []
|
||||
case type
|
||||
when 'CaseFold_11'
|
||||
flags += '|F'
|
||||
if item
|
||||
flags += '|U' if to==item.upper
|
||||
flags += '|D' if to==item.lower
|
||||
unless item.upper == item.title
|
||||
if item.code == item.title
|
||||
flags += '|IT'
|
||||
else
|
||||
flags += '|ST'
|
||||
specials << item.title
|
||||
end
|
||||
end
|
||||
unless item.lower.nil? or item.lower==from or item.lower==to
|
||||
specials << item.lower
|
||||
flags += '|SL'
|
||||
end
|
||||
unless item.upper.nil? or item.upper==from or item.upper==to
|
||||
specials << item.upper
|
||||
flags += '|SU'
|
||||
end
|
||||
end
|
||||
unless item.lower.nil? or item.lower==from or item.lower==to
|
||||
specials << item.lower
|
||||
flags += '|SL'
|
||||
end
|
||||
unless item.upper.nil? or item.upper==from or item.upper==to
|
||||
specials << item.upper
|
||||
flags += '|SU'
|
||||
end
|
||||
if specials.first
|
||||
flags += "|I(#{@specials_length})"
|
||||
@specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
|
||||
@specials << specials
|
||||
when 'CaseUnfold_11'
|
||||
to = to.split(/ /)
|
||||
if item
|
||||
case to.first
|
||||
when item.upper then flags += '|U'
|
||||
when item.lower then flags += '|D'
|
||||
else
|
||||
unless from=='03B9' or from=='03BC'
|
||||
warn 'Unpredicted case 0; check data or adjust program (enc/unicode/case_folding.rb).'
|
||||
end
|
||||
end
|
||||
unless item.upper == item.title
|
||||
if item.code == item.title
|
||||
warn 'Unpredicted case 1; check data or adjust program (enc/unicode/case_folding.rb).'
|
||||
elsif item.title==to[1]
|
||||
flags += '|ST'
|
||||
else
|
||||
warn 'Unpredicted case 2; check data or adjust program (enc/unicode/case_folding.rb).'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
unless specials.empty?
|
||||
flags += "|I(#{@specials_length})"
|
||||
@specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
|
||||
@specials << specials
|
||||
end
|
||||
flags
|
||||
end
|
||||
|
||||
|
|
|
@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
|
|||
{0x01b9, {1|U, {0x01b8}}},
|
||||
{0x01bd, {1|U, {0x01bc}}},
|
||||
{0x01bf, {1|U, {0x01f7}}},
|
||||
{0x01c6, {2|U|ST|I(347), {0x01c4, 0x01c5}}},
|
||||
{0x01c9, {2|U|ST|I(348), {0x01c7, 0x01c8}}},
|
||||
{0x01cc, {2|U|ST|I(349), {0x01ca, 0x01cb}}},
|
||||
{0x01c6, {2|U|ST, {0x01c4, 0x01c5}}},
|
||||
{0x01c9, {2|U|ST, {0x01c7, 0x01c8}}},
|
||||
{0x01cc, {2|U|ST, {0x01ca, 0x01cb}}},
|
||||
{0x01ce, {1|U, {0x01cd}}},
|
||||
{0x01d0, {1|U, {0x01cf}}},
|
||||
{0x01d2, {1|U, {0x01d1}}},
|
||||
|
@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
|
|||
{0x01eb, {1|U, {0x01ea}}},
|
||||
{0x01ed, {1|U, {0x01ec}}},
|
||||
{0x01ef, {1|U, {0x01ee}}},
|
||||
{0x01f3, {2|U|ST|I(350), {0x01f1, 0x01f2}}},
|
||||
{0x01f3, {2|U|ST, {0x01f1, 0x01f2}}},
|
||||
{0x01f5, {1|U, {0x01f4}}},
|
||||
{0x01f9, {1|U, {0x01f8}}},
|
||||
{0x01fb, {1|U, {0x01fa}}},
|
||||
|
@ -3412,10 +3412,10 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
|
|||
{0x03b6, {1|U, {0x0396}}},
|
||||
{0x03b7, {1|U, {0x0397}}},
|
||||
{0x03b8, {3|U, {0x0398, 0x03d1, 0x03f4}}},
|
||||
{0x03b9, {3|SU|I(351), {0x0345, 0x0399, 0x1fbe}}},
|
||||
{0x03b9, {3, {0x0345, 0x0399, 0x1fbe}}},
|
||||
{0x03ba, {2|U, {0x039a, 0x03f0}}},
|
||||
{0x03bb, {1|U, {0x039b}}},
|
||||
{0x03bc, {2|SU|I(352), {0x00b5, 0x039c}}},
|
||||
{0x03bc, {2, {0x00b5, 0x039c}}},
|
||||
{0x03bd, {1|U, {0x039d}}},
|
||||
{0x03be, {1|U, {0x039e}}},
|
||||
{0x03bf, {1|U, {0x039f}}},
|
||||
|
@ -6371,10 +6371,4 @@ OnigCodePoint CaseMappingSpecials[] = {
|
|||
L(2)|0x0544, 0x056B, L(2)|0x0544, 0x053B,
|
||||
L(2)|0x054E, 0x0576, L(2)|0x054E, 0x0546,
|
||||
L(2)|0x0544, 0x056D, L(2)|0x0544, 0x053D,
|
||||
L(1)|0x01C5,
|
||||
L(1)|0x01C8,
|
||||
L(1)|0x01CB,
|
||||
L(1)|0x01F2,
|
||||
L(1)|0x0399,
|
||||
L(1)|0x039C,
|
||||
};
|
||||
|
|
|
@ -74,7 +74,7 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
|
|||
check_downcase_properties "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', :lithuanian
|
||||
check_upcase_properties 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", :lithuanian
|
||||
check_capitalize_suffixes "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'
|
||||
assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', :fold
|
||||
assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'.downcase(:fold)
|
||||
assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79".downcase(:fold)
|
||||
end
|
||||
|
||||
|
@ -82,15 +82,15 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
|
|||
check_downcase_properties 'dz dž lj nj', 'Dz Dž Lj Nj', :lithuanian
|
||||
check_downcase_properties 'dz dž lj nj', 'DZ DŽ LJ NJ', :lithuanian
|
||||
check_upcase_properties 'DZ DŽ LJ NJ', 'Dz Dž Lj Nj', :lithuanian
|
||||
# check_upcase_properties 'DZ DŽ LJ NJ', 'dz dž lj nj', :lithuanian
|
||||
check_upcase_properties 'DZ DŽ LJ NJ', 'dz dž lj nj', :lithuanian
|
||||
check_capitalize_properties 'Dz', 'DZ', :lithuanian
|
||||
check_capitalize_properties 'Dž', 'DŽ', :lithuanian
|
||||
check_capitalize_properties 'Lj', 'LJ', :lithuanian
|
||||
check_capitalize_properties 'Nj', 'NJ', :lithuanian
|
||||
# check_capitalize_properties 'Dz', 'dz', :lithuanian
|
||||
# check_capitalize_properties 'Dž', 'dž', :lithuanian
|
||||
# check_capitalize_properties 'Lj', 'lj', :lithuanian
|
||||
# check_capitalize_properties 'Nj', 'nj', :lithuanian
|
||||
check_capitalize_properties 'Dz', 'dz', :lithuanian
|
||||
check_capitalize_properties 'Dž', 'dž', :lithuanian
|
||||
check_capitalize_properties 'Lj', 'lj', :lithuanian
|
||||
check_capitalize_properties 'Nj', 'nj', :lithuanian
|
||||
end
|
||||
|
||||
def test_ascii_option
|
||||
|
@ -116,6 +116,11 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
|
|||
check_downcase_properties "yuki\u0307hi\u0307ro matsumoto (matz)", 'YUKİHİRO MATSUMOTO (MATZ)', :lithuanian
|
||||
end
|
||||
|
||||
def test_greek
|
||||
check_downcase_properties 'αβγδεζηθικλμνξοπρστυφχψω', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ', :lithuanian
|
||||
check_upcase_properties 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ', 'αβγδεζηθικλμνξοπρστυφχψω', :lithuanian
|
||||
end
|
||||
|
||||
def no_longer_a_test_buffer_allocations
|
||||
assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic, :lithuanian)
|
||||
assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic, :lithuanian)
|
||||
|
|
Loading…
Reference in a new issue