From 2ac58e689131e368a0148b05856054c0e04d7409 Mon Sep 17 00:00:00 2001 From: duerst Date: Wed, 13 Jul 2016 09:09:47 +0000 Subject: [PATCH] * enc/iso_8859_9.c, test/ruby/enc/test_case_comprehensive.rb: Implement non-ASCII case conversion for ISO-8859-9, by Kazuki Iijima. * enc/iso_8859_9.c: Exclude dotless i/I with dot from case-insensitive matching because they are not a case pair. * test/ruby/enc/test_iso_8859.rb: Make test coverage for ISO-8859-9 a bit more complete. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55666 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 +++++ enc/iso_8859_9.c | 60 ++++++++++++++++++++++-- test/ruby/enc/test_case_comprehensive.rb | 2 +- test/ruby/enc/test_iso_8859.rb | 2 +- 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index aa1e670e4a..8ada3a4962 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Wed Jul 13 18:09:42 2016 Martin Duerst + + * enc/iso_8859_9.c, test/ruby/enc/test_case_comprehensive.rb: + Implement non-ASCII case conversion for ISO-8859-9, by Kazuki Iijima. + + * enc/iso_8859_9.c: Exclude dotless i/I with dot from case-insensitive + matching because they are not a case pair. + + * test/ruby/enc/test_iso_8859.rb: Make test coverage for ISO-8859-9 + a bit more complete. + Wed Jul 13 17:21:24 2016 Martin Duerst * enc/windows_1252.c, test/ruby/enc/test_case_comprehensive.rb: diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 668168daa9..5b80ebe0fe 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -62,7 +62,7 @@ static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', + '\370', '\371', '\372', '\373', '\374', '\151', '\376', '\337', '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', @@ -188,7 +188,7 @@ static const OnigPairCaseFoldCodes CaseFoldMap[] = { { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc }, - { 0xdd, 0xfd }, + /*{ 0xdd, 0xfd }, exclude dotless i/I with dot; not a case pair */ { 0xde, 0xfe } }; @@ -213,6 +213,60 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef ONIG_CASE_MAPPING +#define DOTLESS_i (0xFD) +#define I_WITH_DOT_ABOVE (0xDD) +static int +case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) +{ + OnigCodePoint code; + OnigUChar *to_start = to; + OnigCaseFoldType flags = *flagP; + + while (*pp