diff --git a/ChangeLog b/ChangeLog index 19a559465b..c59e3cfc33 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Tue Mar 15 13:49:23 2016 Martin Duerst + + * enc/unicode.c: Additional macros and code to use mapping data in + CaseMappingSpecials array. + (with Kimihito Matsui) + Tue Mar 15 13:41:22 2016 Nobuyoshi Nakada * internal.h (rb_gc_mark_global_tbl): should be private, diff --git a/enc/unicode.c b/enc/unicode.c index f4487e40d5..b012586977 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -137,15 +137,29 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) return 1; } +/* macros related to ONIGENC_CASE flags */ +/* defined here because not used in other files */ +#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL) + +/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */ +#define SpecialsLengthOffset 25 /* needs to be higher than the 22 bits used for Unicode codepoints */ +#define SpecialsLengthExtract(n) ((n)>>SpecialsLengthOffset) +#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1)) +#define SpecialsLengthEncode(n) ((n)<<SpecialsLengthOffset) + +#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift) +#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift) +#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift) + /* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */ #define U ONIGENC_CASE_UPCASE #define D ONIGENC_CASE_DOWNCASE #define F ONIGENC_CASE_FOLD -#define ST 0 -#define SU 0 -#define SL 0 +#define ST ONIGENC_CASE_TITLECASE +#define SU ONIGENC_CASE_UP_SPECIAL +#define SL ONIGENC_CASE_DOWN_SPECIAL #define I(n) 0 -#define L(n) 0 +#define L(n) SpecialsLengthEncode(n) #include "enc/unicode/casefold.h" @@ -158,12 +172,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) #undef I #undef L -/* macros related to ONIGENC_CASE flags */ -/* defined here because not used in other files */ -#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift) -#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift) -#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift) - #include "enc/unicode/name2ctype.h" #define CODE_RANGES_NUM numberof(CodeRanges) @@ -654,6 +662,7 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, OnigUChar *to_start = to; 
OnigCaseFoldType flags = *flagP; to_end -= CASE_MAPPING_SLACK; + flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<n)) { - int count = OnigCodePointCount(folded->n); - const OnigCodePoint *next = folded->code; + const OnigCodePoint *next; + int count; + MODIFIED; - if (count==1) - code = *next; - else if (count==2) { - to += ONIGENC_CODE_TO_MBC(enc, *next++, to); - code = *next; + if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) { + OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n); + int count; + + if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) { + if (flags&ONIGENC_CASE_TITLECASE) + goto SpecialsCopy; + else + SpecialsStart += SpecialsLengthExtract(*SpecialsStart); + } + if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) { + if (flags&ONIGENC_CASE_DOWN_SPECIAL) + goto SpecialsCopy; + else + SpecialsStart += SpecialsLengthExtract(*SpecialsStart); + } + /* if we pass here, we know we use special upcasing, and are at the right position */ + SpecialsCopy: + count = SpecialsLengthExtract(*SpecialsStart); + next = SpecialsStart; + if (count==1) + code = SpecialsCodepointExtract(*next); + else if (count==2) { + to += ONIGENC_CODE_TO_MBC(enc, SpecialsCodepointExtract(*next++), to); + code = *next; + } + else { /* count == 3 */ + to += ONIGENC_CODE_TO_MBC(enc, SpecialsCodepointExtract(*next++), to); + to += ONIGENC_CODE_TO_MBC(enc, *next++, to); + code = *next; + } } - else { /* count == 3 */ - to += ONIGENC_CODE_TO_MBC(enc, *next++, to); - to += ONIGENC_CODE_TO_MBC(enc, *next++, to); - code = *next; + else { /* no specials */ + count = OnigCodePointCount(folded->n); + next = folded->code; + if (count==1) + code = *next; + else if (count==2) { + to += ONIGENC_CODE_TO_MBC(enc, *next++, to); + code = *next; + } + else { /* count == 3 */ + to += ONIGENC_CODE_TO_MBC(enc, *next++, to); + to += ONIGENC_CODE_TO_MBC(enc, *next++, to); + code = *next; + } } } }