mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enc/unicode.c: Additional macros and code to use mapping data in
CaseMappingSpecials array. (with Kimihito Matsui) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54112 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
5d6349aefe
commit
00cc59a054
2 changed files with 73 additions and 21 deletions
|
@ -1,3 +1,9 @@
|
|||
Tue Mar 15 13:49:23 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/unicode.c: Additional macros and code to use mapping data in
|
||||
CaseMappingSpecials array.
|
||||
(with Kimihito Matsui)
|
||||
|
||||
Tue Mar 15 13:41:22 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* internal.h (rb_gc_mark_global_tbl): should be private,
|
||||
|
|
|
@ -137,15 +137,29 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* macros related to ONIGENC_CASE flags */
|
||||
/* defined here because not used in other files */
|
||||
#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL)
|
||||
|
||||
/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
|
||||
#define SpecialsLengthOffset 25 /* needs to be higher than the 22 bits used for Unicode codepoints */
|
||||
#define SpecialsLengthExtract(n) ((n)>>SpecialsLengthOffset)
|
||||
#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1))
|
||||
#define SpecialsLengthEncode(n) ((n)<<SpecialsLengthOffset)
|
||||
|
||||
#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexWidth)
|
||||
#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift)
|
||||
#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
|
||||
|
||||
/* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
|
||||
#define U ONIGENC_CASE_UPCASE
|
||||
#define D ONIGENC_CASE_DOWNCASE
|
||||
#define F ONIGENC_CASE_FOLD
|
||||
#define ST 0
|
||||
#define SU 0
|
||||
#define SL 0
|
||||
#define ST ONIGENC_CASE_TITLECASE
|
||||
#define SU ONIGENC_CASE_UP_SPECIAL
|
||||
#define SL ONIGENC_CASE_DOWN_SPECIAL
|
||||
#define I(n) 0
|
||||
#define L(n) 0
|
||||
#define L(n) SpecialsLengthEncode(n)
|
||||
|
||||
#include "enc/unicode/casefold.h"
|
||||
|
||||
|
@ -158,12 +172,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
#undef I
|
||||
#undef L
|
||||
|
||||
/* macros related to ONIGENC_CASE flags */
|
||||
/* defined here because not used in other files */
|
||||
#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexWidth)
|
||||
#define OnigSpecialIndexEncode(n) (((n)<<OnigSpecialIndexShift)&OnigSpecialIndexMask)
|
||||
#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
|
||||
|
||||
#include "enc/unicode/name2ctype.h"
|
||||
|
||||
#define CODE_RANGES_NUM numberof(CodeRanges)
|
||||
|
@ -654,6 +662,7 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
|||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
to_end -= CASE_MAPPING_SLACK;
|
||||
flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
|
||||
|
||||
/* hopelessly preliminary implementation, just dealing with ASCII and Turkic */
|
||||
while (*pp<end && to<=to_end) {
|
||||
|
@ -701,19 +710,56 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
|||
}
|
||||
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) {
|
||||
if (flags&OnigCaseFoldFlags(folded->n)) {
|
||||
int count = OnigCodePointCount(folded->n);
|
||||
const OnigCodePoint *next = folded->code;
|
||||
const OnigCodePoint *next;
|
||||
int count;
|
||||
|
||||
MODIFIED;
|
||||
if (count==1)
|
||||
code = *next;
|
||||
else if (count==2) {
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) {
|
||||
OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
|
||||
int count;
|
||||
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) {
|
||||
if (flags&ONIGENC_CASE_TITLECASE)
|
||||
goto SpecialsCopy;
|
||||
else
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) {
|
||||
if (flags&ONIGENC_CASE_DOWN_SPECIAL)
|
||||
goto SpecialsCopy;
|
||||
else
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
/* if we pass here, we know we use special upcasing, and are at the right position */
|
||||
SpecialsCopy:
|
||||
count = SpecialsLengthExtract(*SpecialsStart);
|
||||
next = SpecialsStart;
|
||||
if (count==1)
|
||||
code = SpecialsCodepointExtract(*next);
|
||||
else if (count==2) {
|
||||
to += ONIGENC_CODE_TO_MBC(enc, SpecialsCodepointExtract(*next++), to);
|
||||
code = *next;
|
||||
}
|
||||
else { /* count == 3 */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, SpecialsCodepointExtract(*next++), to);
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
}
|
||||
else { /* count == 3 */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
else { /* no specials */
|
||||
count = OnigCodePointCount(folded->n);
|
||||
next = folded->code;
|
||||
if (count==1)
|
||||
code = *next;
|
||||
else if (count==2) {
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
else { /* count == 3 */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue