1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* tool/enc-unicode.rb: parse range notation of UnicodeData.txt.

* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  follow above change. [ruby-dev:39444]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25260 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2009-10-08 02:49:11 +00:00
parent ec0e370eb5
commit 866c79e2de
7 changed files with 272 additions and 529 deletions

View file

@ -1,3 +1,11 @@
Thu Oct 8 05:45:14 2009 NARUSE, Yui <naruse@ruby-lang.org>
* tool/enc-unicode.rb: parse range notation of UnicodeData.txt.
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
follow above change. [ruby-dev:39444]
Thu Oct 8 02:46:24 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_inspect): copy by chunks.

View file

@ -43,7 +43,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
482,
477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -377,10 +377,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -413,8 +411,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -520,10 +517,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@ -593,7 +588,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
492,
487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@ -914,10 +909,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -945,8 +938,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -1080,10 +1072,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@ -1694,7 +1684,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
494,
489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@ -2017,10 +2007,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -2048,8 +2036,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -2183,10 +2170,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@ -2950,7 +2935,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
508,
503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@ -3302,10 +3287,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3340,8 +3323,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3453,17 +3435,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
502,
497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@ -3812,10 +3792,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3850,8 +3828,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3960,10 +3937,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@ -3984,7 +3959,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
495,
484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@ -4300,10 +4275,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -4331,15 +4304,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xfa2d,
0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@ -4469,22 +4437,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
26,
20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@ -4496,21 +4460,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xf8ff,
0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10ffff,
0xf0000, 0xffffd,
0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@ -4542,7 +4500,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
495,
484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@ -4858,9 +4816,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
0x3401, 0x4db4,
0x4db6, 0x4dbf,
0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@ -4889,14 +4845,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
0xd801, 0xdb7e,
0xdb81, 0xdbfe,
0xdc01, 0xdffe,
0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@ -5027,42 +4978,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
0xf0001, 0xffffc,
0xffffe, 0xfffff,
0x100001, 0x10ffff,
0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
6,
0xe000, 0xe000,
0xf8ff, 0xf8ff,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10fffd,
3,
0xe000, 0xf8ff,
0xf0000, 0xffffd,
0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
4,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xdfff,
1,
0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
427,
422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -5347,10 +5288,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -5389,8 +5328,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -5485,10 +5423,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@ -6152,7 +6088,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
316,
311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@ -6362,10 +6298,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@ -6400,8 +6334,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -6464,10 +6397,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */

View file

@ -43,7 +43,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
482,
477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -377,10 +377,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -413,8 +411,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -520,10 +517,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@ -593,7 +588,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
492,
487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@ -914,10 +909,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -945,8 +938,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -1080,10 +1072,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@ -1694,7 +1684,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
494,
489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@ -2017,10 +2007,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -2048,8 +2036,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -2183,10 +2170,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@ -2950,7 +2935,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
508,
503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@ -3302,10 +3287,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3340,8 +3323,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3453,17 +3435,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
502,
497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@ -3812,10 +3792,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3850,8 +3828,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3960,10 +3937,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@ -3984,7 +3959,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
495,
484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@ -4300,10 +4275,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -4331,15 +4304,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xfa2d,
0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@ -4469,22 +4437,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
26,
20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@ -4496,21 +4460,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xf8ff,
0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10ffff,
0xf0000, 0xffffd,
0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@ -4542,7 +4500,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
495,
484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@ -4858,9 +4816,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
0x3401, 0x4db4,
0x4db6, 0x4dbf,
0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@ -4889,14 +4845,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
0xd801, 0xdb7e,
0xdb81, 0xdbfe,
0xdc01, 0xdffe,
0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@ -5027,42 +4978,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
0xf0001, 0xffffc,
0xffffe, 0xfffff,
0x100001, 0x10ffff,
0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
6,
0xe000, 0xe000,
0xf8ff, 0xf8ff,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10fffd,
3,
0xe000, 0xf8ff,
0xf0000, 0xffffd,
0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
4,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xdfff,
1,
0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
427,
422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -5347,10 +5288,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -5389,8 +5328,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -5485,10 +5423,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@ -6152,7 +6088,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
316,
311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@ -6362,10 +6298,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@ -6400,8 +6334,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -6464,10 +6397,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */

View file

@ -8,7 +8,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
482,
477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -342,10 +342,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -378,8 +376,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -485,10 +482,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@ -558,7 +553,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
492,
487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@ -879,10 +874,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -910,8 +903,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -1045,10 +1037,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@ -1659,7 +1649,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
494,
489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@ -1982,10 +1972,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -2013,8 +2001,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -2148,10 +2135,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@ -2915,7 +2900,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
508,
503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@ -3267,10 +3252,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3305,8 +3288,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3418,17 +3400,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
502,
497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@ -3777,10 +3757,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3815,8 +3793,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3925,10 +3902,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@ -3948,7 +3923,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
495,
484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@ -4264,10 +4239,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -4295,15 +4268,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xfa2d,
0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@ -4433,22 +4401,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
26,
20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@ -4460,21 +4424,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xf8ff,
0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10ffff,
0xf0000, 0xffffd,
0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@ -4506,7 +4464,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
495,
484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@ -4822,9 +4780,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
0x3401, 0x4db4,
0x4db6, 0x4dbf,
0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@ -4853,14 +4809,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
0xd801, 0xdb7e,
0xdb81, 0xdbfe,
0xdc01, 0xdffe,
0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@ -4991,42 +4942,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
0xf0001, 0xffffc,
0xffffe, 0xfffff,
0x100001, 0x10ffff,
0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
6,
0xe000, 0xe000,
0xf8ff, 0xf8ff,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10fffd,
3,
0xe000, 0xf8ff,
0xf0000, 0xffffd,
0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
4,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xdfff,
1,
0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
427,
422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -5311,10 +5252,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -5353,8 +5292,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -5449,10 +5387,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@ -6116,7 +6052,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
316,
311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@ -6326,10 +6262,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@ -6364,8 +6298,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -6428,10 +6361,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */

View file

@ -8,7 +8,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
482,
477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -342,10 +342,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -378,8 +376,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -485,10 +482,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@ -558,7 +553,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
492,
487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@ -879,10 +874,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -910,8 +903,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -1045,10 +1037,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@ -1659,7 +1649,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
494,
489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@ -1982,10 +1972,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -2013,8 +2001,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -2148,10 +2135,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@ -2915,7 +2900,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
508,
503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@ -3267,10 +3252,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3305,8 +3288,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3418,17 +3400,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
502,
497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@ -3777,10 +3757,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -3815,8 +3793,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -3925,10 +3902,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@ -3948,7 +3923,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
495,
484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@ -4264,10 +4239,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
0x3300, 0x3400,
0x4db5, 0x4db5,
0x4dc0, 0x4e00,
0x9fcb, 0x9fcb,
0x3300, 0x4db5,
0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -4295,15 +4268,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xfa2d,
0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@ -4433,22 +4401,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
26,
20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@ -4460,21 +4424,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xe000,
0xf8ff, 0xf8ff,
0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10ffff,
0xf0000, 0xffffd,
0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@ -4506,7 +4464,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
495,
484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@ -4822,9 +4780,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
0x3401, 0x4db4,
0x4db6, 0x4dbf,
0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@ -4853,14 +4809,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
0xd801, 0xdb7e,
0xdb81, 0xdbfe,
0xdc01, 0xdffe,
0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@ -4991,42 +4942,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
0xf0001, 0xffffc,
0xffffe, 0xfffff,
0x100001, 0x10ffff,
0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
6,
0xe000, 0xe000,
0xf8ff, 0xf8ff,
0xf0000, 0xf0000,
0xffffd, 0xffffd,
0x100000, 0x100000,
0x10fffd, 0x10fffd,
3,
0xe000, 0xf8ff,
0xf0000, 0xffffd,
0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
4,
0xd800, 0xd800,
0xdb7f, 0xdb80,
0xdbff, 0xdc00,
0xdfff, 0xdfff,
1,
0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
427,
422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -5311,10 +5252,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@ -5353,8 +5292,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -5449,10 +5387,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@ -6116,7 +6052,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
316,
311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@ -6326,10 +6262,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
0x3400, 0x3400,
0x4db5, 0x4db5,
0x4e00, 0x4e00,
0x9fcb, 0x9fcb,
0x3400, 0x4db5,
0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@ -6364,8 +6298,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
0xac00, 0xac00,
0xd7a3, 0xd7a3,
0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@ -6428,10 +6361,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
0x20000, 0x20000,
0x2a6d6, 0x2a6d6,
0x2a700, 0x2a700,
0x2b734, 0x2b734,
0x20000, 0x2a6d6,
0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */

View file

@ -745,6 +745,7 @@ class TestRegexp < Test::Unit::TestCase
assert_match(/^\u3042{0}\p{Any}$/, "a")
assert_match(/^\u3042{0}\p{Any}$/, "\u3041")
assert_match(/^\u3042{0}\p{Any}$/, "\0")
assert_match(/^\p{Lo}{4}$/u, "\u3401\u4E01\u{20001}\u{2A701}")
assert_no_match(/^\u3042{0}\p{Any}$/, "\0\0")
assert_no_match(/^\u3042{0}\p{Any}$/, "")
assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + "\u3042" + '}$/') }

View file

@ -40,26 +40,36 @@ end
def parse_unicode_data(file)
last_cp = 0
data = {'Cn' => []}
beg_cp = nil
IO.foreach(file) do |line|
fields = line.split(';')
cp = fields[0].to_i(16)
case fields[1]
when /\A<(.*),\s*First>\z/
beg_cp = cp
next
when /\A<(.*),\s*Last>\z/
cps = (beg_cp..cp).to_a
else
beg_cp = cp
cps = [cp]
end
# The Cn category represents unassigned characters. These are not listed in
# UnicodeData.txt so we must derive them by looking for 'holes' in the range
# of listed codepoints. We increment the last codepoint seen and compare it
# with the current codepoint. If the current codepoint is greater than
# last_cp.next we have found a hole, so we add the missing codepoints to the
# Cn category.
while ((last_cp = last_cp.next) < cp)
data['Cn'] << last_cp
end
data['Cn'].concat((last_cp.next...beg_cp).to_a)
# The third field denotes the 'General' category, e.g. Lu
(data[fields[2]] ||= []) << cp
(data[fields[2]] ||= []).concat(cps)
# The 'Major' category is the first letter of the 'General' category, e.g.
# 'Lu' -> 'L'
(data[fields[2][0,1]] ||= []) << cp
(data[fields[2][0,1]] ||= []).concat(cps)
last_cp = cp
end