mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* unicode.c (onigenc_unicode_property_name_to_ctype):
  ignore case of properties.
* tool/enc-unicode.rb: downcase properties list.
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src: follow above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24836 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e519c9d960
commit
ee4b59a419
7 changed files with 1312 additions and 1223 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
|||
Fri Sep 11 07:52:43 2009 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* unicode.c (onigenc_unicode_property_name_to_ctype):
|
||||
ignore case of properties.
|
||||
|
||||
* tool/enc-unicode.rb: downcase properties list.
|
||||
|
||||
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
|
||||
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
|
||||
follow above.
|
||||
|
||||
Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net>
|
||||
|
||||
* include/ruby/ruby.h (rb_data_type_t): Add comments.
|
||||
|
|
|
@ -2093,16 +2093,16 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
|
|||
|
||||
p = name;
|
||||
len = 0;
|
||||
while (p < end) {
|
||||
for (p = name; p < end; p += enclen(enc, p, end)) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
if (code == ' ' || code == '-' || code == '_')
|
||||
continue;
|
||||
if (code >= 0x80)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
|
||||
buf[len++] = (UChar )code;
|
||||
buf[len++] = (UChar )TOLOWER((unsigned char)code);
|
||||
if (len >= PROPERTY_NAME_MAX_SIZE)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
|
||||
p += enclen(enc, p, end);
|
||||
}
|
||||
|
||||
buf[len] = 0;
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
|
|||
%}
|
||||
struct uniname2ctype_struct;
|
||||
%%
|
||||
NEWLINE, 0
|
||||
Alpha, 1
|
||||
Blank, 2
|
||||
Cntrl, 3
|
||||
Digit, 4
|
||||
Graph, 5
|
||||
Lower, 6
|
||||
Print, 7
|
||||
Punct, 8
|
||||
Space, 9
|
||||
Upper, 10
|
||||
XDigit, 11
|
||||
Word, 12
|
||||
Alnum, 13
|
||||
ASCII, 14
|
||||
newline, 0
|
||||
alpha, 1
|
||||
blank, 2
|
||||
cntrl, 3
|
||||
digit, 4
|
||||
graph, 5
|
||||
lower, 6
|
||||
print, 7
|
||||
punct, 8
|
||||
space, 9
|
||||
upper, 10
|
||||
xdigit, 11
|
||||
word, 12
|
||||
alnum, 13
|
||||
ascii, 14
|
||||
#ifdef USE_UNICODE_PROPERTIES
|
||||
Any, 15
|
||||
Assigned, 16
|
||||
C, 17
|
||||
Cc, 18
|
||||
Cf, 19
|
||||
Cn, 20
|
||||
Co, 21
|
||||
Cs, 22
|
||||
L, 23
|
||||
Ll, 24
|
||||
Lm, 25
|
||||
Lo, 26
|
||||
Lt, 27
|
||||
Lu, 28
|
||||
M, 29
|
||||
Mc, 30
|
||||
Me, 31
|
||||
Mn, 32
|
||||
N, 33
|
||||
Nd, 34
|
||||
Nl, 35
|
||||
No, 36
|
||||
P, 37
|
||||
Pc, 38
|
||||
Pd, 39
|
||||
Pe, 40
|
||||
Pf, 41
|
||||
Pi, 42
|
||||
Po, 43
|
||||
Ps, 44
|
||||
S, 45
|
||||
Sc, 46
|
||||
Sk, 47
|
||||
Sm, 48
|
||||
So, 49
|
||||
Z, 50
|
||||
Zl, 51
|
||||
Zp, 52
|
||||
Zs, 53
|
||||
Common, 54
|
||||
Latin, 55
|
||||
Greek, 56
|
||||
Cyrillic, 57
|
||||
Armenian, 58
|
||||
Hebrew, 59
|
||||
Arabic, 60
|
||||
Syriac, 61
|
||||
Thaana, 62
|
||||
Devanagari, 63
|
||||
Bengali, 64
|
||||
Gurmukhi, 65
|
||||
Gujarati, 66
|
||||
Oriya, 67
|
||||
Tamil, 68
|
||||
Telugu, 69
|
||||
Kannada, 70
|
||||
Malayalam, 71
|
||||
Sinhala, 72
|
||||
Thai, 73
|
||||
Lao, 74
|
||||
Tibetan, 75
|
||||
Myanmar, 76
|
||||
Georgian, 77
|
||||
Hangul, 78
|
||||
Ethiopic, 79
|
||||
Cherokee, 80
|
||||
Canadian_Aboriginal, 81
|
||||
Ogham, 82
|
||||
Runic, 83
|
||||
Khmer, 84
|
||||
Mongolian, 85
|
||||
Hiragana, 86
|
||||
Katakana, 87
|
||||
Bopomofo, 88
|
||||
Han, 89
|
||||
Yi, 90
|
||||
Old_Italic, 91
|
||||
Gothic, 92
|
||||
Deseret, 93
|
||||
Inherited, 94
|
||||
Tagalog, 95
|
||||
Hanunoo, 96
|
||||
Buhid, 97
|
||||
Tagbanwa, 98
|
||||
Limbu, 99
|
||||
Tai_Le, 100
|
||||
Linear_B, 101
|
||||
Ugaritic, 102
|
||||
Shavian, 103
|
||||
Osmanya, 104
|
||||
Cypriot, 105
|
||||
Braille, 106
|
||||
Buginese, 107
|
||||
Coptic, 108
|
||||
New_Tai_Lue, 109
|
||||
Glagolitic, 110
|
||||
Tifinagh, 111
|
||||
Syloti_Nagri, 112
|
||||
Old_Persian, 113
|
||||
Kharoshthi, 114
|
||||
Balinese, 115
|
||||
Cuneiform, 116
|
||||
Phoenician, 117
|
||||
Phags_Pa, 118
|
||||
Nko, 119
|
||||
Sundanese, 120
|
||||
Lepcha, 121
|
||||
Ol_Chiki, 122
|
||||
Vai, 123
|
||||
Saurashtra, 124
|
||||
Kayah_Li, 125
|
||||
Rejang, 126
|
||||
Lycian, 127
|
||||
Carian, 128
|
||||
Lydian, 129
|
||||
Cham, 130
|
||||
any, 15
|
||||
assigned, 16
|
||||
c, 17
|
||||
cc, 18
|
||||
cf, 19
|
||||
cn, 20
|
||||
co, 21
|
||||
cs, 22
|
||||
l, 23
|
||||
ll, 24
|
||||
lm, 25
|
||||
lo, 26
|
||||
lt, 27
|
||||
lu, 28
|
||||
m, 29
|
||||
mc, 30
|
||||
me, 31
|
||||
mn, 32
|
||||
n, 33
|
||||
nd, 34
|
||||
nl, 35
|
||||
no, 36
|
||||
p, 37
|
||||
pc, 38
|
||||
pd, 39
|
||||
pe, 40
|
||||
pf, 41
|
||||
pi, 42
|
||||
po, 43
|
||||
ps, 44
|
||||
s, 45
|
||||
sc, 46
|
||||
sk, 47
|
||||
sm, 48
|
||||
so, 49
|
||||
z, 50
|
||||
zl, 51
|
||||
zp, 52
|
||||
zs, 53
|
||||
common, 54
|
||||
latin, 55
|
||||
greek, 56
|
||||
cyrillic, 57
|
||||
armenian, 58
|
||||
hebrew, 59
|
||||
arabic, 60
|
||||
syriac, 61
|
||||
thaana, 62
|
||||
devanagari, 63
|
||||
bengali, 64
|
||||
gurmukhi, 65
|
||||
gujarati, 66
|
||||
oriya, 67
|
||||
tamil, 68
|
||||
telugu, 69
|
||||
kannada, 70
|
||||
malayalam, 71
|
||||
sinhala, 72
|
||||
thai, 73
|
||||
lao, 74
|
||||
tibetan, 75
|
||||
myanmar, 76
|
||||
georgian, 77
|
||||
hangul, 78
|
||||
ethiopic, 79
|
||||
cherokee, 80
|
||||
canadianaboriginal, 81
|
||||
ogham, 82
|
||||
runic, 83
|
||||
khmer, 84
|
||||
mongolian, 85
|
||||
hiragana, 86
|
||||
katakana, 87
|
||||
bopomofo, 88
|
||||
han, 89
|
||||
yi, 90
|
||||
olditalic, 91
|
||||
gothic, 92
|
||||
deseret, 93
|
||||
inherited, 94
|
||||
tagalog, 95
|
||||
hanunoo, 96
|
||||
buhid, 97
|
||||
tagbanwa, 98
|
||||
limbu, 99
|
||||
taile, 100
|
||||
linearb, 101
|
||||
ugaritic, 102
|
||||
shavian, 103
|
||||
osmanya, 104
|
||||
cypriot, 105
|
||||
braille, 106
|
||||
buginese, 107
|
||||
coptic, 108
|
||||
newtailue, 109
|
||||
glagolitic, 110
|
||||
tifinagh, 111
|
||||
sylotinagri, 112
|
||||
oldpersian, 113
|
||||
kharoshthi, 114
|
||||
balinese, 115
|
||||
cuneiform, 116
|
||||
phoenician, 117
|
||||
phagspa, 118
|
||||
nko, 119
|
||||
sundanese, 120
|
||||
lepcha, 121
|
||||
olchiki, 122
|
||||
vai, 123
|
||||
saurashtra, 124
|
||||
kayahli, 125
|
||||
rejang, 126
|
||||
lycian, 127
|
||||
carian, 128
|
||||
lydian, 129
|
||||
cham, 130
|
||||
#endif /* USE_UNICODE_PROPERTIES */
|
||||
%%
|
||||
static int
|
||||
|
|
|
@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
|
|||
%}
|
||||
struct uniname2ctype_struct;
|
||||
%%
|
||||
NEWLINE, 0
|
||||
Alpha, 1
|
||||
Blank, 2
|
||||
Cntrl, 3
|
||||
Digit, 4
|
||||
Graph, 5
|
||||
Lower, 6
|
||||
Print, 7
|
||||
Punct, 8
|
||||
Space, 9
|
||||
Upper, 10
|
||||
XDigit, 11
|
||||
Word, 12
|
||||
Alnum, 13
|
||||
ASCII, 14
|
||||
newline, 0
|
||||
alpha, 1
|
||||
blank, 2
|
||||
cntrl, 3
|
||||
digit, 4
|
||||
graph, 5
|
||||
lower, 6
|
||||
print, 7
|
||||
punct, 8
|
||||
space, 9
|
||||
upper, 10
|
||||
xdigit, 11
|
||||
word, 12
|
||||
alnum, 13
|
||||
ascii, 14
|
||||
#ifdef USE_UNICODE_PROPERTIES
|
||||
Any, 15
|
||||
Assigned, 16
|
||||
C, 17
|
||||
Cc, 18
|
||||
Cf, 19
|
||||
Cn, 20
|
||||
Co, 21
|
||||
Cs, 22
|
||||
L, 23
|
||||
Ll, 24
|
||||
Lm, 25
|
||||
Lo, 26
|
||||
Lt, 27
|
||||
Lu, 28
|
||||
M, 29
|
||||
Mc, 30
|
||||
Me, 31
|
||||
Mn, 32
|
||||
N, 33
|
||||
Nd, 34
|
||||
Nl, 35
|
||||
No, 36
|
||||
P, 37
|
||||
Pc, 38
|
||||
Pd, 39
|
||||
Pe, 40
|
||||
Pf, 41
|
||||
Pi, 42
|
||||
Po, 43
|
||||
Ps, 44
|
||||
S, 45
|
||||
Sc, 46
|
||||
Sk, 47
|
||||
Sm, 48
|
||||
So, 49
|
||||
Z, 50
|
||||
Zl, 51
|
||||
Zp, 52
|
||||
Zs, 53
|
||||
Common, 54
|
||||
Latin, 55
|
||||
Greek, 56
|
||||
Cyrillic, 57
|
||||
Armenian, 58
|
||||
Hebrew, 59
|
||||
Arabic, 60
|
||||
Syriac, 61
|
||||
Thaana, 62
|
||||
Devanagari, 63
|
||||
Bengali, 64
|
||||
Gurmukhi, 65
|
||||
Gujarati, 66
|
||||
Oriya, 67
|
||||
Tamil, 68
|
||||
Telugu, 69
|
||||
Kannada, 70
|
||||
Malayalam, 71
|
||||
Sinhala, 72
|
||||
Thai, 73
|
||||
Lao, 74
|
||||
Tibetan, 75
|
||||
Myanmar, 76
|
||||
Georgian, 77
|
||||
Hangul, 78
|
||||
Ethiopic, 79
|
||||
Cherokee, 80
|
||||
Canadian_Aboriginal, 81
|
||||
Ogham, 82
|
||||
Runic, 83
|
||||
Khmer, 84
|
||||
Mongolian, 85
|
||||
Hiragana, 86
|
||||
Katakana, 87
|
||||
Bopomofo, 88
|
||||
Han, 89
|
||||
Yi, 90
|
||||
Old_Italic, 91
|
||||
Gothic, 92
|
||||
Deseret, 93
|
||||
Inherited, 94
|
||||
Tagalog, 95
|
||||
Hanunoo, 96
|
||||
Buhid, 97
|
||||
Tagbanwa, 98
|
||||
Limbu, 99
|
||||
Tai_Le, 100
|
||||
Linear_B, 101
|
||||
Ugaritic, 102
|
||||
Shavian, 103
|
||||
Osmanya, 104
|
||||
Cypriot, 105
|
||||
Braille, 106
|
||||
Buginese, 107
|
||||
Coptic, 108
|
||||
New_Tai_Lue, 109
|
||||
Glagolitic, 110
|
||||
Tifinagh, 111
|
||||
Syloti_Nagri, 112
|
||||
Old_Persian, 113
|
||||
Kharoshthi, 114
|
||||
Balinese, 115
|
||||
Cuneiform, 116
|
||||
Phoenician, 117
|
||||
Phags_Pa, 118
|
||||
Nko, 119
|
||||
Sundanese, 120
|
||||
Lepcha, 121
|
||||
Ol_Chiki, 122
|
||||
Vai, 123
|
||||
Saurashtra, 124
|
||||
Kayah_Li, 125
|
||||
Rejang, 126
|
||||
Lycian, 127
|
||||
Carian, 128
|
||||
Lydian, 129
|
||||
Cham, 130
|
||||
any, 15
|
||||
assigned, 16
|
||||
c, 17
|
||||
cc, 18
|
||||
cf, 19
|
||||
cn, 20
|
||||
co, 21
|
||||
cs, 22
|
||||
l, 23
|
||||
ll, 24
|
||||
lm, 25
|
||||
lo, 26
|
||||
lt, 27
|
||||
lu, 28
|
||||
m, 29
|
||||
mc, 30
|
||||
me, 31
|
||||
mn, 32
|
||||
n, 33
|
||||
nd, 34
|
||||
nl, 35
|
||||
no, 36
|
||||
p, 37
|
||||
pc, 38
|
||||
pd, 39
|
||||
pe, 40
|
||||
pf, 41
|
||||
pi, 42
|
||||
po, 43
|
||||
ps, 44
|
||||
s, 45
|
||||
sc, 46
|
||||
sk, 47
|
||||
sm, 48
|
||||
so, 49
|
||||
z, 50
|
||||
zl, 51
|
||||
zp, 52
|
||||
zs, 53
|
||||
common, 54
|
||||
latin, 55
|
||||
greek, 56
|
||||
cyrillic, 57
|
||||
armenian, 58
|
||||
hebrew, 59
|
||||
arabic, 60
|
||||
syriac, 61
|
||||
thaana, 62
|
||||
devanagari, 63
|
||||
bengali, 64
|
||||
gurmukhi, 65
|
||||
gujarati, 66
|
||||
oriya, 67
|
||||
tamil, 68
|
||||
telugu, 69
|
||||
kannada, 70
|
||||
malayalam, 71
|
||||
sinhala, 72
|
||||
thai, 73
|
||||
lao, 74
|
||||
tibetan, 75
|
||||
myanmar, 76
|
||||
georgian, 77
|
||||
hangul, 78
|
||||
ethiopic, 79
|
||||
cherokee, 80
|
||||
canadianaboriginal, 81
|
||||
ogham, 82
|
||||
runic, 83
|
||||
khmer, 84
|
||||
mongolian, 85
|
||||
hiragana, 86
|
||||
katakana, 87
|
||||
bopomofo, 88
|
||||
han, 89
|
||||
yi, 90
|
||||
olditalic, 91
|
||||
gothic, 92
|
||||
deseret, 93
|
||||
inherited, 94
|
||||
tagalog, 95
|
||||
hanunoo, 96
|
||||
buhid, 97
|
||||
tagbanwa, 98
|
||||
limbu, 99
|
||||
taile, 100
|
||||
linearb, 101
|
||||
ugaritic, 102
|
||||
shavian, 103
|
||||
osmanya, 104
|
||||
cypriot, 105
|
||||
braille, 106
|
||||
buginese, 107
|
||||
coptic, 108
|
||||
newtailue, 109
|
||||
glagolitic, 110
|
||||
tifinagh, 111
|
||||
sylotinagri, 112
|
||||
oldpersian, 113
|
||||
kharoshthi, 114
|
||||
balinese, 115
|
||||
cuneiform, 116
|
||||
phoenician, 117
|
||||
phagspa, 118
|
||||
nko, 119
|
||||
sundanese, 120
|
||||
lepcha, 121
|
||||
olchiki, 122
|
||||
vai, 123
|
||||
saurashtra, 124
|
||||
kayahli, 125
|
||||
rejang, 126
|
||||
lycian, 127
|
||||
carian, 128
|
||||
lydian, 129
|
||||
cham, 130
|
||||
#endif /* USE_UNICODE_PROPERTIES */
|
||||
%%
|
||||
static int
|
||||
|
|
|
@ -176,6 +176,12 @@ def make_const(prop, pairs, name)
|
|||
puts "}; /* CR_#{prop} */"
|
||||
end
|
||||
|
||||
# Normalizes a property name: lowercases it and strips every hyphen,
# space and underscore, e.g. "Old_Italic" => "olditalic".
#
# name - the property name String; the caller's string is left untouched.
#
# Returns a new String holding the normalized name.
def normalize_propname(name)
  # Non-bang gsub always returns a String (gsub! returns nil when nothing
  # matched), so the result can be returned directly.
  name.downcase.gsub(/[- _]/, '')
end
|
||||
|
||||
puts '%{'
|
||||
gcps, data = parse_unicode_data(ARGV[0])
|
||||
POSIX_NAMES.each do |name|
|
||||
|
@ -213,10 +219,10 @@ struct uniname2ctype_struct;
|
|||
%%
|
||||
__HEREDOC
|
||||
i = -1
|
||||
POSIX_NAMES.each {|name|puts"%-21s %3d"%[name+',', i+=1]}
|
||||
POSIX_NAMES.each {|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
|
||||
puts "#ifdef USE_UNICODE_PROPERTIES"
|
||||
gcps.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
|
||||
scripts.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
|
||||
gcps.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
|
||||
scripts.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
|
||||
puts "#endif /* USE_UNICODE_PROPERTIES */\n"
|
||||
puts(<<'__HEREDOC')
|
||||
%%
|
||||
|
|
Loading…
Reference in a new issue