mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* unicode.c (onigenc_unicode_property_name_to_ctype):
  ignore case of properties.

* tool/enc-unicode.rb: downcase properties list.

* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  follow above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24836 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
e519c9d960
commit
ee4b59a419
7 changed files with 1312 additions and 1223 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
||||||
|
Fri Sep 11 07:52:43 2009 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* unicode.c (onigenc_unicode_property_name_to_ctype):
|
||||||
|
ignore case of properties.
|
||||||
|
|
||||||
|
* tool/enc-unicode.rb: downcase properties list.
|
||||||
|
|
||||||
|
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
|
||||||
|
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
|
||||||
|
follow above.
|
||||||
|
|
||||||
Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net>
|
Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net>
|
||||||
|
|
||||||
* include/ruby/ruby.h (rb_data_type_t): Add comments.
|
* include/ruby/ruby.h (rb_data_type_t): Add comments.
|
||||||
|
|
|
@ -2093,16 +2093,16 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
|
||||||
|
|
||||||
p = name;
|
p = name;
|
||||||
len = 0;
|
len = 0;
|
||||||
while (p < end) {
|
for (p = name; p < end; p += enclen(enc, p, end)) {
|
||||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||||
|
if (code == ' ' || code == '-' || code == '_')
|
||||||
|
continue;
|
||||||
if (code >= 0x80)
|
if (code >= 0x80)
|
||||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||||
|
|
||||||
buf[len++] = (UChar )code;
|
buf[len++] = (UChar )TOLOWER((unsigned char)code);
|
||||||
if (len >= PROPERTY_NAME_MAX_SIZE)
|
if (len >= PROPERTY_NAME_MAX_SIZE)
|
||||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||||
|
|
||||||
p += enclen(enc, p, end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buf[len] = 0;
|
buf[len] = 0;
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
|
||||||
%}
|
%}
|
||||||
struct uniname2ctype_struct;
|
struct uniname2ctype_struct;
|
||||||
%%
|
%%
|
||||||
NEWLINE, 0
|
newline, 0
|
||||||
Alpha, 1
|
alpha, 1
|
||||||
Blank, 2
|
blank, 2
|
||||||
Cntrl, 3
|
cntrl, 3
|
||||||
Digit, 4
|
digit, 4
|
||||||
Graph, 5
|
graph, 5
|
||||||
Lower, 6
|
lower, 6
|
||||||
Print, 7
|
print, 7
|
||||||
Punct, 8
|
punct, 8
|
||||||
Space, 9
|
space, 9
|
||||||
Upper, 10
|
upper, 10
|
||||||
XDigit, 11
|
xdigit, 11
|
||||||
Word, 12
|
word, 12
|
||||||
Alnum, 13
|
alnum, 13
|
||||||
ASCII, 14
|
ascii, 14
|
||||||
#ifdef USE_UNICODE_PROPERTIES
|
#ifdef USE_UNICODE_PROPERTIES
|
||||||
Any, 15
|
any, 15
|
||||||
Assigned, 16
|
assigned, 16
|
||||||
C, 17
|
c, 17
|
||||||
Cc, 18
|
cc, 18
|
||||||
Cf, 19
|
cf, 19
|
||||||
Cn, 20
|
cn, 20
|
||||||
Co, 21
|
co, 21
|
||||||
Cs, 22
|
cs, 22
|
||||||
L, 23
|
l, 23
|
||||||
Ll, 24
|
ll, 24
|
||||||
Lm, 25
|
lm, 25
|
||||||
Lo, 26
|
lo, 26
|
||||||
Lt, 27
|
lt, 27
|
||||||
Lu, 28
|
lu, 28
|
||||||
M, 29
|
m, 29
|
||||||
Mc, 30
|
mc, 30
|
||||||
Me, 31
|
me, 31
|
||||||
Mn, 32
|
mn, 32
|
||||||
N, 33
|
n, 33
|
||||||
Nd, 34
|
nd, 34
|
||||||
Nl, 35
|
nl, 35
|
||||||
No, 36
|
no, 36
|
||||||
P, 37
|
p, 37
|
||||||
Pc, 38
|
pc, 38
|
||||||
Pd, 39
|
pd, 39
|
||||||
Pe, 40
|
pe, 40
|
||||||
Pf, 41
|
pf, 41
|
||||||
Pi, 42
|
pi, 42
|
||||||
Po, 43
|
po, 43
|
||||||
Ps, 44
|
ps, 44
|
||||||
S, 45
|
s, 45
|
||||||
Sc, 46
|
sc, 46
|
||||||
Sk, 47
|
sk, 47
|
||||||
Sm, 48
|
sm, 48
|
||||||
So, 49
|
so, 49
|
||||||
Z, 50
|
z, 50
|
||||||
Zl, 51
|
zl, 51
|
||||||
Zp, 52
|
zp, 52
|
||||||
Zs, 53
|
zs, 53
|
||||||
Common, 54
|
common, 54
|
||||||
Latin, 55
|
latin, 55
|
||||||
Greek, 56
|
greek, 56
|
||||||
Cyrillic, 57
|
cyrillic, 57
|
||||||
Armenian, 58
|
armenian, 58
|
||||||
Hebrew, 59
|
hebrew, 59
|
||||||
Arabic, 60
|
arabic, 60
|
||||||
Syriac, 61
|
syriac, 61
|
||||||
Thaana, 62
|
thaana, 62
|
||||||
Devanagari, 63
|
devanagari, 63
|
||||||
Bengali, 64
|
bengali, 64
|
||||||
Gurmukhi, 65
|
gurmukhi, 65
|
||||||
Gujarati, 66
|
gujarati, 66
|
||||||
Oriya, 67
|
oriya, 67
|
||||||
Tamil, 68
|
tamil, 68
|
||||||
Telugu, 69
|
telugu, 69
|
||||||
Kannada, 70
|
kannada, 70
|
||||||
Malayalam, 71
|
malayalam, 71
|
||||||
Sinhala, 72
|
sinhala, 72
|
||||||
Thai, 73
|
thai, 73
|
||||||
Lao, 74
|
lao, 74
|
||||||
Tibetan, 75
|
tibetan, 75
|
||||||
Myanmar, 76
|
myanmar, 76
|
||||||
Georgian, 77
|
georgian, 77
|
||||||
Hangul, 78
|
hangul, 78
|
||||||
Ethiopic, 79
|
ethiopic, 79
|
||||||
Cherokee, 80
|
cherokee, 80
|
||||||
Canadian_Aboriginal, 81
|
canadianaboriginal, 81
|
||||||
Ogham, 82
|
ogham, 82
|
||||||
Runic, 83
|
runic, 83
|
||||||
Khmer, 84
|
khmer, 84
|
||||||
Mongolian, 85
|
mongolian, 85
|
||||||
Hiragana, 86
|
hiragana, 86
|
||||||
Katakana, 87
|
katakana, 87
|
||||||
Bopomofo, 88
|
bopomofo, 88
|
||||||
Han, 89
|
han, 89
|
||||||
Yi, 90
|
yi, 90
|
||||||
Old_Italic, 91
|
olditalic, 91
|
||||||
Gothic, 92
|
gothic, 92
|
||||||
Deseret, 93
|
deseret, 93
|
||||||
Inherited, 94
|
inherited, 94
|
||||||
Tagalog, 95
|
tagalog, 95
|
||||||
Hanunoo, 96
|
hanunoo, 96
|
||||||
Buhid, 97
|
buhid, 97
|
||||||
Tagbanwa, 98
|
tagbanwa, 98
|
||||||
Limbu, 99
|
limbu, 99
|
||||||
Tai_Le, 100
|
taile, 100
|
||||||
Linear_B, 101
|
linearb, 101
|
||||||
Ugaritic, 102
|
ugaritic, 102
|
||||||
Shavian, 103
|
shavian, 103
|
||||||
Osmanya, 104
|
osmanya, 104
|
||||||
Cypriot, 105
|
cypriot, 105
|
||||||
Braille, 106
|
braille, 106
|
||||||
Buginese, 107
|
buginese, 107
|
||||||
Coptic, 108
|
coptic, 108
|
||||||
New_Tai_Lue, 109
|
newtailue, 109
|
||||||
Glagolitic, 110
|
glagolitic, 110
|
||||||
Tifinagh, 111
|
tifinagh, 111
|
||||||
Syloti_Nagri, 112
|
sylotinagri, 112
|
||||||
Old_Persian, 113
|
oldpersian, 113
|
||||||
Kharoshthi, 114
|
kharoshthi, 114
|
||||||
Balinese, 115
|
balinese, 115
|
||||||
Cuneiform, 116
|
cuneiform, 116
|
||||||
Phoenician, 117
|
phoenician, 117
|
||||||
Phags_Pa, 118
|
phagspa, 118
|
||||||
Nko, 119
|
nko, 119
|
||||||
Sundanese, 120
|
sundanese, 120
|
||||||
Lepcha, 121
|
lepcha, 121
|
||||||
Ol_Chiki, 122
|
olchiki, 122
|
||||||
Vai, 123
|
vai, 123
|
||||||
Saurashtra, 124
|
saurashtra, 124
|
||||||
Kayah_Li, 125
|
kayahli, 125
|
||||||
Rejang, 126
|
rejang, 126
|
||||||
Lycian, 127
|
lycian, 127
|
||||||
Carian, 128
|
carian, 128
|
||||||
Lydian, 129
|
lydian, 129
|
||||||
Cham, 130
|
cham, 130
|
||||||
#endif /* USE_UNICODE_PROPERTIES */
|
#endif /* USE_UNICODE_PROPERTIES */
|
||||||
%%
|
%%
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
|
||||||
%}
|
%}
|
||||||
struct uniname2ctype_struct;
|
struct uniname2ctype_struct;
|
||||||
%%
|
%%
|
||||||
NEWLINE, 0
|
newline, 0
|
||||||
Alpha, 1
|
alpha, 1
|
||||||
Blank, 2
|
blank, 2
|
||||||
Cntrl, 3
|
cntrl, 3
|
||||||
Digit, 4
|
digit, 4
|
||||||
Graph, 5
|
graph, 5
|
||||||
Lower, 6
|
lower, 6
|
||||||
Print, 7
|
print, 7
|
||||||
Punct, 8
|
punct, 8
|
||||||
Space, 9
|
space, 9
|
||||||
Upper, 10
|
upper, 10
|
||||||
XDigit, 11
|
xdigit, 11
|
||||||
Word, 12
|
word, 12
|
||||||
Alnum, 13
|
alnum, 13
|
||||||
ASCII, 14
|
ascii, 14
|
||||||
#ifdef USE_UNICODE_PROPERTIES
|
#ifdef USE_UNICODE_PROPERTIES
|
||||||
Any, 15
|
any, 15
|
||||||
Assigned, 16
|
assigned, 16
|
||||||
C, 17
|
c, 17
|
||||||
Cc, 18
|
cc, 18
|
||||||
Cf, 19
|
cf, 19
|
||||||
Cn, 20
|
cn, 20
|
||||||
Co, 21
|
co, 21
|
||||||
Cs, 22
|
cs, 22
|
||||||
L, 23
|
l, 23
|
||||||
Ll, 24
|
ll, 24
|
||||||
Lm, 25
|
lm, 25
|
||||||
Lo, 26
|
lo, 26
|
||||||
Lt, 27
|
lt, 27
|
||||||
Lu, 28
|
lu, 28
|
||||||
M, 29
|
m, 29
|
||||||
Mc, 30
|
mc, 30
|
||||||
Me, 31
|
me, 31
|
||||||
Mn, 32
|
mn, 32
|
||||||
N, 33
|
n, 33
|
||||||
Nd, 34
|
nd, 34
|
||||||
Nl, 35
|
nl, 35
|
||||||
No, 36
|
no, 36
|
||||||
P, 37
|
p, 37
|
||||||
Pc, 38
|
pc, 38
|
||||||
Pd, 39
|
pd, 39
|
||||||
Pe, 40
|
pe, 40
|
||||||
Pf, 41
|
pf, 41
|
||||||
Pi, 42
|
pi, 42
|
||||||
Po, 43
|
po, 43
|
||||||
Ps, 44
|
ps, 44
|
||||||
S, 45
|
s, 45
|
||||||
Sc, 46
|
sc, 46
|
||||||
Sk, 47
|
sk, 47
|
||||||
Sm, 48
|
sm, 48
|
||||||
So, 49
|
so, 49
|
||||||
Z, 50
|
z, 50
|
||||||
Zl, 51
|
zl, 51
|
||||||
Zp, 52
|
zp, 52
|
||||||
Zs, 53
|
zs, 53
|
||||||
Common, 54
|
common, 54
|
||||||
Latin, 55
|
latin, 55
|
||||||
Greek, 56
|
greek, 56
|
||||||
Cyrillic, 57
|
cyrillic, 57
|
||||||
Armenian, 58
|
armenian, 58
|
||||||
Hebrew, 59
|
hebrew, 59
|
||||||
Arabic, 60
|
arabic, 60
|
||||||
Syriac, 61
|
syriac, 61
|
||||||
Thaana, 62
|
thaana, 62
|
||||||
Devanagari, 63
|
devanagari, 63
|
||||||
Bengali, 64
|
bengali, 64
|
||||||
Gurmukhi, 65
|
gurmukhi, 65
|
||||||
Gujarati, 66
|
gujarati, 66
|
||||||
Oriya, 67
|
oriya, 67
|
||||||
Tamil, 68
|
tamil, 68
|
||||||
Telugu, 69
|
telugu, 69
|
||||||
Kannada, 70
|
kannada, 70
|
||||||
Malayalam, 71
|
malayalam, 71
|
||||||
Sinhala, 72
|
sinhala, 72
|
||||||
Thai, 73
|
thai, 73
|
||||||
Lao, 74
|
lao, 74
|
||||||
Tibetan, 75
|
tibetan, 75
|
||||||
Myanmar, 76
|
myanmar, 76
|
||||||
Georgian, 77
|
georgian, 77
|
||||||
Hangul, 78
|
hangul, 78
|
||||||
Ethiopic, 79
|
ethiopic, 79
|
||||||
Cherokee, 80
|
cherokee, 80
|
||||||
Canadian_Aboriginal, 81
|
canadianaboriginal, 81
|
||||||
Ogham, 82
|
ogham, 82
|
||||||
Runic, 83
|
runic, 83
|
||||||
Khmer, 84
|
khmer, 84
|
||||||
Mongolian, 85
|
mongolian, 85
|
||||||
Hiragana, 86
|
hiragana, 86
|
||||||
Katakana, 87
|
katakana, 87
|
||||||
Bopomofo, 88
|
bopomofo, 88
|
||||||
Han, 89
|
han, 89
|
||||||
Yi, 90
|
yi, 90
|
||||||
Old_Italic, 91
|
olditalic, 91
|
||||||
Gothic, 92
|
gothic, 92
|
||||||
Deseret, 93
|
deseret, 93
|
||||||
Inherited, 94
|
inherited, 94
|
||||||
Tagalog, 95
|
tagalog, 95
|
||||||
Hanunoo, 96
|
hanunoo, 96
|
||||||
Buhid, 97
|
buhid, 97
|
||||||
Tagbanwa, 98
|
tagbanwa, 98
|
||||||
Limbu, 99
|
limbu, 99
|
||||||
Tai_Le, 100
|
taile, 100
|
||||||
Linear_B, 101
|
linearb, 101
|
||||||
Ugaritic, 102
|
ugaritic, 102
|
||||||
Shavian, 103
|
shavian, 103
|
||||||
Osmanya, 104
|
osmanya, 104
|
||||||
Cypriot, 105
|
cypriot, 105
|
||||||
Braille, 106
|
braille, 106
|
||||||
Buginese, 107
|
buginese, 107
|
||||||
Coptic, 108
|
coptic, 108
|
||||||
New_Tai_Lue, 109
|
newtailue, 109
|
||||||
Glagolitic, 110
|
glagolitic, 110
|
||||||
Tifinagh, 111
|
tifinagh, 111
|
||||||
Syloti_Nagri, 112
|
sylotinagri, 112
|
||||||
Old_Persian, 113
|
oldpersian, 113
|
||||||
Kharoshthi, 114
|
kharoshthi, 114
|
||||||
Balinese, 115
|
balinese, 115
|
||||||
Cuneiform, 116
|
cuneiform, 116
|
||||||
Phoenician, 117
|
phoenician, 117
|
||||||
Phags_Pa, 118
|
phagspa, 118
|
||||||
Nko, 119
|
nko, 119
|
||||||
Sundanese, 120
|
sundanese, 120
|
||||||
Lepcha, 121
|
lepcha, 121
|
||||||
Ol_Chiki, 122
|
olchiki, 122
|
||||||
Vai, 123
|
vai, 123
|
||||||
Saurashtra, 124
|
saurashtra, 124
|
||||||
Kayah_Li, 125
|
kayahli, 125
|
||||||
Rejang, 126
|
rejang, 126
|
||||||
Lycian, 127
|
lycian, 127
|
||||||
Carian, 128
|
carian, 128
|
||||||
Lydian, 129
|
lydian, 129
|
||||||
Cham, 130
|
cham, 130
|
||||||
#endif /* USE_UNICODE_PROPERTIES */
|
#endif /* USE_UNICODE_PROPERTIES */
|
||||||
%%
|
%%
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -176,6 +176,12 @@ def make_const(prop, pairs, name)
|
||||||
puts "}; /* CR_#{prop} */"
|
puts "}; /* CR_#{prop} */"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def normalize_propname(name)
|
||||||
|
name = name.downcase
|
||||||
|
name.gsub!(/[- _]/, '')
|
||||||
|
name
|
||||||
|
end
|
||||||
|
|
||||||
puts '%{'
|
puts '%{'
|
||||||
gcps, data = parse_unicode_data(ARGV[0])
|
gcps, data = parse_unicode_data(ARGV[0])
|
||||||
POSIX_NAMES.each do |name|
|
POSIX_NAMES.each do |name|
|
||||||
|
@ -213,10 +219,10 @@ struct uniname2ctype_struct;
|
||||||
%%
|
%%
|
||||||
__HEREDOC
|
__HEREDOC
|
||||||
i = -1
|
i = -1
|
||||||
POSIX_NAMES.each {|name|puts"%-21s %3d"%[name+',', i+=1]}
|
POSIX_NAMES.each {|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
|
||||||
puts "#ifdef USE_UNICODE_PROPERTIES"
|
puts "#ifdef USE_UNICODE_PROPERTIES"
|
||||||
gcps.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
|
gcps.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
|
||||||
scripts.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
|
scripts.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
|
||||||
puts "#endif /* USE_UNICODE_PROPERTIES */\n"
|
puts "#endif /* USE_UNICODE_PROPERTIES */\n"
|
||||||
puts(<<'__HEREDOC')
|
puts(<<'__HEREDOC')
|
||||||
%%
|
%%
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue