1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* unicode.c (onigenc_unicode_property_name_to_ctype):
  ignore the case of property names when looking them up.

* tool/enc-unicode.rb: emit the property name list in normalized form
  (downcased, with spaces, hyphens and underscores removed).

* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  follow above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24836 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2009-09-10 22:54:01 +00:00
parent e519c9d960
commit ee4b59a419
7 changed files with 1312 additions and 1223 deletions

View file

@ -1,3 +1,14 @@
Fri Sep 11 07:52:43 2009 NARUSE, Yui <naruse@ruby-lang.org>
* unicode.c (onigenc_unicode_property_name_to_ctype):
ignore case of properties.
* tool/enc-unicode.rb: downcase properties list.
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
follow above.
Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net>
* include/ruby/ruby.h (rb_data_type_t): Add comments.

View file

@ -2093,16 +2093,16 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
p = name;
len = 0;
while (p < end) {
for (p = name; p < end; p += enclen(enc, p, end)) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code == ' ' || code == '-' || code == '_')
continue;
if (code >= 0x80)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
buf[len++] = (UChar )code;
buf[len++] = (UChar )TOLOWER((unsigned char)code);
if (len >= PROPERTY_NAME_MAX_SIZE)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
p += enclen(enc, p, end);
}
buf[len] = 0;

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
%}
struct uniname2ctype_struct;
%%
NEWLINE, 0
Alpha, 1
Blank, 2
Cntrl, 3
Digit, 4
Graph, 5
Lower, 6
Print, 7
Punct, 8
Space, 9
Upper, 10
XDigit, 11
Word, 12
Alnum, 13
ASCII, 14
newline, 0
alpha, 1
blank, 2
cntrl, 3
digit, 4
graph, 5
lower, 6
print, 7
punct, 8
space, 9
upper, 10
xdigit, 11
word, 12
alnum, 13
ascii, 14
#ifdef USE_UNICODE_PROPERTIES
Any, 15
Assigned, 16
C, 17
Cc, 18
Cf, 19
Cn, 20
Co, 21
Cs, 22
L, 23
Ll, 24
Lm, 25
Lo, 26
Lt, 27
Lu, 28
M, 29
Mc, 30
Me, 31
Mn, 32
N, 33
Nd, 34
Nl, 35
No, 36
P, 37
Pc, 38
Pd, 39
Pe, 40
Pf, 41
Pi, 42
Po, 43
Ps, 44
S, 45
Sc, 46
Sk, 47
Sm, 48
So, 49
Z, 50
Zl, 51
Zp, 52
Zs, 53
Common, 54
Latin, 55
Greek, 56
Cyrillic, 57
Armenian, 58
Hebrew, 59
Arabic, 60
Syriac, 61
Thaana, 62
Devanagari, 63
Bengali, 64
Gurmukhi, 65
Gujarati, 66
Oriya, 67
Tamil, 68
Telugu, 69
Kannada, 70
Malayalam, 71
Sinhala, 72
Thai, 73
Lao, 74
Tibetan, 75
Myanmar, 76
Georgian, 77
Hangul, 78
Ethiopic, 79
Cherokee, 80
Canadian_Aboriginal, 81
Ogham, 82
Runic, 83
Khmer, 84
Mongolian, 85
Hiragana, 86
Katakana, 87
Bopomofo, 88
Han, 89
Yi, 90
Old_Italic, 91
Gothic, 92
Deseret, 93
Inherited, 94
Tagalog, 95
Hanunoo, 96
Buhid, 97
Tagbanwa, 98
Limbu, 99
Tai_Le, 100
Linear_B, 101
Ugaritic, 102
Shavian, 103
Osmanya, 104
Cypriot, 105
Braille, 106
Buginese, 107
Coptic, 108
New_Tai_Lue, 109
Glagolitic, 110
Tifinagh, 111
Syloti_Nagri, 112
Old_Persian, 113
Kharoshthi, 114
Balinese, 115
Cuneiform, 116
Phoenician, 117
Phags_Pa, 118
Nko, 119
Sundanese, 120
Lepcha, 121
Ol_Chiki, 122
Vai, 123
Saurashtra, 124
Kayah_Li, 125
Rejang, 126
Lycian, 127
Carian, 128
Lydian, 129
Cham, 130
any, 15
assigned, 16
c, 17
cc, 18
cf, 19
cn, 20
co, 21
cs, 22
l, 23
ll, 24
lm, 25
lo, 26
lt, 27
lu, 28
m, 29
mc, 30
me, 31
mn, 32
n, 33
nd, 34
nl, 35
no, 36
p, 37
pc, 38
pd, 39
pe, 40
pf, 41
pi, 42
po, 43
ps, 44
s, 45
sc, 46
sk, 47
sm, 48
so, 49
z, 50
zl, 51
zp, 52
zs, 53
common, 54
latin, 55
greek, 56
cyrillic, 57
armenian, 58
hebrew, 59
arabic, 60
syriac, 61
thaana, 62
devanagari, 63
bengali, 64
gurmukhi, 65
gujarati, 66
oriya, 67
tamil, 68
telugu, 69
kannada, 70
malayalam, 71
sinhala, 72
thai, 73
lao, 74
tibetan, 75
myanmar, 76
georgian, 77
hangul, 78
ethiopic, 79
cherokee, 80
canadianaboriginal, 81
ogham, 82
runic, 83
khmer, 84
mongolian, 85
hiragana, 86
katakana, 87
bopomofo, 88
han, 89
yi, 90
olditalic, 91
gothic, 92
deseret, 93
inherited, 94
tagalog, 95
hanunoo, 96
buhid, 97
tagbanwa, 98
limbu, 99
taile, 100
linearb, 101
ugaritic, 102
shavian, 103
osmanya, 104
cypriot, 105
braille, 106
buginese, 107
coptic, 108
newtailue, 109
glagolitic, 110
tifinagh, 111
sylotinagri, 112
oldpersian, 113
kharoshthi, 114
balinese, 115
cuneiform, 116
phoenician, 117
phagspa, 118
nko, 119
sundanese, 120
lepcha, 121
olchiki, 122
vai, 123
saurashtra, 124
kayahli, 125
rejang, 126
lycian, 127
carian, 128
lydian, 129
cham, 130
#endif /* USE_UNICODE_PROPERTIES */
%%
static int

View file

@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
%}
struct uniname2ctype_struct;
%%
NEWLINE, 0
Alpha, 1
Blank, 2
Cntrl, 3
Digit, 4
Graph, 5
Lower, 6
Print, 7
Punct, 8
Space, 9
Upper, 10
XDigit, 11
Word, 12
Alnum, 13
ASCII, 14
newline, 0
alpha, 1
blank, 2
cntrl, 3
digit, 4
graph, 5
lower, 6
print, 7
punct, 8
space, 9
upper, 10
xdigit, 11
word, 12
alnum, 13
ascii, 14
#ifdef USE_UNICODE_PROPERTIES
Any, 15
Assigned, 16
C, 17
Cc, 18
Cf, 19
Cn, 20
Co, 21
Cs, 22
L, 23
Ll, 24
Lm, 25
Lo, 26
Lt, 27
Lu, 28
M, 29
Mc, 30
Me, 31
Mn, 32
N, 33
Nd, 34
Nl, 35
No, 36
P, 37
Pc, 38
Pd, 39
Pe, 40
Pf, 41
Pi, 42
Po, 43
Ps, 44
S, 45
Sc, 46
Sk, 47
Sm, 48
So, 49
Z, 50
Zl, 51
Zp, 52
Zs, 53
Common, 54
Latin, 55
Greek, 56
Cyrillic, 57
Armenian, 58
Hebrew, 59
Arabic, 60
Syriac, 61
Thaana, 62
Devanagari, 63
Bengali, 64
Gurmukhi, 65
Gujarati, 66
Oriya, 67
Tamil, 68
Telugu, 69
Kannada, 70
Malayalam, 71
Sinhala, 72
Thai, 73
Lao, 74
Tibetan, 75
Myanmar, 76
Georgian, 77
Hangul, 78
Ethiopic, 79
Cherokee, 80
Canadian_Aboriginal, 81
Ogham, 82
Runic, 83
Khmer, 84
Mongolian, 85
Hiragana, 86
Katakana, 87
Bopomofo, 88
Han, 89
Yi, 90
Old_Italic, 91
Gothic, 92
Deseret, 93
Inherited, 94
Tagalog, 95
Hanunoo, 96
Buhid, 97
Tagbanwa, 98
Limbu, 99
Tai_Le, 100
Linear_B, 101
Ugaritic, 102
Shavian, 103
Osmanya, 104
Cypriot, 105
Braille, 106
Buginese, 107
Coptic, 108
New_Tai_Lue, 109
Glagolitic, 110
Tifinagh, 111
Syloti_Nagri, 112
Old_Persian, 113
Kharoshthi, 114
Balinese, 115
Cuneiform, 116
Phoenician, 117
Phags_Pa, 118
Nko, 119
Sundanese, 120
Lepcha, 121
Ol_Chiki, 122
Vai, 123
Saurashtra, 124
Kayah_Li, 125
Rejang, 126
Lycian, 127
Carian, 128
Lydian, 129
Cham, 130
any, 15
assigned, 16
c, 17
cc, 18
cf, 19
cn, 20
co, 21
cs, 22
l, 23
ll, 24
lm, 25
lo, 26
lt, 27
lu, 28
m, 29
mc, 30
me, 31
mn, 32
n, 33
nd, 34
nl, 35
no, 36
p, 37
pc, 38
pd, 39
pe, 40
pf, 41
pi, 42
po, 43
ps, 44
s, 45
sc, 46
sk, 47
sm, 48
so, 49
z, 50
zl, 51
zp, 52
zs, 53
common, 54
latin, 55
greek, 56
cyrillic, 57
armenian, 58
hebrew, 59
arabic, 60
syriac, 61
thaana, 62
devanagari, 63
bengali, 64
gurmukhi, 65
gujarati, 66
oriya, 67
tamil, 68
telugu, 69
kannada, 70
malayalam, 71
sinhala, 72
thai, 73
lao, 74
tibetan, 75
myanmar, 76
georgian, 77
hangul, 78
ethiopic, 79
cherokee, 80
canadianaboriginal, 81
ogham, 82
runic, 83
khmer, 84
mongolian, 85
hiragana, 86
katakana, 87
bopomofo, 88
han, 89
yi, 90
olditalic, 91
gothic, 92
deseret, 93
inherited, 94
tagalog, 95
hanunoo, 96
buhid, 97
tagbanwa, 98
limbu, 99
taile, 100
linearb, 101
ugaritic, 102
shavian, 103
osmanya, 104
cypriot, 105
braille, 106
buginese, 107
coptic, 108
newtailue, 109
glagolitic, 110
tifinagh, 111
sylotinagri, 112
oldpersian, 113
kharoshthi, 114
balinese, 115
cuneiform, 116
phoenician, 117
phagspa, 118
nko, 119
sundanese, 120
lepcha, 121
olchiki, 122
vai, 123
saurashtra, 124
kayahli, 125
rejang, 126
lycian, 127
carian, 128
lydian, 129
cham, 130
#endif /* USE_UNICODE_PROPERTIES */
%%
static int

View file

@ -176,6 +176,12 @@ def make_const(prop, pairs, name)
puts "}; /* CR_#{prop} */"
end
# Returns a normalized copy of +name+ for use as a property-name key:
# the name is lowercased and every hyphen, space and underscore is removed.
# The string passed in is left untouched.
def normalize_propname(name)
  name.downcase.gsub(/[- _]/, '')
end
puts '%{'
gcps, data = parse_unicode_data(ARGV[0])
POSIX_NAMES.each do |name|
@ -213,10 +219,10 @@ struct uniname2ctype_struct;
%%
__HEREDOC
i = -1
POSIX_NAMES.each {|name|puts"%-21s %3d"%[name+',', i+=1]}
POSIX_NAMES.each {|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
puts "#ifdef USE_UNICODE_PROPERTIES"
gcps.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
scripts.each{|name|puts"%-21s %3d"%[name+',', i+=1]}
gcps.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
scripts.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
puts "#endif /* USE_UNICODE_PROPERTIES */\n"
puts(<<'__HEREDOC')
%%