1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* unicode.c (onigenc_unicode_property_name_to_ctype):

ignore case of properties.

* tool/enc-unicode.rb: downcase properties list.

* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  follow above.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24836 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2009-09-10 22:54:01 +00:00
parent e519c9d960
commit ee4b59a419
7 changed files with 1312 additions and 1223 deletions

View file

@ -1,3 +1,14 @@
Fri Sep 11 07:52:43 2009 NARUSE, Yui <naruse@ruby-lang.org>
* unicode.c (onigenc_unicode_property_name_to_ctype):
ignore case of properties.
* tool/enc-unicode.rb: downcase properties list.
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
follow above.
Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net> Fri Sep 11 05:00:19 2009 Koichi Sasada <ko1@atdot.net>
* include/ruby/ruby.h (rb_data_type_t): Add comments. * include/ruby/ruby.h (rb_data_type_t): Add comments.

View file

@ -2093,16 +2093,16 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
p = name; p = name;
len = 0; len = 0;
while (p < end) { for (p = name; p < end; p += enclen(enc, p, end)) {
code = ONIGENC_MBC_TO_CODE(enc, p, end); code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code == ' ' || code == '-' || code == '_')
continue;
if (code >= 0x80) if (code >= 0x80)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME; return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
buf[len++] = (UChar )code; buf[len++] = (UChar )TOLOWER((unsigned char)code);
if (len >= PROPERTY_NAME_MAX_SIZE) if (len >= PROPERTY_NAME_MAX_SIZE)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME; return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
p += enclen(enc, p, end);
} }
buf[len] = 0; buf[len] = 0;

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
%} %}
struct uniname2ctype_struct; struct uniname2ctype_struct;
%% %%
NEWLINE, 0 newline, 0
Alpha, 1 alpha, 1
Blank, 2 blank, 2
Cntrl, 3 cntrl, 3
Digit, 4 digit, 4
Graph, 5 graph, 5
Lower, 6 lower, 6
Print, 7 print, 7
Punct, 8 punct, 8
Space, 9 space, 9
Upper, 10 upper, 10
XDigit, 11 xdigit, 11
Word, 12 word, 12
Alnum, 13 alnum, 13
ASCII, 14 ascii, 14
#ifdef USE_UNICODE_PROPERTIES #ifdef USE_UNICODE_PROPERTIES
Any, 15 any, 15
Assigned, 16 assigned, 16
C, 17 c, 17
Cc, 18 cc, 18
Cf, 19 cf, 19
Cn, 20 cn, 20
Co, 21 co, 21
Cs, 22 cs, 22
L, 23 l, 23
Ll, 24 ll, 24
Lm, 25 lm, 25
Lo, 26 lo, 26
Lt, 27 lt, 27
Lu, 28 lu, 28
M, 29 m, 29
Mc, 30 mc, 30
Me, 31 me, 31
Mn, 32 mn, 32
N, 33 n, 33
Nd, 34 nd, 34
Nl, 35 nl, 35
No, 36 no, 36
P, 37 p, 37
Pc, 38 pc, 38
Pd, 39 pd, 39
Pe, 40 pe, 40
Pf, 41 pf, 41
Pi, 42 pi, 42
Po, 43 po, 43
Ps, 44 ps, 44
S, 45 s, 45
Sc, 46 sc, 46
Sk, 47 sk, 47
Sm, 48 sm, 48
So, 49 so, 49
Z, 50 z, 50
Zl, 51 zl, 51
Zp, 52 zp, 52
Zs, 53 zs, 53
Common, 54 common, 54
Latin, 55 latin, 55
Greek, 56 greek, 56
Cyrillic, 57 cyrillic, 57
Armenian, 58 armenian, 58
Hebrew, 59 hebrew, 59
Arabic, 60 arabic, 60
Syriac, 61 syriac, 61
Thaana, 62 thaana, 62
Devanagari, 63 devanagari, 63
Bengali, 64 bengali, 64
Gurmukhi, 65 gurmukhi, 65
Gujarati, 66 gujarati, 66
Oriya, 67 oriya, 67
Tamil, 68 tamil, 68
Telugu, 69 telugu, 69
Kannada, 70 kannada, 70
Malayalam, 71 malayalam, 71
Sinhala, 72 sinhala, 72
Thai, 73 thai, 73
Lao, 74 lao, 74
Tibetan, 75 tibetan, 75
Myanmar, 76 myanmar, 76
Georgian, 77 georgian, 77
Hangul, 78 hangul, 78
Ethiopic, 79 ethiopic, 79
Cherokee, 80 cherokee, 80
Canadian_Aboriginal, 81 canadianaboriginal, 81
Ogham, 82 ogham, 82
Runic, 83 runic, 83
Khmer, 84 khmer, 84
Mongolian, 85 mongolian, 85
Hiragana, 86 hiragana, 86
Katakana, 87 katakana, 87
Bopomofo, 88 bopomofo, 88
Han, 89 han, 89
Yi, 90 yi, 90
Old_Italic, 91 olditalic, 91
Gothic, 92 gothic, 92
Deseret, 93 deseret, 93
Inherited, 94 inherited, 94
Tagalog, 95 tagalog, 95
Hanunoo, 96 hanunoo, 96
Buhid, 97 buhid, 97
Tagbanwa, 98 tagbanwa, 98
Limbu, 99 limbu, 99
Tai_Le, 100 taile, 100
Linear_B, 101 linearb, 101
Ugaritic, 102 ugaritic, 102
Shavian, 103 shavian, 103
Osmanya, 104 osmanya, 104
Cypriot, 105 cypriot, 105
Braille, 106 braille, 106
Buginese, 107 buginese, 107
Coptic, 108 coptic, 108
New_Tai_Lue, 109 newtailue, 109
Glagolitic, 110 glagolitic, 110
Tifinagh, 111 tifinagh, 111
Syloti_Nagri, 112 sylotinagri, 112
Old_Persian, 113 oldpersian, 113
Kharoshthi, 114 kharoshthi, 114
Balinese, 115 balinese, 115
Cuneiform, 116 cuneiform, 116
Phoenician, 117 phoenician, 117
Phags_Pa, 118 phagspa, 118
Nko, 119 nko, 119
Sundanese, 120 sundanese, 120
Lepcha, 121 lepcha, 121
Ol_Chiki, 122 olchiki, 122
Vai, 123 vai, 123
Saurashtra, 124 saurashtra, 124
Kayah_Li, 125 kayahli, 125
Rejang, 126 rejang, 126
Lycian, 127 lycian, 127
Carian, 128 carian, 128
Lydian, 129 lydian, 129
Cham, 130 cham, 130
#endif /* USE_UNICODE_PROPERTIES */ #endif /* USE_UNICODE_PROPERTIES */
%% %%
static int static int

View file

@ -9330,138 +9330,138 @@ static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned
%} %}
struct uniname2ctype_struct; struct uniname2ctype_struct;
%% %%
NEWLINE, 0 newline, 0
Alpha, 1 alpha, 1
Blank, 2 blank, 2
Cntrl, 3 cntrl, 3
Digit, 4 digit, 4
Graph, 5 graph, 5
Lower, 6 lower, 6
Print, 7 print, 7
Punct, 8 punct, 8
Space, 9 space, 9
Upper, 10 upper, 10
XDigit, 11 xdigit, 11
Word, 12 word, 12
Alnum, 13 alnum, 13
ASCII, 14 ascii, 14
#ifdef USE_UNICODE_PROPERTIES #ifdef USE_UNICODE_PROPERTIES
Any, 15 any, 15
Assigned, 16 assigned, 16
C, 17 c, 17
Cc, 18 cc, 18
Cf, 19 cf, 19
Cn, 20 cn, 20
Co, 21 co, 21
Cs, 22 cs, 22
L, 23 l, 23
Ll, 24 ll, 24
Lm, 25 lm, 25
Lo, 26 lo, 26
Lt, 27 lt, 27
Lu, 28 lu, 28
M, 29 m, 29
Mc, 30 mc, 30
Me, 31 me, 31
Mn, 32 mn, 32
N, 33 n, 33
Nd, 34 nd, 34
Nl, 35 nl, 35
No, 36 no, 36
P, 37 p, 37
Pc, 38 pc, 38
Pd, 39 pd, 39
Pe, 40 pe, 40
Pf, 41 pf, 41
Pi, 42 pi, 42
Po, 43 po, 43
Ps, 44 ps, 44
S, 45 s, 45
Sc, 46 sc, 46
Sk, 47 sk, 47
Sm, 48 sm, 48
So, 49 so, 49
Z, 50 z, 50
Zl, 51 zl, 51
Zp, 52 zp, 52
Zs, 53 zs, 53
Common, 54 common, 54
Latin, 55 latin, 55
Greek, 56 greek, 56
Cyrillic, 57 cyrillic, 57
Armenian, 58 armenian, 58
Hebrew, 59 hebrew, 59
Arabic, 60 arabic, 60
Syriac, 61 syriac, 61
Thaana, 62 thaana, 62
Devanagari, 63 devanagari, 63
Bengali, 64 bengali, 64
Gurmukhi, 65 gurmukhi, 65
Gujarati, 66 gujarati, 66
Oriya, 67 oriya, 67
Tamil, 68 tamil, 68
Telugu, 69 telugu, 69
Kannada, 70 kannada, 70
Malayalam, 71 malayalam, 71
Sinhala, 72 sinhala, 72
Thai, 73 thai, 73
Lao, 74 lao, 74
Tibetan, 75 tibetan, 75
Myanmar, 76 myanmar, 76
Georgian, 77 georgian, 77
Hangul, 78 hangul, 78
Ethiopic, 79 ethiopic, 79
Cherokee, 80 cherokee, 80
Canadian_Aboriginal, 81 canadianaboriginal, 81
Ogham, 82 ogham, 82
Runic, 83 runic, 83
Khmer, 84 khmer, 84
Mongolian, 85 mongolian, 85
Hiragana, 86 hiragana, 86
Katakana, 87 katakana, 87
Bopomofo, 88 bopomofo, 88
Han, 89 han, 89
Yi, 90 yi, 90
Old_Italic, 91 olditalic, 91
Gothic, 92 gothic, 92
Deseret, 93 deseret, 93
Inherited, 94 inherited, 94
Tagalog, 95 tagalog, 95
Hanunoo, 96 hanunoo, 96
Buhid, 97 buhid, 97
Tagbanwa, 98 tagbanwa, 98
Limbu, 99 limbu, 99
Tai_Le, 100 taile, 100
Linear_B, 101 linearb, 101
Ugaritic, 102 ugaritic, 102
Shavian, 103 shavian, 103
Osmanya, 104 osmanya, 104
Cypriot, 105 cypriot, 105
Braille, 106 braille, 106
Buginese, 107 buginese, 107
Coptic, 108 coptic, 108
New_Tai_Lue, 109 newtailue, 109
Glagolitic, 110 glagolitic, 110
Tifinagh, 111 tifinagh, 111
Syloti_Nagri, 112 sylotinagri, 112
Old_Persian, 113 oldpersian, 113
Kharoshthi, 114 kharoshthi, 114
Balinese, 115 balinese, 115
Cuneiform, 116 cuneiform, 116
Phoenician, 117 phoenician, 117
Phags_Pa, 118 phagspa, 118
Nko, 119 nko, 119
Sundanese, 120 sundanese, 120
Lepcha, 121 lepcha, 121
Ol_Chiki, 122 olchiki, 122
Vai, 123 vai, 123
Saurashtra, 124 saurashtra, 124
Kayah_Li, 125 kayahli, 125
Rejang, 126 rejang, 126
Lycian, 127 lycian, 127
Carian, 128 carian, 128
Lydian, 129 lydian, 129
Cham, 130 cham, 130
#endif /* USE_UNICODE_PROPERTIES */ #endif /* USE_UNICODE_PROPERTIES */
%% %%
static int static int

View file

@ -176,6 +176,12 @@ def make_const(prop, pairs, name)
puts "}; /* CR_#{prop} */" puts "}; /* CR_#{prop} */"
end end
# Return a normalized copy of the property name +name+: lowercased, with
# every hyphen, space and underscore removed (e.g. "Old_Italic" becomes
# "olditalic").  The string passed in is not modified.
def normalize_propname(name)
  name.downcase.gsub(/[- _]/, '')
end
puts '%{' puts '%{'
gcps, data = parse_unicode_data(ARGV[0]) gcps, data = parse_unicode_data(ARGV[0])
POSIX_NAMES.each do |name| POSIX_NAMES.each do |name|
@ -213,10 +219,10 @@ struct uniname2ctype_struct;
%% %%
__HEREDOC __HEREDOC
i = -1 i = -1
POSIX_NAMES.each {|name|puts"%-21s %3d"%[name+',', i+=1]} POSIX_NAMES.each {|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
puts "#ifdef USE_UNICODE_PROPERTIES" puts "#ifdef USE_UNICODE_PROPERTIES"
gcps.each{|name|puts"%-21s %3d"%[name+',', i+=1]} gcps.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
scripts.each{|name|puts"%-21s %3d"%[name+',', i+=1]} scripts.each{|name|puts"%-21s %3d" % [normalize_propname(name)+',', i+=1]}
puts "#endif /* USE_UNICODE_PROPERTIES */\n" puts "#endif /* USE_UNICODE_PROPERTIES */\n"
puts(<<'__HEREDOC') puts(<<'__HEREDOC')
%% %%