1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* regex.c (mbc_startpos): turned into a macro.

* regex.c (euc_startpos): added for improvement.

* regex.c (sjis_startpos): ditto.

* regex.c (utf8_startpos): ditto.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2040 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2002-02-01 08:49:02 +00:00
parent dad91ce6d8
commit d9b49e39b2
2 changed files with 91 additions and 85 deletions

View file

@ -1,3 +1,13 @@
Fri Feb 1 17:46:39 2002 Nobuyoshi Nakada <nobu.nakada@nifty.ne.jp>
* regex.c (mbc_startpos): turned into a macro.
* regex.c (euc_startpos): added for improvement.
* regex.c (sjis_startpos): ditto.
* regex.c (utf8_startpos): ditto.
Fri Feb 1 00:03:30 2002 Yukihiro Matsumoto <matz@ruby-lang.org>
* file.c (rb_stat_inspect): print dev, rdev in hexadecimal.

166
regex.c
View file

@ -478,7 +478,9 @@ re_set_syntax(syntax)
/*
 * WC2MBC1ST(c)
 *
 * When current_mbctype is not MBCTYPE_UTF8: yields c itself if c is below
 * 0x100, otherwise bits 8..15 of c.  When it is MBCTYPE_UTF8: yields
 * utf8_firstbyte(c).
 *
 * Every use of the argument is parenthesized, so an operator expression
 * such as `hi | lo' can be passed without being re-associated by the
 * comparison inside the macro.  The argument may still be evaluated more
 * than once; do not pass an expression with side effects.
 */
#define WC2MBC1ST(c) \
 ((current_mbctype != MBCTYPE_UTF8) ? (((c) < 0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
int mbc_startpos _((const char *start, int pos));
/* Pointer to a start-position routine: takes a byte string and an unsigned
   offset, and returns an unsigned offset. */
typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
/* Table of start-position routines.  Declared here without a size; it is
   indexed by current_mbctype through the mbc_startpos() macro below. */
const mbc_startpos_func_t mbc_startpos_func[];
/* mbc_startpos(start, pos): call the table entry selected by
   current_mbctype.  Each argument is evaluated exactly once. */
#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
static unsigned int
utf8_firstbyte(c)
@ -4384,7 +4386,6 @@ re_free_registers(regs)
Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
Last change: Jul. 9, 1993 by t^2 */
static const unsigned char mbctab_ascii[] = {
/* forward scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -4401,28 +4402,9 @@ static const unsigned char mbctab_ascii[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* reverse scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
/* forward scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -4439,28 +4421,9 @@ static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
/* reverse scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
/* forward scan */
static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFC */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -4476,9 +4439,10 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
/* reverse scan */
static const unsigned char mbctab_sjis_trail[] = { /* 0x40-0x7E,0x80-0xFC */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -4498,7 +4462,6 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
};
static const unsigned char mbctab_utf8[] = {
/* forward scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -4515,24 +4478,6 @@ static const unsigned char mbctab_utf8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
/* reverse scan */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Pointer to a per-byte table, initialised to the ASCII table.
   NOTE(review): presumably repointed by re_mbcinit() when the multibyte
   type changes -- confirm there. */
const unsigned char *re_mbctab = mbctab_ascii;
@ -4561,36 +4506,87 @@ re_mbcinit(mbctype)
}
}
int
mbc_startpos(string, pos)
const char *string;
int pos;
{
int i = pos, w;
/* mbc_isfirst(t, c): the entry of table t for byte c.  c is converted to
   unsigned char before indexing, so a negative char value cannot produce a
   negative index. */
#define mbc_isfirst(t, c) (t)[(unsigned char)(c)]
/* mbc_len(t, c): the entry of table t for byte c, plus one.
   NOTE(review): the tables appear to hold the number of bytes that follow
   a lead byte, which would make this a total character length -- confirm
   against the table definitions. */
#define mbc_len(t, c) ((t)[(unsigned char)(c)]+1)
while (i > 0 && re_mbctab[(unsigned char)string[i]+256]) {
static unsigned int asc_startpos _((const char *string, unsigned int pos));
/*
 * Identity start-position routine: hands pos back unchanged.
 * The string argument is accepted only so the signature matches the other
 * start-position routines; it is never examined.
 */
static unsigned int
asc_startpos(string, pos)
     const char *string;
     unsigned int pos;
{
  unsigned int start = pos;

  (void)string;
  return start;
}
/* euc_islead(c): true when byte c lies OUTSIDE the range 0xa1..0xfe (note
   that the name is easy to misread).  Subtracting 0xa1 and truncating to
   unsigned char folds the two-sided range test into a single comparison,
   and gives the same answer whether c arrives as a signed or an unsigned
   char value. */
#define euc_islead(c) ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
/* euc_mbclen(c): the mbctab_euc entry for byte c, plus one (via mbc_len). */
#define euc_mbclen(c) mbc_len(mbctab_euc, (c))
static unsigned int euc_startpos _((const char *string, unsigned int pos));
static unsigned int
euc_startpos(string, pos)
const char *string;
unsigned int pos;
{
unsigned int i = pos, w;
while (i > 0 && !euc_islead(string[i])) {
--i;
}
if (i == pos || i + (w = mbclen(string[i])) > pos) return i;
i += w;
switch (current_mbctype) {
case MBCTYPE_EUC:
return i + ((pos - i) & ~1);
case MBCTYPE_SJIS:
while (i + (w = mbclen(string[i])) < pos) {
i += w;
}
if (i == pos || i + (w = euc_mbclen(string[i])) > pos) {
return i;
case MBCTYPE_UTF8:
return i;
default:
return pos;
}
i += w;
return i + ((pos - i) & ~1);
}
/* sjis_isfirst(c): the mbctab_sjis entry for byte c (via mbc_isfirst). */
#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
/* sjis_istrail(c): the mbctab_sjis_trail entry for byte c; c is converted
   to unsigned char before indexing. */
#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
/* sjis_mbclen(c): the mbctab_sjis entry for byte c, plus one (via mbc_len). */
#define sjis_mbclen(c) mbc_len(mbctab_sjis, (c))
static unsigned int sjis_startpos _((const char *string, unsigned int pos));
/*
 * Start-position routine for the SJIS tables.
 *
 * string: byte string to inspect; string[pos] is read, so pos must be a
 *         valid index.
 * pos:    offset of interest.
 *
 * Returns an offset that is never greater than pos.  If pos is 0, or the
 * byte at pos is not flagged in mbctab_sjis_trail, pos itself is returned.
 * Otherwise the routine backs up over the unbroken run of bytes flagged in
 * mbctab_sjis that ends just before pos, then moves forward again from the
 * start of that run.
 */
static unsigned int
sjis_startpos(string, pos)
     const char *string;
     unsigned int pos;
{
  unsigned int head = pos;
  unsigned int len;

  /* Only a byte that may be a trail byte calls for the backward scan. */
  if (head > 0 && sjis_istrail(string[head])) {
    while (head > 0 && sjis_isfirst(string[head - 1])) {
      --head;
    }
  }

  /* Nothing was skipped: pos is already the answer. */
  if (head == pos) {
    return head;
  }

  len = sjis_mbclen(string[head]);
  if (head + len > pos) {
    /* The unit starting at head extends over pos. */
    return head;
  }

  /* Step past that unit, then advance by an even number of bytes. */
  head += len;
  return head + ((pos - head) & ~1);
}
/* utf8_islead(c): true unless the top two bits of c are `10', i.e. unless
   c has the bit pattern 10xxxxxx. */
#define utf8_islead(c) ((unsigned char)((c) & 0xc0) != 0x80)
/* utf8_mbclen(c): the mbctab_utf8 entry for byte c, plus one (via mbc_len). */
#define utf8_mbclen(c) mbc_len(mbctab_utf8, (c))
static unsigned int utf8_startpos _((const char *string, unsigned int pos));
/*
 * Start-position routine for the UTF-8 table.
 *
 * string: byte string to inspect; string[pos] is read, so pos must be a
 *         valid index.
 * pos:    offset of interest.
 *
 * Scans backwards from pos over bytes of the form 10xxxxxx until a byte of
 * any other form, or offset 0, is reached.  If the length that mbctab_utf8
 * gives for that byte reaches past pos, its offset is returned; otherwise
 * the offset just after that length is returned.
 */
static unsigned int
utf8_startpos(string, pos)
     const char *string;
     unsigned int pos;
{
  unsigned int lead;
  unsigned int len;

  for (lead = pos; lead > 0 && !utf8_islead(string[lead]); --lead)
    ;

  /* No backing up was needed. */
  if (lead == pos) {
    return lead;
  }

  len = utf8_mbclen(string[lead]);
  return (lead + len > pos) ? lead : lead + len;
}
/* Start-position routines, selected with current_mbctype by the
   mbc_startpos() macro.  The order of the entries is therefore significant. */
const mbc_startpos_func_t mbc_startpos_func[4] = {
  asc_startpos,
  euc_startpos,
  sjis_startpos,
  utf8_startpos
};
/*
vi: sw=2 ts=8
Local variables: