1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

merge Oniguruma 4.2.2

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10684 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
kosako 2006-08-05 13:54:40 +00:00
parent 7e10b0c4de
commit d92db05a27
8 changed files with 437 additions and 133 deletions

230
regexec.c
View file

@ -610,15 +610,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p) \
if ((p) < stk_base) goto stack_error;
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
fprintf(stderr, "at %s\n", at);\
goto stack_error;\
}
#else
#define STACK_BASE_CHECK(p)
#define STACK_BASE_CHECK(p, at)
#endif
#define STACK_POP_ONE do {\
stk--;\
STACK_BASE_CHECK(stk); \
STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
#define STACK_POP do {\
@ -626,14 +629,14 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
case STACK_POP_LEVEL_FREE:\
while (1) {\
stk--;\
STACK_BASE_CHECK(stk); \
STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
}\
break;\
case STACK_POP_LEVEL_MEM_START:\
while (1) {\
stk--;\
STACK_BASE_CHECK(stk); \
STACK_BASE_CHECK(stk, "STACK_POP 2"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@ -644,7 +647,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
default:\
while (1) {\
stk--;\
STACK_BASE_CHECK(stk); \
STACK_BASE_CHECK(stk, "STACK_POP 3"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@ -665,7 +668,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_POP_TIL_POS_NOT do {\
while (1) {\
stk--;\
STACK_BASE_CHECK(stk); \
STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
if (stk->type == STK_POS_NOT) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@ -684,7 +687,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
while (1) {\
stk--;\
STACK_BASE_CHECK(stk); \
STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
if (stk->type == STK_LOOK_BEHIND_NOT) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@ -704,7 +707,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_POS_END"); \
if (IS_TO_VOID_TARGET(k)) {\
k->type = STK_VOID;\
}\
@ -719,7 +722,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType *k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
if (IS_TO_VOID_TARGET(k)) {\
k->type = STK_VOID;\
}\
@ -734,7 +737,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
(isnull) = (k->u.null_check.pstr == (s));\
@ -749,7 +752,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
if (level == 0) {\
@ -769,7 +772,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
if (k->u.null_check.pstr != (s)) {\
@ -809,7 +812,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
if (level == 0) {\
@ -857,7 +860,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
if (k->type == STK_REPEAT) {\
if (level == 0) {\
if (k->u.repeat.num == (id)) {\
@ -875,7 +878,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k); \
STACK_BASE_CHECK(k, "STACK_RETURN"); \
if (k->type == STK_CALL_FRAME) {\
if (level == 0) {\
(addr) = k->u.call_frame.ret_addr;\
@ -995,6 +998,77 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
}
#endif
#ifdef USE_BACKREF_AT_LEVEL
/*
 * Tests whether the group number `mem` is one of the `num` memory numbers
 * read sequentially from `memp` via GET_MEMNUM_INC.
 *
 * Returns 1 on the first number equal to `mem`, 0 if none of them matches
 * (including when `num` is zero or negative).  `memp` is a by-value copy,
 * so the caller's pointer is not advanced.
 */
static int mem_is_in_memp(int mem, int num, UChar* memp)
{
  int remaining;
  MemNumType cur;

  for (remaining = num; remaining > 0; remaining--) {
    GET_MEMNUM_INC(cur, memp);
    if ((int )cur == mem)
      return 1;
  }

  return 0;
}
/*
 * Tries to match a back reference against a capture recorded on the match
 * stack at a specific nesting level.
 *
 * The stack is scanned downward from the entry just below `top` to
 * `stk_base`.  A running level counter is decremented on STK_CALL_FRAME and
 * incremented on STK_RETURN; only entries seen while the counter equals
 * `nest` are examined.  At that level, a STK_MEM_END entry whose group number
 * is listed in `memp` (`mem_num` entries) records the capture end, and the
 * next STK_MEM_START entry for a listed group supplies the capture start.
 * The captured text is then compared with the subject at `*s`, using
 * string_cmp_ic() when `ignore_case` is non-zero and a byte-wise compare
 * otherwise.
 *
 * Returns 1 and advances `*s` past the matched text on success.  Returns 0
 * when the capture is longer than the remaining input (`send - *s`), when the
 * comparison fails, or when no usable start/end pair is found.  The first
 * candidate pair decides the result; later captures are not tried.
 */
static int backref_match_at_nested_level(regex_t* reg
	 , StackType* top, StackType* stk_base
	 , int ignore_case, int ambig_flag
	 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
  UChar *cap_end = NULL_UCHARP;
  int depth = 0;
  StackType* ent;

  for (ent = top - 1; ent >= stk_base; ent--) {
    UChar *cap_start, *src, *dst;

    /* Track call nesting while walking back through the stack. */
    if (ent->type == STK_CALL_FRAME) {
      depth--;
      continue;
    }
    if (ent->type == STK_RETURN) {
      depth++;
      continue;
    }
    if (depth != nest)
      continue;

    if (ent->type == STK_MEM_START) {
      if (! mem_is_in_memp(ent->u.mem.num, mem_num, memp))
        continue;
      /* A start without a previously seen end is not usable yet. */
      if (cap_end == NULL_UCHARP)
        continue;

      cap_start = ent->u.mem.pstr;
      if (cap_end - cap_start > send - *s)
        return 0;  /* capture is longer than the remaining input */

      dst = *s;
      if (ignore_case != 0) {
        if (string_cmp_ic(reg->enc, ambig_flag,
                          cap_start, &dst, (int )(cap_end - cap_start)) == 0)
          return 0;
      }
      else {
        for (src = cap_start; src < cap_end; src++, dst++) {
          if (*src != *dst)
            return 0;
        }
      }

      *s = dst;
      return 1;
    }

    if (ent->type == STK_MEM_END) {
      if (mem_is_in_memp(ent->u.mem.num, mem_num, memp))
        cap_end = ent->u.mem.pstr;
    }
  }

  return 0;
}
#endif /* USE_BACKREF_AT_LEVEL */
#ifdef RUBY_PLATFORM
typedef struct {
@ -1010,7 +1084,7 @@ trap_ensure(VALUE arg)
TrapEnsureArg* ta = (TrapEnsureArg* )arg;
if (ta->state == 0) { /* trap_exec() is not normal return */
ONIG_STATE_DEC(ta->reg);
ONIG_STATE_DEC_THREAD(ta->reg);
if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
xfree(ta->stk_base);
@ -2227,6 +2301,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
continue;
}
break;
#ifdef USE_BACKREF_AT_LEVEL
/* Back reference resolved at a given nesting level.
   Operands read from the bytecode at p, in order: an option value (ic),
   a level, and a count (tlen); the memory-number list passed to the helper
   starts at p after these three reads. */
case OP_BACKREF_AT_LEVEL:
{
int len;
OnigOptionType ic;
LengthType level;
GET_OPTION_INC(ic, p);
GET_LENGTH_INC(level, p);
GET_LENGTH_INC(tlen, p);
/* Remember the position before the helper advances s. */
sprev = s;
if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
, (int )level, (int )tlen, p, &s, end)) {
/* Step sprev forward one character at a time until the next step
   would reach or pass s. */
while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
/* Skip over the tlen memory numbers that follow the operands. */
p += (SIZE_MEMNUM * tlen);
}
else
goto fail;
/* NOTE(review): no STAT_OP_IN is issued for this opcode, unlike
   OP_SET_OPTION_PUSH below -- confirm whether that is intended. */
STAT_OP_OUT;
continue;
}
/* Not reached: both branches above leave via continue or goto. */
break;
#endif
case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
@ -2766,66 +2869,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar* text, const UChar* text_end,
const UChar* text_range)
{
const UChar *s, *t, *p, *end;
const UChar *s, *se, *t, *p, *end;
const UChar *tail;
int skip;
int skip, tlen1;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
(int )text, (int )text_end, (int )text_range);
#endif
end = text_range + (target_end - target) - 1;
tlen1 = (target_end - target) - 1;
end = text_range + tlen1;
if (end > text_end)
end = text_end;
tail = target_end - 1;
s = text;
while ((s - text) < target_end - target) {
s += enc_len(reg->enc, s);
}
s--; /* set to text check tail position. */
if (IS_NULL(reg->int_map)) {
while (s < end) {
p = s;
p = se = s + tlen1;
t = tail;
while (t >= target && *p == *t) {
p--; t--;
while (*p == *t && t >= target) {
p--; t--;
}
if (t < target) return (UChar* )(p + 1);
if (t < target) return (UChar* )s;
skip = reg->map[*s];
p = s + 1;
if (p >= text_end) return (UChar* )NULL;
t = p;
skip = reg->map[*se];
t = s;
do {
p += enc_len(reg->enc, p);
} while ((p - t) < skip && p < text_end);
s += (p - t);
s += enc_len(reg->enc, s);
} while ((s - t) < skip && s < end);
}
}
else {
while (s < end) {
p = s;
p = se = s + tlen1;
t = tail;
while (t >= target && *p == *t) {
p--; t--;
while (*p == *t && t >= target) {
p--; t--;
}
if (t < target) return (UChar* )(p + 1);
if (t < target) return (UChar* )s;
skip = reg->int_map[*s];
p = s + 1;
if (p >= text_end) return (UChar* )NULL;
t = p;
skip = reg->int_map[*se];
t = s;
do {
p += enc_len(reg->enc, p);
} while ((p - t) < skip && p < text_end);
s += (p - t);
s += enc_len(reg->enc, s);
} while ((s - t) < skip && s < end);
}
}
return (UChar* )NULL;
}
@ -2954,7 +3047,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
UChar *prev;
MatchArg msa;
#ifdef USE_MULTI_THREAD_SYSTEM
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
THREAD_ATOMIC_START;
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(reg);
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
@ -2963,15 +3058,19 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
}
}
else {
int n = 0;
int n;
THREAD_ATOMIC_END;
n = 0;
while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
if (++n > THREAD_PASS_LIMIT_COUNT)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
ONIG_STATE_INC(reg);
goto start;
}
#endif /* USE_MULTI_THREAD_SYSTEM */
THREAD_ATOMIC_END;
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
MATCH_ARG_INIT(msa, option, region, at);
@ -2991,7 +3090,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
}
MATCH_ARG_FREE(msa);
ONIG_STATE_DEC(reg);
ONIG_STATE_DEC_THREAD(reg);
return r;
}
@ -3234,8 +3333,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
int r;
UChar *s, *prev;
MatchArg msa;
const UChar *orig_start = start;
#ifdef USE_MULTI_THREAD_SYSTEM
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
THREAD_ATOMIC_START;
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(reg);
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
@ -3244,15 +3346,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else {
int n = 0;
int n;
THREAD_ATOMIC_END;
n = 0;
while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
if (++n > THREAD_PASS_LIMIT_COUNT)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
ONIG_STATE_INC(reg);
goto start;
}
#endif /* USE_MULTI_THREAD_SYSTEM */
THREAD_ATOMIC_END;
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
@ -3380,7 +3486,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
MATCH_ARG_INIT(msa, option, region, start);
MATCH_ARG_INIT(msa, option, region, orig_start);
s = (UChar* )start;
if (range > start) { /* forward search */
@ -3512,7 +3618,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
finish:
MATCH_ARG_FREE(msa);
ONIG_STATE_DEC(reg);
ONIG_STATE_DEC_THREAD(reg);
/* If the result is a mismatch and the FIND_NOT_EMPTY option is not set,
then the region is not set in match_at(). */
@ -3533,7 +3639,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
ONIG_STATE_DEC(reg);
ONIG_STATE_DEC_THREAD(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
@ -3541,7 +3647,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
return r;
match:
ONIG_STATE_DEC(reg);
ONIG_STATE_DEC_THREAD(reg);
MATCH_ARG_FREE(msa);
return s - str;
}