diff --git a/regcomp.c b/regcomp.c index 24d44dd1b8..de44cfe037 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3021,7 +3021,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) NSTRING_SET_CASE_AMBIG(node); break; } - p++; + p += enc_len(reg->enc, *p); } } break; @@ -3950,22 +3950,17 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } } - if (IS_NULL(cc->mbuf)) { - if (cc->not) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - add_char_opt_map_info(&opt->map, i); - } - mb_found = 1; - } - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - z = ONIGENC_IS_MBC_HEAD(env->enc, i); - if (z) { - mb_found = 1; - add_char_opt_map_info(&opt->map, i); - } - } + if (! ONIGENC_IS_SINGLEBYTE(env->enc)) { + if (! IS_NULL(cc->mbuf) || + (cc->not != 0 && found != 0)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = ONIGENC_IS_MBC_HEAD(env->enc, i); + if (z) { + mb_found = 1; + add_char_opt_map_info(&opt->map, i); + } + } + } } if (mb_found) { diff --git a/regexec.c b/regexec.c index 870a6535bd..1bae0d9516 100644 --- a/regexec.c +++ b/regexec.c @@ -362,11 +362,26 @@ typedef struct { };\ } while(0) +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + static int stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) { - int n; + unsigned int n; StackType *x, *stk_base, *stk_end, *stk; stk_base = *arg_stk_base; @@ -385,7 +400,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } else { n *= 2; - if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER; + if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { + if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = MatchStackLimitSize; + } x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); if (IS_NULL(x)) { STACK_SAVE; @@ -1171,10 +1191,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, goto fail; /* for retry */ } } - else { - /* default behavior: return first-matching result. */ - goto finish; - } + + /* default behavior: return first-matching result. */ + goto finish; break; case OP_EXACT1: STAT_OP_IN(OP_EXACT1); @@ -2574,11 +2593,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, if (t < target) return p + 1; skip = reg->map[*s]; - p++; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; t = p; - while ((p - t) < skip) { + do { p += enc_len(reg->enc, *p); - } + } while ((p - t) < skip && p < text_end); + s += (p - t); } } @@ -2592,11 +2613,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, if (t < target) return p + 1; skip = reg->int_map[*s]; - p++; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; t = p; - while ((p - t) < skip) { + do { p += enc_len(reg->enc, *p); - } + } while ((p - t) < skip && p < text_end); + s += (p - t); } } @@ -3288,13 +3311,3 @@ onig_get_syntax(regex_t* reg) { return reg->syntax; } - -extern const char* -onig_version(void) -{ -#define MSTR(a) # a - - return (MSTR(ONIGURUMA_VERSION_MAJOR) "." - MSTR(ONIGURUMA_VERSION_MINOR) "." - MSTR(ONIGURUMA_VERSION_TEENY)); -} diff --git a/regint.h b/regint.h index dacc0400be..bcc5fa5fc4 100644 --- a/regint.h +++ b/regint.h @@ -46,13 +46,12 @@ #define USE_QUALIFIER_PEEK_NEXT #define INIT_MATCH_STACK_SIZE 160 -#define MATCH_STACK_LIMIT_SIZE 500000 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ /* interface to external system */ #ifdef NOT_RUBY /* gived from Makefile */ #include "config.h" #define USE_VARIABLE_META_CHARS -#define USE_VARIABLE_SYNTAX #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ #define THREAD_ATOMIC_START /* depend on thread system */ @@ -654,6 +653,31 @@ extern OnigMetaCharTableType OnigMetaCharTable; #define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time #define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime +#define SYN_POSIX_COMMON_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) + +#define SYN_GNU_REGEX_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) + +#define SYN_GNU_REGEX_BV \ + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + #ifdef ONIG_DEBUG