From 92d805753bf9264d35bb6ac44a740f71898654ab Mon Sep 17 00:00:00 2001 From: kosako Date: Sat, 28 Oct 2006 11:15:41 +0000 Subject: [PATCH] merge Oniguruma 4.4.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@11234 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 16 ++++++++++++++- oniguruma.h | 3 ++- regcomp.c | 3 +-- regerror.c | 58 ++++++++++++++++++++++++++++++++++++++++++++--------- regexec.c | 37 +++++++++++++++++++++++++--------- regint.h | 4 +--- regparse.c | 2 +- 7 files changed, 97 insertions(+), 26 deletions(-) diff --git a/ChangeLog b/ChangeLog index 084519684a..abc34fdcfa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +Sat Oct 28 20:13:18 2006 K.Kosako + + * oniguruma.h: Version 4.4.5 + + * regint.h: ditto. + + * regerror.c: ditto. + + * regexec.c: ditto. + + * regcomp.c ditto. + + * regparse.c ditto. + Sat Oct 28 07:56:13 2006 Yukihiro Matsumoto * marshal.c (r_object0): missing break. [ruby-core:09345] @@ -10926,7 +10940,7 @@ Fri Feb 18 04:06:41 2005 Yukihiro Matsumoto * parse.y (fcall_gen): lvar(arg) will be evaluated as lvar.call(arg) when lvar is a defined local variable. [new] -Thu Feb 17 22:15:34 2005 K.Kosako +Thu Feb 17 22:15:34 2005 K.Kosako * ext/strscan/strscan.c: calls Oniguruma API directly. 
diff --git a/oniguruma.h b/oniguruma.h index a29e2e52c6..2bb822bd22 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -36,7 +36,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 4 #define ONIGURUMA_VERSION_MINOR 4 -#define ONIGURUMA_VERSION_TEENY 4 +#define ONIGURUMA_VERSION_TEENY 5 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -715,6 +715,7 @@ struct re_registers { typedef struct re_registers OnigRegion; typedef struct { + OnigEncoding enc; OnigUChar* par; OnigUChar* par_end; } OnigErrorInfo; diff --git a/regcomp.c b/regcomp.c index 9b862657d8..3f7168d968 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5277,6 +5277,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, err: if (IS_NOT_NULL(scan_env.error)) { if (IS_NOT_NULL(einfo)) { + einfo->enc = scan_env.enc; einfo->par = scan_env.error; einfo->par_end = scan_env.error_end; } @@ -5470,8 +5471,6 @@ OnigOpInfoType OnigOpInfo[] = { { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, { OP_WORD, "word", ARG_NON }, { OP_NOT_WORD, "not-word", ARG_NON }, - { OP_WORD_SB, "word-sb", ARG_NON }, - { OP_WORD_MB, "word-mb", ARG_NON }, { OP_WORD_BOUND, "word-bound", ARG_NON }, { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, { OP_WORD_BEGIN, "word-begin", ARG_NON }, diff --git a/regerror.c b/regerror.c index ad73b76c3c..d6ec91856d 100644 --- a/regerror.c +++ b/regerror.c @@ -183,6 +183,49 @@ onig_error_code_to_format(int code) } +/* Copy the text [s, end) into buf (capacity buf_size bytes) for use in an error message and return the number of bytes stored. When ONIGENC_MBC_MINLEN(enc) > 1, code points below 0x80 are stored as single bytes and code points >= 0x80 as a backslash plus the three octal digits of their low 8 bits; otherwise the bytes are copied unchanged. *is_over is set to 1 when the input did not fit, else 0. */ +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (len + 5 <= buf_size) { /* sprintf stores 4 chars plus a NUL */ + sprintf((char* )(&(buf[len])), "\\%03o", + (unsigned int)(code & 0377)); + len += 4; /* keep the terminating NUL out of the returned text */ + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enc_len(enc, p); + if (len >= buf_size) break; + } +
*is_over = ((p < end) ? 1 : 0); + } + else { + len = MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; +} + + /* for ONIG_MAX_ERROR_MESSAGE_LEN */ #define MAX_ERROR_PAR_LEN 30 @@ -198,7 +240,8 @@ onig_error_code_to_str(s, code, va_alist) { UChar *p, *q; OnigErrorInfo* einfo; - int len; + int len, is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; va_list vargs; va_init_list(vargs, code); @@ -212,23 +255,20 @@ onig_error_code_to_str(s, code, va_alist) case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: case ONIGERR_INVALID_CHAR_PROPERTY_NAME: einfo = va_arg(vargs, OnigErrorInfo*); - len = einfo->par_end - einfo->par; + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { q++; if (*q == 'n') { /* '%n': name */ - if (len > MAX_ERROR_PAR_LEN) { - xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3); - p += (MAX_ERROR_PAR_LEN - 3); + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { xmemcpy(p, "...", 3); p += 3; } - else { - xmemcpy(p, einfo->par, len); - p += len; - } q++; } else diff --git a/regexec.c b/regexec.c index 1a9567a3d0..1e31cd6038 100644 --- a/regexec.c +++ b/regexec.c @@ -388,18 +388,26 @@ typedef struct { #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 -#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \ - (msa).state_check_buff = (void* )0;\ +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\ - (msa).state_check_buff_size = size; \ - if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ (msa).state_check_buff = (void* )xmalloc(size);\ else \ (msa).state_check_buff = (void* )xalloca(size);\
- xmemset((msa).state_check_buff, 0, (size_t )size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ }\ } while (0) @@ -410,7 +418,7 @@ typedef struct { }\ } while (0); #else -#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) #endif @@ -3261,7 +3269,12 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ MATCH_ARG_INIT(msa, option, region, at); - STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif if (region #ifdef USE_POSIX_REGION_OPTION @@ -3665,7 +3678,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_ARG_INIT(msa, option, region, start); #ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; #endif MATCH_AND_RETURN_CHECK; goto mismatch; @@ -3679,7 +3693,12 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #endif MATCH_ARG_INIT(msa, option, region, orig_start); - STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif s = (UChar* )start; if (range > start) { /* forward search */ diff --git a/regint.h b/regint.h index c06bf57630..138e05e0db 100644 --- a/regint.h +++ b/regint.h @@ -121,7 +121,7 @@ #endif /* else 
NOT_RUBY */ #define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000 +#define STATE_CHECK_BUFF_MAX_SIZE 0x8000 #define THREAD_PASS_LIMIT_COUNT 8 #define xmemset memset @@ -584,8 +584,6 @@ enum OpCode { OP_WORD, OP_NOT_WORD, - OP_WORD_SB, - OP_WORD_MB, OP_WORD_BOUND, OP_NOT_WORD_BOUND, OP_WORD_BEGIN, diff --git a/regparse.c b/regparse.c index ba7e38e7f1..5bbd6a09ad 100644 --- a/regparse.c +++ b/regparse.c @@ -4830,7 +4830,7 @@ onig_free_shared_cclass_table() { if (IS_NOT_NULL(OnigTypeCClassTable)) { onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); - xfree(OnigTypeCClassTable); + onig_st_free_table(OnigTypeCClassTable); OnigTypeCClassTable = NULL; }