1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* regcomp.c: Merge Onigmo 5.14.1 25a8a69fc05ae3b56a09.

this includes Support for Unicode 7.0 [Bug #9092].

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46831 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2014-07-16 03:27:25 +00:00
parent bb51e69af0
commit 64c81e40d4
12 changed files with 10416 additions and 5961 deletions

View file

@ -1,3 +1,8 @@
Wed Jul 16 12:25:39 2014 NARUSE, Yui <naruse@ruby-lang.org>
* regcomp.c: Merge Onigmo 5.14.1 25a8a69fc05ae3b56a09.
this includes Support for Unicode 7.0 [Bug #9092].
Tue Jul 15 23:59:27 2014 Jared Jennings <jared.jennings.ctr@us.af.mil>
* ext/digest: make built-in digest function implementations

View file

@ -86,7 +86,7 @@ class CaseFolding
hash = "onigenc_unicode_#{key}_hash"
lookup = "onigenc_unicode_#{key}_lookup"
arity = Array(data[0][0]).size
gperf = %W"gperf -7 -k#{[*1..(arity*3)].join(",")} -F,-1 -c -j1 -i1 -t -T -E -C -H #{hash} -N #{lookup}"
gperf = %W"gperf -7 -k#{[*1..(arity*3)].join(",")} -F,-1 -c -j1 -i1 -t -T -E -C -H #{hash} -N #{lookup} -n"
argname = arity > 1 ? "codes" : "code"
argdecl = "const OnigCodePoint #{arity > 1 ? "*": ""}#{argname}"
n = 7

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -39,8 +39,8 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 13
#define ONIGURUMA_VERSION_TEENY 5
#define ONIGURUMA_VERSION_MINOR 14
#define ONIGURUMA_VERSION_TEENY 1
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@ -338,6 +338,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
/* config parameters */
#define ONIG_NREGION 10
#define ONIG_MAX_BACKREF_NUM 1000
#define ONIG_MAX_CAPTURE_GROUP_NUM 32767
#define ONIG_MAX_REPEAT_NUM 100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
/* constants */
@ -369,7 +370,9 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
/* options (newline) */
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
#define ONIG_OPTION_NOTBOS (ONIG_OPTION_NEWLINE_CRLF << 1)
#define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1)
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NOTEOS /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@ -582,6 +585,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS -224
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
@ -708,7 +712,7 @@ typedef struct {
int num_of_elements;
OnigEncoding pattern_enc;
OnigEncoding target_enc;
OnigSyntaxType* syntax;
const OnigSyntaxType* syntax;
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
} OnigCompileInfo;

114
regcomp.c
View file

@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -30,6 +30,15 @@
#include "regparse.h"
#if defined(USE_MULTI_THREAD_SYSTEM) \
&& defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
#ifdef _WIN32
CRITICAL_SECTION gOnigMutex;
#else
pthread_mutex_t gOnigMutex;
#endif
#endif
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
extern OnigCaseFoldType
@ -2669,22 +2678,22 @@ is_not_included(Node* x, Node* y, regex_t* reg)
break;
case NT_CCLASS:
{
CClassNode* cc = NCCLASS(y);
{
CClassNode* cc = NCCLASS(y);
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
}
break;
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
}
break;
case NT_STR:
{
UChar *q;
StrNode* ys = NSTR(y);
len = NSTRING_LEN(x);
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
{
UChar *q;
StrNode* ys = NSTR(y);
len = NSTRING_LEN(x);
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
/* tiny version */
return 0;
}
@ -2697,7 +2706,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
break;
default:
break;
break;
}
}
break;
@ -3442,7 +3451,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
break;
}
if (items[i].code_len != 1) {
varclen = 1;
varclen |= 1;
}
}
@ -3493,29 +3502,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
UChar *q = p + items[i].byte_len;
if (q < end) {
r = expand_case_fold_make_rem_string(&rem, q, end, reg);
if (r != 0) {
onig_node_free(an);
goto mem_err2;
}
r = expand_case_fold_make_rem_string(&rem, q, end, reg);
if (r != 0) {
onig_node_free(an);
goto mem_err2;
}
xnode = onig_node_list_add(NULL_NODE, snode);
if (IS_NULL(xnode)) {
onig_node_free(an);
onig_node_free(rem);
goto mem_err2;
}
if (IS_NULL(onig_node_list_add(xnode, rem))) {
onig_node_free(an);
onig_node_free(xnode);
onig_node_free(rem);
goto mem_err;
}
xnode = onig_node_list_add(NULL_NODE, snode);
if (IS_NULL(xnode)) {
onig_node_free(an);
onig_node_free(rem);
goto mem_err2;
}
if (IS_NULL(onig_node_list_add(xnode, rem))) {
onig_node_free(an);
onig_node_free(xnode);
onig_node_free(rem);
goto mem_err;
}
NCAR(an) = xnode;
NCAR(an) = xnode;
}
else {
NCAR(an) = snode;
NCAR(an) = snode;
}
NCDR(var_anode) = an;
@ -3598,7 +3607,10 @@ expand_case_fold_string(Node* node, regex_t* reg)
}
else {
alt_num *= (n + 1);
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
varlen = 1; /* Assume that expanded strings are variable length. */
break;
}
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
@ -5448,14 +5460,14 @@ print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
if (a == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
fprintf(f, "(%"PRIuSIZE")", a);
fprintf(f, "(%"PRIuPTR")", a);
fputs("-", f);
if (b == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
fprintf(f, "(%"PRIuSIZE")", b);
fprintf(f, "(%"PRIuPTR")", b);
}
static void
@ -5532,7 +5544,7 @@ print_optimize_info(FILE* f, regex_t* reg)
for (p = reg->exact; p < reg->exact_end; p++) {
fputc(*p, f);
}
fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
}
else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
int c, i, n = 0;
@ -6394,7 +6406,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
GET_POINTER_INC(cc, bp);
n = bitset_on_num(cc->bs);
fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
fprintf(f, ":%"PRIuPTR":%d", (uintptr_t )cc, n);
}
break;
@ -6530,9 +6542,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NT_LIST:
case NT_ALT:
if (NTYPE(node) == NT_LIST)
fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
else
fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
print_indent_tree(f, NCAR(node), indent + add);
while (IS_NOT_NULL(node = NCDR(node))) {
@ -6546,7 +6558,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NT_STR:
fprintf(f, "<string%s:%"PRIxPTR">",
(NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
(NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
fputc(*p, f);
@ -6557,7 +6569,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
case NT_CCLASS:
fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
if (NCCLASS(node)->mbuf) {
BBuf* bbuf = NCCLASS(node)->mbuf;
@ -6569,7 +6581,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
case NT_CTYPE:
fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
switch (NCTYPE(node)->ctype) {
case ONIGENC_CTYPE_WORD:
if (NCTYPE(node)->not != 0)
@ -6585,11 +6597,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
case NT_CANY:
fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
break;
case NT_ANCHOR:
fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
switch (NANCHOR(node)->type) {
case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
case ANCHOR_END_BUF: fputs("end buf", f); break;
@ -6622,7 +6634,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
int* p;
BRefNode* br = NBREF(node);
p = BACKREFS_P(br);
fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
for (i = 0; i < br->back_num; i++) {
if (i > 0) fputs(", ", f);
fprintf(f, "%d", p[i]);
@ -6634,21 +6646,21 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NT_CALL:
{
CallNode* cn = NCALL(node);
fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
p_string(f, cn->name_end - cn->name, cn->name);
}
break;
#endif
case NT_QTFR:
fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
NQTFR(node)->lower, NQTFR(node)->upper,
(NQTFR(node)->greedy ? "" : "?"));
print_indent_tree(f, NQTFR(node)->target, indent + add);
break;
case NT_ENCLOSE:
fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
switch (NENCLOSE(node)->type) {
case ENCLOSE_OPTION:
fprintf(f, "option:%d", NENCLOSE(node)->option);

View file

@ -108,6 +108,7 @@ typedef struct {
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
#define USE_UNICODE_AGE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */

View file

@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -53,8 +53,6 @@ onig_error_code_to_format(OnigPosition code)
p = "no support in this configuration"; break;
case ONIGERR_MEMORY:
p = "failed to allocate memory"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case ONIGERR_PARSER_BUG:
@ -65,6 +63,8 @@ onig_error_code_to_format(OnigPosition code)
p = "undefined bytecode (bug)"; break;
case ONIGERR_UNEXPECTED_BYTECODE:
p = "unexpected bytecode (bug)"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
p = "default multibyte-encoding is not set"; break;
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
@ -141,14 +141,10 @@ onig_error_code_to_format(OnigPosition code)
#endif
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
p = "numbered backref/call is not allowed. (use name)"; break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
p = "too big wide-char value"; break;
case ONIGERR_TOO_SHORT_DIGITS:
p = "too short digits"; break;
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
p = "too long wide-char value"; break;
case ONIGERR_INVALID_CODE_POINT_VALUE:
p = "invalid code point value"; break;
case ONIGERR_EMPTY_GROUP_NAME:
p = "group name is empty"; break;
case ONIGERR_INVALID_GROUP_NAME:
@ -173,6 +169,12 @@ onig_error_code_to_format(OnigPosition code)
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
p = "invalid character property name {%n}"; break;
case ONIGERR_TOO_MANY_CAPTURE_GROUPS:
p = "too many capture groups are specified"; break;
case ONIGERR_INVALID_CODE_POINT_VALUE:
p = "invalid code point value"; break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
p = "too big wide-char value"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
@ -307,8 +309,12 @@ onig_error_code_to_str(s, code, va_alist)
default:
q = onig_error_code_to_format(code);
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
xmemcpy(s, q, len);
if (q) {
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
xmemcpy(s, q, len);
} else {
len = 0;
}
s[len] = '\0';
break;
}

View file

@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -444,9 +444,26 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
if (msa->stack_p) {\
#define MAX_PTR_NUM 100
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
if (ptr_num > MAX_PTR_NUM) {\
alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
heap_addr = alloc_addr;\
if (msa->stack_p) {\
stk_alloc = (OnigStackType* )(msa->stack_p);\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
} else {\
stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
} else if (msa->stack_p) {\
alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
heap_addr = NULL;\
stk_alloc = (OnigStackType* )(msa->stack_p);\
stk_base = stk_alloc;\
stk = stk_base;\
@ -455,6 +472,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
else {\
alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
+ sizeof(OnigStackType) * (stack_num));\
heap_addr = NULL;\
stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
stk_base = stk_alloc;\
stk = stk_base;\
@ -529,7 +547,11 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define STACK_ENSURE(n) do {\
if (stk_end - stk < (n)) {\
int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
if (r != 0) { STACK_SAVE; return r; } \
if (r != 0) {\
STACK_SAVE;\
if (xmalloc_base) xfree(xmalloc_base);\
return r;\
}\
}\
} while(0)
@ -1299,9 +1321,6 @@ typedef struct {
regoff_t rm_eo;
} posix_regmatch_t;
void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
OnigEncoding enc);
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static OnigPosition
@ -1325,6 +1344,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar *p = reg->p;
UChar *pkeep;
char *alloca_base;
char *xmalloc_base = NULL;
OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
OnigStackType *stkp; /* used as any purpose. */
OnigStackIndex si;
@ -1340,7 +1360,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, etc. */
n = reg->num_repeat + (reg->num_mem + 1) * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
repeat_stk = (OnigStackIndex* )alloca_base;
@ -1354,7 +1374,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* Stack #0 not is used. */
n = reg->num_repeat + reg->num_mem * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
repeat_stk = (OnigStackIndex* )alloca_base;
@ -1372,7 +1392,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n",
(intptr_t)str, str, (intptr_t)end, end, (intptr_t)sstart, sstart, (intptr_t)sprev, sprev);
(intptr_t )str, str, (intptr_t )end, end, (intptr_t )sstart, sstart, (intptr_t )sprev, sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
#endif
@ -1386,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (s) {
UChar *q, *bp, buf[50];
int len;
fprintf(stderr, "%4d> \"", (int )(s - str));
fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str));
bp = buf;
if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */
for (i = 0, q = s; i < 7 && q < end; i++) {
@ -2150,6 +2170,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
if (! ON_STR_BEGIN(s)) goto fail;
if (IS_NOTBOS(msa->options)) goto fail;
MOP_OUT;
continue;
@ -2157,6 +2178,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_END_BUF: MOP_IN(OP_END_BUF);
if (! ON_STR_END(s)) goto fail;
if (IS_NOTEOS(msa->options)) goto fail;
MOP_OUT;
continue;
@ -2916,20 +2938,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
finish:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return best_len;
#ifdef ONIG_DEBUG
stack_error:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return ONIGERR_STACK_BUG;
#endif
bytecode_error:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return ONIGERR_UNDEFINED_BYTECODE;
unexpected_bytecode_error:
STACK_SAVE;
if (xmalloc_base) xfree(xmalloc_base);
return ONIGERR_UNEXPECTED_BYTECODE;
}
@ -3159,8 +3185,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
p = s;
t = tail;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_loop: pos: %d %s\n",
(int)(s - text), s);
fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
(intptr_t )(s - text), s);
#endif
while (*p == *t) {
if (t == target) return (UChar* )p;
@ -3294,8 +3320,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
OnigEncoding enc = reg->enc;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
fprintf(stderr, "bm_search_notrev: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
#endif
tail = target_end - 1;
@ -3398,8 +3424,8 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
int case_fold_flag = reg->case_fold_flag;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
fprintf(stderr, "bm_search_notrev_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
#endif
tail = target_end - 1;
@ -3454,8 +3480,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
int case_fold_flag = reg->case_fold_flag;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
fprintf(stderr, "bm_search_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
#endif
tail = target_end - 1;
@ -3641,7 +3667,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
str, str, end, end, s, s, range, range);
(intptr_t )str, str, (intptr_t )end, end, (intptr_t )s, s, (intptr_t )range, range);
#endif
p = s;
@ -3754,8 +3780,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
(int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
"forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
*low - str, *high - str, reg->dmin, reg->dmax);
#endif
return 1; /* success */
}
@ -3919,7 +3945,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
str, str, end - str, start - str, range - str);
(intptr_t )str, str, end - str, start - str, range - str);
#endif
if (region
@ -4181,11 +4207,6 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}
}
else { /* backward search */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
if (orig_start < end)
orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
#endif
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
@ -4268,7 +4289,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
fprintf(stderr, "onig_search: error %"PRIdPTR"\n", r);
#endif
return r;
@ -4278,7 +4299,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
ONIG_STATE_DEC_THREAD(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
fprintf(stderr, "onig_search: error %"PRIdPTR"\n", r);
#endif
return r;

View file

@ -5,7 +5,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -92,8 +92,6 @@
# define ARG_UNUSED
#endif
/* */
/* escape other system UChar definition */
#ifndef RUBY_DEFINES_H
#include "ruby/ruby.h"
#undef xmalloc
@ -101,23 +99,67 @@
#undef xcalloc
#undef xfree
#endif
/* */
/* escape other system UChar definition */
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE
#undef USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION /* needed for POSIX API support */
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* multithread config */
/* #define USE_MULTI_THREAD_SYSTEM */
/* #define USE_DEFAULT_MULTI_THREAD_SYSTEM */
#if defined(USE_MULTI_THREAD_SYSTEM) \
&& defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
extern CRITICAL_SECTION gOnigMutex;
#define THREAD_SYSTEM_INIT InitializeCriticalSection(&gOnigMutex)
#define THREAD_SYSTEM_END DeleteCriticalSection(&gOnigMutex)
#define THREAD_ATOMIC_START EnterCriticalSection(&gOnigMutex)
#define THREAD_ATOMIC_END LeaveCriticalSection(&gOnigMutex)
#define THREAD_PASS Sleep(0)
#else /* _WIN32 */
#include <pthread.h>
#include <sched.h>
extern pthread_mutex_t gOnigMutex;
#define THREAD_SYSTEM_INIT pthread_mutex_init(&gOnigMutex, NULL)
#define THREAD_SYSTEM_END pthread_mutex_destroy(&gOnigMutex)
#define THREAD_ATOMIC_START pthread_mutex_lock(&gOnigMutex)
#define THREAD_ATOMIC_END pthread_mutex_unlock(&gOnigMutex)
#define THREAD_PASS sched_yield()
#endif /* _WIN32 */
#else /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
#ifndef THREAD_SYSTEM_INIT
#define THREAD_SYSTEM_INIT /* depend on thread system */
#endif
#ifndef THREAD_SYSTEM_END
#define THREAD_SYSTEM_END /* depend on thread system */
#endif
#ifndef THREAD_ATOMIC_START
#define THREAD_ATOMIC_START /* depend on thread system */
#endif
#ifndef THREAD_ATOMIC_END
#define THREAD_ATOMIC_END /* depend on thread system */
#endif
#ifndef THREAD_PASS
#define THREAD_PASS /* depend on thread system */
#endif
#endif /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
#ifndef xmalloc
#define xmalloc malloc
#define xrealloc realloc
@ -235,12 +277,16 @@
# include <stdint.h>
#endif
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>
#endif
#ifdef STDC_HEADERS
# include <stddef.h>
#endif
#ifdef __BORLANDC__
#include <malloc.h>
#ifdef _WIN32
#include <malloc.h> /* for alloca() */
#endif
#ifdef ONIG_DEBUG
@ -260,6 +306,18 @@ typedef unsigned int uintptr_t;
#endif
#endif /* _WIN32 */
#ifndef PRIdPTR
#ifdef _WIN64
#define PRIdPTR "I64d"
#define PRIuPTR "I64u"
#define PRIxPTR "I64x"
#else
#define PRIdPTR "ld"
#define PRIuPTR "lu"
#define PRIxPTR "lx"
#endif
#endif
#include "regenc.h"
RUBY_SYMBOL_EXPORT_BEGIN
@ -366,6 +424,8 @@ typedef unsigned int BitStatusType;
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_NOTBOS(option) ((option) & ONIG_OPTION_NOTBOS)
#define IS_NOTEOS(option) ((option) & ONIG_OPTION_NOTEOS)
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
@ -852,7 +912,9 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
/* extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc)); */
#ifdef ONIG_DEBUG
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
#endif
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));

View file

@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -281,6 +281,14 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
p += enclen(enc, p, end); \
} while (0)
#define PINC_S do { \
p += enclen(enc, p, end); \
} while (0)
#define PFETCH_S(c) do { \
c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
p += enclen(enc, p, end); \
} while (0)
#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
@ -978,6 +986,8 @@ scan_env_add_mem_entry(ScanEnv* env)
Node** p;
need = env->num_mem + 1;
if (need > ONIG_MAX_CAPTURE_GROUP_NUM)
return ONIGERR_TOO_MANY_CAPTURE_GROUPS;
if (need >= SCANENV_MEMNODES_SIZE) {
if (env->mem_alloc <= need) {
if (IS_NULL(env->mem_nodes_dynamic)) {
@ -1143,6 +1153,25 @@ node_new(void)
return node;
}
#if defined(USE_MULTI_THREAD_SYSTEM) && \
defined(USE_SHARED_CCLASS_TABLE) && \
defined(USE_PARSE_TREE_NODE_RECYCLE)
static Node*
node_new_locked(void)
{
Node* node;
if (IS_NOT_NULL(FreeNodeList)) {
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
return node;
}
node = (Node* )xmalloc(sizeof(Node));
/* xmemset(node, 0, sizeof(Node)); */
return node;
}
#endif
static void
initialize_cclass(CClassNode* cc)
@ -1164,6 +1193,24 @@ node_new_cclass(void)
return node;
}
#if defined(USE_MULTI_THREAD_SYSTEM) && \
defined(USE_SHARED_CCLASS_TABLE) && \
defined(USE_PARSE_TREE_NODE_RECYCLE)
static Node*
node_new_cclass_locked(void)
{
Node* node = node_new_locked();
CHECK_NULL_RETURN(node);
SET_NTYPE(node, NT_CCLASS);
initialize_cclass(NCCLASS(node));
return node;
}
#else
#define node_new_cclass_locked() node_new_cclass()
#endif
#ifdef USE_SHARED_CCLASS_TABLE
static Node*
node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
const OnigCodePoint ranges[])
@ -1172,7 +1219,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
CClassNode* cc;
OnigCodePoint j;
Node* node = node_new_cclass();
Node* node = node_new_cclass_locked();
CHECK_NULL_RETURN(node);
cc = NCCLASS(node);
@ -1213,6 +1260,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
return node;
}
#endif /* USE_SHARED_CCLASS_TABLE */
static Node*
node_new_ctype(int type, int not, int ascii_range)
@ -2486,22 +2534,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar* p = *src;
PFETCH_READY;
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
PFETCH_S(c);
switch (c) {
case 'M':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
PFETCH_S(c);
if (c != '-') return ONIGERR_META_CODE_SYNTAX;
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
PFETCH_S(c);
if (c == MC_ESC(env->syntax)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )v;
}
c = ((c & 0xff) | 0x80);
@ -2513,7 +2560,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
case 'C':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
PFETCH_S(c);
if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
goto control;
}
@ -2524,9 +2571,9 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
control:
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
PFETCH_S(c);
if (c == '?') {
c = 0177;
c = 0177;
}
else {
if (c == MC_ESC(env->syntax)) {
@ -2534,7 +2581,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (v < 0) return v;
c = (OnigCodePoint )v;
}
c &= 0x9f;
c &= 0x9f;
}
break;
}
@ -2630,11 +2677,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
if (is_num != 0) {
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
is_num = 1;
is_num = 1;
}
else {
r = ONIGERR_INVALID_GROUP_NAME;
is_num = 0;
r = ONIGERR_INVALID_GROUP_NAME;
is_num = 0;
}
}
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@ -2701,7 +2748,6 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
UChar *name_end;
UChar *pnum_head;
UChar *p = *src;
PFETCH_READY;
*rback_num = 0;
@ -2716,23 +2762,23 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return ONIGERR_EMPTY_GROUP_NAME;
}
else {
PFETCH(c);
PFETCH_S(c);
if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
if (ref == 1)
is_num = 1;
is_num = 1;
else {
r = ONIGERR_INVALID_GROUP_NAME;
is_num = 0;
r = ONIGERR_INVALID_GROUP_NAME;
is_num = 0;
}
}
else if (c == '-') {
if (ref == 1) {
is_num = 2;
sign = -1;
pnum_head = p;
is_num = 2;
sign = -1;
pnum_head = p;
}
else {
r = ONIGERR_INVALID_GROUP_NAME;
@ -2747,29 +2793,28 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
if (r == 0) {
while (!PEND) {
name_end = p;
PFETCH(c);
PFETCH_S(c);
if (c == end_code || c == ')') {
if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
break;
}
if (is_num != 0) {
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
is_num = 1;
}
else {
if (!ONIGENC_IS_CODE_WORD(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
else
r = ONIGERR_INVALID_GROUP_NAME;
is_num = 0;
}
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
is_num = 1;
}
else {
if (!ONIGENC_IS_CODE_WORD(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
else
r = ONIGERR_INVALID_GROUP_NAME;
is_num = 0;
}
}
else {
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
}
@ -2782,8 +2827,8 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
else if (*rback_num == 0) {
r = ONIGERR_INVALID_GROUP_NAME;
goto err;
r = ONIGERR_INVALID_GROUP_NAME;
goto err;
}
*rback_num *= sign;
@ -2796,9 +2841,9 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
else {
while (!PEND) {
name_end = p;
PFETCH(c);
PFETCH_S(c);
if (c == end_code || c == ')')
break;
break;
}
if (PEND)
name_end = end;
@ -4263,10 +4308,9 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar *p = *src;
PFETCH_READY;
if (PPEEK_IS('^')) {
PINC;
PINC_S;
not = 1;
}
else
@ -4279,14 +4323,14 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not,
IS_POSIX_BRACKET_ALL_RANGE(env->option),
env);
if (r != 0) return r;
PINC; PINC;
PINC_S; PINC_S;
*src = p;
return 0;
}
@ -4296,15 +4340,15 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
c = 0;
i = 0;
while (!PEND && ((c = PPEEK) != ':') && c != ']') {
PINC;
PINC_S;
if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
}
if (c == ':' && ! PEND) {
PINC;
PINC_S;
if (! PEND) {
PFETCH(c);
PFETCH_S(c);
if (c == ']')
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
}
}
@ -4318,14 +4362,13 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar *prev, *start, *p = *src;
PFETCH_READY;
r = 0;
start = prev = p;
while (!PEND) {
prev = p;
PFETCH(c);
PFETCH_S(c);
if (c == '}') {
r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
if (r < 0) break;
@ -4483,7 +4526,6 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
OnigCodePoint code;
OnigEncoding enc = env->enc;
UChar* p = from;
PFETCH_READY;
in_esc = 0;
while (! PEND) {
@ -4491,7 +4533,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
in_esc = 0;
}
else {
PFETCH(code);
PFETCH_S(code);
if (code == c) return 1;
if (code == MC_ESC(env->syntax)) in_esc = 1;
}
@ -4960,14 +5002,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
*np = node_new_enclose_memory(env->option, 0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) {
onig_node_free(*np);
return num;
}
else if (num >= (int )BIT_STATUS_BITS_NUM) {
onig_node_free(*np);
if (num < 0) return num;
if (num >= (int )BIT_STATUS_BITS_NUM)
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
NENCLOSE(*np)->regnum = num;
BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
}
@ -4985,11 +5023,14 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
PUNFETCH;
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
if (r < 0) return r;
#if 0
/* Relative number is not currently supported. (same as Perl) */
if (num < 0) {
num = BACKREF_REL_TO_ABS(num, env);
if (num <= 0)
return ONIGERR_INVALID_BACKREF;
}
#endif
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (num > env->num_mem ||
IS_NULL(SCANENV_MEM_NODES(env)[num]))
@ -5291,30 +5332,23 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
UChar buf[WARN_BUFSIZE];
switch (ReduceTypeTable[targetq_num][nestq_num]) {
case RQ_ASIS:
break;
case RQ_DEL:
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
(UChar* )"redundant nested repeat operator");
(*onig_verb_warn)((char* )buf);
if (onig_warn != onig_null_warn) {
onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
PopularQStr[targetq_num]);
}
goto warn_exit;
break;
default:
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
(UChar* )"nested repeat operator %s and %s was replaced with '%s'",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
(*onig_verb_warn)((char* )buf);
if (onig_warn != onig_null_warn) {
onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
}
goto warn_exit;
break;
@ -5413,13 +5447,13 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
extern int
onig_free_shared_cclass_table(void)
{
THREAD_ATOMIC_START;
/* THREAD_ATOMIC_START; */
if (IS_NOT_NULL(OnigTypeCClassTable)) {
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
onig_st_free_table(OnigTypeCClassTable);
OnigTypeCClassTable = NULL;
}
THREAD_ATOMIC_END;
/* THREAD_ATOMIC_END; */
return 0;
}

View file

@ -294,7 +294,9 @@ props.each do |name|
end
make_const(name, data[name], category)
end
print "\n#ifdef USE_UNICODE_AGE_PROPERTIES"
ages = parse_age(data)
puts "#endif /* USE_UNICODE_AGE_PROPERTIES */"
blocks = parse_block(data)
puts '#endif /* USE_UNICODE_PROPERTIES */'
puts(<<'__HEREDOC')
@ -304,7 +306,9 @@ __HEREDOC
POSIX_NAMES.each{|name|puts" CR_#{name},"}
puts "#ifdef USE_UNICODE_PROPERTIES"
props.each{|name| puts" CR_#{name},"}
puts "#ifdef USE_UNICODE_AGE_PROPERTIES"
ages.each{|name| puts" CR_#{constantize_agename(name)},"}
puts "#endif /* USE_UNICODE_AGE_PROPERTIES */"
blocks.each{|name|puts" CR_#{name},"}
puts(<<'__HEREDOC')
@ -340,12 +344,14 @@ aliases.each_pair do |k, v|
next unless v = name_to_index[v]
puts "%-40s %3d" % [k + ',', v]
end
puts "#ifdef USE_UNICODE_AGE_PROPERTIES"
ages.each do |name|
i += 1
name = "age=#{name}"
name_to_index[name] = i
puts "%-40s %3d" % [name + ',', i]
end
puts "#endif /* USE_UNICODE_AGE_PROPERTIES */"
blocks.each do |name|
i += 1
name = normalize_propname(name)