mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* regcomp.c: Merge Onigmo 5.14.1 25a8a69fc05ae3b56a09.
this includes Support for Unicode 7.0 [Bug #9092].
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46831 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
bb51e69af0
commit
64c81e40d4
12 changed files with 10416 additions and 5961 deletions
|
@ -1,3 +1,8 @@
|
|||
Wed Jul 16 12:25:39 2014 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* regcomp.c: Merge Onigmo 5.14.1 25a8a69fc05ae3b56a09.
|
||||
this includes Support for Unicode 7.0 [Bug #9092].
|
||||
|
||||
Tue Jul 15 23:59:27 2014 Jared Jennings <jared.jennings.ctr@us.af.mil>
|
||||
|
||||
* ext/digest: make built-in digest function implementations
|
||||
|
|
|
@ -86,7 +86,7 @@ class CaseFolding
|
|||
hash = "onigenc_unicode_#{key}_hash"
|
||||
lookup = "onigenc_unicode_#{key}_lookup"
|
||||
arity = Array(data[0][0]).size
|
||||
gperf = %W"gperf -7 -k#{[*1..(arity*3)].join(",")} -F,-1 -c -j1 -i1 -t -T -E -C -H #{hash} -N #{lookup}"
|
||||
gperf = %W"gperf -7 -k#{[*1..(arity*3)].join(",")} -F,-1 -c -j1 -i1 -t -T -E -C -H #{hash} -N #{lookup} -n"
|
||||
argname = arity > 1 ? "codes" : "code"
|
||||
argdecl = "const OnigCodePoint #{arity > 1 ? "*": ""}#{argname}"
|
||||
n = 7
|
||||
|
|
File diff suppressed because it is too large
Load diff
10517
enc/unicode/name2ctype.h
10517
enc/unicode/name2ctype.h
File diff suppressed because it is too large
Load diff
|
@ -5,7 +5,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -39,8 +39,8 @@ extern "C" {
|
|||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 5
|
||||
#define ONIGURUMA_VERSION_MINOR 13
|
||||
#define ONIGURUMA_VERSION_TEENY 5
|
||||
#define ONIGURUMA_VERSION_MINOR 14
|
||||
#define ONIGURUMA_VERSION_TEENY 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
|
@ -338,6 +338,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
|
|||
/* config parameters */
|
||||
#define ONIG_NREGION 10
|
||||
#define ONIG_MAX_BACKREF_NUM 1000
|
||||
#define ONIG_MAX_CAPTURE_GROUP_NUM 32767
|
||||
#define ONIG_MAX_REPEAT_NUM 100000
|
||||
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
|
||||
/* constants */
|
||||
|
@ -369,7 +370,9 @@ typedef unsigned int OnigOptionType;
|
|||
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
|
||||
/* options (newline) */
|
||||
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
|
||||
#define ONIG_OPTION_NOTBOS (ONIG_OPTION_NEWLINE_CRLF << 1)
|
||||
#define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NOTEOS /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
|
@ -582,6 +585,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIGERR_NEVER_ENDING_RECURSION -221
|
||||
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
|
||||
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
|
||||
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS -224
|
||||
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
|
||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
|
||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
|
||||
|
@ -708,7 +712,7 @@ typedef struct {
|
|||
int num_of_elements;
|
||||
OnigEncoding pattern_enc;
|
||||
OnigEncoding target_enc;
|
||||
OnigSyntaxType* syntax;
|
||||
const OnigSyntaxType* syntax;
|
||||
OnigOptionType option;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
} OnigCompileInfo;
|
||||
|
|
114
regcomp.c
114
regcomp.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -30,6 +30,15 @@
|
|||
|
||||
#include "regparse.h"
|
||||
|
||||
#if defined(USE_MULTI_THREAD_SYSTEM) \
|
||||
&& defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
|
||||
#ifdef _WIN32
|
||||
CRITICAL_SECTION gOnigMutex;
|
||||
#else
|
||||
pthread_mutex_t gOnigMutex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
|
||||
|
||||
extern OnigCaseFoldType
|
||||
|
@ -2669,22 +2678,22 @@ is_not_included(Node* x, Node* y, regex_t* reg)
|
|||
break;
|
||||
|
||||
case NT_CCLASS:
|
||||
{
|
||||
CClassNode* cc = NCCLASS(y);
|
||||
{
|
||||
CClassNode* cc = NCCLASS(y);
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
||||
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
||||
return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
||||
}
|
||||
break;
|
||||
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
||||
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
||||
return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
||||
}
|
||||
break;
|
||||
|
||||
case NT_STR:
|
||||
{
|
||||
UChar *q;
|
||||
StrNode* ys = NSTR(y);
|
||||
len = NSTRING_LEN(x);
|
||||
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
|
||||
if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
|
||||
{
|
||||
UChar *q;
|
||||
StrNode* ys = NSTR(y);
|
||||
len = NSTRING_LEN(x);
|
||||
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
|
||||
if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
|
||||
/* tiny version */
|
||||
return 0;
|
||||
}
|
||||
|
@ -2697,7 +2706,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
|
|||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -3442,7 +3451,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
|||
break;
|
||||
}
|
||||
if (items[i].code_len != 1) {
|
||||
varclen = 1;
|
||||
varclen |= 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3493,29 +3502,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
|||
UChar *q = p + items[i].byte_len;
|
||||
|
||||
if (q < end) {
|
||||
r = expand_case_fold_make_rem_string(&rem, q, end, reg);
|
||||
if (r != 0) {
|
||||
onig_node_free(an);
|
||||
goto mem_err2;
|
||||
}
|
||||
r = expand_case_fold_make_rem_string(&rem, q, end, reg);
|
||||
if (r != 0) {
|
||||
onig_node_free(an);
|
||||
goto mem_err2;
|
||||
}
|
||||
|
||||
xnode = onig_node_list_add(NULL_NODE, snode);
|
||||
if (IS_NULL(xnode)) {
|
||||
onig_node_free(an);
|
||||
onig_node_free(rem);
|
||||
goto mem_err2;
|
||||
}
|
||||
if (IS_NULL(onig_node_list_add(xnode, rem))) {
|
||||
onig_node_free(an);
|
||||
onig_node_free(xnode);
|
||||
onig_node_free(rem);
|
||||
goto mem_err;
|
||||
}
|
||||
xnode = onig_node_list_add(NULL_NODE, snode);
|
||||
if (IS_NULL(xnode)) {
|
||||
onig_node_free(an);
|
||||
onig_node_free(rem);
|
||||
goto mem_err2;
|
||||
}
|
||||
if (IS_NULL(onig_node_list_add(xnode, rem))) {
|
||||
onig_node_free(an);
|
||||
onig_node_free(xnode);
|
||||
onig_node_free(rem);
|
||||
goto mem_err;
|
||||
}
|
||||
|
||||
NCAR(an) = xnode;
|
||||
NCAR(an) = xnode;
|
||||
}
|
||||
else {
|
||||
NCAR(an) = snode;
|
||||
NCAR(an) = snode;
|
||||
}
|
||||
|
||||
NCDR(var_anode) = an;
|
||||
|
@ -3598,7 +3607,10 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
}
|
||||
else {
|
||||
alt_num *= (n + 1);
|
||||
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
||||
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
|
||||
varlen = 1; /* Assume that expanded strings are variable length. */
|
||||
break;
|
||||
}
|
||||
|
||||
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
||||
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
||||
|
@ -5448,14 +5460,14 @@ print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
|
|||
if (a == ONIG_INFINITE_DISTANCE)
|
||||
fputs("inf", f);
|
||||
else
|
||||
fprintf(f, "(%"PRIuSIZE")", a);
|
||||
fprintf(f, "(%"PRIuPTR")", a);
|
||||
|
||||
fputs("-", f);
|
||||
|
||||
if (b == ONIG_INFINITE_DISTANCE)
|
||||
fputs("inf", f);
|
||||
else
|
||||
fprintf(f, "(%"PRIuSIZE")", b);
|
||||
fprintf(f, "(%"PRIuPTR")", b);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -5532,7 +5544,7 @@ print_optimize_info(FILE* f, regex_t* reg)
|
|||
for (p = reg->exact; p < reg->exact_end; p++) {
|
||||
fputc(*p, f);
|
||||
}
|
||||
fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
|
||||
fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
|
||||
}
|
||||
else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
|
||||
int c, i, n = 0;
|
||||
|
@ -6394,7 +6406,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
|
|||
|
||||
GET_POINTER_INC(cc, bp);
|
||||
n = bitset_on_num(cc->bs);
|
||||
fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
|
||||
fprintf(f, ":%"PRIuPTR":%d", (uintptr_t )cc, n);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -6530,9 +6542,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
case NT_LIST:
|
||||
case NT_ALT:
|
||||
if (NTYPE(node) == NT_LIST)
|
||||
fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
|
||||
fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
|
||||
else
|
||||
fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
|
||||
fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
|
||||
|
||||
print_indent_tree(f, NCAR(node), indent + add);
|
||||
while (IS_NOT_NULL(node = NCDR(node))) {
|
||||
|
@ -6546,7 +6558,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
|
||||
case NT_STR:
|
||||
fprintf(f, "<string%s:%"PRIxPTR">",
|
||||
(NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
|
||||
(NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
|
||||
for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
|
||||
if (*p >= 0x20 && *p < 0x7f)
|
||||
fputc(*p, f);
|
||||
|
@ -6557,7 +6569,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
break;
|
||||
|
||||
case NT_CCLASS:
|
||||
fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
|
||||
fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
|
||||
if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
|
||||
if (NCCLASS(node)->mbuf) {
|
||||
BBuf* bbuf = NCCLASS(node)->mbuf;
|
||||
|
@ -6569,7 +6581,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
break;
|
||||
|
||||
case NT_CTYPE:
|
||||
fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
|
||||
fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
|
||||
switch (NCTYPE(node)->ctype) {
|
||||
case ONIGENC_CTYPE_WORD:
|
||||
if (NCTYPE(node)->not != 0)
|
||||
|
@ -6585,11 +6597,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
break;
|
||||
|
||||
case NT_CANY:
|
||||
fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
|
||||
fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
|
||||
break;
|
||||
|
||||
case NT_ANCHOR:
|
||||
fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
|
||||
fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
|
||||
switch (NANCHOR(node)->type) {
|
||||
case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
|
||||
case ANCHOR_END_BUF: fputs("end buf", f); break;
|
||||
|
@ -6622,7 +6634,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
int* p;
|
||||
BRefNode* br = NBREF(node);
|
||||
p = BACKREFS_P(br);
|
||||
fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
|
||||
fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
|
||||
for (i = 0; i < br->back_num; i++) {
|
||||
if (i > 0) fputs(", ", f);
|
||||
fprintf(f, "%d", p[i]);
|
||||
|
@ -6634,21 +6646,21 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
case NT_CALL:
|
||||
{
|
||||
CallNode* cn = NCALL(node);
|
||||
fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
|
||||
fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
|
||||
p_string(f, cn->name_end - cn->name, cn->name);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
case NT_QTFR:
|
||||
fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
|
||||
fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
|
||||
NQTFR(node)->lower, NQTFR(node)->upper,
|
||||
(NQTFR(node)->greedy ? "" : "?"));
|
||||
print_indent_tree(f, NQTFR(node)->target, indent + add);
|
||||
break;
|
||||
|
||||
case NT_ENCLOSE:
|
||||
fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
|
||||
fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
|
||||
switch (NENCLOSE(node)->type) {
|
||||
case ENCLOSE_OPTION:
|
||||
fprintf(f, "option:%d", NENCLOSE(node)->option);
|
||||
|
|
1
regenc.h
1
regenc.h
|
@ -108,6 +108,7 @@ typedef struct {
|
|||
|
||||
#define USE_CRNL_AS_LINE_TERMINATOR
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
#define USE_UNICODE_AGE_PROPERTIES
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
|
||||
|
|
24
regerror.c
24
regerror.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -53,8 +53,6 @@ onig_error_code_to_format(OnigPosition code)
|
|||
p = "no support in this configuration"; break;
|
||||
case ONIGERR_MEMORY:
|
||||
p = "failed to allocate memory"; break;
|
||||
case ONIGERR_MATCH_STACK_LIMIT_OVER:
|
||||
p = "match-stack limit over"; break;
|
||||
case ONIGERR_TYPE_BUG:
|
||||
p = "undefined type (bug)"; break;
|
||||
case ONIGERR_PARSER_BUG:
|
||||
|
@ -65,6 +63,8 @@ onig_error_code_to_format(OnigPosition code)
|
|||
p = "undefined bytecode (bug)"; break;
|
||||
case ONIGERR_UNEXPECTED_BYTECODE:
|
||||
p = "unexpected bytecode (bug)"; break;
|
||||
case ONIGERR_MATCH_STACK_LIMIT_OVER:
|
||||
p = "match-stack limit over"; break;
|
||||
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
|
||||
p = "default multibyte-encoding is not set"; break;
|
||||
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
|
||||
|
@ -141,14 +141,10 @@ onig_error_code_to_format(OnigPosition code)
|
|||
#endif
|
||||
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
|
||||
p = "numbered backref/call is not allowed. (use name)"; break;
|
||||
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||
p = "too big wide-char value"; break;
|
||||
case ONIGERR_TOO_SHORT_DIGITS:
|
||||
p = "too short digits"; break;
|
||||
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
|
||||
p = "too long wide-char value"; break;
|
||||
case ONIGERR_INVALID_CODE_POINT_VALUE:
|
||||
p = "invalid code point value"; break;
|
||||
case ONIGERR_EMPTY_GROUP_NAME:
|
||||
p = "group name is empty"; break;
|
||||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
|
@ -173,6 +169,12 @@ onig_error_code_to_format(OnigPosition code)
|
|||
p = "group number is too big for capture history"; break;
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
p = "invalid character property name {%n}"; break;
|
||||
case ONIGERR_TOO_MANY_CAPTURE_GROUPS:
|
||||
p = "too many capture groups are specified"; break;
|
||||
case ONIGERR_INVALID_CODE_POINT_VALUE:
|
||||
p = "invalid code point value"; break;
|
||||
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||
p = "too big wide-char value"; break;
|
||||
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
|
||||
p = "not supported encoding combination"; break;
|
||||
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
|
||||
|
@ -307,8 +309,12 @@ onig_error_code_to_str(s, code, va_alist)
|
|||
|
||||
default:
|
||||
q = onig_error_code_to_format(code);
|
||||
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
|
||||
xmemcpy(s, q, len);
|
||||
if (q) {
|
||||
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
|
||||
xmemcpy(s, q, len);
|
||||
} else {
|
||||
len = 0;
|
||||
}
|
||||
s[len] = '\0';
|
||||
break;
|
||||
}
|
||||
|
|
81
regexec.c
81
regexec.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -444,9 +444,26 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
|
|||
|
||||
|
||||
|
||||
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
|
||||
if (msa->stack_p) {\
|
||||
#define MAX_PTR_NUM 100
|
||||
|
||||
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
|
||||
if (ptr_num > MAX_PTR_NUM) {\
|
||||
alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
|
||||
heap_addr = alloc_addr;\
|
||||
if (msa->stack_p) {\
|
||||
stk_alloc = (OnigStackType* )(msa->stack_p);\
|
||||
stk_base = stk_alloc;\
|
||||
stk = stk_base;\
|
||||
stk_end = stk_base + msa->stack_n;\
|
||||
} else {\
|
||||
stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
|
||||
stk_base = stk_alloc;\
|
||||
stk = stk_base;\
|
||||
stk_end = stk_base + (stack_num);\
|
||||
}\
|
||||
} else if (msa->stack_p) {\
|
||||
alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
|
||||
heap_addr = NULL;\
|
||||
stk_alloc = (OnigStackType* )(msa->stack_p);\
|
||||
stk_base = stk_alloc;\
|
||||
stk = stk_base;\
|
||||
|
@ -455,6 +472,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
|
|||
else {\
|
||||
alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
|
||||
+ sizeof(OnigStackType) * (stack_num));\
|
||||
heap_addr = NULL;\
|
||||
stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
|
||||
stk_base = stk_alloc;\
|
||||
stk = stk_base;\
|
||||
|
@ -529,7 +547,11 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
|
|||
#define STACK_ENSURE(n) do {\
|
||||
if (stk_end - stk < (n)) {\
|
||||
int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
|
||||
if (r != 0) { STACK_SAVE; return r; } \
|
||||
if (r != 0) {\
|
||||
STACK_SAVE;\
|
||||
if (xmalloc_base) xfree(xmalloc_base);\
|
||||
return r;\
|
||||
}\
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
|
@ -1299,9 +1321,6 @@ typedef struct {
|
|||
regoff_t rm_eo;
|
||||
} posix_regmatch_t;
|
||||
|
||||
void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
|
||||
OnigEncoding enc);
|
||||
|
||||
/* match data(str - end) from position (sstart). */
|
||||
/* if sstart == str then set sprev to NULL. */
|
||||
static OnigPosition
|
||||
|
@ -1325,6 +1344,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
UChar *p = reg->p;
|
||||
UChar *pkeep;
|
||||
char *alloca_base;
|
||||
char *xmalloc_base = NULL;
|
||||
OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
|
||||
OnigStackType *stkp; /* used as any purpose. */
|
||||
OnigStackIndex si;
|
||||
|
@ -1340,7 +1360,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, etc. */
|
||||
n = reg->num_repeat + (reg->num_mem + 1) * 2;
|
||||
|
||||
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
|
||||
STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
|
||||
pop_level = reg->stack_pop_level;
|
||||
num_mem = reg->num_mem;
|
||||
repeat_stk = (OnigStackIndex* )alloca_base;
|
||||
|
@ -1354,7 +1374,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
/* Stack #0 is not used. */
|
||||
n = reg->num_repeat + reg->num_mem * 2;
|
||||
|
||||
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
|
||||
STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
|
||||
pop_level = reg->stack_pop_level;
|
||||
num_mem = reg->num_mem;
|
||||
repeat_stk = (OnigStackIndex* )alloca_base;
|
||||
|
@ -1372,7 +1392,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
|
||||
#ifdef ONIG_DEBUG_MATCH
|
||||
fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n",
|
||||
(intptr_t)str, str, (intptr_t)end, end, (intptr_t)sstart, sstart, (intptr_t)sprev, sprev);
|
||||
(intptr_t )str, str, (intptr_t )end, end, (intptr_t )sstart, sstart, (intptr_t )sprev, sprev);
|
||||
fprintf(stderr, "size: %d, start offset: %d\n",
|
||||
(int )(end - str), (int )(sstart - str));
|
||||
#endif
|
||||
|
@ -1386,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
if (s) {
|
||||
UChar *q, *bp, buf[50];
|
||||
int len;
|
||||
fprintf(stderr, "%4d> \"", (int )(s - str));
|
||||
fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str));
|
||||
bp = buf;
|
||||
if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */
|
||||
for (i = 0, q = s; i < 7 && q < end; i++) {
|
||||
|
@ -2150,6 +2170,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
|
||||
case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
|
||||
if (! ON_STR_BEGIN(s)) goto fail;
|
||||
if (IS_NOTBOS(msa->options)) goto fail;
|
||||
|
||||
MOP_OUT;
|
||||
continue;
|
||||
|
@ -2157,6 +2178,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
|
||||
case OP_END_BUF: MOP_IN(OP_END_BUF);
|
||||
if (! ON_STR_END(s)) goto fail;
|
||||
if (IS_NOTEOS(msa->options)) goto fail;
|
||||
|
||||
MOP_OUT;
|
||||
continue;
|
||||
|
@ -2916,20 +2938,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
|
||||
finish:
|
||||
STACK_SAVE;
|
||||
if (xmalloc_base) xfree(xmalloc_base);
|
||||
return best_len;
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
stack_error:
|
||||
STACK_SAVE;
|
||||
if (xmalloc_base) xfree(xmalloc_base);
|
||||
return ONIGERR_STACK_BUG;
|
||||
#endif
|
||||
|
||||
bytecode_error:
|
||||
STACK_SAVE;
|
||||
if (xmalloc_base) xfree(xmalloc_base);
|
||||
return ONIGERR_UNDEFINED_BYTECODE;
|
||||
|
||||
unexpected_bytecode_error:
|
||||
STACK_SAVE;
|
||||
if (xmalloc_base) xfree(xmalloc_base);
|
||||
return ONIGERR_UNEXPECTED_BYTECODE;
|
||||
}
|
||||
|
||||
|
@ -3159,8 +3185,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
|
|||
p = s;
|
||||
t = tail;
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "bm_search_loop: pos: %d %s\n",
|
||||
(int)(s - text), s);
|
||||
fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
|
||||
(intptr_t )(s - text), s);
|
||||
#endif
|
||||
while (*p == *t) {
|
||||
if (t == target) return (UChar* )p;
|
||||
|
@ -3294,8 +3320,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
|
|||
OnigEncoding enc = reg->enc;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "bm_search_notrev: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
|
||||
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
|
||||
fprintf(stderr, "bm_search_notrev: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
|
||||
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
|
||||
#endif
|
||||
|
||||
tail = target_end - 1;
|
||||
|
@ -3398,8 +3424,8 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
|
|||
int case_fold_flag = reg->case_fold_flag;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
|
||||
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
|
||||
fprintf(stderr, "bm_search_notrev_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
|
||||
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
|
||||
#endif
|
||||
|
||||
tail = target_end - 1;
|
||||
|
@ -3454,8 +3480,8 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
|
|||
int case_fold_flag = reg->case_fold_flag;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
|
||||
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
|
||||
fprintf(stderr, "bm_search_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
|
||||
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
|
||||
#endif
|
||||
|
||||
tail = target_end - 1;
|
||||
|
@ -3641,7 +3667,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
|||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
|
||||
str, str, end, end, s, s, range, range);
|
||||
(intptr_t )str, str, (intptr_t )end, end, (intptr_t )s, s, (intptr_t )range, range);
|
||||
#endif
|
||||
|
||||
p = s;
|
||||
|
@ -3754,8 +3780,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
|||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr,
|
||||
"forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
|
||||
(int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
|
||||
"forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
|
||||
*low - str, *high - str, reg->dmin, reg->dmax);
|
||||
#endif
|
||||
return 1; /* success */
|
||||
}
|
||||
|
@ -3919,7 +3945,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
|||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr,
|
||||
"onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
|
||||
str, str, end - str, start - str, range - str);
|
||||
(intptr_t )str, str, end - str, start - str, range - str);
|
||||
#endif
|
||||
|
||||
if (region
|
||||
|
@ -4181,11 +4207,6 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
|||
}
|
||||
}
|
||||
else { /* backward search */
|
||||
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
|
||||
if (orig_start < end)
|
||||
orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
|
||||
#endif
|
||||
|
||||
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
|
||||
UChar *low, *high, *adjrange, *sch_start;
|
||||
|
||||
|
@ -4268,7 +4289,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
|||
|
||||
#ifdef ONIG_DEBUG
|
||||
if (r != ONIG_MISMATCH)
|
||||
fprintf(stderr, "onig_search: error %d\n", r);
|
||||
fprintf(stderr, "onig_search: error %"PRIdPTR"\n", r);
|
||||
#endif
|
||||
return r;
|
||||
|
||||
|
@ -4278,7 +4299,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
|||
ONIG_STATE_DEC_THREAD(reg);
|
||||
#ifdef ONIG_DEBUG
|
||||
if (r != ONIG_MISMATCH)
|
||||
fprintf(stderr, "onig_search: error %d\n", r);
|
||||
fprintf(stderr, "onig_search: error %"PRIdPTR"\n", r);
|
||||
#endif
|
||||
return r;
|
||||
|
||||
|
|
76
regint.h
76
regint.h
|
@ -5,7 +5,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -92,8 +92,6 @@
|
|||
# define ARG_UNUSED
|
||||
#endif
|
||||
|
||||
/* */
|
||||
/* escape other system UChar definition */
|
||||
#ifndef RUBY_DEFINES_H
|
||||
#include "ruby/ruby.h"
|
||||
#undef xmalloc
|
||||
|
@ -101,23 +99,67 @@
|
|||
#undef xcalloc
|
||||
#undef xfree
|
||||
#endif
|
||||
|
||||
/* */
|
||||
/* escape other system UChar definition */
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE
|
||||
#undef USE_CAPTURE_HISTORY
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_POSIX_API_REGION_OPTION /* needed for POSIX API support */
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
|
||||
|
||||
/* multithread config */
|
||||
/* #define USE_MULTI_THREAD_SYSTEM */
|
||||
/* #define USE_DEFAULT_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#if defined(USE_MULTI_THREAD_SYSTEM) \
|
||||
&& defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
extern CRITICAL_SECTION gOnigMutex;
|
||||
#define THREAD_SYSTEM_INIT InitializeCriticalSection(&gOnigMutex)
|
||||
#define THREAD_SYSTEM_END DeleteCriticalSection(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_START EnterCriticalSection(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_END LeaveCriticalSection(&gOnigMutex)
|
||||
#define THREAD_PASS Sleep(0)
|
||||
#else /* _WIN32 */
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
extern pthread_mutex_t gOnigMutex;
|
||||
#define THREAD_SYSTEM_INIT pthread_mutex_init(&gOnigMutex, NULL)
|
||||
#define THREAD_SYSTEM_END pthread_mutex_destroy(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_START pthread_mutex_lock(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_END pthread_mutex_unlock(&gOnigMutex)
|
||||
#define THREAD_PASS sched_yield()
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#else /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifndef THREAD_SYSTEM_INIT
|
||||
#define THREAD_SYSTEM_INIT /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_SYSTEM_END
|
||||
#define THREAD_SYSTEM_END /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_ATOMIC_START
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_ATOMIC_END
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_PASS
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#endif
|
||||
|
||||
#endif /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifndef xmalloc
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
|
@ -235,12 +277,16 @@
|
|||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_INTTYPES_H
|
||||
# include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
#ifdef __BORLANDC__
|
||||
#include <malloc.h>
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h> /* for alloca() */
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
@ -260,6 +306,18 @@ typedef unsigned int uintptr_t;
|
|||
#endif
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#ifndef PRIdPTR
|
||||
#ifdef _WIN64
|
||||
#define PRIdPTR "I64d"
|
||||
#define PRIuPTR "I64u"
|
||||
#define PRIxPTR "I64x"
|
||||
#else
|
||||
#define PRIdPTR "ld"
|
||||
#define PRIuPTR "lu"
|
||||
#define PRIxPTR "lx"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
RUBY_SYMBOL_EXPORT_BEGIN
|
||||
|
@ -366,6 +424,8 @@ typedef unsigned int BitStatusType;
|
|||
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
|
||||
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
|
||||
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
|
||||
#define IS_NOTBOS(option) ((option) & ONIG_OPTION_NOTBOS)
|
||||
#define IS_NOTEOS(option) ((option) & ONIG_OPTION_NOTEOS)
|
||||
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
|
||||
#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
|
||||
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
|
||||
|
@ -852,7 +912,9 @@ typedef struct {
|
|||
|
||||
extern OnigOpInfoType OnigOpInfo[];
|
||||
|
||||
/* extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc)); */
|
||||
#ifdef ONIG_DEBUG
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init P_((void));
|
||||
|
|
188
regparse.c
188
regparse.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -281,6 +281,14 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
|
|||
p += enclen(enc, p, end); \
|
||||
} while (0)
|
||||
|
||||
/*
 * PINC_S: advance the scan pointer `p` past the character it points at,
 * using the encoded length reported by enclen(). Expects `enc`, `p` and
 * `end` to be in scope at the point of use.
 * NOTE(review): presumably the "_S" variants differ from PINC/PFETCH by not
 * recording the previous position, which would explain why callers that use
 * them no longer need PFETCH_READY -- confirm against the PINC/PFETCH
 * definitions.
 */
#define PINC_S do { \
|
||||
p += enclen(enc, p, end); \
|
||||
} while (0)
|
||||
/*
 * PFETCH_S(c): store the code point at `p` into `c`, then advance `p` by the
 * character's encoded length. When the encoding's max_enc_len is 1 the byte
 * is read directly instead of going through ONIGENC_MBC_TO_CODE().
 * Expects `enc`, `p` and `end` to be in scope at the point of use.
 */
#define PFETCH_S(c) do { \
|
||||
c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
|
||||
p += enclen(enc, p, end); \
|
||||
} while (0)
|
||||
|
||||
#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
|
||||
#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
|
||||
|
||||
|
@ -978,6 +986,8 @@ scan_env_add_mem_entry(ScanEnv* env)
|
|||
Node** p;
|
||||
|
||||
need = env->num_mem + 1;
|
||||
if (need > ONIG_MAX_CAPTURE_GROUP_NUM)
|
||||
return ONIGERR_TOO_MANY_CAPTURE_GROUPS;
|
||||
if (need >= SCANENV_MEMNODES_SIZE) {
|
||||
if (env->mem_alloc <= need) {
|
||||
if (IS_NULL(env->mem_nodes_dynamic)) {
|
||||
|
@ -1143,6 +1153,25 @@ node_new(void)
|
|||
return node;
|
||||
}
|
||||
|
||||
#if defined(USE_MULTI_THREAD_SYSTEM) && \
|
||||
defined(USE_SHARED_CCLASS_TABLE) && \
|
||||
defined(USE_PARSE_TREE_NODE_RECYCLE)
|
||||
/*
 * Obtain an uninitialized parse-tree node.
 *
 * If FreeNodeList is non-empty, its head entry is unlinked and returned;
 * otherwise a new Node is allocated with xmalloc(). The node's contents are
 * not cleared in either case, and the xmalloc() result is returned unchecked,
 * so the caller must handle a NULL return.
 *
 * This function performs no locking of its own around FreeNodeList.
 * NOTE(review): presumably it is meant for callers that already hold the
 * lock that node_new() would otherwise take -- confirm against node_new().
 */
static Node*
|
||||
node_new_locked(void)
|
||||
{
|
||||
Node* node;
|
||||
|
||||
/* Reuse a recycled node when one is available. */
if (IS_NOT_NULL(FreeNodeList)) {
|
||||
node = (Node* )FreeNodeList;
|
||||
FreeNodeList = FreeNodeList->next;
|
||||
return node;
|
||||
}
|
||||
|
||||
node = (Node* )xmalloc(sizeof(Node));
|
||||
/* xmemset(node, 0, sizeof(Node)); */
|
||||
return node;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
initialize_cclass(CClassNode* cc)
|
||||
|
@ -1164,6 +1193,24 @@ node_new_cclass(void)
|
|||
return node;
|
||||
}
|
||||
|
||||
#if defined(USE_MULTI_THREAD_SYSTEM) && \
|
||||
defined(USE_SHARED_CCLASS_TABLE) && \
|
||||
defined(USE_PARSE_TREE_NODE_RECYCLE)
|
||||
/*
 * Create a character-class node using node_new_locked() as the allocator.
 *
 * The node is tagged NT_CCLASS and its CClassNode part is set up by
 * initialize_cclass(). CHECK_NULL_RETURN() guards against node_new_locked()
 * returning NULL (presumably by returning NULL from here -- confirm against
 * the macro definition).
 *
 * When the enclosing #if condition does not hold, this name is instead a
 * macro alias for node_new_cclass().
 */
static Node*
|
||||
node_new_cclass_locked(void)
|
||||
{
|
||||
Node* node = node_new_locked();
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
SET_NTYPE(node, NT_CCLASS);
|
||||
initialize_cclass(NCCLASS(node));
|
||||
return node;
|
||||
}
|
||||
#else
|
||||
#define node_new_cclass_locked() node_new_cclass()
|
||||
#endif
|
||||
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
static Node*
|
||||
node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
|
||||
const OnigCodePoint ranges[])
|
||||
|
@ -1172,7 +1219,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
|
|||
CClassNode* cc;
|
||||
OnigCodePoint j;
|
||||
|
||||
Node* node = node_new_cclass();
|
||||
Node* node = node_new_cclass_locked();
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
cc = NCCLASS(node);
|
||||
|
@ -1213,6 +1260,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
|
|||
|
||||
return node;
|
||||
}
|
||||
#endif /* USE_SHARED_CCLASS_TABLE */
|
||||
|
||||
static Node*
|
||||
node_new_ctype(int type, int not, int ascii_range)
|
||||
|
@ -2486,22 +2534,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
|
|||
OnigCodePoint c;
|
||||
OnigEncoding enc = env->enc;
|
||||
UChar* p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
|
||||
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
switch (c) {
|
||||
case 'M':
|
||||
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_META;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c != '-') return ONIGERR_META_CODE_SYNTAX;
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_META;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == MC_ESC(env->syntax)) {
|
||||
v = fetch_escaped_value(&p, end, env);
|
||||
if (v < 0) return v;
|
||||
v = fetch_escaped_value(&p, end, env);
|
||||
if (v < 0) return v;
|
||||
c = (OnigCodePoint )v;
|
||||
}
|
||||
c = ((c & 0xff) | 0x80);
|
||||
|
@ -2513,7 +2560,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
|
|||
case 'C':
|
||||
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
|
||||
goto control;
|
||||
}
|
||||
|
@ -2524,9 +2571,9 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
|
|||
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
|
||||
control:
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == '?') {
|
||||
c = 0177;
|
||||
c = 0177;
|
||||
}
|
||||
else {
|
||||
if (c == MC_ESC(env->syntax)) {
|
||||
|
@ -2534,7 +2581,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
|
|||
if (v < 0) return v;
|
||||
c = (OnigCodePoint )v;
|
||||
}
|
||||
c &= 0x9f;
|
||||
c &= 0x9f;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2630,11 +2677,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
|
|||
|
||||
if (is_num != 0) {
|
||||
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
|
||||
is_num = 1;
|
||||
is_num = 1;
|
||||
}
|
||||
else {
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
is_num = 0;
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
is_num = 0;
|
||||
}
|
||||
}
|
||||
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
|
||||
|
@ -2701,7 +2748,6 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
|||
UChar *name_end;
|
||||
UChar *pnum_head;
|
||||
UChar *p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
*rback_num = 0;
|
||||
|
||||
|
@ -2716,23 +2762,23 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
|||
return ONIGERR_EMPTY_GROUP_NAME;
|
||||
}
|
||||
else {
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == end_code)
|
||||
return ONIGERR_EMPTY_GROUP_NAME;
|
||||
|
||||
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
|
||||
if (ref == 1)
|
||||
is_num = 1;
|
||||
is_num = 1;
|
||||
else {
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
is_num = 0;
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
is_num = 0;
|
||||
}
|
||||
}
|
||||
else if (c == '-') {
|
||||
if (ref == 1) {
|
||||
is_num = 2;
|
||||
sign = -1;
|
||||
pnum_head = p;
|
||||
is_num = 2;
|
||||
sign = -1;
|
||||
pnum_head = p;
|
||||
}
|
||||
else {
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
|
@ -2747,29 +2793,28 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
|||
if (r == 0) {
|
||||
while (!PEND) {
|
||||
name_end = p;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == end_code || c == ')') {
|
||||
if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_num != 0) {
|
||||
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
|
||||
is_num = 1;
|
||||
}
|
||||
else {
|
||||
if (!ONIGENC_IS_CODE_WORD(enc, c))
|
||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||
else
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
|
||||
is_num = 0;
|
||||
}
|
||||
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
|
||||
is_num = 1;
|
||||
}
|
||||
else {
|
||||
if (!ONIGENC_IS_CODE_WORD(enc, c))
|
||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||
else
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
is_num = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
|
||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||
}
|
||||
if (!ONIGENC_IS_CODE_WORD(enc, c)) {
|
||||
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2782,8 +2827,8 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
|||
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
|
||||
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
|
||||
else if (*rback_num == 0) {
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
goto err;
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
goto err;
|
||||
}
|
||||
|
||||
*rback_num *= sign;
|
||||
|
@ -2796,9 +2841,9 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
|||
else {
|
||||
while (!PEND) {
|
||||
name_end = p;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == end_code || c == ')')
|
||||
break;
|
||||
break;
|
||||
}
|
||||
if (PEND)
|
||||
name_end = end;
|
||||
|
@ -4263,10 +4308,9 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
|||
OnigCodePoint c;
|
||||
OnigEncoding enc = env->enc;
|
||||
UChar *p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
if (PPEEK_IS('^')) {
|
||||
PINC;
|
||||
PINC_S;
|
||||
not = 1;
|
||||
}
|
||||
else
|
||||
|
@ -4279,14 +4323,14 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
|||
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
|
||||
p = (UChar* )onigenc_step(enc, p, end, pb->len);
|
||||
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
|
||||
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
|
||||
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
|
||||
|
||||
r = add_ctype_to_cc(cc, pb->ctype, not,
|
||||
IS_POSIX_BRACKET_ALL_RANGE(env->option),
|
||||
env);
|
||||
if (r != 0) return r;
|
||||
|
||||
PINC; PINC;
|
||||
PINC_S; PINC_S;
|
||||
*src = p;
|
||||
return 0;
|
||||
}
|
||||
|
@ -4296,15 +4340,15 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
|||
c = 0;
|
||||
i = 0;
|
||||
while (!PEND && ((c = PPEEK) != ':') && c != ']') {
|
||||
PINC;
|
||||
PINC_S;
|
||||
if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
|
||||
}
|
||||
if (c == ':' && ! PEND) {
|
||||
PINC;
|
||||
PINC_S;
|
||||
if (! PEND) {
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == ']')
|
||||
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
|
||||
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4318,14 +4362,13 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
|
|||
OnigCodePoint c;
|
||||
OnigEncoding enc = env->enc;
|
||||
UChar *prev, *start, *p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
r = 0;
|
||||
start = prev = p;
|
||||
|
||||
while (!PEND) {
|
||||
prev = p;
|
||||
PFETCH(c);
|
||||
PFETCH_S(c);
|
||||
if (c == '}') {
|
||||
r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
|
||||
if (r < 0) break;
|
||||
|
@ -4483,7 +4526,6 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
|
|||
OnigCodePoint code;
|
||||
OnigEncoding enc = env->enc;
|
||||
UChar* p = from;
|
||||
PFETCH_READY;
|
||||
|
||||
in_esc = 0;
|
||||
while (! PEND) {
|
||||
|
@ -4491,7 +4533,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
|
|||
in_esc = 0;
|
||||
}
|
||||
else {
|
||||
PFETCH(code);
|
||||
PFETCH_S(code);
|
||||
if (code == c) return 1;
|
||||
if (code == MC_ESC(env->syntax)) in_esc = 1;
|
||||
}
|
||||
|
@ -4960,14 +5002,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
|
|||
*np = node_new_enclose_memory(env->option, 0);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
num = scan_env_add_mem_entry(env);
|
||||
if (num < 0) {
|
||||
onig_node_free(*np);
|
||||
return num;
|
||||
}
|
||||
else if (num >= (int )BIT_STATUS_BITS_NUM) {
|
||||
onig_node_free(*np);
|
||||
if (num < 0) return num;
|
||||
if (num >= (int )BIT_STATUS_BITS_NUM)
|
||||
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
|
||||
}
|
||||
|
||||
NENCLOSE(*np)->regnum = num;
|
||||
BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
|
||||
}
|
||||
|
@ -4985,11 +5023,14 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
|
|||
PUNFETCH;
|
||||
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
|
||||
if (r < 0) return r;
|
||||
#if 0
|
||||
/* Relative number is not currently supported. (same as Perl) */
|
||||
if (num < 0) {
|
||||
num = BACKREF_REL_TO_ABS(num, env);
|
||||
if (num <= 0)
|
||||
return ONIGERR_INVALID_BACKREF;
|
||||
}
|
||||
#endif
|
||||
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
|
||||
if (num > env->num_mem ||
|
||||
IS_NULL(SCANENV_MEM_NODES(env)[num]))
|
||||
|
@ -5291,30 +5332,23 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
|||
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
|
||||
UChar buf[WARN_BUFSIZE];
|
||||
|
||||
switch (ReduceTypeTable[targetq_num][nestq_num]) {
|
||||
case RQ_ASIS:
|
||||
break;
|
||||
|
||||
case RQ_DEL:
|
||||
if (onig_verb_warn != onig_null_warn) {
|
||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||
env->pattern, env->pattern_end,
|
||||
(UChar* )"redundant nested repeat operator");
|
||||
(*onig_verb_warn)((char* )buf);
|
||||
if (onig_warn != onig_null_warn) {
|
||||
onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
|
||||
PopularQStr[targetq_num]);
|
||||
}
|
||||
goto warn_exit;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (onig_verb_warn != onig_null_warn) {
|
||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||
env->pattern, env->pattern_end,
|
||||
(UChar* )"nested repeat operator %s and %s was replaced with '%s'",
|
||||
PopularQStr[targetq_num], PopularQStr[nestq_num],
|
||||
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
|
||||
(*onig_verb_warn)((char* )buf);
|
||||
if (onig_warn != onig_null_warn) {
|
||||
onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
|
||||
PopularQStr[targetq_num], PopularQStr[nestq_num],
|
||||
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
|
||||
}
|
||||
goto warn_exit;
|
||||
break;
|
||||
|
@ -5413,13 +5447,13 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
|
|||
extern int
|
||||
onig_free_shared_cclass_table(void)
|
||||
{
|
||||
THREAD_ATOMIC_START;
|
||||
/* THREAD_ATOMIC_START; */
|
||||
if (IS_NOT_NULL(OnigTypeCClassTable)) {
|
||||
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
|
||||
onig_st_free_table(OnigTypeCClassTable);
|
||||
OnigTypeCClassTable = NULL;
|
||||
}
|
||||
THREAD_ATOMIC_END;
|
||||
/* THREAD_ATOMIC_END; */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -294,7 +294,9 @@ props.each do |name|
|
|||
end
|
||||
make_const(name, data[name], category)
|
||||
end
|
||||
print "\n#ifdef USE_UNICODE_AGE_PROPERTIES"
|
||||
ages = parse_age(data)
|
||||
puts "#endif /* USE_UNICODE_AGE_PROPERTIES */"
|
||||
blocks = parse_block(data)
|
||||
puts '#endif /* USE_UNICODE_PROPERTIES */'
|
||||
puts(<<'__HEREDOC')
|
||||
|
@ -304,7 +306,9 @@ __HEREDOC
|
|||
POSIX_NAMES.each{|name|puts" CR_#{name},"}
|
||||
puts "#ifdef USE_UNICODE_PROPERTIES"
|
||||
props.each{|name| puts" CR_#{name},"}
|
||||
puts "#ifdef USE_UNICODE_AGE_PROPERTIES"
|
||||
ages.each{|name| puts" CR_#{constantize_agename(name)},"}
|
||||
puts "#endif /* USE_UNICODE_AGE_PROPERTIES */"
|
||||
blocks.each{|name|puts" CR_#{name},"}
|
||||
|
||||
puts(<<'__HEREDOC')
|
||||
|
@ -340,12 +344,14 @@ aliases.each_pair do |k, v|
|
|||
next unless v = name_to_index[v]
|
||||
puts "%-40s %3d" % [k + ',', v]
|
||||
end
|
||||
puts "#ifdef USE_UNICODE_AGE_PROPERTIES"
|
||||
ages.each do |name|
|
||||
i += 1
|
||||
name = "age=#{name}"
|
||||
name_to_index[name] = i
|
||||
puts "%-40s %3d" % [name + ',', i]
|
||||
end
|
||||
puts "#endif /* USE_UNICODE_AGE_PROPERTIES */"
|
||||
blocks.each do |name|
|
||||
i += 1
|
||||
name = normalize_propname(name)
|
||||
|
|
Loading…
Reference in a new issue