mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* oniguruma.h: merge Oniguruma 4.0.0 [ruby-dev:28290]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9885 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
f0432871fd
commit
086e235f03
13 changed files with 477 additions and 373 deletions
161
regparse.c
161
regparse.c
|
@ -58,7 +58,21 @@ OnigSyntaxType OnigSyntaxRuby = {
|
|||
|
||||
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
|
||||
|
||||
extern void onig_null_warn(const char* s, ...) { }
|
||||
extern void onig_null_warn(const char* s) { }
|
||||
|
||||
#ifdef RUBY_PLATFORM
|
||||
/*
 * Warning callback that hands an already-formatted message to Ruby's
 * rb_warn().
 *
 * s: NUL-terminated warning text.
 *
 * rb_warn() takes a printf-style format string, so the message must be
 * passed as an argument to "%s" rather than as the format itself: warning
 * text built elsewhere in this file embeds the regex pattern, and a '%' in
 * it would otherwise be interpreted as a conversion specifier.
 */
extern void
onig_rb_warn(const char* s)
{
  rb_warn("%s", s);
}
|
||||
|
||||
/*
 * Warning callback that hands an already-formatted message to Ruby's
 * rb_warning().
 *
 * s: NUL-terminated warning text.
 *
 * rb_warning() takes a printf-style format string, so the message is
 * passed through "%s" to keep any '%' characters it contains (for example
 * from an embedded regex pattern) from being parsed as conversions.
 */
extern void
onig_rb_warning(const char* s)
{
  rb_warning("%s", s);
}
|
||||
#endif
|
||||
|
||||
#ifdef DEFAULT_WARN_FUNCTION
|
||||
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
|
||||
|
@ -1050,12 +1064,12 @@ onig_node_free(Node* node)
|
|||
|
||||
#ifdef USE_RECYCLE_NODE
|
||||
extern int
|
||||
onig_free_node_list(void)
|
||||
onig_free_node_list()
|
||||
{
|
||||
FreeNode* n;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
while (FreeNodeList) {
|
||||
while (IS_NOT_NULL(FreeNodeList)) {
|
||||
n = FreeNodeList;
|
||||
FreeNodeList = FreeNodeList->next;
|
||||
xfree(n);
|
||||
|
@ -1066,18 +1080,19 @@ onig_free_node_list(void)
|
|||
#endif
|
||||
|
||||
static Node*
|
||||
node_new(void)
|
||||
node_new()
|
||||
{
|
||||
Node* node;
|
||||
|
||||
#ifdef USE_RECYCLE_NODE
|
||||
THREAD_ATOMIC_START;
|
||||
if (IS_NOT_NULL(FreeNodeList)) {
|
||||
THREAD_ATOMIC_START;
|
||||
node = (Node* )FreeNodeList;
|
||||
FreeNodeList = FreeNodeList->next;
|
||||
THREAD_ATOMIC_END;
|
||||
return node;
|
||||
}
|
||||
THREAD_ATOMIC_END;
|
||||
#endif
|
||||
|
||||
node = (Node* )xmalloc(sizeof(Node));
|
||||
|
@ -1094,7 +1109,7 @@ initialize_cclass(CClassNode* cc)
|
|||
}
|
||||
|
||||
static Node*
|
||||
node_new_cclass(void)
|
||||
node_new_cclass()
|
||||
{
|
||||
Node* node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
@ -1106,7 +1121,7 @@ node_new_cclass(void)
|
|||
|
||||
static Node*
|
||||
node_new_cclass_by_codepoint_range(int not,
|
||||
OnigCodePoint sbr[], OnigCodePoint mbr[])
|
||||
const OnigCodePoint sbr[], const OnigCodePoint mbr[])
|
||||
{
|
||||
CClassNode* cc;
|
||||
int n, i, j;
|
||||
|
@ -1163,7 +1178,7 @@ node_new_ctype(int type)
|
|||
}
|
||||
|
||||
static Node*
|
||||
node_new_anychar(void)
|
||||
node_new_anychar()
|
||||
{
|
||||
Node* node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
@ -1434,7 +1449,7 @@ node_new_str_raw(UChar* s, UChar* end)
|
|||
}
|
||||
|
||||
static Node*
|
||||
node_new_empty(void)
|
||||
node_new_empty()
|
||||
{
|
||||
return node_new_str(NULL, NULL);
|
||||
}
|
||||
|
@ -2358,15 +2373,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
|
|||
control:
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
|
||||
PFETCH(c);
|
||||
if (c == MC_ESC(enc)) {
|
||||
v = fetch_escaped_value(&p, end, env);
|
||||
if (v < 0) return v;
|
||||
c = (OnigCodePoint )(v & 0x9f);
|
||||
}
|
||||
else if (c == '?')
|
||||
if (c == '?') {
|
||||
c = 0177;
|
||||
else
|
||||
}
|
||||
else {
|
||||
if (c == MC_ESC(enc)) {
|
||||
v = fetch_escaped_value(&p, end, env);
|
||||
if (v < 0) return v;
|
||||
c = (OnigCodePoint )v;
|
||||
}
|
||||
c &= 0x9f;
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
|
@ -2512,11 +2529,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c)
|
|||
|
||||
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
|
||||
char buf[WARN_BUFSIZE];
|
||||
UChar buf[WARN_BUFSIZE];
|
||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||
env->pattern, env->pattern_end,
|
||||
"character class has '%s' without escape", c);
|
||||
(*onig_warn)(buf);
|
||||
(UChar* )"character class has '%s' without escape", c);
|
||||
(*onig_warn)((char* )buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2526,11 +2543,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c)
|
|||
if (onig_warn == onig_null_warn) return ;
|
||||
|
||||
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
|
||||
char buf[WARN_BUFSIZE];
|
||||
UChar buf[WARN_BUFSIZE];
|
||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
|
||||
(env)->pattern, (env)->pattern_end,
|
||||
"regular expression has '%s' without escape", c);
|
||||
(*onig_warn)(buf);
|
||||
(UChar* )"regular expression has '%s' without escape", c);
|
||||
(*onig_warn)((char* )buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2794,7 +2811,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
tok->type = TK_CC_CC_OPEN;
|
||||
}
|
||||
else {
|
||||
CC_ESC_WARN(env, "[");
|
||||
CC_ESC_WARN(env, (UChar* )"[");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2833,7 +2850,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
tok->backp = p;
|
||||
|
||||
PFETCH(c);
|
||||
if (c == MC_ESC(enc)) {
|
||||
if (IS_MC_ESC_CODE(c, enc, syn)) {
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
|
||||
|
||||
tok->backp = p;
|
||||
|
@ -3365,7 +3382,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
|
||||
case ']':
|
||||
if (*src > env->pattern) /* /].../ is allowed. */
|
||||
CCEND_ESC_WARN(env, "]");
|
||||
CCEND_ESC_WARN(env, (UChar* )"]");
|
||||
break;
|
||||
|
||||
case '#':
|
||||
|
@ -3400,7 +3417,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
|
||||
static int
|
||||
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
|
||||
OnigCodePoint sbr[], OnigCodePoint mbr[])
|
||||
const OnigCodePoint sbr[], const OnigCodePoint mbr[])
|
||||
{
|
||||
int i, r;
|
||||
OnigCodePoint j;
|
||||
|
@ -3464,7 +3481,7 @@ static int
|
|||
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
|
||||
{
|
||||
int c, r;
|
||||
OnigCodePoint *sbr, *mbr;
|
||||
const OnigCodePoint *sbr, *mbr;
|
||||
OnigEncoding enc = env->enc;
|
||||
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
|
||||
|
@ -3602,19 +3619,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
|||
#define POSIX_BRACKET_NAME_MAX_LEN 6
|
||||
|
||||
static PosixBracketEntryType PBS[] = {
|
||||
{ "alnum", ONIGENC_CTYPE_ALNUM, 5 },
|
||||
{ "alpha", ONIGENC_CTYPE_ALPHA, 5 },
|
||||
{ "blank", ONIGENC_CTYPE_BLANK, 5 },
|
||||
{ "cntrl", ONIGENC_CTYPE_CNTRL, 5 },
|
||||
{ "digit", ONIGENC_CTYPE_DIGIT, 5 },
|
||||
{ "graph", ONIGENC_CTYPE_GRAPH, 5 },
|
||||
{ "lower", ONIGENC_CTYPE_LOWER, 5 },
|
||||
{ "print", ONIGENC_CTYPE_PRINT, 5 },
|
||||
{ "punct", ONIGENC_CTYPE_PUNCT, 5 },
|
||||
{ "space", ONIGENC_CTYPE_SPACE, 5 },
|
||||
{ "upper", ONIGENC_CTYPE_UPPER, 5 },
|
||||
{ "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
|
||||
{ "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */
|
||||
{ (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
|
||||
{ (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
|
||||
{ (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
|
||||
{ (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
|
||||
{ (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
|
||||
{ (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
|
||||
{ (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
|
||||
{ (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
|
||||
{ (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
|
||||
{ (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
|
||||
{ (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
|
||||
{ (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
|
||||
{ (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
|
||||
{ (UChar* )NULL, -1, 0 }
|
||||
};
|
||||
|
||||
|
@ -3638,7 +3655,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
|||
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
|
||||
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
|
||||
p = (UChar* )onigenc_step(enc, p, end, pb->len);
|
||||
if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
|
||||
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
|
||||
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
|
||||
|
||||
r = add_ctype_to_cc(cc, pb->ctype, not, env);
|
||||
|
@ -3673,19 +3690,19 @@ static int
|
|||
property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
|
||||
{
|
||||
static PosixBracketEntryType PBS[] = {
|
||||
{ "Alnum", ONIGENC_CTYPE_ALNUM, 5 },
|
||||
{ "Alpha", ONIGENC_CTYPE_ALPHA, 5 },
|
||||
{ "Blank", ONIGENC_CTYPE_BLANK, 5 },
|
||||
{ "Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
|
||||
{ "Digit", ONIGENC_CTYPE_DIGIT, 5 },
|
||||
{ "Graph", ONIGENC_CTYPE_GRAPH, 5 },
|
||||
{ "Lower", ONIGENC_CTYPE_LOWER, 5 },
|
||||
{ "Print", ONIGENC_CTYPE_PRINT, 5 },
|
||||
{ "Punct", ONIGENC_CTYPE_PUNCT, 5 },
|
||||
{ "Space", ONIGENC_CTYPE_SPACE, 5 },
|
||||
{ "Upper", ONIGENC_CTYPE_UPPER, 5 },
|
||||
{ "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
|
||||
{ "ASCII", ONIGENC_CTYPE_ASCII, 5 },
|
||||
{ (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
|
||||
{ (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
|
||||
{ (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
|
||||
{ (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
|
||||
{ (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
|
||||
{ (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
|
||||
{ (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
|
||||
{ (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
|
||||
{ (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
|
||||
{ (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
|
||||
{ (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
|
||||
{ (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
|
||||
{ (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
|
||||
{ (UChar* )NULL, -1, 0 }
|
||||
};
|
||||
|
||||
|
@ -3935,7 +3952,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
*src, env->pattern_end, 1, env->enc))
|
||||
return ONIGERR_EMPTY_CHAR_CLASS;
|
||||
|
||||
CC_ESC_WARN(env, "]");
|
||||
CC_ESC_WARN(env, (UChar* )"]");
|
||||
r = tok->type = TK_CHAR; /* allow []...] */
|
||||
}
|
||||
|
||||
|
@ -4038,7 +4055,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
r = parse_posix_bracket(cc, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
if (r == 1) { /* is not POSIX bracket */
|
||||
CC_ESC_WARN(env, "[");
|
||||
CC_ESC_WARN(env, (UChar* )"[");
|
||||
p = tok->backp;
|
||||
v = (OnigCodePoint )tok->u.c;
|
||||
in_israw = 0;
|
||||
|
@ -4084,7 +4101,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
goto val_entry;
|
||||
}
|
||||
else if (r == TK_CC_AND) {
|
||||
CC_ESC_WARN(env, "-");
|
||||
CC_ESC_WARN(env, (UChar* )"-");
|
||||
goto range_end_val;
|
||||
}
|
||||
state = CCS_RANGE;
|
||||
|
@ -4099,12 +4116,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
fetched = 1;
|
||||
/* [--x] or [a&&-x] is warned. */
|
||||
if (r == TK_CC_RANGE || and_start != 0)
|
||||
CC_ESC_WARN(env, "-");
|
||||
CC_ESC_WARN(env, (UChar* )"-");
|
||||
|
||||
goto val_entry;
|
||||
}
|
||||
else if (state == CCS_RANGE) {
|
||||
CC_ESC_WARN(env, "-");
|
||||
CC_ESC_WARN(env, (UChar* )"-");
|
||||
goto sb_char; /* [!--x] is allowed */
|
||||
}
|
||||
else { /* CCS_COMPLETE */
|
||||
|
@ -4113,12 +4130,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
fetched = 1;
|
||||
if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
|
||||
else if (r == TK_CC_AND) {
|
||||
CC_ESC_WARN(env, "-");
|
||||
CC_ESC_WARN(env, (UChar* )"-");
|
||||
goto range_end_val;
|
||||
}
|
||||
|
||||
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
|
||||
CC_ESC_WARN(env, "-");
|
||||
CC_ESC_WARN(env, (UChar* )"-");
|
||||
goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
|
||||
}
|
||||
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
|
||||
|
@ -4495,7 +4512,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
|||
if (qn->by_number == 0 && qnt->by_number == 0 &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
|
||||
int nestq_num, targetq_num;
|
||||
char buf[WARN_BUFSIZE];
|
||||
UChar buf[WARN_BUFSIZE];
|
||||
|
||||
nestq_num = popular_qualifier_num(qn);
|
||||
targetq_num = popular_qualifier_num(qnt);
|
||||
|
@ -4507,9 +4524,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
|||
case RQ_DEL:
|
||||
if (onig_verb_warn != onig_null_warn) {
|
||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||
env->pattern, env->pattern_end,
|
||||
"redundant nested repeat operator");
|
||||
(*onig_verb_warn)(buf);
|
||||
env->pattern, env->pattern_end,
|
||||
(UChar* )"redundant nested repeat operator");
|
||||
(*onig_verb_warn)((char* )buf);
|
||||
}
|
||||
goto warn_exit;
|
||||
break;
|
||||
|
@ -4518,10 +4535,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
|||
if (onig_verb_warn != onig_null_warn) {
|
||||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||||
env->pattern, env->pattern_end,
|
||||
"nested repeat operator %s and %s was replaced with '%s'",
|
||||
(UChar* )"nested repeat operator %s and %s was replaced with '%s'",
|
||||
PopularQStr[targetq_num], PopularQStr[nestq_num],
|
||||
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
|
||||
(*onig_verb_warn)(buf);
|
||||
(*onig_verb_warn)((char* )buf);
|
||||
}
|
||||
goto warn_exit;
|
||||
break;
|
||||
|
@ -4553,8 +4570,8 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
|
|||
int r, i, j, k, clen, len, ncode, n;
|
||||
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
||||
Node **ptail, *snode = NULL_NODE;
|
||||
OnigCompAmbigCodes* ccs;
|
||||
OnigCompAmbigCodeItem* ci;
|
||||
const OnigCompAmbigCodes* ccs;
|
||||
const OnigCompAmbigCodeItem* ci;
|
||||
OnigAmbigType amb;
|
||||
|
||||
n = 0;
|
||||
|
@ -4662,7 +4679,7 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
|
|||
}
|
||||
|
||||
extern int
|
||||
onig_free_shared_cclass_table(void)
|
||||
onig_free_shared_cclass_table()
|
||||
{
|
||||
if (IS_NOT_NULL(OnigTypeCClassTable)) {
|
||||
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
|
||||
|
@ -4819,7 +4836,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
int ctype, not;
|
||||
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
OnigCodePoint *sbr, *mbr;
|
||||
const OnigCodePoint *sbr, *mbr;
|
||||
|
||||
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
|
||||
|
@ -4901,7 +4918,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
|
||||
if (IS_IGNORECASE(env->option)) {
|
||||
int i, n, in_cc;
|
||||
OnigPairAmbigCodes* ccs;
|
||||
const OnigPairAmbigCodes* ccs;
|
||||
BitSetRef bs = cc->bs;
|
||||
OnigAmbigType amb;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue