1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* oniguruma.h: merge Oniguruma 4.0.0 [ruby-dev:28290]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9885 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2006-02-04 12:31:19 +00:00
parent f0432871fd
commit 086e235f03
13 changed files with 477 additions and 373 deletions

View file

@ -58,7 +58,21 @@ OnigSyntaxType OnigSyntaxRuby = {
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
/* No-op warning handler: the body is empty, so the message is discarded.
   Other code in this file compares the active warning callback against
   onig_null_warn and skips building the warning text when they are equal. */
extern void onig_null_warn(const char* s, ...) { }
extern void onig_null_warn(const char* s) { }
#ifdef RUBY_PLATFORM
/*
 * Forward an already-formatted warning message to Ruby's rb_warn().
 *
 * s: NUL-terminated message text.
 *
 * rb_warn() takes a printf-style format string, so the message is passed as
 * a "%s" argument rather than as the format itself.  Warning texts produced
 * in this file embed the regex pattern (see onig_snprintf_with_pattern
 * callers), and a '%' inside a pattern must not be interpreted as a
 * conversion specifier.
 */
extern void
onig_rb_warn(const char* s)
{
  rb_warn("%s", s);
}
/*
 * Forward an already-formatted warning message to Ruby's rb_warning().
 *
 * s: NUL-terminated message text.
 *
 * rb_warning() takes a printf-style format string, so the message is passed
 * as a "%s" argument rather than as the format itself.  Warning texts
 * produced in this file embed the regex pattern (see
 * onig_snprintf_with_pattern callers), and a '%' inside a pattern must not
 * be interpreted as a conversion specifier.
 */
extern void
onig_rb_warning(const char* s)
{
  rb_warning("%s", s);
}
#endif
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
@ -1050,12 +1064,12 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
extern int
onig_free_node_list(void)
onig_free_node_list()
{
FreeNode* n;
THREAD_ATOMIC_START;
while (FreeNodeList) {
while (IS_NOT_NULL(FreeNodeList)) {
n = FreeNodeList;
FreeNodeList = FreeNodeList->next;
xfree(n);
@ -1066,18 +1080,19 @@ onig_free_node_list(void)
#endif
static Node*
node_new(void)
node_new()
{
Node* node;
#ifdef USE_RECYCLE_NODE
THREAD_ATOMIC_START;
if (IS_NOT_NULL(FreeNodeList)) {
THREAD_ATOMIC_START;
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
THREAD_ATOMIC_END;
return node;
}
THREAD_ATOMIC_END;
#endif
node = (Node* )xmalloc(sizeof(Node));
@ -1094,7 +1109,7 @@ initialize_cclass(CClassNode* cc)
}
static Node*
node_new_cclass(void)
node_new_cclass()
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@ -1106,7 +1121,7 @@ node_new_cclass(void)
static Node*
node_new_cclass_by_codepoint_range(int not,
OnigCodePoint sbr[], OnigCodePoint mbr[])
const OnigCodePoint sbr[], const OnigCodePoint mbr[])
{
CClassNode* cc;
int n, i, j;
@ -1163,7 +1178,7 @@ node_new_ctype(int type)
}
static Node*
node_new_anychar(void)
node_new_anychar()
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@ -1434,7 +1449,7 @@ node_new_str_raw(UChar* s, UChar* end)
}
static Node*
node_new_empty(void)
node_new_empty()
{
return node_new_str(NULL, NULL);
}
@ -2358,15 +2373,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
control:
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
if (c == MC_ESC(enc)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )(v & 0x9f);
}
else if (c == '?')
if (c == '?') {
c = 0177;
else
}
else {
if (c == MC_ESC(enc)) {
v = fetch_escaped_value(&p, end, env);
if (v < 0) return v;
c = (OnigCodePoint )v;
}
c &= 0x9f;
}
break;
}
/* fall through */
@ -2512,11 +2529,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c)
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
char buf[WARN_BUFSIZE];
UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
"character class has '%s' without escape", c);
(*onig_warn)(buf);
(UChar* )"character class has '%s' without escape", c);
(*onig_warn)((char* )buf);
}
}
@ -2526,11 +2543,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c)
if (onig_warn == onig_null_warn) return ;
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
char buf[WARN_BUFSIZE];
UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
(env)->pattern, (env)->pattern_end,
"regular expression has '%s' without escape", c);
(*onig_warn)(buf);
(UChar* )"regular expression has '%s' without escape", c);
(*onig_warn)((char* )buf);
}
}
@ -2794,7 +2811,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CC_CC_OPEN;
}
else {
CC_ESC_WARN(env, "[");
CC_ESC_WARN(env, (UChar* )"[");
}
}
}
@ -2833,7 +2850,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p;
PFETCH(c);
if (c == MC_ESC(enc)) {
if (IS_MC_ESC_CODE(c, enc, syn)) {
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
@ -3365,7 +3382,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case ']':
if (*src > env->pattern) /* /].../ is allowed. */
CCEND_ESC_WARN(env, "]");
CCEND_ESC_WARN(env, (UChar* )"]");
break;
case '#':
@ -3400,7 +3417,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
OnigCodePoint sbr[], OnigCodePoint mbr[])
const OnigCodePoint sbr[], const OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
@ -3464,7 +3481,7 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
OnigCodePoint *sbr, *mbr;
const OnigCodePoint *sbr, *mbr;
OnigEncoding enc = env->enc;
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
@ -3602,19 +3619,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
#define POSIX_BRACKET_NAME_MAX_LEN 6
static PosixBracketEntryType PBS[] = {
{ "alnum", ONIGENC_CTYPE_ALNUM, 5 },
{ "alpha", ONIGENC_CTYPE_ALPHA, 5 },
{ "blank", ONIGENC_CTYPE_BLANK, 5 },
{ "cntrl", ONIGENC_CTYPE_CNTRL, 5 },
{ "digit", ONIGENC_CTYPE_DIGIT, 5 },
{ "graph", ONIGENC_CTYPE_GRAPH, 5 },
{ "lower", ONIGENC_CTYPE_LOWER, 5 },
{ "print", ONIGENC_CTYPE_PRINT, 5 },
{ "punct", ONIGENC_CTYPE_PUNCT, 5 },
{ "space", ONIGENC_CTYPE_SPACE, 5 },
{ "upper", ONIGENC_CTYPE_UPPER, 5 },
{ "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */
{ (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
{ (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
{ (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
{ (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
{ (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
{ (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
{ (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
{ (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
{ (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
{ (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
{ (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
{ (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )NULL, -1, 0 }
};
@ -3638,7 +3655,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not, env);
@ -3673,19 +3690,19 @@ static int
property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
{
static PosixBracketEntryType PBS[] = {
{ "Alnum", ONIGENC_CTYPE_ALNUM, 5 },
{ "Alpha", ONIGENC_CTYPE_ALPHA, 5 },
{ "Blank", ONIGENC_CTYPE_BLANK, 5 },
{ "Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
{ "Digit", ONIGENC_CTYPE_DIGIT, 5 },
{ "Graph", ONIGENC_CTYPE_GRAPH, 5 },
{ "Lower", ONIGENC_CTYPE_LOWER, 5 },
{ "Print", ONIGENC_CTYPE_PRINT, 5 },
{ "Punct", ONIGENC_CTYPE_PUNCT, 5 },
{ "Space", ONIGENC_CTYPE_SPACE, 5 },
{ "Upper", ONIGENC_CTYPE_UPPER, 5 },
{ "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ "ASCII", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
{ (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
{ (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
{ (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
{ (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
{ (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
{ (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
{ (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
{ (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
{ (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
{ (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
{ (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )NULL, -1, 0 }
};
@ -3935,7 +3952,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
*src, env->pattern_end, 1, env->enc))
return ONIGERR_EMPTY_CHAR_CLASS;
CC_ESC_WARN(env, "]");
CC_ESC_WARN(env, (UChar* )"]");
r = tok->type = TK_CHAR; /* allow []...] */
}
@ -4038,7 +4055,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = parse_posix_bracket(cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
CC_ESC_WARN(env, "[");
CC_ESC_WARN(env, (UChar* )"[");
p = tok->backp;
v = (OnigCodePoint )tok->u.c;
in_israw = 0;
@ -4084,7 +4101,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
goto val_entry;
}
else if (r == TK_CC_AND) {
CC_ESC_WARN(env, "-");
CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
state = CCS_RANGE;
@ -4099,12 +4116,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1;
/* [--x] or [a&&-x] is warned. */
if (r == TK_CC_RANGE || and_start != 0)
CC_ESC_WARN(env, "-");
CC_ESC_WARN(env, (UChar* )"-");
goto val_entry;
}
else if (state == CCS_RANGE) {
CC_ESC_WARN(env, "-");
CC_ESC_WARN(env, (UChar* )"-");
goto sb_char; /* [!--x] is allowed */
}
else { /* CCS_COMPLETE */
@ -4113,12 +4130,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1;
if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
else if (r == TK_CC_AND) {
CC_ESC_WARN(env, "-");
CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
CC_ESC_WARN(env, "-");
CC_ESC_WARN(env, (UChar* )"-");
goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
}
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
@ -4495,7 +4512,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (qn->by_number == 0 && qnt->by_number == 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
int nestq_num, targetq_num;
char buf[WARN_BUFSIZE];
UChar buf[WARN_BUFSIZE];
nestq_num = popular_qualifier_num(qn);
targetq_num = popular_qualifier_num(qnt);
@ -4507,9 +4524,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
case RQ_DEL:
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
"redundant nested repeat operator");
(*onig_verb_warn)(buf);
env->pattern, env->pattern_end,
(UChar* )"redundant nested repeat operator");
(*onig_verb_warn)((char* )buf);
}
goto warn_exit;
break;
@ -4518,10 +4535,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
"nested repeat operator %s and %s was replaced with '%s'",
(UChar* )"nested repeat operator %s and %s was replaced with '%s'",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
(*onig_verb_warn)(buf);
(*onig_verb_warn)((char* )buf);
}
goto warn_exit;
break;
@ -4553,8 +4570,8 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
int r, i, j, k, clen, len, ncode, n;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
Node **ptail, *snode = NULL_NODE;
OnigCompAmbigCodes* ccs;
OnigCompAmbigCodeItem* ci;
const OnigCompAmbigCodes* ccs;
const OnigCompAmbigCodeItem* ci;
OnigAmbigType amb;
n = 0;
@ -4662,7 +4679,7 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
}
extern int
onig_free_shared_cclass_table(void)
onig_free_shared_cclass_table()
{
if (IS_NOT_NULL(OnigTypeCClassTable)) {
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
@ -4819,7 +4836,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
int ctype, not;
#ifdef USE_SHARED_CCLASS_TABLE
OnigCodePoint *sbr, *mbr;
const OnigCodePoint *sbr, *mbr;
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
@ -4901,7 +4918,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (IS_IGNORECASE(env->option)) {
int i, n, in_cc;
OnigPairAmbigCodes* ccs;
const OnigPairAmbigCodes* ccs;
BitSetRef bs = cc->bs;
OnigAmbigType amb;