mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported Oni Guruma 3.5.4.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7846 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
7d765e5203
commit
a19d6b33d7
12 changed files with 4237 additions and 446 deletions
|
@ -1,3 +1,10 @@
|
|||
Sat Jan 29 00:10:33 2005 Kazuo Saito <ksaito@uranus.dti.ne.jp>
|
||||
|
||||
* ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c,
|
||||
regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h,
|
||||
regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported
|
||||
Oni Guruma 3.5.4.
|
||||
|
||||
Fri Jan 28 17:16:55 2005 Tanaka Akira <akr@m17n.org>
|
||||
|
||||
* lib/resolv.rb (Resolv::DNS::Config.parse_resolv_conf):
|
||||
|
|
2
hash.c
2
hash.c
|
@ -102,6 +102,8 @@ rb_any_hash(a)
|
|||
static struct st_hash_type objhash = {
|
||||
rb_any_cmp,
|
||||
rb_any_hash,
|
||||
st_nothing_key_free,
|
||||
st_nothing_key_clone
|
||||
};
|
||||
|
||||
struct foreach_safe_arg {
|
||||
|
|
8
imp.log
8
imp.log
|
@ -1,8 +0,0 @@
|
|||
Vim: Warning: Output is not to a terminal
|
||||
7[?47h[?1h=[1;24r[m[H[2J[24;1H"/tmp/cvss7mRju" 4L, 229C[1;1HCVS: ----------------------------------------------------------------------
|
||||
CVS: Enter Log. Lines beginning with `CVS:' are removed automatically
|
||||
CVS:
|
||||
CVS: ----------------------------------------------------------------------
|
||||
[1m[34m~ [6;1H~ [7;1H~ [8;1H~ [9;1H~ [10;1H~ [11;1H~ [12;1H~ [13;1H~ [14;1H~ [15;1H~ [16;1H~ [17;1H~ [18;1H~ [19;1H~ [20;1H~ [21;1H~ [22;1H~ [23;1H~ [1;1H[m[24;1H[K[24;1H:q![24;3H[K[24;3H[24;2H[K[24;2H[24;1H[K[1;1H[24;1H[1m-- INSERT --[1;23r[m[1;1H[L[1;24r[1;1H[24;1H[K[1;1Hi[24;1H[1m-- INSERT --[1;2H[mmported Oni Guruma 3.5.4.[24;1H[K[1;26H[24;1H"/private/tmp/cvss7mRju" 5L, 256C written
|
||||
|
||||
[?1l>[2J[?47l8
|
37
oniguruma.h
37
oniguruma.h
|
@ -4,7 +4,7 @@
|
|||
oniguruma.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -31,8 +31,17 @@
|
|||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 3
|
||||
#define ONIGURUMA_VERSION_MINOR 4
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
#define ONIGURUMA_VERSION_MINOR 5
|
||||
#define ONIGURUMA_VERSION_TEENY 4
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
# define HAVE_PROTOTYPES 1
|
||||
# endif
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
|
@ -72,12 +81,6 @@ typedef unsigned int OnigDistance;
|
|||
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from;
|
||||
OnigCodePoint to;
|
||||
} OnigCodePointRange;
|
||||
|
||||
|
||||
/* ambiguous match flag */
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
|
||||
|
@ -103,6 +106,11 @@ typedef unsigned int OnigAmbigType;
|
|||
#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
|
||||
#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
|
||||
|
||||
/* code range */
|
||||
#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
|
||||
#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
|
||||
#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
|
||||
|
||||
typedef struct {
|
||||
int len;
|
||||
OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
|
||||
|
@ -152,7 +160,7 @@ typedef struct {
|
|||
int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
|
||||
int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
|
||||
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
|
||||
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
|
||||
int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
|
||||
UChar* (*left_adjust_char_head)(UChar* start, UChar* p);
|
||||
int (*is_allowed_reverse_match)(UChar* p, UChar* end);
|
||||
} OnigEncodingType;
|
||||
|
@ -245,7 +253,6 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
|||
#define ONIGENC_CTYPE_ASCII (1<<13)
|
||||
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
|
||||
|
||||
|
||||
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
|
@ -275,7 +282,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
|||
onigenc_get_left_adjust_char_head(enc, start, s)
|
||||
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
|
||||
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
|
||||
ONIG_NO_SUPPORT_CONFIG
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
|
||||
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
|
||||
|
@ -390,8 +397,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
|
|||
#define ONIGENC_IS_CODE_WORD(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
|
||||
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
|
||||
(enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
|
||||
(enc)->get_ctype_code_range(ctype,sbr,mbr)
|
||||
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
|
||||
|
@ -600,7 +607,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
|
||||
#define ONIGERR_EMPTY_CHAR_CLASS -102
|
||||
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
|
||||
#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
|
||||
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
|
||||
#define ONIGERR_END_PATTERN_AT_META -105
|
||||
#define ONIGERR_END_PATTERN_AT_CONTROL -106
|
||||
#define ONIGERR_META_CODE_SYNTAX -108
|
||||
|
|
249
regcomp.c
249
regcomp.c
|
@ -2,7 +2,7 @@
|
|||
regcomp.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -33,6 +33,21 @@
|
|||
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
||||
#endif
|
||||
|
||||
static UChar*
|
||||
k_strdup(UChar* s, UChar* end)
|
||||
{
|
||||
int len = end - s;
|
||||
|
||||
if (len > 0) {
|
||||
UChar* r = (UChar* )xmalloc(len + 1);
|
||||
CHECK_NULL_RETURN(r);
|
||||
xmemcpy(r, s, len);
|
||||
r[len] = (UChar )0;
|
||||
return r;
|
||||
}
|
||||
else return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
Caution: node should not be a string node.
|
||||
(s and end member address break)
|
||||
|
@ -189,16 +204,14 @@ add_mem_num(regex_t* reg, int num)
|
|||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
add_repeat_num(regex_t* reg, int num)
|
||||
add_pointer(regex_t* reg, void* addr)
|
||||
{
|
||||
RepeatNumType n = (RepeatNumType )num;
|
||||
PointerType ptr = (PointerType )addr;
|
||||
|
||||
BBUF_ADD(reg, &n, SIZE_REPEATNUM);
|
||||
BBUF_ADD(reg, &ptr, SIZE_POINTER);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
add_option(regex_t* reg, OnigOptionType option)
|
||||
|
@ -518,6 +531,11 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
|
|||
{
|
||||
int len;
|
||||
|
||||
if (IS_CCLASS_SHARE(cc)) {
|
||||
len = SIZE_OPCODE + SIZE_POINTER;
|
||||
return len;
|
||||
}
|
||||
|
||||
if (IS_NULL(cc->mbuf)) {
|
||||
len = SIZE_OPCODE + SIZE_BITSET;
|
||||
}
|
||||
|
@ -543,22 +561,34 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
|
|||
{
|
||||
int r;
|
||||
|
||||
if (IS_CCLASS_SHARE(cc)) {
|
||||
add_opcode(reg, OP_CCLASS_NODE);
|
||||
r = add_pointer(reg, cc);
|
||||
return r;
|
||||
}
|
||||
|
||||
if (IS_NULL(cc->mbuf)) {
|
||||
if (cc->not) add_opcode(reg, OP_CCLASS_NOT);
|
||||
else add_opcode(reg, OP_CCLASS);
|
||||
if (IS_CCLASS_NOT(cc))
|
||||
add_opcode(reg, OP_CCLASS_NOT);
|
||||
else
|
||||
add_opcode(reg, OP_CCLASS);
|
||||
|
||||
r = add_bitset(reg, cc->bs);
|
||||
}
|
||||
else {
|
||||
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
||||
if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT);
|
||||
else add_opcode(reg, OP_CCLASS_MB);
|
||||
if (IS_CCLASS_NOT(cc))
|
||||
add_opcode(reg, OP_CCLASS_MB_NOT);
|
||||
else
|
||||
add_opcode(reg, OP_CCLASS_MB);
|
||||
|
||||
r = add_multi_byte_cclass(cc->mbuf, reg);
|
||||
}
|
||||
else {
|
||||
if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT);
|
||||
else add_opcode(reg, OP_CCLASS_MIX);
|
||||
if (IS_CCLASS_NOT(cc))
|
||||
add_opcode(reg, OP_CCLASS_MIX_NOT);
|
||||
else
|
||||
add_opcode(reg, OP_CCLASS_MIX);
|
||||
|
||||
r = add_bitset(reg, cc->bs);
|
||||
if (r) return r;
|
||||
|
@ -631,7 +661,6 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
|
|||
else {
|
||||
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
|
||||
}
|
||||
|
||||
if (r) return r;
|
||||
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
||||
return r;
|
||||
|
@ -1408,12 +1437,9 @@ compile_tree(Node* node, regex_t* reg)
|
|||
}
|
||||
|
||||
#ifdef USE_NAMED_GROUP
|
||||
typedef struct {
|
||||
int new_val;
|
||||
} NumMap;
|
||||
|
||||
static int
|
||||
noname_disable_map(Node** plink, NumMap* map, int* counter)
|
||||
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
||||
{
|
||||
int r = 0;
|
||||
Node* node = *plink;
|
||||
|
@ -1467,7 +1493,7 @@ noname_disable_map(Node** plink, NumMap* map, int* counter)
|
|||
}
|
||||
|
||||
static int
|
||||
renumber_node_backref(Node* node, NumMap* map)
|
||||
renumber_node_backref(Node* node, GroupNumRemap* map)
|
||||
{
|
||||
int i, pos, n, old_num;
|
||||
int *backs;
|
||||
|
@ -1495,7 +1521,7 @@ renumber_node_backref(Node* node, NumMap* map)
|
|||
}
|
||||
|
||||
static int
|
||||
renumber_by_map(Node* node, NumMap* map)
|
||||
renumber_by_map(Node* node, GroupNumRemap* map)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
|
@ -1560,9 +1586,9 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
|||
{
|
||||
int r, i, pos, counter;
|
||||
BitStatusType loc;
|
||||
NumMap* map;
|
||||
GroupNumRemap* map;
|
||||
|
||||
map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
|
||||
map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
|
||||
CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
|
||||
for (i = 1; i <= env->num_mem; i++) {
|
||||
map[i].new_val = 0;
|
||||
|
@ -1591,7 +1617,8 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
|||
|
||||
env->num_mem = env->num_named;
|
||||
reg->num_mem = env->num_named;
|
||||
return 0;
|
||||
|
||||
return onig_renumber_name_table(reg, map);
|
||||
}
|
||||
#endif /* USE_NAMED_GROUP */
|
||||
|
||||
|
@ -2092,10 +2119,10 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
|||
found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
if (cc->not == 0)
|
||||
return found;
|
||||
else
|
||||
if (IS_CCLASS_NOT(cc))
|
||||
return !found;
|
||||
else
|
||||
return found;
|
||||
}
|
||||
|
||||
/* x is not included y ==> 1 : 0 */
|
||||
|
@ -2158,7 +2185,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
|
|||
case N_CTYPE:
|
||||
switch (NCTYPE(y).type) {
|
||||
case CTYPE_WORD:
|
||||
if (IS_NULL(xc->mbuf) && xc->not == 0) {
|
||||
if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
if (BITSET_AT(xc->bs, i)) {
|
||||
if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
|
||||
|
@ -2171,7 +2198,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
|
|||
case CTYPE_NOT_WORD:
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
|
||||
if (xc->not == 0) {
|
||||
if (!IS_CCLASS_NOT(xc)) {
|
||||
if (BITSET_AT(xc->bs, i))
|
||||
return 0;
|
||||
}
|
||||
|
@ -2196,14 +2223,16 @@ is_not_included(Node* x, Node* y, regex_t* reg)
|
|||
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
v = BITSET_AT(xc->bs, i);
|
||||
if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) {
|
||||
if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
|
||||
(v == 0 && IS_CCLASS_NOT(xc))) {
|
||||
v = BITSET_AT(yc->bs, i);
|
||||
if ((v != 0 && yc->not == 0) || (v == 0 && yc->not))
|
||||
if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
|
||||
(v == 0 && IS_CCLASS_NOT(yc)))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if ((IS_NULL(xc->mbuf) && xc->not == 0) ||
|
||||
(IS_NULL(yc->mbuf) && yc->not == 0))
|
||||
if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
|
||||
(IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
@ -3333,22 +3362,27 @@ typedef struct {
|
|||
OptMapInfo map; /* boundary */
|
||||
} NodeOptInfo;
|
||||
|
||||
static short int ByteValTable[] = {
|
||||
14, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
||||
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
||||
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
||||
};
|
||||
|
||||
static int
|
||||
map_position_value(int i)
|
||||
map_position_value(OnigEncoding enc, int i)
|
||||
{
|
||||
if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0]))
|
||||
return (int )ByteValTable[i];
|
||||
static short int ByteValTable[] = {
|
||||
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
||||
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
||||
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
||||
};
|
||||
|
||||
if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
|
||||
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
||||
return 20;
|
||||
else
|
||||
return (int )ByteValTable[i];
|
||||
}
|
||||
else
|
||||
return 4; /* Take it easy. */
|
||||
}
|
||||
|
@ -3634,7 +3668,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
|
|||
}
|
||||
|
||||
static void
|
||||
select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
|
||||
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
|
||||
{
|
||||
int v1, v2;
|
||||
|
||||
|
@ -3643,8 +3677,8 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
|
|||
|
||||
if (v1 <= 2 && v2 <= 2) {
|
||||
/* ByteValTable[x] is big value --> low price */
|
||||
v2 = map_position_value(now->s[0]);
|
||||
v1 = map_position_value(alt->s[0]);
|
||||
v2 = map_position_value(enc, now->s[0]);
|
||||
v1 = map_position_value(enc, alt->s[0]);
|
||||
|
||||
if (now->len > 1) v1 += 5;
|
||||
if (alt->len > 1) v2 += 5;
|
||||
|
@ -3660,13 +3694,29 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
|
|||
static void
|
||||
clear_opt_map_info(OptMapInfo* map)
|
||||
{
|
||||
int i;
|
||||
static OptMapInfo clean_info = {
|
||||
{0, 0}, {0, 0}, 0,
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
}
|
||||
};
|
||||
|
||||
clear_mml(&map->mmd);
|
||||
clear_opt_anc_info(&map->anc);
|
||||
map->value = 0;
|
||||
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
||||
map->map[i] = 0;
|
||||
xmemcpy(map, &clean_info, sizeof(OptMapInfo));
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3676,11 +3726,11 @@ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
|
|||
}
|
||||
|
||||
static void
|
||||
add_char_opt_map_info(OptMapInfo* map, UChar c)
|
||||
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
|
||||
{
|
||||
if (map->map[c] == 0) {
|
||||
map->map[c] = 1;
|
||||
map->value += map_position_value(c);
|
||||
map->value += map_position_value(enc, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3695,7 +3745,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
|||
OnigPairAmbigCodes* pccs;
|
||||
OnigAmbigType amb;
|
||||
|
||||
add_char_opt_map_info(map, p[0]);
|
||||
add_char_opt_map_info(map, p[0], enc);
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
|
||||
for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
|
||||
|
@ -3706,7 +3756,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
|||
if (pccs[i].from == code) {
|
||||
len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
|
||||
if (len < 0) return len;
|
||||
add_char_opt_map_info(map, buf[0]);
|
||||
add_char_opt_map_info(map, buf[0], enc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3718,7 +3768,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
|||
ccode = ccs[i].items[j].code[0];
|
||||
len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
|
||||
if (len < 0) return len;
|
||||
add_char_opt_map_info(map, buf[0]);
|
||||
add_char_opt_map_info(map, buf[0], enc);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -3761,7 +3811,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
|
|||
}
|
||||
|
||||
static void
|
||||
alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
|
||||
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
|
||||
{
|
||||
int i, val;
|
||||
|
||||
|
@ -3780,7 +3830,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
|
|||
to->map[i] = 1;
|
||||
|
||||
if (to->map[i])
|
||||
val += map_position_value(i);
|
||||
val += map_position_value(enc, i);
|
||||
}
|
||||
to->value = val;
|
||||
|
||||
|
@ -3813,7 +3863,7 @@ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
|
|||
}
|
||||
|
||||
static void
|
||||
concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
|
||||
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
|
||||
{
|
||||
int exb_reach, exm_reach;
|
||||
OptAncInfo tanc;
|
||||
|
@ -3848,8 +3898,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
|
|||
clear_opt_exact_info(&add->exb);
|
||||
}
|
||||
}
|
||||
select_opt_exact_info(&to->exm, &add->exb);
|
||||
select_opt_exact_info(&to->exm, &add->exm);
|
||||
select_opt_exact_info(enc, &to->exm, &add->exb);
|
||||
select_opt_exact_info(enc, &to->exm, &add->exm);
|
||||
|
||||
if (to->expr.len > 0) {
|
||||
if (add->len.max > 0) {
|
||||
|
@ -3857,9 +3907,9 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
|
|||
to->expr.len = add->len.max;
|
||||
|
||||
if (to->expr.mmd.max == 0)
|
||||
select_opt_exact_info(&to->exb, &to->expr);
|
||||
select_opt_exact_info(enc, &to->exb, &to->expr);
|
||||
else
|
||||
select_opt_exact_info(&to->exm, &to->expr);
|
||||
select_opt_exact_info(enc, &to->exm, &to->expr);
|
||||
}
|
||||
}
|
||||
else if (add->expr.len > 0) {
|
||||
|
@ -3878,7 +3928,7 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
|
|||
alt_merge_opt_exact_info(&to->exb, &add->exb, env);
|
||||
alt_merge_opt_exact_info(&to->exm, &add->exm, env);
|
||||
alt_merge_opt_exact_info(&to->expr, &add->expr, env);
|
||||
alt_merge_opt_map_info (&to->map, &add->map);
|
||||
alt_merge_opt_map_info(env->enc, &to->map, &add->map);
|
||||
|
||||
alt_merge_mml(&to->len, &add->len);
|
||||
}
|
||||
|
@ -3908,7 +3958,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
|
||||
if (r == 0) {
|
||||
add_mml(&nenv.mmd, &nopt.len);
|
||||
concat_left_node_opt_info(opt, &nopt);
|
||||
concat_left_node_opt_info(env->enc, opt, &nopt);
|
||||
}
|
||||
} while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
|
||||
}
|
||||
|
@ -3939,7 +3989,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
||||
NSTRING_IS_RAW(node), env->enc);
|
||||
if (slen > 0) {
|
||||
add_char_opt_map_info(&opt->map, *(sn->s));
|
||||
add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
|
||||
}
|
||||
set_mml(&opt->len, slen, slen);
|
||||
}
|
||||
|
@ -3978,7 +4028,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
|
||||
/* no need to check ignore case. (setted in setup_tree()) */
|
||||
|
||||
if (IS_NOT_NULL(cc->mbuf) || cc->not != 0) {
|
||||
if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
|
||||
OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
||||
OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
||||
|
||||
|
@ -3987,8 +4037,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
else {
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
z = BITSET_AT(cc->bs, i);
|
||||
if ((z && !cc->not) || (!z && cc->not)) {
|
||||
add_char_opt_map_info(&opt->map, (UChar )i);
|
||||
if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
|
||||
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
||||
}
|
||||
}
|
||||
set_mml(&opt->len, 1, 1);
|
||||
|
@ -4009,7 +4059,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
case CTYPE_NOT_WORD:
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
|
||||
add_char_opt_map_info(&opt->map, (UChar )i);
|
||||
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -4017,7 +4067,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
case CTYPE_WORD:
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
|
||||
add_char_opt_map_info(&opt->map, (UChar )i);
|
||||
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -4245,7 +4295,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
|
|||
else {
|
||||
int allow_reverse;
|
||||
|
||||
reg->exact = onig_strdup(e->s, e->s + e->len);
|
||||
reg->exact = k_strdup(e->s, e->s + e->len);
|
||||
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
|
||||
reg->exact_end = reg->exact + e->len;
|
||||
|
||||
|
@ -4334,7 +4384,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
|||
}
|
||||
|
||||
if (opt.exb.len > 0 || opt.exm.len > 0) {
|
||||
select_opt_exact_info(&opt.exb, &opt.exm);
|
||||
select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
|
||||
if (opt.map.value > 0 &&
|
||||
comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
|
||||
goto set_map;
|
||||
|
@ -4506,7 +4556,7 @@ onig_free_body(regex_t* reg)
|
|||
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
|
||||
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
|
||||
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
|
||||
if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
|
||||
if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
|
||||
|
||||
#ifdef USE_NAMED_GROUP
|
||||
onig_names_free(reg);
|
||||
|
@ -4579,11 +4629,12 @@ onig_clone(regex_t** to, regex_t* from)
|
|||
int r, size;
|
||||
regex_t* reg;
|
||||
|
||||
if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
|
||||
from->state++; /* increment as search counter */
|
||||
if (IS_NOT_NULL(from->chain)) {
|
||||
#ifdef USE_MULTI_THREAD_SYSTEM
|
||||
if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
|
||||
ONIG_STATE_INC(from);
|
||||
if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
||||
onig_chain_reduce(from);
|
||||
from->state++;
|
||||
ONIG_STATE_INC(from);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -4593,19 +4644,20 @@ onig_clone(regex_t** to, regex_t* from)
|
|||
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
|
||||
THREAD_PASS;
|
||||
}
|
||||
from->state++; /* increment as search counter */
|
||||
ONIG_STATE_INC(from);
|
||||
}
|
||||
#endif /* USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
|
||||
from->enc, ONIG_SYNTAX_DEFAULT);
|
||||
if (r != 0) {
|
||||
from->state--;
|
||||
ONIG_STATE_DEC(from);
|
||||
return r;
|
||||
}
|
||||
|
||||
xmemcpy(reg, from, sizeof(onig_t));
|
||||
reg->state = ONIG_STATE_NORMAL;
|
||||
reg->chain = (regex_t* )NULL;
|
||||
reg->state = ONIG_STATE_NORMAL;
|
||||
|
||||
if (from->p) {
|
||||
reg->p = (UChar* )xmalloc(reg->alloc);
|
||||
|
@ -4638,12 +4690,12 @@ onig_clone(regex_t** to, regex_t* from)
|
|||
reg->name_table = names_clone(from); /* names_clone is not implemented */
|
||||
#endif
|
||||
|
||||
from->state--;
|
||||
ONIG_STATE_DEC(from);
|
||||
*to = reg;
|
||||
return 0;
|
||||
|
||||
mem_error:
|
||||
from->state--;
|
||||
ONIG_STATE_DEC(from);
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
#endif
|
||||
|
@ -4839,6 +4891,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
|
|||
|
||||
*reg = (regex_t* )xmalloc(sizeof(regex_t));
|
||||
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
||||
(*reg)->state = ONIG_STATE_MODIFY;
|
||||
|
||||
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
|
||||
option |= syntax->options;
|
||||
|
@ -4847,7 +4900,6 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
|
|||
else
|
||||
option |= syntax->options;
|
||||
|
||||
(*reg)->state = ONIG_STATE_NORMAL;
|
||||
(*reg)->enc = enc;
|
||||
(*reg)->options = option;
|
||||
(*reg)->syntax = syntax;
|
||||
|
@ -4910,9 +4962,14 @@ onig_init()
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
onig_end()
|
||||
{
|
||||
extern int onig_free_shared_cclass_table();
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
onig_print_statistics(stderr);
|
||||
#endif
|
||||
|
@ -4921,10 +4978,17 @@ onig_end()
|
|||
onig_free_node_list();
|
||||
#endif
|
||||
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
onig_free_shared_cclass_table();
|
||||
#endif
|
||||
|
||||
onig_inited = 0;
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
OnigOpInfoType OnigOpInfo[] = {
|
||||
|
@ -4950,6 +5014,7 @@ OnigOpInfoType OnigOpInfo[] = {
|
|||
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
|
||||
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
|
||||
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
|
||||
{ OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
|
||||
{ OP_ANYCHAR, "anychar", ARG_NON },
|
||||
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
|
||||
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
|
||||
|
@ -5203,6 +5268,16 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
|
|||
fprintf(f, ":%d:%d:%d", n, (int )code, len);
|
||||
break;
|
||||
|
||||
case OP_CCLASS_NODE:
|
||||
{
|
||||
CClassNode *cc;
|
||||
|
||||
GET_POINTER_INC(cc, bp);
|
||||
n = bitset_on_num(cc->bs);
|
||||
fprintf(f, ":%u:%d", (unsigned int )cc, n);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_BACKREFN_IC:
|
||||
mem = *((MemNumType* )bp);
|
||||
bp += SIZE_MEMNUM;
|
||||
|
@ -5330,7 +5405,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
|||
|
||||
case N_CCLASS:
|
||||
fprintf(f, "<cclass:%x>", (int )node);
|
||||
if (NCCLASS(node).not) fputs(" not", f);
|
||||
if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
|
||||
if (NCCLASS(node).mbuf) {
|
||||
BBuf* bbuf = NCCLASS(node).mbuf;
|
||||
for (i = 0; i < bbuf->used; i++) {
|
||||
|
|
184
regexec.c
184
regexec.c
|
@ -2,7 +2,7 @@
|
|||
regexec.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -274,7 +274,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
|
|||
|
||||
/** stack **/
|
||||
#define INVALID_STACK_INDEX -1
|
||||
typedef int StackIndex;
|
||||
typedef long StackIndex;
|
||||
|
||||
typedef struct _StackType {
|
||||
unsigned int type;
|
||||
|
@ -986,7 +986,7 @@ trap_ensure(VALUE arg)
|
|||
TrapEnsureArg* ta = (TrapEnsureArg* )arg;
|
||||
|
||||
if (ta->state == 0) { /* trap_exec() is not normal return */
|
||||
ta->reg->state--;
|
||||
ONIG_STATE_DEC(ta->reg);
|
||||
if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
|
||||
xfree(ta->stk_base);
|
||||
|
||||
|
@ -1147,6 +1147,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code)
|
|||
return ((low < n && code >= data[low * 2]) ? 1 : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
|
||||
{
|
||||
unsigned int in_cc;
|
||||
CClassNode* cc = (CClassNode* )node;
|
||||
|
||||
if (enclen == 1) {
|
||||
in_cc = BITSET_AT(cc->bs, code);
|
||||
}
|
||||
else {
|
||||
UChar* p = ((BBuf* )(cc->mbuf))->p;
|
||||
in_cc = onig_is_in_code_range(p, code);
|
||||
}
|
||||
|
||||
if (IS_CCLASS_NOT(cc)) {
|
||||
return (in_cc ? 0 : 1);
|
||||
}
|
||||
else {
|
||||
return (in_cc ? 1 : 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* matching region of POSIX API */
|
||||
typedef int regoff_t;
|
||||
|
@ -1340,14 +1361,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
|
|||
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
|
||||
{
|
||||
int len;
|
||||
UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
|
||||
DATA_ENSURE(1);
|
||||
ss = s;
|
||||
sp = p;
|
||||
|
||||
exact1_ic_retry:
|
||||
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
|
||||
DATA_ENSURE(0);
|
||||
q = lowbuf;
|
||||
while (len-- > 0) {
|
||||
if (*p != *q) goto fail;
|
||||
if (*p != *q) {
|
||||
#if 1
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
|
||||
s = ss;
|
||||
p = sp;
|
||||
goto exact1_ic_retry;
|
||||
}
|
||||
else
|
||||
goto fail;
|
||||
#else
|
||||
goto fail;
|
||||
#endif
|
||||
}
|
||||
p++; q++;
|
||||
}
|
||||
}
|
||||
|
@ -1424,7 +1462,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
|
|||
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
|
||||
{
|
||||
int len;
|
||||
UChar *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
|
||||
GET_LENGTH_INC(tlen, p);
|
||||
endp = p + tlen;
|
||||
|
@ -1432,11 +1470,28 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
|
|||
while (p < endp) {
|
||||
sprev = s;
|
||||
DATA_ENSURE(1);
|
||||
ss = s;
|
||||
sp = p;
|
||||
|
||||
exactn_ic_retry:
|
||||
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
|
||||
DATA_ENSURE(0);
|
||||
q = lowbuf;
|
||||
while (len-- > 0) {
|
||||
if (*p != *q) goto fail;
|
||||
if (*p != *q) {
|
||||
#if 1
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
|
||||
s = ss;
|
||||
p = sp;
|
||||
goto exactn_ic_retry;
|
||||
}
|
||||
else
|
||||
goto fail;
|
||||
#else
|
||||
goto fail;
|
||||
#endif
|
||||
}
|
||||
p++; q++;
|
||||
}
|
||||
}
|
||||
|
@ -1655,6 +1710,24 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
|
|||
STAT_OP_OUT;
|
||||
break;
|
||||
|
||||
case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
|
||||
{
|
||||
OnigCodePoint code;
|
||||
void *node;
|
||||
int mb_len;
|
||||
UChar *ss;
|
||||
|
||||
DATA_ENSURE(1);
|
||||
GET_POINTER_INC(node, p);
|
||||
mb_len = enc_len(encode, s);
|
||||
ss = s;
|
||||
s += mb_len;
|
||||
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
|
||||
if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
|
||||
}
|
||||
STAT_OP_OUT;
|
||||
break;
|
||||
|
||||
case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
|
||||
DATA_ENSURE(1);
|
||||
n = enc_len(encode, s);
|
||||
|
@ -2519,13 +2592,26 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
|
|||
UChar* t, UChar* tend, UChar* p, UChar* end)
|
||||
{
|
||||
int lowlen;
|
||||
UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
UChar *q, *tsave, *psave, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
|
||||
tsave = t;
|
||||
psave = p;
|
||||
|
||||
retry:
|
||||
while (t < tend) {
|
||||
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
|
||||
q = lowbuf;
|
||||
while (lowlen > 0) {
|
||||
if (*t++ != *q++) return 0;
|
||||
if (*t++ != *q++) {
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
|
||||
t = tsave;
|
||||
p = psave;
|
||||
goto retry;
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
lowlen--;
|
||||
}
|
||||
}
|
||||
|
@ -2538,9 +2624,7 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
|
|||
UChar* target, UChar* target_end,
|
||||
UChar* text, UChar* text_end, UChar* text_range)
|
||||
{
|
||||
int lowlen;
|
||||
UChar *t, *p, *s, *end, *z;
|
||||
UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
UChar *s, *end;
|
||||
|
||||
end = text_end - (target_end - target) + 1;
|
||||
if (end > text_range)
|
||||
|
@ -2549,21 +2633,10 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
|
|||
s = text;
|
||||
|
||||
while (s < end) {
|
||||
z = s;
|
||||
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf);
|
||||
if (*target == *lowbuf) {
|
||||
p = lowbuf + 1;
|
||||
t = target + 1;
|
||||
while (--lowlen > 0) {
|
||||
if (*p != *t) break;
|
||||
p++; t++;
|
||||
}
|
||||
if (lowlen == 0) {
|
||||
if (str_lower_case_match(enc, ambig_flag,
|
||||
t, target_end, s, text_end))
|
||||
return z;
|
||||
}
|
||||
}
|
||||
if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
|
||||
return s;
|
||||
|
||||
s += enc_len(enc, s);
|
||||
}
|
||||
|
||||
return (UChar* )NULL;
|
||||
|
@ -2605,9 +2678,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
|
|||
UChar* text, UChar* adjust_text,
|
||||
UChar* text_end, UChar* text_start)
|
||||
{
|
||||
int len, lowlen;
|
||||
UChar *t, *p, *s, *z;
|
||||
UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
UChar *s;
|
||||
|
||||
s = text_end - (target_end - target);
|
||||
if (s > text_start)
|
||||
|
@ -2616,24 +2687,11 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
|
|||
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
|
||||
|
||||
while (s >= text) {
|
||||
len = enc_len(enc, s);
|
||||
z = s;
|
||||
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf);
|
||||
if (*target == *lowbuf) {
|
||||
p = lowbuf + 1;
|
||||
t = target + 1;
|
||||
while (--lowlen > 0) {
|
||||
if (*p != *t) break;
|
||||
p++; t++;
|
||||
}
|
||||
if (lowlen == 0) {
|
||||
if (str_lower_case_match(enc, ambig_flag,
|
||||
t, target_end, s, text_end))
|
||||
return z;
|
||||
}
|
||||
}
|
||||
if (str_lower_case_match(enc, ambig_flag,
|
||||
target, target_end, s, text_end))
|
||||
return s;
|
||||
|
||||
s = onigenc_get_prev_char_head(enc, adjust_text, z);
|
||||
s = onigenc_get_prev_char_head(enc, adjust_text, s);
|
||||
}
|
||||
|
||||
return (UChar* )NULL;
|
||||
|
@ -2828,11 +2886,12 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
|
|||
UChar *prev;
|
||||
MatchArg msa;
|
||||
|
||||
if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
||||
reg->state++; /* increment as search counter */
|
||||
if (IS_NOT_NULL(reg->chain)) {
|
||||
#ifdef USE_MULTI_THREAD_SYSTEM
|
||||
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
|
||||
ONIG_STATE_INC(reg);
|
||||
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
||||
onig_chain_reduce(reg);
|
||||
reg->state++;
|
||||
ONIG_STATE_INC(reg);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -2842,8 +2901,9 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
|
|||
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
|
||||
THREAD_PASS;
|
||||
}
|
||||
reg->state++; /* increment as search counter */
|
||||
ONIG_STATE_INC(reg);
|
||||
}
|
||||
#endif /* USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, at);
|
||||
|
||||
|
@ -2863,7 +2923,7 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
|
|||
}
|
||||
|
||||
MATCH_ARG_FREE(msa);
|
||||
reg->state--; /* decrement as search counter */
|
||||
ONIG_STATE_DEC(reg);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -3098,11 +3158,12 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
|
|||
UChar *s, *prev;
|
||||
MatchArg msa;
|
||||
|
||||
if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
||||
reg->state++; /* increment as search counter */
|
||||
if (IS_NOT_NULL(reg->chain)) {
|
||||
#ifdef USE_MULTI_THREAD_SYSTEM
|
||||
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
|
||||
ONIG_STATE_INC(reg);
|
||||
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
||||
onig_chain_reduce(reg);
|
||||
reg->state++;
|
||||
ONIG_STATE_INC(reg);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -3112,8 +3173,9 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
|
|||
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
|
||||
THREAD_PASS;
|
||||
}
|
||||
reg->state++; /* increment as search counter */
|
||||
ONIG_STATE_INC(reg);
|
||||
}
|
||||
#endif /* USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
|
||||
|
@ -3360,7 +3422,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
|
|||
|
||||
finish:
|
||||
MATCH_ARG_FREE(msa);
|
||||
reg->state--; /* decrement as search counter */
|
||||
ONIG_STATE_DEC(reg);
|
||||
|
||||
/* If result is mismatch and no FIND_NOT_EMPTY option,
|
||||
then the region is not setted in match_at(). */
|
||||
|
@ -3381,7 +3443,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
|
|||
mismatch_no_msa:
|
||||
r = ONIG_MISMATCH;
|
||||
finish_no_msa:
|
||||
reg->state--; /* decrement as search counter */
|
||||
ONIG_STATE_DEC(reg);
|
||||
#ifdef ONIG_DEBUG
|
||||
if (r != ONIG_MISMATCH)
|
||||
fprintf(stderr, "onig_search: error %d\n", r);
|
||||
|
@ -3389,7 +3451,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
|
|||
return r;
|
||||
|
||||
match:
|
||||
reg->state--; /* decrement as search counter */
|
||||
ONIG_STATE_DEC(reg);
|
||||
MATCH_ARG_FREE(msa);
|
||||
return s - str;
|
||||
}
|
||||
|
|
107
regint.h
107
regint.h
|
@ -4,7 +4,7 @@
|
|||
regint.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -56,6 +56,7 @@
|
|||
|
||||
/* config */
|
||||
/* spec. config */
|
||||
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
|
||||
#define USE_NAMED_GROUP
|
||||
#define USE_SUBEXP_CALL
|
||||
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
|
||||
|
@ -65,6 +66,8 @@
|
|||
#define USE_RECYCLE_NODE
|
||||
#define USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define USE_QUALIFIER_PEEK_NEXT
|
||||
#define USE_ST_HASH_TABLE
|
||||
#define USE_SHARED_CCLASS_TABLE
|
||||
|
||||
#define INIT_MATCH_STACK_SIZE 160
|
||||
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
|
||||
|
@ -76,17 +79,21 @@
|
|||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
|
||||
/* #define USE_MULTI_THREAD_SYSTEM */
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#define CHECK_INTERRUPT /* depend on application */
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xcalloc calloc
|
||||
#define xfree free
|
||||
#else
|
||||
#include "ruby.h"
|
||||
#include "version.h"
|
||||
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
|
||||
|
||||
#define USE_MULTI_THREAD_SYSTEM
|
||||
#define THREAD_ATOMIC_START DEFER_INTS
|
||||
#define THREAD_ATOMIC_END ENABLE_INTS
|
||||
#define THREAD_PASS rb_thread_schedule()
|
||||
|
@ -101,17 +108,9 @@
|
|||
#define DEFAULT_WARN_FUNCTION rb_warn
|
||||
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
|
||||
|
||||
#if defined(RUBY_VERSION_MAJOR)
|
||||
#if RUBY_VERSION_MAJOR > 1 || \
|
||||
(RUBY_VERSION_MAJOR == 1 && \
|
||||
defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
|
||||
#define USE_ST_HASH_TABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* else NOT_RUBY */
|
||||
|
||||
#define THREAD_PASS_LIMIT_COUNT 10
|
||||
#define THREAD_PASS_LIMIT_COUNT 8
|
||||
#define xmemset memset
|
||||
#define xmemcpy memcpy
|
||||
#define xmemmove memmove
|
||||
|
@ -124,6 +123,69 @@
|
|||
#define xalloca alloca
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_MULTI_THREAD_SYSTEM
|
||||
#define ONIG_STATE_INC(reg) (reg)->state++
|
||||
#define ONIG_STATE_DEC(reg) (reg)->state--
|
||||
#else
|
||||
#define ONIG_STATE_INC(reg) /* Nothing */
|
||||
#define ONIG_STATE_DEC(reg) /* Nothing */
|
||||
#endif /* USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
|
||||
#define onig_st_is_member st_is_member
|
||||
|
||||
#ifdef NOT_RUBY
|
||||
|
||||
#define st_init_table onig_st_init_table
|
||||
#define st_init_table_with_size onig_st_init_table_with_size
|
||||
#define st_init_numtable onig_st_init_numtable
|
||||
#define st_init_numtable_with_size onig_st_init_numtable_with_size
|
||||
#define st_init_strtable onig_st_init_strtable
|
||||
#define st_init_strtable_with_size onig_st_init_strtable_with_size
|
||||
#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
|
||||
#define st_delete onig_st_delete
|
||||
#define st_delete_safe onig_st_delete_safe
|
||||
#define st_insert onig_st_insert
|
||||
#define st_insert_strend onig_st_insert_strend
|
||||
#define st_lookup onig_st_lookup
|
||||
#define st_lookup_strend onig_st_lookup_strend
|
||||
#define st_foreach onig_st_foreach
|
||||
#define st_add_direct onig_st_add_direct
|
||||
#define st_add_direct_strend onig_st_add_direct_strend
|
||||
#define st_free_table onig_st_free_table
|
||||
#define st_cleanup_safe onig_st_cleanup_safe
|
||||
#define st_copy onig_st_copy
|
||||
#define st_nothing_key_clone onig_st_nothing_key_clone
|
||||
#define st_nothing_key_free onig_st_nothing_key_free
|
||||
|
||||
#else /* NOT_RUBY */
|
||||
|
||||
#define onig_st_init_table st_init_table
|
||||
#define onig_st_init_table_with_size st_init_table_with_size
|
||||
#define onig_st_init_numtable st_init_numtable
|
||||
#define onig_st_init_numtable_with_size st_init_numtable_with_size
|
||||
#define onig_st_init_strtable st_init_strtable
|
||||
#define onig_st_init_strtable_with_size st_init_strtable_with_size
|
||||
#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
|
||||
#define onig_st_delete st_delete
|
||||
#define onig_st_delete_safe st_delete_safe
|
||||
#define onig_st_insert st_insert
|
||||
#define onig_st_insert_strend st_insert_strend
|
||||
#define onig_st_lookup st_lookup
|
||||
#define onig_st_lookup_strend st_lookup_strend
|
||||
#define onig_st_foreach st_foreach
|
||||
#define onig_st_add_direct st_add_direct
|
||||
#define onig_st_add_direct_strend st_add_direct_strend
|
||||
#define onig_st_free_table st_free_table
|
||||
#define onig_st_cleanup_safe st_cleanup_safe
|
||||
#define onig_st_copy st_copy
|
||||
#define onig_st_nothing_key_clone st_nothing_key_clone
|
||||
#define onig_st_nothing_key_free st_nothing_key_free
|
||||
|
||||
#endif /* NOT_RUBY */
|
||||
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
@ -139,9 +201,11 @@
|
|||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#ifndef __BORLANDC__
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
# include <stdio.h>
|
||||
|
@ -483,6 +547,7 @@ enum OpCode {
|
|||
OP_CCLASS_NOT,
|
||||
OP_CCLASS_MB_NOT,
|
||||
OP_CCLASS_MIX_NOT,
|
||||
OP_CCLASS_NODE, /* pointer to CClassNode node */
|
||||
|
||||
OP_ANYCHAR, /* "." */
|
||||
OP_ANYCHAR_ML, /* "." multi-line */
|
||||
|
@ -570,6 +635,7 @@ typedef int AbsAddrType;
|
|||
typedef int LengthType;
|
||||
typedef int RepeatNumType;
|
||||
typedef short int MemNumType;
|
||||
typedef void* PointerType;
|
||||
|
||||
#define SIZE_OPCODE 1
|
||||
#define SIZE_RELADDR sizeof(RelAddrType)
|
||||
|
@ -579,7 +645,7 @@ typedef short int MemNumType;
|
|||
#define SIZE_REPEATNUM sizeof(RepeatNumType)
|
||||
#define SIZE_OPTION sizeof(OnigOptionType)
|
||||
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
|
||||
|
||||
#define SIZE_POINTER sizeof(PointerType)
|
||||
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
|
@ -604,6 +670,7 @@ typedef short int MemNumType;
|
|||
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
|
||||
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
|
||||
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
|
||||
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
|
||||
|
||||
/* code point's address must be aligned address. */
|
||||
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
|
||||
|
@ -679,6 +746,22 @@ typedef short int MemNumType;
|
|||
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
|
||||
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
|
||||
|
||||
/* cclass node */
|
||||
#define FLAG_CCLASS_NOT 1
|
||||
#define FLAG_CCLASS_SHARE (1<<1)
|
||||
|
||||
#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
|
||||
#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
|
||||
#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
|
||||
#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
|
||||
#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
|
||||
|
||||
typedef struct {
|
||||
int flags;
|
||||
BitSet bs;
|
||||
BBuf* mbuf; /* multi-byte info or NULL */
|
||||
} CClassNode;
|
||||
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
|
@ -700,13 +783,11 @@ extern void onig_print_statistics P_((FILE* f));
|
|||
|
||||
extern char* onig_error_code_to_format P_((int code));
|
||||
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
|
||||
extern UChar* onig_strdup P_((UChar* s, UChar* end));
|
||||
extern int onig_bbuf_init P_((BBuf* buf, int size));
|
||||
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
|
||||
extern void onig_chain_reduce P_((regex_t* reg));
|
||||
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
|
||||
extern void onig_transfer P_((regex_t* to, regex_t* from));
|
||||
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
|
||||
|
||||
#endif /* REGINT_H */
|
||||
|
|
412
regparse.c
412
regparse.c
|
@ -2,7 +2,7 @@
|
|||
regparse.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -219,21 +219,26 @@ k_strcpy(UChar* dest, UChar* src, UChar* end)
|
|||
}
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onig_strdup(UChar* s, UChar* end)
|
||||
static UChar*
|
||||
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
|
||||
{
|
||||
int len = end - s;
|
||||
int slen, term_len, i;
|
||||
UChar *r;
|
||||
|
||||
if (len > 0) {
|
||||
UChar* r = (UChar* )xmalloc(len + 1);
|
||||
CHECK_NULL_RETURN(r);
|
||||
xmemcpy(r, s, len);
|
||||
r[len] = (UChar )0;
|
||||
return r;
|
||||
}
|
||||
else return NULL;
|
||||
slen = end - s;
|
||||
term_len = ONIGENC_MBC_MINLEN(enc);
|
||||
|
||||
r = (UChar* )xmalloc(slen + term_len);
|
||||
CHECK_NULL_RETURN(r);
|
||||
xmemcpy(r, s, slen);
|
||||
|
||||
for (i = 0; i < term_len; i++)
|
||||
r[slen + i] = (UChar )0;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
/* scan pattern methods */
|
||||
#define PEND_VALUE 0
|
||||
|
||||
|
@ -298,7 +303,7 @@ typedef struct {
|
|||
|
||||
#ifdef USE_ST_HASH_TABLE
|
||||
|
||||
#include <st.h>
|
||||
#include "st.h"
|
||||
|
||||
typedef st_table NameTable;
|
||||
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
|
||||
|
@ -335,7 +340,7 @@ onig_print_names(FILE* fp, regex_t* reg)
|
|||
|
||||
if (IS_NOT_NULL(t)) {
|
||||
fprintf(fp, "name table\n");
|
||||
st_foreach(t, i_print_name_entry, (HashDataType )fp);
|
||||
onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
|
||||
fputs("\n", fp);
|
||||
}
|
||||
return 0;
|
||||
|
@ -356,7 +361,7 @@ names_clear(regex_t* reg)
|
|||
NameTable* t = (NameTable* )reg->name_table;
|
||||
|
||||
if (IS_NOT_NULL(t)) {
|
||||
st_foreach(t, i_free_name_entry, 0);
|
||||
onig_st_foreach(t, i_free_name_entry, 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -371,7 +376,7 @@ onig_names_free(regex_t* reg)
|
|||
if (r) return r;
|
||||
|
||||
t = (NameTable* )reg->name_table;
|
||||
if (IS_NOT_NULL(t)) st_free_table(t);
|
||||
if (IS_NOT_NULL(t)) onig_st_free_table(t);
|
||||
reg->name_table = (void* )NULL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -379,33 +384,12 @@ onig_names_free(regex_t* reg)
|
|||
static NameEntry*
|
||||
name_find(regex_t* reg, UChar* name, UChar* name_end)
|
||||
{
|
||||
int len;
|
||||
UChar namebuf[NAMEBUF_SIZE_1];
|
||||
UChar *key;
|
||||
NameEntry* e;
|
||||
NameTable* t = (NameTable* )reg->name_table;
|
||||
|
||||
e = (NameEntry* )NULL;
|
||||
if (IS_NOT_NULL(t)) {
|
||||
if (*name_end == '\0') {
|
||||
key = name;
|
||||
}
|
||||
else {
|
||||
/* dirty, but st.c API claims NULL terminated key. */
|
||||
len = name_end - name;
|
||||
if (len <= NAMEBUF_SIZE) {
|
||||
xmemcpy(namebuf, name, len);
|
||||
namebuf[len] = '\0';
|
||||
key = namebuf;
|
||||
}
|
||||
else {
|
||||
key = onig_strdup(name, name_end);
|
||||
if (IS_NULL(key)) return (NameEntry* )NULL;
|
||||
}
|
||||
}
|
||||
|
||||
st_lookup(t, (HashDataType )key, (HashDataType * )&e);
|
||||
if (key != name && key != namebuf) xfree(key);
|
||||
onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
@ -422,7 +406,8 @@ static int
|
|||
i_names(UChar* key, NameEntry* e, INamesArg* arg)
|
||||
{
|
||||
int r = (*(arg->func))(e->name,
|
||||
e->name + onigenc_str_bytelen_null(arg->enc, e->name),
|
||||
/*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
|
||||
e->name + e->name_len,
|
||||
e->back_num,
|
||||
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
|
||||
arg->reg, arg->arg);
|
||||
|
@ -447,11 +432,40 @@ onig_foreach_name(regex_t* reg,
|
|||
narg.reg = reg;
|
||||
narg.arg = arg;
|
||||
narg.enc = reg->enc; /* should be pattern encoding. */
|
||||
st_foreach(t, i_names, (HashDataType )&narg);
|
||||
onig_st_foreach(t, i_names, (HashDataType )&narg);
|
||||
}
|
||||
return narg.ret;
|
||||
}
|
||||
|
||||
static int
|
||||
i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (e->back_num > 1) {
|
||||
for (i = 0; i < e->back_num; i++) {
|
||||
e->back_refs[i] = map[e->back_refs[i]].new_val;
|
||||
}
|
||||
}
|
||||
else if (e->back_num == 1) {
|
||||
e->back_ref1 = map[e->back_ref1].new_val;
|
||||
}
|
||||
|
||||
return ST_CONTINUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
|
||||
{
|
||||
NameTable* t = (NameTable* )reg->name_table;
|
||||
|
||||
if (IS_NOT_NULL(t)) {
|
||||
onig_st_foreach(t, i_renumber_name, (HashDataType )map);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
onig_number_of_names(regex_t* reg)
|
||||
{
|
||||
|
@ -617,14 +631,16 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
|
|||
if (IS_NULL(e)) {
|
||||
#ifdef USE_ST_HASH_TABLE
|
||||
if (IS_NULL(t)) {
|
||||
reg->name_table = t = st_init_strtable();
|
||||
t = onig_st_init_strend_table_with_size(5);
|
||||
reg->name_table = (void* )t;
|
||||
}
|
||||
e = (NameEntry* )xmalloc(sizeof(NameEntry));
|
||||
CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
|
||||
|
||||
e->name = onig_strdup(name, name_end);
|
||||
e->name = strdup_with_null(reg->enc, name, name_end);
|
||||
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
|
||||
st_insert(t, (HashDataType )e->name, (HashDataType )e);
|
||||
onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
|
||||
(HashDataType )e);
|
||||
|
||||
e->name_len = name_end - name;
|
||||
e->back_num = 0;
|
||||
|
@ -669,7 +685,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
|
|||
}
|
||||
e = &(t->e[t->num]);
|
||||
t->num++;
|
||||
e->name = onig_strdup(name, name_end);
|
||||
e->name = strdup_with_null(reg->enc, name, name_end);
|
||||
e->name_len = name_end - name;
|
||||
#endif
|
||||
}
|
||||
|
@ -886,8 +902,11 @@ onig_node_free(Node* node)
|
|||
#ifdef USE_RECYCLE_NODE
|
||||
{
|
||||
FreeNode* n = (FreeNode* )node;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
n->next = FreeNodeList;
|
||||
FreeNodeList = n;
|
||||
THREAD_ATOMIC_END;
|
||||
}
|
||||
#else
|
||||
xfree(node);
|
||||
|
@ -899,8 +918,15 @@ onig_node_free(Node* node)
|
|||
break;
|
||||
|
||||
case N_CCLASS:
|
||||
if (NCCLASS(node).mbuf)
|
||||
bbuf_free(NCCLASS(node).mbuf);
|
||||
{
|
||||
CClassNode* cc = &(NCCLASS(node));
|
||||
|
||||
if (IS_CCLASS_SHARE(cc))
|
||||
return ;
|
||||
|
||||
if (cc->mbuf)
|
||||
bbuf_free(cc->mbuf);
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
|
@ -927,8 +953,11 @@ onig_node_free(Node* node)
|
|||
#ifdef USE_RECYCLE_NODE
|
||||
{
|
||||
FreeNode* n = (FreeNode* )node;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
n->next = FreeNodeList;
|
||||
FreeNodeList = n;
|
||||
THREAD_ATOMIC_END;
|
||||
}
|
||||
#else
|
||||
xfree(node);
|
||||
|
@ -959,8 +988,10 @@ node_new()
|
|||
|
||||
#ifdef USE_RECYCLE_NODE
|
||||
if (IS_NOT_NULL(FreeNodeList)) {
|
||||
THREAD_ATOMIC_START;
|
||||
node = (Node* )FreeNodeList;
|
||||
FreeNodeList = FreeNodeList->next;
|
||||
THREAD_ATOMIC_END;
|
||||
return node;
|
||||
}
|
||||
#endif
|
||||
|
@ -974,8 +1005,8 @@ static void
|
|||
initialize_cclass(CClassNode* cc)
|
||||
{
|
||||
BITSET_CLEAR(cc->bs);
|
||||
cc->not = 0;
|
||||
cc->mbuf = NULL;
|
||||
cc->flags = 0;
|
||||
cc->mbuf = NULL;
|
||||
}
|
||||
|
||||
static Node*
|
||||
|
@ -989,6 +1020,54 @@ node_new_cclass()
|
|||
return node;
|
||||
}
|
||||
|
||||
extern Node*
|
||||
node_new_cclass_by_codepoint_range(int not,
|
||||
OnigCodePoint sbr[], OnigCodePoint mbr[])
|
||||
{
|
||||
CClassNode* cc;
|
||||
int n, i, j;
|
||||
|
||||
Node* node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
node->type = N_CCLASS;
|
||||
|
||||
cc = &(NCCLASS(node));
|
||||
cc->flags = 0;
|
||||
if (not != 0) CCLASS_SET_NOT(cc);
|
||||
|
||||
BITSET_CLEAR(cc->bs);
|
||||
if (IS_NOT_NULL(sbr)) {
|
||||
n = ONIGENC_CODE_RANGE_NUM(sbr);
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
|
||||
j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
|
||||
BITSET_SET_BIT(cc->bs, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_NULL(mbr)) {
|
||||
is_null:
|
||||
cc->mbuf = NULL;
|
||||
}
|
||||
else {
|
||||
BBuf* bbuf;
|
||||
|
||||
n = ONIGENC_CODE_RANGE_NUM(mbr);
|
||||
if (n == 0) goto is_null;
|
||||
|
||||
bbuf = (BBuf* )xmalloc(sizeof(BBuf));
|
||||
CHECK_NULL_RETURN_VAL(bbuf, NULL);
|
||||
bbuf->alloc = n + 1;
|
||||
bbuf->used = n + 1;
|
||||
bbuf->p = (UChar* )((void* )mbr);
|
||||
|
||||
cc->mbuf = bbuf;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_ctype(int type)
|
||||
{
|
||||
|
@ -1711,7 +1790,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
|
|||
BBuf *tbuf;
|
||||
int r;
|
||||
|
||||
if (cc->not != 0) {
|
||||
if (IS_CCLASS_NOT(cc)) {
|
||||
bitset_invert(cc->bs);
|
||||
|
||||
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
|
||||
|
@ -1722,7 +1801,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
|
|||
cc->mbuf = tbuf;
|
||||
}
|
||||
|
||||
cc->not = 0;
|
||||
CCLASS_CLEAR_NOT(cc);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1736,10 +1815,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
|
|||
BitSetRef bsr1, bsr2;
|
||||
BitSet bs1, bs2;
|
||||
|
||||
not1 = dest->not;
|
||||
not1 = IS_CCLASS_NOT(dest);
|
||||
bsr1 = dest->bs;
|
||||
buf1 = dest->mbuf;
|
||||
not2 = cc->not;
|
||||
not2 = IS_CCLASS_NOT(cc);
|
||||
bsr2 = cc->bs;
|
||||
buf2 = cc->mbuf;
|
||||
|
||||
|
@ -1794,10 +1873,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
|
|||
BitSetRef bsr1, bsr2;
|
||||
BitSet bs1, bs2;
|
||||
|
||||
not1 = dest->not;
|
||||
not1 = IS_CCLASS_NOT(dest);
|
||||
bsr1 = dest->bs;
|
||||
buf1 = dest->mbuf;
|
||||
not2 = cc->not;
|
||||
not2 = IS_CCLASS_NOT(cc);
|
||||
bsr2 = cc->bs;
|
||||
buf2 = cc->mbuf;
|
||||
|
||||
|
@ -2158,7 +2237,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
|
|||
UChar* p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
|
||||
|
||||
PFETCH(c);
|
||||
switch (c) {
|
||||
|
@ -2468,7 +2547,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
|
||||
goto end;
|
||||
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
|
||||
|
||||
PFETCH(c);
|
||||
tok->escaped = 1;
|
||||
|
@ -2576,9 +2655,9 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
if (p == prev) { /* can't read nothing. */
|
||||
num = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = (OnigCodePoint )num;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = (OnigCodePoint )num;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -2669,7 +2748,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
|
||||
PFETCH(c);
|
||||
if (c == MC_ESC(enc)) {
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
|
||||
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
|
||||
|
||||
tok->backp = p;
|
||||
PFETCH(c);
|
||||
|
@ -2907,9 +2986,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
if (p == prev) { /* can't read nothing. */
|
||||
num = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = (OnigCodePoint )num;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = (OnigCodePoint )num;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -3057,7 +3136,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
if (num < 0) return num;
|
||||
/* set_raw: */
|
||||
if (tok->u.c != num) {
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->u.code = (OnigCodePoint )num;
|
||||
}
|
||||
else { /* string */
|
||||
|
@ -3225,21 +3304,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
|||
|
||||
static int
|
||||
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
|
||||
int nsb, int nmb,
|
||||
OnigCodePointRange *sbr, OnigCodePointRange *mbr)
|
||||
OnigCodePoint sbr[], OnigCodePoint mbr[])
|
||||
{
|
||||
int i, r;
|
||||
OnigCodePoint j;
|
||||
|
||||
int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
|
||||
int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
|
||||
|
||||
if (not == 0) {
|
||||
for (i = 0; i < nsb; i++) {
|
||||
for (j = sbr[i].from; j <= sbr[i].to; j++) {
|
||||
for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
|
||||
j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
|
||||
BITSET_SET_BIT(cc->bs, j);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nmb; i++) {
|
||||
r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to);
|
||||
r = add_code_range_to_buf(&(cc->mbuf),
|
||||
ONIGENC_CODE_RANGE_FROM(mbr, i),
|
||||
ONIGENC_CODE_RANGE_TO(mbr, i));
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
|
@ -3248,10 +3332,11 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
|
|||
|
||||
if (ONIGENC_MBC_MINLEN(enc) == 1) {
|
||||
for (i = 0; i < nsb; i++) {
|
||||
for (j = prev; j < sbr[i].from; j++) {
|
||||
for (j = prev;
|
||||
j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
|
||||
BITSET_SET_BIT(cc->bs, j);
|
||||
}
|
||||
prev = sbr[i].to + 1;
|
||||
prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
|
||||
}
|
||||
if (prev < 0x7f) {
|
||||
for (j = prev; j < 0x7f; j++) {
|
||||
|
@ -3263,11 +3348,12 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
|
|||
}
|
||||
|
||||
for (i = 0; i < nmb; i++) {
|
||||
if (prev < mbr[i].from) {
|
||||
r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1);
|
||||
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
|
||||
r = add_code_range_to_buf(&(cc->mbuf), prev,
|
||||
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
prev = mbr[i].to + 1;
|
||||
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
|
||||
}
|
||||
if (prev < 0x7fffffff) {
|
||||
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
|
||||
|
@ -3282,14 +3368,12 @@ static int
|
|||
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
|
||||
{
|
||||
int c, r;
|
||||
int nsb, nmb;
|
||||
OnigCodePointRange *sbr, *mbr;
|
||||
OnigCodePoint *sbr, *mbr;
|
||||
OnigEncoding enc = env->enc;
|
||||
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr);
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
|
||||
if (r == 0) {
|
||||
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc,
|
||||
nsb, nmb, sbr, mbr);
|
||||
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
|
||||
}
|
||||
else if (r != ONIG_NO_SUPPORT_CONFIG) {
|
||||
return r;
|
||||
|
@ -3349,8 +3433,8 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
|
|||
}
|
||||
else {
|
||||
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||||
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) &&
|
||||
! ONIGENC_IS_CODE_WORD(enc, c))
|
||||
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
|
||||
&& ! ONIGENC_IS_CODE_WORD(enc, c))
|
||||
BITSET_SET_BIT(cc->bs, c);
|
||||
}
|
||||
}
|
||||
|
@ -3839,7 +3923,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
break;
|
||||
|
||||
case TK_CODE_POINT:
|
||||
v = (OnigCodePoint )tok->u.code;
|
||||
v = tok->u.code;
|
||||
in_israw = 1;
|
||||
val_entry:
|
||||
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
|
||||
|
@ -4017,8 +4101,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
cc = prev_cc;
|
||||
}
|
||||
|
||||
cc->not = neg;
|
||||
if (cc->not != 0 &&
|
||||
if (neg != 0)
|
||||
CCLASS_SET_NOT(cc);
|
||||
else
|
||||
CCLASS_CLEAR_NOT(cc);
|
||||
if (IS_CCLASS_NOT(cc) &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
|
||||
int is_empty;
|
||||
|
||||
|
@ -4388,7 +4475,7 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
|
|||
for (j = 0; j < ccs[i].n; j++) {
|
||||
ci = &(ccs[i].items[j]);
|
||||
if (ci->len > 1) { /* compound only */
|
||||
if (cc->not) clear_not_flag_cclass(cc, enc);
|
||||
if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
|
||||
|
||||
clen = ci->len;
|
||||
for (k = 0; k < clen; k++) {
|
||||
|
@ -4417,6 +4504,98 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
|
|||
return n;
|
||||
}
|
||||
|
||||
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
|
||||
#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
|
||||
|
||||
/* for ctype node hash table */
|
||||
|
||||
/* Lookup key for the shared character-class table (OnigTypeCClassTable).
   parse_exp fills it from env->enc, the ctype being parsed and its
   negation flag; type_cclass_cmp treats two keys as equal only when all
   three members match. */
typedef struct {
|
||||
OnigEncoding enc;
|
||||
int not;
|
||||
int type;
|
||||
} type_cclass_key;
|
||||
|
||||
/*
 * Comparison callback for the shared character-class table.
 * Returns 0 when both keys carry the same type, encoding and negation
 * flag, and 1 as soon as any of the three differs.
 */
static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
{
  int same = (x->type == y->type) &&
             (x->enc  == y->enc)  &&
             (x->not  == y->not);

  return same ? 0 : 1;
}
|
||||
|
||||
/*
 * Hash callback for the shared character-class table.
 * Folds the bytes of key->enc and key->type into the hash, then adds the
 * negation flag.  Keys that compare equal in type_cclass_cmp hash equal.
 */
static int type_cclass_hash(type_cclass_key* key)
{
  unsigned int val = 0;   /* unsigned: wrap-around is well defined */
  unsigned int i;
  const unsigned char *p;

  /* Bound the walk by the size of the member being read.  The earlier
     bound, sizeof(OnigEncodingType), is the size of a different type and
     let the loop read bytes that do not belong to key->enc. */
  p = (const unsigned char* )&(key->enc);
  for (i = 0; i < (unsigned int )sizeof(key->enc); i++) {
    val = val * 997u + *p++;
  }

  p = (const unsigned char* )&(key->type);
  for (i = 0; i < (unsigned int )sizeof(key->type); i++) {
    val = val * 997u + *p++;
  }

  val += (unsigned int )key->not;
  return (int )(val + (val >> 5));
}
|
||||
|
||||
/*
 * key_free callback for the shared character-class table.
 * Releases the key with xfree and always reports 0.
 */
static int type_cclass_key_free(st_data_t x)
{
  type_cclass_key* key = (type_cclass_key* )x;

  xfree((void* )key);
  return 0;
}
|
||||
|
||||
/*
 * key_clone callback for the shared character-class table.
 * Allocates a new type_cclass_key with xmalloc, copies the given key into
 * it and returns the copy as an st_data_t.
 */
static st_data_t type_cclass_key_clone(st_data_t x)
{
  type_cclass_key* src = (type_cclass_key* )x;
  type_cclass_key* dup = (type_cclass_key* )xmalloc(sizeof(*dup));

  *dup = *src;
  return (st_data_t )dup;
}
|
||||
|
||||
/* Callback set passed to onig_st_init_table_with_size when parse_exp
   creates OnigTypeCClassTable.  Entries follow the member order of
   struct st_hash_type: compare, hash, key_free, key_clone. */
static struct st_hash_type type_type_cclass_hash = {
|
||||
type_cclass_cmp,
|
||||
type_cclass_hash,
|
||||
type_cclass_key_free,
|
||||
type_cclass_key_clone
|
||||
};
|
||||
|
||||
static st_table* OnigTypeCClassTable;
|
||||
|
||||
|
||||
static int
|
||||
i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
|
||||
{
|
||||
if (IS_NOT_NULL(node)) {
|
||||
CClassNode* cc = &(NCCLASS(node));
|
||||
if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
|
||||
xfree(node);
|
||||
}
|
||||
return ST_DELETE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_free_shared_cclass_table()
|
||||
{
|
||||
if (IS_NOT_NULL(OnigTypeCClassTable)) {
|
||||
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* USE_SHARED_CCLASS_TABLE */
|
||||
|
||||
|
||||
static int
|
||||
parse_exp(Node** np, OnigToken* tok, int term,
|
||||
UChar** src, UChar* end, ScanEnv* env)
|
||||
|
@ -4561,13 +4740,63 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
CClassNode* cc;
|
||||
int ctype, not;
|
||||
|
||||
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
OnigCodePoint *sbr, *mbr;
|
||||
|
||||
*np = node_new_cclass();
|
||||
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
|
||||
cc = &(NCCLASS(*np));
|
||||
add_ctype_to_cc(cc, ctype, 0, env);
|
||||
if (not != 0) CCLASS_SET_NOT(cc);
|
||||
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
|
||||
if (r == 0 &&
|
||||
ONIGENC_CODE_RANGE_NUM(mbr)
|
||||
>= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
|
||||
type_cclass_key key;
|
||||
type_cclass_key* new_key;
|
||||
|
||||
key.enc = env->enc;
|
||||
key.not = not;
|
||||
key.type = ctype;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
if (IS_NULL(OnigTypeCClassTable)) {
|
||||
OnigTypeCClassTable
|
||||
= onig_st_init_table_with_size(&type_type_cclass_hash, 10);
|
||||
if (IS_NULL(OnigTypeCClassTable)) {
|
||||
THREAD_ATOMIC_END;
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
|
||||
(st_data_t* )np)) {
|
||||
THREAD_ATOMIC_END;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
|
||||
if (IS_NULL(*np)) {
|
||||
THREAD_ATOMIC_END;
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
|
||||
CCLASS_SET_SHARE(&(NCCLASS(*np)));
|
||||
new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
|
||||
onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
|
||||
(st_data_t )*np);
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
|
||||
*np = node_new_cclass();
|
||||
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
|
||||
cc = &(NCCLASS(*np));
|
||||
add_ctype_to_cc(cc, ctype, 0, env);
|
||||
if (not != 0) CCLASS_SET_NOT(cc);
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -4605,7 +4834,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
for (i = 0; i < n; i++) {
|
||||
in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
|
||||
|
||||
if ((in_cc != 0 && cc->not == 0) || (in_cc == 0 && cc->not != 0)) {
|
||||
if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
|
||||
(in_cc == 0 && IS_CCLASS_NOT(cc))) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
|
||||
ccs[i].from >= SINGLE_BYTE_SIZE) {
|
||||
/* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
|
||||
|
|
18
regparse.h
18
regparse.h
|
@ -4,7 +4,7 @@
|
|||
regparse.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -95,8 +95,6 @@
|
|||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
|
||||
|
||||
#define CCLASS_SET_NOT(cc) (cc)->not = 1
|
||||
|
||||
#define NQ_TARGET_ISNOT_EMPTY 0
|
||||
#define NQ_TARGET_IS_EMPTY 1
|
||||
#define NQ_TARGET_IS_EMPTY_MEM 2
|
||||
|
@ -111,11 +109,14 @@ typedef struct {
|
|||
UChar buf[NODE_STR_BUF_SIZE];
|
||||
} StrNode;
|
||||
|
||||
/* move to regint.h */
|
||||
#if 0
|
||||
typedef struct {
|
||||
int not;
|
||||
int flags;
|
||||
BitSet bs;
|
||||
BBuf* mbuf; /* multi-byte info or NULL */
|
||||
} CClassNode;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int state;
|
||||
|
@ -280,6 +281,15 @@ typedef struct {
|
|||
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
|
||||
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
|
||||
|
||||
|
||||
#ifdef USE_NAMED_GROUP
|
||||
typedef struct {
|
||||
int new_val;
|
||||
} GroupNumRemap;
|
||||
|
||||
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
|
||||
#endif
|
||||
|
||||
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
||||
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
|
|
157
st.c
157
st.c
|
@ -6,12 +6,29 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "st.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef NOT_RUBY
|
||||
#include "regint.h"
|
||||
#else
|
||||
#ifdef RUBY_PLATFORM
|
||||
#define xmalloc ruby_xmalloc
|
||||
#define xcalloc ruby_xcalloc
|
||||
#define xrealloc ruby_xrealloc
|
||||
#define xfree ruby_xfree
|
||||
|
||||
void *xmalloc(long);
|
||||
void *xcalloc(long, long);
|
||||
void *xrealloc(void *, long);
|
||||
void xfree(void *);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "st.h"
|
||||
|
||||
typedef struct st_table_entry st_table_entry;
|
||||
|
||||
struct st_table_entry {
|
||||
|
@ -33,11 +50,14 @@ struct st_table_entry {
|
|||
* allocated initially
|
||||
*
|
||||
*/
|
||||
|
||||
static int numcmp(long, long);
|
||||
static int numhash(long);
|
||||
static struct st_hash_type type_numhash = {
|
||||
numcmp,
|
||||
numhash,
|
||||
st_nothing_key_free,
|
||||
st_nothing_key_clone
|
||||
};
|
||||
|
||||
/* extern int strcmp(const char *, const char *); */
|
||||
|
@ -45,19 +65,21 @@ static int strhash(const char *);
|
|||
static struct st_hash_type type_strhash = {
|
||||
strcmp,
|
||||
strhash,
|
||||
st_nothing_key_free,
|
||||
st_nothing_key_clone
|
||||
};
|
||||
|
||||
#ifdef RUBY_PLATFORM
|
||||
#define xmalloc ruby_xmalloc
|
||||
#define xcalloc ruby_xcalloc
|
||||
#define xrealloc ruby_xrealloc
|
||||
#define xfree ruby_xfree
|
||||
static int strend_cmp(st_strend_key*, st_strend_key*);
|
||||
static int strend_hash(st_strend_key*);
|
||||
static int strend_key_free(st_data_t key);
|
||||
static st_data_t strend_key_clone(st_data_t x);
|
||||
|
||||
void *xmalloc(long);
|
||||
void *xcalloc(long, long);
|
||||
void *xrealloc(void *, long);
|
||||
void xfree(void *);
|
||||
#endif
|
||||
static struct st_hash_type type_strend_hash = {
|
||||
strend_cmp,
|
||||
strend_hash,
|
||||
strend_key_free,
|
||||
strend_key_clone
|
||||
};
|
||||
|
||||
static void rehash(st_table *);
|
||||
|
||||
|
@ -125,7 +147,7 @@ new_size(size)
|
|||
int newsize;
|
||||
|
||||
for (i = 0, newsize = MINSIZE;
|
||||
i < sizeof(primes)/sizeof(primes[0]);
|
||||
i < (int )(sizeof(primes)/sizeof(primes[0]));
|
||||
i++, newsize <<= 1)
|
||||
{
|
||||
if (newsize > size) return primes[i];
|
||||
|
@ -206,6 +228,13 @@ st_init_strtable_with_size(size)
|
|||
return st_init_table_with_size(&type_strhash, size);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_strend_table_with_size(size)
|
||||
int size;
|
||||
{
|
||||
return st_init_table_with_size(&type_strend_hash, size);
|
||||
}
|
||||
|
||||
void
|
||||
st_free_table(table)
|
||||
st_table *table;
|
||||
|
@ -267,6 +296,21 @@ st_lookup(table, key, value)
|
|||
}
|
||||
}
|
||||
|
||||
int
|
||||
st_lookup_strend(table, str_key, end_key, value)
|
||||
st_table *table;
|
||||
unsigned char* str_key;
|
||||
unsigned char* end_key;
|
||||
st_data_t *value;
|
||||
{
|
||||
st_strend_key key;
|
||||
|
||||
key.s = (unsigned char* )str_key;
|
||||
key.end = (unsigned char* )end_key;
|
||||
|
||||
return st_lookup(table, (st_data_t )(&key), value);
|
||||
}
|
||||
|
||||
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
|
||||
do {\
|
||||
st_table_entry *entry;\
|
||||
|
@ -307,6 +351,22 @@ st_insert(table, key, value)
|
|||
}
|
||||
}
|
||||
|
||||
int
|
||||
st_insert_strend(table, str_key, end_key, value)
|
||||
st_table *table;
|
||||
unsigned char* str_key;
|
||||
unsigned char* end_key;
|
||||
st_data_t value;
|
||||
{
|
||||
st_strend_key* key;
|
||||
|
||||
key = alloc(st_strend_key);
|
||||
key->s = (unsigned char* )str_key;
|
||||
key->end = (unsigned char* )end_key;
|
||||
|
||||
return st_insert(table, (st_data_t )key, value);
|
||||
}
|
||||
|
||||
void
|
||||
st_add_direct(table, key, value)
|
||||
st_table *table;
|
||||
|
@ -320,6 +380,21 @@ st_add_direct(table, key, value)
|
|||
ADD_DIRECT(table, key, value, hash_val, bin_pos);
|
||||
}
|
||||
|
||||
void
|
||||
st_add_direct_strend(table, str_key, end_key, value)
|
||||
st_table *table;
|
||||
unsigned char* str_key;
|
||||
unsigned char* end_key;
|
||||
st_data_t value;
|
||||
{
|
||||
st_strend_key* key;
|
||||
|
||||
key = alloc(st_strend_key);
|
||||
key->s = (unsigned char* )str_key;
|
||||
key->end = (unsigned char* )end_key;
|
||||
st_add_direct(table, (st_data_t )key, value);
|
||||
}
|
||||
|
||||
static void
|
||||
rehash(table)
|
||||
register st_table *table;
|
||||
|
@ -379,6 +454,7 @@ st_copy(old_table)
|
|||
return 0;
|
||||
}
|
||||
*entry = *ptr;
|
||||
entry->key = old_table->type->key_clone(ptr->key);
|
||||
entry->next = new_table->bins[i];
|
||||
new_table->bins[i] = entry;
|
||||
ptr = ptr->next;
|
||||
|
@ -522,6 +598,7 @@ st_foreach(table, func, arg)
|
|||
last->next = ptr->next;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
table->type->key_free(tmp->key);
|
||||
free(tmp);
|
||||
table->num_entries--;
|
||||
}
|
||||
|
@ -581,3 +658,59 @@ numhash(n)
|
|||
{
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
st_nothing_key_free(st_data_t key) { return 0; }
|
||||
|
||||
extern st_data_t
|
||||
st_nothing_key_clone(st_data_t x) { return x; }
|
||||
|
||||
/*
 * Comparison callback for strend tables.
 * Returns 1 when the two byte ranges differ in length; otherwise returns
 * the difference of the first pair of differing bytes, or 0 when every
 * byte matches.
 */
static int strend_cmp(st_strend_key* x, st_strend_key* y)
{
  unsigned char *xp = x->s;
  unsigned char *yp = y->s;

  if ((x->end - x->s) != (y->end - y->s))
    return 1;

  for (; xp < x->end; xp++, yp++) {
    int diff = (int )*xp - (int )*yp;

    if (diff != 0) return diff;
  }

  return 0;
}
|
||||
|
||||
/*
 * Hash callback for strend tables: folds every byte in [x->s, x->end)
 * into the hash with a multiply-by-997 step.
 *
 * The accumulator is unsigned so that the multiplication wraps instead of
 * overflowing a signed int, which is undefined behaviour and is reached
 * by keys only a few bytes long.
 */
static int strend_hash(st_strend_key* x)
{
  unsigned int val = 0;
  const unsigned char *p;

  for (p = x->s; p < x->end; p++) {
    val = val * 997u + *p;
  }

  return (int )(val + (val >> 5));
}
|
||||
|
||||
/*
 * key_free callback for strend tables.
 * Releases the st_strend_key itself with xfree and returns 0; the bytes
 * the key points at are not touched.
 */
static int strend_key_free(st_data_t x)
{
  st_strend_key* key = (st_strend_key* )x;

  xfree((void* )key);
  return 0;
}
|
||||
|
||||
/*
 * key_clone callback for strend tables.
 * Allocates a new st_strend_key and copies the s/end pointers into it;
 * the copy refers to the same bytes as the original key.
 */
static st_data_t strend_key_clone(st_data_t x)
{
  st_strend_key* src = (st_strend_key* )x;
  st_strend_key* dup = alloc(st_strend_key);

  *dup = *src;
  return (st_data_t )dup;
}
|
||||
|
|
16
st.h
16
st.h
|
@ -14,6 +14,8 @@ typedef struct st_table st_table;
|
|||
/* Per-table callback set.  Every table type supplies all four members;
   tables that need no key handling use st_nothing_key_free and
   st_nothing_key_clone for the last two. */
struct st_hash_type {
|
||||
int (*compare)();
|
||||
int (*hash)();
|
||||
int (*key_free)(); /* called on an entry's key when st_foreach removes that entry */
|
||||
st_data_t (*key_clone)(); /* called by st_copy to obtain the key stored in the copied entry */
|
||||
};
|
||||
|
||||
struct st_table {
|
||||
|
@ -23,6 +25,11 @@ struct st_table {
|
|||
struct st_table_entry **bins;
|
||||
};
|
||||
|
||||
/* Key type for "strend" tables: a byte string given by a start pointer
   and an end pointer.  strend_cmp and strend_hash read bytes from s up to,
   but not including, end. */
typedef struct {
|
||||
unsigned char* s;
|
||||
unsigned char* end;
|
||||
} st_strend_key;
|
||||
|
||||
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
|
||||
|
||||
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
|
||||
|
@ -44,22 +51,27 @@ st_table *st_init_numtable _((void));
|
|||
st_table *st_init_numtable_with_size _((int));
|
||||
st_table *st_init_strtable _((void));
|
||||
st_table *st_init_strtable_with_size _((int));
|
||||
st_table *st_init_strend_table_with_size _((int));
|
||||
int st_delete _((st_table *, st_data_t *, st_data_t *));
|
||||
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
|
||||
int st_insert _((st_table *, st_data_t, st_data_t));
|
||||
int st_insert_strend _((st_table *, unsigned char*, unsigned char*, st_data_t));
|
||||
int st_lookup _((st_table *, st_data_t, st_data_t *));
|
||||
int st_lookup_strend _((st_table *, unsigned char*, unsigned char*, st_data_t*));
|
||||
void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
|
||||
void st_add_direct _((st_table *, st_data_t, st_data_t));
|
||||
void st_add_direct_strend _((st_table *, unsigned char*, unsigned char*, st_data_t));
|
||||
void st_free_table _((st_table *));
|
||||
void st_cleanup_safe _((st_table *, st_data_t));
|
||||
st_table *st_copy _((st_table *));
|
||||
|
||||
extern st_data_t st_nothing_key_clone _((st_data_t key));
|
||||
extern int st_nothing_key_free _((st_data_t key));
|
||||
|
||||
#define ST_NUMCMP ((int (*)()) 0)
|
||||
#define ST_NUMHASH ((int (*)()) -2)
|
||||
|
||||
#define st_numcmp ST_NUMCMP
|
||||
#define st_numhash ST_NUMHASH
|
||||
|
||||
int st_strhash();
|
||||
|
||||
#endif /* ST_INCLUDED */
|
||||
|
|
Loading…
Reference in a new issue