* ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported Oni Guruma 3.5.4.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7846 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ksaito 2005-01-28 15:21:48 +00:00
parent 7d765e5203
commit a19d6b33d7
12 changed files with 4237 additions and 446 deletions

View File

@ -1,3 +1,10 @@
Sat Jan 29 00:10:33 2005 Kazuo Saito <ksaito@uranus.dti.ne.jp>
* ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c,
regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h,
regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported
Oni Guruma 3.5.4.
Fri Jan 28 17:16:55 2005 Tanaka Akira <akr@m17n.org>
* lib/resolv.rb (Resolv::DNS::Config.parse_resolv_conf):

2
hash.c
View File

@ -102,6 +102,8 @@ rb_any_hash(a)
static struct st_hash_type objhash = {
rb_any_cmp,
rb_any_hash,
st_nothing_key_free,
st_nothing_key_clone
};
struct foreach_safe_arg {

View File

@ -1,8 +0,0 @@
Vim: Warning: Output is not to a terminal
7[?47h[?1h="/tmp/cvss7mRju" 4L, 229CCVS: ----------------------------------------------------------------------
CVS: Enter Log. Lines beginning with `CVS:' are removed automatically
CVS:
CVS: ----------------------------------------------------------------------
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ :q!-- INSERT --i-- INSERT --mported Oni Guruma 3.5.4."/private/tmp/cvss7mRju" 5L, 256C written
[?1l>[?47l8

View File

@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -31,8 +31,17 @@
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 3
#define ONIGURUMA_VERSION_MINOR 4
#define ONIGURUMA_VERSION_TEENY 0
#define ONIGURUMA_VERSION_MINOR 5
#define ONIGURUMA_VERSION_TEENY 4
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
# define HAVE_PROTOTYPES 1
# endif
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@ -72,12 +81,6 @@ typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
typedef struct {
OnigCodePoint from;
OnigCodePoint to;
} OnigCodePointRange;
/* ambiguous match flag */
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
@ -103,6 +106,11 @@ typedef unsigned int OnigAmbigType;
#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
/* code range */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
typedef struct {
int len;
OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
@ -152,7 +160,7 @@ typedef struct {
int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
UChar* (*left_adjust_char_head)(UChar* start, UChar* p);
int (*is_allowed_reverse_match)(UChar* p, UChar* end);
} OnigEncodingType;
@ -245,7 +253,6 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIGENC_CTYPE_ASCII (1<<13)
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
@ -275,7 +282,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
onigenc_get_left_adjust_char_head(enc, start, s)
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
@ -390,8 +397,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
(enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
(enc)->get_ctype_code_range(ctype,sbr,mbr)
ONIG_EXTERN
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
@ -600,7 +607,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108

249
regcomp.c
View File

@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -33,6 +33,21 @@
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif
/*
 * Copy the bytes in [s, end) into a newly xmalloc'ed buffer and append a
 * terminating zero byte.
 *
 * Returns the new buffer, or NULL when the range is empty (end <= s).
 * An allocation failure is handed to CHECK_NULL_RETURN, which is defined
 * outside this file section (presumably it returns NULL -- confirm).
 */
static UChar*
k_strdup(UChar* s, UChar* end)
{
  UChar* copy;
  int nbytes = end - s;

  /* Nothing to duplicate. */
  if (nbytes <= 0)
    return NULL;

  copy = (UChar* )xmalloc(nbytes + 1);
  CHECK_NULL_RETURN(copy);
  xmemcpy(copy, s, nbytes);
  copy[nbytes] = (UChar )0;
  return copy;
}
/*
Caution: node should not be a string node.
(s and end member address break)
@ -189,16 +204,14 @@ add_mem_num(regex_t* reg, int num)
return 0;
}
#if 0
static int
add_repeat_num(regex_t* reg, int num)
add_pointer(regex_t* reg, void* addr)
{
RepeatNumType n = (RepeatNumType )num;
PointerType ptr = (PointerType )addr;
BBUF_ADD(reg, &n, SIZE_REPEATNUM);
BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
#endif
static int
add_option(regex_t* reg, OnigOptionType option)
@ -518,6 +531,11 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
int len;
if (IS_CCLASS_SHARE(cc)) {
len = SIZE_OPCODE + SIZE_POINTER;
return len;
}
if (IS_NULL(cc->mbuf)) {
len = SIZE_OPCODE + SIZE_BITSET;
}
@ -543,22 +561,34 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
{
int r;
if (IS_CCLASS_SHARE(cc)) {
add_opcode(reg, OP_CCLASS_NODE);
r = add_pointer(reg, cc);
return r;
}
if (IS_NULL(cc->mbuf)) {
if (cc->not) add_opcode(reg, OP_CCLASS_NOT);
else add_opcode(reg, OP_CCLASS);
if (IS_CCLASS_NOT(cc))
add_opcode(reg, OP_CCLASS_NOT);
else
add_opcode(reg, OP_CCLASS);
r = add_bitset(reg, cc->bs);
}
else {
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT);
else add_opcode(reg, OP_CCLASS_MB);
if (IS_CCLASS_NOT(cc))
add_opcode(reg, OP_CCLASS_MB_NOT);
else
add_opcode(reg, OP_CCLASS_MB);
r = add_multi_byte_cclass(cc->mbuf, reg);
}
else {
if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT);
else add_opcode(reg, OP_CCLASS_MIX);
if (IS_CCLASS_NOT(cc))
add_opcode(reg, OP_CCLASS_MIX_NOT);
else
add_opcode(reg, OP_CCLASS_MIX);
r = add_bitset(reg, cc->bs);
if (r) return r;
@ -631,7 +661,6 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
else {
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
}
if (r) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
return r;
@ -1408,12 +1437,9 @@ compile_tree(Node* node, regex_t* reg)
}
#ifdef USE_NAMED_GROUP
typedef struct {
int new_val;
} NumMap;
static int
noname_disable_map(Node** plink, NumMap* map, int* counter)
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
{
int r = 0;
Node* node = *plink;
@ -1467,7 +1493,7 @@ noname_disable_map(Node** plink, NumMap* map, int* counter)
}
static int
renumber_node_backref(Node* node, NumMap* map)
renumber_node_backref(Node* node, GroupNumRemap* map)
{
int i, pos, n, old_num;
int *backs;
@ -1495,7 +1521,7 @@ renumber_node_backref(Node* node, NumMap* map)
}
static int
renumber_by_map(Node* node, NumMap* map)
renumber_by_map(Node* node, GroupNumRemap* map)
{
int r = 0;
@ -1560,9 +1586,9 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
{
int r, i, pos, counter;
BitStatusType loc;
NumMap* map;
GroupNumRemap* map;
map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
for (i = 1; i <= env->num_mem; i++) {
map[i].new_val = 0;
@ -1591,7 +1617,8 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
env->num_mem = env->num_named;
reg->num_mem = env->num_named;
return 0;
return onig_renumber_name_table(reg, map);
}
#endif /* USE_NAMED_GROUP */
@ -2092,10 +2119,10 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
}
if (cc->not == 0)
return found;
else
if (IS_CCLASS_NOT(cc))
return !found;
else
return found;
}
/* x is not included y ==> 1 : 0 */
@ -2158,7 +2185,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case N_CTYPE:
switch (NCTYPE(y).type) {
case CTYPE_WORD:
if (IS_NULL(xc->mbuf) && xc->not == 0) {
if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (BITSET_AT(xc->bs, i)) {
if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
@ -2171,7 +2198,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case CTYPE_NOT_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
if (xc->not == 0) {
if (!IS_CCLASS_NOT(xc)) {
if (BITSET_AT(xc->bs, i))
return 0;
}
@ -2196,14 +2223,16 @@ is_not_included(Node* x, Node* y, regex_t* reg)
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
v = BITSET_AT(xc->bs, i);
if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) {
if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
(v == 0 && IS_CCLASS_NOT(xc))) {
v = BITSET_AT(yc->bs, i);
if ((v != 0 && yc->not == 0) || (v == 0 && yc->not))
if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
(v == 0 && IS_CCLASS_NOT(yc)))
return 0;
}
}
if ((IS_NULL(xc->mbuf) && xc->not == 0) ||
(IS_NULL(yc->mbuf) && yc->not == 0))
if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
(IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
return 1;
return 0;
}
@ -3333,22 +3362,27 @@ typedef struct {
OptMapInfo map; /* boundary */
} NodeOptInfo;
static short int ByteValTable[] = {
14, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
static int
map_position_value(int i)
map_position_value(OnigEncoding enc, int i)
{
if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0]))
return (int )ByteValTable[i];
static short int ByteValTable[] = {
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
return 20;
else
return (int )ByteValTable[i];
}
else
return 4; /* Take it easy. */
}
@ -3634,7 +3668,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
}
static void
select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
{
int v1, v2;
@ -3643,8 +3677,8 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
if (v1 <= 2 && v2 <= 2) {
/* ByteValTable[x] is big value --> low price */
v2 = map_position_value(now->s[0]);
v1 = map_position_value(alt->s[0]);
v2 = map_position_value(enc, now->s[0]);
v1 = map_position_value(enc, alt->s[0]);
if (now->len > 1) v1 += 5;
if (alt->len > 1) v2 += 5;
@ -3660,13 +3694,29 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
static void
clear_opt_map_info(OptMapInfo* map)
{
int i;
static OptMapInfo clean_info = {
{0, 0}, {0, 0}, 0,
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
}
};
clear_mml(&map->mmd);
clear_opt_anc_info(&map->anc);
map->value = 0;
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
map->map[i] = 0;
xmemcpy(map, &clean_info, sizeof(OptMapInfo));
}
static void
@ -3676,11 +3726,11 @@ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
}
static void
add_char_opt_map_info(OptMapInfo* map, UChar c)
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
{
if (map->map[c] == 0) {
map->map[c] = 1;
map->value += map_position_value(c);
map->value += map_position_value(enc, c);
}
}
@ -3695,7 +3745,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
OnigPairAmbigCodes* pccs;
OnigAmbigType amb;
add_char_opt_map_info(map, p[0]);
add_char_opt_map_info(map, p[0], enc);
code = ONIGENC_MBC_TO_CODE(enc, p, end);
for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
@ -3706,7 +3756,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
if (pccs[i].from == code) {
len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
if (len < 0) return len;
add_char_opt_map_info(map, buf[0]);
add_char_opt_map_info(map, buf[0], enc);
}
}
@ -3718,7 +3768,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
ccode = ccs[i].items[j].code[0];
len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
if (len < 0) return len;
add_char_opt_map_info(map, buf[0]);
add_char_opt_map_info(map, buf[0], enc);
}
break;
}
@ -3761,7 +3811,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
}
static void
alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
{
int i, val;
@ -3780,7 +3830,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
to->map[i] = 1;
if (to->map[i])
val += map_position_value(i);
val += map_position_value(enc, i);
}
to->value = val;
@ -3813,7 +3863,7 @@ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
}
static void
concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
{
int exb_reach, exm_reach;
OptAncInfo tanc;
@ -3848,8 +3898,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
clear_opt_exact_info(&add->exb);
}
}
select_opt_exact_info(&to->exm, &add->exb);
select_opt_exact_info(&to->exm, &add->exm);
select_opt_exact_info(enc, &to->exm, &add->exb);
select_opt_exact_info(enc, &to->exm, &add->exm);
if (to->expr.len > 0) {
if (add->len.max > 0) {
@ -3857,9 +3907,9 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
to->expr.len = add->len.max;
if (to->expr.mmd.max == 0)
select_opt_exact_info(&to->exb, &to->expr);
select_opt_exact_info(enc, &to->exb, &to->expr);
else
select_opt_exact_info(&to->exm, &to->expr);
select_opt_exact_info(enc, &to->exm, &to->expr);
}
}
else if (add->expr.len > 0) {
@ -3878,7 +3928,7 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
alt_merge_opt_exact_info(&to->exb, &add->exb, env);
alt_merge_opt_exact_info(&to->exm, &add->exm, env);
alt_merge_opt_exact_info(&to->expr, &add->expr, env);
alt_merge_opt_map_info (&to->map, &add->map);
alt_merge_opt_map_info(env->enc, &to->map, &add->map);
alt_merge_mml(&to->len, &add->len);
}
@ -3908,7 +3958,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
if (r == 0) {
add_mml(&nenv.mmd, &nopt.len);
concat_left_node_opt_info(opt, &nopt);
concat_left_node_opt_info(env->enc, opt, &nopt);
}
} while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
}
@ -3939,7 +3989,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
NSTRING_IS_RAW(node), env->enc);
if (slen > 0) {
add_char_opt_map_info(&opt->map, *(sn->s));
add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
}
set_mml(&opt->len, slen, slen);
}
@ -3978,7 +4028,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
/* no need to check ignore case. (already set in setup_tree()) */
if (IS_NOT_NULL(cc->mbuf) || cc->not != 0) {
if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
@ -3987,8 +4037,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
else {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
z = BITSET_AT(cc->bs, i);
if ((z && !cc->not) || (!z && cc->not)) {
add_char_opt_map_info(&opt->map, (UChar )i);
if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
set_mml(&opt->len, 1, 1);
@ -4009,7 +4059,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case CTYPE_NOT_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
add_char_opt_map_info(&opt->map, (UChar )i);
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
break;
@ -4017,7 +4067,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case CTYPE_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
add_char_opt_map_info(&opt->map, (UChar )i);
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
break;
@ -4245,7 +4295,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
else {
int allow_reverse;
reg->exact = onig_strdup(e->s, e->s + e->len);
reg->exact = k_strdup(e->s, e->s + e->len);
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
@ -4334,7 +4384,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
}
if (opt.exb.len > 0 || opt.exm.len > 0) {
select_opt_exact_info(&opt.exb, &opt.exm);
select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
if (opt.map.value > 0 &&
comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
goto set_map;
@ -4506,7 +4556,7 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
#ifdef USE_NAMED_GROUP
onig_names_free(reg);
@ -4579,11 +4629,12 @@ onig_clone(regex_t** to, regex_t* from)
int r, size;
regex_t* reg;
if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
from->state++; /* increment as search counter */
if (IS_NOT_NULL(from->chain)) {
#ifdef USE_MULTI_THREAD_SYSTEM
if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(from);
if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(from);
from->state++;
ONIG_STATE_INC(from);
}
}
else {
@ -4593,19 +4644,20 @@ onig_clone(regex_t** to, regex_t* from)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
from->state++; /* increment as search counter */
ONIG_STATE_INC(from);
}
#endif /* USE_MULTI_THREAD_SYSTEM */
r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
from->enc, ONIG_SYNTAX_DEFAULT);
if (r != 0) {
from->state--;
ONIG_STATE_DEC(from);
return r;
}
xmemcpy(reg, from, sizeof(onig_t));
reg->state = ONIG_STATE_NORMAL;
reg->chain = (regex_t* )NULL;
reg->state = ONIG_STATE_NORMAL;
if (from->p) {
reg->p = (UChar* )xmalloc(reg->alloc);
@ -4638,12 +4690,12 @@ onig_clone(regex_t** to, regex_t* from)
reg->name_table = names_clone(from); /* names_clone is not implemented */
#endif
from->state--;
ONIG_STATE_DEC(from);
*to = reg;
return 0;
mem_error:
from->state--;
ONIG_STATE_DEC(from);
return ONIGERR_MEMORY;
}
#endif
@ -4839,6 +4891,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
(*reg)->state = ONIG_STATE_MODIFY;
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
@ -4847,7 +4900,6 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
else
option |= syntax->options;
(*reg)->state = ONIG_STATE_NORMAL;
(*reg)->enc = enc;
(*reg)->options = option;
(*reg)->syntax = syntax;
@ -4910,9 +4962,14 @@ onig_init()
return 0;
}
extern int
onig_end()
{
extern int onig_free_shared_cclass_table();
THREAD_ATOMIC_START;
#ifdef ONIG_DEBUG_STATISTICS
onig_print_statistics(stderr);
#endif
@ -4921,10 +4978,17 @@ onig_end()
onig_free_node_list();
#endif
#ifdef USE_SHARED_CCLASS_TABLE
onig_free_shared_cclass_table();
#endif
onig_inited = 0;
THREAD_ATOMIC_END;
return 0;
}
#ifdef ONIG_DEBUG
OnigOpInfoType OnigOpInfo[] = {
@ -4950,6 +5014,7 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
{ OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
{ OP_ANYCHAR, "anychar", ARG_NON },
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
@ -5203,6 +5268,16 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
case OP_CCLASS_NODE:
{
CClassNode *cc;
GET_POINTER_INC(cc, bp);
n = bitset_on_num(cc->bs);
fprintf(f, ":%u:%d", (unsigned int )cc, n);
}
break;
case OP_BACKREFN_IC:
mem = *((MemNumType* )bp);
bp += SIZE_MEMNUM;
@ -5330,7 +5405,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case N_CCLASS:
fprintf(f, "<cclass:%x>", (int )node);
if (NCCLASS(node).not) fputs(" not", f);
if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
if (NCCLASS(node).mbuf) {
BBuf* bbuf = NCCLASS(node).mbuf;
for (i = 0; i < bbuf->used; i++) {

184
regexec.c
View File

@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -274,7 +274,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
#define INVALID_STACK_INDEX -1
typedef int StackIndex;
typedef long StackIndex;
typedef struct _StackType {
unsigned int type;
@ -986,7 +986,7 @@ trap_ensure(VALUE arg)
TrapEnsureArg* ta = (TrapEnsureArg* )arg;
if (ta->state == 0) { /* trap_exec() is not normal return */
ta->reg->state--;
ONIG_STATE_DEC(ta->reg);
if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
xfree(ta->stk_base);
@ -1147,6 +1147,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code)
return ((low < n && code >= data[low * 2]) ? 1 : 0);
}
/*
 * Test whether a code point belongs to a (possibly negated) character class.
 *
 *   node   - the character class node (a CClassNode passed as void*)
 *   code   - code point of the character under test
 *   enclen - encoded byte length of that character
 *
 * A one-byte character is looked up in the node's bitset; a longer one is
 * looked up in the code range buffer cc->mbuf.  A class node may have no
 * range buffer at all (mbuf == NULL); such a class is treated as containing
 * no multibyte character instead of dereferencing the NULL buffer.
 *
 * Returns 1 if the character matches the class, 0 otherwise; the result is
 * inverted when the class is flagged as negated (IS_CCLASS_NOT).
 */
static int
code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
{
  unsigned int in_cc;
  CClassNode* cc = (CClassNode* )node;

  if (enclen == 1) {
    in_cc = BITSET_AT(cc->bs, code);
  }
  else if (IS_NULL(cc->mbuf)) {
    /* bitset-only class: no multibyte ranges to search */
    in_cc = 0;
  }
  else {
    UChar* p = ((BBuf* )(cc->mbuf))->p;
    in_cc = onig_is_in_code_range(p, code);
  }

  if (IS_CCLASS_NOT(cc)) {
    return (in_cc ? 0 : 1);
  }
  else {
    return (in_cc ? 1 : 0);
  }
}
/* matching region of POSIX API */
typedef int regoff_t;
@ -1340,14 +1361,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
DATA_ENSURE(1);
ss = s;
sp = p;
exact1_ic_retry:
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
if (*p != *q) goto fail;
if (*p != *q) {
#if 1
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
s = ss;
p = sp;
goto exact1_ic_retry;
}
else
goto fail;
#else
goto fail;
#endif
}
p++; q++;
}
}
@ -1424,7 +1462,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
GET_LENGTH_INC(tlen, p);
endp = p + tlen;
@ -1432,11 +1470,28 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
while (p < endp) {
sprev = s;
DATA_ENSURE(1);
ss = s;
sp = p;
exactn_ic_retry:
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
if (*p != *q) goto fail;
if (*p != *q) {
#if 1
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
s = ss;
p = sp;
goto exactn_ic_retry;
}
else
goto fail;
#else
goto fail;
#endif
}
p++; q++;
}
}
@ -1655,6 +1710,24 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
break;
case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
{
OnigCodePoint code;
void *node;
int mb_len;
UChar *ss;
DATA_ENSURE(1);
GET_POINTER_INC(node, p);
mb_len = enc_len(encode, s);
ss = s;
s += mb_len;
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
}
STAT_OP_OUT;
break;
case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
n = enc_len(encode, s);
@ -2519,13 +2592,26 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
UChar* t, UChar* tend, UChar* p, UChar* end)
{
int lowlen;
UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
UChar *q, *tsave, *psave, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
tsave = t;
psave = p;
retry:
while (t < tend) {
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
if (*t++ != *q++) return 0;
if (*t++ != *q++) {
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
t = tsave;
p = psave;
goto retry;
}
else
return 0;
}
lowlen--;
}
}
@ -2538,9 +2624,7 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
UChar* target, UChar* target_end,
UChar* text, UChar* text_end, UChar* text_range)
{
int lowlen;
UChar *t, *p, *s, *end, *z;
UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
UChar *s, *end;
end = text_end - (target_end - target) + 1;
if (end > text_range)
@ -2549,21 +2633,10 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
s = text;
while (s < end) {
z = s;
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf);
if (*target == *lowbuf) {
p = lowbuf + 1;
t = target + 1;
while (--lowlen > 0) {
if (*p != *t) break;
p++; t++;
}
if (lowlen == 0) {
if (str_lower_case_match(enc, ambig_flag,
t, target_end, s, text_end))
return z;
}
}
if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
return s;
s += enc_len(enc, s);
}
return (UChar* )NULL;
@ -2605,9 +2678,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
UChar* text, UChar* adjust_text,
UChar* text_end, UChar* text_start)
{
int len, lowlen;
UChar *t, *p, *s, *z;
UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
UChar *s;
s = text_end - (target_end - target);
if (s > text_start)
@ -2616,24 +2687,11 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
len = enc_len(enc, s);
z = s;
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf);
if (*target == *lowbuf) {
p = lowbuf + 1;
t = target + 1;
while (--lowlen > 0) {
if (*p != *t) break;
p++; t++;
}
if (lowlen == 0) {
if (str_lower_case_match(enc, ambig_flag,
t, target_end, s, text_end))
return z;
}
}
if (str_lower_case_match(enc, ambig_flag,
target, target_end, s, text_end))
return s;
s = onigenc_get_prev_char_head(enc, adjust_text, z);
s = onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
@ -2828,11 +2886,12 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
UChar *prev;
MatchArg msa;
if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
reg->state++; /* increment as search counter */
if (IS_NOT_NULL(reg->chain)) {
#ifdef USE_MULTI_THREAD_SYSTEM
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(reg);
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(reg);
reg->state++;
ONIG_STATE_INC(reg);
}
}
else {
@ -2842,8 +2901,9 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
reg->state++; /* increment as search counter */
ONIG_STATE_INC(reg);
}
#endif /* USE_MULTI_THREAD_SYSTEM */
MATCH_ARG_INIT(msa, option, region, at);
@ -2863,7 +2923,7 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
}
MATCH_ARG_FREE(msa);
reg->state--; /* decrement as search counter */
ONIG_STATE_DEC(reg);
return r;
}
@ -3098,11 +3158,12 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
UChar *s, *prev;
MatchArg msa;
if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
reg->state++; /* increment as search counter */
if (IS_NOT_NULL(reg->chain)) {
#ifdef USE_MULTI_THREAD_SYSTEM
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(reg);
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(reg);
reg->state++;
ONIG_STATE_INC(reg);
}
}
else {
@ -3112,8 +3173,9 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
reg->state++; /* increment as search counter */
ONIG_STATE_INC(reg);
}
#endif /* USE_MULTI_THREAD_SYSTEM */
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
@ -3360,7 +3422,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
finish:
MATCH_ARG_FREE(msa);
reg->state--; /* decrement as search counter */
ONIG_STATE_DEC(reg);
/* If the result is a mismatch and the FIND_NOT_EMPTY option is not given,
then the region is not set in match_at(). */
@ -3381,7 +3443,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
reg->state--; /* decrement as search counter */
ONIG_STATE_DEC(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
@ -3389,7 +3451,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return r;
match:
reg->state--; /* decrement as search counter */
ONIG_STATE_DEC(reg);
MATCH_ARG_FREE(msa);
return s - str;
}

107
regint.h
View File

@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -56,6 +56,7 @@
/* config */
/* spec. config */
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
@ -65,6 +66,8 @@
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
#define USE_ST_HASH_TABLE
#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
@ -76,17 +79,21 @@
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
/* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define CHECK_INTERRUPT /* depend on application */
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
#else
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
#define USE_MULTI_THREAD_SYSTEM
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
@ -101,17 +108,9 @@
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
#if defined(RUBY_VERSION_MAJOR)
#if RUBY_VERSION_MAJOR > 1 || \
(RUBY_VERSION_MAJOR == 1 && \
defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
#define USE_ST_HASH_TABLE
#endif
#endif
#endif /* else NOT_RUBY */
#define THREAD_PASS_LIMIT_COUNT 10
#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
@ -124,6 +123,69 @@
#define xalloca alloca
#endif
/* Adjust the `state` member of a regex object, which the search code uses
   as a search counter (incremented on entry, decremented on exit).
   The counter is only maintained when USE_MULTI_THREAD_SYSTEM is defined;
   otherwise both macros expand to nothing.
   The expansions are not parenthesized: use them only as stand-alone
   statements, e.g. `ONIG_STATE_INC(reg);`. */
#ifdef USE_MULTI_THREAD_SYSTEM
#define ONIG_STATE_INC(reg) (reg)->state++
#define ONIG_STATE_DEC(reg) (reg)->state--
#else
#define ONIG_STATE_INC(reg) /* Nothing */
#define ONIG_STATE_DEC(reg) /* Nothing */
#endif /* USE_MULTI_THREAD_SYSTEM */
#define onig_st_is_member st_is_member
#ifdef NOT_RUBY
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
#define st_init_numtable_with_size onig_st_init_numtable_with_size
#define st_init_strtable onig_st_init_strtable
#define st_init_strtable_with_size onig_st_init_strtable_with_size
#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
#define st_delete onig_st_delete
#define st_delete_safe onig_st_delete_safe
#define st_insert onig_st_insert
#define st_insert_strend onig_st_insert_strend
#define st_lookup onig_st_lookup
#define st_lookup_strend onig_st_lookup_strend
#define st_foreach onig_st_foreach
#define st_add_direct onig_st_add_direct
#define st_add_direct_strend onig_st_add_direct_strend
#define st_free_table onig_st_free_table
#define st_cleanup_safe onig_st_cleanup_safe
#define st_copy onig_st_copy
#define st_nothing_key_clone onig_st_nothing_key_clone
#define st_nothing_key_free onig_st_nothing_key_free
#else /* NOT_RUBY */
#define onig_st_init_table st_init_table
#define onig_st_init_table_with_size st_init_table_with_size
#define onig_st_init_numtable st_init_numtable
#define onig_st_init_numtable_with_size st_init_numtable_with_size
#define onig_st_init_strtable st_init_strtable
#define onig_st_init_strtable_with_size st_init_strtable_with_size
#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
#define onig_st_delete st_delete
#define onig_st_delete_safe st_delete_safe
#define onig_st_insert st_insert
#define onig_st_insert_strend st_insert_strend
#define onig_st_lookup st_lookup
#define onig_st_lookup_strend st_lookup_strend
#define onig_st_foreach st_foreach
#define onig_st_add_direct st_add_direct
#define onig_st_add_direct_strend st_add_direct_strend
#define onig_st_free_table st_free_table
#define onig_st_cleanup_safe st_cleanup_safe
#define onig_st_copy st_copy
#define onig_st_nothing_key_clone st_nothing_key_clone
#define onig_st_nothing_key_free st_nothing_key_free
#endif /* NOT_RUBY */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
@ -139,9 +201,11 @@
#endif
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
#ifndef __BORLANDC__
#include <sys/types.h>
#endif
#endif
#ifdef ONIG_DEBUG
# include <stdio.h>
@ -483,6 +547,7 @@ enum OpCode {
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
@ -570,6 +635,7 @@ typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
typedef void* PointerType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
@ -579,7 +645,7 @@ typedef short int MemNumType;
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#define SIZE_POINTER sizeof(PointerType)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@ -604,6 +670,7 @@ typedef short int MemNumType;
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
@ -679,6 +746,22 @@ typedef short int MemNumType;
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
/* cclass node */
/* Bits stored in CClassNode.flags. */
/* FLAG_CCLASS_NOT: the class is negated. */
#define FLAG_CCLASS_NOT 1
/* FLAG_CCLASS_SHARE: the node is shared; onig_node_free() returns without
   releasing a node that has this bit set. */
#define FLAG_CCLASS_SHARE (1<<1)
/* Flag accessors.  The SET/CLEAR forms expand to an unparenthesized
   assignment expression, so use them only as stand-alone statements. */
#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
/* Character class node payload. */
typedef struct {
/* combination of FLAG_CCLASS_* bits */
int flags;
/* bit set of member codes, filled with BITSET_SET_BIT() */
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
#ifdef ONIG_DEBUG
@ -700,13 +783,11 @@ extern void onig_print_statistics P_((FILE* f));
extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
#endif /* REGINT_H */

View File

@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -219,21 +219,26 @@ k_strcpy(UChar* dest, UChar* src, UChar* end)
}
}
extern UChar*
onig_strdup(UChar* s, UChar* end)
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
int len = end - s;
int slen, term_len, i;
UChar *r;
if (len > 0) {
UChar* r = (UChar* )xmalloc(len + 1);
CHECK_NULL_RETURN(r);
xmemcpy(r, s, len);
r[len] = (UChar )0;
return r;
}
else return NULL;
slen = end - s;
term_len = ONIGENC_MBC_MINLEN(enc);
r = (UChar* )xmalloc(slen + term_len);
CHECK_NULL_RETURN(r);
xmemcpy(r, s, slen);
for (i = 0; i < term_len; i++)
r[slen + i] = (UChar )0;
return r;
}
/* scan pattern methods */
#define PEND_VALUE 0
@ -298,7 +303,7 @@ typedef struct {
#ifdef USE_ST_HASH_TABLE
#include <st.h>
#include "st.h"
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
@ -335,7 +340,7 @@ onig_print_names(FILE* fp, regex_t* reg)
if (IS_NOT_NULL(t)) {
fprintf(fp, "name table\n");
st_foreach(t, i_print_name_entry, (HashDataType )fp);
onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
fputs("\n", fp);
}
return 0;
@ -356,7 +361,7 @@ names_clear(regex_t* reg)
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
st_foreach(t, i_free_name_entry, 0);
onig_st_foreach(t, i_free_name_entry, 0);
}
return 0;
}
@ -371,7 +376,7 @@ onig_names_free(regex_t* reg)
if (r) return r;
t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) st_free_table(t);
if (IS_NOT_NULL(t)) onig_st_free_table(t);
reg->name_table = (void* )NULL;
return 0;
}
@ -379,33 +384,12 @@ onig_names_free(regex_t* reg)
static NameEntry*
name_find(regex_t* reg, UChar* name, UChar* name_end)
{
int len;
UChar namebuf[NAMEBUF_SIZE_1];
UChar *key;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
e = (NameEntry* )NULL;
if (IS_NOT_NULL(t)) {
if (*name_end == '\0') {
key = name;
}
else {
/* dirty, but st.c API claims NULL terminated key. */
len = name_end - name;
if (len <= NAMEBUF_SIZE) {
xmemcpy(namebuf, name, len);
namebuf[len] = '\0';
key = namebuf;
}
else {
key = onig_strdup(name, name_end);
if (IS_NULL(key)) return (NameEntry* )NULL;
}
}
st_lookup(t, (HashDataType )key, (HashDataType * )&e);
if (key != name && key != namebuf) xfree(key);
onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
}
return e;
}
@ -422,7 +406,8 @@ static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
e->name + onigenc_str_bytelen_null(arg->enc, e->name),
/*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
e->name + e->name_len,
e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
arg->reg, arg->arg);
@ -447,11 +432,40 @@ onig_foreach_name(regex_t* reg,
narg.reg = reg;
narg.arg = arg;
narg.enc = reg->enc; /* should be pattern encoding. */
st_foreach(t, i_names, (HashDataType )&narg);
onig_st_foreach(t, i_names, (HashDataType )&narg);
}
return narg.ret;
}
/* Table-traversal callback: replace every group number recorded in name
   entry `e` with map[old_number].new_val.  `key` is not used.
   Always returns ST_CONTINUE so that the traversal goes on. */
static int
i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
{
  int count = e->back_num;

  if (count == 1) {
    /* exactly one reference: it is kept in back_ref1 */
    e->back_ref1 = map[e->back_ref1].new_val;
  }
  else if (count > 1) {
    /* several references: they are kept in the back_refs array */
    int idx = 0;

    while (idx < count) {
      e->back_refs[idx] = map[e->back_refs[idx]].new_val;
      idx++;
    }
  }

  return ST_CONTINUE;
}
/* Apply the group-number remapping `map` to every entry of the name table
   of `reg`.  Does nothing when the regex has no name table.
   Always returns 0. */
extern int
onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
{
  NameTable* names = (NameTable* )reg->name_table;

  if (IS_NULL(names)) return 0;

  onig_st_foreach(names, i_renumber_name, (HashDataType )map);
  return 0;
}
extern int
onig_number_of_names(regex_t* reg)
{
@ -617,14 +631,16 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
if (IS_NULL(e)) {
#ifdef USE_ST_HASH_TABLE
if (IS_NULL(t)) {
reg->name_table = t = st_init_strtable();
t = onig_st_init_strend_table_with_size(5);
reg->name_table = (void* )t;
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
e->name = onig_strdup(name, name_end);
e->name = strdup_with_null(reg->enc, name, name_end);
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
st_insert(t, (HashDataType )e->name, (HashDataType )e);
onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
(HashDataType )e);
e->name_len = name_end - name;
e->back_num = 0;
@ -669,7 +685,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
}
e = &(t->e[t->num]);
t->num++;
e->name = onig_strdup(name, name_end);
e->name = strdup_with_null(reg->enc, name, name_end);
e->name_len = name_end - name;
#endif
}
@ -886,8 +902,11 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
{
FreeNode* n = (FreeNode* )node;
THREAD_ATOMIC_START;
n->next = FreeNodeList;
FreeNodeList = n;
THREAD_ATOMIC_END;
}
#else
xfree(node);
@ -899,8 +918,15 @@ onig_node_free(Node* node)
break;
case N_CCLASS:
if (NCCLASS(node).mbuf)
bbuf_free(NCCLASS(node).mbuf);
{
CClassNode* cc = &(NCCLASS(node));
if (IS_CCLASS_SHARE(cc))
return ;
if (cc->mbuf)
bbuf_free(cc->mbuf);
}
break;
case N_QUALIFIER:
@ -927,8 +953,11 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
{
FreeNode* n = (FreeNode* )node;
THREAD_ATOMIC_START;
n->next = FreeNodeList;
FreeNodeList = n;
THREAD_ATOMIC_END;
}
#else
xfree(node);
@ -959,8 +988,10 @@ node_new()
#ifdef USE_RECYCLE_NODE
if (IS_NOT_NULL(FreeNodeList)) {
THREAD_ATOMIC_START;
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
THREAD_ATOMIC_END;
return node;
}
#endif
@ -974,8 +1005,8 @@ static void
initialize_cclass(CClassNode* cc)
{
BITSET_CLEAR(cc->bs);
cc->not = 0;
cc->mbuf = NULL;
cc->flags = 0;
cc->mbuf = NULL;
}
static Node*
@ -989,6 +1020,54 @@ node_new_cclass()
return node;
}
/* Create an N_CCLASS node directly from code point range tables.
 *
 *   not : non-zero to mark the class as negated (FLAG_CCLASS_NOT).
 *   sbr : range table whose codes are set in the node's bit set; may be NULL.
 *   mbr : range table for the multi-byte part; may be NULL.  The table is
 *         NOT copied: the node's mbuf points straight at `mbr`.
 *
 * Returns the new node, or NULL when an allocation fails.  On failure the
 * partially built node is released instead of being leaked.
 */
extern Node*
node_new_cclass_by_codepoint_range(int not,
                                   OnigCodePoint sbr[], OnigCodePoint mbr[])
{
  CClassNode* cc;
  int n, i, j;

  Node* node = node_new();
  CHECK_NULL_RETURN(node);
  node->type = N_CCLASS;

  cc = &(NCCLASS(node));
  cc->flags = 0;
  /* Set mbuf early so the node can be freed safely on the error path. */
  cc->mbuf  = NULL;
  if (not != 0) CCLASS_SET_NOT(cc);

  BITSET_CLEAR(cc->bs);
  if (IS_NOT_NULL(sbr)) {
    n = ONIGENC_CODE_RANGE_NUM(sbr);
    for (i = 0; i < n; i++) {
      for (j  = ONIGENC_CODE_RANGE_FROM(sbr, i);
           j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
        BITSET_SET_BIT(cc->bs, j);
      }
    }
  }

  if (IS_NOT_NULL(mbr)) {
    n = ONIGENC_CODE_RANGE_NUM(mbr);
    if (n != 0) {
      BBuf* bbuf = (BBuf* )xmalloc(sizeof(BBuf));
      if (IS_NULL(bbuf)) {
        /* do not leak the node that was already obtained */
        onig_node_free(node);
        return NULL;
      }

      bbuf->alloc = n + 1;
      bbuf->used  = n + 1;
      bbuf->p     = (UChar* )((void* )mbr);
      cc->mbuf = bbuf;
    }
  }

  return node;
}
static Node*
node_new_ctype(int type)
{
@ -1711,7 +1790,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
BBuf *tbuf;
int r;
if (cc->not != 0) {
if (IS_CCLASS_NOT(cc)) {
bitset_invert(cc->bs);
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
@ -1722,7 +1801,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
cc->mbuf = tbuf;
}
cc->not = 0;
CCLASS_CLEAR_NOT(cc);
}
return 0;
@ -1736,10 +1815,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
BitSetRef bsr1, bsr2;
BitSet bs1, bs2;
not1 = dest->not;
not1 = IS_CCLASS_NOT(dest);
bsr1 = dest->bs;
buf1 = dest->mbuf;
not2 = cc->not;
not2 = IS_CCLASS_NOT(cc);
bsr2 = cc->bs;
buf2 = cc->mbuf;
@ -1794,10 +1873,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
BitSetRef bsr1, bsr2;
BitSet bs1, bs2;
not1 = dest->not;
not1 = IS_CCLASS_NOT(dest);
bsr1 = dest->bs;
buf1 = dest->mbuf;
not2 = cc->not;
not2 = IS_CCLASS_NOT(cc);
bsr2 = cc->bs;
buf2 = cc->mbuf;
@ -2158,7 +2237,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
UChar* p = *src;
PFETCH_READY;
if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
switch (c) {
@ -2468,7 +2547,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
goto end;
if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
tok->escaped = 1;
@ -2576,9 +2655,9 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = (OnigCodePoint )num;
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = (OnigCodePoint )num;
}
break;
@ -2669,7 +2748,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PFETCH(c);
if (c == MC_ESC(enc)) {
if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
PFETCH(c);
@ -2907,9 +2986,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = (OnigCodePoint )num;
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = (OnigCodePoint )num;
}
break;
@ -3057,7 +3136,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (num < 0) return num;
/* set_raw: */
if (tok->u.c != num) {
tok->type = TK_CODE_POINT;
tok->type = TK_CODE_POINT;
tok->u.code = (OnigCodePoint )num;
}
else { /* string */
@ -3225,21 +3304,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
int nsb, int nmb,
OnigCodePointRange *sbr, OnigCodePointRange *mbr)
OnigCodePoint sbr[], OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
if (not == 0) {
for (i = 0; i < nsb; i++) {
for (j = sbr[i].from; j <= sbr[i].to; j++) {
for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
BITSET_SET_BIT(cc->bs, j);
}
}
for (i = 0; i < nmb; i++) {
r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to);
r = add_code_range_to_buf(&(cc->mbuf),
ONIGENC_CODE_RANGE_FROM(mbr, i),
ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r;
}
}
@ -3248,10 +3332,11 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
if (ONIGENC_MBC_MINLEN(enc) == 1) {
for (i = 0; i < nsb; i++) {
for (j = prev; j < sbr[i].from; j++) {
for (j = prev;
j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
BITSET_SET_BIT(cc->bs, j);
}
prev = sbr[i].to + 1;
prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
}
if (prev < 0x7f) {
for (j = prev; j < 0x7f; j++) {
@ -3263,11 +3348,12 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
}
for (i = 0; i < nmb; i++) {
if (prev < mbr[i].from) {
r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1);
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), prev,
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
if (r != 0) return r;
}
prev = mbr[i].to + 1;
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
if (prev < 0x7fffffff) {
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
@ -3282,14 +3368,12 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
int nsb, nmb;
OnigCodePointRange *sbr, *mbr;
OnigCodePoint *sbr, *mbr;
OnigEncoding enc = env->enc;
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr);
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
if (r == 0) {
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc,
nsb, nmb, sbr, mbr);
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
}
else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
@ -3349,8 +3433,8 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) &&
! ONIGENC_IS_CODE_WORD(enc, c))
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
&& ! ONIGENC_IS_CODE_WORD(enc, c))
BITSET_SET_BIT(cc->bs, c);
}
}
@ -3839,7 +3923,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CODE_POINT:
v = (OnigCodePoint )tok->u.code;
v = tok->u.code;
in_israw = 1;
val_entry:
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
@ -4017,8 +4101,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
cc = prev_cc;
}
cc->not = neg;
if (cc->not != 0 &&
if (neg != 0)
CCLASS_SET_NOT(cc);
else
CCLASS_CLEAR_NOT(cc);
if (IS_CCLASS_NOT(cc) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
int is_empty;
@ -4388,7 +4475,7 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
for (j = 0; j < ccs[i].n; j++) {
ci = &(ccs[i].items[j]);
if (ci->len > 1) { /* compound only */
if (cc->not) clear_not_flag_cclass(cc, enc);
if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
clen = ci->len;
for (k = 0; k < clen; k++) {
@ -4417,6 +4504,98 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
return n;
}
#ifdef USE_SHARED_CCLASS_TABLE
#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
/* for ctype node hash table */
/* Key of the shared character class table: one entry per
   (encoding, negation, ctype) combination. */
typedef struct {
/* encoding the class was built for */
OnigEncoding enc;
/* negation flag of the class */
int not;
/* ctype value */
int type;
} type_cclass_key;
/* Key comparison for the shared character class table.
   Returns 0 when both keys have the same type, encoding and negation flag,
   1 otherwise. */
static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
{
  if (x->type == y->type && x->enc == y->enc && x->not == y->not)
    return 0;

  return 1;
}
/* Hash function for the shared character class table.
 *
 * Mixes the bytes of the `enc` pointer and of the `type` field, then adds
 * the negation flag.  Only the bytes of the key's own members are read:
 * the loop bounds are the sizes of the members themselves, not the size of
 * the encoding structure that `enc` points to.  Reading past the key made
 * the hash depend on unrelated memory, so that equal keys could hash
 * differently.
 */
static int type_cclass_hash(type_cclass_key* key)
{
  int i, val;
  unsigned char *p;

  val = 0;

  p = (unsigned char* )&(key->enc);
  for (i = 0; i < (int )sizeof(key->enc); i++) {
    val = val * 997 + (int )*p++;
  }

  p = (unsigned char* )(&key->type);
  for (i = 0; i < (int )sizeof(key->type); i++) {
    val = val * 997 + (int )*p++;
  }

  val += key->not;
  return val + (val >> 5);
}
/* Key destructor for the shared character class table: releases the key
   storage with xfree().  Always returns 0. */
static int type_cclass_key_free(st_data_t x)
{
  void* key_mem = (void* )x;

  xfree(key_mem);
  return 0;
}
/* Key copier for the shared character class table: returns a newly
   allocated copy of the key passed in `x`.
   NOTE(review): the result of xmalloc() is not checked before it is
   written to. */
static st_data_t type_cclass_key_clone(st_data_t x)
{
  type_cclass_key* copy;

  copy = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
  *copy = *((type_cclass_key* )x);
  return (st_data_t )copy;
}
/* Hash table type descriptor for tables keyed by type_cclass_key:
   comparison, hash, key destructor and key copier, in that order. */
static struct st_hash_type type_type_cclass_hash = {
type_cclass_cmp,
type_cclass_hash,
type_cclass_key_free,
type_cclass_key_clone
};
/* Table of shared character class nodes, keyed by type_cclass_key.
   Stays NULL until parse_exp() creates it on first use. */
static st_table* OnigTypeCClassTable;
/* Table-traversal callback: release one shared character class node.
   Frees the node's mbuf (when present) and the node itself; `key` and
   `arg` are not used here.  Always returns ST_DELETE so that the entry is
   removed from the table. */
static int
i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
{
  CClassNode* cc;

  if (IS_NULL(node)) return ST_DELETE;

  cc = &(NCCLASS(node));
  if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
  xfree(node);

  return ST_DELETE;
}
extern int
onig_free_shared_cclass_table()
{
if (IS_NOT_NULL(OnigTypeCClassTable)) {
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
}
return 0;
}
#endif /* USE_SHARED_CCLASS_TABLE */
static int
parse_exp(Node** np, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
@ -4561,13 +4740,63 @@ parse_exp(Node** np, OnigToken* tok, int term,
CClassNode* cc;
int ctype, not;
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
#ifdef USE_SHARED_CCLASS_TABLE
OnigCodePoint *sbr, *mbr;
*np = node_new_cclass();
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
cc = &(NCCLASS(*np));
add_ctype_to_cc(cc, ctype, 0, env);
if (not != 0) CCLASS_SET_NOT(cc);
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
if (r == 0 &&
ONIGENC_CODE_RANGE_NUM(mbr)
>= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
type_cclass_key key;
type_cclass_key* new_key;
key.enc = env->enc;
key.not = not;
key.type = ctype;
THREAD_ATOMIC_START;
if (IS_NULL(OnigTypeCClassTable)) {
OnigTypeCClassTable
= onig_st_init_table_with_size(&type_type_cclass_hash, 10);
if (IS_NULL(OnigTypeCClassTable)) {
THREAD_ATOMIC_END;
return ONIGERR_MEMORY;
}
}
else {
if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
(st_data_t* )np)) {
THREAD_ATOMIC_END;
break;
}
}
*np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
if (IS_NULL(*np)) {
THREAD_ATOMIC_END;
return ONIGERR_MEMORY;
}
CCLASS_SET_SHARE(&(NCCLASS(*np)));
new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
(st_data_t )*np);
THREAD_ATOMIC_END;
}
else {
#endif
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
*np = node_new_cclass();
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
cc = &(NCCLASS(*np));
add_ctype_to_cc(cc, ctype, 0, env);
if (not != 0) CCLASS_SET_NOT(cc);
#ifdef USE_SHARED_CCLASS_TABLE
}
#endif
}
break;
@ -4605,7 +4834,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
for (i = 0; i < n; i++) {
in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
if ((in_cc != 0 && cc->not == 0) || (in_cc == 0 && cc->not != 0)) {
if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
(in_cc == 0 && IS_CCLASS_NOT(cc))) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
ccs[i].from >= SINGLE_BYTE_SIZE) {
/* if (cc->not) clear_not_flag_cclass(cc, env->enc); */

View File

@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -95,8 +95,6 @@
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
#define CCLASS_SET_NOT(cc) (cc)->not = 1
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
@ -111,11 +109,14 @@ typedef struct {
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
/* move to regint.h */
#if 0
typedef struct {
int not;
int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
#endif
typedef struct {
int state;
@ -280,6 +281,15 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
#ifdef USE_NAMED_GROUP
/* One slot of a group renumbering map.  The map is an array indexed by the
   old group number; new_val is the number that replaces it. */
typedef struct {
int new_val;
} GroupNumRemap;
/* Rewrite the group numbers stored in the name table of `reg` according
   to `map`. */
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));

157
st.c
View File

@ -6,12 +6,29 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "st.h"
#ifdef _WIN32
#include <malloc.h>
#endif
#ifdef NOT_RUBY
#include "regint.h"
#else
#ifdef RUBY_PLATFORM
#define xmalloc ruby_xmalloc
#define xcalloc ruby_xcalloc
#define xrealloc ruby_xrealloc
#define xfree ruby_xfree
void *xmalloc(long);
void *xcalloc(long, long);
void *xrealloc(void *, long);
void xfree(void *);
#endif
#endif
#include "st.h"
typedef struct st_table_entry st_table_entry;
struct st_table_entry {
@ -33,11 +50,14 @@ struct st_table_entry {
* allocated initially
*
*/
static int numcmp(long, long);
static int numhash(long);
static struct st_hash_type type_numhash = {
numcmp,
numhash,
st_nothing_key_free,
st_nothing_key_clone
};
/* extern int strcmp(const char *, const char *); */
@ -45,19 +65,21 @@ static int strhash(const char *);
static struct st_hash_type type_strhash = {
strcmp,
strhash,
st_nothing_key_free,
st_nothing_key_clone
};
#ifdef RUBY_PLATFORM
#define xmalloc ruby_xmalloc
#define xcalloc ruby_xcalloc
#define xrealloc ruby_xrealloc
#define xfree ruby_xfree
static int strend_cmp(st_strend_key*, st_strend_key*);
static int strend_hash(st_strend_key*);
static int strend_key_free(st_data_t key);
static st_data_t strend_key_clone(st_data_t x);
void *xmalloc(long);
void *xcalloc(long, long);
void *xrealloc(void *, long);
void xfree(void *);
#endif
static struct st_hash_type type_strend_hash = {
strend_cmp,
strend_hash,
strend_key_free,
strend_key_clone
};
static void rehash(st_table *);
@ -125,7 +147,7 @@ new_size(size)
int newsize;
for (i = 0, newsize = MINSIZE;
i < sizeof(primes)/sizeof(primes[0]);
i < (int )(sizeof(primes)/sizeof(primes[0]));
i++, newsize <<= 1)
{
if (newsize > size) return primes[i];
@ -206,6 +228,13 @@ st_init_strtable_with_size(size)
return st_init_table_with_size(&type_strhash, size);
}
/* Create a hash table whose keys are st_strend_key values (strings given
   by start and end pointers), using `size` as the initial size request. */
st_table*
st_init_strend_table_with_size(size)
    int size;
{
    st_table *tbl;

    tbl = st_init_table_with_size(&type_strend_hash, size);
    return tbl;
}
void
st_free_table(table)
st_table *table;
@ -267,6 +296,21 @@ st_lookup(table, key, value)
}
}
/* Look up the string [str_key, end_key) in a strend table.
   A temporary key on the stack is used for the probe; the result of
   st_lookup() is returned unchanged. */
int
st_lookup_strend(table, str_key, end_key, value)
    st_table *table;
    unsigned char* str_key;
    unsigned char* end_key;
    st_data_t *value;
{
    st_strend_key probe;

    probe.s   = str_key;
    probe.end = end_key;
    return st_lookup(table, (st_data_t )&probe, value);
}
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
do {\
st_table_entry *entry;\
@ -307,6 +351,22 @@ st_insert(table, key, value)
}
}
/* Insert `value` under the string [str_key, end_key) in a strend table.
 *
 * A key object is allocated for the call.  st_insert() reports an
 * already-present key with a non-zero result; in that case it only
 * replaces the stored value and does not keep the key passed in, so the
 * freshly allocated key is released here instead of being leaked.
 *
 * Returns the result of st_insert().
 */
int
st_insert_strend(table, str_key, end_key, value)
    st_table *table;
    unsigned char* str_key;
    unsigned char* end_key;
    st_data_t value;
{
    st_strend_key* key;
    int result;

    key = alloc(st_strend_key);
    key->s   = (unsigned char* )str_key;
    key->end = (unsigned char* )end_key;

    result = st_insert(table, (st_data_t )key, value);
    if (result != 0) {
	/* existing entry kept its own key: ours is not referenced */
	strend_key_free((st_data_t )key);
    }
    return result;
}
void
st_add_direct(table, key, value)
st_table *table;
@ -320,6 +380,21 @@ st_add_direct(table, key, value)
ADD_DIRECT(table, key, value, hash_val, bin_pos);
}
void
st_add_direct_strend(table, str_key, end_key, value)
st_table *table;
unsigned char* str_key;
unsigned char* end_key;
st_data_t value;
{
st_strend_key* key;
key = alloc(st_strend_key);
key->s = (unsigned char* )str_key;
key->end = (unsigned char* )end_key;
st_add_direct(table, (st_data_t )key, value);
}
static void
rehash(table)
register st_table *table;
@ -379,6 +454,7 @@ st_copy(old_table)
return 0;
}
*entry = *ptr;
entry->key = old_table->type->key_clone(ptr->key);
entry->next = new_table->bins[i];
new_table->bins[i] = entry;
ptr = ptr->next;
@ -522,6 +598,7 @@ st_foreach(table, func, arg)
last->next = ptr->next;
}
ptr = ptr->next;
table->type->key_free(tmp->key);
free(tmp);
table->num_entries--;
}
@ -581,3 +658,59 @@ numhash(n)
{
return n;
}
/* Key destructor for table types whose keys need no cleanup:
   does nothing and returns 0. */
extern int
st_nothing_key_free(st_data_t key)
{
    return 0;
}
/* Key copier for table types whose keys need no duplication:
   returns the key unchanged. */
extern st_data_t
st_nothing_key_clone(st_data_t x)
{
    return x;
}
/* Compare two strend keys.
   Returns 0 when both strings have the same length and the same bytes.
   Returns 1 when the lengths differ, otherwise the difference of the
   first pair of bytes that do not match. */
static int strend_cmp(st_strend_key* x, st_strend_key* y)
{
    unsigned char *a, *b;

    if ((x->end - x->s) != (y->end - y->s))
	return 1;

    for (a = x->s, b = y->s; a < x->end; a++, b++) {
	int diff = (int )*a - (int )*b;

	if (diff != 0) return diff;
    }

    return 0;
}
/* Hash the bytes of the string [x->s, x->end): multiply-and-add with 997
   for every byte, then fold in the value shifted right by 5. */
static int strend_hash(st_strend_key* x)
{
    unsigned char *cur;
    int acc = 0;

    for (cur = x->s; cur < x->end; cur++) {
	acc = acc * 997 + (int )*cur;
    }

    return acc + (acc >> 5);
}
/* Key destructor for strend tables: releases the key object with xfree().
   Always returns 0. */
static int strend_key_free(st_data_t x)
{
    void* key_mem = (void* )x;

    xfree(key_mem);
    return 0;
}
/* Key copier for strend tables: allocates a new key object holding the
   same start and end pointers as the key passed in `x`.  Only the key
   object is duplicated; the pointers themselves are copied as they are. */
static st_data_t strend_key_clone(st_data_t x)
{
    st_strend_key* src  = (st_strend_key* )x;
    st_strend_key* copy = alloc(st_strend_key);

    copy->s   = src->s;
    copy->end = src->end;
    return (st_data_t )copy;
}

16
st.h
View File

@ -14,6 +14,8 @@ typedef struct st_table st_table;
/* Set of callbacks describing how a table handles its keys. */
struct st_hash_type {
/* key comparison; 0 means the keys are equal */
int (*compare)();
/* key hash function */
int (*hash)();
/* called on the key of an entry that st_foreach() deletes */
int (*key_free)();
/* called by st_copy() to obtain the key for each copied entry */
st_data_t (*key_clone)();
};
struct st_table {
@ -23,6 +25,11 @@ struct st_table {
struct st_table_entry **bins;
};
typedef struct {
unsigned char* s;
unsigned char* end;
} st_strend_key;
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
@ -44,22 +51,27 @@ st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
st_table *st_init_strend_table_with_size _((int));
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
int st_insert_strend _((st_table *, unsigned char*, unsigned char*, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
int st_lookup_strend _((st_table *, unsigned char*, unsigned char*, st_data_t*));
void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
void st_add_direct _((st_table *, st_data_t, st_data_t));
void st_add_direct_strend _((st_table *, unsigned char*, unsigned char*, st_data_t));
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
extern st_data_t st_nothing_key_clone _((st_data_t key));
extern int st_nothing_key_free _((st_data_t key));
#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
#define st_numcmp ST_NUMCMP
#define st_numhash ST_NUMHASH
int st_strhash();
#endif /* ST_INCLUDED */

3486
utf8.c

File diff suppressed because it is too large Load Diff