From be587c77ef27f30112bb64459f1da83c0ad1deaa Mon Sep 17 00:00:00 2001 From: ksaito Date: Wed, 23 Feb 2005 12:47:23 +0000 Subject: [PATCH] * ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, regint.h, regparse.c, regparse.h, sjis.c, utf8.c: imported Oni Guruma 3.7.0. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8024 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 ++ oniguruma.h | 73 ++++++++++--------- regcomp.c | 6 +- regexec.c | 202 ++++++++++++++++++++++++++++++---------------------- regint.h | 2 +- regparse.c | 66 ++++++++--------- regparse.h | 10 +-- utf8.c | 22 +++--- 8 files changed, 215 insertions(+), 173 deletions(-) diff --git a/ChangeLog b/ChangeLog index 12a86948a3..71dfab63b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Feb 23 21:45:29 2005 Kazuo Saito + + * ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c, + regenc.c, regenc.h, regerror.c, regexec.c, regint.h, + regparse.c, regparse.h, sjis.c, utf8.c: imported Oni Guruma + 3.7.0. + Thu Feb 23 15:04:32 2005 akira yamada * lib/uri/generic.rb (split_userinfo): should split ":pass" into "" diff --git a/oniguruma.h b/oniguruma.h index 69dd939e5b..ef448cc698 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -35,7 +35,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 3 -#define ONIGURUMA_VERSION_MINOR 6 +#define ONIGURUMA_VERSION_MINOR 7 #define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus @@ -149,24 +149,24 @@ typedef m17n_encoding* OnigEncoding; #else typedef struct { - int (*mbc_enc_len)(UChar* p); + int (*mbc_enc_len)(const UChar* p); const char* name; int max_enc_len; int min_enc_len; OnigAmbigType support_ambig_flag; OnigMetaCharTableType meta_char_table; - int (*is_mbc_newline)(UChar* p, UChar* end); - OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end); + int (*is_mbc_newline)(const UChar* p, const UChar* end); + OnigCodePoint (*mbc_to_code)(const UChar* p, const UChar* end); int (*code_to_mbclen)(OnigCodePoint code); int (*code_to_mbc)(OnigCodePoint code, UChar *buf); - int (*mbc_to_normalize)(OnigAmbigType flag, UChar** pp, UChar* end, UChar* to); - int (*is_mbc_ambiguous)(OnigAmbigType flag, UChar** pp, UChar* end); + int (*mbc_to_normalize)(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* to); + int (*is_mbc_ambiguous)(OnigAmbigType flag, const UChar** pp, const UChar* end); int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs); int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs); int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]); - UChar* (*left_adjust_char_head)(UChar* start, UChar* p); - int (*is_allowed_reverse_match)(UChar* p, UChar* end); + UChar* (*left_adjust_char_head)(const UChar* start, const UChar* p); + int (*is_allowed_reverse_match)(const UChar* p, const UChar* end); } OnigEncodingType; typedef OnigEncodingType* OnigEncoding; @@ -257,7 +257,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; #define ONIGENC_CTYPE_ASCII (1<<13) #define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT) -#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) +#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) @@ -335,20 +335,20 @@ int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype)); ONIG_EXTERN int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); ONIG_EXTERN -int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end, UChar* buf)); +int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* buf)); ONIG_EXTERN -int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end)); +int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end)); ONIG_EXTERN -int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)); +int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const UChar* s, const UChar* end)); #else /* ONIG_RUBY_M17N */ #define ONIGENC_NAME(enc) ((enc)->name) #define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ - (enc)->mbc_to_normalize(flag,pp,end,buf) + (enc)->mbc_to_normalize(flag,(const UChar** )pp,end,buf) #define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ - (enc)->is_mbc_ambiguous(flag,pp,end) + (enc)->is_mbc_ambiguous(flag,(const UChar** )pp,end) #define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag) #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ (enc)->is_allowed_reverse_match(s,end) @@ -405,7 +405,7 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end) (enc)->get_ctype_code_range(ctype,sbr,mbr) ONIG_EXTERN -UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n)); +UChar* onigenc_step_back P_((OnigEncoding enc, const UChar* start, const UChar* s, int n)); #endif /* is not ONIG_RUBY_M17N */ @@ -418,21 +418,21 @@ int onigenc_set_default_encoding P_((OnigEncoding enc)); ONIG_EXTERN OnigEncoding onigenc_get_default_encoding P_(()); ONIG_EXTERN -void onigenc_set_default_caseconv_table P_((UChar* table)); +void onigenc_set_default_caseconv_table P_((const UChar* table)); ONIG_EXTERN -UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev)); +UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const UChar* start, const UChar* s, const UChar** prev)); ONIG_EXTERN -UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s)); ONIG_EXTERN -UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s)); ONIG_EXTERN -UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s)); ONIG_EXTERN -int onigenc_strlen P_((OnigEncoding enc, UChar* p, UChar* end)); +int onigenc_strlen P_((OnigEncoding enc, const UChar* p, const UChar* end)); ONIG_EXTERN -int onigenc_strlen_null P_((OnigEncoding enc, UChar* p)); +int onigenc_strlen_null P_((OnigEncoding enc, const UChar* p)); ONIG_EXTERN -int onigenc_str_bytelen_null P_((OnigEncoding enc, UChar* p)); +int onigenc_str_bytelen_null P_((OnigEncoding enc, const UChar* p)); @@ -704,8 +704,8 @@ typedef struct { int upper; } OnigRepeatRange; -typedef void (*OnigWarnFunc) P_((char* s)); -extern void onig_null_warn P_((char* s)); +typedef void (*OnigWarnFunc) P_((const char* s)); +extern void onig_null_warn P_((const char* s)); #define ONIG_NULL_WARN onig_null_warn #define ONIG_CHAR_TABLE_SIZE 256 @@ -782,22 +782,24 @@ void onig_set_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN -int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +int onig_new P_((regex_t**, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_new_deluxe P_((regex_t** reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +int onig_new_deluxe P_((regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN void onig_free P_((regex_t*)); ONIG_EXTERN -int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +int onig_recompile P_((regex_t*, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_recompile_deluxe P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +int onig_recompile_deluxe P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)); +int onig_search P_((regex_t*, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN -int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option)); +int onig_match P_((regex_t*, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN OnigRegion* onig_region_new P_((void)); ONIG_EXTERN +void onig_region_init P_((OnigRegion* region)); +ONIG_EXTERN void onig_region_free P_((OnigRegion* region, int free_self)); ONIG_EXTERN void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); @@ -806,12 +808,13 @@ void onig_region_clear P_((OnigRegion* region)); ONIG_EXTERN int onig_region_resize P_((OnigRegion* region, int n)); ONIG_EXTERN -int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end, - int** nums)); +int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); ONIG_EXTERN -int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region)); +int onig_name_to_group_numbers P_((regex_t* reg, const UChar* name, const UChar* name_end, int** nums)); ONIG_EXTERN -int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg)); +int onig_name_to_backref_number P_((regex_t* reg, const UChar* name, const UChar* name_end, OnigRegion *region)); +ONIG_EXTERN +int onig_foreach_name P_((regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)); ONIG_EXTERN int onig_number_of_names P_((regex_t* reg)); ONIG_EXTERN diff --git a/regcomp.c b/regcomp.c index 80bc13b4c5..5171b15a36 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4726,7 +4726,7 @@ static void print_tree P_((FILE* f, Node* node)); #endif extern int -onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo) { #define COMPILE_INIT_SIZE 20 @@ -4877,7 +4877,7 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end, } extern int -onig_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end, +onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo) { @@ -4939,7 +4939,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, } extern int -onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end, +onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo) { diff --git a/regexec.c b/regexec.c index 795a26dd76..2c082de423 100644 --- a/regexec.c +++ b/regexec.c @@ -163,8 +163,6 @@ onig_region_clear(OnigRegion* region) extern int onig_region_resize(OnigRegion* region, int n) { - int i; - region->num_regs = n; if (n < ONIG_NREGION) @@ -189,17 +187,36 @@ onig_region_resize(OnigRegion* region, int n) region->allocated = n; } - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - -#ifdef USE_CAPTURE_HISTORY - history_root_free(region); -#endif return 0; } -static void +extern int +onig_region_resize_clear(OnigRegion* region, int n) +{ + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); + return 0; +} + +extern int +onig_region_set(OnigRegion* region, int at, int beg, int end) +{ + if (at < 0) return ONIGERR_INVALID_ARGUMENT; + + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; + } + + region->beg[at] = beg; + region->end[at] = end; + return 0; +} + +extern void onig_region_init(OnigRegion* region) { region->num_regs = 0; @@ -344,7 +361,7 @@ typedef struct { int stack_n; OnigOptionType options; OnigRegion* region; - UChar* start; /* search start position (for \G: BEGIN_POSITION) */ + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ } MatchArg; #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ @@ -1127,7 +1144,7 @@ onig_print_statistics(FILE* f) #endif extern int -onig_is_in_code_range(UChar* p, OnigCodePoint code) +onig_is_in_code_range(const UChar* p, OnigCodePoint code) { OnigCodePoint n, *data; OnigCodePoint low, high, x; @@ -1180,7 +1197,7 @@ typedef struct { /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. */ static int -match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, +match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, UChar* sprev, MatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1226,7 +1243,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; - s = sstart; + s = (UChar* )sstart; while (1) { #ifdef ONIG_DEBUG_MATCH { @@ -1318,7 +1335,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, stkp = stk_base; r = make_capture_history_tree(region->history_root, &stkp, - stk, str, reg); + stk, (UChar* )str, reg); if (r < 0) { best_len = r; /* error code */ goto finish; @@ -1669,7 +1686,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (s + mb_len > end) { DATA_ENSURE(1); - s = end; + s = (UChar* )end; p += tlen; goto cc_mb_not_success; } @@ -2466,9 +2483,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); - s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; - sprev = onigenc_get_prev_char_head(encode, str, s); + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); STAT_OP_OUT; continue; break; @@ -2476,7 +2493,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); - q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(q)) { /* too short case -> success. ex. /(? text_range) end = text_range; - s = text; + s = (UChar* )text; while (s < end) { if (*s == *target) { @@ -2589,10 +2607,13 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, static int str_lower_case_match(OnigEncoding enc, int ambig_flag, - UChar* t, UChar* tend, UChar* p, UChar* end) + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) { int lowlen; - UChar *q, *tsave, *psave, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + const UChar* tsave; + const UChar* psave; tsave = t; psave = p; @@ -2622,15 +2643,16 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, static UChar* slow_search_ic(OnigEncoding enc, int ambig_flag, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *s, *end; - end = text_end - (target_end - target) + 1; + end = (UChar* )text_end; + end -= target_end - target - 1; if (end > text_range) end = text_range; - s = text; + s = (UChar* )text; while (s < end) { if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) @@ -2644,13 +2666,15 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, static UChar* slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *t, *p, *s; - s = text_end - (target_end - target); + s = (UChar* )text_end; + s -= (target_end - target); if (s > text_start) - s = text_start; + s = (UChar* )text_start; else s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); @@ -2666,7 +2690,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s = onigenc_get_prev_char_head(enc, adjust_text, s); + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; @@ -2674,15 +2698,16 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, static UChar* slow_search_backward_ic(OnigEncoding enc, int ambig_flag, - UChar* target,UChar* target_end, - UChar* text, UChar* adjust_text, - UChar* text_end, UChar* text_start) + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *s; - s = text_end - (target_end - target); + s = (UChar* )text_end; + s -= (target_end - target); if (s > text_start) - s = text_start; + s = (UChar* )text_start; else s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); @@ -2691,18 +2716,19 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, target, target_end, s, text_end)) return s; - s = onigenc_get_prev_char_head(enc, adjust_text, s); + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; } static UChar* -bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { - UChar *s, *t, *p, *end; - UChar *tail; + const UChar *s, *t, *p, *end; + const UChar *tail; int skip; #ifdef ONIG_DEBUG_SEARCH @@ -2728,7 +2754,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); skip = reg->map[*s]; p = s + 1; @@ -2748,7 +2774,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); skip = reg->int_map[*s]; p = s + 1; @@ -2765,11 +2791,11 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, } static UChar* -bm_search(regex_t* reg, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) { - UChar *s, *t, *p, *end; - UChar *tail; + const UChar *s, *t, *p, *end; + const UChar *tail; end = text_range + (target_end - target) - 1; if (end > text_end) @@ -2784,7 +2810,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); s += reg->map[*s]; } } @@ -2795,7 +2821,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); s += reg->int_map[*s]; } } @@ -2824,10 +2850,11 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) } static UChar* -bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, - UChar* adjust_text, UChar* text_end, UChar* text_start) +bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { - UChar *s, *t, *p; + const UChar *s, *t, *p; s = text_end - (target_end - target); if (text_start < s) @@ -2842,7 +2869,7 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, p++; t++; } if (t == target_end) - return s; + return (UChar* )s; s -= reg->int_map_backward[*s]; s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); @@ -2852,12 +2879,13 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, } static UChar* -map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range) +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range) { - UChar *s = text; + const UChar *s = text; while (s < text_range) { - if (map[*s]) return s; + if (map[*s]) return (UChar* )s; s += enc_len(enc, s); } @@ -2866,12 +2894,13 @@ map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range) static UChar* map_search_backward(OnigEncoding enc, UChar map[], - UChar* text, UChar* adjust_text, UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_start) { - UChar *s = text_start; + const UChar *s = text_start; while (s >= text) { - if (map[*s]) return s; + if (map[*s]) return (UChar* )s; s = onigenc_get_prev_char_head(enc, adjust_text, s); } @@ -2879,7 +2908,7 @@ map_search_backward(OnigEncoding enc, UChar map[], } extern int -onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) { int r; @@ -2912,13 +2941,13 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, && !IS_POSIX_REGION(option) #endif ) { - r = onig_region_resize(region, reg->num_mem + 1); + r = onig_region_resize_clear(region, reg->num_mem + 1); } else r = 0; if (r == 0) { - prev = onigenc_get_prev_char_head(reg->enc, str, at); + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, at, prev, &msa); } @@ -2928,7 +2957,7 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, } static int -forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, UChar* range, UChar** low, UChar** high, UChar** low_prev) { UChar *p, *pprev = (UChar* )NULL; @@ -2995,7 +3024,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, case ANCHOR_END_LINE: if (ON_STR_END(p)) { - prev = onigenc_get_prev_char_head(reg->enc, + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; @@ -3021,7 +3050,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, *low = p - reg->dmax; if (*low > s) { *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, low_prev); + *low, (const UChar** )low_prev); if (low_prev && IS_NULL(*low_prev)) *low_prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : s), *low); @@ -3053,8 +3082,9 @@ static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 static int -backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, - UChar* range, UChar* adjrange, UChar** low, UChar** high) +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) { int r; UChar *p; @@ -3151,8 +3181,8 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, extern int -onig_search(regex_t* reg, UChar* str, UChar* end, - UChar* start, UChar* range, OnigRegion* region, OnigOptionType option) +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) { int r; UChar *s, *prev; @@ -3178,8 +3208,9 @@ onig_search(regex_t* reg, UChar* str, UChar* end, #endif /* USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); + fprintf(stderr, + "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", + (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif if (region @@ -3187,7 +3218,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, && !IS_POSIX_REGION(option) #endif ) { - r = onig_region_resize(region, reg->num_mem + 1); + r = onig_region_resize_clear(region, reg->num_mem + 1); if (r) goto finish_no_msa; } @@ -3228,7 +3259,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, } } else if (reg->anchor & ANCHOR_END_BUF) { - semi_end = end; + semi_end = (UChar* )end; end_buf: if ((OnigDistance )(semi_end - str) < reg->anchor_dmin) @@ -3270,7 +3301,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, } } else { - semi_end = end; + semi_end = (UChar* )end; goto end_buf; } } @@ -3279,14 +3310,15 @@ onig_search(regex_t* reg, UChar* str, UChar* end, } } else if (str == end) { /* empty string */ - static UChar* address_for_empty_string = ""; + static const UChar* address_for_empty_string = ""; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search: empty string.\n"); #endif if (reg->threshold_len == 0) { - s = start = end = str = address_for_empty_string; + start = end = str = address_for_empty_string; + s = (UChar* )start; prev = (UChar* )NULL; MATCH_ARG_INIT(msa, option, region, start); @@ -3303,7 +3335,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, MATCH_ARG_INIT(msa, option, region, start); - s = start; + s = (UChar* )start; if (range > start) { /* forward search */ if (s > str) prev = onigenc_get_prev_char_head(reg->enc, str, s); @@ -3313,13 +3345,13 @@ onig_search(regex_t* reg, UChar* str, UChar* end, if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; - sch_range = range; + sch_range = (UChar* )range; if (reg->dmax != 0) { if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = end; + sch_range = (UChar* )end; else { sch_range += reg->dmax; - if (sch_range > end) sch_range = end; + if (sch_range > end) sch_range = (UChar* )end; } } if (reg->dmax != ONIG_INFINITE_DISTANCE && @@ -3368,13 +3400,13 @@ onig_search(regex_t* reg, UChar* str, UChar* end, if (range < end) adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); else - adjrange = end; + adjrange = (UChar* )end; if (reg->dmax != ONIG_INFINITE_DISTANCE && (end - range) >= reg->threshold_len) { do { sch_start = s + reg->dmax; - if (sch_start > end) sch_start = end; + if (sch_start > end) sch_start = (UChar* )end; if (backward_search_range(reg, str, end, sch_start, range, adjrange, &low, &high) <= 0) goto mismatch; @@ -3396,10 +3428,10 @@ onig_search(regex_t* reg, UChar* str, UChar* end, sch_start = s; if (reg->dmax != 0) { if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = end; + sch_start = (UChar* )end; else { sch_start += reg->dmax; - if (sch_start > end) sch_start = end; + if (sch_start > end) sch_start = (UChar* )end; else sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, start, sch_start); diff --git a/regint.h b/regint.h index 4cfd9c9768..a704b0e263 100644 --- a/regint.h +++ b/regint.h @@ -785,7 +785,7 @@ extern char* onig_error_code_to_format P_((int code)); extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...)); extern int onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax)); -extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo)); +extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_chain_reduce P_((regex_t* reg)); extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); extern void onig_transfer P_((regex_t* to, regex_t* from)); diff --git a/regparse.c b/regparse.c index 6b2ccde2de..58e122f486 100644 --- a/regparse.c +++ b/regparse.c @@ -58,7 +58,7 @@ OnigSyntaxType OnigSyntaxRuby = { OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; -extern void onig_null_warn(char* s) { } +extern void onig_null_warn(const char* s) { } #ifdef DEFAULT_WARN_FUNCTION static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; @@ -198,7 +198,7 @@ bitset_copy(BitSetRef dest, BitSetRef bs) } extern int -onig_strncmp(UChar* s1, UChar* s2, int n) +onig_strncmp(const UChar* s1, const UChar* s2, int n) { int x; @@ -210,7 +210,7 @@ onig_strncmp(UChar* s1, UChar* s2, int n) } static void -k_strcpy(UChar* dest, UChar* src, UChar* end) +k_strcpy(UChar* dest, const UChar* src, const UChar* end) { int len = end - src; if (len > 0) { @@ -259,7 +259,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) static UChar* -k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end, +k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, int capa) { UChar* r; @@ -277,7 +277,7 @@ k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end, /* dest on static area */ static UChar* strcat_capa_from_static(UChar* dest, UChar* dest_end, - UChar* src, UChar* src_end, int capa) + const UChar* src, const UChar* src_end, int capa) { UChar* r; @@ -382,7 +382,7 @@ onig_names_free(regex_t* reg) } static NameEntry* -name_find(regex_t* reg, UChar* name, UChar* name_end) +name_find(regex_t* reg, const UChar* name, const UChar* name_end) { NameEntry* e; NameTable* t = (NameTable* )reg->name_table; @@ -395,7 +395,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) } typedef struct { - int (*func)(UChar*,UChar*,int,int*,regex_t*,void*); + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); regex_t* reg; void* arg; int ret; @@ -420,8 +420,8 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg) extern int onig_foreach_name(regex_t* reg, - int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) { INamesArg narg; NameTable* t = (NameTable* )reg->name_table; @@ -585,8 +585,8 @@ name_find(regex_t* reg, UChar* name, UChar* name_end) extern int onig_foreach_name(regex_t* reg, - int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) { int i, r; NameEntry* e; @@ -725,8 +725,8 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) } extern int -onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, - int** nums) +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) { NameEntry* e; @@ -747,8 +747,8 @@ onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, } extern int -onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end, - OnigRegion *region) +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion *region) { int i, n, *nums; @@ -773,23 +773,23 @@ onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end, #else /* USE_NAMED_GROUP */ extern int -onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end, - int** nums) +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) { return ONIG_NO_SUPPORT_CONFIG; } extern int -onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end, - OnigRegion* region) +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion* region) { return ONIG_NO_SUPPORT_CONFIG; } extern int onig_foreach_name(regex_t* reg, - int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) { return ONIG_NO_SUPPORT_CONFIG; } @@ -1248,7 +1248,7 @@ node_new_option(OnigOptionType option) } extern int -onig_node_str_cat(Node* node, UChar* s, UChar* end) +onig_node_str_cat(Node* node, const UChar* s, const UChar* end) { int addlen = end - s; @@ -1318,7 +1318,7 @@ onig_node_str_clear(Node* node) } static Node* -node_new_str(UChar* s, UChar* end) +node_new_str(const UChar* s, const UChar* end) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1336,7 +1336,7 @@ node_new_str(UChar* s, UChar* end) } extern Node* -onig_node_new_str(UChar* s, UChar* end) +onig_node_new_str(const UChar* s, const UChar* end) { return node_new_str(s, end); } @@ -1367,7 +1367,7 @@ node_new_str_raw_char(UChar c) static Node* str_node_split_last_char(StrNode* sn, OnigEncoding enc) { - UChar *p; + const UChar *p; Node* n = NULL_NODE; if (sn->end > sn->s) { @@ -1376,7 +1376,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) n = node_new_str(p, sn->end); if ((sn->flag & NSTR_RAW) != 0) NSTRING_SET_RAW(n); - sn->end = p; + sn->end = (UChar* )p; } } return n; @@ -1392,7 +1392,7 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc) } extern int -onig_scan_unsigned_number(UChar** src, UChar* end, OnigEncoding enc) +onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) { unsigned int num, val; OnigCodePoint c; @@ -3541,7 +3541,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { - p = onigenc_step(enc, p, end, pb->len); + p = (UChar* )onigenc_step(enc, p, end, pb->len); if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0) return ONIGERR_INVALID_POSIX_BRACKET_TYPE; @@ -5068,7 +5068,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) } extern int -onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg, +onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env) { int r; @@ -5083,13 +5083,13 @@ onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg, env->ambig_flag = reg->ambig_flag; env->enc = reg->enc; env->syntax = reg->syntax; - env->pattern = pattern; - env->pattern_end = end; + env->pattern = (UChar* )pattern; + env->pattern_end = (UChar* )end; env->reg = reg; *root = NULL; - p = pattern; - r = parse_regexp(root, &p, end, env); + p = (UChar* )pattern; + r = parse_regexp(root, &p, (UChar* )end, env); reg->num_mem = env->num_mem; return r; } diff --git a/regparse.h b/regparse.h index 6014b9290b..1a4ac7dea2 100644 --- a/regparse.h +++ b/regparse.h @@ -291,21 +291,21 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); #endif extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); -extern int onig_strncmp P_((UChar* s1, UChar* s2, int n)); +extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); -extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc)); +extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode)); extern void onig_node_conv_to_str_node P_((Node* node, int raw)); -extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end)); +extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); extern Node* onig_node_new_effect P_((int type)); extern Node* onig_node_new_anchor P_((int type)); -extern Node* onig_node_new_str P_((UChar* s, UChar* end)); +extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); extern void onig_node_str_clear P_((Node* node)); extern int onig_free_node_list(); extern int onig_names_free P_((regex_t* reg)); -extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env)); +extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); #ifdef ONIG_DEBUG #ifdef USE_NAMED_GROUP diff --git a/utf8.c b/utf8.c index e7095baa5c..592bebfe8f 100644 --- a/utf8.c +++ b/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2005 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,13 +60,13 @@ static int EncLen_UTF8[] = { }; static int -utf8_mbc_enc_len(UChar* p) +utf8_mbc_enc_len(const UChar* p) { return EncLen_UTF8[*p]; } static OnigCodePoint -utf8_mbc_to_code(UChar* p, UChar* end) +utf8_mbc_to_code(const UChar* p, const UChar* end) { int c, len; OnigCodePoint n; @@ -195,9 +195,9 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf) } static int -utf8_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower) +utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) { - UChar* p = *pp; + const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { if (end > p + 1 && @@ -260,9 +260,9 @@ utf8_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower) } static int -utf8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end) +utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { - UChar* p = *pp; + const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { if (end > p + 1 && @@ -3696,15 +3696,15 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static UChar* -utf8_left_adjust_char_head(UChar* start, UChar* s) +utf8_left_adjust_char_head(const UChar* start, const UChar* s) { - UChar *p; + const UChar *p; - if (s <= start) return s; + if (s <= start) return (UChar* )s; p = s; while (!utf8_islead(*p) && p > start) p--; - return p; + return (UChar* )p; } OnigEncodingType OnigEncodingUTF8 = {