1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* include/ruby/oniguruma.h (onigenc_get_prev_char_head): add end argument.

* include/ruby/encoding.h (rb_enc_prev_char): ditto.

* regenc.c (onigenc_get_prev_char_head): add end argument.

* regparse.c: follow the interface change.

* regexec.c: ditto.

* string.c: ditto.

* parse.y: ditto.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19332 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-09-13 18:22:04 +00:00
parent d3974573e7
commit a953d28e3c
8 changed files with 50 additions and 33 deletions

View file

@ -1,3 +1,20 @@
Sun Sep 14 03:20:03 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/oniguruma.h (onigenc_get_prev_char_head): add end
argument.
* include/ruby/encoding.h (rb_enc_prev_char): ditto.
* regenc.c (onigenc_get_prev_char_head): add end argument.
* regparse.c: follow the interface change.
* regexec.c: ditto.
* string.c: ditto.
* parse.y: ditto.
Sun Sep 14 02:04:28 2008 Tanaka Akira <akr@fsij.org> Sun Sep 14 02:04:28 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/oniguruma.h * include/ruby/oniguruma.h

View file

@ -129,9 +129,9 @@ int rb_enc_codelen(int code, rb_encoding *enc);
/* code,ptr,encoding -> write buf */ /* code,ptr,encoding -> write buf */
#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf)) #define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf))
/* ptr, ptr, encoding -> prev_char */ /* start, ptr, end, encoding -> prev_char */
#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p)) #define rb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
/* ptr, ptr, encoding -> next_char */ /* start, ptr, end, encoding -> next_char */
#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p)) #define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
#define rb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) #define rb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))

View file

@ -305,7 +305,7 @@ void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
ONIG_EXTERN ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev)); OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev));
ONIG_EXTERN ONIG_EXTERN
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
ONIG_EXTERN ONIG_EXTERN
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN ONIG_EXTERN

View file

@ -4877,11 +4877,11 @@ parser_yyerror(struct parser_params *parser, const char *msg)
if (len > max_line_margin * 2 + 10) { if (len > max_line_margin * 2 + 10) {
if (lex_p - p > max_line_margin) { if (lex_p - p > max_line_margin) {
p = rb_enc_prev_char(p, lex_p - max_line_margin, rb_enc_get(lex_lastline)); p = rb_enc_prev_char(p, lex_p - max_line_margin, pe, rb_enc_get(lex_lastline));
pre = "..."; pre = "...";
} }
if (pe - lex_p > max_line_margin) { if (pe - lex_p > max_line_margin) {
pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, rb_enc_get(lex_lastline)); pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, pe, rb_enc_get(lex_lastline));
post = "..."; post = "...";
} }
len = pe - p; len = pe - p;

View file

@ -88,7 +88,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
} }
extern UChar* extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
{ {
if (s <= start) if (s <= start)
return (UChar* )NULL; return (UChar* )NULL;

View file

@ -2649,7 +2649,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p); GET_LENGTH_INC(tlen, p);
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail; if (IS_NULL(s)) goto fail;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
MOP_OUT; MOP_OUT;
continue; continue;
break; break;
@ -2667,7 +2667,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else { else {
STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
s = q; s = q;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
} }
MOP_OUT; MOP_OUT;
continue; continue;
@ -2857,7 +2857,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end) if (t == target_end)
return s; return s;
} }
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
} }
return (UChar* )NULL; return (UChar* )NULL;
@ -2883,7 +2883,7 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
target, target_end, s, text_end)) target, target_end, s, text_end))
return s; return s;
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
} }
return (UChar* )NULL; return (UChar* )NULL;
@ -3053,14 +3053,14 @@ map_search(OnigEncoding enc, UChar map[],
static UChar* static UChar*
map_search_backward(OnigEncoding enc, UChar map[], map_search_backward(OnigEncoding enc, UChar map[],
const UChar* text, const UChar* adjust_text, const UChar* text, const UChar* adjust_text,
const UChar* text_start) const UChar* text_start, const UChar* text_end)
{ {
const UChar *s = text_start; const UChar *s = text_start;
while (s >= text) { while (s >= text) {
if (map[*s]) return (UChar* )s; if (map[*s]) return (UChar* )s;
s = onigenc_get_prev_char_head(enc, adjust_text, s); s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
} }
return (UChar* )NULL; return (UChar* )NULL;
} }
@ -3117,7 +3117,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
r = 0; r = 0;
if (r == 0) { if (r == 0) {
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
r = match_at(reg, str, end, r = match_at(reg, str, end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
end, end,
@ -3190,7 +3190,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
case ANCHOR_BEGIN_LINE: case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) { if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc, prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p); (pprev ? pprev : str), p, end);
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate; goto retry_gate;
} }
@ -3219,10 +3219,10 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
*low = p; *low = p;
if (low_prev) { if (low_prev) {
if (*low > s) if (*low > s)
*low_prev = onigenc_get_prev_char_head(reg->enc, s, p); *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
else else
*low_prev = onigenc_get_prev_char_head(reg->enc, *low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p); (pprev ? pprev : str), p, end);
} }
} }
else { else {
@ -3233,12 +3233,12 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
*low, end, (const UChar** )low_prev); *low, end, (const UChar** )low_prev);
if (low_prev && IS_NULL(*low_prev)) if (low_prev && IS_NULL(*low_prev))
*low_prev = onigenc_get_prev_char_head(reg->enc, *low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : s), *low); (pprev ? pprev : s), *low, end);
} }
else { else {
if (low_prev) if (low_prev)
*low_prev = onigenc_get_prev_char_head(reg->enc, *low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), *low); (pprev ? pprev : str), *low, end);
} }
} }
} }
@ -3301,7 +3301,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
break; break;
case ONIG_OPTIMIZE_MAP: case ONIG_OPTIMIZE_MAP:
p = map_search_backward(reg->enc, reg->map, range, adjrange, p); p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
break; break;
} }
@ -3312,7 +3312,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
switch (reg->sub_anchor) { switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE: case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) { if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc, str, p); prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev; p = prev;
goto retry; goto retry;
@ -3336,7 +3336,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
&& ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif #endif
) { ) {
p = onigenc_get_prev_char_head(reg->enc, adjrange, p); p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
if (IS_NULL(p)) goto fail; if (IS_NULL(p)) goto fail;
goto retry; goto retry;
} }
@ -3508,7 +3508,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (start < end) if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
else { /* match with empty at end */ else { /* match with empty at end */
start = onigenc_get_prev_char_head(reg->enc, str, end); start = onigenc_get_prev_char_head(reg->enc, str, end, end);
} }
} }
if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
@ -3594,7 +3594,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start; s = (UChar* )start;
if (range > start) { /* forward search */ if (range > start) { /* forward search */
if (s > str) if (s > str)
prev = onigenc_get_prev_char_head(reg->enc, str, s); prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
else else
prev = (UChar* )NULL; prev = (UChar* )NULL;
@ -3687,7 +3687,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
s = high; s = high;
while (s >= low) { while (s >= low) {
prev = onigenc_get_prev_char_head(reg->enc, str, s); prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
MATCH_AND_RETURN_CHECK(orig_start); MATCH_AND_RETURN_CHECK(orig_start);
s = prev; s = prev;
} }
@ -3715,7 +3715,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
} }
do { do {
prev = onigenc_get_prev_char_head(reg->enc, str, s); prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
MATCH_AND_RETURN_CHECK(orig_start); MATCH_AND_RETURN_CHECK(orig_start);
s = prev; s = prev;
} while (s >= range); } while (s >= range);

View file

@ -1540,7 +1540,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc)
Node* n = NULL_NODE; Node* n = NULL_NODE;
if (sn->end > sn->s) { if (sn->end > sn->s) {
p = onigenc_get_prev_char_head(enc, sn->s, sn->end); p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
if (p && p > sn->s) { /* can be splitted. */ if (p && p > sn->s) { /* can be splitted. */
n = node_new_str(p, sn->end); n = node_new_str(p, sn->end);
if ((sn->flag & NSTR_RAW) != 0) if ((sn->flag & NSTR_RAW) != 0)

View file

@ -1291,10 +1291,10 @@ rb_str_substr(VALUE str, long beg, long len)
if (len > -beg) len = -beg; if (len > -beg) len = -beg;
if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) { if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
beg = -beg; beg = -beg;
while (beg-- > len && (e = rb_enc_prev_char(s, e, enc)) != 0); while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
p = e; p = e;
if (!p) return Qnil; if (!p) return Qnil;
while (len-- > 0 && (p = rb_enc_prev_char(s, p, enc)) != 0); while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
if (!p) return Qnil; if (!p) return Qnil;
len = e - p; len = e - p;
goto sub; goto sub;
@ -2572,7 +2572,7 @@ rb_str_succ(VALUE orig)
sbeg = RSTRING_PTR(str); sbeg = RSTRING_PTR(str);
s = e = sbeg + RSTRING_LEN(str); s = e = sbeg + RSTRING_LEN(str);
while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) { while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) { if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
if (ISALPHA(*last_alnum) ? ISDIGIT(*s) : if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) { ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
@ -2597,7 +2597,7 @@ rb_str_succ(VALUE orig)
} }
if (c == -1) { /* str contains no alnum */ if (c == -1) { /* str contains no alnum */
s = e; s = e;
while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) { while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
enum neighbor_char neighbor; enum neighbor_char neighbor;
if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue; if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
neighbor = enc_succ_char(s, l, enc); neighbor = enc_succ_char(s, l, enc);
@ -5336,10 +5336,10 @@ chopped_length(VALUE str)
beg = RSTRING_PTR(str); beg = RSTRING_PTR(str);
end = beg + RSTRING_LEN(str); end = beg + RSTRING_LEN(str);
if (beg > end) return 0; if (beg > end) return 0;
p = rb_enc_prev_char(beg, end, enc); p = rb_enc_prev_char(beg, end, end, enc);
if (!p) return 0; if (!p) return 0;
if (p > beg && rb_enc_codepoint(p, end, enc) == '\n') { if (p > beg && rb_enc_codepoint(p, end, enc) == '\n') {
p2 = rb_enc_prev_char(beg, p, enc); p2 = rb_enc_prev_char(beg, p, end, enc);
if (p2 && rb_enc_codepoint(p2, end, enc) == '\r') p = p2; if (p2 && rb_enc_codepoint(p2, end, enc) == '\r') p = p2;
} }
return p - beg; return p - beg;