1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* array.c (rb_ary_cycle): fix typo in rdoc (the example called
a.each where a.cycle was meant). A patch from Yugui
<yugui@yugui.sakura.ne.jp>.  [ruby-dev:31748]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13348 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2007-09-06 12:33:45 +00:00
parent 629b1e4324
commit edd7c787ad
19 changed files with 146 additions and 133 deletions

View file

@ -1,3 +1,8 @@
Thu Sep 6 21:31:49 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* array.c (rb_ary_cycle): fix typo in rdoc (the example called a.each where a.cycle was meant). A patch from Yugui
<yugui@yugui.sakura.ne.jp>. [ruby-dev:31748]
Thu Sep 6 12:42:10 2007 Nobuyoshi Nakada <nobu@ruby-lang.org> Thu Sep 6 12:42:10 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support. * string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support.

View file

@ -2929,7 +2929,7 @@ rb_ary_choice(VALUE ary)
* Calls <i>block</i> repeatedly forever. * Calls <i>block</i> repeatedly forever.
* *
* a = ["a", "b", "c"] * a = ["a", "b", "c"]
* a.each {|x| puts x } # print, a, b, c, a, b, c,.. forever. * a.cycle {|x| puts x } # print, a, b, c, a, b, c,.. forever.
* *
*/ */

View file

@ -189,7 +189,7 @@ rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc)
} }
else { else {
for (c=0; p<e && nth--; c++) { for (c=0; p<e && nth--; c++) {
int n = rb_enc_mbclen(p, enc); int n = rb_enc_mbclen(p, e, enc);
if (n == 0) return 0; if (n == 0) return 0;
p += n; p += n;
@ -208,7 +208,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
} }
for (c=0; p<e; c++) { for (c=0; p<e; c++) {
int n = rb_enc_mbclen(p, enc); int n = rb_enc_mbclen(p, e, enc);
if (n == 0) return -1; if (n == 0) return -1;
p += n; p += n;
@ -217,9 +217,9 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
} }
int int
rb_enc_mbclen(const char *p, rb_encoding *enc) rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
{ {
int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p); int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
if (n == 0) { if (n == 0) {
rb_raise(rb_eArgError, "invalid mbstring sequence"); rb_raise(rb_eArgError, "invalid mbstring sequence");
} }

View file

@ -51,7 +51,7 @@ static const int EncLen_EUCJP[] = {
}; };
static int static int
mbc_enc_len(const UChar* p) mbc_enc_len(const UChar* p, const UChar* e)
{ {
return EncLen_EUCJP[*p]; return EncLen_EUCJP[*p];
} }
@ -62,7 +62,7 @@ mbc_to_code(const UChar* p, const UChar* end)
int c, i, len; int c, i, len;
OnigCodePoint n; OnigCodePoint n;
len = enc_len(ONIG_ENCODING_EUC_JP, p); len = enc_len(ONIG_ENCODING_EUC_JP, p, end);
n = (OnigCodePoint )*p++; n = (OnigCodePoint )*p++;
if (len == 1) return n; if (len == 1) return n;
@ -113,7 +113,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff); *p++ = (UChar )(code & 0xff);
#if 1 #if 1
if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) if (enc_len(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf))
return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
#endif #endif
return p - buf; return p - buf;
@ -134,7 +134,7 @@ mbc_case_fold(OnigCaseFoldType flag,
else { else {
int i; int i;
len = enc_len(ONIG_ENCODING_EUC_JP, p); len = enc_len(ONIG_ENCODING_EUC_JP, p, end);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
*lower++ = *p++; *lower++ = *p++;
} }
@ -156,7 +156,7 @@ left_adjust_char_head(const UChar* start, const UChar* s)
p = s; p = s;
while (!eucjp_islead(*p) && p > start) p--; while (!eucjp_islead(*p) && p > start) p--;
len = enc_len(ONIG_ENCODING_EUC_JP, p); len = enc_len(ONIG_ENCODING_EUC_JP, p, s);
if (p + len > s) return (UChar* )p; if (p + len > s) return (UChar* )p;
p += len; p += len;
return (UChar* )(p + ((s - p) & ~1)); return (UChar* )(p + ((s - p) & ~1));

View file

@ -661,7 +661,7 @@ strscan_getch(VALUE self)
if (EOS_P(p)) if (EOS_P(p))
return Qnil; return Qnil;
len = rb_enc_mbclen(CURPTR(p), enc); len = rb_enc_mbclen(CURPTR(p), S_PEND(p), enc);
if (p->curr + len > S_LEN(p)) { if (p->curr + len > S_LEN(p)) {
len = S_LEN(p) - p->curr; len = S_LEN(p) - p->curr;
} }

View file

@ -50,7 +50,7 @@ rb_encoding * rb_enc_find(const char *name);
#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
/* ptr,encoding -> mbclen */ /* ptr,endptr,encoding -> mbclen */
int rb_enc_mbclen(const char*, rb_encoding*); int rb_enc_mbclen(const char*, const char *, rb_encoding*);
/* code,encoding -> codelen */ /* code,encoding -> codelen */
int rb_enc_codelen(int, rb_encoding*); int rb_enc_codelen(int, rb_encoding*);

View file

@ -144,7 +144,7 @@ typedef struct {
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
typedef struct OnigEncodingTypeST { typedef struct OnigEncodingTypeST {
int (*mbc_enc_len)(const OnigUChar* p); int (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e);
const char* name; const char* name;
int max_enc_len; int max_enc_len;
int min_enc_len; int min_enc_len;
@ -255,11 +255,11 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p) #define enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) #define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) #define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) #define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
#define ONIGENC_IS_MBC_WORD(enc,s,end) \ #define ONIGENC_IS_MBC_WORD(enc,s,end) \
@ -281,7 +281,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIGENC_STEP_BACK(enc,start,s,n) \ #define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n)) onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) #define ONIGENC_MBC_ENC_LEN(enc,p,e) (enc)->mbc_enc_len(p,e)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)

View file

@ -29,8 +29,8 @@ extern "C" {
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
#define ismbchar(p, enc) (mbclen((p),(enc)) != 1) #define ismbchar(p, e, enc) (mbclen((p),(e),(enc)) != 1)
#define mbclen(p,enc) rb_enc_mbclen((p), (enc)) #define mbclen(p,e,enc) rb_enc_mbclen((p),(e),(enc))
#endif /* ifndef ONIG_RUBY_M17N */ #endif /* ifndef ONIG_RUBY_M17N */

29
parse.y
View file

@ -4558,10 +4558,10 @@ ripper_dispatch_delayed_token(struct parser_params *parser, int t)
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif #endif
#define parser_mbclen() mbclen((lex_p-1),parser->enc) #define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc)
#define is_identchar(p, enc) (rb_enc_isalnum(*p, enc) || (*p) == '_' || ismbchar(p, enc)) #define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || ismbchar(p,e,enc))
#define parser_ismbchar() ismbchar((lex_p-1), parser->enc) #define parser_ismbchar() ismbchar((lex_p-1), lex_pend, parser->enc)
#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),parser->enc)) #define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc))
static int static int
parser_yyerror(struct parser_params *parser, const char *msg) parser_yyerror(struct parser_params *parser, const char *msg)
@ -5995,7 +5995,7 @@ parser_yylex(struct parser_params *parser)
} }
} }
else if ((rb_enc_isalnum(c, parser->enc) || c == '_') && else if ((rb_enc_isalnum(c, parser->enc) || c == '_') &&
lex_p < lex_pend && is_identchar(lex_p, parser->enc)) { lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser->enc)) {
goto ternary; goto ternary;
} }
else if (c == '\\') { else if (c == '\\') {
@ -8328,7 +8328,7 @@ internal_id_gen(struct parser_params *parser)
} }
static int static int
is_special_global_name(const char *m, rb_encoding *enc) is_special_global_name(const char *m, const char *e, rb_encoding *enc)
{ {
switch (*m) { switch (*m) {
case '~': case '*': case '$': case '?': case '!': case '@': case '~': case '*': case '$': case '?': case '!': case '@':
@ -8340,7 +8340,7 @@ is_special_global_name(const char *m, rb_encoding *enc)
break; break;
case '-': case '-':
++m; ++m;
if (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc); if (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
break; break;
default: default:
if (!rb_enc_isdigit(*m, enc)) return 0; if (!rb_enc_isdigit(*m, enc)) return 0;
@ -8353,6 +8353,7 @@ int
rb_symname_p(const char *name) rb_symname_p(const char *name)
{ {
const char *m = name; const char *m = name;
const char *e = m + strlen(m);
int localid = Qfalse; int localid = Qfalse;
rb_encoding *enc = rb_enc_from_index(0); rb_encoding *enc = rb_enc_from_index(0);
@ -8362,7 +8363,7 @@ rb_symname_p(const char *name)
return Qfalse; return Qfalse;
case '$': case '$':
if (is_special_global_name(++m, enc)) return Qtrue; if (is_special_global_name(++m, e, enc)) return Qtrue;
goto id; goto id;
case '@': case '@':
@ -8411,8 +8412,9 @@ rb_symname_p(const char *name)
default: default:
localid = !rb_enc_isupper(*m, enc); localid = !rb_enc_isupper(*m, enc);
id: id:
if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, enc)) return Qfalse; if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, e, enc))
while (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc); return Qfalse;
while (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
if (localid) { if (localid) {
switch (*m) { switch (*m) {
case '!': case '?': case '=': ++m; case '!': case '?': case '=': ++m;
@ -8427,6 +8429,7 @@ ID
rb_intern3(const char *name, long len, rb_encoding *enc) rb_intern3(const char *name, long len, rb_encoding *enc)
{ {
const char *m = name; const char *m = name;
const char *e = m + len;
VALUE str; VALUE str;
ID id; ID id;
int last; int last;
@ -8445,7 +8448,7 @@ rb_intern3(const char *name, long len, rb_encoding *enc)
switch (*m) { switch (*m) {
case '$': case '$':
id |= ID_GLOBAL; id |= ID_GLOBAL;
if (is_special_global_name(++m, enc)) goto new_id; if (is_special_global_name(++m, e, enc)) goto new_id;
break; break;
case '@': case '@':
if (m[1] == '@') { if (m[1] == '@') {
@ -8490,8 +8493,8 @@ rb_intern3(const char *name, long len, rb_encoding *enc)
break; break;
} }
if (!rb_enc_isdigit(*m, enc)) { if (!rb_enc_isdigit(*m, enc)) {
while (m <= name + last && is_identchar(m, enc)) { while (m <= name + last && is_identchar(m, e, enc)) {
m += rb_enc_mbclen(m, enc); m += rb_enc_mbclen(m, e, enc);
} }
} }
if (m - name < len) id = ID_JUNK; if (m - name < len) id = ID_JUNK;

26
re.c
View file

@ -393,11 +393,11 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
p = s; pend = p + len; p = s; pend = p + len;
while (p<pend) { while (p<pend) {
if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, enc))) { if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, pend, enc))) {
need_escape = 1; need_escape = 1;
break; break;
} }
p += mbclen(p, enc); p += mbclen(p, pend, enc);
} }
if (!need_escape) { if (!need_escape) {
rb_str_buf_cat(str, s, len); rb_str_buf_cat(str, s, len);
@ -406,7 +406,7 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
p = s; p = s;
while (p<pend) { while (p<pend) {
if (*p == '\\') { if (*p == '\\') {
int n = mbclen(p+1, enc) + 1; int n = mbclen(p+1, pend, enc) + 1;
rb_str_buf_cat(str, p, n); rb_str_buf_cat(str, p, n);
p += n; p += n;
continue; continue;
@ -416,9 +416,9 @@ rb_reg_expr_str(VALUE str, const char *s, long len)
rb_str_buf_cat(str, &c, 1); rb_str_buf_cat(str, &c, 1);
rb_str_buf_cat(str, p, 1); rb_str_buf_cat(str, p, 1);
} }
else if (ismbchar(p, enc)) { else if (ismbchar(p, pend, enc)) {
rb_str_buf_cat(str, p, mbclen(p, enc)); rb_str_buf_cat(str, p, mbclen(p, pend, enc));
p += mbclen(p, enc); p += mbclen(p, pend, enc);
continue; continue;
} }
else if (rb_enc_isprint(*p, enc)) { else if (rb_enc_isprint(*p, enc)) {
@ -1906,8 +1906,8 @@ rb_reg_quote(VALUE str)
send = s + RSTRING_LEN(str); send = s + RSTRING_LEN(str);
for (; s < send; s++) { for (; s < send; s++) {
c = *s; c = *s;
if (ismbchar(s, enc)) { if (ismbchar(s, send, enc)) {
int n = mbclen(s, enc); int n = mbclen(s, send, enc);
while (n-- && s < send) while (n-- && s < send)
s++; s++;
@ -1935,8 +1935,8 @@ rb_reg_quote(VALUE str)
for (; s < send; s++) { for (; s < send; s++) {
c = *s; c = *s;
if (ismbchar(s, enc)) { if (ismbchar(s, send, enc)) {
int n = mbclen(s, enc); int n = mbclen(s, send, enc);
while (n-- && s < send) while (n-- && s < send)
*t++ = *s++; *t++ = *s++;
@ -2180,8 +2180,8 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
while (s < e) { while (s < e) {
char *ss = s++; char *ss = s++;
if (ismbchar(ss, enc)) { if (ismbchar(ss, e, enc)) {
s += mbclen(ss, enc) - 1; s += mbclen(ss, e, enc) - 1;
continue; continue;
} }
if (*ss != '\\' || s == e) continue; if (*ss != '\\' || s == e) continue;
@ -2214,7 +2214,7 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
name_end = name = s + 1; name_end = name = s + 1;
while (name_end < e) { while (name_end < e) {
if (*name_end == '>') break; if (*name_end == '>') break;
name_end += mbclen(name_end, enc); name_end += mbclen(name_end, e, enc);
} }
if (name_end < e) { if (name_end < e) {
no = name_to_backref_number(regs, regexp, name, name_end); no = name_to_backref_number(regs, regexp, name, name_end);

View file

@ -469,13 +469,13 @@ compile_length_string_node(Node* node, regex_t* reg)
ambig = NSTRING_IS_AMBIG(node); ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s; p = prev = sn->s;
prev_len = enc_len(enc, p); prev_len = enc_len(enc, p, sn->end);
p += prev_len; p += prev_len;
slen = 1; slen = 1;
rlen = 0; rlen = 0;
for (; p < sn->end; ) { for (; p < sn->end; ) {
len = enc_len(enc, p); len = enc_len(enc, p, sn->end);
if (len == prev_len) { if (len == prev_len) {
slen++; slen++;
} }
@ -518,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg)
ambig = NSTRING_IS_AMBIG(node); ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s; p = prev = sn->s;
prev_len = enc_len(enc, p); prev_len = enc_len(enc, p, end);
p += prev_len; p += prev_len;
slen = 1; slen = 1;
for (; p < end; ) { for (; p < end; ) {
len = enc_len(enc, p); len = enc_len(enc, p, end);
if (len == prev_len) { if (len == prev_len) {
slen++; slen++;
} }
@ -2312,7 +2312,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
StrNode* sn = NSTR(node); StrNode* sn = NSTR(node);
UChar *s = sn->s; UChar *s = sn->s;
while (s < sn->end) { while (s < sn->end) {
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, sn->end);
(*len)++; (*len)++;
} }
} }
@ -3389,7 +3389,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
goto err; goto err;
} }
len = enc_len(reg->enc, p); len = enc_len(reg->enc, p, end);
if (n == 0) { if (n == 0) {
if (IS_NULL(snode)) { if (IS_NULL(snode)) {
@ -4212,7 +4212,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
p = add->s; p = add->s;
end = p + add->len; end = p + add->len;
for (i = to->len; p < end; ) { for (i = to->len; p < end; ) {
len = enc_len(enc, p); len = enc_len(enc, p, end);
if (i + len > OPT_EXACT_MAXLEN) break; if (i + len > OPT_EXACT_MAXLEN) break;
for (j = 0; j < len && p < end; j++) for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++; to->s[i++] = *p++;
@ -4234,7 +4234,7 @@ concat_opt_exact_info_str(OptExactInfo* to,
UChar *p; UChar *p;
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
len = enc_len(enc, p); len = enc_len(enc, p, end);
if (i + len > OPT_EXACT_MAXLEN) break; if (i + len > OPT_EXACT_MAXLEN) break;
for (j = 0; j < len && p < end; j++) for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++; to->s[i++] = *p++;
@ -4260,7 +4260,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
for (i = 0; i < to->len && i < add->len; ) { for (i = 0; i < to->len && i < add->len; ) {
if (to->s[i] != add->s[i]) break; if (to->s[i] != add->s[i]) break;
len = enc_len(env->enc, to->s + i); len = enc_len(env->enc, to->s + i, to->s + to->len);
for (j = 1; j < len; j++) { for (j = 1; j < len; j++) {
if (to->s[i+j] != add->s[i+j]) break; if (to->s[i+j] != add->s[i+j]) break;

View file

@ -55,7 +55,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
{ {
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) { if (p < s) {
p += enc_len(enc, p); p += enc_len(enc, p, s);
} }
return p; return p;
} }
@ -68,7 +68,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
if (p < s) { if (p < s) {
if (prev) *prev = (const UChar* )p; if (prev) *prev = (const UChar* )p;
p += enc_len(enc, p); p += enc_len(enc, p, s);
} }
else { else {
if (prev) *prev = (const UChar* )NULL; /* Sorry */ if (prev) *prev = (const UChar* )NULL; /* Sorry */
@ -102,7 +102,7 @@ onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
{ {
UChar* q = (UChar* )p; UChar* q = (UChar* )p;
while (n-- > 0) { while (n-- > 0) {
q += ONIGENC_MBC_ENC_LEN(enc, q); q += ONIGENC_MBC_ENC_LEN(enc, q, end);
} }
return (q <= end ? q : NULL); return (q <= end ? q : NULL);
} }
@ -114,7 +114,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
UChar* q = (UChar* )p; UChar* q = (UChar* )p;
while (q < end) { while (q < end) {
q += ONIGENC_MBC_ENC_LEN(enc, q); q += ONIGENC_MBC_ENC_LEN(enc, q, end);
n++; n++;
} }
return n; return n;
@ -125,6 +125,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{ {
int n = 0; int n = 0;
UChar* p = (UChar* )s; UChar* p = (UChar* )s;
UChar* e = p + strlen(s);
while (1) { while (1) {
if (*p == '\0') { if (*p == '\0') {
@ -140,7 +141,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
} }
if (len == 1) return n; if (len == 1) return n;
} }
p += ONIGENC_MBC_ENC_LEN(enc, p); p += ONIGENC_MBC_ENC_LEN(enc, p, e);
n++; n++;
} }
} }
@ -150,6 +151,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
{ {
UChar* start = (UChar* )s; UChar* start = (UChar* )s;
UChar* p = (UChar* )s; UChar* p = (UChar* )s;
UChar* e = p + strlen(s);
while (1) { while (1) {
if (*p == '\0') { if (*p == '\0') {
@ -165,7 +167,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
} }
if (len == 1) return (int )(p - start); if (len == 1) return (int )(p - start);
} }
p += ONIGENC_MBC_ENC_LEN(enc, p); p += ONIGENC_MBC_ENC_LEN(enc, p, e);
} }
} }
@ -638,7 +640,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
int c, i, len; int c, i, len;
OnigCodePoint n; OnigCodePoint n;
len = enc_len(enc, p); len = enc_len(enc, p, end);
n = (OnigCodePoint )(*p++); n = (OnigCodePoint )(*p++);
if (len == 1) return n; if (len == 1) return n;
@ -665,7 +667,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
else { else {
int i; int i;
len = enc_len(enc, p); len = enc_len(enc, p, end);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
*lower++ = *p++; *lower++ = *p++;
} }
@ -718,7 +720,7 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff); *p++ = (UChar )(code & 0xff);
#if 1 #if 1
if (enc_len(enc, buf) != (p - buf)) if (enc_len(enc, buf, p) != (p - buf))
return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
#endif #endif
return p - buf; return p - buf;
@ -741,7 +743,7 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff); *p++ = (UChar )(code & 0xff);
#if 1 #if 1
if (enc_len(enc, buf) != (p - buf)) if (enc_len(enc, buf, p) != (p - buf))
return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
#endif #endif
return p - buf; return p - buf;
@ -825,7 +827,7 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
if (x) return x; if (x) return x;
sascii++; sascii++;
p += enc_len(enc, p); p += enc_len(enc, p, end);
} }
return 0; return 0;
} }

View file

@ -209,7 +209,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
buf[len++] = (UChar )code; buf[len++] = (UChar )code;
} }
p += enc_len(enc, p); p += enc_len(enc, p, end);
if (len >= buf_size) break; if (len >= buf_size) break;
} }
@ -330,15 +330,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
while (p < pat_end) { while (p < pat_end) {
if (*p == '\\') { if (*p == '\\') {
*s++ = *p++; *s++ = *p++;
len = enc_len(enc, p); len = enc_len(enc, p, pat_end);
while (len-- > 0) *s++ = *p++; while (len-- > 0) *s++ = *p++;
} }
else if (*p == '/') { else if (*p == '/') {
*s++ = (unsigned char )'\\'; *s++ = (unsigned char )'\\';
*s++ = *p++; *s++ = *p++;
} }
else if (ONIGENC_IS_MBC_HEAD(enc, p)) { else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
len = enc_len(enc, p); len = enc_len(enc, p, pat_end);
if (ONIGENC_MBC_MINLEN(enc) == 1) { if (ONIGENC_MBC_MINLEN(enc) == 1) {
while (len-- > 0) *s++ = *p++; while (len-- > 0) *s++ = *p++;
} }

View file

@ -1642,12 +1642,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(1); DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET; p += SIZE_BITSET;
s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ s += enc_len(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
MOP_OUT; MOP_OUT;
break; break;
case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
cclass_mb: cclass_mb:
GET_LENGTH_INC(tlen, p); GET_LENGTH_INC(tlen, p);
@ -1657,7 +1657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int mb_len; int mb_len;
DATA_ENSURE(1); DATA_ENSURE(1);
mb_len = enc_len(encode, s); mb_len = enc_len(encode, s, end);
DATA_ENSURE(mb_len); DATA_ENSURE(mb_len);
ss = s; ss = s;
s += mb_len; s += mb_len;
@ -1677,7 +1677,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1); DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) { if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
p += SIZE_BITSET; p += SIZE_BITSET;
goto cclass_mb; goto cclass_mb;
} }
@ -1697,13 +1697,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(1); DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET; p += SIZE_BITSET;
s += enc_len(encode, s); s += enc_len(encode, s, end);
MOP_OUT; MOP_OUT;
break; break;
case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
DATA_ENSURE(1); DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) { if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
s++; s++;
GET_LENGTH_INC(tlen, p); GET_LENGTH_INC(tlen, p);
p += tlen; p += tlen;
@ -1715,7 +1715,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
{ {
OnigCodePoint code; OnigCodePoint code;
UChar *ss; UChar *ss;
int mb_len = enc_len(encode, s); int mb_len = enc_len(encode, s, end);
if (! DATA_ENSURE_CHECK(mb_len)) { if (! DATA_ENSURE_CHECK(mb_len)) {
DATA_ENSURE(1); DATA_ENSURE(1);
@ -1744,7 +1744,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1); DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) { if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
p += SIZE_BITSET; p += SIZE_BITSET;
goto cclass_mb_not; goto cclass_mb_not;
} }
@ -1769,7 +1769,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(1); DATA_ENSURE(1);
GET_POINTER_INC(node, p); GET_POINTER_INC(node, p);
mb_len = enc_len(encode, s); mb_len = enc_len(encode, s, end);
ss = s; ss = s;
s += mb_len; s += mb_len;
DATA_ENSURE(0); DATA_ENSURE(0);
@ -1781,7 +1781,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1); DATA_ENSURE(1);
n = enc_len(encode, s); n = enc_len(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n; s += n;
@ -1790,7 +1790,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1); DATA_ENSURE(1);
n = enc_len(encode, s); n = enc_len(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
s += n; s += n;
MOP_OUT; MOP_OUT;
@ -1799,7 +1799,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev); STACK_PUSH_ALT(p, s, sprev);
n = enc_len(encode, s); n = enc_len(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s; sprev = s;
@ -1811,7 +1811,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) { while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev); STACK_PUSH_ALT(p, s, sprev);
n = enc_len(encode, s); n = enc_len(encode, s, end);
if (n > 1) { if (n > 1) {
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
@ -1830,7 +1830,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p == *s) { if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev); STACK_PUSH_ALT(p + 1, s, sprev);
} }
n = enc_len(encode, s); n = enc_len(encode, s, end);
DATA_ENSURE(n); DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s; sprev = s;
@ -1845,7 +1845,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p == *s) { if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev); STACK_PUSH_ALT(p + 1, s, sprev);
} }
n = enc_len(encode, s); n = enc_len(encode, s, end);
if (n > 1) { if (n > 1) {
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
@ -1906,7 +1906,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (! ONIGENC_IS_MBC_WORD(encode, s, end)) if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail; goto fail;
s += enc_len(encode, s); s += enc_len(encode, s, end);
MOP_OUT; MOP_OUT;
break; break;
@ -1915,7 +1915,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (ONIGENC_IS_MBC_WORD(encode, s, end)) if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail; goto fail;
s += enc_len(encode, s); s += enc_len(encode, s, end);
MOP_OUT; MOP_OUT;
break; break;
@ -2043,7 +2043,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif #endif
} }
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
ON_STR_END(s + enc_len(encode, s))) { ON_STR_END(s + enc_len(encode, s, end))) {
MOP_OUT; MOP_OUT;
continue; continue;
} }
@ -2157,7 +2157,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
STRING_CMP(pstart, s, n); STRING_CMP(pstart, s, n);
while (sprev + (len = enc_len(encode, sprev)) < s) while (sprev + (len = enc_len(encode, sprev, end)) < s)
sprev += len; sprev += len;
MOP_OUT; MOP_OUT;
@ -2189,7 +2189,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n); DATA_ENSURE(n);
sprev = s; sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, n); STRING_CMP_IC(case_fold_flag, pstart, &s, n);
while (sprev + (len = enc_len(encode, sprev)) < s) while (sprev + (len = enc_len(encode, sprev, end)) < s)
sprev += len; sprev += len;
MOP_OUT; MOP_OUT;
@ -2224,7 +2224,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_VALUE(pstart, swork, n, is_fail); STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue; if (is_fail) continue;
s = swork; s = swork;
while (sprev + (len = enc_len(encode, sprev)) < s) while (sprev + (len = enc_len(encode, sprev, end)) < s)
sprev += len; sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1)); p += (SIZE_MEMNUM * (tlen - i - 1));
@ -2263,7 +2263,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue; if (is_fail) continue;
s = swork; s = swork;
while (sprev + (len = enc_len(encode, sprev)) < s) while (sprev + (len = enc_len(encode, sprev, end)) < s)
sprev += len; sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1)); p += (SIZE_MEMNUM * (tlen - i - 1));
@ -2289,7 +2289,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = s; sprev = s;
if (backref_match_at_nested_level(reg, stk, stk_base, ic if (backref_match_at_nested_level(reg, stk, stk_base, ic
, case_fold_flag, (int )level, (int )tlen, p, &s, end)) { , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
while (sprev + (len = enc_len(encode, sprev)) < s) while (sprev + (len = enc_len(encode, sprev, end)) < s)
sprev += len; sprev += len;
p += (SIZE_MEMNUM * tlen); p += (SIZE_MEMNUM * tlen);
@ -2760,7 +2760,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end) if (t == target_end)
return s; return s;
} }
s += enc_len(enc, s); s += enc_len(enc, s, end);
} }
return (UChar* )NULL; return (UChar* )NULL;
@ -2805,7 +2805,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,
s, text_end)) s, text_end))
return s; return s;
s += enc_len(enc, s); s += enc_len(enc, s, text_end);
} }
return (UChar* )NULL; return (UChar* )NULL;
@ -2903,7 +2903,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->map[*se]; skip = reg->map[*se];
t = s; t = s;
do { do {
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, end);
} while ((s - t) < skip && s < end); } while ((s - t) < skip && s < end);
} }
} }
@ -2919,7 +2919,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->int_map[*se]; skip = reg->int_map[*se];
t = s; t = s;
do { do {
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, end);
} while ((s - t) < skip && s < end); } while ((s - t) < skip && s < end);
} }
} }
@ -3024,7 +3024,7 @@ map_search(OnigEncoding enc, UChar map[],
while (s < text_range) { while (s < text_range) {
if (map[*s]) return (UChar* )s; if (map[*s]) return (UChar* )s;
s += enc_len(enc, s); s += enc_len(enc, s, text_range);
} }
return (UChar* )NULL; return (UChar* )NULL;
} }
@ -3127,7 +3127,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
} }
else { else {
UChar *q = p + reg->dmin; UChar *q = p + reg->dmin;
while (p < q) p += enc_len(reg->enc, p); while (p < q) p += enc_len(reg->enc, p, end);
} }
} }
@ -3158,7 +3158,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
if (p - reg->dmin < s) { if (p - reg->dmin < s) {
retry_gate: retry_gate:
pprev = p; pprev = p;
p += enc_len(reg->enc, p); p += enc_len(reg->enc, p, end);
goto retry; goto retry;
} }
@ -3604,7 +3604,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
while (s <= high) { while (s <= high) {
MATCH_AND_RETURN_CHECK(orig_range); MATCH_AND_RETURN_CHECK(orig_range);
prev = s; prev = s;
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, end);
} }
} while (s < range); } while (s < range);
goto mismatch; goto mismatch;
@ -3617,11 +3617,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
do { do {
MATCH_AND_RETURN_CHECK(orig_range); MATCH_AND_RETURN_CHECK(orig_range);
prev = s; prev = s;
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, end);
while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
prev = s; prev = s;
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, end);
} }
} while (s < range); } while (s < range);
goto mismatch; goto mismatch;
@ -3632,7 +3632,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
do { do {
MATCH_AND_RETURN_CHECK(orig_range); MATCH_AND_RETURN_CHECK(orig_range);
prev = s; prev = s;
s += enc_len(reg->enc, s); s += enc_len(reg->enc, s, end);
} while (s < range); } while (s < range);
if (s == range) { /* because empty match with /$/. */ if (s == range) { /* because empty match with /$/. */

View file

@ -246,12 +246,12 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
#define PUNFETCH p = pfetch_prev #define PUNFETCH p = pfetch_prev
#define PINC do { \ #define PINC do { \
pfetch_prev = p; \ pfetch_prev = p; \
p += ONIGENC_MBC_ENC_LEN(enc, p); \ p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
} while (0) } while (0)
#define PFETCH(c) do { \ #define PFETCH(c) do { \
c = ONIGENC_MBC_TO_CODE(enc, p, end); \ c = ONIGENC_MBC_TO_CODE(enc, p, end); \
pfetch_prev = p; \ pfetch_prev = p; \
p += ONIGENC_MBC_ENC_LEN(enc, p); \ p += ONIGENC_MBC_ENC_LEN(enc, p, end); \
} while (0) } while (0)
#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
@ -1539,7 +1539,7 @@ static int
str_node_can_be_split(StrNode* sn, OnigEncoding enc) str_node_can_be_split(StrNode* sn, OnigEncoding enc)
{ {
if (sn->end > sn->s) { if (sn->end > sn->s) {
return ((enc_len(enc, sn->s) < sn->end - sn->s) ? 1 : 0); return ((enc_len(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
} }
return 0; return 0;
} }
@ -2733,12 +2733,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
while (p < to) { while (p < to) {
x = ONIGENC_MBC_TO_CODE(enc, p, to); x = ONIGENC_MBC_TO_CODE(enc, p, to);
q = p + enc_len(enc, p); q = p + enc_len(enc, p, to);
if (x == s[0]) { if (x == s[0]) {
for (i = 1; i < n && q < to; i++) { for (i = 1; i < n && q < to; i++) {
x = ONIGENC_MBC_TO_CODE(enc, q, to); x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break; if (x != s[i]) break;
q += enc_len(enc, q); q += enc_len(enc, q, to);
} }
if (i >= n) { if (i >= n) {
if (IS_NOT_NULL(next)) if (IS_NOT_NULL(next))
@ -2764,19 +2764,19 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
while (p < to) { while (p < to) {
if (in_esc) { if (in_esc) {
in_esc = 0; in_esc = 0;
p += enc_len(enc, p); p += enc_len(enc, p, to);
} }
else { else {
x = ONIGENC_MBC_TO_CODE(enc, p, to); x = ONIGENC_MBC_TO_CODE(enc, p, to);
q = p + enc_len(enc, p); q = p + enc_len(enc, p, to);
if (x == s[0]) { if (x == s[0]) {
for (i = 1; i < n && q < to; i++) { for (i = 1; i < n && q < to; i++) {
x = ONIGENC_MBC_TO_CODE(enc, q, to); x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break; if (x != s[i]) break;
q += enc_len(enc, q); q += enc_len(enc, q, to);
} }
if (i >= n) return 1; if (i >= n) return 1;
p += enc_len(enc, p); p += enc_len(enc, p, to);
} }
else { else {
x = ONIGENC_MBC_TO_CODE(enc, p, to); x = ONIGENC_MBC_TO_CODE(enc, p, to);
@ -2904,7 +2904,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
} }
if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) { if (p > prev + enc_len(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
PINC; PINC;
tok->type = TK_CODE_POINT; tok->type = TK_CODE_POINT;
tok->base = 16; tok->base = 16;
@ -3244,7 +3244,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
} }
if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) { if ((p > prev + enc_len(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
PINC; PINC;
tok->type = TK_CODE_POINT; tok->type = TK_CODE_POINT;
tok->u.code = (OnigCodePoint )num; tok->u.code = (OnigCodePoint )num;
@ -3443,7 +3443,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.code = (OnigCodePoint )num; tok->u.code = (OnigCodePoint )num;
} }
else { /* string */ else { /* string */
p = tok->backp + enc_len(enc, tok->backp); p = tok->backp + enc_len(enc, tok->backp, end);
} }
break; break;
} }
@ -4120,7 +4120,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
goto err; goto err;
} }
len = enc_len(env->enc, buf); len = enc_len(env->enc, buf, buf+i);
if (i < len) { if (i < len) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
goto err; goto err;
@ -4927,7 +4927,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
len = 1; len = 1;
while (1) { while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
if (len == enc_len(env->enc, NSTR(*np)->s)) { if (len == enc_len(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
r = fetch_token(tok, src, end, env); r = fetch_token(tok, src, end, env);
NSTRING_CLEAR_RAW(*np); NSTRING_CLEAR_RAW(*np);
goto string_end; goto string_end;

8
sjis.c
View file

@ -71,7 +71,7 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] #define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
static int static int
mbc_enc_len(const UChar* p) mbc_enc_len(const UChar* p, const UChar* e)
{ {
return EncLen_SJIS[*p]; return EncLen_SJIS[*p];
} }
@ -98,7 +98,7 @@ mbc_to_code(const UChar* p, const UChar* end)
int c, i, len; int c, i, len;
OnigCodePoint n; OnigCodePoint n;
len = enc_len(ONIG_ENCODING_SJIS, p); len = enc_len(ONIG_ENCODING_SJIS, p, end);
c = *p++; c = *p++;
n = c; n = c;
if (len == 1) return n; if (len == 1) return n;
@ -139,7 +139,7 @@ mbc_case_fold(OnigCaseFoldType flag,
} }
else { else {
int i; int i;
int len = enc_len(ONIG_ENCODING_SJIS, p); int len = enc_len(ONIG_ENCODING_SJIS, p, end);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
*lower++ = *p++; *lower++ = *p++;
@ -192,7 +192,7 @@ left_adjust_char_head(const UChar* start, const UChar* s)
} }
} }
} }
len = enc_len(ONIG_ENCODING_SJIS, p); len = enc_len(ONIG_ENCODING_SJIS, p, s);
if (p + len > s) return (UChar* )p; if (p + len > s) return (UChar* )p;
p += len; p += len;
return (UChar* )(p + ((s - p) & ~1)); return (UChar* )(p + ((s - p) & ~1));

View file

@ -707,7 +707,7 @@ str_sublen(VALUE str, long pos, rb_encoding *enc)
i = 0; i = 0;
while (p < e) { while (p < e) {
p += rb_enc_mbclen(p, enc); p += rb_enc_mbclen(p, e, enc);
i++; i++;
} }
return i; return i;
@ -2375,7 +2375,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang)
* in order to prevent infinite loops. * in order to prevent infinite loops.
*/ */
if (RSTRING_LEN(str) <= END(0)) break; if (RSTRING_LEN(str) <= END(0)) break;
len = rb_enc_mbclen(RSTRING_PTR(str)+END(0), enc); len = rb_enc_mbclen(RSTRING_PTR(str)+END(0), RSTRING_END(str), enc);
memcpy(bp, RSTRING_PTR(str)+END(0), len); memcpy(bp, RSTRING_PTR(str)+END(0), len);
bp += len; bp += len;
offset = END(0) + len; offset = END(0) + len;
@ -2595,7 +2595,7 @@ rb_str_reverse(VALUE str)
} }
else { else {
while (s < e) { while (s < e) {
int clen = rb_enc_mbclen(s, enc); int clen = rb_enc_mbclen(s, e, enc);
if (clen == 0) { if (clen == 0) {
rb_raise(rb_eArgError, "invalid mbstring sequence"); rb_raise(rb_eArgError, "invalid mbstring sequence");
@ -3861,11 +3861,13 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
} }
else if (last_null == 1) { else if (last_null == 1) {
rb_ary_push(result, rb_str_subseq(str, beg, rb_ary_push(result, rb_str_subseq(str, beg,
rb_enc_mbclen(RSTRING_PTR(str)+beg,enc))); rb_enc_mbclen(RSTRING_PTR(str)+beg,
RSTRING_END(str),
enc)));
beg = start; beg = start;
} }
else { else {
start += rb_enc_mbclen(RSTRING_PTR(str)+start,enc); start += rb_enc_mbclen(RSTRING_PTR(str)+start,RSTRING_END(str),enc);
last_null = 1; last_null = 1;
continue; continue;
} }
@ -4455,7 +4457,8 @@ scan_once(VALUE str, VALUE pat, long *start)
* Always consume at least one character of the input string * Always consume at least one character of the input string
*/ */
if (RSTRING_LEN(str) > END(0)) if (RSTRING_LEN(str) > END(0))
*start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),enc); *start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),
RSTRING_END(str), enc);
else else
*start = END(0)+1; *start = END(0)+1;
} }

View file

@ -10832,7 +10832,7 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
if (len >= PROPERTY_NAME_MAX_SIZE) if (len >= PROPERTY_NAME_MAX_SIZE)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME; return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
p += enc_len(enc, p); p += enc_len(enc, p, end);
} }
buf[len] = 0; buf[len] = 0;
@ -10963,7 +10963,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
if (CaseFoldInited == 0) init_case_fold_table(); if (CaseFoldInited == 0) init_case_fold_table();
code = ONIGENC_MBC_TO_CODE(enc, p, end); code = ONIGENC_MBC_TO_CODE(enc, p, end);
len = enc_len(enc, p); len = enc_len(enc, p, end);
*pp += len; *pp += len;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
@ -11155,7 +11155,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
n = 0; n = 0;
code = ONIGENC_MBC_TO_CODE(enc, p, end); code = ONIGENC_MBC_TO_CODE(enc, p, end);
len = enc_len(enc, p); len = enc_len(enc, p, end);
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
@ -11304,7 +11304,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
else else
codes[1] = code; codes[1] = code;
clen = enc_len(enc, p); clen = enc_len(enc, p, end);
len += clen; len += clen;
if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
for (i = 0; i < z2->n; i++) { for (i = 0; i < z2->n; i++) {
@ -11325,7 +11325,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
else else
codes[2] = code; codes[2] = code;
clen = enc_len(enc, p); clen = enc_len(enc, p, end);
len += clen; len += clen;
if (onig_st_lookup(Unfold3Table, (st_data_t )codes, if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
(void* )&z2) != 0) { (void* )&z2) != 0) {

4
utf8.c
View file

@ -60,7 +60,7 @@ static const int EncLen_UTF8[] = {
}; };
static int static int
utf8_mbc_enc_len(const UChar* p) utf8_mbc_enc_len(const UChar* p, const UChar* e)
{ {
return EncLen_UTF8[*p]; return EncLen_UTF8[*p];
} }
@ -96,7 +96,7 @@ utf8_mbc_to_code(const UChar* p, const UChar* end)
int c, len; int c, len;
OnigCodePoint n; OnigCodePoint n;
len = enc_len(ONIG_ENCODING_UTF8, p); len = enc_len(ONIG_ENCODING_UTF8, p, end);
c = *p++; c = *p++;
if (len > 1) { if (len > 1) {
len--; len--;