From 1d2d25dcadda0764f303183ac091d0c87b432566 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 25 Oct 2022 15:45:40 +0900 Subject: [PATCH] Prevent potential buffer overrun in onigmo A code pattern `p + enclen(enc, p, pend)` may lead to a buffer overrun if incomplete bytes of a UTF-8 character is placed at the end of a string. Because this pattern is used in several places in onigmo, this change fixes the issue in the side of `enclen`: the function should not return a number that is larger than `pend - p`. Co-Authored-By: Nobuyoshi Nakada --- include/ruby/onigmo.h | 4 ++-- regenc.c | 15 +++++++++++++++ regparse.c | 1 - 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index a7ef59c7c8..d71dfb80fb 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -356,9 +356,9 @@ int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, c #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) ONIG_EXTERN -int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); +int onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); -#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) +#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen(p,e,enc) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) diff --git a/regenc.c b/regenc.c index 16d62fdf40..fc131d2533 100644 --- a/regenc.c +++ b/regenc.c @@ -51,6 +51,21 @@ onigenc_set_default_encoding(OnigEncoding enc) return 0; } +extern int +onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) { + ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + if (ret > (int)(e - p)) ret = (int)(e - p); // just for case + return ret; + } + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) { + return (int)(e - p); + } + return p < e ? 1 : 0; +} + extern int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) { diff --git a/regparse.c b/regparse.c index 513e0a8c7a..4ebd5f1c46 100644 --- a/regparse.c +++ b/regparse.c @@ -3799,7 +3799,6 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } else { /* string */ p = tok->backp + enclen(enc, tok->backp, end); - if (p > end) return ONIGERR_END_PATTERN_AT_ESCAPE; } } break;