re.c: Add Regexp.timeout= and Regexp.timeout

[Feature #17837]
This commit is contained in:
Yusuke Endoh 2022-03-24 16:59:11 +09:00
parent 23530d68cb
commit ffc3b37f96
Notes: git 2022-03-30 16:51:10 +09:00
6 changed files with 125 additions and 0 deletions

View File

@ -793,6 +793,13 @@ typedef struct re_pattern_buffer {
OnigDistance dmin; /* min-distance of exact or map */
OnigDistance dmax; /* max-distance of exact or map */
/* rb_hrtime_t from hrtime.h */
#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
int128_t timelimit;
#else
uint64_t timelimit;
#endif
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
} OnigRegexType;

88
re.c
View File

@ -14,12 +14,14 @@
#include <ctype.h>
#include "encindex.h"
#include "hrtime.h"
#include "internal.h"
#include "internal/hash.h"
#include "internal/imemo.h"
#include "internal/re.h"
#include "internal/string.h"
#include "internal/variable.h"
#include "ractor_core.h"
#include "regint.h"
#include "ruby/encoding.h"
#include "ruby/re.h"
@ -1593,6 +1595,9 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
}
// inherit the timeout settings
rb_hrtime_t timelimit = reg->timelimit;
const char *ptr;
long len;
RSTRING_GETMEM(unescaped, ptr, len);
@ -1604,6 +1609,8 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
}
reg->timelimit = timelimit;
RB_GC_GUARD(unescaped);
return reg;
}
@ -4091,6 +4098,84 @@ re_warn(const char *s)
rb_warn("%s", s);
}
// The process-global timeout for regexp matching.
// Zero means no global limit is configured. rb_reg_check_timeout consults this
// value only when the regex being matched has no time limit of its own, and
// Regexp.timeout= is the writer (through double2hrtime).
rb_hrtime_t rb_reg_match_time_limit = 0;
// Timeout hook that is called periodically while a regexp is being matched.
//
// reg       - the regex being matched; a non-zero reg->timelimit takes
//             precedence over the process-global rb_reg_match_time_limit.
// end_time_ - points to an rb_hrtime_t slot provided by the caller. A zero
//             value means no deadline has been computed for this match yet.
//
// The first call only computes and stores the deadline. Later calls raise
// RuntimeError ("regexp match timeout") once the current time has passed it.
void
rb_reg_check_timeout(regex_t *reg, void *end_time_)
{
    rb_hrtime_t *deadline = (rb_hrtime_t *)end_time_;

    if (*deadline != 0) {
        // A deadline is already in place: compare it against the clock.
        if (rb_hrtime_now() > *deadline) {
            rb_raise(rb_eRuntimeError, "regexp match timeout");
        }
        return;
    }

    // First check of this match: the per-regex limit wins, otherwise fall
    // back to the process-global default.
    rb_hrtime_t limit = reg->timelimit;
    if (limit == 0) {
        limit = rb_reg_match_time_limit;
    }

    // RB_HRTIME_MAX is stored when no timeout applies, so the slot becomes
    // non-zero and this setup branch is not taken again.
    *deadline = (limit != 0) ? rb_hrtime_add(limit, rb_hrtime_now())
                             : RB_HRTIME_MAX;
}
/*
* call-seq:
* Regexp.timeout -> float or nil
*
* It returns the current default timeout interval for Regexp matching in seconds.
* +nil+ means no default timeout configuration.
*/
static VALUE
rb_reg_s_timeout_get(VALUE dummy)
{
    // Express the stored process-global limit as a double.
    const double seconds = hrtime2double(rb_reg_match_time_limit);

    // A zero limit means no default timeout is configured, reported as nil.
    return (seconds == 0.0) ? Qnil : DBL2NUM(seconds);
}
/*
* call-seq:
* Regexp.timeout = int or float or nil
*
* It sets the default timeout interval for Regexp matching in seconds.
* +nil+ means no default timeout configuration.
* This configuration is process-global. If you want to set timeout for
* each Regexp, use +timeout+ keyword for <code>Regexp.new</code>.
*
* Regexp.timeout = 1
* /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError)
*/
static VALUE
rb_reg_s_timeout_set(VALUE dummy, VALUE limit)
{
    // nil clears the default timeout; any other value is converted to a double.
    double timeout = NIL_P(limit) ? 0.0 : NUM2DBL(limit);

    // The setting is process-global, so only the main Ractor may change it.
    rb_ractor_ensure_main_ractor("can not access Regexp.timeout from non-main Ractors");

    // Anything that is not strictly positive disables the timeout. Writing the
    // test as !(timeout > 0) also catches NaN, which compares false against
    // every bound and would otherwise be handed to double2hrtime unchanged.
    // NOTE(review): double2hrtime presumably casts to an integer type, which is
    // undefined for NaN -- confirm against hrtime.h.
    if (!(timeout > 0)) timeout = 0;
    double2hrtime(&rb_reg_match_time_limit, timeout);

    // Return the argument as given, not the stored value.
    return limit;
}
/*
* Document-class: RegexpError
*
@ -4170,6 +4255,9 @@ Init_Regexp(void)
rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0);
rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1);
/* see Regexp.options and Regexp.new */
rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
/* see Regexp.options and Regexp.new */

View File

@ -5973,6 +5973,9 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
(reg)->name_table = (void* )NULL;
(reg)->case_fold_flag = case_fold_flag;
(reg)->timelimit = 0;
return 0;
}

View File

@ -422,6 +422,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).gpos = (arg_gpos);\
(msa).best_len = ONIG_MISMATCH;\
(msa).counter = 0;\
(msa).end_time = 0;\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
@ -431,6 +432,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from)
(msa).start = (arg_start);\
(msa).gpos = (arg_gpos);\
(msa).counter = 0;\
(msa).end_time = 0;\
} while(0)
#endif

View File

@ -152,6 +152,7 @@
msa->counter++; \
if (msa->counter >= 128) { \
msa->counter = 0; \
rb_reg_check_timeout(reg, &msa->end_time); \
rb_thread_check_ints(); \
} \
} while(0)
@ -877,6 +878,12 @@ typedef struct {
int state_check_buff_size;
#endif
int counter;
/* rb_hrtime_t from hrtime.h */
#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
int128_t end_time;
#else
uint64_t end_time;
#endif
} OnigMatchArg;
@ -942,6 +949,7 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
#ifdef RUBY
extern size_t onig_memsize(const regex_t *reg);
extern size_t onig_region_memsize(const struct re_registers *regs);
void rb_reg_check_timeout(regex_t *reg, void *end_time);
#endif
RUBY_SYMBOL_EXPORT_END

View File

@ -1457,4 +1457,21 @@ class TestRegexp < Test::Unit::TestCase
}
assert_empty(errs, msg)
end
# Checks that Regexp.timeout= is readable back through Regexp.timeout and that
# a pathological match is aborted with RuntimeError after roughly that interval.
def test_s_timeout
  # Regexp.timeout is process-global; assert_separately presumably evaluates the
  # snippet below in a separate process so the setting cannot leak -- confirm.
  assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}")
  begin;
    Regexp.timeout = 0.2
    assert_equal(0.2, Regexp.timeout)

    # Measure elapsed time on the monotonic clock: the wall clock can be
    # adjusted while the match is running, which would skew the delta.
    t = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    assert_raise_with_message(RuntimeError, "regexp match timeout") do
      # A typical ReDoS case
      /^(a*)*$/ =~ "a" * 1000000 + "x"
    end
    t = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t

    assert_in_delta(0.2, t, 0.1)
  end;
end
end