diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb index 7ce2e9d636e..75ba0799058 100644 --- a/lib/gitlab/untrusted_regexp.rb +++ b/lib/gitlab/untrusted_regexp.rb @@ -11,7 +11,11 @@ module Gitlab class UntrustedRegexp delegate :===, to: :regexp - def initialize(pattern) + def initialize(pattern, multiline: false) + if multiline + pattern = "(?m)#{pattern}" + end + @regexp = RE2::Regexp.new(pattern, log_errors: false) raise RegexpError.new(regexp.error) unless regexp.ok? @@ -31,6 +35,19 @@ module Gitlab RE2.Replace(text, regexp, rewrite) end + # Handles regular expressions with the preferred RE2 library where possible + # via UntustedRegex. Falls back to Ruby's built-in regular expression library + # when the syntax would be invalid in RE2. + # + # One difference between these is `(?m)` multi-line mode. Ruby regex enables + # this by default, but also handles `^` and `$` differently. + # See: https://www.regular-expressions.info/modifiers.html + def self.with_fallback(pattern, multiline: false) + UntrustedRegexp.new(pattern, multiline: multiline) + rescue RegexpError + Regexp.new(pattern) + end + private attr_reader :regexp diff --git a/spec/lib/gitlab/untrusted_regexp_spec.rb b/spec/lib/gitlab/untrusted_regexp_spec.rb index bed58d407ef..0ee7fa1e570 100644 --- a/spec/lib/gitlab/untrusted_regexp_spec.rb +++ b/spec/lib/gitlab/untrusted_regexp_spec.rb @@ -39,6 +39,14 @@ describe Gitlab::UntrustedRegexp do expect(result).to be_falsy end + + it 'can handle regular expressions in multiline mode' do + regexp = described_class.new('^\d', multiline: true) + + result = regexp === "Header\n\n1. Content" + + expect(result).to be_truthy + end end describe '#scan' do