diff --git a/changelogs/unreleased/feature-gb-add-regexp-variables-expression.yml b/changelogs/unreleased/feature-gb-add-regexp-variables-expression.yml new file mode 100644 index 00000000000..d77c5b42497 --- /dev/null +++ b/changelogs/unreleased/feature-gb-add-regexp-variables-expression.yml @@ -0,0 +1,5 @@ +--- +title: Add support for variables expression pattern matching syntax +merge_request: 18902 +author: +type: added diff --git a/doc/ci/variables/README.md b/doc/ci/variables/README.md index 42367bf13f7..f66b2b374ba 100644 --- a/doc/ci/variables/README.md +++ b/doc/ci/variables/README.md @@ -530,6 +530,16 @@ Below you can find supported syntax reference: `$STAGING` value needs to a string, with length higher than zero. Variable that contains only whitespace characters is not an empty variable. +1. Pattern matching _(added in 11.0)_ + + > Example: `$VARIABLE =~ /^content.*/` + + It is possible to perform pattern matching against a variable and a regular + expression. An expression like this evaluates to true if matches are found. + + Pattern matching is case-sensitive by default. Use the `i` flag modifier, like + `/pattern/i`, to make a pattern case-insensitive. + ### Unsupported predefined variables Because GitLab evaluates variables before creating jobs, we do not support a diff --git a/doc/ci/yaml/README.md b/doc/ci/yaml/README.md index 2a17a51d7f8..3e77a6f58b7 100644 --- a/doc/ci/yaml/README.md +++ b/doc/ci/yaml/README.md @@ -344,10 +344,11 @@ job: kubernetes: active ``` -Example of using variables expressions: +Examples of using variables expressions: ```yaml deploy: + script: cap staging deploy only: refs: - branches @@ -356,6 +357,16 @@ deploy: - $STAGING ``` +Another use case is excluding jobs depending on a commit message _(added in 11.0)_: + +```yaml +end-to-end: + script: rake test:end-to-end + except: + variables: + - $CI_COMMIT_MESSAGE =~ /skip-end-to-end-tests/ +``` + Learn more about variables expressions on [a separate page][variables-expressions]. 
## `tags` diff --git a/lib/gitlab/ci/pipeline/expression.rb b/lib/gitlab/ci/pipeline/expression.rb new file mode 100644 index 00000000000..f57df7c5637 --- /dev/null +++ b/lib/gitlab/ci/pipeline/expression.rb @@ -0,0 +1,10 @@ +module Gitlab + module Ci + module Pipeline + module Expression + ExpressionError = Class.new(StandardError) + RuntimeError = Class.new(ExpressionError) + end + end + end +end diff --git a/lib/gitlab/ci/pipeline/expression/lexeme/matches.rb b/lib/gitlab/ci/pipeline/expression/lexeme/matches.rb new file mode 100644 index 00000000000..10957598f76 --- /dev/null +++ b/lib/gitlab/ci/pipeline/expression/lexeme/matches.rb @@ -0,0 +1,29 @@ +module Gitlab + module Ci + module Pipeline + module Expression + module Lexeme + class Matches < Lexeme::Operator + PATTERN = /=~/.freeze + + def initialize(left, right) + @left = left + @right = right + end + + def evaluate(variables = {}) + text = @left.evaluate(variables) + regexp = @right.evaluate(variables) + + regexp.scan(text.to_s).any? + end + + def self.build(_value, behind, ahead) + new(behind, ahead) + end + end + end + end + end + end +end diff --git a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb new file mode 100644 index 00000000000..9b239c29ea4 --- /dev/null +++ b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb @@ -0,0 +1,33 @@ +module Gitlab + module Ci + module Pipeline + module Expression + module Lexeme + require_dependency 're2' + + class Pattern < Lexeme::Value + PATTERN = %r{^/.+/[ismU]*$}.freeze + + def initialize(regexp) + @value = regexp + + unless Gitlab::UntrustedRegexp.valid?(@value) + raise Lexer::SyntaxError, 'Invalid regular expression!' + end + end + + def evaluate(variables = {}) + Gitlab::UntrustedRegexp.fabricate(@value) + rescue RegexpError + raise Expression::RuntimeError, 'Invalid regular expression!' 
+ end + + def self.build(string) + new(string) + end + end + end + end + end + end +end diff --git a/lib/gitlab/ci/pipeline/expression/lexer.rb b/lib/gitlab/ci/pipeline/expression/lexer.rb index e1c68b7c3c2..4cacb1e62c9 100644 --- a/lib/gitlab/ci/pipeline/expression/lexer.rb +++ b/lib/gitlab/ci/pipeline/expression/lexer.rb @@ -5,15 +5,17 @@ module Gitlab class Lexer include ::Gitlab::Utils::StrongMemoize + SyntaxError = Class.new(Expression::ExpressionError) + LEXEMES = [ Expression::Lexeme::Variable, Expression::Lexeme::String, + Expression::Lexeme::Pattern, Expression::Lexeme::Null, - Expression::Lexeme::Equals + Expression::Lexeme::Equals, + Expression::Lexeme::Matches ].freeze - SyntaxError = Class.new(Statement::StatementError) - MAX_TOKENS = 100 def initialize(statement, max_tokens: MAX_TOKENS) diff --git a/lib/gitlab/ci/pipeline/expression/statement.rb b/lib/gitlab/ci/pipeline/expression/statement.rb index 09a7c98464b..b36f1e0f865 100644 --- a/lib/gitlab/ci/pipeline/expression/statement.rb +++ b/lib/gitlab/ci/pipeline/expression/statement.rb @@ -3,15 +3,16 @@ module Gitlab module Pipeline module Expression class Statement - StatementError = Class.new(StandardError) + StatementError = Class.new(Expression::ExpressionError) GRAMMAR = [ + %w[variable], %w[variable equals string], %w[variable equals variable], %w[variable equals null], %w[string equals variable], %w[null equals variable], - %w[variable] + %w[variable matches pattern] ].freeze def initialize(statement, variables = {}) @@ -35,11 +36,13 @@ module Gitlab def truthful? evaluate.present? + rescue Expression::ExpressionError + false end def valid? 
parse_tree.is_a?(Lexeme::Base) - rescue StatementError + rescue Expression::ExpressionError false end end diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb index 75ba0799058..dc2d91dfa23 100644 --- a/lib/gitlab/untrusted_regexp.rb +++ b/lib/gitlab/untrusted_regexp.rb @@ -9,7 +9,9 @@ module Gitlab # there is a strict limit on total execution time. See the RE2 documentation # at https://github.com/google/re2/wiki/Syntax for more details. class UntrustedRegexp - delegate :===, to: :regexp + require_dependency 're2' + + delegate :===, :source, to: :regexp def initialize(pattern, multiline: false) if multiline @@ -35,6 +37,10 @@ module Gitlab RE2.Replace(text, regexp, rewrite) end + def ==(other) + self.source == other.source + end + # Handles regular expressions with the preferred RE2 library where possible # via UntustedRegex. Falls back to Ruby's built-in regular expression library # when the syntax would be invalid in RE2. @@ -48,6 +54,24 @@ module Gitlab Regexp.new(pattern) end + def self.valid?(pattern) + !!self.fabricate(pattern) + rescue RegexpError + false + end + + def self.fabricate(pattern) + matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}) + + raise RegexpError, 'Invalid regular expression!' if matches.nil? + + expression = matches[:regexp] + flags = matches[:flags] + expression.prepend("(?#{flags})") if flags.present? 
+ + self.new(expression, multiline: false) + end + private attr_reader :regexp diff --git a/spec/lib/gitlab/ci/config/entry/policy_spec.rb b/spec/lib/gitlab/ci/config/entry/policy_spec.rb index 08718c382b9..83d39b82068 100644 --- a/spec/lib/gitlab/ci/config/entry/policy_spec.rb +++ b/spec/lib/gitlab/ci/config/entry/policy_spec.rb @@ -111,7 +111,15 @@ describe Gitlab::Ci::Config::Entry::Policy do context 'when specifying invalid variables expressions token' do let(:config) { { variables: ['$MY_VAR == 123'] } } - it 'reports an error about invalid statement' do + it 'reports an error about invalid expression' do + expect(entry.errors).to include /invalid expression syntax/ + end + end + + context 'when using invalid variables expressions regexp' do + let(:config) { { variables: ['$MY_VAR =~ /some ( thing/'] } } + + it 'reports an error about invalid expression' do expect(entry.errors).to include /invalid expression syntax/ end end diff --git a/spec/lib/gitlab/ci/pipeline/expression/lexeme/matches_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/lexeme/matches_spec.rb new file mode 100644 index 00000000000..49e5af52f4d --- /dev/null +++ b/spec/lib/gitlab/ci/pipeline/expression/lexeme/matches_spec.rb @@ -0,0 +1,80 @@ +require 'fast_spec_helper' +require_dependency 're2' + +describe Gitlab::Ci::Pipeline::Expression::Lexeme::Matches do + let(:left) { double('left') } + let(:right) { double('right') } + + describe '.build' do + it 'creates a new instance of the token' do + expect(described_class.build('=~', left, right)) + .to be_a(described_class) + end + end + + describe '.type' do + it 'is an operator' do + expect(described_class.type).to eq :operator + end + end + + describe '#evaluate' do + it 'returns false when left and right do not match' do + allow(left).to receive(:evaluate).and_return('my-string') + allow(right).to receive(:evaluate) + .and_return(Gitlab::UntrustedRegexp.new('something')) + + operator = described_class.new(left, right) + + 
expect(operator.evaluate).to eq false + end + + it 'returns true when left and right match' do + allow(left).to receive(:evaluate).and_return('my-awesome-string') + allow(right).to receive(:evaluate) + .and_return(Gitlab::UntrustedRegexp.new('awesome.string$')) + + operator = described_class.new(left, right) + + expect(operator.evaluate).to eq true + end + + it 'supports matching against a nil value' do + allow(left).to receive(:evaluate).and_return(nil) + allow(right).to receive(:evaluate) + .and_return(Gitlab::UntrustedRegexp.new('pattern')) + + operator = described_class.new(left, right) + + expect(operator.evaluate).to eq false + end + + it 'supports multiline strings' do + allow(left).to receive(:evaluate).and_return <<~TEXT + My awesome contents + + My-text-string! + TEXT + + allow(right).to receive(:evaluate) + .and_return(Gitlab::UntrustedRegexp.new('text-string')) + + operator = described_class.new(left, right) + + expect(operator.evaluate).to eq true + end + + it 'supports regexp flags' do + allow(left).to receive(:evaluate).and_return <<~TEXT + My AWESOME content + TEXT + + allow(right).to receive(:evaluate) + .and_return(Gitlab::UntrustedRegexp.new('(?i)awesome')) + + operator = described_class.new(left, right) + + expect(operator.evaluate).to eq true + end + end +end diff --git a/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb new file mode 100644 index 00000000000..3ebc2e94727 --- /dev/null +++ b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb @@ -0,0 +1,96 @@ +require 'fast_spec_helper' + +describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do + describe '.build' do + it 'creates a new instance of the token' do + expect(described_class.build('/.*/')) + .to be_a(described_class) + end + + it 'raises an error if pattern is invalid' do + expect { described_class.build('/ some ( thin/i') } + .to 
raise_error(Gitlab::Ci::Pipeline::Expression::Lexer::SyntaxError) + end + end + + describe '.type' do + it 'is a value lexeme' do + expect(described_class.type).to eq :value + end + end + + describe '.scan' do + it 'correctly identifies a pattern token' do + scanner = StringScanner.new('/pattern/') + + token = described_class.scan(scanner) + + expect(token).not_to be_nil + expect(token.build.evaluate) + .to eq Gitlab::UntrustedRegexp.new('pattern') + end + + it 'is a greedy scanner for regexp boundaries' do + scanner = StringScanner.new('/some .* / pattern/') + + token = described_class.scan(scanner) + + expect(token).not_to be_nil + expect(token.build.evaluate) + .to eq Gitlab::UntrustedRegexp.new('some .* / pattern') + end + + it 'does not allow to use an empty pattern' do + scanner = StringScanner.new(%(//)) + + token = described_class.scan(scanner) + + expect(token).to be_nil + end + + it 'support single flag' do + scanner = StringScanner.new('/pattern/i') + + token = described_class.scan(scanner) + + expect(token).not_to be_nil + expect(token.build.evaluate) + .to eq Gitlab::UntrustedRegexp.new('(?i)pattern') + end + + it 'support multiple flags' do + scanner = StringScanner.new('/pattern/im') + + token = described_class.scan(scanner) + + expect(token).not_to be_nil + expect(token.build.evaluate) + .to eq Gitlab::UntrustedRegexp.new('(?im)pattern') + end + + it 'does not support arbitrary flags' do + scanner = StringScanner.new('/pattern/x') + + token = described_class.scan(scanner) + + expect(token).to be_nil + end + end + + describe '#evaluate' do + it 'returns a regular expression' do + regexp = described_class.new('/abc/') + + expect(regexp.evaluate).to eq Gitlab::UntrustedRegexp.new('abc') + end + + it 'raises error if evaluated regexp is not valid' do + allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true) + + regexp = described_class.new('/invalid ( .*/') + + expect { regexp.evaluate } + .to 
raise_error(Gitlab::Ci::Pipeline::Expression::RuntimeError) + end + end +end diff --git a/spec/lib/gitlab/ci/pipeline/expression/lexer_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/lexer_spec.rb index 230ceeb07f8..3f11b3f7673 100644 --- a/spec/lib/gitlab/ci/pipeline/expression/lexer_spec.rb +++ b/spec/lib/gitlab/ci/pipeline/expression/lexer_spec.rb @@ -6,7 +6,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do end describe '#tokens' do - it 'tokenss single value' do + it 'returns single value' do tokens = described_class.new('$VARIABLE').tokens expect(tokens).to be_one @@ -20,14 +20,14 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do expect(tokens).to all(be_an_instance_of(token_class)) end - it 'tokenss multiple values of the same token' do + it 'returns multiple values of the same token' do tokens = described_class.new("$VARIABLE1 $VARIABLE2").tokens expect(tokens.size).to eq 2 expect(tokens).to all(be_an_instance_of(token_class)) end - it 'tokenss multiple values with different tokens' do + it 'returns multiple values with different tokens' do tokens = described_class.new('$VARIABLE "text" "value"').tokens expect(tokens.size).to eq 3 @@ -36,7 +36,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do expect(tokens.third.value).to eq '"value"' end - it 'tokenss tokens and operators' do + it 'returns tokens and operators' do tokens = described_class.new('$VARIABLE == "text"').tokens expect(tokens.size).to eq 3 diff --git a/spec/lib/gitlab/ci/pipeline/expression/parser_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/parser_spec.rb index e8e6f585310..2b78b1dd4a7 100644 --- a/spec/lib/gitlab/ci/pipeline/expression/parser_spec.rb +++ b/spec/lib/gitlab/ci/pipeline/expression/parser_spec.rb @@ -1,4 +1,4 @@ -require 'spec_helper' +require 'fast_spec_helper' describe Gitlab::Ci::Pipeline::Expression::Parser do describe '#tree' do diff --git a/spec/lib/gitlab/ci/pipeline/expression/statement_spec.rb 
b/spec/lib/gitlab/ci/pipeline/expression/statement_spec.rb index 6685bf5385b..11e73294f18 100644 --- a/spec/lib/gitlab/ci/pipeline/expression/statement_spec.rb +++ b/spec/lib/gitlab/ci/pipeline/expression/statement_spec.rb @@ -1,4 +1,5 @@ -require 'spec_helper' +require 'fast_spec_helper' +require 'rspec-parameterized' describe Gitlab::Ci::Pipeline::Expression::Statement do subject do @@ -36,7 +37,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do '== "123"', # invalid left side '"some string"', # only string provided '$VAR ==', # invalid right side - '12345', # unknown syntax + 'null', # missing lexemes '' # empty statement ] @@ -44,7 +45,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do context "when expression grammar is #{syntax.inspect}" do let(:text) { syntax } - it 'aises a statement error exception' do + it 'raises a statement error exception' do expect { subject.parse_tree } .to raise_error described_class::StatementError end @@ -82,48 +83,66 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do end describe '#evaluate' do - statements = [ - ['$PRESENT_VARIABLE == "my variable"', true], - ["$PRESENT_VARIABLE == 'my variable'", true], - ['"my variable" == $PRESENT_VARIABLE', true], - ['$PRESENT_VARIABLE == null', false], - ['$EMPTY_VARIABLE == null', false], - ['"" == $EMPTY_VARIABLE', true], - ['$EMPTY_VARIABLE', ''], - ['$UNDEFINED_VARIABLE == null', true], - ['null == $UNDEFINED_VARIABLE', true], - ['$PRESENT_VARIABLE', 'my variable'], - ['$UNDEFINED_VARIABLE', nil] - ] + using RSpec::Parameterized::TableSyntax - statements.each do |expression, value| - context "when using expression `#{expression}`" do - let(:text) { expression } + where(:expression, :value) do + '$PRESENT_VARIABLE == "my variable"' | true + '"my variable" == $PRESENT_VARIABLE' | true + '$PRESENT_VARIABLE == null' | false + '$EMPTY_VARIABLE == null' | false + '"" == $EMPTY_VARIABLE' | true + '$EMPTY_VARIABLE' | '' + '$UNDEFINED_VARIABLE == null' | true + 'null 
== $UNDEFINED_VARIABLE' | true + '$PRESENT_VARIABLE' | 'my variable' + '$UNDEFINED_VARIABLE' | nil + "$PRESENT_VARIABLE =~ /var.*e$/" | true + "$PRESENT_VARIABLE =~ /^var.*/" | false + "$EMPTY_VARIABLE =~ /var.*/" | false + "$UNDEFINED_VARIABLE =~ /var.*/" | false + "$PRESENT_VARIABLE =~ /VAR.*/i" | true + end - it "evaluates to `#{value.inspect}`" do - expect(subject.evaluate).to eq value - end + with_them do + let(:text) { expression } + + it "evaluates to `#{params[:value].inspect}`" do + expect(subject.evaluate).to eq value end end end describe '#truthful?' do - statements = [ - ['$PRESENT_VARIABLE == "my variable"', true], - ["$PRESENT_VARIABLE == 'no match'", false], - ['$UNDEFINED_VARIABLE == null', true], - ['$PRESENT_VARIABLE', true], - ['$UNDEFINED_VARIABLE', false], - ['$EMPTY_VARIABLE', false] - ] + using RSpec::Parameterized::TableSyntax - statements.each do |expression, value| - context "when using expression `#{expression}`" do - let(:text) { expression } + where(:expression, :value) do + '$PRESENT_VARIABLE == "my variable"' | true + "$PRESENT_VARIABLE == 'no match'" | false + '$UNDEFINED_VARIABLE == null' | true + '$PRESENT_VARIABLE' | true + '$UNDEFINED_VARIABLE' | false + '$EMPTY_VARIABLE' | false + '$INVALID = 1' | false + "$PRESENT_VARIABLE =~ /var.*/" | true + "$UNDEFINED_VARIABLE =~ /var.*/" | false + end - it "returns `#{value.inspect}`" do - expect(subject.truthful?).to eq value - end + with_them do + let(:text) { expression } + + it "returns `#{params[:value].inspect}`" do + expect(subject.truthful?).to eq value + end + end + + context 'when evaluating expression raises an error' do + let(:text) { '$PRESENT_VARIABLE' } + + it 'returns false' do + allow(subject).to receive(:evaluate) + .and_raise(described_class::StatementError) + + expect(subject.truthful?).to be_falsey end end end diff --git a/spec/lib/gitlab/ci/pipeline/expression/token_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/token_spec.rb index 6d7453f0de5..cedfe270f9d 100644 
--- a/spec/lib/gitlab/ci/pipeline/expression/token_spec.rb +++ b/spec/lib/gitlab/ci/pipeline/expression/token_spec.rb @@ -1,4 +1,4 @@ -require 'spec_helper' +require 'fast_spec_helper' describe Gitlab::Ci::Pipeline::Expression::Token do let(:value) { '$VARIABLE' } diff --git a/spec/lib/gitlab/untrusted_regexp_spec.rb b/spec/lib/gitlab/untrusted_regexp_spec.rb index 0ee7fa1e570..0a6ac0aa294 100644 --- a/spec/lib/gitlab/untrusted_regexp_spec.rb +++ b/spec/lib/gitlab/untrusted_regexp_spec.rb @@ -1,6 +1,49 @@ -require 'spec_helper' +require 'fast_spec_helper' +require 'support/shared_examples/malicious_regexp_shared_examples' describe Gitlab::UntrustedRegexp do + describe '.valid?' do + it 'returns false if regexp is invalid' do + expect(described_class.valid?('/some ( thing/')) + .to be false + end + + it 'returns true if regexp is valid' do + expect(described_class.valid?('/some .* thing/')) + .to be true + end + end + + describe '.fabricate' do + context 'when regexp is using /regexp/ scheme with flags' do + it 'fabricates regexp with a single flag' do + regexp = described_class.fabricate('/something/i') + + expect(regexp).to eq described_class.new('(?i)something') + expect(regexp.scan('SOMETHING')).to be_one + end + + it 'fabricates regexp with multiple flags' do + regexp = described_class.fabricate('/something/im') + + expect(regexp).to eq described_class.new('(?im)something') + end + + it 'fabricates regexp without flags' do + regexp = described_class.fabricate('/something/') + + expect(regexp).to eq described_class.new('something') + end + end + + context 'when regexp is a raw pattern' do + it 'raises an error' do + expect { described_class.fabricate('some .* thing') } + .to raise_error(RegexpError) + end + end + end + describe '#initialize' do subject { described_class.new(pattern) } diff --git a/spec/support/shared_examples/malicious_regexp_shared_examples.rb b/spec/support/shared_examples/malicious_regexp_shared_examples.rb index ac5d22298bb..65026f1d7c0 
100644 --- a/spec/support/shared_examples/malicious_regexp_shared_examples.rb +++ b/spec/support/shared_examples/malicious_regexp_shared_examples.rb @@ -1,3 +1,5 @@ +require 'timeout' + shared_examples 'malicious regexp' do let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' } let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }