Merge branch 'feature/gb/add-regexp-variables-expression' into 'master'

Add support for variables expression regexp syntax

Closes #43601

See merge request gitlab-org/gitlab-ce!18902
This commit is contained in:
Kamil Trzciński 2018-05-18 12:08:12 +00:00
commit 46d5ab68b7
18 changed files with 427 additions and 52 deletions

View file

@ -0,0 +1,5 @@
---
title: Add support for variables expression pattern matching syntax
merge_request: 18902
author:
type: added

View file

@ -530,6 +530,16 @@ Below you can find supported syntax reference:
`$STAGING` value needs to a string, with length higher than zero.
Variable that contains only whitespace characters is not an empty variable.
1. Pattern matching _(added in 11.0)_
> Example: `$VARIABLE =~ /^content.*/`
It is possible perform pattern matching against a variable and regular
expression. Expression like this evaluates to truth if matches are found.
Pattern matching is case-sensitive by default. Use `i` flag modifier, like
`/pattern/i` to make a pattern case-insensitive.
### Unsupported predefined variables
Because GitLab evaluates variables before creating jobs, we do not support a

View file

@ -344,10 +344,11 @@ job:
kubernetes: active
```
Example of using variables expressions:
Examples of using variables expressions:
```yaml
deploy:
script: cap staging deploy
only:
refs:
- branches
@ -356,6 +357,16 @@ deploy:
- $STAGING
```
Another use case is exluding jobs depending on a commit message _(added in 11.0)_:
```yaml
end-to-end:
script: rake test:end-to-end
except:
variables:
- $CI_COMMIT_MESSAGE =~ /skip-end-to-end-tests/
```
Learn more about variables expressions on [a separate page][variables-expressions].
## `tags`

View file

@ -0,0 +1,10 @@
module Gitlab
module Ci
module Pipeline
module Expression
ExpressionError = Class.new(StandardError)
RuntimeError = Class.new(ExpressionError)
end
end
end
end

View file

@ -0,0 +1,29 @@
module Gitlab
module Ci
module Pipeline
module Expression
module Lexeme
class Matches < Lexeme::Operator
PATTERN = /=~/.freeze
def initialize(left, right)
@left = left
@right = right
end
def evaluate(variables = {})
text = @left.evaluate(variables)
regexp = @right.evaluate(variables)
regexp.scan(text.to_s).any?
end
def self.build(_value, behind, ahead)
new(behind, ahead)
end
end
end
end
end
end
end

View file

@ -0,0 +1,33 @@
module Gitlab
module Ci
module Pipeline
module Expression
module Lexeme
require_dependency 're2'
class Pattern < Lexeme::Value
PATTERN = %r{^/.+/[ismU]*$}.freeze
def initialize(regexp)
@value = regexp
unless Gitlab::UntrustedRegexp.valid?(@value)
raise Lexer::SyntaxError, 'Invalid regular expression!'
end
end
def evaluate(variables = {})
Gitlab::UntrustedRegexp.fabricate(@value)
rescue RegexpError
raise Expression::RuntimeError, 'Invalid regular expression!'
end
def self.build(string)
new(string)
end
end
end
end
end
end
end

View file

@ -5,15 +5,17 @@ module Gitlab
class Lexer
include ::Gitlab::Utils::StrongMemoize
SyntaxError = Class.new(Expression::ExpressionError)
LEXEMES = [
Expression::Lexeme::Variable,
Expression::Lexeme::String,
Expression::Lexeme::Pattern,
Expression::Lexeme::Null,
Expression::Lexeme::Equals
Expression::Lexeme::Equals,
Expression::Lexeme::Matches
].freeze
SyntaxError = Class.new(Statement::StatementError)
MAX_TOKENS = 100
def initialize(statement, max_tokens: MAX_TOKENS)

View file

@ -3,15 +3,16 @@ module Gitlab
module Pipeline
module Expression
class Statement
StatementError = Class.new(StandardError)
StatementError = Class.new(Expression::ExpressionError)
GRAMMAR = [
%w[variable],
%w[variable equals string],
%w[variable equals variable],
%w[variable equals null],
%w[string equals variable],
%w[null equals variable],
%w[variable]
%w[variable matches pattern]
].freeze
def initialize(statement, variables = {})
@ -35,11 +36,13 @@ module Gitlab
def truthful?
evaluate.present?
rescue Expression::ExpressionError
false
end
def valid?
parse_tree.is_a?(Lexeme::Base)
rescue StatementError
rescue Expression::ExpressionError
false
end
end

View file

@ -9,7 +9,9 @@ module Gitlab
# there is a strict limit on total execution time. See the RE2 documentation
# at https://github.com/google/re2/wiki/Syntax for more details.
class UntrustedRegexp
delegate :===, to: :regexp
require_dependency 're2'
delegate :===, :source, to: :regexp
def initialize(pattern, multiline: false)
if multiline
@ -35,6 +37,10 @@ module Gitlab
RE2.Replace(text, regexp, rewrite)
end
def ==(other)
self.source == other.source
end
# Handles regular expressions with the preferred RE2 library where possible
# via UntustedRegex. Falls back to Ruby's built-in regular expression library
# when the syntax would be invalid in RE2.
@ -48,6 +54,24 @@ module Gitlab
Regexp.new(pattern)
end
def self.valid?(pattern)
!!self.fabricate(pattern)
rescue RegexpError
false
end
def self.fabricate(pattern)
matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$})
raise RegexpError, 'Invalid regular expression!' if matches.nil?
expression = matches[:regexp]
flags = matches[:flags]
expression.prepend("(?#{flags})") if flags.present?
self.new(expression, multiline: false)
end
private
attr_reader :regexp

View file

@ -111,7 +111,15 @@ describe Gitlab::Ci::Config::Entry::Policy do
context 'when specifying invalid variables expressions token' do
let(:config) { { variables: ['$MY_VAR == 123'] } }
it 'reports an error about invalid statement' do
it 'reports an error about invalid expression' do
expect(entry.errors).to include /invalid expression syntax/
end
end
context 'when using invalid variables expressions regexp' do
let(:config) { { variables: ['$MY_VAR =~ /some ( thing/'] } }
it 'reports an error about invalid expression' do
expect(entry.errors).to include /invalid expression syntax/
end
end

View file

@ -0,0 +1,80 @@
require 'fast_spec_helper'
require_dependency 're2'
describe Gitlab::Ci::Pipeline::Expression::Lexeme::Matches do
let(:left) { double('left') }
let(:right) { double('right') }
describe '.build' do
it 'creates a new instance of the token' do
expect(described_class.build('=~', left, right))
.to be_a(described_class)
end
end
describe '.type' do
it 'is an operator' do
expect(described_class.type).to eq :operator
end
end
describe '#evaluate' do
it 'returns false when left and right do not match' do
allow(left).to receive(:evaluate).and_return('my-string')
allow(right).to receive(:evaluate)
.and_return(Gitlab::UntrustedRegexp.new('something'))
operator = described_class.new(left, right)
expect(operator.evaluate).to eq false
end
it 'returns true when left and right match' do
allow(left).to receive(:evaluate).and_return('my-awesome-string')
allow(right).to receive(:evaluate)
.and_return(Gitlab::UntrustedRegexp.new('awesome.string$'))
operator = described_class.new(left, right)
expect(operator.evaluate).to eq true
end
it 'supports matching against a nil value' do
allow(left).to receive(:evaluate).and_return(nil)
allow(right).to receive(:evaluate)
.and_return(Gitlab::UntrustedRegexp.new('pattern'))
operator = described_class.new(left, right)
expect(operator.evaluate).to eq false
end
it 'supports multiline strings' do
allow(left).to receive(:evaluate).and_return <<~TEXT
My awesome contents
My-text-string!
TEXT
allow(right).to receive(:evaluate)
.and_return(Gitlab::UntrustedRegexp.new('text-string'))
operator = described_class.new(left, right)
expect(operator.evaluate).to eq true
end
it 'supports regexp flags' do
allow(left).to receive(:evaluate).and_return <<~TEXT
My AWESOME content
TEXT
allow(right).to receive(:evaluate)
.and_return(Gitlab::UntrustedRegexp.new('(?i)awesome'))
operator = described_class.new(left, right)
expect(operator.evaluate).to eq true
end
end
end

View file

@ -0,0 +1,96 @@
require 'fast_spec_helper'
describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do
describe '.build' do
it 'creates a new instance of the token' do
expect(described_class.build('/.*/'))
.to be_a(described_class)
end
it 'raises an error if pattern is invalid' do
expect { described_class.build('/ some ( thin/i') }
.to raise_error(Gitlab::Ci::Pipeline::Expression::Lexer::SyntaxError)
end
end
describe '.type' do
it 'is a value lexeme' do
expect(described_class.type).to eq :value
end
end
describe '.scan' do
it 'correctly identifies a pattern token' do
scanner = StringScanner.new('/pattern/')
token = described_class.scan(scanner)
expect(token).not_to be_nil
expect(token.build.evaluate)
.to eq Gitlab::UntrustedRegexp.new('pattern')
end
it 'is a greedy scanner for regexp boundaries' do
scanner = StringScanner.new('/some .* / pattern/')
token = described_class.scan(scanner)
expect(token).not_to be_nil
expect(token.build.evaluate)
.to eq Gitlab::UntrustedRegexp.new('some .* / pattern')
end
it 'does not allow to use an empty pattern' do
scanner = StringScanner.new(%(//))
token = described_class.scan(scanner)
expect(token).to be_nil
end
it 'support single flag' do
scanner = StringScanner.new('/pattern/i')
token = described_class.scan(scanner)
expect(token).not_to be_nil
expect(token.build.evaluate)
.to eq Gitlab::UntrustedRegexp.new('(?i)pattern')
end
it 'support multiple flags' do
scanner = StringScanner.new('/pattern/im')
token = described_class.scan(scanner)
expect(token).not_to be_nil
expect(token.build.evaluate)
.to eq Gitlab::UntrustedRegexp.new('(?im)pattern')
end
it 'does not support arbitrary flags' do
scanner = StringScanner.new('/pattern/x')
token = described_class.scan(scanner)
expect(token).to be_nil
end
end
describe '#evaluate' do
it 'returns a regular expression' do
regexp = described_class.new('/abc/')
expect(regexp.evaluate).to eq Gitlab::UntrustedRegexp.new('abc')
end
it 'raises error if evaluated regexp is not valid' do
allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true)
regexp = described_class.new('/invalid ( .*/')
expect { regexp.evaluate }
.to raise_error(Gitlab::Ci::Pipeline::Expression::RuntimeError)
end
end
end

View file

@ -6,7 +6,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do
end
describe '#tokens' do
it 'tokenss single value' do
it 'returns single value' do
tokens = described_class.new('$VARIABLE').tokens
expect(tokens).to be_one
@ -20,14 +20,14 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do
expect(tokens).to all(be_an_instance_of(token_class))
end
it 'tokenss multiple values of the same token' do
it 'returns multiple values of the same token' do
tokens = described_class.new("$VARIABLE1 $VARIABLE2").tokens
expect(tokens.size).to eq 2
expect(tokens).to all(be_an_instance_of(token_class))
end
it 'tokenss multiple values with different tokens' do
it 'returns multiple values with different tokens' do
tokens = described_class.new('$VARIABLE "text" "value"').tokens
expect(tokens.size).to eq 3
@ -36,7 +36,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do
expect(tokens.third.value).to eq '"value"'
end
it 'tokenss tokens and operators' do
it 'returns tokens and operators' do
tokens = described_class.new('$VARIABLE == "text"').tokens
expect(tokens.size).to eq 3

View file

@ -1,4 +1,4 @@
require 'spec_helper'
require 'fast_spec_helper'
describe Gitlab::Ci::Pipeline::Expression::Parser do
describe '#tree' do

View file

@ -1,4 +1,5 @@
require 'spec_helper'
require 'fast_spec_helper'
require 'rspec-parameterized'
describe Gitlab::Ci::Pipeline::Expression::Statement do
subject do
@ -36,7 +37,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do
'== "123"', # invalid left side
'"some string"', # only string provided
'$VAR ==', # invalid right side
'12345', # unknown syntax
'null', # missing lexemes
'' # empty statement
]
@ -44,7 +45,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do
context "when expression grammar is #{syntax.inspect}" do
let(:text) { syntax }
it 'aises a statement error exception' do
it 'raises a statement error exception' do
expect { subject.parse_tree }
.to raise_error described_class::StatementError
end
@ -82,48 +83,66 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do
end
describe '#evaluate' do
statements = [
['$PRESENT_VARIABLE == "my variable"', true],
["$PRESENT_VARIABLE == 'my variable'", true],
['"my variable" == $PRESENT_VARIABLE', true],
['$PRESENT_VARIABLE == null', false],
['$EMPTY_VARIABLE == null', false],
['"" == $EMPTY_VARIABLE', true],
['$EMPTY_VARIABLE', ''],
['$UNDEFINED_VARIABLE == null', true],
['null == $UNDEFINED_VARIABLE', true],
['$PRESENT_VARIABLE', 'my variable'],
['$UNDEFINED_VARIABLE', nil]
]
using RSpec::Parameterized::TableSyntax
statements.each do |expression, value|
context "when using expression `#{expression}`" do
let(:text) { expression }
where(:expression, :value) do
'$PRESENT_VARIABLE == "my variable"' | true
'"my variable" == $PRESENT_VARIABLE' | true
'$PRESENT_VARIABLE == null' | false
'$EMPTY_VARIABLE == null' | false
'"" == $EMPTY_VARIABLE' | true
'$EMPTY_VARIABLE' | ''
'$UNDEFINED_VARIABLE == null' | true
'null == $UNDEFINED_VARIABLE' | true
'$PRESENT_VARIABLE' | 'my variable'
'$UNDEFINED_VARIABLE' | nil
"$PRESENT_VARIABLE =~ /var.*e$/" | true
"$PRESENT_VARIABLE =~ /^var.*/" | false
"$EMPTY_VARIABLE =~ /var.*/" | false
"$UNDEFINED_VARIABLE =~ /var.*/" | false
"$PRESENT_VARIABLE =~ /VAR.*/i" | true
end
it "evaluates to `#{value.inspect}`" do
expect(subject.evaluate).to eq value
end
with_them do
let(:text) { expression }
it "evaluates to `#{params[:value].inspect}`" do
expect(subject.evaluate).to eq value
end
end
end
describe '#truthful?' do
statements = [
['$PRESENT_VARIABLE == "my variable"', true],
["$PRESENT_VARIABLE == 'no match'", false],
['$UNDEFINED_VARIABLE == null', true],
['$PRESENT_VARIABLE', true],
['$UNDEFINED_VARIABLE', false],
['$EMPTY_VARIABLE', false]
]
using RSpec::Parameterized::TableSyntax
statements.each do |expression, value|
context "when using expression `#{expression}`" do
let(:text) { expression }
where(:expression, :value) do
'$PRESENT_VARIABLE == "my variable"' | true
"$PRESENT_VARIABLE == 'no match'" | false
'$UNDEFINED_VARIABLE == null' | true
'$PRESENT_VARIABLE' | true
'$UNDEFINED_VARIABLE' | false
'$EMPTY_VARIABLE' | false
'$INVALID = 1' | false
"$PRESENT_VARIABLE =~ /var.*/" | true
"$UNDEFINED_VARIABLE =~ /var.*/" | false
end
it "returns `#{value.inspect}`" do
expect(subject.truthful?).to eq value
end
with_them do
let(:text) { expression }
it "returns `#{params[:value].inspect}`" do
expect(subject.truthful?).to eq value
end
end
context 'when evaluating expression raises an error' do
let(:text) { '$PRESENT_VARIABLE' }
it 'returns false' do
allow(subject).to receive(:evaluate)
.and_raise(described_class::StatementError)
expect(subject.truthful?).to be_falsey
end
end
end

View file

@ -1,4 +1,4 @@
require 'spec_helper'
require 'fast_spec_helper'
describe Gitlab::Ci::Pipeline::Expression::Token do
let(:value) { '$VARIABLE' }

View file

@ -1,6 +1,49 @@
require 'spec_helper'
require 'fast_spec_helper'
require 'support/shared_examples/malicious_regexp_shared_examples'
describe Gitlab::UntrustedRegexp do
describe '.valid?' do
it 'returns true if regexp is valid' do
expect(described_class.valid?('/some ( thing/'))
.to be false
end
it 'returns true if regexp is invalid' do
expect(described_class.valid?('/some .* thing/'))
.to be true
end
end
describe '.fabricate' do
context 'when regexp is using /regexp/ scheme with flags' do
it 'fabricates regexp with a single flag' do
regexp = described_class.fabricate('/something/i')
expect(regexp).to eq described_class.new('(?i)something')
expect(regexp.scan('SOMETHING')).to be_one
end
it 'fabricates regexp with multiple flags' do
regexp = described_class.fabricate('/something/im')
expect(regexp).to eq described_class.new('(?im)something')
end
it 'fabricates regexp without flags' do
regexp = described_class.fabricate('/something/')
expect(regexp).to eq described_class.new('something')
end
end
context 'when regexp is a raw pattern' do
it 'raises an error' do
expect { described_class.fabricate('some .* thing') }
.to raise_error(RegexpError)
end
end
end
describe '#initialize' do
subject { described_class.new(pattern) }

View file

@ -1,3 +1,5 @@
require 'timeout'
shared_examples 'malicious regexp' do
let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' }
let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }