gitlab-org--gitlab-foss/spec/lib/gitlab/path_regex_spec.rb

546 lines
18 KiB
Ruby

# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::PathRegex do
let(:starting_with_namespace) { %r{^/\*namespace_id/:(project_)?id} }
let(:non_param_parts) { %r{[^:*][a-z\-_/]*} }
let(:any_other_path_part) { %r{[a-z\-_/:]*} }
let(:wildcard_segment) { /\*/ }
# Pass in a full path to remove the format segment:
# `/ci/lint(.:format)` -> `/ci/lint`
def without_format(path)
path.split('(', 2)[0]
end
# Pass in a full path and get the last segment before a wildcard
# That's not a parameter
# `/*namespace_id/:project_id/builds/artifacts/*ref_name_and_path`
# -> 'builds/artifacts'
def path_before_wildcard(path)
path = path.gsub(starting_with_namespace, "")
path_segments = path.split('/').reject(&:empty?)
wildcard_index = path_segments.index { |segment| parameter?(segment) }
segments_before_wildcard = path_segments[0..wildcard_index - 1]
segments_before_wildcard.join('/')
end
def parameter?(segment)
segment =~ /[*:]/
end
# If the path is reserved. Then no conflicting paths can# be created for any
# route using this reserved word.
#
# Both `builds/artifacts` & `build` are covered by reserving the word
# `build`
def wildcards_include?(path)
described_class::PROJECT_WILDCARD_ROUTES.include?(path) ||
described_class::PROJECT_WILDCARD_ROUTES.include?(path.split('/').first)
end
def failure_message(constant_name, migration_helper, missing_words: [], additional_words: [])
missing_words = Array(missing_words)
additional_words = Array(additional_words)
message = ""
if missing_words.any?
message += <<-MISSING
Found new routes that could cause conflicts with existing namespaced routes
for groups or projects.
Nest <#{missing_words.join(', ')}> in a route containing `-`, that way
we know there will be no conflicts with groups or projects created with those
paths.
MISSING
end
if additional_words.any?
message += <<-ADDITIONAL
Is <#{additional_words.join(', ')}> in `#{constant_name}` required?
If they are really required, update these specs to reflect that.
ADDITIONAL
end
message
end
let(:all_non_legacy_routes) do
route_set = Rails.application.routes
routes_collection = route_set.routes
routes_array = routes_collection.routes
non_legacy_routes = routes_array.reject do |route|
route.name.to_s =~ /legacy_(\w*)_redirect/
end
non_deprecated_redirect_routes = non_legacy_routes.reject do |route|
app = route.app
# `app.app` is either another app, or `self`. We want to find the final app.
app = app.app while app.try(:app) && app.app != app
app.is_a?(ActionDispatch::Routing::PathRedirect) && app.block.include?('/-/')
end
non_deprecated_redirect_routes.map { |route| route.path.spec.to_s }
end
let(:routes_without_format) { all_non_legacy_routes.map { |path| without_format(path) } }
# Routes not starting with `/:` or `/*`
# all routes not starting with a param
let(:routes_not_starting_in_wildcard) { routes_without_format.select { |p| p !~ %r{^/[:*]} } }
let(:top_level_words) do
routes_not_starting_in_wildcard
.map { |route| route.split('/')[1] }
.concat(ee_top_level_words)
.concat(files_in_public)
.concat(Array(API::API.prefix.to_s))
.concat(sitemap_words)
.compact
.uniq
end
let(:sitemap_words) do
%w(sitemap sitemap.xml sitemap.xml.gz)
end
let(:ee_top_level_words) do
%w(unsubscribes v2)
end
let(:files_in_public) do
git = Gitlab.config.git.bin_path
tracked = `cd #{Rails.root} && #{git} ls-files public`
.split("\n")
.map { |entry| entry.start_with?('public/-/') ? '-' : entry.gsub('public/', '') }
.uniq
tracked + %w(assets uploads)
end
# All routes that start with a namespaced path, that have 1 or more
# path-segments before having another wildcard parameter.
# - Starting with paths:
# - `/*namespace_id/:project_id/`
# - `/*namespace_id/:id/`
# - Followed by one or more path-parts not starting with `:` or `*`
# - Followed by a path-part that includes a wildcard parameter `*`
# At the time of writing these routes match: http://rubular.com/r/Rv2pDE5Dvw
let(:namespaced_wildcard_routes) do
routes_without_format.select do |p|
p =~ %r{#{starting_with_namespace}/#{non_param_parts}/#{any_other_path_part}#{wildcard_segment}}
end
end
# This will return all paths that are used in a namespaced route
# before another wildcard path:
#
# /*namespace_id/:project_id/builds/artifacts/*ref_name_and_path
# /*namespace_id/:project_id/info/lfs/objects/*oid
# /*namespace_id/:project_id/commits/*id
# /*namespace_id/:project_id/builds/:build_id/artifacts/file/*path
# -> ['builds/artifacts', 'info/lfs/objects', 'commits', 'artifacts/file']
let(:all_wildcard_paths) do
namespaced_wildcard_routes.map do |route|
path_before_wildcard(route)
end.uniq
end
let(:starting_with_group) { %r{^/groups/\*(group_)?id/} }
let(:group_routes) do
routes_without_format.grep(starting_with_group)
end
let(:paths_after_group_id) do
group_routes.map do |route|
route.gsub(starting_with_group, '').split('/').first
end.uniq
end
describe 'TOP_LEVEL_ROUTES' do
it 'includes all the top level namespaces' do
failure_block = lambda do
missing_words = top_level_words - described_class::TOP_LEVEL_ROUTES
additional_words = described_class::TOP_LEVEL_ROUTES - top_level_words
failure_message('TOP_LEVEL_ROUTES', 'rename_root_paths',
missing_words: missing_words, additional_words: additional_words)
end
expect(described_class::TOP_LEVEL_ROUTES)
.to contain_exactly(*top_level_words), failure_block
end
# We ban new items in this list, see https://gitlab.com/gitlab-org/gitlab/-/issues/215362
it 'does not allow expansion' do
expect(described_class::TOP_LEVEL_ROUTES.size).to eq(44)
end
end
describe 'GROUP_ROUTES' do
it "don't contain a second wildcard" do
failure_block = lambda do
missing_words = paths_after_group_id - described_class::GROUP_ROUTES
additional_words = described_class::GROUP_ROUTES - paths_after_group_id
failure_message('GROUP_ROUTES', 'rename_child_paths',
missing_words: missing_words, additional_words: additional_words)
end
expect(described_class::GROUP_ROUTES)
.to contain_exactly(*paths_after_group_id), failure_block
end
# We ban new items in this list, see https://gitlab.com/gitlab-org/gitlab/-/issues/215362
it 'does not allow expansion' do
expect(described_class::GROUP_ROUTES.size).to eq(1)
end
end
describe 'PROJECT_WILDCARD_ROUTES' do
it 'includes all paths that can be used after a namespace/project path' do
aggregate_failures do
all_wildcard_paths.each do |path|
expect(wildcards_include?(path))
.to be(true), failure_message('PROJECT_WILDCARD_ROUTES', 'rename_wildcard_paths', missing_words: path)
end
end
end
# We ban new items in this list, see https://gitlab.com/gitlab-org/gitlab/-/issues/215362
it 'does not allow expansion' do
expect(described_class::PROJECT_WILDCARD_ROUTES.size).to eq(21)
end
end
describe '.root_namespace_route_regex' do
subject { %r{\A#{described_class.root_namespace_route_regex}/\z} }
it 'rejects top level routes' do
expect(subject).not_to match('admin/')
expect(subject).not_to match('api/')
expect(subject).not_to match('.well-known/')
expect(subject).not_to match('sitemap.xml/')
expect(subject).not_to match('sitemap.xml.gz/')
end
it 'accepts project wildcard routes' do
expect(subject).to match('blob/')
expect(subject).to match('edit/')
expect(subject).to match('wikis/')
end
it 'accepts group routes' do
expect(subject).to match('activity/')
end
it 'is not case sensitive' do
expect(subject).not_to match('Users/')
end
it 'does not allow extra slashes' do
expect(subject).not_to match('/blob/')
expect(subject).not_to match('blob//')
end
end
describe '.full_namespace_path_regex' do
subject { described_class.full_namespace_path_regex }
context 'at the top level' do
context 'when the final level' do
it 'rejects top level routes' do
expect(subject).not_to match('admin/')
expect(subject).not_to match('api/')
expect(subject).not_to match('.well-known/')
end
it 'accepts project wildcard routes' do
expect(subject).to match('blob/')
expect(subject).to match('edit/')
expect(subject).to match('wikis/')
end
it 'accepts group routes' do
expect(subject).to match('activity/')
end
end
context 'when more levels follow' do
it 'rejects top level routes' do
expect(subject).not_to match('admin/more/')
expect(subject).not_to match('api/more/')
expect(subject).not_to match('.well-known/more/')
end
it 'accepts project wildcard routes' do
expect(subject).to match('blob/more/')
expect(subject).to match('edit/more/')
expect(subject).to match('wikis/more/')
expect(subject).to match('environments/folders/')
expect(subject).to match('info/lfs/objects/')
end
it 'accepts group routes' do
expect(subject).to match('activity/more/')
end
end
end
context 'at the second level' do
context 'when the final level' do
it 'accepts top level routes' do
expect(subject).to match('root/admin/')
expect(subject).to match('root/api/')
expect(subject).to match('root/.well-known/')
end
it 'rejects project wildcard routes' do
expect(subject).not_to match('root/blob/')
expect(subject).not_to match('root/edit/')
expect(subject).not_to match('root/wikis/')
expect(subject).not_to match('root/environments/folders/')
expect(subject).not_to match('root/info/lfs/objects/')
end
it 'rejects group routes' do
expect(subject).not_to match('root/-/')
end
end
context 'when more levels follow' do
it 'accepts top level routes' do
expect(subject).to match('root/admin/more/')
expect(subject).to match('root/api/more/')
expect(subject).to match('root/.well-known/more/')
end
it 'rejects project wildcard routes' do
expect(subject).not_to match('root/blob/more/')
expect(subject).not_to match('root/edit/more/')
expect(subject).not_to match('root/wikis/more/')
expect(subject).not_to match('root/environments/folders/more/')
expect(subject).not_to match('root/info/lfs/objects/more/')
end
it 'rejects group routes' do
expect(subject).not_to match('root/-/')
end
end
end
it 'is not case sensitive' do
expect(subject).not_to match('root/Blob/')
end
it 'does not allow extra slashes' do
expect(subject).not_to match('/root/admin/')
expect(subject).not_to match('root/admin//')
end
end
describe '.project_route_regex' do
subject { %r{\A#{described_class.project_route_regex}/\z} }
it 'accepts top level routes' do
expect(subject).to match('admin/')
expect(subject).to match('api/')
expect(subject).to match('.well-known/')
end
it 'rejects project wildcard routes' do
expect(subject).not_to match('blob/')
expect(subject).not_to match('edit/')
expect(subject).not_to match('wikis/')
expect(subject).not_to match('environments/folders/')
expect(subject).not_to match('info/lfs/objects/')
end
it 'accepts group routes' do
expect(subject).to match('analytics/')
end
it 'is not case sensitive' do
expect(subject).not_to match('Blob/')
end
it 'does not allow extra slashes' do
expect(subject).not_to match('/admin/')
expect(subject).not_to match('admin//')
end
end
describe '.full_project_path_regex' do
subject { described_class.full_project_path_regex }
it 'accepts top level routes' do
expect(subject).to match('root/admin/')
expect(subject).to match('root/api/')
expect(subject).to match('root/.well-known/')
end
it 'rejects project wildcard routes' do
expect(subject).not_to match('root/blob/')
expect(subject).not_to match('root/edit/')
expect(subject).not_to match('root/wikis/')
expect(subject).not_to match('root/environments/folders/')
expect(subject).not_to match('root/info/lfs/objects/')
end
it 'accepts group routes' do
expect(subject).to match('root/analytics/')
end
it 'is not case sensitive' do
expect(subject).not_to match('root/Blob/')
end
it 'does not allow extra slashes' do
expect(subject).not_to match('/root/admin/')
expect(subject).not_to match('root/admin//')
end
end
describe '.namespace_format_regex' do
subject { described_class.namespace_format_regex }
it { is_expected.to match('gitlab-ce') }
it { is_expected.to match('gitlab_git') }
it { is_expected.to match('_underscore.js') }
it { is_expected.to match('100px.com') }
it { is_expected.to match('gitlab.org') }
it { is_expected.not_to match('?gitlab') }
it { is_expected.not_to match('git lab') }
it { is_expected.not_to match('gitlab.git') }
it { is_expected.not_to match('gitlab.org.') }
it { is_expected.not_to match('gitlab.org/') }
it { is_expected.not_to match('/gitlab.org') }
it { is_expected.not_to match('gitlab git') }
end
describe '.project_path_format_regex' do
subject { described_class.project_path_format_regex }
it { is_expected.to match('gitlab-ce') }
it { is_expected.to match('gitlab_git') }
it { is_expected.to match('_underscore.js') }
it { is_expected.to match('100px.com') }
it { is_expected.not_to match('?gitlab') }
it { is_expected.not_to match('git lab') }
it { is_expected.not_to match('gitlab.git') }
end
context 'repository routes' do
# Paths that match a known container
let_it_be(:container_paths) do
[
'gitlab-org',
'gitlab-org/gitlab-test',
'gitlab-org/gitlab-test/snippets/1',
'gitlab-org/gitlab-test/snippets/foo', # ambiguous, we allow creating a sub-group called 'snippets'
'snippets/1'
]
end
# Paths that never match a container
let_it_be(:invalid_paths) do
[
'gitlab/',
'/gitlab',
'gitlab/foo/',
'?gitlab',
'git lab',
'/snippets/1',
'snippets/foo',
'gitlab-org/gitlab/snippets/'
]
end
let_it_be(:git_paths) { container_paths.map { |path| path + '.git' } }
let_it_be(:snippet_paths) { container_paths.grep(%r{snippets/\d}) }
let_it_be(:wiki_git_paths) { (container_paths - snippet_paths).map { |path| path + '.wiki.git' } }
let_it_be(:invalid_git_paths) { invalid_paths.map { |path| path + '.git' } }
def expect_route_match(paths)
paths.each { |path| is_expected.to match(path) }
end
def expect_no_route_match(paths)
paths.each { |path| is_expected.not_to match(path) }
end
describe '.repository_route_regex' do
subject { %r{\A#{described_class.repository_route_regex}\z} }
it 'matches the expected paths' do
expect_route_match(container_paths)
expect_no_route_match(invalid_paths + git_paths)
end
end
describe '.repository_git_route_regex' do
subject { %r{\A#{described_class.repository_git_route_regex}\z} }
it 'matches the expected paths' do
expect_route_match(git_paths + wiki_git_paths)
expect_no_route_match(container_paths + invalid_paths + invalid_git_paths)
end
end
describe '.repository_wiki_git_route_regex' do
subject { %r{\A#{described_class.repository_wiki_git_route_regex}\z} }
it 'matches the expected paths' do
expect_route_match(wiki_git_paths)
expect_no_route_match(git_paths + invalid_git_paths)
end
it { is_expected.not_to match('snippets/1.wiki.git') }
end
describe '.full_snippets_repository_path_regex' do
subject { described_class.full_snippets_repository_path_regex }
it 'matches the expected paths' do
expect_route_match(snippet_paths)
expect_no_route_match(container_paths - snippet_paths + git_paths + invalid_paths)
end
it { is_expected.not_to match('root/snippets/1') }
it { is_expected.not_to match('gitlab-org/gitlab-test/snippets/foo') }
end
end
describe '.container_image_regex' do
subject { described_class.container_image_regex }
it { is_expected.to match('gitlab-foss') }
it { is_expected.to match('gitlab_foss') }
it { is_expected.to match('gitlab-org/gitlab-foss') }
it { is_expected.to match('100px.com/100px.ruby') }
it 'only matches at most one slash' do
expect(subject.match('foo/bar/baz')[0]).to eq('foo/bar')
end
it 'does not match other non-word characters' do
expect(subject.match('ruby:2.7.0')[0]).to eq('ruby')
end
end
describe '.container_image_blob_sha_regex' do
subject { described_class.container_image_blob_sha_regex }
it { is_expected.to match('sha256:asdf1234567890ASDF') }
it { is_expected.to match('foo:123') }
it { is_expected.to match('a12bc3f590szp') }
it { is_expected.not_to match('') }
it 'does not match malicious characters' do
expect(subject.match('sha256:asdf1234%2f')[0]).to eq('sha256:asdf1234')
end
end
end