2018-10-22 03:00:50 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-05-24 16:59:26 -04:00
|
|
|
module Gitlab
|
|
|
|
module PathRegex
|
|
|
|
extend self
|
|
|
|
|
|
|
|
# All routes that appear on the top level must be listed here.
# This makes sure that groups cannot be created with these names,
# as their routes would be masked by the paths already in place.
#
# Example:
#   /api/api-project
#
#  the path `api` shouldn't be allowed because it would be masked by `api/*`
#
TOP_LEVEL_ROUTES = %w[
  -
  .well-known
  404.html
  422.html
  500.html
  502.html
  503.html
  abuse_reports
  admin
  api
  apple-touch-icon-precomposed.png
  apple-touch-icon.png
  assets
  autocomplete
  ci
  dashboard
  deploy.html
  explore
  favicon.ico
  favicon.png
  files
  groups
  health_check
  help
  import
  invites
  jwt
  login
  notification_settings
  oauth
  profile
  projects
  public
  robots.txt
  s
  search
  sent_notifications
  slash-command-logo.png
  snippets
  u
  unsubscribes
  uploads
  users
].freeze
|
|
|
|
|
|
|
|
# This list should contain all words following `/*namespace_id/:project_id` in
# routes that contain a second wildcard.
#
# Example:
#   /*namespace_id/:project_id/badges/*ref/build
#
# If `badges` was allowed as a project/group name, we would not be able to access the
# `badges` route for those projects:
#
# Consider a namespace with path `foo/bar` and a project called `badges`.
# The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg`
#
# When accessing this path the route would be matched to the `badges` path
# with the following params:
#   - namespace_id: `foo`
#   - project_id: `bar`
#   - ref: `badges/master`
#
# Failing to find the project, this would result in a 404.
#
# By rejecting `badges` the router can _count_ on the fact that `badges` will
# be preceded by the `namespace/project`.
PROJECT_WILDCARD_ROUTES = %w[
  -
  badges
  blame
  blob
  builds
  commits
  create
  create_dir
  edit
  environments/folders
  files
  find_file
  gitlab-lfs/objects
  info/lfs/objects
  new
  preview
  raw
  refs
  tree
  update
  wikis
].freeze
|
|
|
|
|
|
|
|
# These are all the paths that follow `/groups/*id/` or `/groups/*group_id`.
# We need to reject these because we have a `/groups/*id` page that is the same
# as the `/*id`.
#
# If we would allow a subgroup to be created with the name `activity` then
# this group would not be accessible through `/groups/parent/activity` since
# this would map to the activity-page of its parent.
GROUP_ROUTES = %w[
  -
].freeze
|
|
|
|
|
|
|
|
# Words a project path may not use: exactly the project wildcard routes
# (this is the same frozen array object, not a copy).
ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES
# Words a (sub)group path may not use: the union of the project wildcard
# routes and the group routes, with duplicates removed by `Array#|`.
ILLEGAL_GROUP_PATH_WORDS = (PROJECT_WILDCARD_ROUTES | GROUP_ROUTES).freeze
|
|
|
|
|
|
|
|
# The namespace regex is used in JavaScript to validate usernames in the "Register" form. However, JavaScript
# does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`.
# Since this is a non-trivial problem to solve in JavaScript (heavily complicate the regex, modify view code to
# allow non-regex validations, etc), `NAMESPACE_FORMAT_REGEX_JS` serves as a JavaScript-compatible version of
# `NAMESPACE_FORMAT_REGEX`, with the negative lookbehind assertion removed. This means that the client-side validation
# will pass for usernames ending in `.atom` and `.git`, but will be caught by the server-side validation.
#
# NOTE: `.freeze` binds tighter than `+`, so `A + 'literal'.freeze` freezes only
# the literal and leaves the concatenated result mutable. The concatenations
# below are parenthesized so that the constants themselves are frozen.
PATH_START_CHAR = '[a-zA-Z0-9_\.]'.freeze
PATH_REGEX_STR = (PATH_START_CHAR + '[a-zA-Z0-9_\-\.]*').freeze
NAMESPACE_FORMAT_REGEX_JS = (PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]').freeze

# Zero-width assertion: the text matched so far must not end in `.git` or `.atom`.
NO_SUFFIX_REGEX = /(?<!\.git|\.atom)/.freeze
# The JS pattern is wrapped in a non-capturing group so its top-level `|`
# does not swallow the suffix assertion.
NAMESPACE_FORMAT_REGEX = /(?:#{NAMESPACE_FORMAT_REGEX_JS})#{NO_SUFFIX_REGEX}/.freeze
PROJECT_PATH_FORMAT_REGEX = /(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX}/.freeze
# Zero or more `namespace/` segments followed by a final namespace segment.
FULL_NAMESPACE_FORMAT_REGEX = %r{(#{NAMESPACE_FORMAT_REGEX}/)*#{NAMESPACE_FORMAT_REGEX}}.freeze
|
|
|
|
|
|
|
|
# Unanchored, memoized regex for a top-level namespace path segment.
#
# A negative lookahead rejects any TOP_LEVEL_ROUTES word (compared
# case-insensitively) when it is immediately followed by `/`; the segment
# itself must satisfy NAMESPACE_FORMAT_REGEX.
def root_namespace_route_regex
  @root_namespace_route_regex ||= begin
    illegal_words = Regexp.new(Regexp.union(TOP_LEVEL_ROUTES).source, Regexp::IGNORECASE)

    single_line_regexp %r{
      (?!(#{illegal_words})/)
      #{NAMESPACE_FORMAT_REGEX}
    }x
  end
end
|
|
|
|
|
|
|
|
# Unanchored, memoized regex for a full (possibly nested) namespace path.
#
# The first segment must match `root_namespace_route_regex`. Each further
# `/segment` must satisfy NAMESPACE_FORMAT_REGEX and must not be an
# ILLEGAL_GROUP_PATH_WORDS word (case-insensitive) immediately followed by `/`.
def full_namespace_route_regex
  @full_namespace_route_regex ||= begin
    illegal_words = Regexp.new(Regexp.union(ILLEGAL_GROUP_PATH_WORDS).source, Regexp::IGNORECASE)

    single_line_regexp %r{
      #{root_namespace_route_regex}
      (?:
        /
        (?!#{illegal_words}/)
        #{NAMESPACE_FORMAT_REGEX}
      )*
    }x
  end
end
|
|
|
|
|
|
|
|
# Unanchored, memoized regex for a project path segment.
#
# A negative lookahead rejects any ILLEGAL_PROJECT_PATH_WORDS word
# (case-insensitive) when it is immediately followed by `/`; the segment
# itself must satisfy PROJECT_PATH_FORMAT_REGEX.
def project_route_regex
  @project_route_regex ||= begin
    illegal_words = Regexp.new(Regexp.union(ILLEGAL_PROJECT_PATH_WORDS).source, Regexp::IGNORECASE)

    single_line_regexp %r{
      (?!(#{illegal_words})/)
      #{PROJECT_PATH_FORMAT_REGEX}
    }x
  end
end
|
|
|
|
|
|
|
|
# Memoized, frozen regex: `project_route_regex` followed by a literal `.git`.
def project_git_route_regex
  @project_git_route_regex ||= /#{project_route_regex}\.git/.freeze
end
|
|
|
|
|
|
|
|
# Memoized regex matching a whole string that is a full namespace path
# followed by a trailing `/`. Frozen, like the other memoized regexes here.
def full_namespace_path_regex
  @full_namespace_path_regex ||= %r{\A#{full_namespace_route_regex}/\z}.freeze
end
|
|
|
|
|
|
|
|
# Memoized regex matching a whole string of the form
# `<full namespace path>/<project path>/` (trailing slash required).
# Frozen, like the other memoized regexes here.
def full_project_path_regex
  @full_project_path_regex ||= %r{\A#{full_namespace_route_regex}/#{project_route_regex}/\z}.freeze
end
|
|
|
|
|
2018-01-25 07:26:52 -05:00
|
|
|
# Memoized regex matching a whole string of the form
# `[/]<full namespace path>/<project path>.git`.
#
# Exposes the named captures `namespace_path` and `project_path`.
# `/` needs no escaping inside `%r{}`, so the redundant `\/` escapes were dropped.
def full_project_git_path_regex
  @full_project_git_path_regex ||=
    %r{\A/?(?<namespace_path>#{full_namespace_route_regex})/(?<project_path>#{project_route_regex})\.git\z}.freeze
end
|
|
|
|
|
2017-05-24 16:59:26 -04:00
|
|
|
# Memoized, frozen regex that validates a whole string (`\A`..`\z`)
# against NAMESPACE_FORMAT_REGEX.
def namespace_format_regex
  @namespace_format_regex ||= /\A#{NAMESPACE_FORMAT_REGEX}\z/.freeze
end
|
|
|
|
|
|
|
|
# Human-readable description of the namespace path rules, phrased to follow
# a subject (e.g. an attribute name) in a validation message.
#
# The original had a dangling `\` after the last fragment, which spliced the
# method's `end` onto the string line; the continuation now stops at the
# final fragment. The returned text is unchanged.
def namespace_format_message
  "can contain only letters, digits, '_', '-' and '.'. " \
    "Cannot start with '-' or end in '.', '.git' or '.atom'."
end
|
|
|
|
|
|
|
|
# Memoized, frozen regex that validates a whole string (`\A`..`\z`)
# against PROJECT_PATH_FORMAT_REGEX.
def project_path_format_regex
  @project_path_format_regex ||= /\A#{PROJECT_PATH_FORMAT_REGEX}\z/.freeze
end
|
|
|
|
|
|
|
|
# Human-readable description of the project path rules, phrased to follow
# a subject (e.g. an attribute name) in a validation message.
#
# The original had a dangling `\` after the last fragment, which spliced the
# method's `end` onto the string line; the continuation now stops at the
# final fragment. The returned text is unchanged.
def project_path_format_message
  "can contain only letters, digits, '_', '-' and '.'. " \
    "Cannot start with '-', end in '.git' or end in '.atom'"
end
|
|
|
|
|
|
|
|
# Memoized, frozen regex matching any one of the listed archive extensions
# (zip, tar, tar.gz, tgz, gz, tar.bz2, tbz, tbz2, tb2, bz2) as a capture group.
#
# The regex is unanchored and earlier alternatives win, so on its own it
# matches `tar` inside `tar.gz`; anchor it when the full extension matters.
def archive_formats_regex
  @archive_formats_regex ||= /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze
end
|
|
|
|
|
|
|
|
# Memoized regex for a valid git ref name, see:
# https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
#
# The pattern is written in extended mode with `(?# ...)` comments naming the
# rule each part enforces, then collapsed to one line by `single_line_regexp`.
# The result is unanchored; callers add `\A`/`\z` as needed.
def git_reference_regex
  @git_reference_regex ||= single_line_regexp %r{
    (?!
       (?# doesn't begins with)
       \/|                    (?# rule #6)
       (?# doesn't contain)
       .*(?:
          [\/.]\.|            (?# rule #1,3)
          \/\/|               (?# rule #6)
          @\{|                (?# rule #8)
          \\                  (?# rule #9)
       )
    )
    [^\000-\040\177~^:?*\[]+  (?# rule #4-5)
    (?# doesn't end with)
    (?<!\.lock)               (?# rule #1)
    (?<![\/.])                (?# rule #6-7)
  }x
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Turns a multiline extended regexp into a single line one,
# because `rake routes` breaks on multiline regexes.
#
# Removes `(?# ...)` comment groups and then ALL whitespace from the source,
# and rebuilds the regexp with the EXTENDED flag cleared and every other
# option preserved.
#
# The flag is cleared with `& ~` rather than toggled with `^`: XOR would
# switch EXTENDED *on* for an input that was not extended to begin with.
#
# @param regex [Regexp] pattern written with the `x` flag
# @return [Regexp] frozen single-line equivalent
def single_line_regexp(regex)
  source = regex.source.gsub(/\(\?#.+?\)/, '').gsub(/\s+/, '')

  Regexp.new(source, regex.options & ~Regexp::EXTENDED).freeze
end
|
|
|
|
end
|
|
|
|
end
|