gitlab-org--gitlab-foss/app/models/repository.rb
Zeger-Jan van de Weg ec4423665c
Gitlab::Shell works on shard name, not path
Direct disk access is done through Gitaly now, so the legacy path was
deprecated. This path was used in Gitlab::Shell however. This required
the refactoring in this commit.

Added is the removal of direct path access on the project model, as that
lookup wasn't needed anymore is most cases.

Closes https://gitlab.com/gitlab-org/gitaly/issues/1111
2018-04-25 13:36:22 +02:00

982 lines
25 KiB
Ruby

require 'securerandom'
class Repository
REF_MERGE_REQUEST = 'merge-requests'.freeze
REF_KEEP_AROUND = 'keep-around'.freeze
REF_ENVIRONMENTS = 'environments'.freeze
MAX_DIVERGING_COUNT = 1000
RESERVED_REFS_NAMES = %W[
heads
tags
replace
#{REF_ENVIRONMENTS}
#{REF_KEEP_AROUND}
#{REF_ENVIRONMENTS}
].freeze
include Gitlab::ShellAdapter
include Gitlab::RepositoryCacheAdapter
attr_accessor :full_path, :disk_path, :project, :is_wiki
delegate :ref_name_for_sha, to: :raw_repository
delegate :bundle_to_disk, :create_from_bundle, to: :raw_repository
CreateTreeError = Class.new(StandardError)
# Methods that cache data from the Git repository.
#
# Each entry in this Array should have a corresponding method with the exact
# same name. The cache key used by those methods must also match method's
# name.
#
# For example, for entry `:commit_count` there's a method called `commit_count` which
# stores its data in the `commit_count` cache key.
CACHED_METHODS = %i(size commit_count rendered_readme contribution_guide
changelog license_blob license_key gitignore koding_yml
gitlab_ci_yml branch_names tag_names branch_count
tag_count avatar exists? root_ref has_visible_content?
issue_template_names merge_request_template_names).freeze
# Methods that use cache_method but only memoize the value
MEMOIZED_CACHED_METHODS = %i(license).freeze
# Certain method caches should be refreshed when certain types of files are
# changed. This Hash maps file types (as returned by Gitlab::FileDetector) to
# the corresponding methods to call for refreshing caches.
METHOD_CACHES_FOR_FILE_TYPES = {
readme: :rendered_readme,
changelog: :changelog,
license: %i(license_blob license_key license),
contributing: :contribution_guide,
gitignore: :gitignore,
koding: :koding_yml,
gitlab_ci: :gitlab_ci_yml,
avatar: :avatar,
issue_template: :issue_template_names,
merge_request_template: :merge_request_template_names
}.freeze
def initialize(full_path, project, disk_path: nil, is_wiki: false)
@full_path = full_path
@disk_path = disk_path || full_path
@project = project
@commit_cache = {}
@is_wiki = is_wiki
end
def ==(other)
@disk_path == other.disk_path
end
def raw_repository
return nil unless full_path
@raw_repository ||= initialize_raw_repository
end
alias_method :raw, :raw_repository
def cleanup
@raw_repository&.cleanup
end
# Return absolute path to repository
def path_to_repo
@path_to_repo ||=
begin
storage = Gitlab.config.repositories.storages[@project.repository_storage]
File.expand_path(
File.join(storage.legacy_disk_path, disk_path + '.git')
)
end
end
def inspect
"#<#{self.class.name}:#{@disk_path}>"
end
def commit(ref = 'HEAD')
return nil unless exists?
return ref if ref.is_a?(::Commit)
find_commit(ref)
end
# Finding a commit by the passed SHA
# Also takes care of caching, based on the SHA
def commit_by(oid:)
return @commit_cache[oid] if @commit_cache.key?(oid)
@commit_cache[oid] = find_commit(oid)
end
def commits_by(oids:)
return [] unless oids.present?
commits = Gitlab::Git::Commit.batch_by_oid(raw_repository, oids)
if commits.present?
Commit.decorate(commits, @project)
else
[]
end
end
def commits(ref = nil, path: nil, limit: nil, offset: nil, skip_merges: false, after: nil, before: nil, all: nil)
options = {
repo: raw_repository,
ref: ref,
path: path,
limit: limit,
offset: offset,
after: after,
before: before,
follow: Array(path).length == 1,
skip_merges: skip_merges,
all: all
}
commits = Gitlab::Git::Commit.where(options)
commits = Commit.decorate(commits, @project) if commits.present?
CommitCollection.new(project, commits, ref)
end
def commits_between(from, to)
commits = Gitlab::Git::Commit.between(raw_repository, from, to)
commits = Commit.decorate(commits, @project) if commits.present?
commits
end
# Returns a list of commits that are not present in any reference
def new_commits(newrev)
refs = ::Gitlab::Git::RevList.new(raw, newrev: newrev).new_refs
refs.map { |sha| commit(sha.strip) }
end
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/384
def find_commits_by_message(query, ref = nil, path = nil, limit = 1000, offset = 0)
unless exists? && has_visible_content? && query.present?
return []
end
commits = raw_repository.find_commits_by_message(query, ref, path, limit, offset).map do |c|
commit(c)
end
CommitCollection.new(project, commits, ref)
end
def find_branch(name, fresh_repo: true)
raw_repository.find_branch(name, fresh_repo)
end
def find_tag(name)
tags.find { |tag| tag.name == name }
end
def add_branch(user, branch_name, ref)
branch = raw_repository.add_branch(branch_name, user: user, target: ref)
after_create_branch
branch
rescue Gitlab::Git::Repository::InvalidRef
false
end
def add_tag(user, tag_name, target, message = nil)
raw_repository.add_tag(tag_name, user: user, target: target, message: message)
rescue Gitlab::Git::Repository::InvalidRef
false
end
def rm_branch(user, branch_name)
before_remove_branch
raw_repository.rm_branch(branch_name, user: user)
after_remove_branch
true
end
def rm_tag(user, tag_name)
before_remove_tag
raw_repository.rm_tag(tag_name, user: user)
after_remove_tag
true
end
def ref_names
branch_names + tag_names
end
def branch_exists?(branch_name)
return false unless raw_repository
branch_names.include?(branch_name)
end
def tag_exists?(tag_name)
return false unless raw_repository
tag_names.include?(tag_name)
end
def ref_exists?(ref)
!!raw_repository&.ref_exists?(ref)
rescue ArgumentError
false
end
# Makes sure a commit is kept around when Git garbage collection runs.
# Git GC will delete commits from the repository that are no longer in any
# branches or tags, but we want to keep some of these commits around, for
# example if they have comments or CI builds.
def keep_around(sha)
return unless sha.present? && commit_by(oid: sha)
return if kept_around?(sha)
# This will still fail if the file is corrupted (e.g. 0 bytes)
raw_repository.write_ref(keep_around_ref_name(sha), sha, shell: false)
rescue Gitlab::Git::CommandError => ex
Rails.logger.error "Unable to create keep-around reference for repository #{path}: #{ex}"
end
def kept_around?(sha)
ref_exists?(keep_around_ref_name(sha))
end
def diverging_commit_counts(branch)
@root_ref_hash ||= raw_repository.commit(root_ref).id
cache.fetch(:"diverging_commit_counts_#{branch.name}") do
# Rugged seems to throw a `ReferenceError` when given branch_names rather
# than SHA-1 hashes
number_commits_behind, number_commits_ahead =
raw_repository.count_commits_between(
@root_ref_hash,
branch.dereferenced_target.sha,
left_right: true,
max_count: MAX_DIVERGING_COUNT)
{ behind: number_commits_behind, ahead: number_commits_ahead }
end
end
def expire_tags_cache
expire_method_caches(%i(tag_names tag_count))
@tags = nil
end
def expire_branches_cache
expire_method_caches(%i(branch_names branch_count has_visible_content?))
@local_branches = nil
@branch_exists_memo = nil
end
def expire_statistics_caches
expire_method_caches(%i(size commit_count))
end
def expire_all_method_caches
expire_method_caches(CACHED_METHODS)
end
def expire_avatar_cache
expire_method_caches(%i(avatar))
end
# Refreshes the method caches of this repository.
#
# types - An Array of file types (e.g. `:readme`) used to refresh extra
# caches.
def refresh_method_caches(types)
to_refresh = []
types.each do |type|
methods = METHOD_CACHES_FOR_FILE_TYPES[type.to_sym]
to_refresh.concat(Array(methods)) if methods
end
expire_method_caches(to_refresh)
to_refresh.each { |method| send(method) } # rubocop:disable GitlabSecurity/PublicSend
end
def expire_branch_cache(branch_name = nil)
# When we push to the root branch we have to flush the cache for all other
# branches as their statistics are based on the commits relative to the
# root branch.
if !branch_name || branch_name == root_ref
branches.each do |branch|
cache.expire(:"diverging_commit_counts_#{branch.name}")
cache.expire(:"commit_count_#{branch.name}")
end
# In case a commit is pushed to a non-root branch we only have to flush the
# cache for said branch.
else
cache.expire(:"diverging_commit_counts_#{branch_name}")
cache.expire(:"commit_count_#{branch_name}")
end
end
def expire_root_ref_cache
expire_method_caches(%i(root_ref))
end
# Expires the cache(s) used to determine if a repository is empty or not.
def expire_emptiness_caches
return unless empty?
expire_method_caches(%i(has_visible_content?))
raw_repository.expire_has_local_branches_cache
end
def lookup_cache
@lookup_cache ||= {}
end
def expire_exists_cache
expire_method_caches(%i(exists?))
end
# expire cache that doesn't depend on repository data (when expiring)
def expire_content_cache
expire_tags_cache
expire_branches_cache
expire_root_ref_cache
expire_emptiness_caches
expire_exists_cache
expire_statistics_caches
end
# Runs code after a repository has been created.
def after_create
expire_exists_cache
expire_root_ref_cache
expire_emptiness_caches
repository_event(:create_repository)
end
# Runs code just before a repository is deleted.
def before_delete
expire_exists_cache
expire_all_method_caches
expire_branch_cache if exists?
expire_content_cache
repository_event(:remove_repository)
end
# Runs code just before the HEAD of a repository is changed.
def before_change_head
# Cached divergent commit counts are based on repository head
expire_branch_cache
expire_root_ref_cache
repository_event(:change_default_branch)
end
# Runs code before pushing (= creating or removing) a tag.
def before_push_tag
expire_statistics_caches
expire_emptiness_caches
expire_tags_cache
repository_event(:push_tag)
end
# Runs code before removing a tag.
def before_remove_tag
expire_tags_cache
expire_statistics_caches
repository_event(:remove_tag)
end
# Runs code after removing a tag.
def after_remove_tag
expire_tags_cache
end
# Runs code after the HEAD of a repository is changed.
def after_change_head
expire_method_caches(METHOD_CACHES_FOR_FILE_TYPES.keys)
end
# Runs code after a repository has been forked/imported.
def after_import
expire_content_cache
end
# Runs code after a new commit has been pushed.
def after_push_commit(branch_name)
expire_statistics_caches
expire_branch_cache(branch_name)
repository_event(:push_commit, branch: branch_name)
end
# Runs code after a new branch has been created.
def after_create_branch
expire_branches_cache
repository_event(:push_branch)
end
# Runs code before removing an existing branch.
def before_remove_branch
expire_branches_cache
repository_event(:remove_branch)
end
# Runs code after an existing branch has been removed.
def after_remove_branch
expire_branches_cache
end
def method_missing(m, *args, &block)
if m == :lookup && !block_given?
lookup_cache[m] ||= {}
lookup_cache[m][args.join(":")] ||= raw_repository.__send__(m, *args, &block) # rubocop:disable GitlabSecurity/PublicSend
else
raw_repository.__send__(m, *args, &block) # rubocop:disable GitlabSecurity/PublicSend
end
end
def respond_to_missing?(method, include_private = false)
raw_repository.respond_to?(method, include_private) || super
end
def blob_at(sha, path)
Blob.decorate(raw_repository.blob_at(sha, path), project)
rescue Gitlab::Git::Repository::NoRepository
nil
end
# items is an Array like: [[oid, path], [oid1, path1]]
def blobs_at(items)
raw_repository.batch_blobs(items).map { |blob| Blob.decorate(blob, project) }
end
def root_ref
# When the repo does not exist, or there is no root ref, we raise this error so no data is cached.
raw_repository&.root_ref or raise Gitlab::Git::Repository::NoRepository # rubocop:disable Style/AndOr
end
cache_method :root_ref
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/314
def exists?
return false unless full_path
raw_repository.exists?
end
cache_method :exists?
# We don't need to cache the output of this method because both exists? and
# has_visible_content? are already memoized and cached. There's no guarantee
# that the values are expired and loaded atomically.
def empty?
return true unless exists?
!has_visible_content?
end
# The size of this repository in megabytes.
def size
exists? ? raw_repository.size : 0.0
end
cache_method :size, fallback: 0.0
def commit_count
root_ref ? raw_repository.commit_count(root_ref) : 0
end
cache_method :commit_count, fallback: 0
def commit_count_for_ref(ref)
return 0 unless exists?
cache.fetch(:"commit_count_#{ref}") { raw_repository.commit_count(ref) }
end
delegate :branch_names, to: :raw_repository
cache_method :branch_names, fallback: []
delegate :tag_names, to: :raw_repository
cache_method :tag_names, fallback: []
delegate :branch_count, :tag_count, :has_visible_content?, to: :raw_repository
cache_method :branch_count, fallback: 0
cache_method :tag_count, fallback: 0
cache_method :has_visible_content?, fallback: false
def avatar
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/38327
Gitlab::GitalyClient.allow_n_plus_1_calls do
if tree = file_on_head(:avatar)
tree.path
end
end
end
cache_method :avatar
def issue_template_names
Gitlab::Template::IssueTemplate.dropdown_names(project)
end
cache_method :issue_template_names, fallback: []
def merge_request_template_names
Gitlab::Template::MergeRequestTemplate.dropdown_names(project)
end
cache_method :merge_request_template_names, fallback: []
def readme
if readme = tree(:head)&.readme
ReadmeBlob.new(readme, self)
end
end
def rendered_readme
MarkupHelper.markup_unsafe(readme.name, readme.data, project: project) if readme
end
cache_method :rendered_readme
def contribution_guide
file_on_head(:contributing)
end
cache_method :contribution_guide
def changelog
file_on_head(:changelog)
end
cache_method :changelog
def license_blob
file_on_head(:license)
end
cache_method :license_blob
def license_key
return unless exists?
raw_repository.license_short_name
end
cache_method :license_key
def license
return unless license_key
Licensee::License.new(license_key)
end
cache_method :license, memoize_only: true
def gitignore
file_on_head(:gitignore)
end
cache_method :gitignore
def koding_yml
file_on_head(:koding)
end
cache_method :koding_yml
def gitlab_ci_yml
file_on_head(:gitlab_ci)
end
cache_method :gitlab_ci_yml
def head_commit
@head_commit ||= commit(self.root_ref)
end
def head_tree
if head_commit
@head_tree ||= Tree.new(self, head_commit.sha, nil)
end
end
def tree(sha = :head, path = nil, recursive: false)
if sha == :head
return unless head_commit
if path.nil?
return head_tree
else
sha = head_commit.sha
end
end
Tree.new(self, sha, path, recursive: recursive)
end
def blob_at_branch(branch_name, path)
last_commit = commit(branch_name)
if last_commit
blob_at(last_commit.sha, path)
else
nil
end
end
def last_commit_for_path(sha, path)
commit = raw_repository.last_commit_for_path(sha, path)
::Commit.new(commit, @project) if commit
end
def last_commit_id_for_path(sha, path)
key = path.blank? ? "last_commit_id_for_path:#{sha}" : "last_commit_id_for_path:#{sha}:#{Digest::SHA1.hexdigest(path)}"
cache.fetch(key) do
last_commit_for_path(sha, path)&.id
end
end
def next_branch(name, opts = {})
branch_ids = self.branch_names.map do |n|
next 1 if n == name
result = n.match(/\A#{name}-([0-9]+)\z/)
result[1].to_i if result
end.compact
highest_branch_id = branch_ids.max || 0
return name if opts[:mild] && 0 == highest_branch_id
"#{name}-#{highest_branch_id + 1}"
end
def branches_sorted_by(value)
raw_repository.local_branches(sort_by: value)
end
def tags_sorted_by(value)
case value
when 'name_asc'
VersionSorter.sort(tags) { |tag| tag.name }
when 'name_desc'
VersionSorter.rsort(tags) { |tag| tag.name }
when 'updated_desc'
tags_sorted_by_committed_date.reverse
when 'updated_asc'
tags_sorted_by_committed_date
else
tags
end
end
# Params:
#
# order_by: name|email|commits
# sort: asc|desc default: 'asc'
def contributors(order_by: nil, sort: 'asc')
commits = self.commits(nil, limit: 2000, offset: 0, skip_merges: true)
commits = commits.group_by(&:author_email).map do |email, commits|
contributor = Gitlab::Contributor.new
contributor.email = email
commits.each do |commit|
if contributor.name.blank?
contributor.name = commit.author_name
end
contributor.commits += 1
end
contributor
end
Commit.order_by(collection: commits, order_by: order_by, sort: sort)
end
def branch_names_contains(sha)
raw_repository.branch_names_contains_sha(sha)
end
def tag_names_contains(sha)
raw_repository.tag_names_contains_sha(sha)
end
def local_branches
@local_branches ||= raw_repository.local_branches
end
alias_method :branches, :local_branches
def tags
@tags ||= raw_repository.tags
end
def create_dir(user, path, **options)
options[:actions] = [{ action: :create_dir, file_path: path }]
multi_action(user, **options)
end
def create_file(user, path, content, **options)
options[:actions] = [{ action: :create, file_path: path, content: content }]
multi_action(user, **options)
end
def update_file(user, path, content, **options)
previous_path = options.delete(:previous_path)
action = previous_path && previous_path != path ? :move : :update
options[:actions] = [{ action: action, file_path: path, previous_path: previous_path, content: content }]
multi_action(user, **options)
end
def delete_file(user, path, **options)
options[:actions] = [{ action: :delete, file_path: path }]
multi_action(user, **options)
end
def with_cache_hooks
result = yield
return unless result
after_create if result.repo_created?
after_create_branch if result.branch_created?
result.newrev
end
def multi_action(user, **options)
start_project = options.delete(:start_project)
if start_project
options[:start_repository] = start_project.repository.raw_repository
end
with_cache_hooks { raw.multi_action(user, **options) }
end
def merge(user, source_sha, merge_request, message)
with_cache_hooks do
raw_repository.merge(user, source_sha, merge_request.target_branch, message) do |commit_id|
merge_request.update(in_progress_merge_commit_sha: commit_id)
nil # Return value does not matter.
end
end
end
def ff_merge(user, source, target_branch, merge_request: nil)
their_commit_id = commit(source)&.id
raise 'Invalid merge source' if their_commit_id.nil?
merge_request&.update(in_progress_merge_commit_sha: their_commit_id)
with_cache_hooks { raw.ff_merge(user, their_commit_id, target_branch) }
end
def revert(
user, commit, branch_name, message,
start_branch_name: nil, start_project: project)
with_cache_hooks do
raw_repository.revert(
user: user,
commit: commit.raw,
branch_name: branch_name,
message: message,
start_branch_name: start_branch_name,
start_repository: start_project.repository.raw_repository
)
end
end
def cherry_pick(
user, commit, branch_name, message,
start_branch_name: nil, start_project: project)
with_cache_hooks do
raw_repository.cherry_pick(
user: user,
commit: commit.raw,
branch_name: branch_name,
message: message,
start_branch_name: start_branch_name,
start_repository: start_project.repository.raw_repository
)
end
end
def merged_to_root_ref?(branch_or_name)
branch = Gitlab::Git::Branch.find(self, branch_or_name)
if branch
same_head = branch.target == root_ref_sha
merged = ancestor?(branch.target, root_ref_sha)
!same_head && merged
else
nil
end
end
def root_ref_sha
@root_ref_sha ||= commit(root_ref).sha
end
delegate :merged_branch_names, :can_be_merged?, to: :raw_repository
def merge_base(first_commit_id, second_commit_id)
first_commit_id = commit(first_commit_id).try(:id) || first_commit_id
second_commit_id = commit(second_commit_id).try(:id) || second_commit_id
raw_repository.merge_base(first_commit_id, second_commit_id)
end
def ancestor?(ancestor_id, descendant_id)
return false if ancestor_id.nil? || descendant_id.nil?
raw_repository.ancestor?(ancestor_id, descendant_id)
end
def fetch_as_mirror(url, forced: false, refmap: :all_refs, remote_name: nil, prune: true)
unless remote_name
remote_name = "tmp-#{SecureRandom.hex}"
tmp_remote_name = true
end
add_remote(remote_name, url, mirror_refmap: refmap)
fetch_remote(remote_name, forced: forced, prune: prune)
ensure
remove_remote(remote_name) if tmp_remote_name
end
def fetch_remote(remote, forced: false, ssh_auth: nil, no_tags: false, prune: true)
gitlab_shell.fetch_remote(raw_repository, remote, ssh_auth: ssh_auth, forced: forced, no_tags: no_tags, prune: prune)
end
def fetch_source_branch!(source_repository, source_branch, local_ref)
raw_repository.fetch_source_branch!(source_repository.raw_repository, source_branch, local_ref)
end
def compare_source_branch(target_branch_name, source_repository, source_branch_name, straight:)
raw_repository.compare_source_branch(target_branch_name, source_repository.raw_repository, source_branch_name, straight: straight)
end
def create_ref(ref, ref_path)
raw_repository.write_ref(ref_path, ref)
end
def ls_files(ref)
actual_ref = ref || root_ref
raw_repository.ls_files(actual_ref)
end
def search_files_by_content(query, ref)
return [] if empty? || query.blank?
raw_repository.search_files_by_content(query, ref)
end
def search_files_by_name(query, ref)
return [] if empty?
raw_repository.search_files_by_name(query, ref)
end
def copy_gitattributes(ref)
actual_ref = ref || root_ref
begin
raw_repository.copy_gitattributes(actual_ref)
true
rescue Gitlab::Git::Repository::InvalidRef
false
end
end
def file_on_head(type)
if head = tree(:head)
head.blobs.find do |blob|
Gitlab::FileDetector.type_of(blob.path) == type
end
end
end
def route_map_for(sha)
blob_data_at(sha, '.gitlab/route-map.yml')
end
def gitlab_ci_yml_for(sha, path = '.gitlab-ci.yml')
blob_data_at(sha, path)
end
def fetch_ref(source_repository, source_ref:, target_ref:)
raw_repository.fetch_ref(source_repository.raw_repository, source_ref: source_ref, target_ref: target_ref)
end
def rebase(user, merge_request)
raw.rebase(user, merge_request.id, branch: merge_request.source_branch,
branch_sha: merge_request.source_branch_sha,
remote_repository: merge_request.target_project.repository.raw,
remote_branch: merge_request.target_branch)
end
private
# TODO Generice finder, later split this on finders by Ref or Oid
# gitlab-org/gitlab-ce#39239
def find_commit(oid_or_ref)
commit = if oid_or_ref.is_a?(Gitlab::Git::Commit)
oid_or_ref
else
Gitlab::Git::Commit.find(raw_repository, oid_or_ref)
end
::Commit.new(commit, @project) if commit
end
def blob_data_at(sha, path)
blob = blob_at(sha, path)
return unless blob
blob.load_all_data!
blob.data
end
def cache
@cache ||= Gitlab::RepositoryCache.new(self)
end
def tags_sorted_by_committed_date
tags.sort_by do |tag|
# Annotated tags can point to any object (e.g. a blob), but generally
# tags point to a commit. If we don't have a commit, then just default
# to putting the tag at the end of the list.
target = tag.dereferenced_target
if target
target.committed_date
else
Time.now
end
end
end
def keep_around_ref_name(sha)
"refs/#{REF_KEEP_AROUND}/#{sha}"
end
def repository_event(event, tags = {})
Gitlab::Metrics.add_event(event, { path: full_path }.merge(tags))
end
def initialize_raw_repository
Gitlab::Git::Repository.new(project.repository_storage, disk_path + '.git', Gitlab::GlRepository.gl_repository(project, is_wiki))
end
end