gitlab-org--gitlab-foss/lib/gitlab/git/repository.rb

# frozen_string_literal: true
require 'tempfile'
require 'forwardable'
require "rubygems/package"
module Gitlab
module Git
class Repository
include Gitlab::Git::RepositoryMirroring
include Gitlab::Git::WrapsGitalyErrors
include Gitlab::EncodingHelper
include Gitlab::Utils::StrongMemoize
prepend Gitlab::Git::RuggedImpl::Repository
SEARCH_CONTEXT_LINES = 3
REV_LIST_COMMIT_LIMIT = 2_000
GITALY_INTERNAL_URL = 'ssh://gitaly/internal.git'
GITLAB_PROJECTS_TIMEOUT = Gitlab.config.gitlab_shell.git_timeout
EMPTY_REPOSITORY_CHECKSUM = '0000000000000000000000000000000000000000'
NoRepository = Class.new(StandardError)
InvalidRepository = Class.new(StandardError)
InvalidBlobName = Class.new(StandardError)
InvalidRef = Class.new(StandardError)
GitError = Class.new(StandardError)
DeleteBranchError = Class.new(StandardError)
TagExistsError = Class.new(StandardError)
ChecksumError = Class.new(StandardError)
class CreateTreeError < StandardError
attr_reader :error_code
def initialize(error_code)
super(self.class.name)
# The value coming from Gitaly is an uppercase String (e.g., "EMPTY")
@error_code = error_code.downcase.to_sym
end
end
# Directory name of repo
attr_reader :name
# Relative path of repo
attr_reader :relative_path
attr_reader :storage, :gl_repository, :gl_project_path
# This remote name has to be stable for all types of repositories that
# can join an object pool. If its structure ever changes, a migration
# has to be performed on the object pools to update the remote names.
# Otherwise the pool can no longer be updated and is left in an
# inconsistent state.
alias_method :object_pool_remote_name, :gl_repository
# This initializer method is only used on the client side (gitlab-ce).
# Gitaly-ruby uses a different initializer.
def initialize(storage, relative_path, gl_repository, gl_project_path)
@storage = storage
@relative_path = relative_path
@gl_repository = gl_repository
@gl_project_path = gl_project_path
@name = @relative_path.split("/").last
end
def to_s
"<#{self.class.name}: #{self.gl_project_path}>"
end
def ==(other)
other.is_a?(self.class) && [storage, relative_path] == [other.storage, other.relative_path]
end
alias_method :eql?, :==
def hash
[self.class, storage, relative_path].hash
end
# This method will be removed when Gitaly reaches v1.1.
def path
File.join(
Gitlab.config.repositories.storages[@storage].legacy_disk_path, @relative_path
)
end
# Default branch in the repository
def root_ref
gitaly_ref_client.default_branch_name
rescue GRPC::NotFound => e
raise NoRepository.new(e.message)
rescue GRPC::Unknown => e
raise Gitlab::Git::CommandError.new(e.message)
end
def exists?
gitaly_repository_client.exists?
end
def create_repository
wrapped_gitaly_errors do
gitaly_repository_client.create_repository
end
end
# Returns an Array of branch names
# sorted by name ASC
def branch_names
wrapped_gitaly_errors do
gitaly_ref_client.branch_names
end
end
# Returns an Array of Branches
def branches
wrapped_gitaly_errors do
gitaly_ref_client.branches
end
end
# Directly find a branch with a simple name (e.g. master)
#
def find_branch(name)
wrapped_gitaly_errors do
gitaly_ref_client.find_branch(name)
end
end
def local_branches(sort_by: nil, pagination_params: nil)
wrapped_gitaly_errors do
gitaly_ref_client.local_branches(sort_by: sort_by, pagination_params: pagination_params)
end
end
# Returns the number of valid branches
def branch_count
wrapped_gitaly_errors do
gitaly_ref_client.count_branch_names
end
end
def rename(new_relative_path)
wrapped_gitaly_errors do
gitaly_repository_client.rename(new_relative_path)
end
end
def remove
wrapped_gitaly_errors do
gitaly_repository_client.remove
end
end
def replicate(source_repository)
wrapped_gitaly_errors do
gitaly_repository_client.replicate(source_repository)
end
end
def expire_has_local_branches_cache
clear_memoization(:has_local_branches)
end
def has_local_branches?
strong_memoize(:has_local_branches) do
uncached_has_local_branches?
end
end
# A Git repository can contain hidden refs such as:
# /refs/notes/*
# /refs/git-as-svn/*
# /refs/pulls/*
# These refs are not visible on the project page by default and are not
# cloned to the client.
alias_method :has_visible_content?, :has_local_branches?
# Returns the number of valid tags
def tag_count
wrapped_gitaly_errors do
gitaly_ref_client.count_tag_names
end
end
# Returns an Array of tag names
def tag_names
wrapped_gitaly_errors do
gitaly_ref_client.tag_names
end
end
# Returns an Array of Tags
#
def tags
wrapped_gitaly_errors do
gitaly_ref_client.tags
end
end
# Returns true if the given ref name exists
#
# Ref names must start with `refs/`.
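#
# Ex. (illustrative refs; return values depend on the repository's contents)
# repo.ref_exists?('refs/heads/master') # => true
# repo.ref_exists?('refs/tags/v9.9.9')  # => false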
def ref_exists?(ref_name)
wrapped_gitaly_errors do
gitaly_ref_exists?(ref_name)
end
end
# Returns true if the given tag exists
#
# name - The name of the tag as a String.
def tag_exists?(name)
wrapped_gitaly_errors do
gitaly_ref_exists?("refs/tags/#{name}")
end
end
# Returns true if the given branch exists
#
# name - The name of the branch as a String.
def branch_exists?(name)
wrapped_gitaly_errors do
gitaly_ref_exists?("refs/heads/#{name}")
end
end
# Returns an Array of branch and tag names
def ref_names
branch_names + tag_names
end
def delete_all_refs_except(prefixes)
wrapped_gitaly_errors do
gitaly_ref_client.delete_refs(except_with_prefixes: prefixes)
end
end
def archive_metadata(ref, storage_path, project_path, format = "tar.gz", append_sha:, path: nil)
ref ||= root_ref
commit = Gitlab::Git::Commit.find(self, ref)
return {} if commit.nil?
prefix = archive_prefix(ref, commit.id, project_path, append_sha: append_sha, path: path)
{
'ArchivePrefix' => prefix,
'ArchivePath' => archive_file_path(storage_path, commit.id, prefix, format),
'CommitId' => commit.id,
'GitalyRepository' => gitaly_repository.to_h
}
end
# This is both the filename of the archive (missing the extension) and the
# name of the top-level member of the archive under which all files go
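#
# Ex. (illustrative arguments; the project path and SHA are placeholders)
# archive_prefix('feature/x', 'deadbeef', 'my-project', append_sha: true, path: nil)
# # => "my-project-feature-x-deadbeef"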
def archive_prefix(ref, sha, project_path, append_sha:, path:)
append_sha = (ref != sha) if append_sha.nil?
formatted_ref = ref.tr('/', '-')
prefix_segments = [project_path, formatted_ref]
prefix_segments << sha if append_sha
prefix_segments << path.tr('/', '-').gsub(%r{^/|/$}, '') if path
prefix_segments.join('-')
end
private :archive_prefix
# The full path on disk where the archive should be stored. This is used
# to cache the archive between requests.
#
# The path is in a global namespace, so it needs to be globally unique. This is
# achieved by including `gl_repository` in the path.
#
# Archives relating to a particular ref when the SHA is not present in the
# filename must be invalidated when the ref is updated to point to a new
# SHA. This is achieved by including the SHA in the path.
#
# As this is a full path on disk, it is not "cloud native". This should
# be resolved by either removing the cache, or moving the implementation
# into Gitaly and removing the ArchivePath parameter from the git-archive
# senddata response.
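#
# Ex. (illustrative values; the storage path and SHA are placeholders)
# archive_file_path('/var/opt/gitlab/cache/archive', 'deadbeef', 'my-project-master')
# # => "/var/opt/gitlab/cache/archive/<gl_repository>/deadbeef/my-project-master.tar.gz"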
def archive_file_path(storage_path, sha, name, format = "tar.gz")
# Build file path
return unless name
extension =
case format
when "tar.bz2", "tbz", "tbz2", "tb2", "bz2"
"tar.bz2"
when "tar"
"tar"
when "zip"
"zip"
else
# everything else should fall back to tar.gz
"tar.gz"
end
file_name = "#{name}.#{extension}"
File.join(storage_path, self.gl_repository, sha, file_name)
end
private :archive_file_path
# Return repo size in megabytes
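# The division by 1024 below assumes Gitaly reports the size in kilobytes
# (e.g. an illustrative 2048 KB repository => 2.0 MB).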
def size
size = gitaly_repository_client.repository_size
(size.to_f / 1024).round(2)
end
# Return git object directory size in bytes
def object_directory_size
gitaly_repository_client.get_object_directory_size.to_f * 1024
end
# Build an array of commits.
#
# Usage.
# repo.log(
# ref: 'master',
# path: 'app/models',
# limit: 10,
# offset: 5,
# after: Time.new(2016, 4, 21, 14, 32, 10)
# )
def log(options)
default_options = {
limit: 10,
offset: 0,
path: nil,
author: nil,
follow: false,
skip_merges: false,
after: nil,
before: nil,
all: false
}
options = default_options.merge(options)
options[:offset] ||= 0
limit = options[:limit]
if limit == 0 || !limit.is_a?(Integer)
raise ArgumentError.new("invalid Repository#log limit: #{limit.inspect}")
end
wrapped_gitaly_errors do
gitaly_commit_client.find_commits(options)
end
end
def new_commits(newrev)
wrapped_gitaly_errors do
gitaly_ref_client.list_new_commits(newrev)
end
end
def new_blobs(newrev, dynamic_timeout: nil)
return [] if newrev.blank? || newrev == ::Gitlab::Git::BLANK_SHA
strong_memoize("new_blobs_#{newrev}") do
wrapped_gitaly_errors do
gitaly_ref_client.list_new_blobs(newrev, REV_LIST_COMMIT_LIMIT, dynamic_timeout: dynamic_timeout)
end
end
end
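# Ex. (illustrative refs; the returned counts are placeholders)
# repo.count_commits(ref: 'master')                                  # => 1234
# repo.count_commits(from: 'v1.0.0', to: 'master', left_right: true) # => [2, 10]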
def count_commits(options)
options = process_count_commits_options(options.dup)
wrapped_gitaly_errors do
if options[:left_right]
from = options[:from]
to = options[:to]
right_count = gitaly_commit_client
.commit_count("#{from}..#{to}", options)
left_count = gitaly_commit_client
.commit_count("#{to}..#{from}", options)
[left_count, right_count]
else
gitaly_commit_client.commit_count(options[:ref], options)
end
end
end
# Counts the number of commits between `from` and `to`.
def count_commits_between(from, to, options = {})
count_commits(from: from, to: to, **options)
end
# old_rev and new_rev are commit IDs
# The result of this method is an array of Gitlab::Git::RawDiffChange objects
def raw_changes_between(old_rev, new_rev)
@raw_changes_between ||= {}
@raw_changes_between[[old_rev, new_rev]] ||=
begin
return [] if new_rev.blank? || new_rev == Gitlab::Git::BLANK_SHA
wrapped_gitaly_errors do
gitaly_repository_client.raw_changes_between(old_rev, new_rev)
.each_with_object([]) do |msg, arr|
msg.raw_changes.each { |change| arr << ::Gitlab::Git::RawDiffChange.new(change) }
end
end
end
rescue ArgumentError => e
raise Gitlab::Git::Repository::GitError.new(e)
end
# Returns the SHA of the most recent common ancestor of +from+ and +to+
def merge_base(*commits)
wrapped_gitaly_errors do
gitaly_repository_client.find_merge_base(*commits)
end
end
# Returns true if +from+ is a direct ancestor of +to+, otherwise false
def ancestor?(from, to)
gitaly_commit_client.ancestor?(from, to)
end
def merged_branch_names(branch_names = [])
return [] unless root_ref
root_sha = find_branch(root_ref)&.target
return [] unless root_sha
branches = wrapped_gitaly_errors do
gitaly_merged_branch_names(branch_names, root_sha)
end
Set.new(branches)
end
# Return an array of Diff objects that represent the diff
# between +from+ and +to+. See Diff::filter_diff_options for the allowed
# diff options. The +options+ hash can also include :break_rewrites to
# split larger rewrites into delete/add pairs.
def diff(from, to, options = {}, *paths)
iterator = gitaly_commit_client.diff(from, to, options.merge(paths: paths))
Gitlab::Git::DiffCollection.new(iterator, options)
end
def diff_stats(left_id, right_id)
if [left_id, right_id].any? { |ref| ref.blank? || Gitlab::Git.blank_ref?(ref) }
return empty_diff_stats
end
stats = wrapped_gitaly_errors do
gitaly_commit_client.diff_stats(left_id, right_id)
end
Gitlab::Git::DiffStatsCollection.new(stats)
rescue CommandError, TypeError
empty_diff_stats
end
# Returns a RefName for a given SHA
def ref_name_for_sha(ref_path, sha)
raise ArgumentError, "sha can't be empty" unless sha.present?
gitaly_ref_client.find_ref_name(sha, ref_path)
end
# Returns a hash keyed by commit ID, where each value is an Array of
# ref names collected from Gitlab::Git::Tag and Gitlab::Git::Branch
# (both inherit from Gitlab::Git::Ref)
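#
# Ex. (illustrative SHA and ref names)
# repo.refs_hash
# # => { "deadbeef..." => ["master", "v1.0.0"] }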
def refs_hash
return @refs_hash if @refs_hash
@refs_hash = Hash.new { |h, k| h[k] = [] }
(tags + branches).each do |ref|
next unless ref.target && ref.name && ref.dereferenced_target&.id
@refs_hash[ref.dereferenced_target.id] << ref.name
end
@refs_hash
end
# Returns the URL for a submodule
#
# Ex.
# @repository.submodule_url_for('master', 'rack')
# # => git@localhost:rack.git
#
def submodule_url_for(ref, path)
wrapped_gitaly_errors do
gitaly_submodule_url_for(ref, path)
end
end
# Returns path-to-URL mappings for submodules
#
# Ex.
# @repository.submodule_urls_for('master')
# # => { 'rack' => 'git@localhost:rack.git' }
#
def submodule_urls_for(ref)
wrapped_gitaly_errors do
gitaly_submodule_urls_for(ref)
end
end
# Returns the total number of commits reachable from the given ref
def commit_count(ref)
wrapped_gitaly_errors do
gitaly_commit_client.commit_count(ref)
end
end
# Return total diverging commits count
def diverging_commit_count(from, to, max_count: 0)
wrapped_gitaly_errors do
gitaly_commit_client.diverging_commit_count(from, to, max_count: max_count)
end
end
# Mimic the `git clean` command and recursively delete untracked files.
# Valid keys that can be passed in the +options+ hash are:
#
# :d - Remove untracked directories
# :f - Remove untracked directories that are managed by a different
# repository
# :x - Remove ignored files
#
# The value in +options+ must evaluate to true for an option to take
# effect.
#
# Examples:
#
# repo.clean(d: true, f: true) # Enable the -d and -f options
#
# repo.clean(d: false, x: true) # -x is enabled, -d is not
def clean(options = {})
strategies = [:remove_untracked]
strategies.push(:force) if options[:f]
strategies.push(:remove_ignored) if options[:x]
# TODO: implement this method
end
def add_branch(branch_name, user:, target:)
wrapped_gitaly_errors do
gitaly_operation_client.user_create_branch(branch_name, user, target)
end
end
def add_tag(tag_name, user:, target:, message: nil)
wrapped_gitaly_errors do
gitaly_operation_client.add_tag(tag_name, user, target, message)
end
end
def update_branch(branch_name, user:, newrev:, oldrev:)
wrapped_gitaly_errors do
gitaly_operation_client.user_update_branch(branch_name, user, newrev, oldrev)
end
end
def rm_branch(branch_name, user:)
wrapped_gitaly_errors do
gitaly_operation_client.user_delete_branch(branch_name, user)
end
end
def rm_tag(tag_name, user:)
wrapped_gitaly_errors do
gitaly_operation_client.rm_tag(tag_name, user)
end
end
def find_tag(name)
tags.find { |tag| tag.name == name }
end
def merge_to_ref(user, source_sha, branch, target_ref, message, first_parent_ref)
wrapped_gitaly_errors do
gitaly_operation_client.user_merge_to_ref(user, source_sha, branch, target_ref, message, first_parent_ref)
end
end
def merge(user, source_sha, target_branch, message, &block)
wrapped_gitaly_errors do
gitaly_operation_client.user_merge_branch(user, source_sha, target_branch, message, &block)
end
end
def ff_merge(user, source_sha, target_branch)
wrapped_gitaly_errors do
gitaly_operation_client.user_ff_branch(user, source_sha, target_branch)
end
end
def revert(user:, commit:, branch_name:, message:, start_branch_name:, start_repository:)
args = {
user: user,
commit: commit,
branch_name: branch_name,
message: message,
start_branch_name: start_branch_name,
start_repository: start_repository
}
wrapped_gitaly_errors do
gitaly_operation_client.user_revert(args)
end
end
def cherry_pick(user:, commit:, branch_name:, message:, start_branch_name:, start_repository:)
args = {
user: user,
commit: commit,
branch_name: branch_name,
message: message,
start_branch_name: start_branch_name,
start_repository: start_repository
}
wrapped_gitaly_errors do
gitaly_operation_client.user_cherry_pick(args)
end
end
def update_submodule(user:, submodule:, commit_sha:, message:, branch:)
args = {
user: user,
submodule: submodule,
commit_sha: commit_sha,
branch: branch,
message: message
}
wrapped_gitaly_errors do
gitaly_operation_client.user_update_submodule(args)
end
end
# Delete the specified branch from the repository
# Note: No Git hooks are executed for this action
def delete_branch(branch_name)
write_ref(branch_name, Gitlab::Git::BLANK_SHA)
rescue CommandError => e
raise DeleteBranchError, e
end
def delete_refs(*ref_names)
wrapped_gitaly_errors do
gitaly_delete_refs(*ref_names)
end
end
# Create a new branch named +ref+ based on +start_point+, HEAD by default
# Note: No Git hooks are executed for this action
#
# Examples:
# create_branch("feature")
# create_branch("other-feature", "master")
def create_branch(ref, start_point = "HEAD")
write_ref(ref, start_point)
end
# If `mirror_refmap` is present, the remote is set as a mirror with that mapping
def add_remote(remote_name, url, mirror_refmap: nil)
wrapped_gitaly_errors do
gitaly_remote_client.add_remote(remote_name, url, mirror_refmap)
end
end
def remove_remote(remote_name)
wrapped_gitaly_errors do
gitaly_remote_client.remove_remote(remote_name)
end
end
def find_remote_root_ref(remote_name)
return unless remote_name.present?
wrapped_gitaly_errors do
gitaly_remote_client.find_remote_root_ref(remote_name)
end
end
# Returns a result like `git ls-files`: recursive and with full file paths
#
# Ex.
# repo.ls_files('master')
#
def ls_files(ref)
gitaly_commit_client.ls_files(ref)
end
def copy_gitattributes(ref)
wrapped_gitaly_errors do
gitaly_repository_client.apply_gitattributes(ref)
end
end
def info_attributes
return @info_attributes if @info_attributes
content = gitaly_repository_client.info_attributes
@info_attributes = AttributesParser.new(content)
end
# Returns the Git attributes for the given file path.
#
# See `Gitlab::Git::Attributes` for more information.
def attributes(path)
info_attributes.attributes(path)
end
def gitattribute(path, name)
attributes(path)[name]
end
# Returns parsed .gitattributes for a given ref
#
# This only parses the root .gitattributes file,
# it does not traverse subfolders to find additional .gitattributes files
#
# This method is around 30 times slower than `attributes`, which uses
# `$GIT_DIR/info/attributes`. Consider caching AttributesAtRefParser
# and reusing that for multiple calls instead of this method.
def attributes_at(ref)
AttributesAtRefParser.new(self, ref)
end
def languages(ref = nil)
wrapped_gitaly_errors do
gitaly_commit_client.languages(ref)
end
end
def license_short_name
wrapped_gitaly_errors do
gitaly_repository_client.license_short_name
end
end
def fetch_source_branch!(source_repository, source_branch, local_ref)
wrapped_gitaly_errors do
gitaly_repository_client.fetch_source_branch(source_repository, source_branch, local_ref)
end
end
def compare_source_branch(target_branch_name, source_repository, source_branch_name, straight:)
CrossRepoComparer
.new(source_repository, self)
.compare(source_branch_name, target_branch_name, straight: straight)
end
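# Ex. (illustrative refs and SHA)
# repo.write_ref('feature/x', 'deadbeef')        # expands to refs/heads/feature/x
# repo.write_ref('refs/tags/v1.0.0', 'deadbeef') # already fully qualified, used as-is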
def write_ref(ref_path, ref, old_ref: nil)
ref_path = "#{Gitlab::Git::BRANCH_REF_PREFIX}#{ref_path}" unless ref_path.start_with?("refs/") || ref_path == "HEAD"
wrapped_gitaly_errors do
gitaly_repository_client.write_ref(ref_path, ref, old_ref)
end
end
# Refactoring aid; allows us to copy code from app/models/repository.rb
def commit(ref = 'HEAD')
Gitlab::Git::Commit.find(self, ref)
end
def empty?
!has_visible_content?
end
# Fetch remote for repository
#
# remote - remote name
# ssh_auth - SSH known_hosts data and a private key to use for public-key authentication
# forced - should we use --force flag?
# no_tags - should we use --no-tags flag?
# prune - should we use --prune flag?
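#
# Ex. (illustrative remote name)
# repo.fetch_remote('upstream', forced: true, prune: true)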
def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: true)
wrapped_gitaly_errors do
gitaly_repository_client.fetch_remote(
remote,
ssh_auth: ssh_auth,
forced: forced,
no_tags: no_tags,
prune: prune,
timeout: GITLAB_PROJECTS_TIMEOUT
)
end
end
def import_repository(url)
raise ArgumentError, "don't use disk paths with import_repository: #{url.inspect}" if url.start_with?('.', '/')
wrapped_gitaly_errors do
gitaly_repository_client.import_repository(url)
end
end
def blob_at(sha, path)
Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha)
end
# Items should be of format [[commit_id, path], [commit_id1, path1]]
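#
# Ex. (illustrative commit IDs)
# repo.batch_blobs([['deadbeef', 'README.md'], ['cafebabe', 'app/models/user.rb']])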
def batch_blobs(items, blob_size_limit: Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE)
Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit)
end
def fsck
msg, status = gitaly_repository_client.fsck
raise GitError.new("Could not fsck repository: #{msg}") unless status.zero?
end
def create_from_bundle(bundle_path)
# It's important to check that the linked-to file is actually a valid
# .bundle file as it is passed to `git clone`, which may otherwise
# interpret it as a pointer to another repository
::Gitlab::Git::BundleFile.check!(bundle_path)
gitaly_repository_client.create_from_bundle(bundle_path)
end
def create_from_snapshot(url, auth)
gitaly_repository_client.create_from_snapshot(url, auth)
end
def rebase(user, rebase_id, branch:, branch_sha:, remote_repository:, remote_branch:, push_options: [], &block)
wrapped_gitaly_errors do
gitaly_operation_client.rebase(
user,
rebase_id,
branch: branch,
branch_sha: branch_sha,
remote_repository: remote_repository,
remote_branch: remote_branch,
push_options: push_options,
&block
)
end
end
def rebase_in_progress?(rebase_id)
wrapped_gitaly_errors do
gitaly_repository_client.rebase_in_progress?(rebase_id)
end
end
def squash(user, squash_id, start_sha:, end_sha:, author:, message:)
wrapped_gitaly_errors do
gitaly_operation_client.user_squash(user, squash_id, start_sha, end_sha, author, message)
end
end
def squash_in_progress?(squash_id)
wrapped_gitaly_errors do
gitaly_repository_client.squash_in_progress?(squash_id)
end
end
def bundle_to_disk(save_path)
wrapped_gitaly_errors do
gitaly_repository_client.create_bundle(save_path)
end
true
end
# rubocop:disable Metrics/ParameterLists
def multi_action(
user, branch_name:, message:, actions:,
author_email: nil, author_name: nil,
start_branch_name: nil, start_sha: nil, start_repository: self,
force: false)
wrapped_gitaly_errors do
gitaly_operation_client.user_commit_files(user, branch_name,
message, actions, author_email, author_name,
start_branch_name, start_repository, force, start_sha)
end
end
# rubocop:enable Metrics/ParameterLists
def write_config(full_path:)
return unless full_path.present?
# This guard avoids Gitaly log/error spam
raise NoRepository, 'repository does not exist' unless exists?
set_config('gitlab.fullpath' => full_path)
end
def set_config(entries)
wrapped_gitaly_errors do
gitaly_repository_client.set_config(entries)
end
end
def delete_config(*keys)
wrapped_gitaly_errors do
gitaly_repository_client.delete_config(keys)
end
end
def disconnect_alternates
wrapped_gitaly_errors do
gitaly_repository_client.disconnect_alternates
end
end
def gitaly_repository
Gitlab::GitalyClient::Util.repository(@storage, @relative_path, @gl_repository, @gl_project_path)
end
def gitaly_ref_client
@gitaly_ref_client ||= Gitlab::GitalyClient::RefService.new(self)
end
def gitaly_commit_client
@gitaly_commit_client ||= Gitlab::GitalyClient::CommitService.new(self)
end
def gitaly_repository_client
@gitaly_repository_client ||= Gitlab::GitalyClient::RepositoryService.new(self)
end
def gitaly_operation_client
@gitaly_operation_client ||= Gitlab::GitalyClient::OperationService.new(self)
end
def gitaly_remote_client
@gitaly_remote_client ||= Gitlab::GitalyClient::RemoteService.new(self)
end
def gitaly_blob_client
@gitaly_blob_client ||= Gitlab::GitalyClient::BlobService.new(self)
end
def gitaly_conflicts_client(our_commit_oid, their_commit_oid)
Gitlab::GitalyClient::ConflictsService.new(self, our_commit_oid, their_commit_oid)
end
def praefect_info_client
@praefect_info_client ||= Gitlab::GitalyClient::PraefectInfoService.new(self)
end
def clean_stale_repository_files
wrapped_gitaly_errors do
gitaly_repository_client.cleanup if exists?
end
rescue Gitlab::Git::CommandError => e # Don't fail if we can't cleanup
Rails.logger.error("Unable to clean repository on storage #{storage} with relative path #{relative_path}: #{e.message}") # rubocop:disable Gitlab/RailsLogger
Gitlab::Metrics.counter(
:failed_repository_cleanup_total,
'Number of failed repository cleanup events'
).increment
end
def branch_names_contains_sha(sha)
gitaly_ref_client.branch_names_contains_sha(sha)
end
def tag_names_contains_sha(sha)
gitaly_ref_client.tag_names_contains_sha(sha)
end
def search_files_by_content(query, ref, options = {})
return [] if empty? || query.blank?
safe_query = Regexp.escape(query)
ref ||= root_ref
gitaly_repository_client.search_files_by_content(ref, safe_query, options)
end
def can_be_merged?(source_sha, target_branch)
if target_sha = find_branch(target_branch)&.target
!gitaly_conflicts_client(source_sha, target_sha).conflicts?
else
false
end
end
def search_files_by_name(query, ref)
safe_query = Regexp.escape(query.sub(%r{^/*}, ""))
ref ||= root_ref
return [] if empty? || safe_query.blank?
gitaly_repository_client.search_files_by_name(ref, safe_query)
end
def find_commits_by_message(query, ref, path, limit, offset)
wrapped_gitaly_errors do
gitaly_commit_client
.commits_by_message(query, revision: ref, path: path, limit: limit, offset: offset)
.map { |c| commit(c) }
end
end
def list_last_commits_for_tree(sha, path, offset: 0, limit: 25, literal_pathspec: false)
wrapped_gitaly_errors do
gitaly_commit_client.list_last_commits_for_tree(sha, path, offset: offset, limit: limit, literal_pathspec: literal_pathspec)
end
end
def list_commits_by_ref_name(refs)
wrapped_gitaly_errors do
gitaly_commit_client.list_commits_by_ref_name(refs)
end
end
def last_commit_for_path(sha, path, literal_pathspec: false)
wrapped_gitaly_errors do
gitaly_commit_client.last_commit_for_path(sha, path, literal_pathspec: literal_pathspec)
end
end
def checksum
# The exists? RPC is much cheaper, so we perform this request first
raise NoRepository, "Repository does not exist" unless exists?
gitaly_repository_client.calculate_checksum
rescue GRPC::NotFound
raise NoRepository # Guard against data races.
end
def replicas
wrapped_gitaly_errors do
praefect_info_client.replicas
end
end
private
def empty_diff_stats
Gitlab::Git::DiffStatsCollection.new([])
end
def uncached_has_local_branches?
wrapped_gitaly_errors do
gitaly_repository_client.has_local_branches?
end
end
def gitaly_merged_branch_names(branch_names, root_sha)
qualified_branch_names = branch_names.map { |b| "refs/heads/#{b}" }
gitaly_ref_client.merged_branches(qualified_branch_names)
.reject { |b| b.target == root_sha }
.map(&:name)
end
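# Normalizes count_commits options into the revision range Gitaly expects.
#
# Ex. (illustrative refs)
# process_count_commits_options(from: 'v1.0.0', to: 'master', left_right: true)
# # => { from: 'v1.0.0', to: 'master', left_right: true, ref: 'v1.0.0...master' }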
def process_count_commits_options(options)
if options[:from] || options[:to]
ref =
if options[:left_right] # Compare with merge-base for left-right
"#{options[:from]}...#{options[:to]}"
else
"#{options[:from]}..#{options[:to]}"
end
options.merge(ref: ref)
elsif options[:ref] && options[:left_right]
from, to = options[:ref].match(/\A([^\.]*)\.{2,3}([^\.]*)\z/)[1..2]
options.merge(from: from, to: to)
else
options
end
end
def gitaly_submodule_url_for(ref, path)
# We don't care about the contents so 1 byte is enough. Can't request 0 bytes, 0 means unlimited.
commit_object = gitaly_commit_client.tree_entry(ref, path, 1)
return unless commit_object && commit_object.type == :COMMIT
urls = gitaly_submodule_urls_for(ref)
urls && urls[path]
end
def gitaly_submodule_urls_for(ref)
gitmodules = gitaly_commit_client.tree_entry(ref, '.gitmodules', Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE)
return unless gitmodules
submodules = GitmodulesParser.new(gitmodules.data).parse
submodules.transform_values { |submodule| submodule['url'] }
end
# Returns true if the given ref name exists
#
# Ref names must start with `refs/`.
def gitaly_ref_exists?(ref_name)
gitaly_ref_client.ref_exists?(ref_name)
end
def gitaly_copy_gitattributes(revision)
gitaly_repository_client.apply_gitattributes(revision)
end
def gitaly_delete_refs(*ref_names)
gitaly_ref_client.delete_refs(refs: ref_names) if ref_names.any?
end
end
end
end