gitlab-org--gitlab-foss/lib/gitlab/git/repository.rb
Rémy Coutable d415419532 Merge branch '28805-download-archive-with-branch-like-feature-xxxx-add-extra-directory-level' into 'master'
Ensure archive download is only one directory deep

Closes #28805

See merge request !9616
2017-03-02 08:37:07 +00:00

1202 lines
37 KiB
Ruby

# Gitlab::Git::Repository is a wrapper around native Rugged::Repository object
require 'tempfile'
require 'forwardable'
require "rubygems/package"
module Gitlab
module Git
class Repository
include Gitlab::Git::Popen
SEARCH_CONTEXT_LINES = 3
NoRepository = Class.new(StandardError)
InvalidBlobName = Class.new(StandardError)
InvalidRef = Class.new(StandardError)
# Full path to repo
attr_reader :path
# Directory name of repo
attr_reader :name
# Rugged repo object
attr_reader :rugged
# 'path' must be the path to a _bare_ git repository, e.g.
# /path/to/my-repo.git
def initialize(path)
@path = path
@name = path.split("/").last
@attributes = Gitlab::Git::Attributes.new(path)
end
delegate :empty?,
:bare?,
to: :rugged
# Default branch in the repository
def root_ref
@root_ref ||= discover_default_branch
end
# Alias to old method for compatibility
def raw
rugged
end
def rugged
@rugged ||= Rugged::Repository.new(path)
rescue Rugged::RepositoryError, Rugged::OSError
raise NoRepository.new('no repository for such path')
end
# Returns an Array of branch names
# sorted by name ASC
def branch_names
branches.map(&:name)
end
# Returns an Array of Branches
def branches
rugged.branches.map do |rugged_ref|
begin
Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target)
rescue Rugged::ReferenceError
# Omit invalid branch
end
end.compact.sort_by(&:name)
end
def reload_rugged
@rugged = nil
end
# Directly find a branch with a simple name (e.g. master)
#
# force_reload causes a new Rugged repository to be instantiated
#
# This is to work around a bug in libgit2 that causes in-memory refs to
# be stale/invalid when packed-refs is changed.
# See https://gitlab.com/gitlab-org/gitlab-ce/issues/15392#note_14538333
def find_branch(name, force_reload = false)
reload_rugged if force_reload
rugged_ref = rugged.branches[name]
Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target) if rugged_ref
end
def local_branches
rugged.branches.each(:local).map do |branch|
Gitlab::Git::Branch.new(self, branch.name, branch.target)
end
end
# Returns the number of valid branches
def branch_count
rugged.branches.count do |ref|
begin
ref.name && ref.target # ensures the branch is valid
true
rescue Rugged::ReferenceError
false
end
end
end
# Returns an Array of tag names
def tag_names
rugged.tags.map { |t| t.name }
end
# Returns an Array of Tags
def tags
rugged.references.each("refs/tags/*").map do |ref|
message = nil
if ref.target.is_a?(Rugged::Tag::Annotation)
tag_message = ref.target.message
if tag_message.respond_to?(:chomp)
message = tag_message.chomp
end
end
Gitlab::Git::Tag.new(self, ref.name, ref.target, message)
end.sort_by(&:name)
end
# Returns true if the given tag exists
#
# name - The name of the tag as a String.
def tag_exists?(name)
!!rugged.tags[name]
end
# Returns true if the given branch exists
#
# name - The name of the branch as a String.
def branch_exists?(name)
rugged.branches.exists?(name)
# If the branch name is invalid (e.g. ".foo") Rugged will raise an error.
# Whatever code calls this method shouldn't have to deal with that so
# instead we just return `false` (which is true since a branch doesn't
# exist when it has an invalid name).
rescue Rugged::ReferenceError
false
end
# Returns an Array of branch and tag names
def ref_names
branch_names + tag_names
end
# Deprecated. Will be removed in 5.2
def heads
rugged.references.each("refs/heads/*").map do |head|
Gitlab::Git::Ref.new(self, head.name, head.target)
end.sort_by(&:name)
end
def has_commits?
!empty?
end
def repo_exists?
!!rugged
end
# Discovers the default branch based on the repository's available branches
#
# - If no branches are present, returns nil
# - If one branch is present, returns its name
# - If two or more branches are present, returns current HEAD or master or first branch
def discover_default_branch
names = branch_names
return if names.empty?
return names[0] if names.length == 1
if rugged_head
extracted_name = Ref.extract_branch_name(rugged_head.name)
return extracted_name if names.include?(extracted_name)
end
if names.include?('master')
'master'
else
names[0]
end
end
def rugged_head
rugged.head
rescue Rugged::ReferenceError
nil
end
def archive_prefix(ref, sha)
project_name = self.name.chomp('.git')
"#{project_name}-#{ref.parameterize}-#{sha}"
end
def archive_metadata(ref, storage_path, format = "tar.gz")
ref ||= root_ref
commit = Gitlab::Git::Commit.find(self, ref)
return {} if commit.nil?
prefix = archive_prefix(ref, commit.id)
{
'RepoPath' => path,
'ArchivePrefix' => prefix,
'ArchivePath' => archive_file_path(prefix, storage_path, format),
'CommitId' => commit.id,
}
end
def archive_file_path(name, storage_path, format = "tar.gz")
# Build file path
return nil unless name
extension =
case format
when "tar.bz2", "tbz", "tbz2", "tb2", "bz2"
"tar.bz2"
when "tar"
"tar"
when "zip"
"zip"
else
# everything else should fall back to tar.gz
"tar.gz"
end
file_name = "#{name}.#{extension}"
File.join(storage_path, self.name, file_name)
end
# Return repo size in megabytes
def size
size = popen(%w(du -sk), path).first.strip.to_i
(size.to_f / 1024).round(2)
end
# Returns an array of BlobSnippets for files at the specified +ref+ that
# contain the +query+ string.
def search_files(query, ref = nil)
greps = []
ref ||= root_ref
populated_index(ref).each do |entry|
# Discard submodules
next if submodule?(entry)
blob = Gitlab::Git::Blob.raw(self, entry[:oid])
# Skip binary files
next if blob.data.encoding == Encoding::ASCII_8BIT
blob.load_all_data!(self)
greps += build_greps(blob.data, query, ref, entry[:path])
end
greps
end
# Use the Rugged Walker API to build an array of commits.
#
# Usage.
# repo.log(
# ref: 'master',
# path: 'app/models',
# limit: 10,
# offset: 5,
# after: Time.new(2016, 4, 21, 14, 32, 10)
# )
#
def log(options)
default_options = {
limit: 10,
offset: 0,
path: nil,
follow: false,
skip_merges: false,
disable_walk: false,
after: nil,
before: nil
}
options = default_options.merge(options)
options[:limit] ||= 0
options[:offset] ||= 0
actual_ref = options[:ref] || root_ref
begin
sha = sha_from_ref(actual_ref)
rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
# Return an empty array if the ref wasn't found
return []
end
if log_using_shell?(options)
log_by_shell(sha, options)
else
log_by_walk(sha, options)
end
end
def log_using_shell?(options)
options[:path].present? ||
options[:disable_walk] ||
options[:skip_merges] ||
options[:after] ||
options[:before]
end
def log_by_walk(sha, options)
walk_options = {
show: sha,
sort: Rugged::SORT_DATE,
limit: options[:limit],
offset: options[:offset]
}
Rugged::Walker.walk(rugged, walk_options).to_a
end
def log_by_shell(sha, options)
limit = options[:limit].to_i
offset = options[:offset].to_i
use_follow_flag = options[:follow] && options[:path].present?
# We will perform the offset in Ruby because --follow doesn't play well with --skip.
# See: https://gitlab.com/gitlab-org/gitlab-ce/issues/3574#note_3040520
offset_in_ruby = use_follow_flag && options[:offset].present?
limit += offset if offset_in_ruby
cmd = %W[#{Gitlab.config.git.bin_path} --git-dir=#{path} log]
cmd << "--max-count=#{limit}"
cmd << '--format=%H'
cmd << "--skip=#{offset}" unless offset_in_ruby
cmd << '--follow' if use_follow_flag
cmd << '--no-merges' if options[:skip_merges]
cmd << "--after=#{options[:after].iso8601}" if options[:after]
cmd << "--before=#{options[:before].iso8601}" if options[:before]
cmd << sha
cmd += %W[-- #{options[:path]}] if options[:path].present?
raw_output = IO.popen(cmd) { |io| io.read }
lines = offset_in_ruby ? raw_output.lines.drop(offset) : raw_output.lines
lines.map! { |c| Rugged::Commit.new(rugged, c.strip) }
end
def sha_from_ref(ref)
rev_parse_target(ref).oid
end
# Return the object that +revspec+ points to. If +revspec+ is an
# annotated tag, then return the tag's target instead.
def rev_parse_target(revspec)
obj = rugged.rev_parse(revspec)
Ref.dereference_object(obj)
end
# Return a collection of Rugged::Commits between the two revspec arguments.
# See http://git-scm.com/docs/git-rev-parse.html#_specifying_revisions for
# a detailed list of valid arguments.
def commits_between(from, to)
walker = Rugged::Walker.new(rugged)
walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)
sha_from = sha_from_ref(from)
sha_to = sha_from_ref(to)
walker.push(sha_to)
walker.hide(sha_from)
commits = walker.to_a
walker.reset
commits
end
# Counts the amount of commits between `from` and `to`.
def count_commits_between(from, to)
commits_between(from, to).size
end
# Returns the SHA of the most recent common ancestor of +from+ and +to+
def merge_base_commit(from, to)
rugged.merge_base(from, to)
end
# Return an array of Diff objects that represent the diff
# between +from+ and +to+. See Diff::filter_diff_options for the allowed
# diff options. The +options+ hash can also include :break_rewrites to
# split larger rewrites into delete/add pairs.
def diff(from, to, options = {}, *paths)
Gitlab::Git::DiffCollection.new(diff_patches(from, to, options, *paths), options)
end
# Returns commits collection
#
# Ex.
# repo.find_commits(
# ref: 'master',
# max_count: 10,
# skip: 5,
# order: :date
# )
#
# +options+ is a Hash of optional arguments to git
# :ref is the ref from which to begin (SHA1 or name)
# :contains is the commit contained by the refs from which to begin (SHA1 or name)
# :max_count is the maximum number of commits to fetch
# :skip is the number of commits to skip
# :order is the commits order and allowed value is :date(default) or :topo
#
def find_commits(options = {})
actual_options = options.dup
allowed_options = [:ref, :max_count, :skip, :contains, :order]
actual_options.keep_if do |key|
allowed_options.include?(key)
end
default_options = { skip: 0 }
actual_options = default_options.merge(actual_options)
walker = Rugged::Walker.new(rugged)
if actual_options[:ref]
walker.push(rugged.rev_parse_oid(actual_options[:ref]))
elsif actual_options[:contains]
branches_contains(actual_options[:contains]).each do |branch|
walker.push(branch.target_id)
end
else
rugged.references.each("refs/heads/*") do |ref|
walker.push(ref.target_id)
end
end
if actual_options[:order] == :topo
walker.sorting(Rugged::SORT_TOPO)
else
walker.sorting(Rugged::SORT_DATE)
end
commits = []
offset = actual_options[:skip]
limit = actual_options[:max_count]
walker.each(offset: offset, limit: limit) do |commit|
gitlab_commit = Gitlab::Git::Commit.decorate(commit)
commits.push(gitlab_commit)
end
walker.reset
commits
rescue Rugged::OdbError
[]
end
# Returns branch names collection that contains the special commit(SHA1
# or name)
#
# Ex.
# repo.branch_names_contains('master')
#
def branch_names_contains(commit)
branches_contains(commit).map { |c| c.name }
end
# Returns branch collection that contains the special commit(SHA1 or name)
#
# Ex.
# repo.branch_names_contains('master')
#
def branches_contains(commit)
commit_obj = rugged.rev_parse(commit)
parent = commit_obj.parents.first unless commit_obj.parents.empty?
walker = Rugged::Walker.new(rugged)
rugged.branches.select do |branch|
walker.push(branch.target_id)
walker.hide(parent) if parent
result = walker.any? { |c| c.oid == commit_obj.oid }
walker.reset
result
end
end
# Get refs hash which key is SHA1
# and value is a Rugged::Reference
def refs_hash
# Initialize only when first call
if @refs_hash.nil?
@refs_hash = Hash.new { |h, k| h[k] = [] }
rugged.references.each do |r|
# Symbolic/remote references may not have an OID; skip over them
target_oid = r.target.try(:oid)
if target_oid
sha = rev_parse_target(target_oid).oid
@refs_hash[sha] << r
end
end
end
@refs_hash
end
# Lookup for rugged object by oid or ref name
def lookup(oid_or_ref_name)
rugged.rev_parse(oid_or_ref_name)
end
# Return hash with submodules info for this repository
#
# Ex.
# {
# "rack" => {
# "id" => "c67be4624545b4263184c4a0e8f887efd0a66320",
# "path" => "rack",
# "url" => "git://github.com/chneukirchen/rack.git"
# },
# "encoding" => {
# "id" => ....
# }
# }
#
def submodules(ref)
commit = rev_parse_target(ref)
return {} unless commit
begin
content = blob_content(commit, ".gitmodules")
rescue InvalidBlobName
return {}
end
parse_gitmodules(commit, content)
end
# Return total commits count accessible from passed ref
def commit_count(ref)
walker = Rugged::Walker.new(rugged)
walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
oid = rugged.rev_parse_oid(ref)
walker.push(oid)
walker.count
end
# Sets HEAD to the commit specified by +ref+; +ref+ can be a branch or
# tag name or a commit SHA. Valid +reset_type+ values are:
#
# [:soft]
# the head will be moved to the commit.
# [:mixed]
# will trigger a +:soft+ reset, plus the index will be replaced
# with the content of the commit tree.
# [:hard]
# will trigger a +:mixed+ reset and the working directory will be
# replaced with the content of the index. (Untracked and ignored files
# will be left alone)
delegate :reset, to: :rugged
# Mimic the `git clean` command and recursively delete untracked files.
# Valid keys that can be passed in the +options+ hash are:
#
# :d - Remove untracked directories
# :f - Remove untracked directories that are managed by a different
# repository
# :x - Remove ignored files
#
# The value in +options+ must evaluate to true for an option to take
# effect.
#
# Examples:
#
# repo.clean(d: true, f: true) # Enable the -d and -f options
#
# repo.clean(d: false, x: true) # -x is enabled, -d is not
def clean(options = {})
strategies = [:remove_untracked]
strategies.push(:force) if options[:f]
strategies.push(:remove_ignored) if options[:x]
# TODO: implement this method
end
# Check out the specified ref. Valid options are:
#
# :b - Create a new branch at +start_point+ and set HEAD to the new
# branch.
#
# * These options are passed to the Rugged::Repository#checkout method:
#
# :progress ::
# A callback that will be executed for checkout progress notifications.
# Up to 3 parameters are passed on each execution:
#
# - The path to the last updated file (or +nil+ on the very first
# invocation).
# - The number of completed checkout steps.
# - The number of total checkout steps to be performed.
#
# :notify ::
# A callback that will be executed for each checkout notification
# types specified with +:notify_flags+. Up to 5 parameters are passed
# on each execution:
#
# - An array containing the +:notify_flags+ that caused the callback
# execution.
# - The path of the current file.
# - A hash describing the baseline blob (or +nil+ if it does not
# exist).
# - A hash describing the target blob (or +nil+ if it does not exist).
# - A hash describing the workdir blob (or +nil+ if it does not
# exist).
#
# :strategy ::
# A single symbol or an array of symbols representing the strategies
# to use when performing the checkout. Possible values are:
#
# :none ::
# Perform a dry run (default).
#
# :safe ::
# Allow safe updates that cannot overwrite uncommitted data.
#
# :safe_create ::
# Allow safe updates plus creation of missing files.
#
# :force ::
# Allow all updates to force working directory to look like index.
#
# :allow_conflicts ::
# Allow checkout to make safe updates even if conflicts are found.
#
# :remove_untracked ::
# Remove untracked files not in index (that are not ignored).
#
# :remove_ignored ::
# Remove ignored files not in index.
#
# :update_only ::
# Only update existing files, don't create new ones.
#
# :dont_update_index ::
# Normally checkout updates index entries as it goes; this stops
# that.
#
# :no_refresh ::
# Don't refresh index/config/etc before doing checkout.
#
# :disable_pathspec_match ::
# Treat pathspec as simple list of exact match file paths.
#
# :skip_locked_directories ::
# Ignore directories in use, they will be left empty.
#
# :skip_unmerged ::
# Allow checkout to skip unmerged files (NOT IMPLEMENTED).
#
# :use_ours ::
# For unmerged files, checkout stage 2 from index (NOT IMPLEMENTED).
#
# :use_theirs ::
# For unmerged files, checkout stage 3 from index (NOT IMPLEMENTED).
#
# :update_submodules ::
# Recursively checkout submodules with same options (NOT
# IMPLEMENTED).
#
# :update_submodules_if_changed ::
# Recursively checkout submodules if HEAD moved in super repo (NOT
# IMPLEMENTED).
#
# :disable_filters ::
# If +true+, filters like CRLF line conversion will be disabled.
#
# :dir_mode ::
# Mode for newly created directories. Default: +0755+.
#
# :file_mode ::
# Mode for newly created files. Default: +0755+ or +0644+.
#
# :file_open_flags ::
# Mode for opening files. Default:
# <code>IO::CREAT | IO::TRUNC | IO::WRONLY</code>.
#
# :notify_flags ::
# A single symbol or an array of symbols representing the cases in
# which the +:notify+ callback should be invoked. Possible values are:
#
# :none ::
# Do not invoke the +:notify+ callback (default).
#
# :conflict ::
# Invoke the callback for conflicting paths.
#
# :dirty ::
# Invoke the callback for "dirty" files, i.e. those that do not need
# an update but no longer match the baseline.
#
# :updated ::
# Invoke the callback for any file that was changed.
#
# :untracked ::
# Invoke the callback for untracked files.
#
# :ignored ::
# Invoke the callback for ignored files.
#
# :all ::
# Invoke the callback for all these cases.
#
# :paths ::
# A glob string or an array of glob strings specifying which paths
# should be taken into account for the checkout operation. +nil+ will
# match all files. Default: +nil+.
#
# :baseline ::
# A Rugged::Tree that represents the current, expected contents of the
# workdir. Default: +HEAD+.
#
# :target_directory ::
# A path to an alternative workdir directory in which the checkout
# should be performed.
def checkout(ref, options = {}, start_point = "HEAD")
if options[:b]
rugged.branches.create(ref, start_point)
options.delete(:b)
end
default_options = { strategy: [:recreate_missing, :safe] }
rugged.checkout(ref, default_options.merge(options))
end
# Delete the specified branch from the repository
def delete_branch(branch_name)
rugged.branches.delete(branch_name)
end
# Create a new branch named **ref+ based on **stat_point+, HEAD by default
#
# Examples:
# create_branch("feature")
# create_branch("other-feature", "master")
def create_branch(ref, start_point = "HEAD")
rugged_ref = rugged.branches.create(ref, start_point)
Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target)
rescue Rugged::ReferenceError => e
raise InvalidRef.new("Branch #{ref} already exists") if e.to_s =~ /'refs\/heads\/#{ref}'/
raise InvalidRef.new("Invalid reference #{start_point}")
end
# Return an array of this repository's remote names
def remote_names
rugged.remotes.each_name.to_a
end
# Delete the specified remote from this repository.
def remote_delete(remote_name)
rugged.remotes.delete(remote_name)
end
# Add a new remote to this repository. Returns a Rugged::Remote object
def remote_add(remote_name, url)
rugged.remotes.create(remote_name, url)
end
# Update the specified remote using the values in the +options+ hash
#
# Example
# repo.update_remote("origin", url: "path/to/repo")
def remote_update(remote_name, options = {})
# TODO: Implement other remote options
rugged.remotes.set_url(remote_name, options[:url]) if options[:url]
end
# Fetch the specified remote
def fetch(remote_name)
rugged.remotes[remote_name].fetch
end
# Push +*refspecs+ to the remote identified by +remote_name+.
def push(remote_name, *refspecs)
rugged.remotes[remote_name].push(refspecs)
end
# Merge the +source_name+ branch into the +target_name+ branch. This is
# equivalent to `git merge --no_ff +source_name+`, since a merge commit
# is always created.
def merge(source_name, target_name, options = {})
our_commit = rugged.branches[target_name].target
their_commit = rugged.branches[source_name].target
raise "Invalid merge target" if our_commit.nil?
raise "Invalid merge source" if their_commit.nil?
merge_index = rugged.merge_commits(our_commit, their_commit)
return false if merge_index.conflicts?
actual_options = options.merge(
parents: [our_commit, their_commit],
tree: merge_index.write_tree(rugged),
update_ref: "refs/heads/#{target_name}"
)
Rugged::Commit.create(rugged, actual_options)
end
def commits_since(from_date)
walker = Rugged::Walker.new(rugged)
walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)
rugged.references.each("refs/heads/*") do |ref|
walker.push(ref.target_id)
end
commits = []
walker.each do |commit|
break if commit.author[:time].to_date < from_date
commits.push(commit)
end
commits
end
AUTOCRLF_VALUES = {
"true" => true,
"false" => false,
"input" => :input
}.freeze
def autocrlf
AUTOCRLF_VALUES[rugged.config['core.autocrlf']]
end
def autocrlf=(value)
rugged.config['core.autocrlf'] = AUTOCRLF_VALUES.invert[value]
end
# Returns result like "git ls-files" , recursive and full file path
#
# Ex.
# repo.ls_files('master')
#
def ls_files(ref)
actual_ref = ref || root_ref
begin
sha_from_ref(actual_ref)
rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
# Return an empty array if the ref wasn't found
return []
end
cmd = %W(#{Gitlab.config.git.bin_path} --git-dir=#{path} ls-tree)
cmd += %w(-r)
cmd += %w(--full-tree)
cmd += %w(--full-name)
cmd += %W(-- #{actual_ref})
raw_output = IO.popen(cmd, &:read).split("\n").map do |f|
stuff, path = f.split("\t")
_mode, type, _sha = stuff.split(" ")
path if type == "blob"
# Contain only blob type
end
raw_output.compact
end
def copy_gitattributes(ref)
begin
commit = lookup(ref)
rescue Rugged::ReferenceError
raise InvalidRef.new("Ref #{ref} is invalid")
end
# Create the paths
info_dir_path = File.join(path, 'info')
info_attributes_path = File.join(info_dir_path, 'attributes')
begin
# Retrieve the contents of the blob
gitattributes_content = blob_content(commit, '.gitattributes')
rescue InvalidBlobName
# No .gitattributes found. Should now remove any info/attributes and return
File.delete(info_attributes_path) if File.exist?(info_attributes_path)
return
end
# Create the info directory if needed
Dir.mkdir(info_dir_path) unless File.directory?(info_dir_path)
# Write the contents of the .gitattributes file to info/attributes
# Use binary mode to prevent Rails from converting ASCII-8BIT to UTF-8
File.open(info_attributes_path, "wb") do |file|
file.write(gitattributes_content)
end
end
# Checks if the blob should be diffable according to its attributes
def diffable?(blob)
attributes(blob.path).fetch('diff') { blob.text? }
end
# Returns the Git attributes for the given file path.
#
# See `Gitlab::Git::Attributes` for more information.
def attributes(path)
@attributes.attributes(path)
end
private
# Get the content of a blob for a given commit. If the blob is a commit
# (for submodules) then return the blob's OID.
def blob_content(commit, blob_name)
blob_entry = tree_entry(commit, blob_name)
unless blob_entry
raise InvalidBlobName.new("Invalid blob name: #{blob_name}")
end
case blob_entry[:type]
when :commit
blob_entry[:oid]
when :tree
raise InvalidBlobName.new("#{blob_name} is a tree, not a blob")
when :blob
rugged.lookup(blob_entry[:oid]).content
end
end
# Parses the contents of a .gitmodules file and returns a hash of
# submodule information.
def parse_gitmodules(commit, content)
results = {}
current = ""
content.split("\n").each do |txt|
if txt =~ /^\s*\[/
current = txt.match(/(?<=").*(?=")/)[0]
results[current] = {}
else
next unless results[current]
match_data = txt.match(/(\w+)\s*=\s*(.*)/)
next unless match_data
target = match_data[2].chomp
results[current][match_data[1]] = target
if match_data[1] == "path"
begin
results[current]["id"] = blob_content(commit, target)
rescue InvalidBlobName
results.delete(current)
end
end
end
end
results
end
# Returns true if +commit+ introduced changes to +path+, using commit
# trees to make that determination. Uses the history simplification
# rules that `git log` uses by default, where a commit is omitted if it
# is TREESAME to any parent.
#
# If the +follow+ option is true and the file specified by +path+ was
# renamed, then the path value is set to the old path.
def commit_touches_path?(commit, path, follow, walker)
entry = tree_entry(commit, path)
if commit.parents.empty?
# This is the root commit, return true if it has +path+ in its tree
return !entry.nil?
end
num_treesame = 0
commit.parents.each do |parent|
parent_entry = tree_entry(parent, path)
# Only follow the first TREESAME parent for merge commits
if num_treesame > 0
walker.hide(parent)
next
end
if entry.nil? && parent_entry.nil?
num_treesame += 1
elsif entry && parent_entry && entry[:oid] == parent_entry[:oid]
num_treesame += 1
end
end
case num_treesame
when 0
detect_rename(commit, commit.parents.first, path) if follow
true
else false
end
end
# Find the entry for +path+ in the tree for +commit+
def tree_entry(commit, path)
pathname = Pathname.new(path)
first = true
tmp_entry = nil
pathname.each_filename do |dir|
if first
tmp_entry = commit.tree[dir]
first = false
elsif tmp_entry.nil?
return nil
else
tmp_entry = rugged.lookup(tmp_entry[:oid])
return nil unless tmp_entry.type == :tree
tmp_entry = tmp_entry[dir]
end
end
tmp_entry
end
# Compare +commit+ and +parent+ for +path+. If +path+ is a file and was
# renamed in +commit+, then set +path+ to the old filename.
def detect_rename(commit, parent, path)
diff = parent.diff(commit, paths: [path], disable_pathspec_match: true)
# If +path+ is a filename, not a directory, then we should only have
# one delta. We don't need to follow renames for directories.
return nil if diff.each_delta.count > 1
delta = diff.each_delta.first
if delta.added?
full_diff = parent.diff(commit)
full_diff.find_similar!
full_diff.each_delta do |full_delta|
if full_delta.renamed? && path == full_delta.new_file[:path]
# Look for the old path in ancestors
path.replace(full_delta.old_file[:path])
end
end
end
end
def archive_to_file(treeish = 'master', filename = 'archive.tar.gz', format = nil, compress_cmd = %w(gzip -n))
git_archive_cmd = %W(#{Gitlab.config.git.bin_path} --git-dir=#{path} archive)
# Put files into a directory before archiving
prefix = "#{archive_name(treeish)}/"
git_archive_cmd << "--prefix=#{prefix}"
# Format defaults to tar
git_archive_cmd << "--format=#{format}" if format
git_archive_cmd += %W(-- #{treeish})
open(filename, 'w') do |file|
# Create a pipe to act as the '|' in 'git archive ... | gzip'
pipe_rd, pipe_wr = IO.pipe
# Get the compression process ready to accept data from the read end
# of the pipe
compress_pid = spawn(*nice(compress_cmd), in: pipe_rd, out: file)
# The read end belongs to the compression process now; we should
# close our file descriptor for it.
pipe_rd.close
# Start 'git archive' and tell it to write into the write end of the
# pipe.
git_archive_pid = spawn(*nice(git_archive_cmd), out: pipe_wr)
# The write end belongs to 'git archive' now; close it.
pipe_wr.close
# When 'git archive' and the compression process are finished, we are
# done.
Process.waitpid(git_archive_pid)
raise "#{git_archive_cmd.join(' ')} failed" unless $?.success?
Process.waitpid(compress_pid)
raise "#{compress_cmd.join(' ')} failed" unless $?.success?
end
end
def nice(cmd)
nice_cmd = %w(nice -n 20)
unless unsupported_platform?
nice_cmd += %w(ionice -c 2 -n 7)
end
nice_cmd + cmd
end
def unsupported_platform?
%w[darwin freebsd solaris].map { |platform| RUBY_PLATFORM.include?(platform) }.any?
end
# Returns true if the index entry has the special file mode that denotes
# a submodule.
def submodule?(index_entry)
index_entry[:mode] == 57344
end
# Return a Rugged::Index that has read from the tree at +ref_name+
def populated_index(ref_name)
commit = rev_parse_target(ref_name)
index = rugged.index
index.read_tree(commit.tree)
index
end
# Return an array of BlobSnippets for lines in +file_contents+ that match
# +query+
def build_greps(file_contents, query, ref, filename)
# The file_contents string is potentially huge so we make sure to loop
# through it one line at a time. This gives Ruby the chance to GC lines
# we are not interested in.
#
# We need to do a little extra work because we are not looking for just
# the lines that matches the query, but also for the context
# (surrounding lines). We will use Enumerable#each_cons to efficiently
# loop through the lines while keeping surrounding lines on hand.
#
# First, we turn "foo\nbar\nbaz" into
# [
# [nil, -3], [nil, -2], [nil, -1],
# ['foo', 0], ['bar', 1], ['baz', 3],
# [nil, 4], [nil, 5], [nil, 6]
# ]
lines_with_index = Enumerator.new do |yielder|
# Yield fake 'before' lines for the first line of file_contents
(-SEARCH_CONTEXT_LINES..-1).each do |i|
yielder.yield [nil, i]
end
# Yield the actual file contents
count = 0
file_contents.each_line do |line|
line.chomp!
yielder.yield [line, count]
count += 1
end
# Yield fake 'after' lines for the last line of file_contents
(count + 1..count + SEARCH_CONTEXT_LINES).each do |i|
yielder.yield [nil, i]
end
end
greps = []
# Loop through consecutive blocks of lines with indexes
lines_with_index.each_cons(2 * SEARCH_CONTEXT_LINES + 1) do |line_block|
# Get the 'middle' line and index from the block
line, _ = line_block[SEARCH_CONTEXT_LINES]
next unless line && line.match(/#{Regexp.escape(query)}/i)
# Yay, 'line' contains a match!
# Get an array with just the context lines (no indexes)
match_with_context = line_block.map(&:first)
# Remove 'nil' lines in case we are close to the first or last line
match_with_context.compact!
# Get the line number (1-indexed) of the first context line
first_context_line_number = line_block[0][1] + 1
greps << Gitlab::Git::BlobSnippet.new(
ref,
match_with_context,
first_context_line_number,
filename
)
end
greps
end
# Return the Rugged patches for the diff between +from+ and +to+.
def diff_patches(from, to, options = {}, *paths)
options ||= {}
break_rewrites = options[:break_rewrites]
actual_options = Gitlab::Git::Diff.filter_diff_options(options.merge(paths: paths))
diff = rugged.diff(from, to, actual_options)
diff.find_similar!(break_rewrites: break_rewrites)
diff.each_patch
end
end
end
end