2017-07-12 10:31:36 -04:00
|
|
|
# Gitaly note: JV: seems to be completely migrated (behind feature flags).
|
|
|
|
|
2017-01-04 13:43:06 -05:00
|
|
|
module Gitlab
|
|
|
|
module Git
|
|
|
|
class Blob
|
|
|
|
include Linguist::BlobHelper
|
2017-06-01 17:21:14 -04:00
|
|
|
include Gitlab::EncodingHelper
|
2017-01-04 13:43:06 -05:00
|
|
|
|
|
|
|
# This number is the maximum amount of data that we want to display to
|
|
|
|
# the user. We load as much as we can for encoding detection
|
|
|
|
# (Linguist) and LFS pointer parsing. All other cases where we need full
|
|
|
|
# blob data should use load_all_data!.
|
2017-04-13 12:49:24 -04:00
|
|
|
MAX_DATA_DISPLAY_SIZE = 10.megabytes
|
2017-01-04 13:43:06 -05:00
|
|
|
|
2017-08-24 21:30:12 -04:00
|
|
|
# These limits are used as a heuristic to ignore files which can't be LFS
|
|
|
|
# pointers. The format of these is described in
|
|
|
|
# https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
|
|
|
|
LFS_POINTER_MIN_SIZE = 120.bytes
|
|
|
|
LFS_POINTER_MAX_SIZE = 200.bytes
|
|
|
|
|
2017-01-04 13:43:06 -05:00
|
|
|
# Blob attributes. All but loaded_size are assigned from the options hash in
# #initialize; loaded_size is set there from the byte size of the data.
# The readers for name, path and data are redefined further down in this
# class to return encoded values.
attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary
|
|
|
|
|
|
|
|
class << self
|
|
|
|
# Looks up the blob stored at +path+ in commit +sha+.
#
# The :project_raw_show migration flag selects the backend: Gitaly when it
# is enabled, Rugged otherwise. The Rugged lookup is capped at
# MAX_DATA_DISPLAY_SIZE; the Gitaly lookup relies on find_by_gitaly's
# default limit.
def find(repository, sha, path)
  Gitlab::GitalyClient.migrate(:project_raw_show) do |use_gitaly|
    next find_by_gitaly(repository, sha, path) if use_gitaly

    find_by_rugged(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
  end
end
|
|
|
|
|
|
|
|
# Fetches the object identified by +sha+ without going through a commit
# tree. At most MAX_DATA_DISPLAY_SIZE bytes are requested from whichever
# backend the :git_blob_raw migration flag selects.
def raw(repository, sha)
  Gitlab::GitalyClient.migrate(:git_blob_raw) do |use_gitaly|
    next rugged_raw(repository, sha, limit: MAX_DATA_DISPLAY_SIZE) unless use_gitaly

    repository.gitaly_blob_client.get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE)
  end
end
|
2017-01-04 13:43:06 -05:00
|
|
|
|
2017-08-01 07:49:57 -04:00
|
|
|
# Returns an array of Blob instances for blob_references, which is given as
# [[commit_sha, path], [commit_sha, path], ...].
#
# If blob_size_limit < 0 then the full blob contents are returned. If
# blob_size_limit >= 0 then each blob will contain no more than
# blob_size_limit bytes in its data attribute.
#
# Keep in mind that this method may allocate a lot of memory. It is up
# to the caller to limit the number of blobs and blob_size_limit.
#
# Gitaly migration issue: https://gitlab.com/gitlab-org/gitaly/issues/798
def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
  Gitlab::GitalyClient.migrate(:list_blobs_by_sha_path) do |use_gitaly|
    next repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a if use_gitaly

    # Rugged fallback: resolve each [commit_sha, path] pair one at a time.
    blob_references.map do |commit_sha, blob_path|
      find_by_rugged(repository, commit_sha, blob_path, limit: blob_size_limit)
    end
  end
end
|
|
|
|
|
2018-05-09 14:20:28 -04:00
|
|
|
# Returns an array of Blob instances that carry only metadata: the blobs
# are requested through .batch with a blob_size_limit of 0, so the data
# attribute has no content.
def batch_metadata(repository, blob_references)
  batch(repository, blob_references, blob_size_limit: 0)
end
|
|
|
|
|
2017-08-24 21:30:12 -04:00
|
|
|
# Find LFS blobs given an array of sha ids
# Returns array of Gitlab::Git::Blob
# Does not guarantee blob data will be set
def batch_lfs_pointers(repository, blob_ids)
  repository.gitaly_migrate(:batch_lfs_pointers) do |use_gitaly|
    if use_gitaly
      repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a)
    else
      # Lazy pipeline: the cheap header check runs first, so content (capped
      # at LFS_POINTER_MAX_SIZE) is only read for plausible candidates.
      candidate_ids = blob_ids.lazy.select { |oid| possible_lfs_blob?(repository, oid) }
      candidate_blobs = candidate_ids.map { |oid| rugged_raw(repository, oid, limit: LFS_POINTER_MAX_SIZE) }

      candidate_blobs.select(&:lfs_pointer?).force
    end
  end
end
|
|
|
|
|
2017-09-04 15:32:57 -04:00
|
|
|
# Reports whether +data+ should be treated as binary. The decision is
# delegated entirely to EncodingHelper.detect_libgit2_binary?.
def binary?(data)
  EncodingHelper.detect_libgit2_binary?(data)
end
|
|
|
|
|
2017-08-24 21:30:12 -04:00
|
|
|
# True when +size+ lies within the inclusive range
# LFS_POINTER_MIN_SIZE..LFS_POINTER_MAX_SIZE, i.e. the blob is not ruled
# out as an LFS pointer by its size alone.
def size_could_be_lfs?(size)
  size.between?(LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE)
end
|
|
|
|
|
2017-07-12 10:31:36 -04:00
|
|
|
private
|
|
|
|
|
2017-01-04 13:43:06 -05:00
|
|
|
# Recursive search of a tree entry by path.
#
# +root_id+ is the oid of the tree to start from and +path_parts+ are the
# path components relative to that tree, one argument per component.
#
# Ex.
#   blog/             # oid: 1a (the root tree)
#     app/            # oid: 2a
#       models/       # oid: 3a
#         file.rb     # oid: 4a
#
#   Blob.find_entry_by_path(repo, '1a', 'app', 'models', 'file.rb')
#   # => the tree entry for file.rb (its :oid is '4a')
#
# Every intermediate component must resolve to a :tree entry and the last
# one to a :blob or :commit entry. Returns nil when a component is missing,
# when an entry has the wrong type, or when no components are given.
def find_entry_by_path(repository, root_id, *path_parts)
  # Destructure instead of shifting so the argument array is left untouched.
  name, *remaining = path_parts

  tree = repository.lookup(root_id)
  found = tree.find { |candidate| candidate[:name] == name }
  return unless found

  if remaining.empty?
    # Last component: only blobs and submodule (commit) entries qualify.
    %i[blob commit].include?(found[:type]) ? found : nil
  elsif found[:type] == :tree
    find_entry_by_path(repository, found[:oid], *remaining)
  end
end
|
|
|
|
|
|
|
|
# Builds the placeholder Blob used for a :commit tree entry (see
# find_by_rugged): id and name come from the entry, size is 0 and data is
# the empty string.
def submodule_blob(blob_entry, path, sha)
  attributes = {
    id: blob_entry[:oid],
    name: blob_entry[:name],
    size: 0,
    data: '',
    path: path,
    commit_id: sha
  }

  new(attributes)
end
|
2017-08-01 07:49:57 -04:00
|
|
|
|
2017-12-21 09:05:35 -05:00
|
|
|
# Finds the entry at +path+ in commit +sha+ through Gitaly's CommitService
# and wraps it in a Blob.
#
# Returns nil when +path+ is nil, when Gitaly returns no entry, or when the
# entry type is neither :COMMIT nor :BLOB.
def find_by_gitaly(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
  return unless path

  # Strip any leading / characters; a path that ends up empty is looked up
  # as '/'.
  path = path.sub(%r{\A/*}, '')
  path = '/' if path.empty?
  name = File.basename(path)

  # Gitaly will think that setting the limit to 0 means unlimited, while
  # the client might only need the metadata and thus set the limit to 0.
  # In this method we'll then set the limit to 1, but clear the byte of data
  # that we got back so for the outside world it looks like the limit was
  # actually 0.
  metadata_only = limit == 0
  req_limit = metadata_only ? 1 : limit

  commit_service = Gitlab::GitalyClient::CommitService.new(repository)
  entry = commit_service.tree_entry(sha, path, req_limit)
  return unless entry

  entry.data = "" if metadata_only

  case entry.type
  when :COMMIT
    # Same shape as the Blob built by submodule_blob: no size, no data.
    new(id: entry.oid, name: name, size: 0, data: '', path: path, commit_id: sha)
  when :BLOB
    new(
      id: entry.oid,
      name: name,
      size: entry.size,
      data: entry.data.dup,
      mode: entry.mode.to_s(8), # mode is rendered as an octal string
      path: path,
      commit_id: sha,
      binary: binary?(entry.data)
    )
  end
end
|
|
|
|
|
|
|
|
# Finds the entry at +path+ in commit +sha+ through Rugged and wraps it in a
# Blob holding at most +limit+ bytes of content.
#
# Returns nil when +path+ is nil, when no matching entry exists, when the
# blob lookup yields nothing, or when Rugged raises Rugged::ReferenceError.
def find_by_rugged(repository, sha, path, limit:)
  return unless path

  # Strip any leading / characters from the path
  path = path.sub(%r{\A/*}, '')

  root_tree = repository.lookup(sha).tree
  blob_entry = find_entry_by_path(repository, root_tree.oid, *path.split('/'))
  return unless blob_entry

  # A :commit entry gets the empty placeholder Blob instead of content.
  return submodule_blob(blob_entry, path, sha) if blob_entry[:type] == :commit

  blob = repository.lookup(blob_entry[:oid])
  return unless blob

  new(
    id: blob.oid,
    name: blob_entry[:name],
    size: blob.size,
    # Rugged::Blob#content is expensive; don't call it if we don't have to.
    data: limit.zero? ? '' : blob.content(limit),
    mode: blob_entry[:filemode].to_s(8),
    path: path,
    commit_id: sha,
    binary: blob.binary?
  )
rescue Rugged::ReferenceError
  nil
end
|
2017-08-24 21:30:12 -04:00
|
|
|
|
|
|
|
# Looks up +sha+ through Rugged and, when the object is a Rugged::Blob,
# wraps it in a Blob carrying at most +limit+ bytes of content. Returns nil
# for any other kind of object. The resulting Blob has no name, path, mode
# or commit_id.
def rugged_raw(repository, sha, limit:)
  object = repository.lookup(sha)
  return unless object.is_a?(Rugged::Blob)

  new(id: object.oid, size: object.size, data: object.content(limit), binary: object.binary?)
end
|
|
|
|
|
|
|
|
# Efficient lookup to determine if object size
# and type make it a possible LFS blob without loading
# blob content into memory with repository.lookup(sha)
#
# Only the object header (its :type and :len fields) is read.
def possible_lfs_blob?(repository, sha)
  header = repository.rugged.read_header(sha)
  return false unless header[:type] == :blob

  size_could_be_lfs?(header[:len])
end
|
2017-01-04 13:43:06 -05:00
|
|
|
end
|
|
|
|
|
|
|
|
# Builds a Blob from an options hash with symbol keys. The recognised keys
# are id, name, path, size, data, mode, commit_id and binary; each one is
# assigned through its attribute writer, and a missing key assigns nil.
def initialize(options)
  %i[id name path size data mode commit_id binary].each do |attribute|
    __send__("#{attribute}=", options[attribute]) # rubocop:disable GitlabSecurity/PublicSend
  end

  # Retain the actual size before it is encoded
  @loaded_size = @data.bytesize if @data

  # The blob counts as fully loaded when the bytes held match the reported
  # size.
  @loaded_all_data = @loaded_size == size
end
|
|
|
|
|
|
|
|
# Whether the blob is binary. A flag recorded on the instance wins; when
# none is recorded (@binary is nil) the decision is deferred to super.
def binary?
  return super if @binary.nil?

  @binary == true
end
|
|
|
|
|
|
|
|
# Returns the blob data after passing it through encode!.
def data
  encode!(@data)
end
|
|
|
|
|
|
|
|
# Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
# memory as a Ruby string.
#
# The backend is chosen by the :git_blob_load_all_data migration flag.
# Nothing is fetched when the data is the empty string or when the blob is
# already marked as fully loaded.
def load_all_data!(repository)
  # NOTE(review): this guard also matches any blob built with empty data,
  # e.g. one fetched with a limit of 0 — confirm skipping those is intended.
  return if @data == '' # don't mess with submodule blobs

  # Even if we return early below, clear the recorded flag so that binary?
  # is recalculated, in case the blob was initialized as text but the full
  # data isn't.
  @binary = nil

  return if @loaded_all_data

  @data = Gitlab::GitalyClient.migrate(:git_blob_load_all_data) do |use_gitaly|
    if use_gitaly
      # A limit of -1 asks Gitaly for the blob without a size cap.
      repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data
    else
      repository.lookup(id).content
    end
  end

  @loaded_all_data = true
  @loaded_size = @data.bytesize
end
|
|
|
|
|
|
|
|
# Returns the blob name after passing it through encode!.
def name
  encode!(@name)
end
|
|
|
|
|
2017-07-04 15:10:34 -04:00
|
|
|
# Returns the blob path after passing it through encode!.
def path
  encode!(@path)
end
|
|
|
|
|
2017-05-02 18:45:50 -04:00
|
|
|
# True when the reported size exceeds the number of bytes actually loaded.
# Returns the falsy size itself (nil) when no size is known.
def truncated?
  reported = size

  reported && reported > loaded_size
end
|
|
|
|
|
2017-01-04 13:43:06 -05:00
|
|
|
# Valid LFS object pointer is a text file consisting of
# version
# oid
# size
# see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
#
# The cheap size check runs first so the content is only inspected for
# blobs whose size could belong to a pointer.
def lfs_pointer?
  self.class.size_could_be_lfs?(size) &&
    has_lfs_version_key? &&
    lfs_oid.present? &&
    lfs_size.present?
end
|
|
|
|
|
|
|
|
# Extracts the 64-character lowercase hex digest that follows "sha256:" in
# the blob data. Returns nil when the data lacks the LFS version key or no
# digest is found.
def lfs_oid
  return unless has_lfs_version_key?

  data[/(?<=sha256:)([0-9a-f]{64})/, 1]
end
|
|
|
|
|
|
|
|
# Extracts the integer that follows "size " in the blob data. Returns nil
# when the data lacks the LFS version key or no size is found.
def lfs_size
  return unless has_lfs_version_key?

  digits = data[/(?<=size )([0-9]+)/, 1]
  digits.to_i if digits
end
|
|
|
|
|
2017-05-02 18:45:50 -04:00
|
|
|
# Returns :lfs when the blob is an LFS pointer, nil otherwise.
def external_storage
  :lfs if lfs_pointer?
end
|
|
|
|
|
2017-05-02 18:45:50 -04:00
|
|
|
alias_method :external_size, :lfs_size
|
|
|
|
|
2017-01-04 13:43:06 -05:00
|
|
|
private
|
|
|
|
|
|
|
|
# True when the blob is non-empty text whose data begins with the LFS spec
# version line.
def has_lfs_version_key?
  return false if empty?

  text? && data.start_with?("version https://git-lfs.github.com/spec")
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|