2018-11-09 13:39:43 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-07-07 10:45:52 -04:00
|
|
|
module Gitlab
|
|
|
|
module GitalyClient
|
2017-07-18 03:59:36 -04:00
|
|
|
class BlobService
|
2018-02-01 14:56:41 -05:00
|
|
|
include Gitlab::EncodingHelper
|
|
|
|
|
2017-07-07 10:45:52 -04:00
|
|
|
# Builds a blob-service client bound to one repository.
#
# @param repository [Object] any object responding to #gitaly_repository,
#   which must return the Gitaly protobuf repository message
def initialize(repository)
  @gitaly_repo = repository.gitaly_repository
end
|
2017-07-18 03:59:36 -04:00
|
|
|
|
2017-07-07 10:45:52 -04:00
|
|
|
# Fetches a single blob by object ID via the GetBlob RPC.
#
# @param oid [String] the object ID of the blob to fetch
# @param limit [Integer] maximum number of data bytes to request
# @return [Gitlab::Git::Blob, nil] the assembled blob, or nil when the
#   response carries a blank oid (see #consume_blob_response)
def get_blob(oid:, limit:)
  request = Gitaly::GetBlobRequest.new(repository: @gitaly_repo, oid: oid, limit: limit)

  # Single-blob lookups are expected to be quick, hence the fast timeout.
  response = GitalyClient.call(@gitaly_repo.storage_name, :blob_service, :get_blob, request, timeout: GitalyClient.fast_timeout)

  consume_blob_response(response)
end
|
2018-01-17 08:21:46 -05:00
|
|
|
|
2021-08-31 08:11:07 -04:00
|
|
|
# Lists blobs reachable from the given revisions via the ListBlobs RPC.
#
# @param revisions [String, Array<String>] revision(s) to walk; wrapped
#   into an array before being sent
# @param limit [Integer] maximum number of blobs to return (0 = no limit
#   on the request message; actual semantics decided server-side)
# @param bytes_limit [Integer] per-blob data byte limit for the request
# @param with_paths [Boolean] whether the server should include paths
# @param dynamic_timeout [Numeric, nil] optional caller-supplied timeout
# @return [GitalyClient::BlobsStitcher] stitched stream of blobs
def list_blobs(revisions, limit: 0, bytes_limit: 0, with_paths: false, dynamic_timeout: nil)
  request = Gitaly::ListBlobsRequest.new(
    repository: @gitaly_repo,
    revisions: Array.wrap(revisions),
    limit: limit,
    bytes_limit: bytes_limit,
    with_paths: with_paths
  )

  # A caller-supplied timeout is honored but never allowed to exceed the
  # standard medium timeout.
  timeout = dynamic_timeout ? [dynamic_timeout, GitalyClient.medium_timeout].min : GitalyClient.medium_timeout

  response = GitalyClient.call(@gitaly_repo.storage_name, :blob_service, :list_blobs, request, timeout: timeout)

  GitalyClient::BlobsStitcher.new(GitalyClient::ListBlobsAdapter.new(response))
end
|
|
|
|
|
2018-01-17 08:21:46 -05:00
|
|
|
# Asks Gitaly which of the given blobs are LFS pointers.
#
# @param blob_ids [Array<String>] blob object IDs to inspect
# @return [Array<Gitlab::Git::Blob>] blobs for the detected LFS pointers;
#   empty when no IDs are given (short-circuits without an RPC)
def batch_lfs_pointers(blob_ids)
  return [] if blob_ids.empty?

  request = Gitaly::GetLFSPointersRequest.new(repository: @gitaly_repo, blob_ids: blob_ids)
  response = GitalyClient.call(@gitaly_repo.storage_name, :blob_service, :get_lfs_pointers, request, timeout: GitalyClient.medium_timeout)

  map_lfs_pointers(response)
end
|
2018-02-01 14:56:41 -05:00
|
|
|
|
|
|
|
# Fetches multiple blobs identified by (revision, path) pairs.
#
# NOTE: we deliberately build a *new* array with +map+ instead of mutating
# revision_paths in place. Callers may hand us a BatchLoader batch, and
# mutating a batch makes later batches re-request earlier items — see
# https://github.com/exAspArk/batch-loader/issues/44.
#
# @param revision_paths [Array<Array(String, String)>] pairs of
#   [revision, path]
# @param limit [Integer] per-blob data byte limit for the request
# @return [GitalyClient::BlobsStitcher] stitched stream of blobs; an
#   empty Array when no pairs are given
def get_blobs(revision_paths, limit = -1)
  return [] if revision_paths.empty?

  request_revision_paths = revision_paths.map do |revision, path|
    Gitaly::GetBlobsRequest::RevisionPath.new(revision: revision, path: encode_binary(path))
  end

  request = Gitaly::GetBlobsRequest.new(
    repository: @gitaly_repo,
    revision_paths: request_revision_paths,
    limit: limit
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_blobs,
    request,
    timeout: GitalyClient.fast_timeout)

  GitalyClient::BlobsStitcher.new(response)
end
|
2018-02-06 17:49:33 -05:00
|
|
|
|
2019-09-24 08:06:20 -04:00
|
|
|
# Resolves the object type for each (revision, path) pair using the same
# GetBlobs RPC as #get_blobs, but keeping only the type information.
#
# @param revision_paths [Array<Array(String, String)>] pairs of
#   [revision, path]
# @param limit [Integer] per-blob data byte limit for the request
# @return [Hash{String => Symbol, String}] UTF-8 path => downcased type;
#   empty Hash when no pairs are given (short-circuits without an RPC)
def get_blob_types(revision_paths, limit = -1)
  return {} if revision_paths.empty?

  request_revision_paths = revision_paths.map do |revision, path|
    Gitaly::GetBlobsRequest::RevisionPath.new(revision: revision, path: encode_binary(path))
  end

  request = Gitaly::GetBlobsRequest.new(
    repository: @gitaly_repo,
    revision_paths: request_revision_paths,
    limit: limit
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_blobs,
    request,
    timeout: GitalyClient.fast_timeout
  )

  map_blob_types(response)
end
|
|
|
|
|
2021-06-15 14:09:57 -04:00
|
|
|
# Finds LFS pointers among blobs reachable from +revisions+ but not from
# +not_in+. The concrete request/RPC pair depends on whether the change is
# using an object quarantine environment (see
# #create_new_lfs_pointers_request).
#
# @param revisions [String, Array<String>] revisions to include
# @param limit [Integer, nil] maximum number of pointers (nil = 0 = all)
# @param not_in [Array<String>, :all, nil] revisions to exclude
# @param dynamic_timeout [Numeric, nil] optional caller-supplied timeout
# @return [Array<Gitlab::Git::Blob>] the detected LFS pointers
def get_new_lfs_pointers(revisions, limit, not_in, dynamic_timeout = nil)
  request, rpc = create_new_lfs_pointers_request(revisions, limit, not_in)

  # Honor a caller-supplied timeout but cap it at the medium timeout.
  timeout =
    if dynamic_timeout
      [dynamic_timeout, GitalyClient.medium_timeout].min
    else
      GitalyClient.medium_timeout
    end

  response = GitalyClient.call(@gitaly_repo.storage_name, :blob_service, rpc, request, timeout: timeout)

  map_lfs_pointers(response)
end
|
|
|
|
|
2020-03-25 05:08:11 -04:00
|
|
|
# Lists every LFS pointer in the repository by scanning all revisions
# (the "--all" pseudo-revision) via the ListLFSPointers RPC.
#
# @return [Array<Gitlab::Git::Blob>] all detected LFS pointers
def get_all_lfs_pointers
  request = Gitaly::ListLFSPointersRequest.new(
    repository: @gitaly_repo,
    revisions: [encode_binary("--all")]
  )

  response = GitalyClient.call(@gitaly_repo.storage_name, :blob_service, :list_lfs_pointers, request, timeout: GitalyClient.medium_timeout)

  map_lfs_pointers(response)
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
2021-06-15 14:09:57 -04:00
|
|
|
# Builds the request object and RPC name used by #get_new_lfs_pointers.
#
# If the check happens for a change which is using a quarantine
# environment for incoming objects, then we can avoid doing the
# necessary graph walk to detect only new LFS pointers and instead scan
# through all quarantined objects (ListAllLFSPointers on a copy of the
# repository with alternate object directories cleared). Otherwise we do
# a revision walk with ListLFSPointers, excluding +not_in+.
#
# @param revisions [String, Array<String>] revisions to include
# @param limit [Integer, nil] maximum number of pointers (nil becomes 0)
# @param not_in [Array<String>, :all, nil] revisions to exclude; nil or
#   :all excludes everything reachable ("--not --all")
# @return [Array(Object, Symbol)] the protobuf request and the RPC name
def create_new_lfs_pointers_request(revisions, limit, not_in)
  git_env = ::Gitlab::Git::HookEnv.all(@gitaly_repo.gl_repository)

  if git_env['GIT_OBJECT_DIRECTORY_RELATIVE'].present?
    # Work on a copy so the caller's repository message keeps its
    # alternate object directories intact.
    repository = @gitaly_repo.dup
    repository.git_alternate_object_directories = Google::Protobuf::RepeatedField.new(:string)

    request = Gitaly::ListAllLFSPointersRequest.new(
      repository: repository,
      limit: limit || 0
    )

    [request, :list_all_lfs_pointers]
  else
    revisions = Array.wrap(revisions)
    # BUGFIX: the previous code used `not_in.prepend "--not"`, which
    # mutates the caller's array in place — calling this method twice
    # with the same array would accumulate "--not" entries. Build a new
    # array instead; the resulting request is identical.
    revisions += if not_in.nil? || not_in == :all
                   ["--not", "--all"]
                 else
                   ["--not"] + not_in
                 end

    request = Gitaly::ListLFSPointersRequest.new(
      repository: @gitaly_repo,
      limit: limit || 0,
      revisions: revisions.map { |rev| encode_binary(rev) }
    )

    [request, :list_lfs_pointers]
  end
end
|
|
|
|
|
2020-06-19 17:08:32 -04:00
|
|
|
# Folds a streamed GetBlob response into a single Gitlab::Git::Blob.
# Blob metadata (oid, size) is taken from the first message; the data
# chunks of every message are concatenated.
#
# NOTE(review): assumes the stream yields at least one message — an
# empty enumerable would raise NoMethodError on `first_message.oid`,
# exactly as the original code did.
#
# @param response [Enumerable] stream of GetBlob response messages
# @return [Gitlab::Git::Blob, nil] nil when the first message carries a
#   blank oid (blob not found)
def consume_blob_response(response)
  first_message = nil
  chunks = []

  response.each do |msg|
    first_message ||= msg
    chunks << msg.data
  end

  return if first_message.oid.blank?

  data = chunks.join

  Gitlab::Git::Blob.new(
    id: first_message.oid,
    size: first_message.size,
    data: data,
    binary: Gitlab::Git::Blob.binary?(data)
  )
end
|
|
|
|
|
2018-02-06 17:49:33 -05:00
|
|
|
# Converts a streamed LFS-pointer response into Gitlab::Git::Blob objects,
# flattening the per-message pointer lists into one array.
#
# @param response [Enumerable] messages each exposing #lfs_pointers
# @return [Array<Gitlab::Git::Blob>]
def map_lfs_pointers(response)
  response.each_with_object([]) do |message, pointers|
    message.lfs_pointers.each do |pointer|
      pointers << Gitlab::Git::Blob.new(
        id: pointer.oid,
        size: pointer.size,
        data: pointer.data,
        binary: Gitlab::Git::Blob.binary?(pointer.data)
      )
    end
  end
end
|
2019-09-24 08:06:20 -04:00
|
|
|
|
|
|
|
# Builds a path => type lookup from a streamed GetBlobs response.
# Paths arrive as binary strings and are re-tagged as UTF-8 (on a copy,
# so the message itself is untouched); types are downcased.
#
# @param response [Enumerable] messages each exposing #path and #type
# @return [Hash] UTF-8 path => downcased type
def map_blob_types(response)
  response.each_with_object({}) do |msg, types|
    types[msg.path.dup.force_encoding('utf-8')] = msg.type.downcase
  end
end
|
2017-07-07 10:45:52 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|