2018-11-09 13:39:43 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-07-07 10:45:52 -04:00
|
|
|
module Gitlab
|
|
|
|
module GitalyClient
|
2017-07-18 03:59:36 -04:00
|
|
|
class BlobService
|
2018-02-01 14:56:41 -05:00
|
|
|
include Gitlab::EncodingHelper
|
|
|
|
|
2017-07-07 10:45:52 -04:00
|
|
|
# Binds the service to one repository.
#
# @param repository [#gitaly_repository] object whose `gitaly_repository`
#   value is sent as the `repository` field of every request built here.
def initialize(repository)
  @gitaly_repo = repository.gitaly_repository
end
|
2017-07-18 03:59:36 -04:00
|
|
|
|
2017-07-07 10:45:52 -04:00
|
|
|
# Requests a single blob from Gitaly's blob service.
#
# @param oid [String] object id placed in the request's `oid` field
# @param limit [Integer] forwarded as the request's `limit` field
#   (presumably a cap on returned data; confirm against the Gitaly API)
# @return whatever #consume_blob_response builds from the streamed response
def get_blob(oid:, limit:)
  request = Gitaly::GetBlobRequest.new(
    repository: @gitaly_repo,
    oid: oid,
    limit: limit
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_blob,
    request,
    timeout: GitalyClient.fast_timeout
  )

  consume_blob_response(response)
end
|
2018-01-17 08:21:46 -05:00
|
|
|
|
|
|
|
# Looks up LFS pointers for a list of blob ids in a single request.
#
# @param blob_ids [Array<String>] ids sent in the request's `blob_ids` field
# @return [Array] empty when `blob_ids` is empty (no request is made),
#   otherwise the result of #map_lfs_pointers on the response
def batch_lfs_pointers(blob_ids)
  return [] if blob_ids.empty?

  request = Gitaly::GetLFSPointersRequest.new(
    repository: @gitaly_repo,
    blob_ids: blob_ids
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_lfs_pointers,
    request,
    timeout: GitalyClient.medium_timeout
  )

  map_lfs_pointers(response)
end
|
2018-02-01 14:56:41 -05:00
|
|
|
|
|
|
|
# Requests several blobs, each addressed by a (revision, path) pair.
#
# @param revision_paths [Array<Array(String, String)>] pairs of revision and path
# @param limit [Integer] forwarded as the request's `limit` field; defaults to -1
# @return [Array, GitalyClient::BlobsStitcher] an empty array when
#   `revision_paths` is empty (no request is made), otherwise a stitcher
#   wrapping the streamed response
def get_blobs(revision_paths, limit = -1)
  return [] if revision_paths.empty?

  # Build a new array instead of modifying `revision_paths` in place.
  # Callers may pass the argument list of a BatchLoader batch (Blob.lazy);
  # if that list is modified, a later batch re-requests the items of earlier
  # batches. See https://github.com/exAspArk/batch-loader/issues/44.
  request_revision_paths = revision_paths.map do |rev, path|
    Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path))
  end

  request = Gitaly::GetBlobsRequest.new(
    repository: @gitaly_repo,
    revision_paths: request_revision_paths,
    limit: limit
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_blobs,
    request,
    timeout: GitalyClient.fast_timeout
  )

  GitalyClient::BlobsStitcher.new(response)
end
|
2018-02-06 17:49:33 -05:00
|
|
|
|
2019-09-24 08:06:20 -04:00
|
|
|
# Issues the same `get_blobs` RPC as #get_blobs but returns only each
# entry's type, as produced by #map_blob_types.
#
# @param revision_paths [Array<Array(String, String)>] pairs of revision and path
# @param limit [Integer] forwarded as the request's `limit` field; defaults to -1
# @return [Hash] empty when `revision_paths` is empty (no request is made),
#   otherwise the result of #map_blob_types on the response
def get_blob_types(revision_paths, limit = -1)
  return {} if revision_paths.empty?

  # A fresh array is built; the caller's `revision_paths` is left untouched.
  request_revision_paths = revision_paths.map do |rev, path|
    Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path))
  end

  request = Gitaly::GetBlobsRequest.new(
    repository: @gitaly_repo,
    revision_paths: request_revision_paths,
    limit: limit
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_blobs,
    request,
    timeout: GitalyClient.fast_timeout
  )

  map_blob_types(response)
end
|
|
|
|
|
2018-10-22 10:49:20 -04:00
|
|
|
# Requests LFS pointers for `revision`, excluding what `not_in` describes.
#
# @param revision [String] revision, binary-encoded before sending
# @param limit [Integer, nil] request `limit`; nil is sent as 0
# @param not_in [Array<String>, Symbol, nil] nil or :all sets the request's
#   `not_in_all` flag; any other value is appended to `not_in_refs`
# @param dynamic_timeout [Numeric, nil] optional timeout; when given, the
#   smaller of it and GitalyClient.medium_timeout is used
# @return the result of #map_lfs_pointers on the response
def get_new_lfs_pointers(revision, limit, not_in, dynamic_timeout = nil)
  request = Gitaly::GetNewLFSPointersRequest.new(
    repository: @gitaly_repo,
    revision: encode_binary(revision),
    limit: limit || 0
  )

  if not_in.nil? || not_in == :all
    request.not_in_all = true
  else
    request.not_in_refs += not_in
  end

  # A caller-supplied timeout may shorten the call but never extend it
  # beyond the medium timeout.
  timeout =
    if dynamic_timeout
      [dynamic_timeout, GitalyClient.medium_timeout].min
    else
      GitalyClient.medium_timeout
    end

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_new_lfs_pointers,
    request,
    timeout: timeout
  )

  map_lfs_pointers(response)
end
|
|
|
|
|
2020-03-25 05:08:11 -04:00
|
|
|
# Requests every LFS pointer in the repository.
#
# @return the result of #map_lfs_pointers on the response
def get_all_lfs_pointers
  request = Gitaly::GetAllLFSPointersRequest.new(
    repository: @gitaly_repo
  )

  response = GitalyClient.call(
    @gitaly_repo.storage_name,
    :blob_service,
    :get_all_lfs_pointers,
    request,
    timeout: GitalyClient.medium_timeout
  )

  map_lfs_pointers(response)
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
2020-06-19 17:08:32 -04:00
|
|
|
# Collapses a streamed blob response into a single Gitlab::Git::Blob.
#
# The first message supplies the blob's `oid` and `size`; the `data` of every
# message (including the first) is concatenated in order.
#
# @param response [#each] enumerable of messages responding to `oid`, `size`
#   and `data`
# @return [Gitlab::Git::Blob, nil] nil when the stream is empty or the first
#   message carries a blank oid
def consume_blob_response(response)
  chunks = []
  blob = nil

  response.each do |msg|
    blob ||= msg
    chunks << msg.data
  end

  # An empty stream leaves `blob` unset; return nil instead of calling
  # `oid` on nil.
  return if blob.nil? || blob.oid.blank?

  data = chunks.join

  Gitlab::Git::Blob.new(
    id: blob.oid,
    size: blob.size,
    data: data,
    binary: Gitlab::Git::Blob.binary?(data)
  )
end
|
|
|
|
|
2018-02-06 17:49:33 -05:00
|
|
|
# Converts a streamed LFS-pointer response into a flat list of blobs.
#
# @param response [Enumerable] messages each exposing `lfs_pointers`, a list
#   of entries responding to `oid`, `size` and `data`
# @return [Array<Gitlab::Git::Blob>] one blob per pointer, in stream order
def map_lfs_pointers(response)
  response.flat_map do |message|
    message.lfs_pointers.map do |lfs_pointer|
      Gitlab::Git::Blob.new(
        id: lfs_pointer.oid,
        size: lfs_pointer.size,
        data: lfs_pointer.data,
        binary: Gitlab::Git::Blob.binary?(lfs_pointer.data)
      )
    end
  end
end
|
2019-09-24 08:06:20 -04:00
|
|
|
|
|
|
|
# Builds a path => type lookup from a streamed `get_blobs` response.
#
# @param response [#each] messages responding to `path` and `type`
# @return [Hash] keys are copies of each message's path tagged as UTF-8
#   (the message's own string is left untouched); values are the message's
#   type after `downcase`. A later message with the same path overwrites an
#   earlier one.
def map_blob_types(response)
  response.each_with_object({}) do |msg, types|
    types[msg.path.dup.force_encoding('utf-8')] = msg.type.downcase
  end
end
|
2017-07-07 10:45:52 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|