Merge branch 'jej/fix-lfs-changes-laziness' into 'master'
Implement lazy popen so LfsChanges doesn't have to wait for rev-list to complete See merge request gitlab-org/gitlab-ce!15180
This commit is contained in:
commit
a2cd9ad251
6 changed files with 112 additions and 50 deletions
|
@ -8,25 +8,22 @@ module Gitlab
|
|||
|
||||
def new_pointers(object_limit: nil, not_in: nil)
|
||||
@new_pointers ||= begin
|
||||
object_ids = new_objects(not_in: not_in)
|
||||
object_ids = object_ids.take(object_limit) if object_limit
|
||||
rev_list.new_objects(not_in: not_in, require_path: true) do |object_ids|
|
||||
object_ids = object_ids.take(object_limit) if object_limit
|
||||
|
||||
Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids)
|
||||
Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def all_pointers
|
||||
object_ids = rev_list.all_objects(require_path: true)
|
||||
|
||||
Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids)
|
||||
rev_list.all_objects(require_path: true) do |object_ids|
|
||||
Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def new_objects(not_in:)
|
||||
rev_list.new_objects(require_path: true, lazy: true, not_in: not_in)
|
||||
end
|
||||
|
||||
def rev_list
|
||||
::Gitlab::Git::RevList.new(path_to_repo: @repository.path_to_repo,
|
||||
newrev: @newrev)
|
||||
|
|
|
@ -7,7 +7,7 @@ module Gitlab
|
|||
module Popen
|
||||
FAST_GIT_PROCESS_TIMEOUT = 15.seconds
|
||||
|
||||
def popen(cmd, path, vars = {})
|
||||
def popen(cmd, path, vars = {}, lazy_block: nil)
|
||||
unless cmd.is_a?(Array)
|
||||
raise "System commands must be given as an array of strings"
|
||||
end
|
||||
|
@ -22,7 +22,12 @@ module Gitlab
|
|||
yield(stdin) if block_given?
|
||||
stdin.close
|
||||
|
||||
@cmd_output << stdout.read
|
||||
if lazy_block
|
||||
return lazy_block.call(stdout.lazy)
|
||||
else
|
||||
@cmd_output << stdout.read
|
||||
end
|
||||
|
||||
@cmd_output << stderr.read
|
||||
@cmd_status = wait_thr.value.exitstatus
|
||||
end
|
||||
|
|
|
@ -25,17 +25,18 @@ module Gitlab
|
|||
# This skips commit objects and root trees, which might not be needed when
|
||||
# looking for blobs
|
||||
#
|
||||
# Can return a lazy enumerator to limit work done on megabytes of data
|
||||
def new_objects(require_path: nil, lazy: false, not_in: nil)
|
||||
object_output = execute([*base_args, newrev, *not_in_refs(not_in), '--objects'])
|
||||
# When given a block it will yield objects as a lazy enumerator so
|
||||
# the caller can limit work done instead of processing megabytes of data
|
||||
def new_objects(require_path: nil, not_in: nil, &lazy_block)
|
||||
args = [*base_args, newrev, *not_in_refs(not_in), '--objects']
|
||||
|
||||
objects_from_output(object_output, require_path: require_path, lazy: lazy)
|
||||
get_objects(args, require_path: require_path, &lazy_block)
|
||||
end
|
||||
|
||||
def all_objects(require_path: nil)
|
||||
object_output = execute([*base_args, '--all', '--objects'])
|
||||
def all_objects(require_path: nil, &lazy_block)
|
||||
args = [*base_args, '--all', '--objects']
|
||||
|
||||
objects_from_output(object_output, require_path: require_path, lazy: true)
|
||||
get_objects(args, require_path: require_path, &lazy_block)
|
||||
end
|
||||
|
||||
# This methods returns an array of missed references
|
||||
|
@ -64,6 +65,10 @@ module Gitlab
|
|||
output.split("\n")
|
||||
end
|
||||
|
||||
def lazy_execute(args, &lazy_block)
|
||||
popen(args, nil, Gitlab::Git::Env.to_env_hash, lazy_block: lazy_block)
|
||||
end
|
||||
|
||||
def base_args
|
||||
[
|
||||
Gitlab.config.git.bin_path,
|
||||
|
@ -72,20 +77,28 @@ module Gitlab
|
|||
]
|
||||
end
|
||||
|
||||
def objects_from_output(object_output, require_path: nil, lazy: nil)
|
||||
objects = object_output.lazy.map do |output_line|
|
||||
def get_objects(args, require_path: nil)
|
||||
if block_given?
|
||||
lazy_execute(args) do |lazy_output|
|
||||
objects = objects_from_output(lazy_output, require_path: require_path)
|
||||
|
||||
yield(objects)
|
||||
end
|
||||
else
|
||||
object_output = execute(args)
|
||||
|
||||
objects_from_output(object_output, require_path: require_path)
|
||||
end
|
||||
end
|
||||
|
||||
def objects_from_output(object_output, require_path: nil)
|
||||
object_output.map do |output_line|
|
||||
sha, path = output_line.split(' ', 2)
|
||||
|
||||
next if require_path && path.blank?
|
||||
|
||||
sha
|
||||
end.reject(&:nil?)
|
||||
|
||||
if lazy
|
||||
objects
|
||||
else
|
||||
objects.force
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -9,7 +9,7 @@ describe Gitlab::Git::LfsChanges do
|
|||
|
||||
describe 'new_pointers' do
|
||||
before do
|
||||
allow_any_instance_of(Gitlab::Git::RevList).to receive(:new_objects).and_return([blob_object_id])
|
||||
allow_any_instance_of(Gitlab::Git::RevList).to receive(:new_objects).and_yield([blob_object_id])
|
||||
end
|
||||
|
||||
it 'uses rev-list to find new objects' do
|
||||
|
@ -38,7 +38,7 @@ describe Gitlab::Git::LfsChanges do
|
|||
it 'uses rev-list to find all objects' do
|
||||
rev_list = double
|
||||
allow(Gitlab::Git::RevList).to receive(:new).and_return(rev_list)
|
||||
allow(rev_list).to receive(:all_objects).and_return([blob_object_id])
|
||||
allow(rev_list).to receive(:all_objects).and_yield([blob_object_id])
|
||||
|
||||
expect(Gitlab::Git::Blob).to receive(:batch_lfs_pointers).with(project.repository, [blob_object_id])
|
||||
|
||||
|
|
|
@ -53,6 +53,23 @@ describe 'Gitlab::Git::Popen' do
|
|||
it { expect(status).to be_zero }
|
||||
it { expect(output).to eq('hello') }
|
||||
end
|
||||
|
||||
context 'with lazy block' do
|
||||
it 'yields a lazy io' do
|
||||
expect_lazy_io = lambda do |io|
|
||||
expect(io).to be_a Enumerator::Lazy
|
||||
expect(io.inspect).to include('#<IO:fd')
|
||||
end
|
||||
|
||||
klass.new.popen(%w[ls], path, lazy_block: expect_lazy_io)
|
||||
end
|
||||
|
||||
it "doesn't wait for process exit" do
|
||||
Timeout.timeout(2) do
|
||||
klass.new.popen(%w[yes], path, lazy_block: ->(io) {})
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'popen_with_timeout' do
|
||||
|
|
|
@ -3,26 +3,44 @@ require 'spec_helper'
|
|||
describe Gitlab::Git::RevList do
|
||||
let(:project) { create(:project, :repository) }
|
||||
let(:rev_list) { described_class.new(newrev: 'newrev', path_to_repo: project.repository.path_to_repo) }
|
||||
|
||||
before do
|
||||
allow(Gitlab::Git::Env).to receive(:all).and_return({
|
||||
GIT_OBJECT_DIRECTORY: 'foo',
|
||||
GIT_ALTERNATE_OBJECT_DIRECTORIES: 'bar'
|
||||
})
|
||||
end
|
||||
|
||||
def stub_popen_rev_list(*additional_args, output:)
|
||||
expect(rev_list).to receive(:popen).with([
|
||||
Gitlab.config.git.bin_path,
|
||||
"--git-dir=#{project.repository.path_to_repo}",
|
||||
'rev-list',
|
||||
*additional_args
|
||||
],
|
||||
nil,
|
||||
let(:env_hash) do
|
||||
{
|
||||
'GIT_OBJECT_DIRECTORY' => 'foo',
|
||||
'GIT_ALTERNATE_OBJECT_DIRECTORIES' => 'bar'
|
||||
}).and_return([output, 0])
|
||||
}
|
||||
end
|
||||
|
||||
before do
|
||||
allow(Gitlab::Git::Env).to receive(:all).and_return(env_hash.symbolize_keys)
|
||||
end
|
||||
|
||||
def args_for_popen(args_list)
|
||||
[
|
||||
Gitlab.config.git.bin_path,
|
||||
"--git-dir=#{project.repository.path_to_repo}",
|
||||
'rev-list',
|
||||
*args_list
|
||||
]
|
||||
end
|
||||
|
||||
def stub_popen_rev_list(*additional_args, output:)
|
||||
args = args_for_popen(additional_args)
|
||||
|
||||
expect(rev_list).to receive(:popen).with(args, nil, env_hash)
|
||||
.and_return([output, 0])
|
||||
end
|
||||
|
||||
def stub_lazy_popen_rev_list(*additional_args, output:)
|
||||
params = [
|
||||
args_for_popen(additional_args),
|
||||
nil,
|
||||
env_hash,
|
||||
hash_including(lazy_block: anything)
|
||||
]
|
||||
|
||||
expect(rev_list).to receive(:popen).with(*params) do |*_, lazy_block:|
|
||||
lazy_block.call(output.split("\n").lazy)
|
||||
end
|
||||
end
|
||||
|
||||
context "#new_refs" do
|
||||
|
@ -46,10 +64,22 @@ describe Gitlab::Git::RevList do
|
|||
expect(rev_list.new_objects(require_path: true)).to eq(%w[sha2])
|
||||
end
|
||||
|
||||
it 'can return a lazy enumerator' do
|
||||
stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2")
|
||||
it 'can yield a lazy enumerator' do
|
||||
stub_lazy_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2")
|
||||
|
||||
expect(rev_list.new_objects(lazy: true)).to be_a Enumerator::Lazy
|
||||
rev_list.new_objects do |object_ids|
|
||||
expect(object_ids).to be_a Enumerator::Lazy
|
||||
end
|
||||
end
|
||||
|
||||
it 'returns the result of the block when given' do
|
||||
stub_lazy_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2")
|
||||
|
||||
objects = rev_list.new_objects do |object_ids|
|
||||
object_ids.first
|
||||
end
|
||||
|
||||
expect(objects).to eq 'sha1'
|
||||
end
|
||||
|
||||
it 'can accept list of references to exclude' do
|
||||
|
@ -69,7 +99,7 @@ describe Gitlab::Git::RevList do
|
|||
it 'fetches list of all pushed objects using rev-list' do
|
||||
stub_popen_rev_list('--all', '--objects', output: "sha1\nsha2")
|
||||
|
||||
expect(rev_list.all_objects.force).to eq(%w[sha1 sha2])
|
||||
expect(rev_list.all_objects).to eq(%w[sha1 sha2])
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue