From ca049902dc7dad6e6177b05c8e3dc74c00487d27 Mon Sep 17 00:00:00 2001 From: James Edwards-Jones Date: Fri, 3 Nov 2017 14:33:06 +0000 Subject: [PATCH] Gitlab::Git::RevList and LfsChanges use lazy popen --- lib/gitlab/git/lfs_changes.rb | 17 +++--- lib/gitlab/git/rev_list.rb | 43 +++++++++------ spec/lib/gitlab/git/lfs_changes_spec.rb | 4 +- spec/lib/gitlab/git/rev_list_spec.rb | 72 +++++++++++++++++-------- 4 files changed, 88 insertions(+), 48 deletions(-) diff --git a/lib/gitlab/git/lfs_changes.rb b/lib/gitlab/git/lfs_changes.rb index 2749e2e69e2..732dd5d998a 100644 --- a/lib/gitlab/git/lfs_changes.rb +++ b/lib/gitlab/git/lfs_changes.rb @@ -8,25 +8,22 @@ module Gitlab def new_pointers(object_limit: nil, not_in: nil) @new_pointers ||= begin - object_ids = new_objects(not_in: not_in) - object_ids = object_ids.take(object_limit) if object_limit + rev_list.new_objects(not_in: not_in, require_path: true) do |object_ids| + object_ids = object_ids.take(object_limit) if object_limit - Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + end end end def all_pointers - object_ids = rev_list.all_objects(require_path: true) - - Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + rev_list.all_objects(require_path: true) do |object_ids| + Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + end end private - def new_objects(not_in:) - rev_list.new_objects(require_path: true, lazy: true, not_in: not_in) - end - def rev_list ::Gitlab::Git::RevList.new(path_to_repo: @repository.path_to_repo, newrev: @newrev) diff --git a/lib/gitlab/git/rev_list.rb b/lib/gitlab/git/rev_list.rb index e0c884aceaa..4974205b8fd 100644 --- a/lib/gitlab/git/rev_list.rb +++ b/lib/gitlab/git/rev_list.rb @@ -25,17 +25,18 @@ module Gitlab # This skips commit objects and root trees, which might not be needed when # looking for blobs # - # Can return a lazy enumerator to limit work done on megabytes of data - def new_objects(require_path: nil, lazy: false, not_in: nil) - object_output = execute([*base_args, newrev, *not_in_refs(not_in), '--objects']) + # When given a block it will yield objects as a lazy enumerator so + # the caller can limit work done instead of processing megabytes of data + def new_objects(require_path: nil, not_in: nil, &lazy_block) + args = [*base_args, newrev, *not_in_refs(not_in), '--objects'] - objects_from_output(object_output, require_path: require_path, lazy: lazy) + get_objects(args, require_path: require_path, &lazy_block) end - def all_objects(require_path: nil) - object_output = execute([*base_args, '--all', '--objects']) + def all_objects(require_path: nil, &lazy_block) + args = [*base_args, '--all', '--objects'] - objects_from_output(object_output, require_path: require_path, lazy: true) + get_objects(args, require_path: require_path, &lazy_block) end # This methods returns an array of missed references @@ -64,6 +65,10 @@ module Gitlab output.split("\n") end + def lazy_execute(args, &lazy_block) + popen(args, nil, Gitlab::Git::Env.to_env_hash, lazy_block: lazy_block) + end + def base_args [ Gitlab.config.git.bin_path, @@ -72,20 +77,28 @@ module Gitlab ] end - def objects_from_output(object_output, require_path: nil, lazy: nil) - objects = object_output.lazy.map do |output_line| + def get_objects(args, require_path: nil) + if block_given? + lazy_execute(args) do |lazy_output| + objects = objects_from_output(lazy_output, require_path: require_path) + + yield(objects) + end + else + object_output = execute(args) + + objects_from_output(object_output, require_path: require_path) + end + end + + def objects_from_output(object_output, require_path: nil) + object_output.map do |output_line| sha, path = output_line.split(' ', 2) next if require_path && path.blank? sha end.reject(&:nil?) - - if lazy - objects - else - objects.force - end end end end diff --git a/spec/lib/gitlab/git/lfs_changes_spec.rb b/spec/lib/gitlab/git/lfs_changes_spec.rb index c2d2c6e1bc8..c9007d7d456 100644 --- a/spec/lib/gitlab/git/lfs_changes_spec.rb +++ b/spec/lib/gitlab/git/lfs_changes_spec.rb @@ -9,7 +9,7 @@ describe Gitlab::Git::LfsChanges do describe 'new_pointers' do before do - allow_any_instance_of(Gitlab::Git::RevList).to receive(:new_objects).and_return([blob_object_id]) + allow_any_instance_of(Gitlab::Git::RevList).to receive(:new_objects).and_yield([blob_object_id]) end it 'uses rev-list to find new objects' do @@ -38,7 +38,7 @@ describe Gitlab::Git::LfsChanges do it 'uses rev-list to find all objects' do rev_list = double allow(Gitlab::Git::RevList).to receive(:new).and_return(rev_list) - allow(rev_list).to receive(:all_objects).and_return([blob_object_id]) + allow(rev_list).to receive(:all_objects).and_yield([blob_object_id]) expect(Gitlab::Git::Blob).to receive(:batch_lfs_pointers).with(project.repository, [blob_object_id]) diff --git a/spec/lib/gitlab/git/rev_list_spec.rb b/spec/lib/gitlab/git/rev_list_spec.rb index 643a4b2d03e..eaf74951b0e 100644 --- a/spec/lib/gitlab/git/rev_list_spec.rb +++ b/spec/lib/gitlab/git/rev_list_spec.rb @@ -3,26 +3,44 @@ require 'spec_helper' describe Gitlab::Git::RevList do let(:project) { create(:project, :repository) } let(:rev_list) { described_class.new(newrev: 'newrev', path_to_repo: project.repository.path_to_repo) } - - before do - allow(Gitlab::Git::Env).to receive(:all).and_return({ - GIT_OBJECT_DIRECTORY: 'foo', - GIT_ALTERNATE_OBJECT_DIRECTORIES: 'bar' - }) - end - - def stub_popen_rev_list(*additional_args, output:) - expect(rev_list).to receive(:popen).with([ - Gitlab.config.git.bin_path, - "--git-dir=#{project.repository.path_to_repo}", - 'rev-list', - *additional_args - ], - nil, + let(:env_hash) do { 'GIT_OBJECT_DIRECTORY' => 'foo', 'GIT_ALTERNATE_OBJECT_DIRECTORIES' => 'bar' - }).and_return([output, 0]) + } + end + + before do + allow(Gitlab::Git::Env).to receive(:all).and_return(env_hash.symbolize_keys) + end + + def args_for_popen(args_list) + [ + Gitlab.config.git.bin_path, + "--git-dir=#{project.repository.path_to_repo}", + 'rev-list', + *args_list + ] + end + + def stub_popen_rev_list(*additional_args, output:) + args = args_for_popen(additional_args) + + expect(rev_list).to receive(:popen).with(args, nil, env_hash) + .and_return([output, 0]) + end + + def stub_lazy_popen_rev_list(*additional_args, output:) + params = [ + args_for_popen(additional_args), + nil, + env_hash, + hash_including(lazy_block: anything) + ] + + expect(rev_list).to receive(:popen).with(*params) do |*_, lazy_block:| + lazy_block.call(output.split("\n").lazy) + end end context "#new_refs" do @@ -46,10 +64,22 @@ describe Gitlab::Git::RevList do expect(rev_list.new_objects(require_path: true)).to eq(%w[sha2]) end - it 'can return a lazy enumerator' do - stub_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2") + it 'can yield a lazy enumerator' do + stub_lazy_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2") - expect(rev_list.new_objects(lazy: true)).to be_a Enumerator::Lazy + rev_list.new_objects do |object_ids| + expect(object_ids).to be_a Enumerator::Lazy + end + end + + it 'returns the result of the block when given' do + stub_lazy_popen_rev_list('newrev', '--not', '--all', '--objects', output: "sha1\nsha2") + + objects = rev_list.new_objects do |object_ids| + object_ids.first + end + + expect(objects).to eq 'sha1' end it 'can accept list of references to exclude' do @@ -69,7 +99,7 @@ describe Gitlab::Git::RevList do it 'fetches list of all pushed objects using rev-list' do stub_popen_rev_list('--all', '--objects', output: "sha1\nsha2") - expect(rev_list.all_objects.force).to eq(%w[sha1 sha2]) + expect(rev_list.all_objects).to eq(%w[sha1 sha2]) end end