From 2c4cb7a6a851d0c49077518a5dd0c289526a66e6 Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Thu, 7 Mar 2019 04:29:51 -0800 Subject: [PATCH 1/3] Bring back Rugged implementation of GetTreeEntries This brings back some of the changes in https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/20343. For users using Gitaly on top of NFS, accessing the Git data directly via Rugged is more performant than Gitaly. This merge request introduces the feature flag `rugged_tree_entries` to activate the Rugged method. Part of four Rugged changes identified in https://gitlab.com/gitlab-org/gitlab-ce/issues/57317. --- .../unreleased/sh-rugged-tree-entries.yml | 5 + lib/gitlab/git/rugged_impl/repository.rb | 5 + lib/gitlab/git/rugged_impl/tree.rb | 103 ++++++++++++++++++ lib/gitlab/git/tree.rb | 6 + spec/lib/gitlab/git/tree_spec.rb | 74 ++++++++++++- 5 files changed, 188 insertions(+), 5 deletions(-) create mode 100644 changelogs/unreleased/sh-rugged-tree-entries.yml create mode 100644 lib/gitlab/git/rugged_impl/tree.rb diff --git a/changelogs/unreleased/sh-rugged-tree-entries.yml b/changelogs/unreleased/sh-rugged-tree-entries.yml new file mode 100644 index 00000000000..fca1f204b9b --- /dev/null +++ b/changelogs/unreleased/sh-rugged-tree-entries.yml @@ -0,0 +1,5 @@ +--- +title: Bring back Rugged implementation of GetTreeEntries +merge_request: 25674 +author: +type: other diff --git a/lib/gitlab/git/rugged_impl/repository.rb b/lib/gitlab/git/rugged_impl/repository.rb index d4500573235..fe0120b1199 100644 --- a/lib/gitlab/git/rugged_impl/repository.rb +++ b/lib/gitlab/git/rugged_impl/repository.rb @@ -66,6 +66,11 @@ module Gitlab rescue Rugged::ReferenceError nil end + + # Lookup for rugged object by oid or ref name + def lookup(oid_or_ref_name) + rugged.rev_parse(oid_or_ref_name) + end end end end diff --git a/lib/gitlab/git/rugged_impl/tree.rb b/lib/gitlab/git/rugged_impl/tree.rb new file mode 100644 index 00000000000..c1205bca1d2 --- /dev/null +++ b/lib/gitlab/git/rugged_impl/tree.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +# NOTE: This code is legacy. Do not add/modify code here unless you have +# discussed with the Gitaly team. See +# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code +# for more details. + +module Gitlab + module Git + module RuggedImpl + module Tree + module ClassMethods + extend ::Gitlab::Utils::Override + + override :tree_entries + def tree_entries(repository, sha, path, recursive) + if Feature.enabled?(:rugged_tree_entries) + tree_entries_from_rugged(repository, sha, path, recursive) + else + super + end + end + + def tree_entries_from_rugged(repository, sha, path, recursive) + current_path_entries = get_tree_entries_from_rugged(repository, sha, path) + ordered_entries = [] + + current_path_entries.each do |entry| + ordered_entries << entry + + if recursive && entry.dir? + ordered_entries.concat(tree_entries_from_rugged(repository, sha, entry.path, true)) + end + end + + # This is an optimization to reduce N+1 queries for Gitaly. It's + # currently done in TreeHelper#flatten_tree, but to emulate Gitaly + # as much as possible we populate the value here. + rugged_populate_flat_path(repository, sha, path, ordered_entries) + end + + def rugged_populate_flat_path(repository, sha, path, entries) + entries.each do |entry| + entry.flat_path = entry.path + + next unless entry.dir? + + entry.flat_path = + if path + File.join(path, rugged_flatten_tree(repository, sha, entry, path)) + else + rugged_flatten_tree(repository, sha, entry, path) + end + end + end + + # Returns the relative path of the first subdir that doesn't have only one directory descendant + def rugged_flatten_tree(repository, sha, tree, root_path) + subtree = tree_entries_from_rugged(repository, sha, tree.path, false) + + if subtree.count == 1 && subtree.first.dir? + return File.join(tree.name, rugged_flatten_tree(repository, sha, subtree.first, root_path)) + else + return tree.name + end + end + + def get_tree_entries_from_rugged(repository, sha, path) + commit = repository.lookup(sha) + root_tree = commit.tree + + tree = if path + id = find_id_by_path(repository, root_tree.oid, path) + if id + repository.lookup(id) + else + [] + end + else + root_tree + end + + tree.map do |entry| + current_path = path ? File.join(path, entry[:name]) : entry[:name] + + new( + id: entry[:oid], + root_id: root_tree.oid, + name: entry[:name], + type: entry[:type], + mode: entry[:filemode].to_s(8), + path: current_path, + commit_id: sha + ) + end + rescue Rugged::ReferenceError + [] + end + end + end + end + end +end diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb index 6fcea4e12b4..7e072c5db50 100644 --- a/lib/gitlab/git/tree.rb +++ b/lib/gitlab/git/tree.rb @@ -18,6 +18,10 @@ module Gitlab def where(repository, sha, path = nil, recursive = false) path = nil if path == '' || path == '/' + tree_entries(repository, sha, path, recursive) + end + + def tree_entries(repository, sha, path, recursive) wrapped_gitaly_errors do repository.gitaly_commit_client.tree_entries(repository, sha, path, recursive) end @@ -95,3 +99,5 @@ module Gitlab end end end + +Gitlab::Git::Tree.singleton_class.prepend Gitlab::Git::RuggedImpl::Tree::ClassMethods diff --git a/spec/lib/gitlab/git/tree_spec.rb b/spec/lib/gitlab/git/tree_spec.rb index 4a4d69490a3..60060c41616 100644 --- a/spec/lib/gitlab/git/tree_spec.rb +++ b/spec/lib/gitlab/git/tree_spec.rb @@ -3,7 +3,7 @@ require "spec_helper" describe Gitlab::Git::Tree, :seed_helper do let(:repository) { Gitlab::Git::Repository.new('default', TEST_REPO_PATH, '', 'group/project') } - context :repo do + shared_examples :repo do let(:tree) { Gitlab::Git::Tree.where(repository, SeedRepo::Commit::ID) } it { expect(tree).to be_kind_of Array } @@ -12,6 +12,17 @@ describe Gitlab::Git::Tree, :seed_helper do it { expect(tree.select(&:file?).size).to eq(10) } it { expect(tree.select(&:submodule?).size).to eq(2) } + it 'returns an empty array when called with an invalid ref' do + expect(described_class.where(repository, 'foobar-does-not-exist')).to eq([]) + end + + it 'returns a list of tree objects' do + entries = described_class.where(repository, SeedRepo::Commit::ID, 'files', true) + + expect(entries.count).to be > 10 + expect(entries).to all(be_a(Gitlab::Git::Tree)) + end + describe '#dir?' do let(:dir) { tree.select(&:dir?).first } @@ -20,8 +31,8 @@ describe Gitlab::Git::Tree, :seed_helper do it { expect(dir.commit_id).to eq(SeedRepo::Commit::ID) } it { expect(dir.name).to eq('encoding') } it { expect(dir.path).to eq('encoding') } - it { expect(dir.flat_path).to eq('encoding') } it { expect(dir.mode).to eq('40000') } + it { expect(dir.flat_path).to eq('encoding') } context :subdir do let(:subdir) { Gitlab::Git::Tree.where(repository, SeedRepo::Commit::ID, 'files').first } @@ -44,6 +55,51 @@ describe Gitlab::Git::Tree, :seed_helper do it { expect(subdir_file.path).to eq('files/ruby/popen.rb') } it { expect(subdir_file.flat_path).to eq('files/ruby/popen.rb') } end + + context :flat_path do + let(:filename) { 'files/flat/path/correct/content.txt' } + let(:oid) { create_file(filename) } + let(:subdir_file) { Gitlab::Git::Tree.where(repository, oid, 'files/flat').first } + let(:repository_rugged) { Rugged::Repository.new(File.join(SEED_STORAGE_PATH, TEST_REPO_PATH)) } + + it { expect(subdir_file.flat_path).to eq('files/flat/path/correct') } + end + + def create_file(path) + oid = repository_rugged.write('test', :blob) + index = repository_rugged.index + index.add(path: filename, oid: oid, mode: 0100644) + + options = commit_options( + repository_rugged, + index, + repository_rugged.head.target, + 'HEAD', + 'Add new file') + + Rugged::Commit.create(repository_rugged, options) + end + + # Build the options hash that's passed to Rugged::Commit#create + def commit_options(repo, index, target, ref, message) + options = {} + options[:tree] = index.write_tree(repo) + options[:author] = { + email: "test@example.com", + name: "Test Author", + time: Time.gm(2014, "mar", 3, 20, 15, 1) + } + options[:committer] = { + email: "test@example.com", + name: "Test Author", + time: Time.gm(2014, "mar", 3, 20, 15, 1) + } + options[:message] ||= message + options[:parents] = repo.empty? ? [] : [target].compact + options[:update_ref] = ref + + options + end end describe '#file?' do @@ -79,9 +135,17 @@ describe Gitlab::Git::Tree, :seed_helper do end end - describe '#where' do - it 'returns an empty array when called with an invalid ref' do - expect(described_class.where(repository, 'foobar-does-not-exist')).to eq([]) + describe '.where with Gitaly enabled' do + it_behaves_like :repo + end + + describe '.where with Rugged enabled', :enable_rugged do + it 'calls out to the Rugged implementation' do + allow_any_instance_of(Rugged).to receive(:lookup).with(SeedRepo::Commit::ID) + + described_class.where(repository, SeedRepo::Commit::ID, 'files', false) end + + it_behaves_like :repo end end From 28883d8e449acc6cb8ad6f0312d77a8ee5027e75 Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Wed, 6 Mar 2019 10:36:28 -0800 Subject: [PATCH 2/3] Remove old code in TreeHelper#flatten_tree --- app/helpers/tree_helper.rb | 11 +---------- lib/gitlab/git/rugged_impl/tree.rb | 8 +++++--- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/app/helpers/tree_helper.rb b/app/helpers/tree_helper.rb index e2879bfdcf1..c5bab877c00 100644 --- a/app/helpers/tree_helper.rb +++ b/app/helpers/tree_helper.rb @@ -136,18 +136,9 @@ module TreeHelper end # returns the relative path of the first subdir that doesn't have only one directory descendant - # rubocop: disable CodeReuse/ActiveRecord def flatten_tree(root_path, tree) - return tree.flat_path.sub(%r{\A#{Regexp.escape(root_path)}/}, '') if tree.flat_path.present? - - subtree = Gitlab::Git::Tree.where(@repository, @commit.id, tree.path) - if subtree.count == 1 && subtree.first.dir? - return tree_join(tree.name, flatten_tree(root_path, subtree.first)) - else - return tree.name - end + tree.flat_path.sub(%r{\A#{Regexp.escape(root_path)}/}, '') end - # rubocop: enable CodeReuse/ActiveRecord def selected_branch @branch_name || tree_edit_branch diff --git a/lib/gitlab/git/rugged_impl/tree.rb b/lib/gitlab/git/rugged_impl/tree.rb index c1205bca1d2..f6ddc4e4346 100644 --- a/lib/gitlab/git/rugged_impl/tree.rb +++ b/lib/gitlab/git/rugged_impl/tree.rb @@ -33,9 +33,11 @@ module Gitlab end end - # This is an optimization to reduce N+1 queries for Gitaly. It's - # currently done in TreeHelper#flatten_tree, but to emulate Gitaly - # as much as possible we populate the value here. + # This was an optimization to reduce N+1 queries for Gitaly + # (https://gitlab.com/gitlab-org/gitaly/issues/530). It + # used to be done lazily in the view via + # TreeHelper#flatten_tree, so it's possible there's a + # performance impact by loading this eagerly. rugged_populate_flat_path(repository, sha, path, ordered_entries) end From 46c8cc35a0e93a17f479d90dbfe1cbc651b8acad Mon Sep 17 00:00:00 2001 From: Stan Hu <stanhu@gmail.com> Date: Thu, 7 Mar 2019 06:33:55 -0800 Subject: [PATCH 3/3] Remove unnecessary return statements in tree.rb --- lib/gitlab/git/rugged_impl/tree.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/git/rugged_impl/tree.rb b/lib/gitlab/git/rugged_impl/tree.rb index f6ddc4e4346..0ebfd496695 100644 --- a/lib/gitlab/git/rugged_impl/tree.rb +++ b/lib/gitlab/git/rugged_impl/tree.rb @@ -61,9 +61,9 @@ module Gitlab subtree = tree_entries_from_rugged(repository, sha, tree.path, false) if subtree.count == 1 && subtree.first.dir? - return File.join(tree.name, rugged_flatten_tree(repository, sha, subtree.first, root_path)) + File.join(tree.name, rugged_flatten_tree(repository, sha, subtree.first, root_path)) else - return tree.name + tree.name end end