From 2c4cb7a6a851d0c49077518a5dd0c289526a66e6 Mon Sep 17 00:00:00 2001
From: Stan Hu <stanhu@gmail.com>
Date: Thu, 7 Mar 2019 04:29:51 -0800
Subject: [PATCH 1/3] Bring back Rugged implementation of GetTreeEntries

This brings back some of the changes in
https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/20343.

For users using Gitaly on top of NFS, accessing the Git data directly
via Rugged is more performant than Gitaly. This merge request introduces
the feature flag `rugged_tree_entries` to activate the Rugged method.

Part of four Rugged changes identified in
https://gitlab.com/gitlab-org/gitlab-ce/issues/57317.
---
 .../unreleased/sh-rugged-tree-entries.yml     |   5 +
 lib/gitlab/git/rugged_impl/repository.rb      |   5 +
 lib/gitlab/git/rugged_impl/tree.rb            | 103 ++++++++++++++++++
 lib/gitlab/git/tree.rb                        |   6 +
 spec/lib/gitlab/git/tree_spec.rb              |  74 ++++++++++++-
 5 files changed, 188 insertions(+), 5 deletions(-)
 create mode 100644 changelogs/unreleased/sh-rugged-tree-entries.yml
 create mode 100644 lib/gitlab/git/rugged_impl/tree.rb

diff --git a/changelogs/unreleased/sh-rugged-tree-entries.yml b/changelogs/unreleased/sh-rugged-tree-entries.yml
new file mode 100644
index 00000000000..fca1f204b9b
--- /dev/null
+++ b/changelogs/unreleased/sh-rugged-tree-entries.yml
@@ -0,0 +1,5 @@
+---
+title: Bring back Rugged implementation of GetTreeEntries
+merge_request: 25674
+author:
+type: other
diff --git a/lib/gitlab/git/rugged_impl/repository.rb b/lib/gitlab/git/rugged_impl/repository.rb
index d4500573235..fe0120b1199 100644
--- a/lib/gitlab/git/rugged_impl/repository.rb
+++ b/lib/gitlab/git/rugged_impl/repository.rb
@@ -66,6 +66,11 @@ module Gitlab
         rescue Rugged::ReferenceError
           nil
         end
+
+        # Lookup for rugged object by oid or ref name
+        def lookup(oid_or_ref_name)
+          rugged.rev_parse(oid_or_ref_name)
+        end
       end
     end
   end
diff --git a/lib/gitlab/git/rugged_impl/tree.rb b/lib/gitlab/git/rugged_impl/tree.rb
new file mode 100644
index 00000000000..c1205bca1d2
--- /dev/null
+++ b/lib/gitlab/git/rugged_impl/tree.rb
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+# NOTE: This code is legacy. Do not add/modify code here unless you have
+# discussed with the Gitaly team.  See
+# https://docs.gitlab.com/ee/development/gitaly.html#legacy-rugged-code
+# for more details.
+
+module Gitlab
+  module Git
+    module RuggedImpl
+      module Tree
+        module ClassMethods
+          extend ::Gitlab::Utils::Override
+
+          override :tree_entries
+          def tree_entries(repository, sha, path, recursive)
+            if Feature.enabled?(:rugged_tree_entries)
+              tree_entries_from_rugged(repository, sha, path, recursive)
+            else
+              super
+            end
+          end
+
+          def tree_entries_from_rugged(repository, sha, path, recursive)
+            current_path_entries = get_tree_entries_from_rugged(repository, sha, path)
+            ordered_entries = []
+
+            current_path_entries.each do |entry|
+              ordered_entries << entry
+
+              if recursive && entry.dir?
+                ordered_entries.concat(tree_entries_from_rugged(repository, sha, entry.path, true))
+              end
+            end
+
+            # This is an optimization to reduce N+1 queries for Gitaly. It's
+            # currently done in TreeHelper#flatten_tree, but to emulate Gitaly
+            # as much as possible we populate the value here.
+            rugged_populate_flat_path(repository, sha, path, ordered_entries)
+          end
+
+          def rugged_populate_flat_path(repository, sha, path, entries)
+            entries.each do |entry|
+              entry.flat_path = entry.path
+
+              next unless entry.dir?
+
+              entry.flat_path =
+                if path
+                  File.join(path, rugged_flatten_tree(repository, sha, entry, path))
+                else
+                  rugged_flatten_tree(repository, sha, entry, path)
+                end
+            end
+          end
+
+          # Returns the relative path of the first subdir that doesn't have only one directory descendant
+          def rugged_flatten_tree(repository, sha, tree, root_path)
+            subtree = tree_entries_from_rugged(repository, sha, tree.path, false)
+
+            if subtree.count == 1 && subtree.first.dir?
+              return File.join(tree.name, rugged_flatten_tree(repository, sha, subtree.first, root_path))
+            else
+              return tree.name
+            end
+          end
+
+          def get_tree_entries_from_rugged(repository, sha, path)
+            commit = repository.lookup(sha)
+            root_tree = commit.tree
+
+            tree = if path
+                     id = find_id_by_path(repository, root_tree.oid, path)
+                     if id
+                       repository.lookup(id)
+                     else
+                       []
+                     end
+                   else
+                     root_tree
+                   end
+
+            tree.map do |entry|
+              current_path = path ? File.join(path, entry[:name]) : entry[:name]
+
+              new(
+                id: entry[:oid],
+                root_id: root_tree.oid,
+                name: entry[:name],
+                type: entry[:type],
+                mode: entry[:filemode].to_s(8),
+                path: current_path,
+                commit_id: sha
+              )
+            end
+          rescue Rugged::ReferenceError
+            []
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb
index 6fcea4e12b4..7e072c5db50 100644
--- a/lib/gitlab/git/tree.rb
+++ b/lib/gitlab/git/tree.rb
@@ -18,6 +18,10 @@ module Gitlab
         def where(repository, sha, path = nil, recursive = false)
           path = nil if path == '' || path == '/'
 
+          tree_entries(repository, sha, path, recursive)
+        end
+
+        def tree_entries(repository, sha, path, recursive)
           wrapped_gitaly_errors do
             repository.gitaly_commit_client.tree_entries(repository, sha, path, recursive)
           end
@@ -95,3 +99,5 @@ module Gitlab
     end
   end
 end
+
+Gitlab::Git::Tree.singleton_class.prepend Gitlab::Git::RuggedImpl::Tree::ClassMethods
diff --git a/spec/lib/gitlab/git/tree_spec.rb b/spec/lib/gitlab/git/tree_spec.rb
index 4a4d69490a3..60060c41616 100644
--- a/spec/lib/gitlab/git/tree_spec.rb
+++ b/spec/lib/gitlab/git/tree_spec.rb
@@ -3,7 +3,7 @@ require "spec_helper"
 describe Gitlab::Git::Tree, :seed_helper do
   let(:repository) { Gitlab::Git::Repository.new('default', TEST_REPO_PATH, '', 'group/project') }
 
-  context :repo do
+  shared_examples :repo do
     let(:tree) { Gitlab::Git::Tree.where(repository, SeedRepo::Commit::ID) }
 
     it { expect(tree).to be_kind_of Array }
@@ -12,6 +12,17 @@ describe Gitlab::Git::Tree, :seed_helper do
     it { expect(tree.select(&:file?).size).to eq(10) }
     it { expect(tree.select(&:submodule?).size).to eq(2) }
 
+    it 'returns an empty array when called with an invalid ref' do
+      expect(described_class.where(repository, 'foobar-does-not-exist')).to eq([])
+    end
+
+    it 'returns a list of tree objects' do
+      entries = described_class.where(repository, SeedRepo::Commit::ID, 'files', true)
+
+      expect(entries.count).to be > 10
+      expect(entries).to all(be_a(Gitlab::Git::Tree))
+    end
+
     describe '#dir?' do
       let(:dir) { tree.select(&:dir?).first }
 
@@ -20,8 +31,8 @@ describe Gitlab::Git::Tree, :seed_helper do
       it { expect(dir.commit_id).to eq(SeedRepo::Commit::ID) }
       it { expect(dir.name).to eq('encoding') }
       it { expect(dir.path).to eq('encoding') }
-      it { expect(dir.flat_path).to eq('encoding') }
       it { expect(dir.mode).to eq('40000') }
+      it { expect(dir.flat_path).to eq('encoding') }
 
       context :subdir do
         let(:subdir) { Gitlab::Git::Tree.where(repository, SeedRepo::Commit::ID, 'files').first }
@@ -44,6 +55,51 @@ describe Gitlab::Git::Tree, :seed_helper do
         it { expect(subdir_file.path).to eq('files/ruby/popen.rb') }
         it { expect(subdir_file.flat_path).to eq('files/ruby/popen.rb') }
       end
+
+      context :flat_path do
+        let(:filename) { 'files/flat/path/correct/content.txt' }
+        let(:oid) { create_file(filename) }
+        let(:subdir_file) { Gitlab::Git::Tree.where(repository, oid, 'files/flat').first }
+        let(:repository_rugged) { Rugged::Repository.new(File.join(SEED_STORAGE_PATH, TEST_REPO_PATH)) }
+
+        it { expect(subdir_file.flat_path).to eq('files/flat/path/correct') }
+      end
+
+      def create_file(path)
+        oid = repository_rugged.write('test', :blob)
+        index = repository_rugged.index
+        index.add(path: filename, oid: oid, mode: 0100644)
+
+        options = commit_options(
+          repository_rugged,
+          index,
+          repository_rugged.head.target,
+          'HEAD',
+          'Add new file')
+
+        Rugged::Commit.create(repository_rugged, options)
+      end
+
+      # Build the options hash that's passed to Rugged::Commit#create
+      def commit_options(repo, index, target, ref, message)
+        options = {}
+        options[:tree] = index.write_tree(repo)
+        options[:author] = {
+          email: "test@example.com",
+          name: "Test Author",
+          time: Time.gm(2014, "mar", 3, 20, 15, 1)
+        }
+        options[:committer] = {
+          email: "test@example.com",
+          name: "Test Author",
+          time: Time.gm(2014, "mar", 3, 20, 15, 1)
+        }
+        options[:message] ||= message
+        options[:parents] = repo.empty? ? [] : [target].compact
+        options[:update_ref] = ref
+
+        options
+      end
     end
 
     describe '#file?' do
@@ -79,9 +135,17 @@ describe Gitlab::Git::Tree, :seed_helper do
     end
   end
 
-  describe '#where' do
-    it 'returns an empty array when called with an invalid ref' do
-      expect(described_class.where(repository, 'foobar-does-not-exist')).to eq([])
+  describe '.where with Gitaly enabled' do
+    it_behaves_like :repo
+  end
+
+  describe '.where with Rugged enabled', :enable_rugged do
+    it 'calls out to the Rugged implementation' do
+      allow_any_instance_of(Rugged).to receive(:lookup).with(SeedRepo::Commit::ID)
+
+      described_class.where(repository, SeedRepo::Commit::ID, 'files', false)
     end
+
+    it_behaves_like :repo
   end
 end

From 28883d8e449acc6cb8ad6f0312d77a8ee5027e75 Mon Sep 17 00:00:00 2001
From: Stan Hu <stanhu@gmail.com>
Date: Wed, 6 Mar 2019 10:36:28 -0800
Subject: [PATCH 2/3] Remove old code in TreeHelper#flatten_tree

---
 app/helpers/tree_helper.rb         | 11 +----------
 lib/gitlab/git/rugged_impl/tree.rb |  8 +++++---
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/app/helpers/tree_helper.rb b/app/helpers/tree_helper.rb
index e2879bfdcf1..c5bab877c00 100644
--- a/app/helpers/tree_helper.rb
+++ b/app/helpers/tree_helper.rb
@@ -136,18 +136,9 @@ module TreeHelper
   end
 
   # returns the relative path of the first subdir that doesn't have only one directory descendant
-  # rubocop: disable CodeReuse/ActiveRecord
   def flatten_tree(root_path, tree)
-    return tree.flat_path.sub(%r{\A#{Regexp.escape(root_path)}/}, '') if tree.flat_path.present?
-
-    subtree = Gitlab::Git::Tree.where(@repository, @commit.id, tree.path)
-    if subtree.count == 1 && subtree.first.dir?
-      return tree_join(tree.name, flatten_tree(root_path, subtree.first))
-    else
-      return tree.name
-    end
+    tree.flat_path.sub(%r{\A#{Regexp.escape(root_path)}/}, '')
   end
-  # rubocop: enable CodeReuse/ActiveRecord
 
   def selected_branch
     @branch_name || tree_edit_branch
diff --git a/lib/gitlab/git/rugged_impl/tree.rb b/lib/gitlab/git/rugged_impl/tree.rb
index c1205bca1d2..f6ddc4e4346 100644
--- a/lib/gitlab/git/rugged_impl/tree.rb
+++ b/lib/gitlab/git/rugged_impl/tree.rb
@@ -33,9 +33,11 @@ module Gitlab
               end
             end
 
-            # This is an optimization to reduce N+1 queries for Gitaly. It's
-            # currently done in TreeHelper#flatten_tree, but to emulate Gitaly
-            # as much as possible we populate the value here.
+            # This was an optimization to reduce N+1 queries for Gitaly
+            # (https://gitlab.com/gitlab-org/gitaly/issues/530).  It
+            # used to be done lazily in the view via
+            # TreeHelper#flatten_tree, so it's possible there's a
+            # performance impact by loading this eagerly.
             rugged_populate_flat_path(repository, sha, path, ordered_entries)
           end
 

From 46c8cc35a0e93a17f479d90dbfe1cbc651b8acad Mon Sep 17 00:00:00 2001
From: Stan Hu <stanhu@gmail.com>
Date: Thu, 7 Mar 2019 06:33:55 -0800
Subject: [PATCH 3/3] Remove unnecessary return statements in tree.rb

---
 lib/gitlab/git/rugged_impl/tree.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/gitlab/git/rugged_impl/tree.rb b/lib/gitlab/git/rugged_impl/tree.rb
index f6ddc4e4346..0ebfd496695 100644
--- a/lib/gitlab/git/rugged_impl/tree.rb
+++ b/lib/gitlab/git/rugged_impl/tree.rb
@@ -61,9 +61,9 @@ module Gitlab
             subtree = tree_entries_from_rugged(repository, sha, tree.path, false)
 
             if subtree.count == 1 && subtree.first.dir?
-              return File.join(tree.name, rugged_flatten_tree(repository, sha, subtree.first, root_path))
+              File.join(tree.name, rugged_flatten_tree(repository, sha, subtree.first, root_path))
             else
-              return tree.name
+              tree.name
             end
           end