Merge branch 'finding-multiple-projects-by-paths' into 'master'

Add Project.where_paths_in

In https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/4410 I'm working on reducing the number of SQL queries needed to render Markdown. One reason for the large number of queries is that many of them are executed just to find projects and their related data. Basically `Project.find_with_namespace` is called in a loop and then any relations have to be retrieved separately.

With `Project.where_paths_in` we can work around this by doing something like:

```ruby
project_paths = [...] # populated by some method
projects = Project.where_paths_in(project_paths).includes(:namespace, ...)
```

Ref: https://gitlab.com/gitlab-org/gitlab-ce/issues/18042

See merge request !4535
This commit is contained in:
Robert Speicher 2016-06-10 17:17:19 +00:00
commit b4e84809e8
2 changed files with 93 additions and 11 deletions

View file

@ -253,20 +253,69 @@ class Project < ActiveRecord::Base
non_archived.where(table[:name].matches(pattern))
end
def find_with_namespace(id)
namespace_path, project_path = id.split('/', 2)
# Finds a single project for the given path.
#
# path - The full project path (including namespace path).
#
# Returns a Project, or nil if no project could be found.
def find_with_namespace(path)
  # Reuse the multi-path finder with a single-element list, clear any
  # ordering on the resulting relation, and take one record (nil if none).
  candidates = where_paths_in([path])
  candidates.reorder(nil).take
end
return nil if !namespace_path || !project_path
# Builds a relation to find multiple projects by their full paths.
#
# Each path must be in the following format:
#
# namespace_path/project_path
#
# For example:
#
# gitlab-org/gitlab-ce
#
# Usage:
#
# Project.where_paths_in(%w{gitlab-org/gitlab-ce gitlab-org/gitlab-ee})
#
# This would return the projects with the full paths matching the values
# given.
#
# paths - An Array of full paths (namespace path + project path) for which
# to find the projects.
#
# Returns an ActiveRecord::Relation.
def where_paths_in(paths)
# SQL condition fragments, one per usable path; they are OR-ed together at
# the end of the method.
wheres = []
# When true, every fragment additionally gets a LOWER(...) comparison so that
# paths also match regardless of casing (see the `if cast_lower` branch).
cast_lower = Gitlab::Database.postgresql?
# NOTE(review): the `projects = unscoped...` statement below and the
# `projects.find_by(...) || ...` expression inside the loop look like
# leftovers of the previous single-path lookup shown in this diff; the first
# one reads `namespace_path` before the loop assigns it — confirm whether
# these lines are still meant to be part of the method.
# Use of unscoped ensures we're not secretly adding any ORDER BYs, which
# have a negative impact on performance (and aren't needed for this
# query).
projects = unscoped.
joins(:namespace).
iwhere('namespaces.path' => namespace_path)
paths.each do |path|
# Split on the first "/" only, so anything after it stays in project_path.
namespace_path, project_path = path.split('/', 2)
projects.find_by('projects.path' => project_path) ||
projects.iwhere('projects.path' => project_path).take
# Entries without both a namespace part and a project part are skipped.
next unless namespace_path && project_path
# Quote both values through the connection before interpolating them into
# the raw SQL strings built below.
namespace_path = connection.quote(namespace_path)
project_path = connection.quote(project_path)
# Exact match on namespace path and project path.
where = "(namespaces.path = #{namespace_path}
AND projects.path = #{project_path})"
if cast_lower
# Wrap the exact match so that either it or the lower-cased comparison of
# both columns may satisfy the condition.
where = "(
#{where}
OR (
LOWER(namespaces.path) = LOWER(#{namespace_path})
AND LOWER(projects.path) = LOWER(#{project_path})
)
)"
end
wheres << where
end
# With no usable paths there is nothing to put in a WHERE clause, so `none`
# is returned instead of building a query.
if wheres.empty?
none
else
# Join namespaces (the fragments reference namespaces.path) and match any
# of the collected per-path conditions.
joins(:namespace).where(wheres.join(' OR '))
end
end
def visibility_levels

View file

@ -922,4 +922,37 @@ describe Project, models: true do
it { is_expected.to be_falsey }
end
end
describe '.where_paths_in' do
  # Inputs that cannot identify any project must yield an empty result.
  context 'without any paths' do
    it 'returns an empty relation' do
      projects = Project.where_paths_in([])

      expect(projects).to eq([])
    end
  end

  context 'without any valid paths' do
    it 'returns an empty relation' do
      projects = Project.where_paths_in(%w[foo])

      expect(projects).to eq([])
    end
  end

  context 'with valid paths' do
    let!(:project1) { create(:project) }
    let!(:project2) { create(:project) }

    # Full "namespace/project" paths of both fixtures, in creation order.
    let(:full_paths) { [project1, project2].map(&:path_with_namespace) }

    it 'returns the projects matching the paths' do
      found = Project.where_paths_in(full_paths)

      expect(found).to contain_exactly(project1, project2)
    end

    it 'returns projects regardless of the casing of paths' do
      found = Project.where_paths_in(full_paths.map(&:upcase))

      expect(found).to contain_exactly(project1, project2)
    end
  end
end
end