gitlab-org--gitlab-foss/app/finders/projects_finder.rb

105 lines
2.6 KiB
Ruby
Raw Normal View History

# ProjectsFinder
#
# Used to filter Projects by set of params
#
# Arguments:
# current_user - which user use
# project_ids_relation: int[] - project ids to use
# params:
# trending: boolean
# owned: boolean
# non_public: boolean
# starred: boolean
# sort: string
# visibility_level: int
# tags: string[]
# personal: boolean
# search: string
# non_archived: boolean
#
2016-03-20 16:03:53 -04:00
class ProjectsFinder < UnionFinder
attr_accessor :params
attr_reader :current_user, :project_ids_relation
def initialize(params: {}, current_user: nil, project_ids_relation: nil)
@params = params
@current_user = current_user
@project_ids_relation = project_ids_relation
end
def execute
items = init_collection
UNION of SELECT/WHERE is faster than WHERE on UNION Instead of applying WHERE on a UNION, apply the WHERE on each of the seperate SELECT statements, and do UNION on that. Local tests with about 2_000_000 projects: - 1_500_000 private projects - 40_000 internal projects - 400_000 public projects For the API endpoint `/api/v4/projects?visibility=private` the slowest query was: ```sql SELECT "projects".* FROM "projects" WHERE ... ``` The original query took 1073.8ms. The query refactored to UNION of SELECT/WHERE took 2.3ms. The original query was: ```sql SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = $1 AND (projects.id IN (SELECT "projects"."id" FROM "projects" INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id" WHERE "projects"."pending_delete" = 'f' AND "project_authorizations"."user_id" = 23 UNION SELECT "projects"."id" FROM "projects" WHERE "projects"."visibility_level" IN (20, 10))) AND "projects"."visibility_level" = $2 AND "projects"."archived" = $3 ORDER BY "projects"."created_at" DESC LIMIT 20 OFFSET 0 [["pending_delete", "f"], ["visibility_level", 0], ["archived", "f"]] ``` The refactored query: ```sql SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = $1 AND (projects.id IN (SELECT "projects"."id" FROM "projects" INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id" WHERE "projects"."pending_delete" = 'f' AND "project_authorizations"."user_id" = 23 AND "projects"."visibility_level" = 0 AND "projects"."archived" = 'f' UNION SELECT "projects"."id" FROM "projects" WHERE "projects"."visibility_level" IN (20, 10) AND "projects"."visibility_level" = 0 AND "projects"."archived" = 'f')) ORDER BY "projects"."created_at" DESC LIMIT 20 OFFSET 0 [["pending_delete", "f"]] ```
2017-05-24 09:03:45 -04:00
items = items.map do |item|
item = by_ids(item)
item = by_personal(item)
item = by_starred(item)
item = by_trending(item)
UNION of SELECT/WHERE is faster than WHERE on UNION Instead of applying WHERE on a UNION, apply the WHERE on each of the seperate SELECT statements, and do UNION on that. Local tests with about 2_000_000 projects: - 1_500_000 private projects - 40_000 internal projects - 400_000 public projects For the API endpoint `/api/v4/projects?visibility=private` the slowest query was: ```sql SELECT "projects".* FROM "projects" WHERE ... ``` The original query took 1073.8ms. The query refactored to UNION of SELECT/WHERE took 2.3ms. The original query was: ```sql SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = $1 AND (projects.id IN (SELECT "projects"."id" FROM "projects" INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id" WHERE "projects"."pending_delete" = 'f' AND "project_authorizations"."user_id" = 23 UNION SELECT "projects"."id" FROM "projects" WHERE "projects"."visibility_level" IN (20, 10))) AND "projects"."visibility_level" = $2 AND "projects"."archived" = $3 ORDER BY "projects"."created_at" DESC LIMIT 20 OFFSET 0 [["pending_delete", "f"], ["visibility_level", 0], ["archived", "f"]] ``` The refactored query: ```sql SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = $1 AND (projects.id IN (SELECT "projects"."id" FROM "projects" INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id" WHERE "projects"."pending_delete" = 'f' AND "project_authorizations"."user_id" = 23 AND "projects"."visibility_level" = 0 AND "projects"."archived" = 'f' UNION SELECT "projects"."id" FROM "projects" WHERE "projects"."visibility_level" IN (20, 10) AND "projects"."visibility_level" = 0 AND "projects"."archived" = 'f')) ORDER BY "projects"."created_at" DESC LIMIT 20 OFFSET 0 [["pending_delete", "f"]] ```
2017-05-24 09:03:45 -04:00
item = by_visibilty_level(item)
item = by_tags(item)
item = by_search(item)
by_archived(item)
end
items = union(items)
sort(items)
end
private
def init_collection
2016-03-20 16:03:53 -04:00
projects = []
if params[:owned].present?
projects << current_user.owned_projects if current_user
else
projects << current_user.authorized_projects if current_user
projects << Project.unscoped.public_to_user(current_user) unless params[:non_public].present?
end
2016-03-20 16:03:53 -04:00
projects
end
def by_ids(items)
UNION of SELECT/WHERE is faster than WHERE on UNION Instead of applying WHERE on a UNION, apply the WHERE on each of the seperate SELECT statements, and do UNION on that. Local tests with about 2_000_000 projects: - 1_500_000 private projects - 40_000 internal projects - 400_000 public projects For the API endpoint `/api/v4/projects?visibility=private` the slowest query was: ```sql SELECT "projects".* FROM "projects" WHERE ... ``` The original query took 1073.8ms. The query refactored to UNION of SELECT/WHERE took 2.3ms. The original query was: ```sql SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = $1 AND (projects.id IN (SELECT "projects"."id" FROM "projects" INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id" WHERE "projects"."pending_delete" = 'f' AND "project_authorizations"."user_id" = 23 UNION SELECT "projects"."id" FROM "projects" WHERE "projects"."visibility_level" IN (20, 10))) AND "projects"."visibility_level" = $2 AND "projects"."archived" = $3 ORDER BY "projects"."created_at" DESC LIMIT 20 OFFSET 0 [["pending_delete", "f"], ["visibility_level", 0], ["archived", "f"]] ``` The refactored query: ```sql SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = $1 AND (projects.id IN (SELECT "projects"."id" FROM "projects" INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id" WHERE "projects"."pending_delete" = 'f' AND "project_authorizations"."user_id" = 23 AND "projects"."visibility_level" = 0 AND "projects"."archived" = 'f' UNION SELECT "projects"."id" FROM "projects" WHERE "projects"."visibility_level" IN (20, 10) AND "projects"."visibility_level" = 0 AND "projects"."archived" = 'f')) ORDER BY "projects"."created_at" DESC LIMIT 20 OFFSET 0 [["pending_delete", "f"]] ```
2017-05-24 09:03:45 -04:00
project_ids_relation ? items.where(id: project_ids_relation) : items
end
def union(items)
find_union(items, Project).with_route
end
def by_personal(items)
(params[:personal].present? && current_user) ? items.personal(current_user) : items
end
def by_starred(items)
(params[:starred].present? && current_user) ? items.starred_by(current_user) : items
end
def by_trending(items)
params[:trending].present? ? items.trending : items
end
def by_visibilty_level(items)
params[:visibility_level].present? ? items.where(visibility_level: params[:visibility_level]) : items
end
def by_tags(items)
params[:tag].present? ? items.tagged_with(params[:tag]) : items
end
def by_search(items)
params[:search] ||= params[:name]
params[:search].present? ? items.search(params[:search]) : items
end
def sort(items)
params[:sort].present? ? items.sort(params[:sort]) : items
end
def by_archived(projects)
# Back-compatibility with the places where `params[:archived]` can be set explicitly to `false`
params[:non_archived] = !Gitlab::Utils.to_boolean(params[:archived]) if params.key?(:archived)
params[:non_archived] ? projects.non_archived : projects
end
end