2017-03-03 05:35:04 -05:00
|
|
|
# ProjectsFinder
#
# Used to filter Projects by a set of params
#
# Arguments:
#   current_user - the user the lookup is performed for (may be nil)
#   project_ids_relation: int[] - project ids to restrict the results to
#   params:
#     trending: boolean
#     owned: boolean
#     non_public: boolean
#     starred: boolean
#     sort: string
#     visibility_level: int
#     tag: string[]
#     personal: boolean
#     search: string
#     name: string - used as `search` when `search` is not set
#     non_archived: boolean
#     archived: boolean - when the key is present, `non_archived` is
#       overwritten with the negation of this value
#
class ProjectsFinder < UnionFinder
  attr_accessor :params
  attr_reader :current_user, :project_ids_relation

  def initialize(params: {}, current_user: nil, project_ids_relation: nil)
    @params = params
    @current_user = current_user
    @project_ids_relation = project_ids_relation
  end

  # Builds the base collections, filters each of them, combines them and
  # applies the requested sorting.
  #
  # Every filter is applied to each segment *before* the UNION instead of once
  # on the combined result: a UNION of SELECT/WHERE is faster than a WHERE on
  # a UNION. In a local test with about 2_000_000 projects the slowest query
  # of `/api/v4/projects?visibility=private` went from 1073.8ms to 2.3ms.
  def execute
    segments = init_collection.map do |segment|
      segment = by_ids(segment)
      segment = by_personal(segment)
      segment = by_starred(segment)
      segment = by_trending(segment)
      segment = by_visibilty_level(segment)
      segment = by_tags(segment)
      segment = by_search(segment)
      by_archived(segment)
    end

    sort(union(segments))
  end

  private

  # Returns the list of base collections to filter and combine.
  #
  # With `owned` set only the projects owned by current_user are used.
  # Otherwise the projects current_user is authorized for are used, plus the
  # projects public to that user unless `non_public` is set. The list is empty
  # when there is no current_user and `owned` or `non_public` is set.
  def init_collection
    projects = []

    if params[:owned].present?
      projects << current_user.owned_projects if current_user
    else
      projects << current_user.authorized_projects if current_user
      projects << Project.unscoped.public_to_user(current_user) unless params[:non_public].present?
    end

    projects
  end

  # Restricts items to project_ids_relation when one was given.
  def by_ids(items)
    return items unless project_ids_relation

    items.where(id: project_ids_relation)
  end

  # Combines the filtered segments into a single collection.
  def union(items)
    # No segments are collected when there is no current_user and either
    # `owned` or `non_public` is set; answer with an empty relation then.
    # NOTE(review): find_union lives in UnionFinder (not visible here) and
    # presumably does not return a relation for an empty list - confirm.
    return Project.none if items.empty?

    find_union(items, Project).with_route
  end

  # Applies the `personal` filter; it needs a current_user to have any effect.
  def by_personal(items)
    return items unless params[:personal].present? && current_user

    items.personal(current_user)
  end

  # Applies the `starred` filter; it needs a current_user to have any effect.
  def by_starred(items)
    return items unless params[:starred].present? && current_user

    items.starred_by(current_user)
  end

  # Applies the `trending` filter.
  def by_trending(items)
    return items unless params[:trending].present?

    items.trending
  end

  # Applies the `visibility_level` filter.
  # The misspelled method name is kept as is so existing callers keep working.
  def by_visibilty_level(items)
    return items unless params[:visibility_level].present?

    items.where(visibility_level: params[:visibility_level])
  end

  # Applies the tag filter, read from `params[:tag]`.
  def by_tags(items)
    return items unless params[:tag].present?

    items.tagged_with(params[:tag])
  end

  # Applies the `search` filter, falling back to `name`.
  #
  # NOTE: this writes the fallback into `params[:search]`, i.e. it modifies
  # the params hash in place. That is existing behaviour and is preserved.
  def by_search(items)
    params[:search] ||= params[:name]
    return items unless params[:search].present?

    items.search(params[:search])
  end

  # Applies the `sort` param to the combined collection.
  def sort(items)
    return items unless params[:sort].present?

    items.sort(params[:sort])
  end

  # Applies the `non_archived` filter.
  #
  # NOTE: when `params` has an `:archived` key this overwrites
  # `params[:non_archived]` in place. That is existing behaviour and is
  # preserved.
  def by_archived(projects)
    # Back-compatibility with the places where `params[:archived]` can be set explicitly to `false`
    params[:non_archived] = !Gitlab::Utils.to_boolean(params[:archived]) if params.key?(:archived)

    params[:non_archived] ? projects.non_archived : projects
  end
end
|