gitlab-org--gitlab-foss/lib/gitlab/background_migration/backfill_project_repositories.rb

230 lines
6.2 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Class that will fill the project_repositories table
# for projects whose entry is missing in this table.
class BackfillProjectRepositories
# Raised by Routable#build_full_path when a record has its parent foreign key
# set but the parent record itself cannot be loaded.
OrphanedNamespaceError = Class.new(StandardError)
# Shard model
# Bare ActiveRecord mapping of the `shards` table. ShardFinder uses it to
# read every shard (name and id) and to create a shard by name.
class Shard < ActiveRecord::Base
self.table_name = 'shards'
end
# Class that will find or create the shard by name.
# There is only a small set of shards, which would
# not change quickly, so look them up from memory
# instead of hitting the DB each time.
class ShardFinder
  # Upper bound on creation attempts for a single lookup. A uniqueness
  # conflict normally resolves after one cache reload; the bound only
  # prevents an endless loop if the conflicting row never becomes visible.
  MAX_ATTEMPTS = 3

  # Returns the id of the shard called +name+, creating the shard when the
  # in-memory cache has no entry for it.
  #
  # The return value is always the shard id, both for cached shards and for
  # a shard that has just been created.
  #
  # Raises ActiveRecord::RecordNotUnique when creation keeps conflicting
  # for MAX_ATTEMPTS attempts in a row.
  def find_shard_id(name)
    attempts = 0

    begin
      cached_id = shards[name]
      return cached_id if cached_id

      # requires_new: true is passed so the insert runs in its own (nested)
      # transaction and a uniqueness failure can be rescued below.
      Shard.transaction(requires_new: true) { create!(name) }
    rescue ActiveRecord::RecordNotUnique
      attempts += 1
      raise if attempts >= MAX_ATTEMPTS

      # Somebody else inserted the same name first: refresh the cache and
      # look the shard up again.
      reload!
      retry
    end
  end

  private

  # Inserts the shard, remembers its id in the cache and returns that id.
  def create!(name)
    shard = Shard.create!(name: name)
    shards[name] = shard.id
  end

  # Memoized name => id map of all shards.
  def shards
    @shards ||= reload!
  end

  # Rebuilds the name => id map from the database and returns it.
  def reload!
    @shards = Shard.all.map { |shard| [shard.name, shard.id] }.to_h
  end
end
module Storage
# Class that returns the disk path for a project using hashed storage
class Hashed
attr_accessor :project
ROOT_PATH_PREFIX = '@hashed'
def initialize(project)
@project = project
end
def disk_path
"#{ROOT_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
end
def disk_hash
@disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s)
end
end
# Computes the disk path of a project kept in legacy storage: the path is
# simply the project's full path.
class LegacyProject
  attr_accessor :project

  def initialize(project)
    self.project = project
  end

  # Delegates to the wrapped project's full_path.
  def disk_path
    project.full_path
  end
end
end
# Concern shared by Project and Namespace to work out the full route
# (slash-separated path) of a record.
module Routable
  extend ActiveSupport::Concern

  # Path of the associated route when there is one, otherwise the path
  # assembled from the record's ancestors.
  def full_path
    stored_path = route&.path

    stored_path || build_full_path
  end

  # Builds "<parent full path>/<own path>"; a record without a parent key
  # is just its own path.
  #
  # Raises OrphanedNamespaceError when the parent key is set but no parent
  # record is found.
  def build_full_path
    return path unless has_parent?

    owner = parent
    raise OrphanedNamespaceError if owner.nil?

    owner.full_path + '/' + path
  end

  # Returns the raw value of the parent foreign key (truthy when set),
  # read without loading the parent association.
  def has_parent?
    parent_key = association(:parent).reflection.foreign_key

    read_attribute(parent_key)
  end
end
Load all projects, namespaces, routes in 1 query Avoid doing sequential database queries to load the namespaces and the routes of projects and namespaces. This results in the following query: ```sql SELECT "projects"."id" AS t0_r0, "projects"."name" AS t0_r1, "projects"."path" AS t0_r2, "projects"."description" AS t0_r3, "projects"."created_at" AS t0_r4, "projects"."updated_at" AS t0_r5, "projects"."creator_id" AS t0_r6, "projects"."namespace_id" AS t0_r7, "projects"."last_activity_at" AS t0_r8, "projects"."import_url" AS t0_r9, "projects"."visibility_level" AS t0_r10, "projects"."archived" AS t0_r11, "projects"."avatar" AS t0_r12, "projects"."import_status" AS t0_r13, "projects"."star_count" AS t0_r14, "projects"."import_type" AS t0_r15, "projects"."import_source" AS t0_r16, "projects"."import_error" AS t0_r17, "projects"."ci_id" AS t0_r18, "projects"."shared_runners_enabled" AS t0_r19, "projects"."runners_token" AS t0_r20, "projects"."build_coverage_regex" AS t0_r21, "projects"."build_allow_git_fetch" AS t0_r22, "projects"."build_timeout" AS t0_r23, "projects"."pending_delete" AS t0_r24, "projects"."public_builds" AS t0_r25, "projects"."last_repository_check_failed" AS t0_r26, "projects"."last_repository_check_at" AS t0_r27, "projects"."container_registry_enabled" AS t0_r28, "projects"."only_allow_merge_if_pipeline_succeeds" AS t0_r29, "projects"."has_external_issue_tracker" AS t0_r30, "projects"."repository_storage" AS t0_r31, "projects"."request_access_enabled" AS t0_r32, "projects"."has_external_wiki" AS t0_r33, "projects"."ci_config_path" AS t0_r34, "projects"."lfs_enabled" AS t0_r35, "projects"."description_html" AS t0_r36, "projects"."only_allow_merge_if_all_discussions_are_resolved" AS t0_r37, "projects"."printing_merge_request_link_enabled" AS t0_r38, "projects"."auto_cancel_pending_pipelines" AS t0_r39, "projects"."import_jid" AS t0_r40, "projects"."cached_markdown_version" AS t0_r41, "projects"."delete_error" AS t0_r42, 
"projects"."last_repository_updated_at" AS t0_r43, "projects"."storage_version" AS t0_r44, "projects"."resolve_outdated_diff_discussions" AS t0_r45, "projects"."repository_read_only" AS t0_r46, "projects"."merge_requests_ff_only_enabled" AS t0_r47, "projects"."merge_requests_rebase_enabled" AS t0_r48, "projects"."jobs_cache_index" AS t0_r49, "projects"."pages_https_only" AS t0_r50, "projects"."remote_mirror_available_overridden" AS t0_r51, "projects"."pool_repository_id" AS t0_r52, "projects"."runners_token_encrypted" AS t0_r53, "projects"."bfg_object_map" AS t0_r54, "namespaces"."id" AS t1_r0, "namespaces"."name" AS t1_r1, "namespaces"."path" AS t1_r2, "namespaces"."owner_id" AS t1_r3, "namespaces"."created_at" AS t1_r4, "namespaces"."updated_at" AS t1_r5, "namespaces"."type" AS t1_r6, "namespaces"."description" AS t1_r7, "namespaces"."avatar" AS t1_r8, "namespaces"."share_with_group_lock" AS t1_r9, "namespaces"."visibility_level" AS t1_r10, "namespaces"."request_access_enabled" AS t1_r11, "namespaces"."description_html" AS t1_r12, "namespaces"."lfs_enabled" AS t1_r13, "namespaces"."parent_id" AS t1_r14, "namespaces"."require_two_factor_authentication" AS t1_r15, "namespaces"."two_factor_grace_period" AS t1_r16, "namespaces"."cached_markdown_version" AS t1_r17, "namespaces"."runners_token" AS t1_r18, "namespaces"."runners_token_encrypted" AS t1_r19, "routes"."id" AS t2_r0, "routes"."source_id" AS t2_r1, "routes"."source_type" AS t2_r2, "routes"."path" AS t2_r3, "routes"."created_at" AS t2_r4, "routes"."updated_at" AS t2_r5, "routes"."name" AS t2_r6, "routes_projects"."id" AS t3_r0, "routes_projects"."source_id" AS t3_r1, "routes_projects"."source_type" AS t3_r2, "routes_projects"."path" AS t3_r3, "routes_projects"."created_at" AS t3_r4, "routes_projects"."updated_at" AS t3_r5, "routes_projects"."name" AS t3_r6 FROM "projects" LEFT OUTER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id" LEFT OUTER JOIN "routes" ON "routes"."source_id" = 
"namespaces"."id" AND "routes"."source_type" = $1 LEFT OUTER JOIN "routes" "routes_projects" ON "routes_projects"."source_id" = "projects"."id" AND "routes_projects"."source_type" = $2 LEFT OUTER JOIN "project_repositories" ON "projects"."id" = "project_repositories"."project_id" WHERE ("projects"."storage_version" IS NULL OR "projects"."storage_version" = 0) AND "project_repositories"."project_id" IS NULL AND ("projects"."id" BETWEEN $3 AND $4); -- [["source_type", "Namespace"], -- ["source_type", "Project"], -- ["id", 1], -- ["id", 4]] ```
2019-01-09 04:47:24 -05:00
# Route model
# A route belongs to a polymorphic `source`; Namespace and Project each
# declare a `has_one :route` scoped to their own source_type.
class Route < ActiveRecord::Base
belongs_to :source, inverse_of: :route, polymorphic: true
end
Load all projects, namespaces, routes in 1 query Avoid doing sequential database queries to load the namespaces and the routes of projects and namespaces. This results in the following query: ```sql SELECT "projects"."id" AS t0_r0, "projects"."name" AS t0_r1, "projects"."path" AS t0_r2, "projects"."description" AS t0_r3, "projects"."created_at" AS t0_r4, "projects"."updated_at" AS t0_r5, "projects"."creator_id" AS t0_r6, "projects"."namespace_id" AS t0_r7, "projects"."last_activity_at" AS t0_r8, "projects"."import_url" AS t0_r9, "projects"."visibility_level" AS t0_r10, "projects"."archived" AS t0_r11, "projects"."avatar" AS t0_r12, "projects"."import_status" AS t0_r13, "projects"."star_count" AS t0_r14, "projects"."import_type" AS t0_r15, "projects"."import_source" AS t0_r16, "projects"."import_error" AS t0_r17, "projects"."ci_id" AS t0_r18, "projects"."shared_runners_enabled" AS t0_r19, "projects"."runners_token" AS t0_r20, "projects"."build_coverage_regex" AS t0_r21, "projects"."build_allow_git_fetch" AS t0_r22, "projects"."build_timeout" AS t0_r23, "projects"."pending_delete" AS t0_r24, "projects"."public_builds" AS t0_r25, "projects"."last_repository_check_failed" AS t0_r26, "projects"."last_repository_check_at" AS t0_r27, "projects"."container_registry_enabled" AS t0_r28, "projects"."only_allow_merge_if_pipeline_succeeds" AS t0_r29, "projects"."has_external_issue_tracker" AS t0_r30, "projects"."repository_storage" AS t0_r31, "projects"."request_access_enabled" AS t0_r32, "projects"."has_external_wiki" AS t0_r33, "projects"."ci_config_path" AS t0_r34, "projects"."lfs_enabled" AS t0_r35, "projects"."description_html" AS t0_r36, "projects"."only_allow_merge_if_all_discussions_are_resolved" AS t0_r37, "projects"."printing_merge_request_link_enabled" AS t0_r38, "projects"."auto_cancel_pending_pipelines" AS t0_r39, "projects"."import_jid" AS t0_r40, "projects"."cached_markdown_version" AS t0_r41, "projects"."delete_error" AS t0_r42, 
"projects"."last_repository_updated_at" AS t0_r43, "projects"."storage_version" AS t0_r44, "projects"."resolve_outdated_diff_discussions" AS t0_r45, "projects"."repository_read_only" AS t0_r46, "projects"."merge_requests_ff_only_enabled" AS t0_r47, "projects"."merge_requests_rebase_enabled" AS t0_r48, "projects"."jobs_cache_index" AS t0_r49, "projects"."pages_https_only" AS t0_r50, "projects"."remote_mirror_available_overridden" AS t0_r51, "projects"."pool_repository_id" AS t0_r52, "projects"."runners_token_encrypted" AS t0_r53, "projects"."bfg_object_map" AS t0_r54, "namespaces"."id" AS t1_r0, "namespaces"."name" AS t1_r1, "namespaces"."path" AS t1_r2, "namespaces"."owner_id" AS t1_r3, "namespaces"."created_at" AS t1_r4, "namespaces"."updated_at" AS t1_r5, "namespaces"."type" AS t1_r6, "namespaces"."description" AS t1_r7, "namespaces"."avatar" AS t1_r8, "namespaces"."share_with_group_lock" AS t1_r9, "namespaces"."visibility_level" AS t1_r10, "namespaces"."request_access_enabled" AS t1_r11, "namespaces"."description_html" AS t1_r12, "namespaces"."lfs_enabled" AS t1_r13, "namespaces"."parent_id" AS t1_r14, "namespaces"."require_two_factor_authentication" AS t1_r15, "namespaces"."two_factor_grace_period" AS t1_r16, "namespaces"."cached_markdown_version" AS t1_r17, "namespaces"."runners_token" AS t1_r18, "namespaces"."runners_token_encrypted" AS t1_r19, "routes"."id" AS t2_r0, "routes"."source_id" AS t2_r1, "routes"."source_type" AS t2_r2, "routes"."path" AS t2_r3, "routes"."created_at" AS t2_r4, "routes"."updated_at" AS t2_r5, "routes"."name" AS t2_r6, "routes_projects"."id" AS t3_r0, "routes_projects"."source_id" AS t3_r1, "routes_projects"."source_type" AS t3_r2, "routes_projects"."path" AS t3_r3, "routes_projects"."created_at" AS t3_r4, "routes_projects"."updated_at" AS t3_r5, "routes_projects"."name" AS t3_r6 FROM "projects" LEFT OUTER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id" LEFT OUTER JOIN "routes" ON "routes"."source_id" = 
"namespaces"."id" AND "routes"."source_type" = $1 LEFT OUTER JOIN "routes" "routes_projects" ON "routes_projects"."source_id" = "projects"."id" AND "routes_projects"."source_type" = $2 LEFT OUTER JOIN "project_repositories" ON "projects"."id" = "project_repositories"."project_id" WHERE ("projects"."storage_version" IS NULL OR "projects"."storage_version" = 0) AND "project_repositories"."project_id" IS NULL AND ("projects"."id" BETWEEN $3 AND $4); -- [["source_type", "Namespace"], -- ["source_type", "Project"], -- ["id", 1], -- ["id", 4]] ```
2019-01-09 04:47:24 -05:00
# Namespace model
# Maps the `namespaces` table and mixes in Routable so a namespace can
# report its full path.
class Namespace < ActiveRecord::Base
  self.table_name = 'namespaces'
  # The table has a `type` column; clearing the inheritance column stops
  # ActiveRecord from treating it as a single-table-inheritance discriminator.
  self.inheritance_column = nil

  include Routable

  belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces'
  # Routes are polymorphic, so restrict to rows whose source is a namespace.
  has_one :route, -> { where(source_type: 'Namespace') }, inverse_of: :source, foreign_key: :source_id
  has_many :projects, inverse_of: :parent
  # Child namespaces point at their parent through `parent_id`; the default
  # key derived from the class name would be `namespace_id`, which the
  # namespaces table does not have.
  has_many :namespaces, inverse_of: :parent, foreign_key: :parent_id
end
# ProjectRepository model
# Maps the `project_repositories` table, the table this migration fills.
# Project uses its arel table to find projects that have no row here yet.
class ProjectRepository < ActiveRecord::Base
self.table_name = 'project_repositories'
belongs_to :project, inverse_of: :project_repository
end
# Project model
# Maps the `projects` table, mixes in Routable for full_path, and knows
# which storage strategy (hashed or legacy) decides its disk path.
class Project < ActiveRecord::Base
  self.table_name = 'projects'

  include Routable

  # storage_version thresholds: a project whose storage_version is at least
  # the :repository value is treated as being on hashed storage.
  HASHED_STORAGE_FEATURES = {
    repository: 1,
    attachments: 2
  }.freeze

  scope :with_parent, -> { includes(:parent) }

  belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects'
  # Routes are polymorphic, so restrict to rows whose source is a project.
  has_one :route, -> { where(source_type: 'Project') }, inverse_of: :source, foreign_key: :source_id
  has_one :project_repository, inverse_of: :project

  delegate :disk_path, to: :storage

  class << self
    # Projects whose storage_version is >= HASHED_STORAGE_FEATURES[:repository].
    def on_hashed_storage
      where(Project.arel_table[:storage_version]
        .gteq(HASHED_STORAGE_FEATURES[:repository]))
    end

    # Projects whose storage_version is NULL or 0.
    def on_legacy_storage
      where(Project.arel_table[:storage_version].eq(nil)
        .or(Project.arel_table[:storage_version].eq(0)))
    end

    # Projects that have no matching row in project_repositories
    # (left outer join, then keep the rows where the joined side is NULL).
    def without_project_repository
      joins(left_outer_join_project_repository)
        .where(ProjectRepository.arel_table[:project_id].eq(nil))
    end

    # Arel join sources for
    # "LEFT OUTER JOIN project_repositories ON projects.id = project_repositories.project_id".
    def left_outer_join_project_repository
      projects_table = Project.arel_table
      repository_table = ProjectRepository.arel_table

      projects_table
        .join(repository_table, Arel::Nodes::OuterJoin)
        .on(projects_table[:id].eq(repository_table[:project_id]))
        .join_sources
    end
  end

  # Storage strategy object for this project, chosen once per instance.
  def storage
    @storage ||=
      if hashed_storage?
        Storage::Hashed.new(self)
      else
        Storage::LegacyProject.new(self)
      end
  end

  # Truthy when storage_version is set and at least
  # HASHED_STORAGE_FEATURES[:repository]; nil when storage_version is nil.
  def hashed_storage?
    self.storage_version &&
      self.storage_version >= HASHED_STORAGE_FEATURES[:repository]
  end
end
# Bulk-inserts into project_repositories the rows built for the projects
# whose id lies between start_id and stop_id (inclusive).
def perform(start_id, stop_id)
  rows = project_repositories(start_id, stop_id)

  Gitlab::Database.bulk_insert(:project_repositories, rows)
end
private
# Hook that must supply the project relation to backfill. This
# implementation only raises NotImplementedError, naming the receiving
# class and this method.
def projects
  message = "#{self.class} does not implement #{__method__}"

  raise NotImplementedError, message
end
# Builds the attribute hashes to insert for every project in the id range
# start_id..stop_id that has no project_repositories row yet.
#
# Routes and parent namespaces (with their routes) are eager-loaded so that
# computing each disk path does not trigger sequential queries per project.
def project_repositories(start_id, stop_id)
  projects
    .without_project_repository
    .includes(:route, parent: [:route]).references(:routes)
    .includes(:parent).references(:namespaces)
    .where(id: start_id..stop_id)
    .map { |project| build_attributes_for_project(project) }
    .compact # drops nil entries, should the builder ever return one
end
# Row to insert into project_repositories for one project: its id, the id
# of the shard named by its repository_storage, and its disk path.
def build_attributes_for_project(project)
  project_id = project.id
  shard_id = find_shard_id(project.repository_storage)
  disk_path = project.disk_path

  { project_id: project_id, shard_id: shard_id, disk_path: disk_path }
end
# Resolves a repository storage name to a shard id through the memoized
# ShardFinder.
def find_shard_id(repository_storage)
  finder = shard_finder

  finder.find_shard_id(repository_storage)
end
# ShardFinder instance, created on first use and reused afterwards so its
# in-memory shard cache is kept between lookups.
def shard_finder
  return @shard_finder if @shard_finder

  @shard_finder = ShardFinder.new
end
end
end
end