gitlab-org--gitlab-foss/lib/gitlab/background_migration/backfill_project_repositories.rb

229 lines
6.3 KiB
Ruby

# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Class that will create fill the project_repositories table
# for projects an entry is is missing in this table.
class BackfillProjectRepositories
OrphanedNamespaceError = Class.new(StandardError)
# Shard model
class Shard < ActiveRecord::Base
self.table_name = 'shards'
end
# Class that will find or create the shard by name.
# There is only a small set of shards, which would
# not change quickly, so look them up from memory
# instead of hitting the DB each time.
class ShardFinder
def find_shard_id(name)
shard_id = shards.fetch(name, nil)
return shard_id if shard_id.present?
Shard.transaction(requires_new: true) do
create!(name)
end
rescue ActiveRecord::RecordNotUnique
reload!
retry
end
private
def create!(name)
Shard.create!(name: name).tap { |shard| @shards[name] = shard.id }
end
def shards
@shards ||= reload!
end
def reload!
@shards = Hash[*Shard.all.flat_map { |shard| [shard.name, shard.id] }]
end
end
module Storage
# Class that returns the disk path for a project using hashed storage
class Hashed
attr_accessor :project
ROOT_PATH_PREFIX = '@hashed'
def initialize(project)
@project = project
end
def disk_path
"#{ROOT_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
end
def disk_hash
@disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s)
end
end
# Class that returns the disk path for a project using legacy storage
class LegacyProject
attr_accessor :project
def initialize(project)
@project = project
end
def disk_path
project.full_path
end
end
end
# Concern used by Project and Namespace to determine the full route to the project
module Routable
extend ActiveSupport::Concern
def full_path
route&.path || build_full_path
end
def build_full_path
return path unless has_parent?
raise OrphanedNamespaceError if parent.nil?
parent.full_path + '/' + path
end
def has_parent?
read_attribute(association(:parent).reflection.foreign_key)
end
end
# Route model
class Route < ActiveRecord::Base
belongs_to :source, inverse_of: :route, polymorphic: true
end
# Namespace model
class Namespace < ActiveRecord::Base
self.table_name = 'namespaces'
self.inheritance_column = nil
include Routable
belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces'
has_one :route, -> { where(source_type: 'Namespace') }, inverse_of: :source, foreign_key: :source_id
has_many :projects, inverse_of: :parent
has_many :namespaces, inverse_of: :parent
end
# ProjectRegistry model
class ProjectRepository < ActiveRecord::Base
self.table_name = 'project_repositories'
belongs_to :project, inverse_of: :project_repository
end
# Project model
class Project < ActiveRecord::Base
self.table_name = 'projects'
include Routable
HASHED_STORAGE_FEATURES = {
repository: 1,
attachments: 2
}.freeze
scope :with_parent, -> { includes(:parent) }
belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects'
has_one :route, -> { where(source_type: 'Project') }, inverse_of: :source, foreign_key: :source_id
has_one :project_repository, inverse_of: :project
delegate :disk_path, to: :storage
class << self
def on_hashed_storage
where(Project.arel_table[:storage_version]
.gteq(HASHED_STORAGE_FEATURES[:repository]))
end
def on_legacy_storage
where(Project.arel_table[:storage_version].eq(nil)
.or(Project.arel_table[:storage_version].eq(0)))
end
def without_project_repository
joins(left_outer_join_project_repository)
.where(ProjectRepository.arel_table[:project_id].eq(nil))
end
def left_outer_join_project_repository
projects_table = Project.arel_table
repository_table = ProjectRepository.arel_table
projects_table
.join(repository_table, Arel::Nodes::OuterJoin)
.on(projects_table[:id].eq(repository_table[:project_id]))
.join_sources
end
end
def storage
@storage ||=
if hashed_storage?
Storage::Hashed.new(self)
else
Storage::LegacyProject.new(self)
end
end
def hashed_storage?
self.storage_version &&
self.storage_version >= HASHED_STORAGE_FEATURES[:repository]
end
end
def perform(start_id, stop_id)
Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) # rubocop:disable Gitlab/BulkInsert
end
private
def projects
raise NotImplementedError,
"#{self.class} does not implement #{__method__}"
end
def project_repositories(start_id, stop_id)
projects
.without_project_repository
.includes(:route, parent: [:route]).references(:routes)
.includes(:parent).references(:namespaces)
.where(id: start_id..stop_id)
.map { |project| build_attributes_for_project(project) }
.compact
end
def build_attributes_for_project(project)
{
project_id: project.id,
shard_id: find_shard_id(project.repository_storage),
disk_path: project.disk_path
}
end
def find_shard_id(repository_storage)
shard_finder.find_shard_id(repository_storage)
end
def shard_finder
@shard_finder ||= ShardFinder.new
end
end
end
end