Merge branch 'tc-backfill-hashed-project_repositories' into 'master'
Fill project_repositories for hashed storage projects. Closes #48527. See merge request gitlab-org/gitlab-ce!23482.
This commit is contained in:
commit
9655a602ac
4 changed files with 255 additions and 0 deletions
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Fill project_repositories for hashed storage projects
|
||||
merge_request: 23482
|
||||
author:
|
||||
type: added
|
|
@ -0,0 +1,26 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Post-deployment style migration that schedules the
# BackfillHashedProjectRepositories background migration for the rows of the
# `projects` table, spacing the scheduled jobs DELAY_INTERVAL apart.
class BackfillHashedProjectRepositories < ActiveRecord::Migration[4.2]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  BATCH_SIZE = 1_000
  DELAY_INTERVAL = 5.minutes
  MIGRATION = 'BackfillHashedProjectRepositories'

  disable_ddl_transaction!

  # Migration-local model bound to the `projects` table so the migration does
  # not depend on the application's Project class.
  class Project < ActiveRecord::Base
    # Presumably required by the scheduling helper to walk the table in
    # batches -- confirm against Gitlab::Database::MigrationHelpers.
    include EachBatch

    self.table_name = 'projects'
  end

  # Schedules one background job per batch of projects.
  #
  # BATCH_SIZE is passed explicitly so the constant declared above is what
  # actually drives the batching instead of being dead code.
  # NOTE(review): assumes the helper accepts a `batch_size:` keyword -- confirm
  # against Gitlab::Database::MigrationHelpers.
  def up
    queue_background_migration_jobs_by_range_at_intervals(
      Project,
      MIGRATION,
      DELAY_INTERVAL,
      batch_size: BATCH_SIZE
    )
  end

  def down
    # no-op: since there could have been existing rows before the migration do not remove anything
  end
end
|
|
@ -0,0 +1,134 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module BackgroundMigration
|
||||
# Class that will fill the project_repositories table
|
||||
# for all projects that are on hashed storage and an entry is
|
||||
# missing in this table.
|
||||
class BackfillHashedProjectRepositories
|
||||
# Migration-local model bound to the `shards` table.
class Shard < ActiveRecord::Base
  self.table_name = 'shards'
end
|
||||
|
||||
# Class that will find or create the shard by name.
# There is only a small set of shards, which would
# not change quickly, so look them up from memory
# instead of hitting the DB each time.
class ShardFinder
  # Upper bound on lookup/create attempts for one name, so a persistent
  # uniqueness conflict surfaces as an error instead of looping forever.
  MAX_ATTEMPTS = 3

  # Returns the ID of the shard called +name+, creating the row when it is
  # not known yet. The return value is always the ID, including on the call
  # that creates the shard.
  #
  # Raises ActiveRecord::RecordNotUnique when the shard can neither be
  # created nor found after MAX_ATTEMPTS attempts.
  def find_shard_id(name)
    attempts = 0

    begin
      attempts += 1
      # requires_new: presumably so a failed insert only rolls back a nested
      # transaction rather than the caller's -- confirm for the target DB.
      shards[name] || Shard.transaction(requires_new: true) { create!(name) }
    rescue ActiveRecord::RecordNotUnique
      raise if attempts >= MAX_ATTEMPTS

      # The row was inserted elsewhere after the cache was loaded; refresh
      # the cache and look the name up again.
      reload!
      retry
    end
  end

  private

  # Inserts the shard, records its ID in the cache, and returns the ID.
  def create!(name)
    Shard.create!(name: name).id.tap { |shard_id| shards[name] = shard_id }
  end

  # Memoized name => id lookup table.
  def shards
    @shards ||= reload!
  end

  # Rebuilds the lookup table from the database and returns it.
  def reload!
    @shards = Shard.all.map { |shard| [shard.name, shard.id] }.to_h
  end
end
|
||||
|
||||
# Migration-local model bound to the `project_repositories` table.
class ProjectRepository < ActiveRecord::Base
  self.table_name = 'project_repositories'

  # Counterpart of Project's has_one :project_repository.
  belongs_to :project, inverse_of: :project_repository
end
|
||||
|
||||
# Migration-local model bound to the `projects` table, carrying only the
# scopes and path helpers the backfill uses.
class Project < ActiveRecord::Base
  self.table_name = 'projects'

  HASHED_PATH_PREFIX = '@hashed'

  # storage_version thresholds; a project whose storage_version is at least
  # the :repository value is treated as having its repository on hashed
  # storage.
  HASHED_STORAGE_FEATURES = {
    repository: 1,
    attachments: 2
  }.freeze

  has_one :project_repository, inverse_of: :project

  class << self
    # Projects whose storage_version is >= the :repository threshold.
    # Rows with a NULL storage_version do not match the comparison.
    def on_hashed_storage
      where(Project.arel_table[:storage_version]
        .gteq(HASHED_STORAGE_FEATURES[:repository]))
    end

    # Projects that have no row in project_repositories.
    def without_project_repository
      joins(left_outer_join_project_repository)
        .where(ProjectRepository.arel_table[:project_id].eq(nil))
    end

    # Arel join sources for
    # projects LEFT OUTER JOIN project_repositories ON projects.id = project_repositories.project_id
    def left_outer_join_project_repository
      projects_table = Project.arel_table
      repository_table = ProjectRepository.arel_table

      projects_table
        .join(repository_table, Arel::Nodes::OuterJoin)
        .on(projects_table[:id].eq(repository_table[:project_id]))
        .join_sources
    end
  end

  # Truthy when storage_version is set and reaches the :repository threshold.
  # Uses the same constant as .on_hashed_storage so the scope and the
  # instance-level check cannot drift apart.
  def hashed_storage?
    storage_version && storage_version >= HASHED_STORAGE_FEATURES[:repository]
  end

  # "@hashed/<hash chars 0-1>/<hash chars 2-3>/<full hash>"
  def hashed_disk_path
    "#{HASHED_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}/#{disk_hash}"
  end

  # Memoized Digest::SHA2 hex digest of the project id rendered as a string.
  def disk_hash
    @disk_hash ||= Digest::SHA2.hexdigest(id.to_s)
  end
end
|
||||
|
||||
# Builds the missing project_repositories rows for the projects in the given
# ID range and hands them to Gitlab::Database.bulk_insert in one call.
def perform(start_id, stop_id)
  rows = project_repositories(start_id, stop_id)

  Gitlab::Database.bulk_insert(:project_repositories, rows)
end
|
||||
|
||||
private
|
||||
|
||||
# Attribute hashes for every hashed-storage project with an ID in
# start_id..stop_id that has no project_repositories row yet. Projects for
# which no attributes are built are left out.
def project_repositories(start_id, stop_id)
  candidates = Project
    .on_hashed_storage
    .without_project_repository
    .where(id: start_id..stop_id)

  candidates.each_with_object([]) do |project, rows|
    attributes = build_attributes_for_project(project)
    rows << attributes unless attributes.nil?
  end
end
|
||||
|
||||
# Row attributes (project_id, shard_id, disk_path) for one project, or nil
# when the project is not on hashed storage.
def build_attributes_for_project(project)
  if project.hashed_storage?
    {
      project_id: project.id,
      shard_id: find_shard_id(project.repository_storage),
      disk_path: project.hashed_disk_path
    }
  end
end
|
||||
|
||||
# Delegates the shard lookup for a repository storage name to the memoized
# ShardFinder.
def find_shard_id(repository_storage)
  finder = shard_finder
  finder.find_shard_id(repository_storage)
end
|
||||
|
||||
# ShardFinder shared by all lookups made through this object; it is built on
# first use and reused afterwards.
def shard_finder
  @shard_finder = ShardFinder.new unless @shard_finder
  @shard_finder
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,90 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'spec_helper'
|
||||
|
||||
# Specs for the background migration that backfills project_repositories for
# hashed-storage projects, including its ShardFinder helper and its
# migration-local Project scopes.
describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
  let(:namespaces) { table(:namespaces) }
  let(:project_repositories) { table(:project_repositories) }
  let(:projects) { table(:projects) }
  let(:shards) { table(:shards) }
  let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
  let(:shard) { shards.create!(name: 'default') }

  describe described_class::ShardFinder do
    describe '#find_shard_id' do
      it 'creates a new shard when it does not exist yet' do
        expect { subject.find_shard_id('other') }.to change(shards, :count).by(1)
      end

      it 'returns the shard when it exists' do
        # create! (not create) so a failed insert fails the example here
        # instead of surfacing later as a confusing id mismatch.
        shards.create!(id: 5, name: 'other')

        shard_id = subject.find_shard_id('other')

        expect(shard_id).to eq(5)
      end

      it 'only queries the database once to retrieve shards' do
        # First call warms the finder's in-memory lookup table.
        subject.find_shard_id('default')

        expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0)
      end
    end
  end

  describe described_class::Project do
    describe '.on_hashed_storage' do
      it 'finds projects with repository on hashed storage' do
        projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
        projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
        projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
        projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil)

        expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2])
      end
    end

    describe '.without_project_repository' do
      it 'finds projects which do not have a project_repositories entry' do
        projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id)
        projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id)
        project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id)

        expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1)
      end
    end
  end

  describe '#perform' do
    # Runs the migration over ids 1..<last project id> on a fresh instance, so
    # no shard lookup table is shared between calls.
    def perform_migration
      described_class.new.perform(1, projects.last.id)
    end

    it 'creates a project_repository row for projects on hashed storage that need one' do
      projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
      projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)

      expect { perform_migration }.to change(project_repositories, :count).by(2)
    end

    it 'does nothing for projects on hashed storage that have already a project_repository row' do
      projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
      project_repositories.create!(project_id: 1, disk_path: '@phony/foo/bar', shard_id: shard.id)

      expect { perform_migration }.not_to change(project_repositories, :count)
    end

    it 'does nothing for projects on legacy storage' do
      projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)

      expect { perform_migration }.not_to change(project_repositories, :count)
    end

    it 'inserts rows in a single query' do
      projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)

      # Baseline: query count for a run that has one project to backfill.
      control_count = ActiveRecord::QueryRecorder.new { perform_migration }

      projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
      projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)

      # Two projects to backfill must not need more queries than one did.
      expect { perform_migration }.not_to exceed_query_limit(control_count)
    end
  end
end
|
Loading…
Reference in a new issue