Fill project_repositories for hashed storage
This commit is contained in:
parent
5237a55d62
commit
8c9e692095
3 changed files with 92 additions and 148 deletions
|
@ -1,11 +1,11 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0]
|
class BackfillHashedProjectRepositories < ActiveRecord::Migration[4.2]
|
||||||
include Gitlab::Database::MigrationHelpers
|
include Gitlab::Database::MigrationHelpers
|
||||||
|
|
||||||
DOWNTIME = false
|
DOWNTIME = false
|
||||||
BATCH_SIZE = 1_000
|
BATCH_SIZE = 1_000
|
||||||
DELAY_INTERVAL = 1.minutes
|
DELAY_INTERVAL = 5.minutes
|
||||||
MIGRATION = 'BackfillHashedProjectRepositories'
|
MIGRATION = 'BackfillHashedProjectRepositories'
|
||||||
|
|
||||||
disable_ddl_transaction!
|
disable_ddl_transaction!
|
||||||
|
@ -21,7 +21,6 @@ class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0]
|
||||||
end
|
end
|
||||||
|
|
||||||
def down
|
def down
|
||||||
# Since there could have been existing rows before the migration
|
# no-op: since there could have been existing rows before the migration do not remove anything
|
||||||
# do not remove anything
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -2,58 +2,60 @@
|
||||||
|
|
||||||
module Gitlab
|
module Gitlab
|
||||||
module BackgroundMigration
|
module BackgroundMigration
|
||||||
# Class the will create rows in project_repositories for all
|
# Class that will create fill the project_repositories table
|
||||||
# projects that are on hashed storage
|
# for all projects that are on hashed storage and an entry is
|
||||||
|
# is missing in this table.
|
||||||
class BackfillHashedProjectRepositories
|
class BackfillHashedProjectRepositories
|
||||||
# Model for a Shard
|
# Shard model
|
||||||
class Shard < ActiveRecord::Base
|
class Shard < ActiveRecord::Base
|
||||||
self.table_name = 'shards'
|
self.table_name = 'shards'
|
||||||
|
|
||||||
def self.by_name(name)
|
|
||||||
to_a.detect { |shard| shard.name == name } || create_by(name: name)
|
|
||||||
rescue ActiveRecord::RecordNotUnique
|
|
||||||
retry
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Class that will find or create the shard by name.
|
# Class that will find or create the shard by name.
|
||||||
# There is only a small set of shards, which would not change quickly,
|
# There is only a small set of shards, which would
|
||||||
# so look them up from memory instead of hitting the DB each time.
|
# not change quickly, so look them up from memory
|
||||||
|
# instead of hitting the DB each time.
|
||||||
class ShardFinder
|
class ShardFinder
|
||||||
def find(name)
|
def find_shard_id(name)
|
||||||
shards.detect { |shard| shard.name == name } || create!(name)
|
shard_id = shards.fetch(name, nil)
|
||||||
|
return shard_id if shard_id.present?
|
||||||
|
|
||||||
|
Shard.transaction(requires_new: true) do
|
||||||
|
create!(name)
|
||||||
|
end
|
||||||
rescue ActiveRecord::RecordNotUnique
|
rescue ActiveRecord::RecordNotUnique
|
||||||
load!
|
reload!
|
||||||
retry
|
retry
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def create!(name)
|
def create!(name)
|
||||||
Shard.create!(name: name).tap { |shard| @shards << shard }
|
Shard.create!(name: name).tap { |shard| @shards[name] = shard.id }
|
||||||
end
|
end
|
||||||
|
|
||||||
def shards
|
def shards
|
||||||
@shards || load!
|
@shards ||= reload!
|
||||||
end
|
end
|
||||||
|
|
||||||
def load!
|
def reload!
|
||||||
@shards = Shard.all.to_a
|
@shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Model for a ProjectRepository
|
# ProjectRegistry model
|
||||||
class ProjectRepository < ActiveRecord::Base
|
class ProjectRepository < ActiveRecord::Base
|
||||||
self.table_name = 'project_repositories'
|
self.table_name = 'project_repositories'
|
||||||
|
|
||||||
belongs_to :project, inverse_of: :project_repository
|
belongs_to :project, inverse_of: :project_repository
|
||||||
end
|
end
|
||||||
|
|
||||||
# Model for a Project
|
# Project model
|
||||||
class Project < ActiveRecord::Base
|
class Project < ActiveRecord::Base
|
||||||
self.table_name = 'projects'
|
self.table_name = 'projects'
|
||||||
|
|
||||||
HASHED_PATH_PREFIX = '@hashed'
|
HASHED_PATH_PREFIX = '@hashed'
|
||||||
|
|
||||||
HASHED_STORAGE_FEATURES = {
|
HASHED_STORAGE_FEATURES = {
|
||||||
repository: 1,
|
repository: 1,
|
||||||
attachments: 2
|
attachments: 2
|
||||||
|
@ -63,33 +65,26 @@ module Gitlab
|
||||||
|
|
||||||
class << self
|
class << self
|
||||||
def on_hashed_storage
|
def on_hashed_storage
|
||||||
where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[:repository]))
|
where(Project.arel_table[:storage_version]
|
||||||
|
.gteq(HASHED_STORAGE_FEATURES[:repository]))
|
||||||
end
|
end
|
||||||
|
|
||||||
def without_project_repository
|
def without_project_repository
|
||||||
cond = ProjectRepository.arel_table[:project_id].eq(nil)
|
joins(left_outer_join_project_repository)
|
||||||
left_outer_joins(:project_repository).where(cond)
|
.where(ProjectRepository.arel_table[:project_id].eq(nil))
|
||||||
end
|
end
|
||||||
|
|
||||||
def left_outer_joins(relation)
|
def left_outer_join_project_repository
|
||||||
return super if Gitlab.rails5?
|
projects_table = Project.arel_table
|
||||||
|
repository_table = ProjectRepository.arel_table
|
||||||
|
|
||||||
# TODO Rails 4?
|
projects_table
|
||||||
|
.join(repository_table, Arel::Nodes::OuterJoin)
|
||||||
|
.on(projects_table[:id].eq(repository_table[:project_id]))
|
||||||
|
.join_sources
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def project_repository_attributes(shard_finder)
|
|
||||||
return unless hashed_storage?
|
|
||||||
|
|
||||||
{
|
|
||||||
project_id: id,
|
|
||||||
shard_id: shard_finder.find(repository_storage).id,
|
|
||||||
disk_path: hashed_disk_path
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def hashed_storage?
|
def hashed_storage?
|
||||||
self.storage_version && self.storage_version >= 1
|
self.storage_version && self.storage_version >= 1
|
||||||
end
|
end
|
||||||
|
@ -99,7 +94,7 @@ module Gitlab
|
||||||
end
|
end
|
||||||
|
|
||||||
def disk_hash
|
def disk_hash
|
||||||
@disk_hash ||= Digest::SHA2.hexdigest(id.to_s) if id
|
@disk_hash ||= Digest::SHA2.hexdigest(id.to_s)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -110,12 +105,27 @@ module Gitlab
|
||||||
private
|
private
|
||||||
|
|
||||||
def project_repositories(start_id, stop_id)
|
def project_repositories(start_id, stop_id)
|
||||||
Project.on_hashed_storage.without_project_repository
|
Project.on_hashed_storage
|
||||||
|
.without_project_repository
|
||||||
.where(id: start_id..stop_id)
|
.where(id: start_id..stop_id)
|
||||||
.map { |project| project.project_repository_attributes(shard_finder) }
|
.map { |project| build_attributes_for_project(project) }
|
||||||
.compact
|
.compact
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def build_attributes_for_project(project)
|
||||||
|
return unless project.hashed_storage?
|
||||||
|
|
||||||
|
{
|
||||||
|
project_id: project.id,
|
||||||
|
shard_id: find_shard_id(project.repository_storage),
|
||||||
|
disk_path: project.hashed_disk_path
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
def find_shard_id(repository_storage)
|
||||||
|
shard_finder.find_shard_id(repository_storage)
|
||||||
|
end
|
||||||
|
|
||||||
def shard_finder
|
def shard_finder
|
||||||
@shard_finder ||= ShardFinder.new
|
@shard_finder ||= ShardFinder.new
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,59 +3,31 @@
|
||||||
require 'spec_helper'
|
require 'spec_helper'
|
||||||
|
|
||||||
describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
|
describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do
|
||||||
let(:shards) { table(:shards) }
|
|
||||||
let(:namespaces) { table(:namespaces) }
|
let(:namespaces) { table(:namespaces) }
|
||||||
let(:projects) { table(:projects) }
|
|
||||||
let(:project_repositories) { table(:project_repositories) }
|
let(:project_repositories) { table(:project_repositories) }
|
||||||
|
let(:projects) { table(:projects) }
|
||||||
|
let(:shards) { table(:shards) }
|
||||||
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
|
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
|
||||||
let(:default_shard) { shards.create!(name: 'default') }
|
let(:shard) { shards.create!(name: 'default') }
|
||||||
|
|
||||||
describe described_class::ShardFinder do
|
describe described_class::ShardFinder do
|
||||||
describe '#find' do
|
describe '#find_shard_id' do
|
||||||
subject(:finder) { described_class.new }
|
|
||||||
|
|
||||||
it 'creates the shard by name' do
|
|
||||||
expect(finder).to receive(:create!).and_call_original
|
|
||||||
|
|
||||||
expect(finder.find('default')).to be_present
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'does not try to create existing shards' do
|
|
||||||
shards.create(name: 'default')
|
|
||||||
|
|
||||||
expect(finder).not_to receive(:create!)
|
|
||||||
|
|
||||||
finder.find('default')
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'only queries the database once for shards' do
|
|
||||||
finder.find('default')
|
|
||||||
|
|
||||||
expect do
|
|
||||||
finder.find('default')
|
|
||||||
end.not_to exceed_query_limit(0)
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'creates a new shard when it does not exist yet' do
|
it 'creates a new shard when it does not exist yet' do
|
||||||
expect do
|
expect { subject.find_shard_id('other') }.to change(shards, :count).by(1)
|
||||||
finder.find('other')
|
|
||||||
end.to change(shards, :count).by(1)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'only creates a new shard once' do
|
it 'returns the shard when it exists' do
|
||||||
finder.find('other')
|
shards.create(id: 5, name: 'other')
|
||||||
|
|
||||||
expect do
|
shard_id = subject.find_shard_id('other')
|
||||||
finder.find('other')
|
|
||||||
end.not_to change(shards, :count)
|
expect(shard_id).to eq(5)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'is not vulnerable to race conditions' do
|
it 'only queries the database once to retrieve shards' do
|
||||||
finder.find('default')
|
subject.find_shard_id('default')
|
||||||
|
|
||||||
other_shard = shards.create(name: 'other')
|
expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0)
|
||||||
|
|
||||||
expect(finder.find('other').id).to eq(other_shard.id)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -63,93 +35,56 @@ describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migrat
|
||||||
describe described_class::Project do
|
describe described_class::Project do
|
||||||
describe '.on_hashed_storage' do
|
describe '.on_hashed_storage' do
|
||||||
it 'finds projects with repository on hashed storage' do
|
it 'finds projects with repository on hashed storage' do
|
||||||
hashed_projects = [
|
projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
|
||||||
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1),
|
projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
|
||||||
projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
|
projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
|
||||||
]
|
projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil)
|
||||||
|
|
||||||
projects.create!(name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0)
|
expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2])
|
||||||
projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: nil)
|
|
||||||
|
|
||||||
expect(described_class.on_hashed_storage.pluck(:id)).to match_array(hashed_projects.map(&:id))
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '.without_project_repository' do
|
describe '.without_project_repository' do
|
||||||
it 'finds projects which do not have a projects_repositories row' do
|
it 'finds projects which do not have a projects_repositories entry' do
|
||||||
without_project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id)
|
projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id)
|
||||||
with_project = projects.create!(name: 'bar', path: 'bar', namespace_id: group.id)
|
projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id)
|
||||||
project_repositories.create!(project_id: with_project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id)
|
project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id)
|
||||||
|
|
||||||
expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(without_project.id)
|
expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1)
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
describe '#project_repository_attributes' do
|
|
||||||
let(:shard_finder) { Gitlab::BackgroundMigration::BackfillHashedProjectRepositories::ShardFinder.new }
|
|
||||||
|
|
||||||
it 'composes the correct attributes for project_repository' do
|
|
||||||
shiny_shard = shards.create!(name: 'shiny')
|
|
||||||
project = projects.create!(id: 5, name: 'foo', path: 'foo', namespace_id: group.id, repository_storage: shiny_shard.name, storage_version: 1)
|
|
||||||
|
|
||||||
expected_attributes = {
|
|
||||||
project_id: project.id,
|
|
||||||
shard_id: shiny_shard.id,
|
|
||||||
disk_path: '@hashed/ef/2d/ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d'
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to eq(expected_attributes)
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'returns nil for a project not on hashed storage' do
|
|
||||||
project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
|
|
||||||
|
|
||||||
expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to be_nil
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '#perform' do
|
describe '#perform' do
|
||||||
def perform!
|
it 'creates a project_repository row for projects on hashed storage that need one' do
|
||||||
described_class.new.perform(1, projects.last.id)
|
projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
|
||||||
|
projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2)
|
||||||
|
|
||||||
|
expect { described_class.new.perform(1, projects.last.id) }.to change(project_repositories, :count).by(2)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'create project_repository row for hashed storage project' do
|
it 'does nothing for projects on hashed storage that have already a project_repository row' do
|
||||||
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
|
projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
|
||||||
|
project_repositories.create!(project_id: 1, disk_path: '@phony/foo/bar', shard_id: shard.id)
|
||||||
|
|
||||||
expect do
|
expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
|
||||||
perform!
|
|
||||||
end.to change(project_repositories, :count).by(1)
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'does nothing for projects that have already a project_repository' do
|
|
||||||
project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1)
|
|
||||||
project_repositories.create!(project_id: project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id)
|
|
||||||
|
|
||||||
expect do
|
|
||||||
perform!
|
|
||||||
end.not_to change(project_repositories, :count)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'does nothing for projects on legacy storage' do
|
it 'does nothing for projects on legacy storage' do
|
||||||
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
|
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0)
|
||||||
|
|
||||||
expect do
|
expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count)
|
||||||
perform!
|
|
||||||
end.not_to change(project_repositories, :count)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'inserts rows in a single query' do
|
it 'inserts rows in a single query' do
|
||||||
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
|
projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
|
||||||
|
|
||||||
control_count = ActiveRecord::QueryRecorder.new do
|
control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) }
|
||||||
perform!
|
|
||||||
end
|
|
||||||
|
|
||||||
projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
|
projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
|
||||||
projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name)
|
projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name)
|
||||||
|
|
||||||
expect { perform! }.not_to exceed_query_limit(control_count)
|
expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue