diff --git a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb index b989d9fb43d..7814cdba58a 100644 --- a/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb +++ b/db/post_migrate/20181130102132_backfill_hashed_project_repositories.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0] +class BackfillHashedProjectRepositories < ActiveRecord::Migration[4.2] include Gitlab::Database::MigrationHelpers DOWNTIME = false BATCH_SIZE = 1_000 - DELAY_INTERVAL = 1.minutes + DELAY_INTERVAL = 5.minutes MIGRATION = 'BackfillHashedProjectRepositories' disable_ddl_transaction! @@ -21,7 +21,6 @@ class BackfillHashedProjectRepositories < ActiveRecord::Migration[5.0] end def down - # Since there could have been existing rows before the migration - # do not remove anything + # no-op: since there could have been existing rows before the migration do not remove anything end end diff --git a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb index 88696dd1aa6..2f76f2f7434 100644 --- a/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_hashed_project_repositories.rb @@ -2,58 +2,60 @@ module Gitlab module BackgroundMigration - # Class the will create rows in project_repositories for all - # projects that are on hashed storage + # Class that will create fill the project_repositories table + # for all projects that are on hashed storage and an entry is + # is missing in this table. class BackfillHashedProjectRepositories - # Model for a Shard + # Shard model class Shard < ActiveRecord::Base self.table_name = 'shards' - - def self.by_name(name) - to_a.detect { |shard| shard.name == name } || create_by(name: name) - rescue ActiveRecord::RecordNotUnique - retry - end end # Class that will find or create the shard by name. - # There is only a small set of shards, which would not change quickly, - # so look them up from memory instead of hitting the DB each time. + # There is only a small set of shards, which would + # not change quickly, so look them up from memory + # instead of hitting the DB each time. class ShardFinder - def find(name) - shards.detect { |shard| shard.name == name } || create!(name) + def find_shard_id(name) + shard_id = shards.fetch(name, nil) + return shard_id if shard_id.present? + + Shard.transaction(requires_new: true) do + create!(name) + end rescue ActiveRecord::RecordNotUnique - load! + reload! retry end private def create!(name) - Shard.create!(name: name).tap { |shard| @shards << shard } + Shard.create!(name: name).tap { |shard| @shards[name] = shard.id } end def shards - @shards || load! + @shards ||= reload! end - def load! - @shards = Shard.all.to_a + def reload! + @shards = Hash[*Shard.all.map { |shard| [shard.name, shard.id] }.flatten] end end - # Model for a ProjectRepository + # ProjectRegistry model class ProjectRepository < ActiveRecord::Base self.table_name = 'project_repositories' belongs_to :project, inverse_of: :project_repository end - # Model for a Project + # Project model class Project < ActiveRecord::Base self.table_name = 'projects' HASHED_PATH_PREFIX = '@hashed' + HASHED_STORAGE_FEATURES = { repository: 1, attachments: 2 @@ -63,33 +65,26 @@ module Gitlab class << self def on_hashed_storage - where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[:repository])) + where(Project.arel_table[:storage_version] + .gteq(HASHED_STORAGE_FEATURES[:repository])) end def without_project_repository - cond = ProjectRepository.arel_table[:project_id].eq(nil) - left_outer_joins(:project_repository).where(cond) + joins(left_outer_join_project_repository) + .where(ProjectRepository.arel_table[:project_id].eq(nil)) end - def left_outer_joins(relation) - return super if Gitlab.rails5? + def left_outer_join_project_repository + projects_table = Project.arel_table + repository_table = ProjectRepository.arel_table - # TODO Rails 4? + projects_table + .join(repository_table, Arel::Nodes::OuterJoin) + .on(projects_table[:id].eq(repository_table[:project_id])) + .join_sources end end - def project_repository_attributes(shard_finder) - return unless hashed_storage? - - { - project_id: id, - shard_id: shard_finder.find(repository_storage).id, - disk_path: hashed_disk_path - } - end - - private - def hashed_storage? self.storage_version && self.storage_version >= 1 end @@ -99,7 +94,7 @@ module Gitlab end def disk_hash - @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) if id + @disk_hash ||= Digest::SHA2.hexdigest(id.to_s) end end @@ -110,12 +105,27 @@ module Gitlab private def project_repositories(start_id, stop_id) - Project.on_hashed_storage.without_project_repository + Project.on_hashed_storage + .without_project_repository .where(id: start_id..stop_id) - .map { |project| project.project_repository_attributes(shard_finder) } + .map { |project| build_attributes_for_project(project) } .compact end + def build_attributes_for_project(project) + return unless project.hashed_storage? + + { + project_id: project.id, + shard_id: find_shard_id(project.repository_storage), + disk_path: project.hashed_disk_path + } + end + + def find_shard_id(repository_storage) + shard_finder.find_shard_id(repository_storage) + end + def shard_finder @shard_finder ||= ShardFinder.new end diff --git a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb index d2f499ffa64..b6c1edbbf8b 100644 --- a/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb +++ b/spec/lib/gitlab/background_migration/backfill_hashed_project_repositories_spec.rb @@ -3,59 +3,31 @@ require 'spec_helper' describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migration, schema: 20181130102132 do - let(:shards) { table(:shards) } let(:namespaces) { table(:namespaces) } - let(:projects) { table(:projects) } let(:project_repositories) { table(:project_repositories) } + let(:projects) { table(:projects) } + let(:shards) { table(:shards) } let(:group) { namespaces.create!(name: 'foo', path: 'foo') } - let(:default_shard) { shards.create!(name: 'default') } + let(:shard) { shards.create!(name: 'default') } describe described_class::ShardFinder do - describe '#find' do - subject(:finder) { described_class.new } - - it 'creates the shard by name' do - expect(finder).to receive(:create!).and_call_original - - expect(finder.find('default')).to be_present - end - - it 'does not try to create existing shards' do - shards.create(name: 'default') - - expect(finder).not_to receive(:create!) - - finder.find('default') - end - - it 'only queries the database once for shards' do - finder.find('default') - - expect do - finder.find('default') - end.not_to exceed_query_limit(0) - end - + describe '#find_shard_id' do it 'creates a new shard when it does not exist yet' do - expect do - finder.find('other') - end.to change(shards, :count).by(1) + expect { subject.find_shard_id('other') }.to change(shards, :count).by(1) end - it 'only creates a new shard once' do - finder.find('other') + it 'returns the shard when it exists' do + shards.create(id: 5, name: 'other') - expect do - finder.find('other') - end.not_to change(shards, :count) + shard_id = subject.find_shard_id('other') + + expect(shard_id).to eq(5) end - it 'is not vulnerable to race conditions' do - finder.find('default') + it 'only queries the database once to retrieve shards' do + subject.find_shard_id('default') - other_shard = shards.create(name: 'other') - - expect(finder.find('other').id).to eq(other_shard.id) + expect { subject.find_shard_id('default') }.not_to exceed_query_limit(0) end end end @@ -63,93 +35,56 @@ describe Gitlab::BackgroundMigration::BackfillHashedProjectRepositories, :migrat describe described_class::Project do describe '.on_hashed_storage' do it 'finds projects with repository on hashed storage' do - hashed_projects = [ - projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1), - projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2) - ] + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2) + projects.create!(id: 3, name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0) + projects.create!(id: 4, name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: nil) - projects.create!(name: 'baz', path: 'baz', namespace_id: group.id, storage_version: 0) - projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: nil) - - expect(described_class.on_hashed_storage.pluck(:id)).to match_array(hashed_projects.map(&:id)) + expect(described_class.on_hashed_storage.pluck(:id)).to match_array([1, 2]) end end describe '.without_project_repository' do - it 'finds projects which do not have a projects_repositories row' do - without_project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id) - with_project = projects.create!(name: 'bar', path: 'bar', namespace_id: group.id) - project_repositories.create!(project_id: with_project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id) + it 'finds projects which do not have a projects_repositories entry' do + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id) + projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id) + project_repositories.create!(project_id: 2, disk_path: '@phony/foo/bar', shard_id: shard.id) - expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(without_project.id) - end - end - - describe '#project_repository_attributes' do - let(:shard_finder) { Gitlab::BackgroundMigration::BackfillHashedProjectRepositories::ShardFinder.new } - - it 'composes the correct attributes for project_repository' do - shiny_shard = shards.create!(name: 'shiny') - project = projects.create!(id: 5, name: 'foo', path: 'foo', namespace_id: group.id, repository_storage: shiny_shard.name, storage_version: 1) - - expected_attributes = { - project_id: project.id, - shard_id: shiny_shard.id, - disk_path: '@hashed/ef/2d/ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d' - } - - expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to eq(expected_attributes) - end - - it 'returns nil for a project not on hashed storage' do - project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0) - - expect(described_class.find(project.id).project_repository_attributes(shard_finder)).to be_nil + expect(described_class.without_project_repository.pluck(:id)).to contain_exactly(1) end end end describe '#perform' do - def perform! - described_class.new.perform(1, projects.last.id) + it 'creates a project_repository row for projects on hashed storage that need one' do + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + projects.create!(id: 2, name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 2) + + expect { described_class.new.perform(1, projects.last.id) }.to change(project_repositories, :count).by(2) end - it 'create project_repository row for hashed storage project' do - projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + it 'does nothing for projects on hashed storage that have already a project_repository row' do + projects.create!(id: 1, name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) + project_repositories.create!(project_id: 1, disk_path: '@phony/foo/bar', shard_id: shard.id) - expect do - perform! - end.to change(project_repositories, :count).by(1) - end - - it 'does nothing for projects that have already a project_repository' do - project = projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1) - project_repositories.create!(project_id: project.id, disk_path: '@phony/foo/bar', shard_id: default_shard.id) - - expect do - perform! - end.not_to change(project_repositories, :count) + expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count) end it 'does nothing for projects on legacy storage' do projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 0) - expect do - perform! - end.not_to change(project_repositories, :count) + expect { described_class.new.perform(1, projects.last.id) }.not_to change(project_repositories, :count) end it 'inserts rows in a single query' do - projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name) + projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name) - control_count = ActiveRecord::QueryRecorder.new do - perform! - end + control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) } - projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name) - projects.create!(name: 'quz', path: 'quz', namespace_id: group.id, storage_version: 1, repository_storage: default_shard.name) + projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: 1, repository_storage: shard.name) + projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: 1, repository_storage: shard.name) - expect { perform! }.not_to exceed_query_limit(control_count) + expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count) end end end