From 618875c58c50b0c9dcfa90033ebe975004f401a2 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 7 Jan 2019 23:31:30 -0800 Subject: [PATCH 1/4] Fix duplicate disk path in Backfill ProjectRepos On GitLab.com, we saw numerous duplicate disk entry inserts because the migration was not taking the routes table into account. We now implement this in the migration to be consistent. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/56061 --- .../backfill_project_repositories.rb | 13 ++++++++++++- .../backfill_project_repositories_spec.rb | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb index aaf520d70f6..2a7f10c21e1 100644 --- a/lib/gitlab/background_migration/backfill_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_project_repositories.rb @@ -83,7 +83,7 @@ module Gitlab extend ActiveSupport::Concern def full_path - @full_path ||= build_full_path + route&.path || build_full_path end def build_full_path @@ -99,6 +99,9 @@ module Gitlab end end + class Route < ActiveRecord::Base + end + # Namespace model. class Namespace < ActiveRecord::Base self.table_name = 'namespaces' @@ -110,6 +113,10 @@ module Gitlab has_many :projects, inverse_of: :parent has_many :namespaces, inverse_of: :parent + + def route + Route.find_by(source_type: 'Namespace', source_id: self.id) + end end # ProjectRegistry model @@ -165,6 +172,10 @@ module Gitlab end end + def route + Route.find_by(source_type: 'Project', source_id: self.id) + end + def storage @storage ||= if hashed_storage? diff --git a/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb index 53c071f0268..d4662175bb1 100644 --- a/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb +++ b/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb @@ -34,6 +34,7 @@ describe Gitlab::BackgroundMigration::BackfillProjectRepositories do let!(:project_hashed_storage_2) { create(:project, name: 'bar', path: 'bar', namespace: group, storage_version: 2) } let!(:project_legacy_storage_3) { create(:project, name: 'baz', path: 'baz', namespace: group, storage_version: 0) } let!(:project_legacy_storage_4) { create(:project, name: 'zoo', path: 'zoo', namespace: group, storage_version: nil) } + let!(:project_legacy_storage_5) { create(:project, name: 'test', path: 'test', namespace: group, storage_version: nil) } describe '.on_hashed_storage' do it 'finds projects with repository on hashed storage' do @@ -47,7 +48,7 @@ describe Gitlab::BackgroundMigration::BackfillProjectRepositories do it 'finds projects with repository on legacy storage' do projects = described_class.on_legacy_storage.pluck(:id) - expect(projects).to match_array([project_legacy_storage_3.id, project_legacy_storage_4.id]) + expect(projects).to match_array([project_legacy_storage_3.id, project_legacy_storage_4.id, project_legacy_storage_5.id]) end end @@ -58,7 +59,7 @@ describe Gitlab::BackgroundMigration::BackfillProjectRepositories do projects = described_class.without_project_repository.pluck(:id) - expect(projects).to contain_exactly(project_hashed_storage_2.id, project_legacy_storage_4.id) + expect(projects).to contain_exactly(project_hashed_storage_2.id, project_legacy_storage_4.id, project_legacy_storage_5.id) end end @@ -78,12 +79,21 @@ describe Gitlab::BackgroundMigration::BackfillProjectRepositories do expect(project.disk_path).to eq(project_legacy_storage_3.disk_path) end + it 'returns the correct disk_path using the route entry' do + project_legacy_storage_5.route.update(path: 'zoo/new-test') + project = described_class.find(project_legacy_storage_5.id) + + expect(project.disk_path).to eq('zoo/new-test') + end + it 'raises OrphanedNamespaceError when any parent namespace does not exist' do subgroup = create(:group, parent: group) project_orphaned_namespace = create(:project, name: 'baz', path: 'baz', namespace: subgroup, storage_version: nil) subgroup.update_column(:parent_id, Namespace.maximum(:id).succ) project = described_class.find(project_orphaned_namespace.id) + project.route.destroy + subgroup.route.destroy expect { project.disk_path } .to raise_error(Gitlab::BackgroundMigration::BackfillProjectRepositories::OrphanedNamespaceError) From d3e028b85f3f380239394f9ebf274357abf7142a Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 9 Jan 2019 10:47:24 +0100 Subject: [PATCH 2/4] Load all projects, namespaces, routes in 1 query Avoid doing sequential database queries to load the namespaces and the routes of projects and namespaces. This results in the following query: ```sql SELECT "projects"."id" AS t0_r0, "projects"."name" AS t0_r1, "projects"."path" AS t0_r2, "projects"."description" AS t0_r3, "projects"."created_at" AS t0_r4, "projects"."updated_at" AS t0_r5, "projects"."creator_id" AS t0_r6, "projects"."namespace_id" AS t0_r7, "projects"."last_activity_at" AS t0_r8, "projects"."import_url" AS t0_r9, "projects"."visibility_level" AS t0_r10, "projects"."archived" AS t0_r11, "projects"."avatar" AS t0_r12, "projects"."import_status" AS t0_r13, "projects"."star_count" AS t0_r14, "projects"."import_type" AS t0_r15, "projects"."import_source" AS t0_r16, "projects"."import_error" AS t0_r17, "projects"."ci_id" AS t0_r18, "projects"."shared_runners_enabled" AS t0_r19, "projects"."runners_token" AS t0_r20, "projects"."build_coverage_regex" AS t0_r21, "projects"."build_allow_git_fetch" AS t0_r22, "projects"."build_timeout" AS t0_r23, "projects"."pending_delete" AS t0_r24, "projects"."public_builds" AS t0_r25, "projects"."last_repository_check_failed" AS t0_r26, "projects"."last_repository_check_at" AS t0_r27, "projects"."container_registry_enabled" AS t0_r28, "projects"."only_allow_merge_if_pipeline_succeeds" AS t0_r29, "projects"."has_external_issue_tracker" AS t0_r30, "projects"."repository_storage" AS t0_r31, "projects"."request_access_enabled" AS t0_r32, "projects"."has_external_wiki" AS t0_r33, "projects"."ci_config_path" AS t0_r34, "projects"."lfs_enabled" AS t0_r35, "projects"."description_html" AS t0_r36, "projects"."only_allow_merge_if_all_discussions_are_resolved" AS t0_r37, "projects"."printing_merge_request_link_enabled" AS t0_r38, "projects"."auto_cancel_pending_pipelines" AS t0_r39, "projects"."import_jid" AS t0_r40, "projects"."cached_markdown_version" AS t0_r41, "projects"."delete_error" AS t0_r42, "projects"."last_repository_updated_at" AS t0_r43, "projects"."storage_version" AS t0_r44, "projects"."resolve_outdated_diff_discussions" AS t0_r45, "projects"."repository_read_only" AS t0_r46, "projects"."merge_requests_ff_only_enabled" AS t0_r47, "projects"."merge_requests_rebase_enabled" AS t0_r48, "projects"."jobs_cache_index" AS t0_r49, "projects"."pages_https_only" AS t0_r50, "projects"."remote_mirror_available_overridden" AS t0_r51, "projects"."pool_repository_id" AS t0_r52, "projects"."runners_token_encrypted" AS t0_r53, "projects"."bfg_object_map" AS t0_r54, "namespaces"."id" AS t1_r0, "namespaces"."name" AS t1_r1, "namespaces"."path" AS t1_r2, "namespaces"."owner_id" AS t1_r3, "namespaces"."created_at" AS t1_r4, "namespaces"."updated_at" AS t1_r5, "namespaces"."type" AS t1_r6, "namespaces"."description" AS t1_r7, "namespaces"."avatar" AS t1_r8, "namespaces"."share_with_group_lock" AS t1_r9, "namespaces"."visibility_level" AS t1_r10, "namespaces"."request_access_enabled" AS t1_r11, "namespaces"."description_html" AS t1_r12, "namespaces"."lfs_enabled" AS t1_r13, "namespaces"."parent_id" AS t1_r14, "namespaces"."require_two_factor_authentication" AS t1_r15, "namespaces"."two_factor_grace_period" AS t1_r16, "namespaces"."cached_markdown_version" AS t1_r17, "namespaces"."runners_token" AS t1_r18, "namespaces"."runners_token_encrypted" AS t1_r19, "routes"."id" AS t2_r0, "routes"."source_id" AS t2_r1, "routes"."source_type" AS t2_r2, "routes"."path" AS t2_r3, "routes"."created_at" AS t2_r4, "routes"."updated_at" AS t2_r5, "routes"."name" AS t2_r6, "routes_projects"."id" AS t3_r0, "routes_projects"."source_id" AS t3_r1, "routes_projects"."source_type" AS t3_r2, "routes_projects"."path" AS t3_r3, "routes_projects"."created_at" AS t3_r4, "routes_projects"."updated_at" AS t3_r5, "routes_projects"."name" AS t3_r6 FROM "projects" LEFT OUTER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id" LEFT OUTER JOIN "routes" ON "routes"."source_id" = "namespaces"."id" AND "routes"."source_type" = $1 LEFT OUTER JOIN "routes" "routes_projects" ON "routes_projects"."source_id" = "projects"."id" AND "routes_projects"."source_type" = $2 LEFT OUTER JOIN "project_repositories" ON "projects"."id" = "project_repositories"."project_id" WHERE ("projects"."storage_version" IS NULL OR "projects"."storage_version" = 0) AND "project_repositories"."project_id" IS NULL AND ("projects"."id" BETWEEN $3 AND $4); -- [["source_type", "Namespace"], -- ["source_type", "Project"], -- ["id", 1], -- ["id", 4]] ``` --- .../backfill_project_repositories.rb | 37 ++++++++++++++----- .../backfill_project_repositories_examples.rb | 4 +- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb index 2a7f10c21e1..d03ad813d9e 100644 --- a/lib/gitlab/background_migration/backfill_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_project_repositories.rb @@ -99,10 +99,12 @@ module Gitlab end end + # Route model class Route < ActiveRecord::Base + belongs_to :source, inverse_of: :route end - # Namespace model. + # Namespace model class Namespace < ActiveRecord::Base self.table_name = 'namespaces' self.inheritance_column = nil @@ -111,12 +113,10 @@ module Gitlab belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces' + has_one :route, -> { where(source_type: 'Namespace') }, inverse_of: :source, foreign_key: :source_id + has_many :projects, inverse_of: :parent has_many :namespaces, inverse_of: :parent - - def route - Route.find_by(source_type: 'Namespace', source_id: self.id) - end end # ProjectRegistry model @@ -141,6 +141,7 @@ module Gitlab belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects' + has_one :route, -> { where(source_type: 'Project') }, inverse_of: :source, foreign_key: :source_id has_one :project_repository, inverse_of: :project delegate :disk_path, to: :storage @@ -172,10 +173,6 @@ module Gitlab end end - def route - Route.find_by(source_type: 'Project', source_id: self.id) - end - def storage @storage ||= if hashed_storage? @@ -192,7 +189,11 @@ module Gitlab end def perform(start_id, stop_id) + ActiveRecord::Base.logger = Logger.new(STDOUT) + Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) + + ActiveRecord::Base.logger = nil end private @@ -203,13 +204,31 @@ module Gitlab end def project_repositories(start_id, stop_id) +# .eager_load(:route, :parent, parent: [:route]) +# .includes(:parent, :route, parent: [:route]).references(:namespaces) +# .includes(:parent).references(:namespaces) +# .joins(*routes_joins).references(:routes) + + projects .without_project_repository + .includes(:route, parent: [:route]).references(:routes) + .includes(:parent).references(:namespaces) .where(id: start_id..stop_id) .map { |project| build_attributes_for_project(project) } .compact end + def routes_joins + routes = Route.arel_table + projects = Project.arel_table + routes_projects = routes.alias('routes_projects') + + projects.join(routes, Arel::Nodes::OuterJoin).on(projects[:namespace_id].eq(routes[:source_id]).and(routes[:source_type].eq('Namespace'))) + .join(routes_projects, Arel::Nodes::OuterJoin).on(projects[:id].eq(routes_projects[:source_id]).and(routes_projects[:source_type].eq('Project'))) + .join_sources + end + def build_attributes_for_project(project) { project_id: project.id, diff --git a/spec/support/shared_examples/lib/gitlab/background_migration/backfill_project_repositories_examples.rb b/spec/support/shared_examples/lib/gitlab/background_migration/backfill_project_repositories_examples.rb index 1f688c0f9d3..dcf7c1a90c2 100644 --- a/spec/support/shared_examples/lib/gitlab/background_migration/backfill_project_repositories_examples.rb +++ b/spec/support/shared_examples/lib/gitlab/background_migration/backfill_project_repositories_examples.rb @@ -32,11 +32,13 @@ shared_examples 'backfill migration for project repositories' do |storage| it 'inserts rows in a single query' do projects.create!(name: 'foo', path: 'foo', namespace_id: group.id, storage_version: storage_version, repository_storage: shard.name) + group2 = namespaces.create!(name: 'gro', path: 'gro') control_count = ActiveRecord::QueryRecorder.new { described_class.new.perform(1, projects.last.id) } projects.create!(name: 'bar', path: 'bar', namespace_id: group.id, storage_version: storage_version, repository_storage: shard.name) - projects.create!(name: 'zoo', path: 'zoo', namespace_id: group.id, storage_version: storage_version, repository_storage: shard.name) + projects.create!(name: 'top', path: 'top', namespace_id: group.id, storage_version: storage_version, repository_storage: shard.name) + projects.create!(name: 'zoo', path: 'zoo', namespace_id: group2.id, storage_version: storage_version, repository_storage: shard.name) expect { described_class.new.perform(1, projects.last.id) }.not_to exceed_query_limit(control_count) end From 082cc1222512c826fa12b5e0fc99cb72da249f72 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 9 Jan 2019 21:52:57 +0100 Subject: [PATCH 3/4] Fix failing spec with orphaned namespace --- .../background_migration/backfill_project_repositories.rb | 2 +- .../background_migration/backfill_project_repositories_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb index d03ad813d9e..46aa7019f03 100644 --- a/lib/gitlab/background_migration/backfill_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_project_repositories.rb @@ -101,7 +101,7 @@ module Gitlab # Route model class Route < ActiveRecord::Base - belongs_to :source, inverse_of: :route + belongs_to :source, inverse_of: :route, polymorphic: true end # Namespace model diff --git a/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb b/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb index d4662175bb1..f4a6f4be754 100644 --- a/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb +++ b/spec/lib/gitlab/background_migration/backfill_project_repositories_spec.rb @@ -95,7 +95,7 @@ describe Gitlab::BackgroundMigration::BackfillProjectRepositories do project.route.destroy subgroup.route.destroy - expect { project.disk_path } + expect { project.reload.disk_path } .to raise_error(Gitlab::BackgroundMigration::BackfillProjectRepositories::OrphanedNamespaceError) end end From 83767dd0b3d6fd477f6b08659737a29e2644343e Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 9 Jan 2019 21:54:38 +0100 Subject: [PATCH 4/4] Cleanup dead code and comments --- ...sh-fix-backfill-project-repo-migration.yml | 5 +++++ .../backfill_project_repositories.rb | 20 ------------------- 2 files changed, 5 insertions(+), 20 deletions(-) create mode 100644 changelogs/unreleased/sh-fix-backfill-project-repo-migration.yml diff --git a/changelogs/unreleased/sh-fix-backfill-project-repo-migration.yml b/changelogs/unreleased/sh-fix-backfill-project-repo-migration.yml new file mode 100644 index 00000000000..d1d4412eb50 --- /dev/null +++ b/changelogs/unreleased/sh-fix-backfill-project-repo-migration.yml @@ -0,0 +1,5 @@ +--- +title: Fix duplicate project disk path in BackfillLegacyProjectRepositories +merge_request: 24213 +author: +type: changed diff --git a/lib/gitlab/background_migration/backfill_project_repositories.rb b/lib/gitlab/background_migration/backfill_project_repositories.rb index 46aa7019f03..c8d83cc1803 100644 --- a/lib/gitlab/background_migration/backfill_project_repositories.rb +++ b/lib/gitlab/background_migration/backfill_project_repositories.rb @@ -189,11 +189,7 @@ module Gitlab end def perform(start_id, stop_id) - ActiveRecord::Base.logger = Logger.new(STDOUT) - Gitlab::Database.bulk_insert(:project_repositories, project_repositories(start_id, stop_id)) - - ActiveRecord::Base.logger = nil end private @@ -204,12 +200,6 @@ module Gitlab end def project_repositories(start_id, stop_id) -# .eager_load(:route, :parent, parent: [:route]) -# .includes(:parent, :route, parent: [:route]).references(:namespaces) -# .includes(:parent).references(:namespaces) -# .joins(*routes_joins).references(:routes) - - projects .without_project_repository .includes(:route, parent: [:route]).references(:routes) @@ -219,16 +209,6 @@ module Gitlab .compact end - def routes_joins - routes = Route.arel_table - projects = Project.arel_table - routes_projects = routes.alias('routes_projects') - - projects.join(routes, Arel::Nodes::OuterJoin).on(projects[:namespace_id].eq(routes[:source_id]).and(routes[:source_type].eq('Namespace'))) - .join(routes_projects, Arel::Nodes::OuterJoin).on(projects[:id].eq(routes_projects[:source_id]).and(routes_projects[:source_type].eq('Project'))) - .join_sources - end - def build_attributes_for_project(project) { project_id: project.id,