From 48e993af9bd7e05c95a52e242e2a0bedf628d49a Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Fri, 7 Sep 2018 07:38:44 -0700 Subject: [PATCH] Remove orphaned label links On GitLab.com, there are over 2 million orphaned label links out of a total of 13 million. These orphaned label links can cause quiet failures, such as unexpected nil values in ExportCsvWorker. Closes gitlab-org/gitlab-ee#7482 --- .../sh-remove-orphaned-label-links.yml | 5 +++ ...80906051323_remove_orphaned_label_links.rb | 43 +++++++++++++++++++ db/schema.rb | 1 + .../remove_orphaned_label_links_spec.rb | 40 +++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 changelogs/unreleased/sh-remove-orphaned-label-links.yml create mode 100644 db/post_migrate/20180906051323_remove_orphaned_label_links.rb create mode 100644 spec/migrations/remove_orphaned_label_links_spec.rb diff --git a/changelogs/unreleased/sh-remove-orphaned-label-links.yml b/changelogs/unreleased/sh-remove-orphaned-label-links.yml new file mode 100644 index 00000000000..b035b57ff1b --- /dev/null +++ b/changelogs/unreleased/sh-remove-orphaned-label-links.yml @@ -0,0 +1,5 @@ +--- +title: Remove orphaned label links +merge_request: 21552 +author: +type: fixed diff --git a/db/post_migrate/20180906051323_remove_orphaned_label_links.rb b/db/post_migrate/20180906051323_remove_orphaned_label_links.rb new file mode 100644 index 00000000000..b56b74f483e --- /dev/null +++ b/db/post_migrate/20180906051323_remove_orphaned_label_links.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +class RemoveOrphanedLabelLinks < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + class LabelLinks < ActiveRecord::Base + self.table_name = 'label_links' + include EachBatch + + def self.orphaned + where('NOT EXISTS ( SELECT 1 FROM labels WHERE labels.id = label_links.label_id )') + end + end + + def up + # Some of these queries can take up to 10 seconds to run on GitLab.com, + # which is pretty close to our 15 second statement timeout. To ensure a + # smooth deployment procedure we disable the statement timeouts for this + # migration, just in case. + disable_statement_timeout do + # On GitLab.com there are over 2,000,000 orphaned label links. On + # staging, removing 100,000 rows generated a max replication lag of 6.7 + # MB. In total, removing all these rows will only generate about 136 MB + # of data, so it should be safe to do this. + LabelLinks.orphaned.each_batch(of: 100_000) do |batch| + batch.delete_all + end + end + + add_concurrent_foreign_key(:label_links, :labels, column: :label_id, on_delete: :cascade) + end + + def down + # There is no way to restore orphaned label links. + if foreign_key_exists?(:label_links, column: :label_id) + remove_foreign_key(:label_links, column: :label_id) + end + end +end diff --git a/db/schema.rb b/db/schema.rb index a417d0bc70a..98ae9f7b131 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2357,6 +2357,7 @@ ActiveRecord::Schema.define(version: 20180906101639) do add_foreign_key "issues", "users", column: "author_id", name: "fk_05f1e72feb", on_delete: :nullify add_foreign_key "issues", "users", column: "closed_by_id", name: "fk_c63cbf6c25", on_delete: :nullify add_foreign_key "issues", "users", column: "updated_by_id", name: "fk_ffed080f01", on_delete: :nullify + add_foreign_key "label_links", "labels", name: "fk_d97dd08678", on_delete: :cascade add_foreign_key "label_priorities", "labels", on_delete: :cascade add_foreign_key "label_priorities", "projects", on_delete: :cascade add_foreign_key "labels", "namespaces", column: "group_id", on_delete: :cascade diff --git a/spec/migrations/remove_orphaned_label_links_spec.rb b/spec/migrations/remove_orphaned_label_links_spec.rb new file mode 100644 index 00000000000..13b8919343e --- /dev/null +++ b/spec/migrations/remove_orphaned_label_links_spec.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20180906051323_remove_orphaned_label_links.rb') + +describe RemoveOrphanedLabelLinks, :migration do + let(:label_links) { table(:label_links) } + let(:labels) { table(:labels) } + + let(:project) { create(:project) } # rubocop:disable RSpec/FactoriesInMigrationSpecs + let(:label) { create_label } + + context 'add foreign key on label_id' do + let!(:label_link_with_label) { create_label_link(label_id: label.id) } + let!(:label_link_without_label) { create_label_link(label_id: nil) } + + it 'removes orphaned labels without corresponding label' do + expect { migrate! }.to change { LabelLink.count }.from(2).to(1) + end + + it 'does not remove entries with valid label_id' do + expect { migrate! }.not_to change { label_link_with_label.reload } + end + end + + def create_label(**opts) + labels.create!( + project_id: project.id, + **opts + ) + end + + def create_label_link(**opts) + label_links.create!( + target_id: 1, + target_type: 'Issue', + **opts + ) + end +end