From 1af540122563c372c03668f36e6a2123bff94222 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 2 May 2017 13:59:18 +0200 Subject: [PATCH 1/4] Add post-deploy migrate to cleanup projects in pending delete state There are many projects in `pending_delete` state, this post-deploy migration cleans them up. The script is based on https://gitlab.com/gitlab-org/gitlab-ce/snippets/1648654 and https://gitlab.com/gitlab-org/gitlab-ce/snippets/1611429. The use of these scripts were described in https://gitlab.com/gitlab-com/infrastructure/issues/888. --- .../tc-clean-pending-delete-projects.yml | 4 ++ ...101023_clean_up_pending_delete_projects.rb | 47 ++++++++++++ .../clean_up_pending_delete_projects_spec.rb | 72 +++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 changelogs/unreleased/tc-clean-pending-delete-projects.yml create mode 100644 db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb create mode 100644 spec/migrations/clean_up_pending_delete_projects_spec.rb diff --git a/changelogs/unreleased/tc-clean-pending-delete-projects.yml b/changelogs/unreleased/tc-clean-pending-delete-projects.yml new file mode 100644 index 00000000000..31b43999c31 --- /dev/null +++ b/changelogs/unreleased/tc-clean-pending-delete-projects.yml @@ -0,0 +1,4 @@ +--- +title: Add post-deploy migration to clean up projects in `pending_delete` state +merge_request: 11044 +author: diff --git a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb b/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb new file mode 100644 index 00000000000..52c7c160ea2 --- /dev/null +++ b/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb @@ -0,0 +1,47 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class CleanUpPendingDeleteProjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + admin = User.find_by(admin: true) + return unless admin + + Project.unscoped.where(pending_delete: true).each { |project| delete_project(project, admin) } + end + + def down + # noop + end + + private + + def delete_project(project, user) + project.team.truncate + + unlink_fork(project) if project.forked? + + [:events, :issues, :merge_requests, :labels, :milestones, :notes, :snippets].each do |thing| + project.send(thing).delete_all + end + + # Override Project#remove_pages for this instance so it doesn't do anything + def project.remove_pages + end + + project.destroy! + end + + def unlink_fork(project) + merge_requests = project.forked_from_project.merge_requests.opened.from_project(project) + + merge_requests.update_all(state: 'closed') + + project.forked_project_link.destroy + end +end diff --git a/spec/migrations/clean_up_pending_delete_projects_spec.rb b/spec/migrations/clean_up_pending_delete_projects_spec.rb new file mode 100644 index 00000000000..a7c13f91245 --- /dev/null +++ b/spec/migrations/clean_up_pending_delete_projects_spec.rb @@ -0,0 +1,72 @@ +# encoding: utf-8 + +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170502101023_clean_up_pending_delete_projects.rb') + +describe CleanUpPendingDeleteProjects do + let(:migration) { described_class.new } + let!(:admin) { create(:admin) } + let!(:project) { create(:empty_project, pending_delete: true) } + + describe '#up' do + it 'only cleans up pending delete projects' do + create(:empty_project) + + expect do + migration.up + end.to change { Project.unscoped.count }.by(-1) + end + + it "truncates the project's team" do + project.add_master(admin) + + expect_any_instance_of(ProjectTeam).to receive(:truncate) + + migration.up + end + + it 'calls Project#destroy!' do + expect_any_instance_of(Project).to receive(:destroy!) + + migration.up + end + + it 'does not do anything in Project#remove_pages method' do + expect(Gitlab::PagesTransfer).not_to receive(:new) + + migration.up + end + + context 'project not a fork of another project' do + it "doesn't call unlink_fork" do + expect(migration).not_to receive(:unlink_fork) + + migration.up + end + end + + context 'project forked from another' do + let!(:parent_project) { create(:empty_project) } + + before do + create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project) + end + + it 'closes open merge requests' do + project.update_attribute(:pending_delete, false) # needed to create the MR + merge_request = create(:merge_request, source_project: project, target_project: parent_project) + project.update_attribute(:pending_delete, true) + + migration.up + + expect(merge_request.reload).to be_closed + end + + it 'destroys the link' do + migration.up + + expect(parent_project.forked_project_links).to be_empty + end + end + end +end From 4f446dd45a4068a77f606c02c95389c5cf6a6647 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 3 May 2017 16:48:20 +0200 Subject: [PATCH 2/4] No user is needed to delete a project --- .../20170502101023_clean_up_pending_delete_projects.rb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb b/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb index 52c7c160ea2..dd812964902 100644 --- a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb +++ b/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb @@ -9,9 +9,6 @@ class CleanUpPendingDeleteProjects < ActiveRecord::Migration disable_ddl_transaction! def up - admin = User.find_by(admin: true) - return unless admin - Project.unscoped.where(pending_delete: true).each { |project| delete_project(project, admin) } end @@ -21,7 +18,7 @@ class CleanUpPendingDeleteProjects < ActiveRecord::Migration private - def delete_project(project, user) + def delete_project(project) project.team.truncate unlink_fork(project) if project.forked? From 0ad80cab40097658b1eec5b87d440cfcd60d2755 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 8 May 2017 17:13:02 +0200 Subject: [PATCH 3/4] Use worker to destroy namespaceless projects in post-deploy Destroying projects can be very time consuming. So instead of destroying them in the post-deploy, just schedule them and make Sidekiq do the hard work. They are scheduled in batches of 5000 records. This way the number of database requests is limited while also the amount data read to memory is limited. --- .../namespaceless_project_destroy_worker.rb | 46 +++++++++++ config/sidekiq_queues.yml | 1 + ...101023_clean_up_pending_delete_projects.rb | 44 ---------- ...p_namespaceless_pending_delete_projects.rb | 50 ++++++++++++ .../clean_up_pending_delete_projects_spec.rb | 72 ----------------- ...espaceless_pending_delete_projects_spec.rb | 24 ++++++ ...mespaceless_project_destroy_worker_spec.rb | 81 +++++++++++++++++++ 7 files changed, 202 insertions(+), 116 deletions(-) create mode 100644 app/workers/namespaceless_project_destroy_worker.rb delete mode 100644 db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb create mode 100644 db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb delete mode 100644 spec/migrations/clean_up_pending_delete_projects_spec.rb create mode 100644 spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb create mode 100644 spec/workers/namespaceless_project_destroy_worker_spec.rb diff --git a/app/workers/namespaceless_project_destroy_worker.rb b/app/workers/namespaceless_project_destroy_worker.rb new file mode 100644 index 00000000000..da02084c688 --- /dev/null +++ b/app/workers/namespaceless_project_destroy_worker.rb @@ -0,0 +1,46 @@ +# Worker to destroy projects that do not have a namespace +# +# It destroys everything it can without having the info about the namespace it +# used to belong to. Projects in this state should be rare. +# The worker will reject doing anything for projects that *do* have a +# namespace. For those use ProjectDestroyWorker instead. +class NamespacelessProjectDestroyWorker + include Sidekiq::Worker + include DedicatedSidekiqQueue + + def self.bulk_perform_async(args_list) + Sidekiq::Client.push_bulk('class' => self, 'queue' => sidekiq_options['queue'], 'args' => args_list) + end + + def perform(project_id, user_id, params) + begin + project = Project.unscoped.find(project_id) + rescue ActiveRecord::RecordNotFound + return + end + return unless project.namespace_id.nil? # Reject doing anything for projects that *do* have a namespace + + user = User.find(user_id) + return unless user.can?(:remove_project, project) + + project.team.truncate + + unlink_fork(project) if project.forked? + + # Override Project#remove_pages for this instance so it doesn't do anything + def project.remove_pages + end + + project.destroy! + end + + private + + def unlink_fork(project) + merge_requests = project.forked_from_project.merge_requests.opened.from_project(project) + + merge_requests.update_all(state: 'closed') + + project.forked_project_link.destroy + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 433381e79d3..0ca1f565185 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -40,6 +40,7 @@ - [expire_build_instance_artifacts, 1] - [group_destroy, 1] - [irker, 1] + - [namespaceless_project_destroy, 1] - [project_cache, 1] - [project_destroy, 1] - [project_export, 1] diff --git a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb b/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb deleted file mode 100644 index dd812964902..00000000000 --- a/db/post_migrate/20170502101023_clean_up_pending_delete_projects.rb +++ /dev/null @@ -1,44 +0,0 @@ -# See http://doc.gitlab.com/ce/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. - -class CleanUpPendingDeleteProjects < ActiveRecord::Migration - include Gitlab::Database::MigrationHelpers - - DOWNTIME = false - - disable_ddl_transaction! - - def up - Project.unscoped.where(pending_delete: true).each { |project| delete_project(project, admin) } - end - - def down - # noop - end - - private - - def delete_project(project) - project.team.truncate - - unlink_fork(project) if project.forked? - - [:events, :issues, :merge_requests, :labels, :milestones, :notes, :snippets].each do |thing| - project.send(thing).delete_all - end - - # Override Project#remove_pages for this instance so it doesn't do anything - def project.remove_pages - end - - project.destroy! - end - - def unlink_fork(project) - merge_requests = project.forked_from_project.merge_requests.opened.from_project(project) - - merge_requests.update_all(state: 'closed') - - project.forked_project_link.destroy - end -end diff --git a/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb b/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb new file mode 100644 index 00000000000..76ed109898b --- /dev/null +++ b/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb @@ -0,0 +1,50 @@ +# This is the counterpart of RequeuePendingDeleteProjects and cleans all +# projects with `pending_delete = true` and that do not have a namespace. +class CleanupNamespacelessPendingDeleteProjects < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + admin = User.find_by(admin: true) + return unless admin + + @offset = 0 + + loop do + ids = pending_delete_batch + + break if ids.rows.count.zero? + + args = ids.map { |id| [id['id'], admin.id, {}] } + + NamespacelessProjectDestroyWorker.bulk_perform_async(args) + + @offset += 1 + end + end + + def down + # noop + end + + private + + def pending_delete_batch + connection.exec_query(find_batch) + end + + BATCH_SIZE = 5000 + + def find_batch + projects = Arel::Table.new(:projects) + projects.project(projects[:id]). + where(projects[:pending_delete].eq(true)). + where(projects[:namespace_id].eq(nil)). + skip(@offset * BATCH_SIZE). + take(BATCH_SIZE). + to_sql + end +end diff --git a/spec/migrations/clean_up_pending_delete_projects_spec.rb b/spec/migrations/clean_up_pending_delete_projects_spec.rb deleted file mode 100644 index a7c13f91245..00000000000 --- a/spec/migrations/clean_up_pending_delete_projects_spec.rb +++ /dev/null @@ -1,72 +0,0 @@ -# encoding: utf-8 - -require 'spec_helper' -require Rails.root.join('db', 'post_migrate', '20170502101023_clean_up_pending_delete_projects.rb') - -describe CleanUpPendingDeleteProjects do - let(:migration) { described_class.new } - let!(:admin) { create(:admin) } - let!(:project) { create(:empty_project, pending_delete: true) } - - describe '#up' do - it 'only cleans up pending delete projects' do - create(:empty_project) - - expect do - migration.up - end.to change { Project.unscoped.count }.by(-1) - end - - it "truncates the project's team" do - project.add_master(admin) - - expect_any_instance_of(ProjectTeam).to receive(:truncate) - - migration.up - end - - it 'calls Project#destroy!' do - expect_any_instance_of(Project).to receive(:destroy!) - - migration.up - end - - it 'does not do anything in Project#remove_pages method' do - expect(Gitlab::PagesTransfer).not_to receive(:new) - - migration.up - end - - context 'project not a fork of another project' do - it "doesn't call unlink_fork" do - expect(migration).not_to receive(:unlink_fork) - - migration.up - end - end - - context 'project forked from another' do - let!(:parent_project) { create(:empty_project) } - - before do - create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project) - end - - it 'closes open merge requests' do - project.update_attribute(:pending_delete, false) # needed to create the MR - merge_request = create(:merge_request, source_project: project, target_project: parent_project) - project.update_attribute(:pending_delete, true) - - migration.up - - expect(merge_request.reload).to be_closed - end - - it 'destroys the link' do - migration.up - - expect(parent_project.forked_project_links).to be_empty - end - end - end -end diff --git a/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb b/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb new file mode 100644 index 00000000000..cee12d521dc --- /dev/null +++ b/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb @@ -0,0 +1,24 @@ +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170502101023_cleanup_namespaceless_pending_delete_projects.rb') + +describe CleanupNamespacelessPendingDeleteProjects do + before do + # Stub after_save callbacks that will fail when Project has no namespace + allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil) + allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil) + end + + describe '#up' do + it 'only cleans up pending delete projects' do + admin = create(:admin) + create(:empty_project) + create(:empty_project, pending_delete: true) + project = build(:empty_project, pending_delete: true, namespace_id: nil) + project.save(validate: false) + + expect(NamespacelessProjectDestroyWorker).to receive(:bulk_perform_async).with([[project.id.to_s, admin.id, {}]]) + + described_class.new.up + end + end +end diff --git a/spec/workers/namespaceless_project_destroy_worker_spec.rb b/spec/workers/namespaceless_project_destroy_worker_spec.rb new file mode 100644 index 00000000000..ed78236ebc0 --- /dev/null +++ b/spec/workers/namespaceless_project_destroy_worker_spec.rb @@ -0,0 +1,81 @@ +require 'spec_helper' + +describe NamespacelessProjectDestroyWorker do + subject { described_class.new } + + before do + # Stub after_save callbacks that will fail when Project has no namespace + allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil) + allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil) + end + + describe '#perform' do + context 'project has namespace' do + it 'does not do anything' do + project = create(:empty_project) + + subject.perform(project.id, project.owner.id, {}) + + expect(Project.unscoped.all).to include(project) + end + end + + context 'project has no namespace' do + let(:admin) { create(:admin) } + + let!(:project) do + project = build(:empty_project, namespace_id: nil) + project.save(validate: false) + project + end + + context 'project not a fork of another project' do + it "truncates the project's team" do + expect_any_instance_of(ProjectTeam).to receive(:truncate) + + subject.perform(project.id, admin.id, {}) + end + + it 'deletes the project' do + subject.perform(project.id, admin.id, {}) + + expect(Project.unscoped.all).not_to include(project) + end + + it 'does not call unlink_fork' do + is_expected.not_to receive(:unlink_fork) + + subject.perform(project.id, admin.id, {}) + end + + it 'does not do anything in Project#remove_pages method' do + expect(Gitlab::PagesTransfer).not_to receive(:new) + + subject.perform(project.id, admin.id, {}) + end + end + + context 'project forked from another' do + let!(:parent_project) { create(:empty_project) } + + before do + create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project) + end + + it 'closes open merge requests' do + merge_request = create(:merge_request, source_project: project, target_project: parent_project) + + subject.perform(project.id, admin.id, {}) + + expect(merge_request.reload).to be_closed + end + + it 'destroys the link' do + subject.perform(project.id, admin.id, {}) + + expect(parent_project.forked_project_links).to be_empty + end + end + end + end +end From 37a79409d446955eeb443ea5397d5bf263601a2d Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 9 May 2017 22:16:52 +0200 Subject: [PATCH 4/4] No user needed to cleanup namespaceless pending delete projects Since this is a cleanup, ran by a post-deploy, there is no need to lookup the admin to run the cleanup. --- .../namespaceless_project_destroy_worker.rb | 5 +---- ...anup_namespaceless_pending_delete_projects.rb | 9 +++------ ...namespaceless_pending_delete_projects_spec.rb | 12 ++++++++++-- .../namespaceless_project_destroy_worker_spec.rb | 16 +++++++--------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/app/workers/namespaceless_project_destroy_worker.rb b/app/workers/namespaceless_project_destroy_worker.rb index da02084c688..bfae0c77700 100644 --- a/app/workers/namespaceless_project_destroy_worker.rb +++ b/app/workers/namespaceless_project_destroy_worker.rb @@ -12,7 +12,7 @@ class NamespacelessProjectDestroyWorker Sidekiq::Client.push_bulk('class' => self, 'queue' => sidekiq_options['queue'], 'args' => args_list) end - def perform(project_id, user_id, params) + def perform(project_id) begin project = Project.unscoped.find(project_id) rescue ActiveRecord::RecordNotFound @@ -20,9 +20,6 @@ class NamespacelessProjectDestroyWorker end return unless project.namespace_id.nil? # Reject doing anything for projects that *do* have a namespace - user = User.find(user_id) - return unless user.can?(:remove_project, project) - project.team.truncate unlink_fork(project) if project.forked? diff --git a/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb b/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb index 76ed109898b..2d242da9ef8 100644 --- a/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb +++ b/db/post_migrate/20170502101023_cleanup_namespaceless_pending_delete_projects.rb @@ -8,17 +8,14 @@ class CleanupNamespacelessPendingDeleteProjects < ActiveRecord::Migration disable_ddl_transaction! def up - admin = User.find_by(admin: true) - return unless admin - @offset = 0 loop do ids = pending_delete_batch - break if ids.rows.count.zero? + break if ids.empty? - args = ids.map { |id| [id['id'], admin.id, {}] } + args = ids.map { |id| Array(id) } NamespacelessProjectDestroyWorker.bulk_perform_async(args) @@ -33,7 +30,7 @@ class CleanupNamespacelessPendingDeleteProjects < ActiveRecord::Migration private def pending_delete_batch - connection.exec_query(find_batch) + connection.exec_query(find_batch).map{ |row| row['id'] } end BATCH_SIZE = 5000 diff --git a/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb b/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb index cee12d521dc..0b8af5010ba 100644 --- a/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb +++ b/spec/migrations/cleanup_namespaceless_pending_delete_projects_spec.rb @@ -10,13 +10,21 @@ describe CleanupNamespacelessPendingDeleteProjects do describe '#up' do it 'only cleans up pending delete projects' do - admin = create(:admin) create(:empty_project) create(:empty_project, pending_delete: true) project = build(:empty_project, pending_delete: true, namespace_id: nil) project.save(validate: false) - expect(NamespacelessProjectDestroyWorker).to receive(:bulk_perform_async).with([[project.id.to_s, admin.id, {}]]) + expect(NamespacelessProjectDestroyWorker).to receive(:bulk_perform_async).with([[project.id.to_s]]) + + described_class.new.up + end + + it 'does nothing when no pending delete projects without namespace found' do + create(:empty_project) + create(:empty_project, pending_delete: true) + + expect(NamespacelessProjectDestroyWorker).not_to receive(:bulk_perform_async) described_class.new.up end diff --git a/spec/workers/namespaceless_project_destroy_worker_spec.rb b/spec/workers/namespaceless_project_destroy_worker_spec.rb index ed78236ebc0..8533b7b85e9 100644 --- a/spec/workers/namespaceless_project_destroy_worker_spec.rb +++ b/spec/workers/namespaceless_project_destroy_worker_spec.rb @@ -14,15 +14,13 @@ describe NamespacelessProjectDestroyWorker do it 'does not do anything' do project = create(:empty_project) - subject.perform(project.id, project.owner.id, {}) + subject.perform(project.id) expect(Project.unscoped.all).to include(project) end end context 'project has no namespace' do - let(:admin) { create(:admin) } - let!(:project) do project = build(:empty_project, namespace_id: nil) project.save(validate: false) @@ -33,11 +31,11 @@ describe NamespacelessProjectDestroyWorker do it "truncates the project's team" do expect_any_instance_of(ProjectTeam).to receive(:truncate) - subject.perform(project.id, admin.id, {}) + subject.perform(project.id) end it 'deletes the project' do - subject.perform(project.id, admin.id, {}) + subject.perform(project.id) expect(Project.unscoped.all).not_to include(project) end @@ -45,13 +43,13 @@ describe NamespacelessProjectDestroyWorker do it 'does not call unlink_fork' do is_expected.not_to receive(:unlink_fork) - subject.perform(project.id, admin.id, {}) + subject.perform(project.id) end it 'does not do anything in Project#remove_pages method' do expect(Gitlab::PagesTransfer).not_to receive(:new) - subject.perform(project.id, admin.id, {}) + subject.perform(project.id) end end @@ -65,13 +63,13 @@ describe NamespacelessProjectDestroyWorker do it 'closes open merge requests' do merge_request = create(:merge_request, source_project: project, target_project: parent_project) - subject.perform(project.id, admin.id, {}) + subject.perform(project.id) expect(merge_request.reload).to be_closed end it 'destroys the link' do - subject.perform(project.id, admin.id, {}) + subject.perform(project.id) expect(parent_project.forked_project_links).to be_empty end