Use worker to destroy namespaceless projects in post-deploy

Destroying projects can be very time consuming. So instead of destroying them in
the post-deploy, just schedule them and make Sidekiq do the hard work.

They are scheduled in batches of 5000 records. This way the number of database
requests is limited while also the amount data read to memory is limited.
This commit is contained in:
Toon Claes 2017-05-08 17:13:02 +02:00
parent 4f446dd45a
commit 0ad80cab40
7 changed files with 202 additions and 116 deletions

View file

@ -0,0 +1,46 @@
# Worker to destroy projects that do not have a namespace
#
# It destroys everything it can without having the info about the namespace it
# used to belong to. Projects in this state should be rare.
# The worker will reject doing anything for projects that *do* have a
# namespace. For those use ProjectDestroyWorker instead.
class NamespacelessProjectDestroyWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
def self.bulk_perform_async(args_list)
Sidekiq::Client.push_bulk('class' => self, 'queue' => sidekiq_options['queue'], 'args' => args_list)
end
def perform(project_id, user_id, params)
begin
project = Project.unscoped.find(project_id)
rescue ActiveRecord::RecordNotFound
return
end
return unless project.namespace_id.nil? # Reject doing anything for projects that *do* have a namespace
user = User.find(user_id)
return unless user.can?(:remove_project, project)
project.team.truncate
unlink_fork(project) if project.forked?
# Override Project#remove_pages for this instance so it doesn't do anything
def project.remove_pages
end
project.destroy!
end
private
def unlink_fork(project)
merge_requests = project.forked_from_project.merge_requests.opened.from_project(project)
merge_requests.update_all(state: 'closed')
project.forked_project_link.destroy
end
end

View file

@ -40,6 +40,7 @@
- [expire_build_instance_artifacts, 1]
- [group_destroy, 1]
- [irker, 1]
- [namespaceless_project_destroy, 1]
- [project_cache, 1]
- [project_destroy, 1]
- [project_export, 1]

View file

@ -1,44 +0,0 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class CleanUpPendingDeleteProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
Project.unscoped.where(pending_delete: true).each { |project| delete_project(project, admin) }
end
def down
# noop
end
private
def delete_project(project)
project.team.truncate
unlink_fork(project) if project.forked?
[:events, :issues, :merge_requests, :labels, :milestones, :notes, :snippets].each do |thing|
project.send(thing).delete_all
end
# Override Project#remove_pages for this instance so it doesn't do anything
def project.remove_pages
end
project.destroy!
end
def unlink_fork(project)
merge_requests = project.forked_from_project.merge_requests.opened.from_project(project)
merge_requests.update_all(state: 'closed')
project.forked_project_link.destroy
end
end

View file

@ -0,0 +1,50 @@
# This is the counterpart of RequeuePendingDeleteProjects and cleans all
# projects with `pending_delete = true` and that do not have a namespace.
class CleanupNamespacelessPendingDeleteProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
admin = User.find_by(admin: true)
return unless admin
@offset = 0
loop do
ids = pending_delete_batch
break if ids.rows.count.zero?
args = ids.map { |id| [id['id'], admin.id, {}] }
NamespacelessProjectDestroyWorker.bulk_perform_async(args)
@offset += 1
end
end
def down
# noop
end
private
def pending_delete_batch
connection.exec_query(find_batch)
end
BATCH_SIZE = 5000
def find_batch
projects = Arel::Table.new(:projects)
projects.project(projects[:id]).
where(projects[:pending_delete].eq(true)).
where(projects[:namespace_id].eq(nil)).
skip(@offset * BATCH_SIZE).
take(BATCH_SIZE).
to_sql
end
end

View file

@ -1,72 +0,0 @@
# encoding: utf-8
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170502101023_clean_up_pending_delete_projects.rb')
describe CleanUpPendingDeleteProjects do
let(:migration) { described_class.new }
let!(:admin) { create(:admin) }
let!(:project) { create(:empty_project, pending_delete: true) }
describe '#up' do
it 'only cleans up pending delete projects' do
create(:empty_project)
expect do
migration.up
end.to change { Project.unscoped.count }.by(-1)
end
it "truncates the project's team" do
project.add_master(admin)
expect_any_instance_of(ProjectTeam).to receive(:truncate)
migration.up
end
it 'calls Project#destroy!' do
expect_any_instance_of(Project).to receive(:destroy!)
migration.up
end
it 'does not do anything in Project#remove_pages method' do
expect(Gitlab::PagesTransfer).not_to receive(:new)
migration.up
end
context 'project not a fork of another project' do
it "doesn't call unlink_fork" do
expect(migration).not_to receive(:unlink_fork)
migration.up
end
end
context 'project forked from another' do
let!(:parent_project) { create(:empty_project) }
before do
create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project)
end
it 'closes open merge requests' do
project.update_attribute(:pending_delete, false) # needed to create the MR
merge_request = create(:merge_request, source_project: project, target_project: parent_project)
project.update_attribute(:pending_delete, true)
migration.up
expect(merge_request.reload).to be_closed
end
it 'destroys the link' do
migration.up
expect(parent_project.forked_project_links).to be_empty
end
end
end
end

View file

@ -0,0 +1,24 @@
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170502101023_cleanup_namespaceless_pending_delete_projects.rb')
describe CleanupNamespacelessPendingDeleteProjects do
before do
# Stub after_save callbacks that will fail when Project has no namespace
allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil)
allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil)
end
describe '#up' do
it 'only cleans up pending delete projects' do
admin = create(:admin)
create(:empty_project)
create(:empty_project, pending_delete: true)
project = build(:empty_project, pending_delete: true, namespace_id: nil)
project.save(validate: false)
expect(NamespacelessProjectDestroyWorker).to receive(:bulk_perform_async).with([[project.id.to_s, admin.id, {}]])
described_class.new.up
end
end
end

View file

@ -0,0 +1,81 @@
require 'spec_helper'
describe NamespacelessProjectDestroyWorker do
subject { described_class.new }
before do
# Stub after_save callbacks that will fail when Project has no namespace
allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil)
allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil)
end
describe '#perform' do
context 'project has namespace' do
it 'does not do anything' do
project = create(:empty_project)
subject.perform(project.id, project.owner.id, {})
expect(Project.unscoped.all).to include(project)
end
end
context 'project has no namespace' do
let(:admin) { create(:admin) }
let!(:project) do
project = build(:empty_project, namespace_id: nil)
project.save(validate: false)
project
end
context 'project not a fork of another project' do
it "truncates the project's team" do
expect_any_instance_of(ProjectTeam).to receive(:truncate)
subject.perform(project.id, admin.id, {})
end
it 'deletes the project' do
subject.perform(project.id, admin.id, {})
expect(Project.unscoped.all).not_to include(project)
end
it 'does not call unlink_fork' do
is_expected.not_to receive(:unlink_fork)
subject.perform(project.id, admin.id, {})
end
it 'does not do anything in Project#remove_pages method' do
expect(Gitlab::PagesTransfer).not_to receive(:new)
subject.perform(project.id, admin.id, {})
end
end
context 'project forked from another' do
let!(:parent_project) { create(:empty_project) }
before do
create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project)
end
it 'closes open merge requests' do
merge_request = create(:merge_request, source_project: project, target_project: parent_project)
subject.perform(project.id, admin.id, {})
expect(merge_request.reload).to be_closed
end
it 'destroys the link' do
subject.perform(project.id, admin.id, {})
expect(parent_project.forked_project_links).to be_empty
end
end
end
end
end