# frozen_string_literal: true

module Projects
  # Cleans up data belonging to a project after its repository has been
  # rewritten with BFG: https://rtyley.github.io/bfg-repo-cleaner/
  #
  # Only run this service once every ref rewritten by BFG has been pushed to
  # the repository.
  class CleanupService < BaseService
    # Raised by #execute when the project has no uploaded BFG object map.
    NoUploadError = StandardError.new("Couldn't find uploaded object map")

    include Gitlab::Utils::StrongMemoize

    # Attempts to clean up the project following the push. Warning: this is
    # destructive!
    #
    # The BFG object map is taken from the project's bfg_object_map upload.
    # It contains one line per rewritten object, holding the old and the new
    # SHA separated by a space, and is used to update or remove content that
    # references the objects BFG has altered.
    #
    # Raises NoUploadError when the object map upload does not exist.
    def execute
      apply_bfg_object_map!

      # Remove older objects that are no longer referenced. The worker is
      # invoked directly on a fresh instance rather than being enqueued.
      project_id = project.id
      GitGarbageCollectWorker.new.perform(project_id, :gc, "project_cleanup:gc:#{project_id}")

      # Cached repository data may be stale by now. Holding onto it could hide
      # bugs that assume some object is still present, so expire it right away
      # and let any such problem surface immediately.
      project.repository.expire_all_method_caches

      # Finally, remove the uploaded object map itself
      project.bfg_object_map.remove!
    end

    private

    # Streams the uploaded object map to the repository cleaner and cleans up
    # the diffs for every response it yields back.
    #
    # Raises NoUploadError when the object map upload does not exist.
    def apply_bfg_object_map!
      object_map = project.bfg_object_map
      raise NoUploadError unless object_map.exists?

      object_map.open do |object_map_io|
        repository_cleaner.apply_bfg_object_map_stream(object_map_io) do |batch_response|
          cleanup_diffs(batch_response)
        end
      end
    end

    # Removes the merge request diffs and note diff files that reference the
    # old commit SHAs listed in +response+. Both removals are wrapped in a
    # single database transaction.
    def cleanup_diffs(response)
      stale_commit_shas = extract_old_commit_shas(response.entries)

      ActiveRecord::Base.transaction do
        cleanup_merge_request_diffs(stale_commit_shas)
        cleanup_note_diff_files(stale_commit_shas)
      end
    end

    # Returns an Array with the old_oid of every entry in +batch+ whose type
    # is :COMMIT, in the order the entries appear.
    def extract_old_commit_shas(batch)
      batch.each_with_object([]) do |entry, shas|
        shas << entry.old_oid if entry.type == :COMMIT
      end
    end

    # Destroys this project's merge request diffs matching +old_commit_shas+.
    def cleanup_merge_request_diffs(old_commit_shas)
      stale_diffs = MergeRequestDiff.by_project_id(project.id).by_commit_sha(old_commit_shas)

      # destroy_all is deliberate: the ActiveRecord callbacks have to run here
      stale_diffs.destroy_all # rubocop:disable Cop/DestroyAll

      # TODO: ensure the highlight cache is removed immediately. It's too hard
      # to calculate the Redis keys at present.
      #
      # https://gitlab.com/gitlab-org/gitlab-foss/issues/61115
    end

    # Deletes this project's note diff files referencing +old_commit_shas+
    # and clears the highlight cache entries for the same records.
    def cleanup_note_diff_files(old_commit_shas)
      # The IDs are plucked once and reused, rather than running the query
      # twice, so that the cache is cleared for exactly the rows we delete.
      note_diff_file_ids = NoteDiffFile
        .referencing_sha(old_commit_shas, project_id: project.id)
        .pluck_primary_key

      NoteDiffFile.id_in(note_diff_file_ids).delete_all

      # A highlighted version of the diff is stored in redis. Remove it now.
      Gitlab::DiscussionsDiff::HighlightCache.clear_multiple(note_diff_file_ids)
    end

    # Memoized cleaner built around the raw repository.
    def repository_cleaner
      @repository_cleaner ||= Gitlab::Git::RepositoryCleaner.new(repository.raw)
    end
  end
end
|
2019-09-13 09:26:31 -04:00
|
|
|
|
|
|
|
# NOTE(review): presumably mixes the EE::Projects::CleanupService module into
# this class only when running the Enterprise Edition — confirm against the
# definition of prepend_if_ee.
Projects::CleanupService.prepend_if_ee('EE::Projects::CleanupService')
|