Remove cleaned up OIDs from database and cache
This commit is contained in:
parent
d7eb886b9f
commit
8973f32d42
14 changed files with 307 additions and 128 deletions
|
@ -1 +1 @@
|
|||
1.40.0
|
||||
1.41.0
|
||||
|
|
|
@ -51,6 +51,10 @@ class MergeRequestDiff < ApplicationRecord
|
|||
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
|
||||
end
|
||||
|
||||
scope :by_project_id, -> (project_id) do
|
||||
joins(:merge_request).where(merge_requests: { target_project_id: project_id })
|
||||
end
|
||||
|
||||
scope :recent, -> { order(id: :desc).limit(100) }
|
||||
scope :files_in_database, -> { where(stored_externally: [false, nil]) }
|
||||
|
||||
|
|
|
@ -7,6 +7,10 @@ class NoteDiffFile < ApplicationRecord
|
|||
joins(:diff_note).where("resolved_at IS NULL OR noteable_type = 'Commit'")
|
||||
end
|
||||
|
||||
scope :referencing_sha, -> (oids, project_id:) do
|
||||
joins(:diff_note).where(notes: { project_id: project_id, commit_id: oids })
|
||||
end
|
||||
|
||||
delegate :original_position, :project, to: :diff_note
|
||||
|
||||
belongs_to :diff_note, inverse_of: :note_diff_file
|
||||
|
|
|
@ -18,9 +18,6 @@ module Projects
|
|||
# per rewritten object, with the old and new SHAs space-separated. It can be
|
||||
# used to update or remove content that references the objects that BFG has
|
||||
# altered
|
||||
#
|
||||
# Currently, only the project repository is modified by this service, but we
|
||||
# may wish to modify other data sources in the future.
|
||||
def execute
|
||||
apply_bfg_object_map!
|
||||
|
||||
|
@ -41,10 +38,52 @@ module Projects
|
|||
raise NoUploadError unless project.bfg_object_map.exists?
|
||||
|
||||
project.bfg_object_map.open do |io|
|
||||
repository_cleaner.apply_bfg_object_map(io)
|
||||
repository_cleaner.apply_bfg_object_map_stream(io) do |response|
|
||||
cleanup_diffs(response)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def cleanup_diffs(response)
|
||||
old_commit_shas = extract_old_commit_shas(response.entries)
|
||||
|
||||
ActiveRecord::Base.transaction do
|
||||
cleanup_merge_request_diffs(old_commit_shas)
|
||||
cleanup_note_diff_files(old_commit_shas)
|
||||
end
|
||||
end
|
||||
|
||||
def extract_old_commit_shas(batch)
|
||||
batch.lazy.select { |entry| entry.type == :COMMIT }.map(&:old_oid).force
|
||||
end
|
||||
|
||||
def cleanup_merge_request_diffs(old_commit_shas)
|
||||
merge_request_diffs = MergeRequestDiff
|
||||
.by_project_id(project.id)
|
||||
.by_commit_sha(old_commit_shas)
|
||||
|
||||
# It's important to run the ActiveRecord callbacks here
|
||||
merge_request_diffs.destroy_all # rubocop:disable Cop/DestroyAll
|
||||
|
||||
# TODO: ensure the highlight cache is removed immediately. It's too hard
|
||||
# to calculate the Redis keys at present.
|
||||
#
|
||||
# https://gitlab.com/gitlab-org/gitlab-ce/issues/61115
|
||||
end
|
||||
|
||||
def cleanup_note_diff_files(old_commit_shas)
|
||||
# Pluck the IDs instead of running the query twice to ensure we clear the
|
||||
# cache for exactly the note diffs we remove
|
||||
ids = NoteDiffFile
|
||||
.referencing_sha(old_commit_shas, project_id: project.id)
|
||||
.pluck_primary_key
|
||||
|
||||
NoteDiffFile.id_in(ids).delete_all
|
||||
|
||||
# A highlighted version of the diff is stored in redis. Remove it now.
|
||||
Gitlab::DiscussionsDiff::HighlightCache.clear_multiple(ids)
|
||||
end
|
||||
|
||||
def repository_cleaner
|
||||
@repository_cleaner ||= Gitlab::Git::RepositoryCleaner.new(repository.raw)
|
||||
end
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Remove cleaned up OIDs from database and cache
|
||||
merge_request: 26555
|
||||
author:
|
||||
type: added
|
|
@ -98,6 +98,12 @@ up its own internal state, maximizing the space saved.
|
|||
`git gc` against the repository. You will receive an email once it has
|
||||
completed.
|
||||
|
||||
This process will remove some copies of the rewritten commits from GitLab's
|
||||
cache and database, but there are still numerous gaps in coverage - at present,
|
||||
some of the copies may persist indefinitely. [Clearing the instance cache]
|
||||
(../../../administration/raketasks/maintenance.md#clear-redis-cache) may help to
|
||||
remove some of them, but it should not be depended on for security purposes!
|
||||
|
||||
## Using `git filter-branch`
|
||||
|
||||
1. Navigate to your repository:
|
||||
|
|
|
@ -52,6 +52,19 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
# Clears multiple cache keys at once.
|
||||
#
|
||||
# raw_keys - An Array of unique cache keys, without namespaces.
|
||||
#
|
||||
# It returns the number of cache keys cleared. Ex.: 42
|
||||
def clear_multiple(raw_keys)
|
||||
return [] if raw_keys.empty?
|
||||
|
||||
keys = raw_keys.map { |id| cache_key_for(id) }
|
||||
|
||||
Redis::Cache.with { |redis| redis.del(keys) }
|
||||
end
|
||||
|
||||
def cache_key_for(raw_key)
|
||||
"#{cache_key_prefix}:#{raw_key}"
|
||||
end
|
||||
|
|
|
@ -12,9 +12,9 @@ module Gitlab
|
|||
@repository = repository
|
||||
end
|
||||
|
||||
def apply_bfg_object_map(io)
|
||||
def apply_bfg_object_map_stream(io, &blk)
|
||||
wrapped_gitaly_errors do
|
||||
gitaly_cleanup_client.apply_bfg_object_map(io)
|
||||
gitaly_cleanup_client.apply_bfg_object_map_stream(io, &blk)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -12,25 +12,32 @@ module Gitlab
|
|||
@storage = repository.storage
|
||||
end
|
||||
|
||||
def apply_bfg_object_map(io)
|
||||
first_request = Gitaly::ApplyBfgObjectMapRequest.new(repository: gitaly_repo)
|
||||
def apply_bfg_object_map_stream(io, &blk)
|
||||
responses = GitalyClient.call(
|
||||
storage,
|
||||
:cleanup_service,
|
||||
:apply_bfg_object_map_stream,
|
||||
build_object_map_enum(io),
|
||||
timeout: GitalyClient.no_timeout
|
||||
)
|
||||
|
||||
enum = Enumerator.new do |y|
|
||||
y.yield first_request
|
||||
responses.each(&blk)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def build_object_map_enum(io)
|
||||
Enumerator.new do |y|
|
||||
# First request. For simplicity, doesn't include any object map data
|
||||
y << Gitaly::ApplyBfgObjectMapStreamRequest.new(repository: gitaly_repo)
|
||||
|
||||
# Now stream the BFG object map file to gitaly in chunks
|
||||
while data = io.read(RepositoryService::MAX_MSG_SIZE)
|
||||
y.yield Gitaly::ApplyBfgObjectMapRequest.new(object_map: data)
|
||||
y << Gitaly::ApplyBfgObjectMapStreamRequest.new(object_map: data)
|
||||
|
||||
break if io&.eof?
|
||||
end
|
||||
end
|
||||
|
||||
GitalyClient.call(
|
||||
storage,
|
||||
:cleanup_service,
|
||||
:apply_bfg_object_map,
|
||||
enum,
|
||||
timeout: GitalyClient.no_timeout
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,31 +3,32 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do
|
||||
def fake_file(offset)
|
||||
{
|
||||
text: 'foo',
|
||||
type: 'new',
|
||||
index: 2 + offset,
|
||||
old_pos: 10 + offset,
|
||||
new_pos: 11 + offset,
|
||||
line_code: 'xpto',
|
||||
rich_text: '<blips>blops</blips>'
|
||||
}
|
||||
end
|
||||
|
||||
let(:mapping) do
|
||||
{
|
||||
3 => [
|
||||
fake_file(0),
|
||||
fake_file(1)
|
||||
],
|
||||
4 => [
|
||||
fake_file(2)
|
||||
]
|
||||
}
|
||||
end
|
||||
|
||||
describe '#write_multiple' do
|
||||
it 'sets multiple keys serializing content as JSON' do
|
||||
mapping = {
|
||||
3 => [
|
||||
{
|
||||
text: 'foo',
|
||||
type: 'new',
|
||||
index: 2,
|
||||
old_pos: 10,
|
||||
new_pos: 11,
|
||||
line_code: 'xpto',
|
||||
rich_text: '<blips>blops</blips>'
|
||||
},
|
||||
{
|
||||
text: 'foo',
|
||||
type: 'new',
|
||||
index: 3,
|
||||
old_pos: 11,
|
||||
new_pos: 12,
|
||||
line_code: 'xpto',
|
||||
rich_text: '<blops>blips</blops>'
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
described_class.write_multiple(mapping)
|
||||
|
||||
mapping.each do |key, value|
|
||||
|
@ -41,53 +42,16 @@ describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do
|
|||
|
||||
describe '#read_multiple' do
|
||||
it 'reads multiple keys and serializes content into Gitlab::Diff::Line objects' do
|
||||
mapping = {
|
||||
3 => [
|
||||
{
|
||||
text: 'foo',
|
||||
type: 'new',
|
||||
index: 2,
|
||||
old_pos: 11,
|
||||
new_pos: 12,
|
||||
line_code: 'xpto',
|
||||
rich_text: '<blips>blops</blips>'
|
||||
},
|
||||
{
|
||||
text: 'foo',
|
||||
type: 'new',
|
||||
index: 3,
|
||||
old_pos: 10,
|
||||
new_pos: 11,
|
||||
line_code: 'xpto',
|
||||
rich_text: '<blips>blops</blips>'
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
described_class.write_multiple(mapping)
|
||||
|
||||
found = described_class.read_multiple(mapping.keys)
|
||||
|
||||
expect(found.size).to eq(1)
|
||||
expect(found.size).to eq(2)
|
||||
expect(found.first.size).to eq(2)
|
||||
expect(found.first).to all(be_a(Gitlab::Diff::Line))
|
||||
end
|
||||
|
||||
it 'returns nil when cached key is not found' do
|
||||
mapping = {
|
||||
3 => [
|
||||
{
|
||||
text: 'foo',
|
||||
type: 'new',
|
||||
index: 2,
|
||||
old_pos: 11,
|
||||
new_pos: 12,
|
||||
line_code: 'xpto',
|
||||
rich_text: '<blips>blops</blips>'
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
described_class.write_multiple(mapping)
|
||||
|
||||
found = described_class.read_multiple([2, 3])
|
||||
|
@ -95,8 +59,30 @@ describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do
|
|||
expect(found.size).to eq(2)
|
||||
|
||||
expect(found.first).to eq(nil)
|
||||
expect(found.second.size).to eq(1)
|
||||
expect(found.second.size).to eq(2)
|
||||
expect(found.second).to all(be_a(Gitlab::Diff::Line))
|
||||
end
|
||||
end
|
||||
|
||||
describe '#clear_multiple' do
|
||||
it 'removes all named keys' do
|
||||
described_class.write_multiple(mapping)
|
||||
|
||||
described_class.clear_multiple(mapping.keys)
|
||||
|
||||
expect(described_class.read_multiple(mapping.keys)).to all(be_nil)
|
||||
end
|
||||
|
||||
it 'only removed named keys' do
|
||||
to_clear, to_leave = mapping.keys
|
||||
|
||||
described_class.write_multiple(mapping)
|
||||
described_class.clear_multiple([to_clear])
|
||||
|
||||
cleared, left = described_class.read_multiple([to_clear, to_leave])
|
||||
|
||||
expect(cleared).to be_nil
|
||||
expect(left).to all(be_a(Gitlab::Diff::Line))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -6,55 +6,62 @@ describe Gitlab::Git::RepositoryCleaner do
|
|||
let(:project) { create(:project, :repository) }
|
||||
let(:repository) { project.repository }
|
||||
let(:head_sha) { repository.head_commit.id }
|
||||
let(:object_map_data) { "#{head_sha} #{'0' * 40}" }
|
||||
let(:object_map_data) { "#{head_sha} #{Gitlab::Git::BLANK_SHA}" }
|
||||
|
||||
let(:clean_refs) { %W[refs/environments/1 refs/merge-requests/1 refs/keep-around/#{head_sha}] }
|
||||
let(:keep_refs) { %w[refs/heads/_keep refs/tags/_keep] }
|
||||
|
||||
subject(:cleaner) { described_class.new(repository.raw) }
|
||||
|
||||
describe '#apply_bfg_object_map' do
|
||||
let(:clean_refs) { %W[refs/environments/1 refs/merge-requests/1 refs/keep-around/#{head_sha}] }
|
||||
let(:keep_refs) { %w[refs/heads/_keep refs/tags/_keep] }
|
||||
|
||||
shared_examples_for '#apply_bfg_object_map_stream' do
|
||||
before do
|
||||
(clean_refs + keep_refs).each { |ref| repository.create_ref(head_sha, ref) }
|
||||
end
|
||||
|
||||
context 'from StringIO' do
|
||||
let(:object_map) { StringIO.new(object_map_data) }
|
||||
it 'removes internal references' do
|
||||
entries = []
|
||||
|
||||
it 'removes internal references' do
|
||||
cleaner.apply_bfg_object_map(object_map)
|
||||
|
||||
aggregate_failures do
|
||||
clean_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_falsy }
|
||||
keep_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_truthy }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'from Gitlab::HttpIO' do
|
||||
let(:url) { 'http://example.com/bfg_object_map.txt' }
|
||||
let(:tempfile) { Tempfile.new }
|
||||
let(:object_map) { Gitlab::HttpIO.new(url, object_map_data.size) }
|
||||
|
||||
around do |example|
|
||||
tempfile.write(object_map_data)
|
||||
tempfile.close
|
||||
|
||||
example.run
|
||||
ensure
|
||||
tempfile.unlink
|
||||
cleaner.apply_bfg_object_map_stream(object_map) do |rsp|
|
||||
entries.concat(rsp.entries)
|
||||
end
|
||||
|
||||
it 'removes internal references' do
|
||||
stub_remote_url_200(url, tempfile.path)
|
||||
aggregate_failures do
|
||||
clean_refs.each { |ref| expect(repository.ref_exists?(ref)).to be(false) }
|
||||
keep_refs.each { |ref| expect(repository.ref_exists?(ref)).to be(true) }
|
||||
|
||||
cleaner.apply_bfg_object_map(object_map)
|
||||
|
||||
aggregate_failures do
|
||||
clean_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_falsy }
|
||||
keep_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_truthy }
|
||||
end
|
||||
expect(entries).to contain_exactly(
|
||||
Gitaly::ApplyBfgObjectMapStreamResponse::Entry.new(
|
||||
type: :COMMIT,
|
||||
old_oid: head_sha,
|
||||
new_oid: Gitlab::Git::BLANK_SHA
|
||||
)
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#apply_bfg_object_map_stream (from StringIO)' do
|
||||
let(:object_map) { StringIO.new(object_map_data) }
|
||||
|
||||
include_examples '#apply_bfg_object_map_stream'
|
||||
end
|
||||
|
||||
describe '#apply_bfg_object_map_stream (from Gitlab::HttpIO)' do
|
||||
let(:url) { 'http://example.com/bfg_object_map.txt' }
|
||||
let(:tempfile) { Tempfile.new }
|
||||
let(:object_map) { Gitlab::HttpIO.new(url, object_map_data.size) }
|
||||
|
||||
around do |example|
|
||||
tempfile.write(object_map_data)
|
||||
tempfile.close
|
||||
|
||||
stub_remote_url_200(url, tempfile.path)
|
||||
|
||||
example.run
|
||||
ensure
|
||||
tempfile.unlink
|
||||
end
|
||||
|
||||
include_examples '#apply_bfg_object_map_stream'
|
||||
end
|
||||
end
|
||||
|
|
|
@ -6,14 +6,14 @@ describe Gitlab::GitalyClient::CleanupService do
|
|||
let(:relative_path) { project.disk_path + '.git' }
|
||||
let(:client) { described_class.new(project.repository) }
|
||||
|
||||
describe '#apply_bfg_object_map' do
|
||||
it 'sends an apply_bfg_object_map message' do
|
||||
describe '#apply_bfg_object_map_stream' do
|
||||
it 'sends an apply_bfg_object_map_stream message' do
|
||||
expect_any_instance_of(Gitaly::CleanupService::Stub)
|
||||
.to receive(:apply_bfg_object_map)
|
||||
.to receive(:apply_bfg_object_map_stream)
|
||||
.with(kind_of(Enumerator), kind_of(Hash))
|
||||
.and_return(double)
|
||||
.and_return([])
|
||||
|
||||
client.apply_bfg_object_map(StringIO.new)
|
||||
client.apply_bfg_object_map_stream(StringIO.new)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,4 +10,31 @@ describe NoteDiffFile do
|
|||
describe 'validations' do
|
||||
it { is_expected.to validate_presence_of(:diff_note) }
|
||||
end
|
||||
|
||||
describe '.referencing_sha' do
|
||||
let!(:diff_note) { create(:diff_note_on_commit) }
|
||||
|
||||
let(:note_diff_file) { diff_note.note_diff_file }
|
||||
let(:project) { diff_note.project }
|
||||
|
||||
it 'finds note diff files by project and sha' do
|
||||
found = described_class.referencing_sha(diff_note.commit_id, project_id: project.id)
|
||||
|
||||
expect(found).to contain_exactly(note_diff_file)
|
||||
end
|
||||
|
||||
it 'excludes note diff files with the wrong project' do
|
||||
other_project = create(:project)
|
||||
|
||||
found = described_class.referencing_sha(diff_note.commit_id, project_id: other_project.id)
|
||||
|
||||
expect(found).to be_empty
|
||||
end
|
||||
|
||||
it 'excludes note diff files with the wrong sha' do
|
||||
found = described_class.referencing_sha(Gitlab::Git::BLANK_SHA, project_id: project.id)
|
||||
|
||||
expect(found).to be_empty
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -6,13 +6,13 @@ describe Projects::CleanupService do
|
|||
let(:project) { create(:project, :repository, bfg_object_map: fixture_file_upload('spec/fixtures/bfg_object_map.txt')) }
|
||||
let(:object_map) { project.bfg_object_map }
|
||||
|
||||
let(:cleaner) { service.__send__(:repository_cleaner) }
|
||||
|
||||
subject(:service) { described_class.new(project) }
|
||||
|
||||
describe '#execute' do
|
||||
it 'runs the apply_bfg_object_map gitaly RPC' do
|
||||
expect_next_instance_of(Gitlab::Git::RepositoryCleaner) do |cleaner|
|
||||
expect(cleaner).to receive(:apply_bfg_object_map).with(kind_of(IO))
|
||||
end
|
||||
it 'runs the apply_bfg_object_map_stream gitaly RPC' do
|
||||
expect(cleaner).to receive(:apply_bfg_object_map_stream).with(kind_of(IO))
|
||||
|
||||
service.execute
|
||||
end
|
||||
|
@ -37,10 +37,91 @@ describe Projects::CleanupService do
|
|||
expect(object_map.exists?).to be_falsy
|
||||
end
|
||||
|
||||
context 'with a tainted merge request diff' do
|
||||
let(:merge_request) { create(:merge_request, source_project: project, target_project: project) }
|
||||
let(:diff) { merge_request.merge_request_diff }
|
||||
let(:entry) { build_entry(diff.commits.first.id) }
|
||||
|
||||
before do
|
||||
allow(cleaner)
|
||||
.to receive(:apply_bfg_object_map_stream)
|
||||
.and_yield(Gitaly::ApplyBfgObjectMapStreamResponse.new(entries: [entry]))
|
||||
end
|
||||
|
||||
it 'removes the tainted commit from the database' do
|
||||
service.execute
|
||||
|
||||
expect(MergeRequestDiff.exists?(diff.id)).to be_falsy
|
||||
end
|
||||
|
||||
it 'ignores non-commit responses from Gitaly' do
|
||||
entry.type = :UNKNOWN
|
||||
|
||||
service.execute
|
||||
|
||||
expect(MergeRequestDiff.exists?(diff.id)).to be_truthy
|
||||
end
|
||||
end
|
||||
|
||||
context 'with a tainted diff note' do
|
||||
let(:diff_note) { create(:diff_note_on_commit, project: project) }
|
||||
let(:note_diff_file) { diff_note.note_diff_file }
|
||||
let(:entry) { build_entry(diff_note.commit_id) }
|
||||
|
||||
let(:highlight_cache) { Gitlab::DiscussionsDiff::HighlightCache }
|
||||
let(:cache_id) { note_diff_file.id }
|
||||
|
||||
before do
|
||||
allow(cleaner)
|
||||
.to receive(:apply_bfg_object_map_stream)
|
||||
.and_yield(Gitaly::ApplyBfgObjectMapStreamResponse.new(entries: [entry]))
|
||||
end
|
||||
|
||||
it 'removes the tainted commit from the database' do
|
||||
service.execute
|
||||
|
||||
expect(NoteDiffFile.exists?(note_diff_file.id)).to be_falsy
|
||||
end
|
||||
|
||||
it 'removes the highlight cache from redis' do
|
||||
write_cache(highlight_cache, cache_id, [{}])
|
||||
|
||||
expect(read_cache(highlight_cache, cache_id)).not_to be_nil
|
||||
|
||||
service.execute
|
||||
|
||||
expect(read_cache(highlight_cache, cache_id)).to be_nil
|
||||
end
|
||||
|
||||
it 'ignores non-commit responses from Gitaly' do
|
||||
entry.type = :UNKNOWN
|
||||
|
||||
service.execute
|
||||
|
||||
expect(NoteDiffFile.exists?(note_diff_file.id)).to be_truthy
|
||||
end
|
||||
end
|
||||
|
||||
it 'raises an error if no object map can be found' do
|
||||
object_map.remove!
|
||||
|
||||
expect { service.execute }.to raise_error(described_class::NoUploadError)
|
||||
end
|
||||
end
|
||||
|
||||
def build_entry(old_oid)
|
||||
Gitaly::ApplyBfgObjectMapStreamResponse::Entry.new(
|
||||
type: :COMMIT,
|
||||
old_oid: old_oid,
|
||||
new_oid: Gitlab::Git::BLANK_SHA
|
||||
)
|
||||
end
|
||||
|
||||
def read_cache(cache, key)
|
||||
cache.read_multiple([key]).first
|
||||
end
|
||||
|
||||
def write_cache(cache, key, value)
|
||||
cache.write_multiple(key => value)
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue