Clean orphaned files in object storage
This commit is contained in:
parent
ab08f9986d
commit
6aaeb6dc41
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Clean orphaned files in object storage
|
||||
merge_request: 20918
|
||||
author:
|
||||
type: added
|
|
@ -52,4 +52,33 @@ D, [2018-07-27T12:08:33.293568 #89817] DEBUG -- : Processing batch of 500 projec
|
|||
I, [2018-07-27T12:08:33.689869 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/test.out
|
||||
I, [2018-07-27T12:08:33.755624 #89817] INFO -- : Did fix /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/qux/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt
|
||||
I, [2018-07-27T12:08:33.760257 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png
|
||||
```
|
||||
```
|
||||
|
||||
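Move object store upload files to the `lost_and_found` directory if they are not tracked in the GitLab database. The task only reports what it would move (dry run) unless `DRY_RUN=false` is passed.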
|
||||
|
||||
```
|
||||
# omnibus-gitlab
|
||||
sudo gitlab-rake gitlab:cleanup:remote_upload_files
|
||||
|
||||
# installation from source
|
||||
bundle exec rake gitlab:cleanup:remote_upload_files RAILS_ENV=production
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
$ sudo gitlab-rake gitlab:cleanup:remote_upload_files
|
||||
|
||||
I, [2018-08-02T10:26:13.995978 #45011] INFO -- : Looking for orphaned remote uploads to remove. Dry run...
|
||||
I, [2018-08-02T10:26:14.120400 #45011] INFO -- : Can be moved to lost and found: @hashed/6b/DSC_6152.JPG
|
||||
I, [2018-08-02T10:26:14.120482 #45011] INFO -- : Can be moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
|
||||
I, [2018-08-02T10:26:14.120634 #45011] INFO -- : To cleanup these files run this command with DRY_RUN=false
|
||||
```
|
||||
|
||||
```
|
||||
$ sudo gitlab-rake gitlab:cleanup:remote_upload_files DRY_RUN=false
|
||||
|
||||
I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote uploads to remove...
|
||||
I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG
|
||||
I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
|
||||
```
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
# frozen_string_literal: true
|
||||
module Gitlab
  module Cleanup
    # Looks through the uploads object storage directory for files that have
    # no matching remote `Upload` row and moves them under the lost-and-found
    # prefix (or only reports them when running as a dry run).
    class RemoteUploads
      attr_reader :logger

      # Number of remote files checked against the uploads table per query.
      BATCH_SIZE = 100

      # logger - object responding to `info`/`warn`; defaults to Rails.logger.
      def initialize(logger: nil)
        @logger = logger || Rails.logger
      end

      # Processes every orphaned remote file.
      #
      # dry_run - when true, only logs which files could be moved; when false,
      #           each orphan is copied under the lost-and-found prefix and the
      #           original is destroyed.
      #
      # Returns early (after logging a warning) when uploads object storage is
      # not enabled in the configuration.
      def run!(dry_run: false)
        unless configuration.enabled
          logger.warn "Object storage not enabled. Exit".color(:yellow)

          return
        end

        logger.info "Looking for orphaned remote uploads to remove#{'. Dry run' if dry_run}..."

        each_orphan_file do |file|
          info = if dry_run
                   "Can be moved to lost and found: #{file.key}"
                 else
                   new_path = move_to_lost_and_found(file)
                   "Moved to lost and found: #{file.key} -> #{new_path}"
                 end

          logger.info(info)
        end
      end

      private

      # Yields each remote file whose key is not recorded as a remote upload
      # path, working through the directory listing BATCH_SIZE files at a time.
      def each_orphan_file
        # Files already moved to the lost_and_found directory are skipped.
        # A plain prefix test is used instead of a `^`-anchored regex: `^`
        # matches after any newline, so a key containing "\nlost_and_found/"
        # would have been skipped by mistake.
        lost_prefix = "#{lost_and_found_dir}/"

        remote_directory.files.each_slice(BATCH_SIZE) do |remote_files|
          candidates = remote_files.reject { |file| file.key.start_with?(lost_prefix) }

          # Nothing left in this batch, so there is nothing to look up.
          next if candidates.empty?

          tracked_paths = Upload
            .where(store: ObjectStorage::Store::REMOTE, path: candidates.map(&:key))
            .pluck(:path)

          candidates.each do |file|
            yield file unless tracked_paths.include?(file.key)
          end
        end
      end

      # Copies the file under the lost-and-found prefix within the same remote
      # directory, destroys the original and returns the new key.
      def move_to_lost_and_found(file)
        new_path = "#{lost_and_found_dir}/#{file.key}"

        file.copy(configuration['remote_directory'], new_path)
        file.destroy

        new_path
      end

      # Key prefix under which orphaned files are placed.
      def lost_and_found_dir
        'lost_and_found'
      end

      def remote_directory
        connection.directories.get(configuration['remote_directory'])
      end

      def connection
        ::Fog::Storage.new(configuration['connection'].symbolize_keys)
      end

      def configuration
        Gitlab.config.uploads.object_store
      end
    end
  end
end
|
|
@ -116,6 +116,16 @@ namespace :gitlab do
|
|||
end
|
||||
end
|
||||
|
||||
desc 'GitLab | Cleanup | Clean orphan remote upload files that do not exist in the db'
task remote_upload_files: :environment do
  # Hand the actual work to the cleaner; it only reports candidates when
  # dry_run? is true.
  Gitlab::Cleanup::RemoteUploads.new(logger: logger).run!(dry_run: dry_run?)

  # After a dry run, tell the operator how to perform the real cleanup.
  logger.info "To cleanup these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
|
||||
|
||||
# True only when the REMOVE environment variable is set to exactly "true".
def remove?
  ENV['REMOVE'].eql?('true')
end
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
# frozen_string_literal: true
|
||||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Cleanup::RemoteUploads do
  context 'when object_storage is enabled' do
    let(:connection) { double }
    let(:directory) { double }

    # dir/file1 is tracked as a remote upload; dir/file2 is tracked only as a
    # local upload, so its remote copy counts as an orphan.
    let!(:uploads) do
      [
        create(:upload, path: 'dir/file1', store: ObjectStorage::Store::REMOTE),
        create(:upload, path: 'dir/file2', store: ObjectStorage::Store::LOCAL)
      ]
    end

    # Listing returned by the stubbed remote directory. The last entry already
    # lives under lost_and_found and must never be moved again.
    let(:remote_files) do
      [
        double(key: 'dir/file1'),
        double(key: 'dir/file2'),
        double(key: 'dir/file3'),
        double(key: 'lost_and_found/dir/file3')
      ]
    end

    before do
      stub_uploads_object_storage(FileUploader)

      expect(::Fog::Storage).to receive(:new).and_return(connection)

      expect(connection).to receive(:directories).and_return(double(get: directory))
      expect(directory).to receive(:files).and_return(remote_files)
    end

    context 'when dry_run is set to false' do
      subject { described_class.new.run!(dry_run: false) }

      it 'moves files that are not in uploads table' do
        expect(remote_files[0]).not_to receive(:copy)
        expect(remote_files[0]).not_to receive(:destroy)
        expect(remote_files[1]).to receive(:copy)
        expect(remote_files[1]).to receive(:destroy)
        expect(remote_files[2]).to receive(:copy)
        expect(remote_files[2]).to receive(:destroy)
        expect(remote_files[3]).not_to receive(:copy)
        expect(remote_files[3]).not_to receive(:destroy)

        subject
      end
    end

    context 'when dry_run is set to true' do
      subject { described_class.new.run!(dry_run: true) }

      it 'does not move files' do
        expect(remote_files[0]).not_to receive(:copy)
        expect(remote_files[0]).not_to receive(:destroy)
        expect(remote_files[1]).not_to receive(:copy)
        expect(remote_files[1]).not_to receive(:destroy)
        expect(remote_files[2]).not_to receive(:copy)
        expect(remote_files[2]).not_to receive(:destroy)
        expect(remote_files[3]).not_to receive(:copy)
        expect(remote_files[3]).not_to receive(:destroy)

        subject
      end
    end
  end

  context 'when object_storage is not enabled' do
    # An explicit subject is required here: the implicit one would only
    # instantiate the class and never call run!, so the expectation below
    # would pass without exercising the early return.
    # NOTE(review): relies on uploads object storage being disabled by default
    # in the test configuration - confirm.
    subject { described_class.new.run!(dry_run: false) }

    it 'does not connect to any storage' do
      expect(::Fog::Storage).not_to receive(:new)

      subject
    end
  end
end
|
Loading…
Reference in New Issue