Add rake tasks for migrating leacy uploads

- move uploads created by AttachmentUploader
- handle also files created for legacy_diff_notes
This commit is contained in:
Jarka Košanová 2019-08-13 10:30:32 +02:00
parent 0297f1c2b8
commit 19db315734
11 changed files with 584 additions and 4 deletions

View file

@ -0,0 +1,5 @@
---
title: Create rake tasks for migrating legacy uploads out of deprecated paths
merge_request: 29409
author:
type: other

View file

@ -309,6 +309,12 @@ GraphQL queries are recorded in that file. For example:
{"query_string":"query IntrospectionQuery{__schema {queryType { name },mutationType { name }}}...(etc)","variables":{"a":1,"b":2},"complexity":181,"depth":1,"duration":7}
```
## `migrations.log`
Introduced in GitLab 12.3. This file lives in `/var/log/gitlab/gitlab-rails/migrations.log` for
Omnibus GitLab packages or in `/home/git/gitlab/log/migrations.log` for
installations from source.
## Reconfigure Logs
Reconfigure log files live in `/var/log/gitlab/reconfigure` for Omnibus GitLab

View file

@ -103,3 +103,13 @@ sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, S
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]"
```
## Migrate legacy uploads out of deprecated paths
> Introduced in GitLab 12.3.
To migrate all uploads created by legacy uploaders, run:
```shell
bundle exec rake gitlab:uploads:legacy:migrate
```

View file

@ -216,3 +216,4 @@ bundle exec rake routes
Since these take some time to create, it's often helpful to save the output to
a file for quick reference.

View file

@ -0,0 +1,140 @@
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This class takes a legacy upload and migrates it to the correct location
class LegacyUploadMover
include Gitlab::Utils::StrongMemoize
attr_reader :upload, :project, :note
attr_accessor :logger
def initialize(upload)
@upload = upload
@note = Note.find_by(id: upload.model_id)
@project = note&.project
@logger = Gitlab::BackgroundMigration::Logger.build
end
def execute
return unless upload
if !project
# if we don't have models associated with the upload we can not move it
warn('Deleting upload due to model not found.')
destroy_legacy_upload
elsif note.is_a?(LegacyDiffNote)
return unless move_legacy_diff_file
migrate_upload
elsif !legacy_file_exists?
warn('Deleting upload due to file not found.')
destroy_legacy_upload
else
migrate_upload
end
end
private
def migrate_upload
return unless copy_upload_to_project
add_upload_link_to_note_text
destroy_legacy_file
destroy_legacy_upload
end
# we should proceed and log whenever one upload copy fails, no matter the reasons
# rubocop: disable Lint/RescueException
def copy_upload_to_project
@uploader = FileUploader.copy_to(legacy_file_uploader, project)
logger.info(
message: 'MigrateLegacyUploads: File copied successfully',
old_path: legacy_file_uploader.file.path, new_path: @uploader.file.path
)
true
rescue Exception => e
warn(
'File could not be copied to project uploads',
file_path: legacy_file_uploader.file.path, error: e.message
)
false
end
# rubocop: enable Lint/RescueException
def destroy_legacy_upload
if note
note.remove_attachment = true
note.save
end
if upload.destroy
logger.info(message: 'MigrateLegacyUploads: Upload was destroyed.', upload: upload.inspect)
else
warn('MigrateLegacyUploads: Upload destroy failed.')
end
end
def destroy_legacy_file
legacy_file_uploader.file.delete
end
def add_upload_link_to_note_text
new_text = "#{note.note} \n #{@uploader.markdown_link}"
# Bypass validations because old data may have invalid
# noteable values. If we fail hard here, we may kill the
# entire background migration, which affects a range of notes.
note.update_attribute(:note, new_text)
end
def legacy_file_uploader
strong_memoize(:legacy_file_uploader) do
uploader = upload.build_uploader
uploader.retrieve_from_store!(File.basename(upload.path))
uploader
end
end
def legacy_file_exists?
legacy_file_uploader.file.exists?
end
# we should proceed and log whenever one upload copy fails, no matter the reasons
# rubocop: disable Lint/RescueException
def move_legacy_diff_file
old_path = upload.absolute_path
old_path_sub = '-/system/note/attachment'
if !File.exist?(old_path) || !old_path.include?(old_path_sub)
log_legacy_diff_note_problem(old_path)
return false
end
new_path = upload.absolute_path.sub(old_path_sub, '-/system/legacy_diff_note/attachment')
new_dir = File.dirname(new_path)
FileUtils.mkdir_p(new_dir)
FileUtils.mv(old_path, new_path)
rescue Exception => e
log_legacy_diff_note_problem(old_path, new_path, e)
false
end
def warn(message, params = {})
logger.warn(
params.merge(message: "MigrateLegacyUploads: #{message}", upload: upload.inspect)
)
end
def log_legacy_diff_note_problem(old_path, new_path = nil, error = nil)
warn('LegacyDiffNote upload could not be moved to a new path',
old_path: old_path, new_path: new_path, error: error&.message
)
end
# rubocop: enable Lint/RescueException
end
end
end

View file

@ -0,0 +1,23 @@
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This migration takes all legacy uploads (that were uploaded using AttachmentUploader)
# and migrate them to the new (FileUploader) location (=under projects).
#
# We have dependencies (uploaders) in this migration because extracting code would add a lot of complexity
# and possible errors could appear as the logic in the uploaders is not trivial.
#
# This migration will be removed in 13.0 in order to get rid of a migration that depends on
# the application code.
class LegacyUploadsMigrator
include Database::MigrationHelpers
def perform(start_id, end_id)
Upload.where(id: start_id..end_id, uploader: 'AttachmentUploader').find_each do |upload|
LegacyUploadMover.new(upload).execute
end
end
end
end
end

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Logger that can be used for migrations logging
class Logger < ::Gitlab::JsonLogger
def self.file_name_noext
'migrations'
end
end
end
end

View file

@ -0,0 +1,27 @@
# frozen_string_literal: true
namespace :gitlab do
namespace :uploads do
namespace :legacy do
desc "GitLab | Uploads | Migrate all legacy attachments"
task migrate: :environment do
class Upload < ApplicationRecord
self.table_name = 'uploads'
include ::EachBatch
end
migration = 'LegacyUploadsMigrator'.freeze
batch_size = 5000
delay_interval = 5.minutes.to_i
Upload.where(uploader: 'AttachmentUploader').each_batch(of: batch_size) do |relation, index|
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
delay = index * delay_interval
BackgroundMigrationWorker.perform_in(delay, migration, [start_id, end_id])
end
end
end
end
end

View file

@ -56,10 +56,7 @@ FactoryBot.define do
end
trait :attachment_upload do
transient do
mount_point :attachment
end
model { build(:note) }
uploader "AttachmentUploader"
end

View file

@ -0,0 +1,296 @@
# frozen_string_literal: true
require 'spec_helper'
# rubocop: disable RSpec/FactoriesInMigrationSpecs
describe Gitlab::BackgroundMigration::LegacyUploadMover do
let(:test_dir) { FileUploader.options['storage_path'] }
let(:filename) { 'image.png' }
let!(:namespace) { create(:namespace) }
let!(:legacy_project) { create(:project, :legacy_storage, namespace: namespace) }
let!(:hashed_project) { create(:project, namespace: namespace) }
# default project
let(:project) { legacy_project }
let!(:issue) { create(:issue, project: project) }
let!(:note) { create(:note, note: 'some note', project: project, noteable: issue) }
let(:legacy_upload) { create_upload(note, filename) }
def create_remote_upload(model, filename)
create(:upload, :attachment_upload,
path: "note/attachment/#{model.id}/#{filename}", secret: nil,
store: ObjectStorage::Store::REMOTE, model: model)
end
def create_upload(model, filename, with_file = true)
params = {
path: "uploads/-/system/note/attachment/#{model.id}/#{filename}",
model: model,
store: ObjectStorage::Store::LOCAL
}
if with_file
upload = create(:upload, :with_file, :attachment_upload, params)
model.update(attachment: upload.build_uploader)
model.attachment.upload
else
create(:upload, :attachment_upload, params)
end
end
def new_upload
Upload.find_by(model_id: project.id, model_type: 'Project')
end
def expect_error_log
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
expect(logger).to receive(:warn)
end
end
shared_examples 'legacy upload deletion' do
it 'removes the upload record' do
described_class.new(legacy_upload).execute
expect { legacy_upload.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
end
shared_examples 'move error' do
it 'does not remove the upload file' do
expect_error_log
described_class.new(legacy_upload).execute
expect(legacy_upload.reload).to eq(legacy_upload)
end
end
shared_examples 'migrates the file correctly' do
before do
described_class.new(legacy_upload).execute
end
it 'creates a new uplaod record correctly' do
expect(new_upload.secret).not_to be_nil
expect(new_upload.path).to end_with("#{new_upload.secret}/image.png")
expect(new_upload.model_id).to eq(project.id)
expect(new_upload.model_type).to eq('Project')
expect(new_upload.uploader).to eq('FileUploader')
end
it 'updates the legacy upload note so that it references the file in the markdown' do
expected_path = File.join('/uploads', new_upload.secret, 'image.png')
expected_markdown = "some note \n ![image](#{expected_path})"
expect(note.reload.note).to eq(expected_markdown)
end
it 'removes the attachment from the note model' do
expect(note.reload.attachment.file).to be_nil
end
end
context 'when no model found for the upload' do
before do
legacy_upload.model = nil
expect_error_log
end
it_behaves_like 'legacy upload deletion'
end
context 'when the upload move fails' do
before do
expect(FileUploader).to receive(:copy_to).and_raise('failed')
end
it_behaves_like 'move error'
end
context 'when the upload is in local storage' do
shared_examples 'legacy local file' do
it 'removes the file correctly' do
expect(File.exist?(legacy_upload.absolute_path)).to be_truthy
described_class.new(legacy_upload).execute
expect(File.exist?(legacy_upload.absolute_path)).to be_falsey
end
it 'moves legacy uploads to the correct location' do
described_class.new(legacy_upload).execute
expected_path = File.join(test_dir, 'uploads', project.disk_path, new_upload.secret, filename)
expect(File.exist?(expected_path)).to be_truthy
end
end
context 'when the upload file does not exist on the filesystem' do
let(:legacy_upload) { create_upload(note, filename, false) }
before do
expect_error_log
end
it_behaves_like 'legacy upload deletion'
end
context 'when an upload belongs to a legacy_diff_note' do
let!(:merge_request) { create(:merge_request, source_project: project) }
let!(:note) do
create(:legacy_diff_note_on_merge_request,
note: 'some note', project: project, noteable: merge_request)
end
let(:legacy_upload) do
create(:upload, :with_file, :attachment_upload,
path: "uploads/-/system/note/attachment/#{note.id}/#{filename}", model: note)
end
context 'when the file does not exist for the upload' do
let(:legacy_upload) do
create(:upload, :attachment_upload,
path: "uploads/-/system/note/attachment/#{note.id}/#{filename}", model: note)
end
it_behaves_like 'move error'
end
context 'when the file does not exist on expected path' do
let(:legacy_upload) do
create(:upload, :attachment_upload, :with_file,
path: "uploads/-/system/note/attachment/some_part/#{note.id}/#{filename}", model: note)
end
it_behaves_like 'move error'
end
context 'when the file path does not include system/note/attachment' do
let(:legacy_upload) do
create(:upload, :attachment_upload, :with_file,
path: "uploads/-/system#{note.id}/#{filename}", model: note)
end
it_behaves_like 'move error'
end
context 'when the file move raises an error' do
before do
allow(FileUtils).to receive(:mv).and_raise(Errno::EACCES)
end
it_behaves_like 'move error'
end
context 'when the file can be handled correctly' do
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy local file'
it_behaves_like 'legacy upload deletion'
end
end
context 'when object storage is disabled for FileUploader' do
context 'when the file belongs to a legacy project' do
let(:project) { legacy_project }
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy local file'
it_behaves_like 'legacy upload deletion'
end
context 'when the file belongs to a hashed project' do
let(:project) { hashed_project }
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy local file'
it_behaves_like 'legacy upload deletion'
end
end
context 'when object storage is enabled for FileUploader' do
# The process of migrating to object storage is a manual one,
# so it would go against expectations to automatically migrate these files
# to object storage during this migration.
# After this migration, these files should be able to successfully migrate to object storage.
before do
stub_uploads_object_storage(FileUploader)
end
context 'when the file belongs to a legacy project' do
let(:project) { legacy_project }
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy local file'
it_behaves_like 'legacy upload deletion'
end
context 'when the file belongs to a hashed project' do
let(:project) { hashed_project }
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy local file'
it_behaves_like 'legacy upload deletion'
end
end
end
context 'when legacy uploads are stored in object storage' do
let(:legacy_upload) { create_remote_upload(note, filename) }
let(:remote_file) do
{ key: "#{legacy_upload.path}" }
end
let(:connection) { ::Fog::Storage.new(FileUploader.object_store_credentials) }
let(:bucket) { connection.directories.create(key: 'uploads') }
before do
stub_uploads_object_storage(FileUploader)
end
shared_examples 'legacy remote file' do
it 'removes the file correctly' do
# expect(bucket.files.get(remote_file[:key])).to be_nil
described_class.new(legacy_upload).execute
expect(bucket.files.get(remote_file[:key])).to be_nil
end
it 'moves legacy uploads to the correct remote location' do
described_class.new(legacy_upload).execute
connection = ::Fog::Storage.new(FileUploader.object_store_credentials)
expect(connection.get_object('uploads', new_upload.path)[:status]).to eq(200)
end
end
context 'when the upload file does not exist on the filesystem' do
it_behaves_like 'legacy upload deletion'
end
context 'when the file belongs to a legacy project' do
before do
bucket.files.create(remote_file)
end
let(:project) { legacy_project }
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy remote file'
it_behaves_like 'legacy upload deletion'
end
context 'when the file belongs to a hashed project' do
before do
bucket.files.create(remote_file)
end
let(:project) { hashed_project }
it_behaves_like 'migrates the file correctly'
it_behaves_like 'legacy remote file'
it_behaves_like 'legacy upload deletion'
end
end
end
# rubocop: enable RSpec/FactoriesInMigrationSpecs

View file

@ -0,0 +1,63 @@
# frozen_string_literal: true
require 'spec_helper'
# rubocop: disable RSpec/FactoriesInMigrationSpecs
describe Gitlab::BackgroundMigration::LegacyUploadsMigrator do
let(:test_dir) { FileUploader.options['storage_path'] }
let!(:hashed_project) { create(:project) }
let!(:legacy_project) { create(:project, :legacy_storage) }
let!(:issue) { create(:issue, project: hashed_project) }
let!(:issue_legacy) { create(:issue, project: legacy_project) }
let!(:note1) { create(:note, project: hashed_project, noteable: issue) }
let!(:note2) { create(:note, project: hashed_project, noteable: issue) }
let!(:note_legacy) { create(:note, project: legacy_project, noteable: issue_legacy) }
def create_upload(model, with_file = true)
filename = 'image.png'
params = {
path: "uploads/-/system/note/attachment/#{model.id}/#{filename}",
model: model,
store: ObjectStorage::Store::LOCAL
}
if with_file
upload = create(:upload, :with_file, :attachment_upload, params)
model.update(attachment: upload.build_uploader)
model.attachment.upload
else
create(:upload, :attachment_upload, params)
end
end
let!(:legacy_upload) { create_upload(note1) }
let!(:legacy_upload_no_file) { create_upload(note2, false) }
let!(:legacy_upload_legacy_project) { create_upload(note_legacy) }
let(:start_id) { 1 }
let(:end_id) { 10000 }
subject { described_class.new.perform(start_id, end_id) }
it 'removes all legacy files' do
expect(File.exist?(legacy_upload.absolute_path)).to be_truthy
expect(File.exist?(legacy_upload_no_file.absolute_path)).to be_falsey
expect(File.exist?(legacy_upload_legacy_project.absolute_path)).to be_truthy
subject
expect(File.exist?(legacy_upload.absolute_path)).to be_falsey
expect(File.exist?(legacy_upload_no_file.absolute_path)).to be_falsey
expect(File.exist?(legacy_upload_legacy_project.absolute_path)).to be_falsey
end
it 'removes all AttachmentUploader records' do
expect { subject }.to change { Upload.where(uploader: 'AttachmentUploader').count }.from(3).to(0)
end
it 'creates new uploads for successfully migrated records' do
expect { subject }.to change { Upload.where(uploader: 'FileUploader').count }.from(0).to(2)
end
end
# rubocop: enable RSpec/FactoriesInMigrationSpecs