Add rake tasks for migrating legacy uploads

- move uploads created by AttachmentUploader
- handle also files created for legacy_diff_notes
This commit is contained in:
Jarka Košanová 2019-08-13 10:30:32 +02:00
parent 0297f1c2b8
commit 19db315734
11 changed files with 584 additions and 4 deletions

View file

@ -0,0 +1,5 @@
---
title: Create rake tasks for migrating legacy uploads out of deprecated paths
merge_request: 29409
author:
type: other

View file

@ -309,6 +309,12 @@ GraphQL queries are recorded in that file. For example:
{"query_string":"query IntrospectionQuery{__schema {queryType { name },mutationType { name }}}...(etc)","variables":{"a":1,"b":2},"complexity":181,"depth":1,"duration":7} {"query_string":"query IntrospectionQuery{__schema {queryType { name },mutationType { name }}}...(etc)","variables":{"a":1,"b":2},"complexity":181,"depth":1,"duration":7}
``` ```
## `migrations.log`
Introduced in GitLab 12.3. This file lives in `/var/log/gitlab/gitlab-rails/migrations.log` for
Omnibus GitLab packages or in `/home/git/gitlab/log/migrations.log` for
installations from source.
## Reconfigure Logs ## Reconfigure Logs
Reconfigure log files live in `/var/log/gitlab/reconfigure` for Omnibus GitLab Reconfigure log files live in `/var/log/gitlab/reconfigure` for Omnibus GitLab

View file

@ -103,3 +103,13 @@ sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, S
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]" sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]"
``` ```
## Migrate legacy uploads out of deprecated paths
> Introduced in GitLab 12.3.
To migrate all uploads created by legacy uploaders, run:
```shell
bundle exec rake gitlab:uploads:legacy:migrate
```

View file

@ -216,3 +216,4 @@ bundle exec rake routes
Since these take some time to create, it's often helpful to save the output to Since these take some time to create, it's often helpful to save the output to
a file for quick reference. a file for quick reference.

View file

@ -0,0 +1,140 @@
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # This class takes a legacy upload and migrates it to the correct location.
    #
    # The upload is expected to reference a note through `model_id`. Depending
    # on what is found, the upload is either copied to the note's project
    # (through FileUploader) and linked from the note text, or - when the note,
    # its project or the file itself is missing - the stale upload record is
    # destroyed. Every outcome is reported through the migrations logger.
    class LegacyUploadMover
      include Gitlab::Utils::StrongMemoize

      NOTE_MODEL_TYPE = 'Note'
      LEGACY_NOTE_PATH = '-/system/note/attachment'
      LEGACY_DIFF_NOTE_PATH = '-/system/legacy_diff_note/attachment'

      attr_reader :upload, :project, :note
      attr_accessor :logger

      # upload - the legacy upload record to migrate (may be nil, in which
      #          case #execute is a no-op).
      def initialize(upload)
        @upload = upload
        # Only resolve the note when the upload is not explicitly owned by a
        # different model: the same id in another table is an unrelated record.
        @note = upload && !owned_by_other_model? ? Note.find_by(id: upload.model_id) : nil
        @project = note&.project
        @logger = Gitlab::BackgroundMigration::Logger.build
      end

      # Migrates or cleans up the upload. Failures while copying/moving files
      # are logged and leave the upload record untouched.
      def execute
        return unless upload
        # Uploads that belong to another model type must be neither moved nor
        # deleted by this mover.
        return if owned_by_other_model?

        if !project
          # if we don't have models associated with the upload we can not move it
          warn('Deleting upload due to model not found.')

          destroy_legacy_upload
        elsif note.is_a?(LegacyDiffNote)
          return unless move_legacy_diff_file

          migrate_upload
        elsif !legacy_file_exists?
          warn('Deleting upload due to file not found.')

          destroy_legacy_upload
        else
          migrate_upload
        end
      end

      private

      # True when the upload names a model type other than Note. A blank
      # model type is treated as an orphaned upload and still handled.
      def owned_by_other_model?
        upload.model_type.present? && upload.model_type != NOTE_MODEL_TYPE
      end

      # Copies the file to the project, links it from the note and only then
      # removes the legacy file and upload record.
      def migrate_upload
        return unless copy_upload_to_project

        add_upload_link_to_note_text
        destroy_legacy_file
        destroy_legacy_upload
      end

      # We should proceed and log whenever one upload copy fails. Only
      # StandardError is rescued so that signals and process exits are still
      # propagated to the worker.
      #
      # Returns true when the copy succeeded, false otherwise.
      def copy_upload_to_project
        @uploader = FileUploader.copy_to(legacy_file_uploader, project)

        logger.info(
          message: 'MigrateLegacyUploads: File copied successfully',
          old_path: legacy_file_uploader.file.path, new_path: @uploader.file.path
        )
        true
      rescue StandardError => e
        warn(
          'File could not be copied to project uploads',
          file_path: legacy_file_uploader.file.path, error: e.message
        )
        false
      end

      # Detaches the attachment from the note (when there is one) and destroys
      # the upload record, logging the result.
      def destroy_legacy_upload
        if note
          note.remove_attachment = true
          note.save
        end

        if upload.destroy
          logger.info(message: 'MigrateLegacyUploads: Upload was destroyed.', upload: upload.inspect)
        else
          warn('MigrateLegacyUploads: Upload destroy failed.')
        end
      end

      def destroy_legacy_file
        legacy_file_uploader.file.delete
      end

      # Appends the markdown link of the copied file to the note text.
      def add_upload_link_to_note_text
        new_text = "#{note.note} \n #{@uploader.markdown_link}"
        # Bypass validations because old data may have invalid
        # noteable values. If we fail hard here, we may kill the
        # entire background migration, which affects a range of notes.
        note.update_attribute(:note, new_text)
      end

      # Memoized uploader built from the upload record with the legacy file
      # retrieved from its store.
      def legacy_file_uploader
        strong_memoize(:legacy_file_uploader) do
          uploader = upload.build_uploader
          uploader.retrieve_from_store!(File.basename(upload.path))
          uploader
        end
      end

      def legacy_file_exists?
        legacy_file_uploader.file.exists?
      end

      # Moves the file of a LegacyDiffNote upload from the note attachment
      # directory to the legacy_diff_note attachment directory.
      #
      # We should proceed and log whenever one move fails, so any
      # StandardError is logged and reported as a failure.
      #
      # Returns true when the file was moved, false otherwise.
      def move_legacy_diff_file
        old_path = upload.absolute_path

        if !File.exist?(old_path) || !old_path.include?(LEGACY_NOTE_PATH)
          log_legacy_diff_note_problem(old_path)
          return false
        end

        new_path = old_path.sub(LEGACY_NOTE_PATH, LEGACY_DIFF_NOTE_PATH)
        FileUtils.mkdir_p(File.dirname(new_path))
        FileUtils.mv(old_path, new_path)
        true
      rescue StandardError => e
        log_legacy_diff_note_problem(old_path, new_path, e)
        false
      end

      # Logs a warning prefixed with the migration name; the inspected upload
      # is always attached to the log entry.
      def warn(message, params = {})
        logger.warn(
          params.merge(message: "MigrateLegacyUploads: #{message}", upload: upload.inspect)
        )
      end

      def log_legacy_diff_note_problem(old_path, new_path = nil, error = nil)
        warn('LegacyDiffNote upload could not be moved to a new path',
          old_path: old_path, new_path: new_path, error: error&.message
        )
      end
    end
  end
end

View file

@ -0,0 +1,23 @@
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # This migration takes all legacy uploads (that were uploaded using AttachmentUploader)
    # and migrate them to the new (FileUploader) location (=under projects).
    #
    # We have dependencies (uploaders) in this migration because extracting code would add a lot of complexity
    # and possible errors could appear as the logic in the uploaders is not trivial.
    #
    # This migration will be removed in 13.0 in order to get rid of a migration that depends on
    # the application code.
    class LegacyUploadsMigrator
      include Database::MigrationHelpers

      # Runs LegacyUploadMover for every note attachment upload in the id range.
      #
      # start_id - first upload id of the batch (inclusive)
      # end_id   - last upload id of the batch (inclusive)
      def perform(start_id, end_id)
        # LegacyUploadMover resolves the owner as a Note, so only uploads that
        # actually belong to notes are handed over; attachments of other
        # models must not be touched.
        legacy_note_uploads = Upload.where(
          id: start_id..end_id,
          uploader: 'AttachmentUploader',
          model_type: 'Note'
        )

        legacy_note_uploads.find_each do |upload|
          LegacyUploadMover.new(upload).execute
        end
      end
    end
  end
end

View file

@ -0,0 +1,12 @@
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # JSON logger dedicated to migrations; entries go to the log whose
    # base file name (without extension) is "migrations".
    class Logger < ::Gitlab::JsonLogger
      class << self
        # Base name of the log file, without extension.
        def file_name_noext
          'migrations'
        end
      end
    end
  end
end

View file

@ -0,0 +1,27 @@
# frozen_string_literal: true
namespace :gitlab do
  namespace :uploads do
    namespace :legacy do
      desc "GitLab | Uploads | Migrate all legacy attachments"
      # Schedules LegacyUploadsMigrator background jobs, one per batch of
      # legacy note attachment uploads, spaced delay_interval apart.
      task migrate: :environment do
        class Upload < ApplicationRecord
          self.table_name = 'uploads'

          include ::EachBatch
        end

        migration = 'LegacyUploadsMigrator'
        batch_size = 5000
        delay_interval = 5.minutes.to_i

        # Only note attachments are migrated; uploads that other models
        # created through AttachmentUploader are left out of the batches.
        legacy_uploads = Upload.where(uploader: 'AttachmentUploader', model_type: 'Note')

        legacy_uploads.each_batch(of: batch_size) do |relation, index|
          # The aggregate expression is raw SQL, so it is marked as such explicitly.
          start_id, end_id = relation.pluck(Arel.sql('MIN(id), MAX(id)')).first
          delay = index * delay_interval

          BackgroundMigrationWorker.perform_in(delay, migration, [start_id, end_id])
        end
      end
    end
  end
end

View file

@ -56,10 +56,7 @@ FactoryBot.define do
end end
trait :attachment_upload do trait :attachment_upload do
transient do mount_point :attachment
mount_point :attachment
end
model { build(:note) } model { build(:note) }
uploader "AttachmentUploader" uploader "AttachmentUploader"
end end

View file

@ -0,0 +1,296 @@
# frozen_string_literal: true
require 'spec_helper'
# rubocop: disable RSpec/FactoriesInMigrationSpecs
# Covers LegacyUploadMover for local and object storage uploads, for legacy
# and hashed projects, and for the failure paths (missing model, missing file,
# failing copy/move).
describe Gitlab::BackgroundMigration::LegacyUploadMover do
  let(:test_dir) { FileUploader.options['storage_path'] }
  let(:filename) { 'image.png' }

  let!(:namespace) { create(:namespace) }
  let!(:legacy_project) { create(:project, :legacy_storage, namespace: namespace) }
  let!(:hashed_project) { create(:project, namespace: namespace) }
  # default project
  let(:project) { legacy_project }

  let!(:issue) { create(:issue, project: project) }
  let!(:note) { create(:note, note: 'some note', project: project, noteable: issue) }

  let(:legacy_upload) { create_upload(note, filename) }

  # Creates an AttachmentUploader upload record flagged as stored remotely.
  def create_remote_upload(model, filename)
    create(:upload, :attachment_upload,
           path: "note/attachment/#{model.id}/#{filename}", secret: nil,
           store: ObjectStorage::Store::REMOTE, model: model)
  end

  # Creates a local AttachmentUploader upload record; with_file controls
  # whether a file is created along with the record.
  def create_upload(model, filename, with_file = true)
    params = {
      path: "uploads/-/system/note/attachment/#{model.id}/#{filename}",
      model: model,
      store: ObjectStorage::Store::LOCAL
    }

    if with_file
      upload = create(:upload, :with_file, :attachment_upload, params)
      model.update(attachment: upload.build_uploader)
      model.attachment.upload
    else
      create(:upload, :attachment_upload, params)
    end
  end

  # The upload record created for the project by the migration.
  def new_upload
    Upload.find_by(model_id: project.id, model_type: 'Project')
  end

  def expect_error_log
    expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
      expect(logger).to receive(:warn)
    end
  end

  shared_examples 'legacy upload deletion' do
    it 'removes the upload record' do
      described_class.new(legacy_upload).execute

      expect { legacy_upload.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end
  end

  shared_examples 'move error' do
    it 'does not remove the upload file' do
      expect_error_log

      described_class.new(legacy_upload).execute

      expect(legacy_upload.reload).to eq(legacy_upload)
    end
  end

  shared_examples 'migrates the file correctly' do
    before do
      described_class.new(legacy_upload).execute
    end

    it 'creates a new upload record correctly' do
      expect(new_upload.secret).not_to be_nil
      expect(new_upload.path).to end_with("#{new_upload.secret}/image.png")
      expect(new_upload.model_id).to eq(project.id)
      expect(new_upload.model_type).to eq('Project')
      expect(new_upload.uploader).to eq('FileUploader')
    end

    it 'updates the legacy upload note so that it references the file in the markdown' do
      expected_path = File.join('/uploads', new_upload.secret, 'image.png')
      expected_markdown = "some note \n ![image](#{expected_path})"

      expect(note.reload.note).to eq(expected_markdown)
    end

    it 'removes the attachment from the note model' do
      expect(note.reload.attachment.file).to be_nil
    end
  end

  context 'when no model found for the upload' do
    before do
      legacy_upload.model = nil
      expect_error_log
    end

    it_behaves_like 'legacy upload deletion'
  end

  context 'when the upload move fails' do
    before do
      expect(FileUploader).to receive(:copy_to).and_raise('failed')
    end

    it_behaves_like 'move error'
  end

  context 'when the upload is in local storage' do
    shared_examples 'legacy local file' do
      it 'removes the file correctly' do
        expect(File.exist?(legacy_upload.absolute_path)).to be_truthy

        described_class.new(legacy_upload).execute

        expect(File.exist?(legacy_upload.absolute_path)).to be_falsey
      end

      it 'moves legacy uploads to the correct location' do
        described_class.new(legacy_upload).execute

        expected_path = File.join(test_dir, 'uploads', project.disk_path, new_upload.secret, filename)
        expect(File.exist?(expected_path)).to be_truthy
      end
    end

    context 'when the upload file does not exist on the filesystem' do
      let(:legacy_upload) { create_upload(note, filename, false) }

      before do
        expect_error_log
      end

      it_behaves_like 'legacy upload deletion'
    end

    context 'when an upload belongs to a legacy_diff_note' do
      let!(:merge_request) { create(:merge_request, source_project: project) }
      let!(:note) do
        create(:legacy_diff_note_on_merge_request,
               note: 'some note', project: project, noteable: merge_request)
      end
      let(:legacy_upload) do
        create(:upload, :with_file, :attachment_upload,
               path: "uploads/-/system/note/attachment/#{note.id}/#{filename}", model: note)
      end

      context 'when the file does not exist for the upload' do
        let(:legacy_upload) do
          create(:upload, :attachment_upload,
                 path: "uploads/-/system/note/attachment/#{note.id}/#{filename}", model: note)
        end

        it_behaves_like 'move error'
      end

      context 'when the file does not exist on expected path' do
        let(:legacy_upload) do
          create(:upload, :attachment_upload, :with_file,
                 path: "uploads/-/system/note/attachment/some_part/#{note.id}/#{filename}", model: note)
        end

        it_behaves_like 'move error'
      end

      context 'when the file path does not include system/note/attachment' do
        let(:legacy_upload) do
          create(:upload, :attachment_upload, :with_file,
                 path: "uploads/-/system#{note.id}/#{filename}", model: note)
        end

        it_behaves_like 'move error'
      end

      context 'when the file move raises an error' do
        before do
          allow(FileUtils).to receive(:mv).and_raise(Errno::EACCES)
        end

        it_behaves_like 'move error'
      end

      context 'when the file can be handled correctly' do
        it_behaves_like 'migrates the file correctly'
        it_behaves_like 'legacy local file'
        it_behaves_like 'legacy upload deletion'
      end
    end

    context 'when object storage is disabled for FileUploader' do
      context 'when the file belongs to a legacy project' do
        let(:project) { legacy_project }

        it_behaves_like 'migrates the file correctly'
        it_behaves_like 'legacy local file'
        it_behaves_like 'legacy upload deletion'
      end

      context 'when the file belongs to a hashed project' do
        let(:project) { hashed_project }

        it_behaves_like 'migrates the file correctly'
        it_behaves_like 'legacy local file'
        it_behaves_like 'legacy upload deletion'
      end
    end

    context 'when object storage is enabled for FileUploader' do
      # The process of migrating to object storage is a manual one,
      # so it would go against expectations to automatically migrate these files
      # to object storage during this migration.
      # After this migration, these files should be able to successfully migrate to object storage.

      before do
        stub_uploads_object_storage(FileUploader)
      end

      context 'when the file belongs to a legacy project' do
        let(:project) { legacy_project }

        it_behaves_like 'migrates the file correctly'
        it_behaves_like 'legacy local file'
        it_behaves_like 'legacy upload deletion'
      end

      context 'when the file belongs to a hashed project' do
        let(:project) { hashed_project }

        it_behaves_like 'migrates the file correctly'
        it_behaves_like 'legacy local file'
        it_behaves_like 'legacy upload deletion'
      end
    end
  end

  context 'when legacy uploads are stored in object storage' do
    let(:legacy_upload) { create_remote_upload(note, filename) }
    let(:remote_file) do
      { key: legacy_upload.path }
    end
    let(:connection) { ::Fog::Storage.new(FileUploader.object_store_credentials) }
    let(:bucket) { connection.directories.create(key: 'uploads') }

    before do
      stub_uploads_object_storage(FileUploader)
    end

    shared_examples 'legacy remote file' do
      it 'removes the file correctly' do
        described_class.new(legacy_upload).execute

        expect(bucket.files.get(remote_file[:key])).to be_nil
      end

      it 'moves legacy uploads to the correct remote location' do
        described_class.new(legacy_upload).execute

        expect(connection.get_object('uploads', new_upload.path)[:status]).to eq(200)
      end
    end

    # No remote file is created for the upload record in this context.
    context 'when the upload file does not exist in object storage' do
      it_behaves_like 'legacy upload deletion'
    end

    context 'when the file belongs to a legacy project' do
      before do
        bucket.files.create(remote_file)
      end

      let(:project) { legacy_project }

      it_behaves_like 'migrates the file correctly'
      it_behaves_like 'legacy remote file'
      it_behaves_like 'legacy upload deletion'
    end

    context 'when the file belongs to a hashed project' do
      before do
        bucket.files.create(remote_file)
      end

      let(:project) { hashed_project }

      it_behaves_like 'migrates the file correctly'
      it_behaves_like 'legacy remote file'
      it_behaves_like 'legacy upload deletion'
    end
  end
end
# rubocop: enable RSpec/FactoriesInMigrationSpecs

View file

@ -0,0 +1,63 @@
# frozen_string_literal: true
require 'spec_helper'
# rubocop: disable RSpec/FactoriesInMigrationSpecs
# Exercises LegacyUploadsMigrator#perform over three legacy uploads: two with
# a file on disk (hashed and legacy storage project) and one without a file.
describe Gitlab::BackgroundMigration::LegacyUploadsMigrator do
  let(:test_dir) { FileUploader.options['storage_path'] }

  let!(:hashed_project) { create(:project) }
  let!(:legacy_project) { create(:project, :legacy_storage) }
  let!(:issue) { create(:issue, project: hashed_project) }
  let!(:issue_legacy) { create(:issue, project: legacy_project) }

  let!(:note1) { create(:note, project: hashed_project, noteable: issue) }
  let!(:note2) { create(:note, project: hashed_project, noteable: issue) }
  let!(:note_legacy) { create(:note, project: legacy_project, noteable: issue_legacy) }

  # Builds an AttachmentUploader upload for the given note; when with_file is
  # false only the upload record is created.
  def create_upload(model, with_file = true)
    attributes = {
      path: "uploads/-/system/note/attachment/#{model.id}/#{'image.png'}",
      model: model,
      store: ObjectStorage::Store::LOCAL
    }

    return create(:upload, :attachment_upload, attributes) unless with_file

    stored_upload = create(:upload, :with_file, :attachment_upload, attributes)
    model.update(attachment: stored_upload.build_uploader)
    model.attachment.upload
  end

  let!(:legacy_upload) { create_upload(note1) }
  let!(:legacy_upload_no_file) { create_upload(note2, false) }
  let!(:legacy_upload_legacy_project) { create_upload(note_legacy) }

  let(:start_id) { 1 }
  let(:end_id) { 10000 }

  subject { described_class.new.perform(start_id, end_id) }

  it 'removes all legacy files' do
    hashed_path = legacy_upload.absolute_path
    missing_path = legacy_upload_no_file.absolute_path
    legacy_path = legacy_upload_legacy_project.absolute_path

    expect(File.exist?(hashed_path)).to be_truthy
    expect(File.exist?(missing_path)).to be_falsey
    expect(File.exist?(legacy_path)).to be_truthy

    subject

    [
      legacy_upload.absolute_path,
      legacy_upload_no_file.absolute_path,
      legacy_upload_legacy_project.absolute_path
    ].each do |path|
      expect(File.exist?(path)).to be_falsey
    end
  end

  it 'removes all AttachmentUploader records' do
    attachment_count = -> { Upload.where(uploader: 'AttachmentUploader').count }

    expect { subject }.to change(&attachment_count).from(3).to(0)
  end

  it 'creates new uploads for successfully migrated records' do
    file_upload_count = -> { Upload.where(uploader: 'FileUploader').count }

    expect { subject }.to change(&file_upload_count).from(0).to(2)
  end
end
# rubocop: enable RSpec/FactoriesInMigrationSpecs