diff --git a/changelogs/unreleased/50070-legacy-attachments.yml b/changelogs/unreleased/50070-legacy-attachments.yml new file mode 100644 index 00000000000..03f1cec0f67 --- /dev/null +++ b/changelogs/unreleased/50070-legacy-attachments.yml @@ -0,0 +1,5 @@ +--- +title: Create rake tasks for migrating legacy uploads out of deprecated paths +merge_request: 29409 +author: +type: other diff --git a/doc/administration/logs.md b/doc/administration/logs.md index 47abbc512e0..306d611f6bf 100644 --- a/doc/administration/logs.md +++ b/doc/administration/logs.md @@ -309,6 +309,12 @@ GraphQL queries are recorded in that file. For example: {"query_string":"query IntrospectionQuery{__schema {queryType { name },mutationType { name }}}...(etc)","variables":{"a":1,"b":2},"complexity":181,"depth":1,"duration":7} ``` +## `migrations.log` + +Introduced in GitLab 12.3. This file lives in `/var/log/gitlab/gitlab-rails/migrations.log` for +Omnibus GitLab packages or in `/home/git/gitlab/log/migrations.log` for +installations from source. + ## Reconfigure Logs Reconfigure log files live in `/var/log/gitlab/reconfigure` for Omnibus GitLab diff --git a/doc/administration/raketasks/uploads/migrate.md b/doc/administration/raketasks/uploads/migrate.md index fd8ea8d3162..86e8b763f51 100644 --- a/doc/administration/raketasks/uploads/migrate.md +++ b/doc/administration/raketasks/uploads/migrate.md @@ -103,3 +103,13 @@ sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, S sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]" ``` + +## Migrate legacy uploads out of deprecated paths + +> Introduced in GitLab 12.3. + +To migrate all uploads created by legacy uploaders, run: + +```shell +bundle exec rake gitlab:uploads:legacy:migrate +``` diff --git a/doc/development/rake_tasks.md b/doc/development/rake_tasks.md index c97e179910b..67f36eb1ab4 100644 --- a/doc/development/rake_tasks.md +++ b/doc/development/rake_tasks.md @@ -216,3 +216,4 @@ bundle exec rake routes Since these take some time to create, it's often helpful to save the output to a file for quick reference. + diff --git a/lib/gitlab/background_migration/legacy_upload_mover.rb b/lib/gitlab/background_migration/legacy_upload_mover.rb new file mode 100644 index 00000000000..051c1176edb --- /dev/null +++ b/lib/gitlab/background_migration/legacy_upload_mover.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # This class takes a legacy upload and migrates it to the correct location + class LegacyUploadMover + include Gitlab::Utils::StrongMemoize + + attr_reader :upload, :project, :note + attr_accessor :logger + + def initialize(upload) + @upload = upload + @note = Note.find_by(id: upload.model_id) + @project = note&.project + @logger = Gitlab::BackgroundMigration::Logger.build + end + + def execute + return unless upload + + if !project + # if we don't have models associated with the upload we can not move it + warn('Deleting upload due to model not found.') + + destroy_legacy_upload + elsif note.is_a?(LegacyDiffNote) + return unless move_legacy_diff_file + + migrate_upload + elsif !legacy_file_exists? + warn('Deleting upload due to file not found.') + destroy_legacy_upload + else + migrate_upload + end + end + + private + + def migrate_upload + return unless copy_upload_to_project + + add_upload_link_to_note_text + destroy_legacy_file + destroy_legacy_upload + end + + # we should proceed and log whenever one upload copy fails, no matter the reasons + # rubocop: disable Lint/RescueException + def copy_upload_to_project + @uploader = FileUploader.copy_to(legacy_file_uploader, project) + + logger.info( + message: 'MigrateLegacyUploads: File copied successfully', + old_path: legacy_file_uploader.file.path, new_path: @uploader.file.path + ) + true + rescue Exception => e + warn( + 'File could not be copied to project uploads', + file_path: legacy_file_uploader.file.path, error: e.message + ) + false + end + # rubocop: enable Lint/RescueException + + def destroy_legacy_upload + if note + note.remove_attachment = true + note.save + end + + if upload.destroy + logger.info(message: 'MigrateLegacyUploads: Upload was destroyed.', upload: upload.inspect) + else + warn('MigrateLegacyUploads: Upload destroy failed.') + end + end + + def destroy_legacy_file + legacy_file_uploader.file.delete + end + + def add_upload_link_to_note_text + new_text = "#{note.note} \n #{@uploader.markdown_link}" + # Bypass validations because old data may have invalid + # noteable values. If we fail hard here, we may kill the + # entire background migration, which affects a range of notes. + note.update_attribute(:note, new_text) + end + + def legacy_file_uploader + strong_memoize(:legacy_file_uploader) do + uploader = upload.build_uploader + uploader.retrieve_from_store!(File.basename(upload.path)) + uploader + end + end + + def legacy_file_exists? + legacy_file_uploader.file.exists? + end + + # we should proceed and log whenever one upload copy fails, no matter the reasons + # rubocop: disable Lint/RescueException + def move_legacy_diff_file + old_path = upload.absolute_path + old_path_sub = '-/system/note/attachment' + + if !File.exist?(old_path) || !old_path.include?(old_path_sub) + log_legacy_diff_note_problem(old_path) + return false + end + + new_path = upload.absolute_path.sub(old_path_sub, '-/system/legacy_diff_note/attachment') + new_dir = File.dirname(new_path) + FileUtils.mkdir_p(new_dir) + + FileUtils.mv(old_path, new_path) + rescue Exception => e + log_legacy_diff_note_problem(old_path, new_path, e) + false + end + + def warn(message, params = {}) + logger.warn( + params.merge(message: "MigrateLegacyUploads: #{message}", upload: upload.inspect) + ) + end + + def log_legacy_diff_note_problem(old_path, new_path = nil, error = nil) + warn('LegacyDiffNote upload could not be moved to a new path', + old_path: old_path, new_path: new_path, error: error&.message + ) + end + # rubocop: enable Lint/RescueException + end + end +end diff --git a/lib/gitlab/background_migration/legacy_uploads_migrator.rb b/lib/gitlab/background_migration/legacy_uploads_migrator.rb new file mode 100644 index 00000000000..a9d38a27e0c --- /dev/null +++ b/lib/gitlab/background_migration/legacy_uploads_migrator.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # This migration takes all legacy uploads (that were uploaded using AttachmentUploader) + # and migrate them to the new (FileUploader) location (=under projects). + # + # We have dependencies (uploaders) in this migration because extracting code would add a lot of complexity + # and possible errors could appear as the logic in the uploaders is not trivial. + # + # This migration will be removed in 13.0 in order to get rid of a migration that depends on + # the application code. + class LegacyUploadsMigrator + include Database::MigrationHelpers + + def perform(start_id, end_id) + Upload.where(id: start_id..end_id, uploader: 'AttachmentUploader').find_each do |upload| + LegacyUploadMover.new(upload).execute + end + end + end + end +end diff --git a/lib/gitlab/background_migration/logger.rb b/lib/gitlab/background_migration/logger.rb new file mode 100644 index 00000000000..4ea89771eff --- /dev/null +++ b/lib/gitlab/background_migration/logger.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # Logger that can be used for migrations logging + class Logger < ::Gitlab::JsonLogger + def self.file_name_noext + 'migrations' + end + end + end +end diff --git a/lib/tasks/gitlab/uploads/legacy.rake b/lib/tasks/gitlab/uploads/legacy.rake new file mode 100644 index 00000000000..18fb8afe455 --- /dev/null +++ b/lib/tasks/gitlab/uploads/legacy.rake @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +namespace :gitlab do + namespace :uploads do + namespace :legacy do + desc "GitLab | Uploads | Migrate all legacy attachments" + task migrate: :environment do + class Upload < ApplicationRecord + self.table_name = 'uploads' + + include ::EachBatch + end + + migration = 'LegacyUploadsMigrator'.freeze + batch_size = 5000 + delay_interval = 5.minutes.to_i + + Upload.where(uploader: 'AttachmentUploader').each_batch(of: batch_size) do |relation, index| + start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + delay = index * delay_interval + + BackgroundMigrationWorker.perform_in(delay, migration, [start_id, end_id]) + end + end + end + end +end diff --git a/spec/factories/uploads.rb b/spec/factories/uploads.rb index 2ede92c8af0..3f6326114c9 100644 --- a/spec/factories/uploads.rb +++ b/spec/factories/uploads.rb @@ -56,10 +56,7 @@ FactoryBot.define do end trait :attachment_upload do - transient do - mount_point :attachment - end - + mount_point :attachment model { build(:note) } uploader "AttachmentUploader" end diff --git a/spec/lib/gitlab/background_migration/legacy_upload_mover_spec.rb b/spec/lib/gitlab/background_migration/legacy_upload_mover_spec.rb new file mode 100644 index 00000000000..7d67dc0251d --- /dev/null +++ b/spec/lib/gitlab/background_migration/legacy_upload_mover_spec.rb @@ -0,0 +1,296 @@ +# frozen_string_literal: true +require 'spec_helper' + +# rubocop: disable RSpec/FactoriesInMigrationSpecs +describe Gitlab::BackgroundMigration::LegacyUploadMover do + let(:test_dir) { FileUploader.options['storage_path'] } + let(:filename) { 'image.png' } + + let!(:namespace) { create(:namespace) } + let!(:legacy_project) { create(:project, :legacy_storage, namespace: namespace) } + let!(:hashed_project) { create(:project, namespace: namespace) } + # default project + let(:project) { legacy_project } + + let!(:issue) { create(:issue, project: project) } + let!(:note) { create(:note, note: 'some note', project: project, noteable: issue) } + + let(:legacy_upload) { create_upload(note, filename) } + + def create_remote_upload(model, filename) + create(:upload, :attachment_upload, + path: "note/attachment/#{model.id}/#{filename}", secret: nil, + store: ObjectStorage::Store::REMOTE, model: model) + end + + def create_upload(model, filename, with_file = true) + params = { + path: "uploads/-/system/note/attachment/#{model.id}/#{filename}", + model: model, + store: ObjectStorage::Store::LOCAL + } + + if with_file + upload = create(:upload, :with_file, :attachment_upload, params) + model.update(attachment: upload.build_uploader) + model.attachment.upload + else + create(:upload, :attachment_upload, params) + end + end + + def new_upload + Upload.find_by(model_id: project.id, model_type: 'Project') + end + + def expect_error_log + expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger| + expect(logger).to receive(:warn) + end + end + + shared_examples 'legacy upload deletion' do + it 'removes the upload record' do + described_class.new(legacy_upload).execute + + expect { legacy_upload.reload }.to raise_error(ActiveRecord::RecordNotFound) + end + end + + shared_examples 'move error' do + it 'does not remove the upload file' do + expect_error_log + + described_class.new(legacy_upload).execute + + expect(legacy_upload.reload).to eq(legacy_upload) + end + end + + shared_examples 'migrates the file correctly' do + before do + described_class.new(legacy_upload).execute + end + + it 'creates a new uplaod record correctly' do + expect(new_upload.secret).not_to be_nil + expect(new_upload.path).to end_with("#{new_upload.secret}/image.png") + expect(new_upload.model_id).to eq(project.id) + expect(new_upload.model_type).to eq('Project') + expect(new_upload.uploader).to eq('FileUploader') + end + + it 'updates the legacy upload note so that it references the file in the markdown' do + expected_path = File.join('/uploads', new_upload.secret, 'image.png') + expected_markdown = "some note \n ![image](#{expected_path})" + expect(note.reload.note).to eq(expected_markdown) + end + + it 'removes the attachment from the note model' do + expect(note.reload.attachment.file).to be_nil + end + end + + context 'when no model found for the upload' do + before do + legacy_upload.model = nil + expect_error_log + end + + it_behaves_like 'legacy upload deletion' + end + + context 'when the upload move fails' do + before do + expect(FileUploader).to receive(:copy_to).and_raise('failed') + end + + it_behaves_like 'move error' + end + + context 'when the upload is in local storage' do + shared_examples 'legacy local file' do + it 'removes the file correctly' do + expect(File.exist?(legacy_upload.absolute_path)).to be_truthy + + described_class.new(legacy_upload).execute + + expect(File.exist?(legacy_upload.absolute_path)).to be_falsey + end + + it 'moves legacy uploads to the correct location' do + described_class.new(legacy_upload).execute + + expected_path = File.join(test_dir, 'uploads', project.disk_path, new_upload.secret, filename) + expect(File.exist?(expected_path)).to be_truthy + end + end + + context 'when the upload file does not exist on the filesystem' do + let(:legacy_upload) { create_upload(note, filename, false) } + + before do + expect_error_log + end + + it_behaves_like 'legacy upload deletion' + end + + context 'when an upload belongs to a legacy_diff_note' do + let!(:merge_request) { create(:merge_request, source_project: project) } + let!(:note) do + create(:legacy_diff_note_on_merge_request, + note: 'some note', project: project, noteable: merge_request) + end + let(:legacy_upload) do + create(:upload, :with_file, :attachment_upload, + path: "uploads/-/system/note/attachment/#{note.id}/#{filename}", model: note) + end + + context 'when the file does not exist for the upload' do + let(:legacy_upload) do + create(:upload, :attachment_upload, + path: "uploads/-/system/note/attachment/#{note.id}/#{filename}", model: note) + end + + it_behaves_like 'move error' + end + + context 'when the file does not exist on expected path' do + let(:legacy_upload) do + create(:upload, :attachment_upload, :with_file, + path: "uploads/-/system/note/attachment/some_part/#{note.id}/#{filename}", model: note) + end + + it_behaves_like 'move error' + end + + context 'when the file path does not include system/note/attachment' do + let(:legacy_upload) do + create(:upload, :attachment_upload, :with_file, + path: "uploads/-/system#{note.id}/#{filename}", model: note) + end + + it_behaves_like 'move error' + end + + context 'when the file move raises an error' do + before do + allow(FileUtils).to receive(:mv).and_raise(Errno::EACCES) + end + + it_behaves_like 'move error' + end + + context 'when the file can be handled correctly' do + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy local file' + it_behaves_like 'legacy upload deletion' + end + end + + context 'when object storage is disabled for FileUploader' do + context 'when the file belongs to a legacy project' do + let(:project) { legacy_project } + + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy local file' + it_behaves_like 'legacy upload deletion' + end + + context 'when the file belongs to a hashed project' do + let(:project) { hashed_project } + + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy local file' + it_behaves_like 'legacy upload deletion' + end + end + + context 'when object storage is enabled for FileUploader' do + # The process of migrating to object storage is a manual one, + # so it would go against expectations to automatically migrate these files + # to object storage during this migration. + # After this migration, these files should be able to successfully migrate to object storage. + + before do + stub_uploads_object_storage(FileUploader) + end + + context 'when the file belongs to a legacy project' do + let(:project) { legacy_project } + + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy local file' + it_behaves_like 'legacy upload deletion' + end + + context 'when the file belongs to a hashed project' do + let(:project) { hashed_project } + + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy local file' + it_behaves_like 'legacy upload deletion' + end + end + end + + context 'when legacy uploads are stored in object storage' do + let(:legacy_upload) { create_remote_upload(note, filename) } + let(:remote_file) do + { key: "#{legacy_upload.path}" } + end + let(:connection) { ::Fog::Storage.new(FileUploader.object_store_credentials) } + let(:bucket) { connection.directories.create(key: 'uploads') } + + before do + stub_uploads_object_storage(FileUploader) + end + + shared_examples 'legacy remote file' do + it 'removes the file correctly' do + # expect(bucket.files.get(remote_file[:key])).to be_nil + + described_class.new(legacy_upload).execute + + expect(bucket.files.get(remote_file[:key])).to be_nil + end + + it 'moves legacy uploads to the correct remote location' do + described_class.new(legacy_upload).execute + + connection = ::Fog::Storage.new(FileUploader.object_store_credentials) + expect(connection.get_object('uploads', new_upload.path)[:status]).to eq(200) + end + end + + context 'when the upload file does not exist on the filesystem' do + it_behaves_like 'legacy upload deletion' + end + + context 'when the file belongs to a legacy project' do + before do + bucket.files.create(remote_file) + end + + let(:project) { legacy_project } + + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy remote file' + it_behaves_like 'legacy upload deletion' + end + + context 'when the file belongs to a hashed project' do + before do + bucket.files.create(remote_file) + end + + let(:project) { hashed_project } + + it_behaves_like 'migrates the file correctly' + it_behaves_like 'legacy remote file' + it_behaves_like 'legacy upload deletion' + end + end +end +# rubocop: enable RSpec/FactoriesInMigrationSpecs diff --git a/spec/lib/gitlab/background_migration/legacy_uploads_migrator_spec.rb b/spec/lib/gitlab/background_migration/legacy_uploads_migrator_spec.rb new file mode 100644 index 00000000000..ed8cbfeb11f --- /dev/null +++ b/spec/lib/gitlab/background_migration/legacy_uploads_migrator_spec.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true +require 'spec_helper' + +# rubocop: disable RSpec/FactoriesInMigrationSpecs +describe Gitlab::BackgroundMigration::LegacyUploadsMigrator do + let(:test_dir) { FileUploader.options['storage_path'] } + + let!(:hashed_project) { create(:project) } + let!(:legacy_project) { create(:project, :legacy_storage) } + let!(:issue) { create(:issue, project: hashed_project) } + let!(:issue_legacy) { create(:issue, project: legacy_project) } + + let!(:note1) { create(:note, project: hashed_project, noteable: issue) } + let!(:note2) { create(:note, project: hashed_project, noteable: issue) } + let!(:note_legacy) { create(:note, project: legacy_project, noteable: issue_legacy) } + + def create_upload(model, with_file = true) + filename = 'image.png' + params = { + path: "uploads/-/system/note/attachment/#{model.id}/#{filename}", + model: model, + store: ObjectStorage::Store::LOCAL + } + + if with_file + upload = create(:upload, :with_file, :attachment_upload, params) + model.update(attachment: upload.build_uploader) + model.attachment.upload + else + create(:upload, :attachment_upload, params) + end + end + + let!(:legacy_upload) { create_upload(note1) } + let!(:legacy_upload_no_file) { create_upload(note2, false) } + let!(:legacy_upload_legacy_project) { create_upload(note_legacy) } + + let(:start_id) { 1 } + let(:end_id) { 10000 } + + subject { described_class.new.perform(start_id, end_id) } + + it 'removes all legacy files' do + expect(File.exist?(legacy_upload.absolute_path)).to be_truthy + expect(File.exist?(legacy_upload_no_file.absolute_path)).to be_falsey + expect(File.exist?(legacy_upload_legacy_project.absolute_path)).to be_truthy + + subject + + expect(File.exist?(legacy_upload.absolute_path)).to be_falsey + expect(File.exist?(legacy_upload_no_file.absolute_path)).to be_falsey + expect(File.exist?(legacy_upload_legacy_project.absolute_path)).to be_falsey + end + + it 'removes all AttachmentUploader records' do + expect { subject }.to change { Upload.where(uploader: 'AttachmentUploader').count }.from(3).to(0) + end + + it 'creates new uploads for successfully migrated records' do + expect { subject }.to change { Upload.where(uploader: 'FileUploader').count }.from(0).to(2) + end +end +# rubocop: enable RSpec/FactoriesInMigrationSpecs