gitlab-org--gitlab-foss/lib/gitlab/background_migration/populate_untracked_uploads.rb

112 lines
3.6 KiB
Ruby
Raw Normal View History

2017-11-27 01:57:21 -05:00
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
2017-11-27 01:57:21 -05:00
# Processes one batch of rows from the `untracked_files_for_uploads` table by
# adding each file to the `uploads` table if it does not exist there yet.
#
# Rows that were handled are deleted from `untracked_files_for_uploads`
# afterwards. Rows for which `to_h` raises are logged and left in place.
class PopulateUntrackedUploads
  # Migrates the untracked files whose IDs lie in `start_id..end_id`
  # (both bounds inclusive).
  #
  # Returns early without doing anything when either the untracked-files
  # table or the uploads table does not exist.
  def perform(start_id, end_id)
    return unless migrate?

    files = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.where(id: start_id..end_id)
    processed_files = insert_uploads_if_needed(files)
    processed_files.delete_all

    drop_temp_table_if_finished
  end

  private

  # True only when both tables this migration reads from and writes to exist.
  def migrate?
    Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.table_exists? &&
      Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.table_exists?
  end

  # Inserts an upload row for every file in the batch that survives the
  # three filters below.
  #
  # Returns the scope of files considered processed: every file of the batch
  # except the ones that failed in `filter_error_files`. Files dropped by the
  # other filters are still part of that scope, so the caller deletes them.
  def insert_uploads_if_needed(files)
    filtered_files, error_files = filter_error_files(files)
    filtered_files = filter_existing_uploads(filtered_files)
    filtered_files = filter_deleted_models(filtered_files)
    insert(filtered_files)

    files.where.not(id: error_files.map(&:id))
  end

  # Splits `files` into `[usable, failed]`, where `failed` holds every file
  # whose `to_h` raised a StandardError. Each failure is logged together with
  # the file's path, the error message and the backtrace.
  def filter_error_files(files)
    files.partition do |file|
      file.to_h
      true
    rescue => e
      msg = <<~MSG
        Error parsing path "#{file.path}":
        #{e.message}
        #{e.backtrace.join("\n ")}
      MSG

      Rails.logger.error(msg) # rubocop:disable Gitlab/RailsLogger
      false
    end
  end

  # Drops the files whose `upload_path` is already present in the uploads
  # table.
  def filter_existing_uploads(files)
    paths = files.map(&:upload_path)
    existing_paths = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.where(path: paths).pluck(:path).to_set

    files.reject { |file| existing_paths.include?(file.upload_path) }
  end

  # There are files on disk that are not in the uploads table because their
  # model was deleted, and we don't delete the files on disk.
  #
  # Drops the files whose model record can no longer be found.
  def filter_deleted_models(files)
    # Sets keep the per-file membership test cheap for large batches.
    deleted_ids = deleted_model_ids(files).transform_values(&:to_set)

    files.reject { |file| deleted_ids[file.model_type].include?(file.model_id) }
  end

  # Returns a hash mapping each supported model type to the model IDs that
  # are referenced by `files` but were not found by a lookup on the matching
  # model class.
  def deleted_model_ids(files)
    ids = {
      'Appearance' => [],
      'Namespace' => [],
      'Note' => [],
      'Project' => [],
      'User' => []
    }

    # group model IDs by model type
    files.each { |file| ids[file.model_type] << file.model_id }

    ids.each do |model_type, model_ids|
      # Nothing is referenced for this type, so nothing can be missing and
      # the lookup would be pointless.
      next if model_ids.empty?

      model_class = "Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::#{model_type}".constantize
      found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
      ids[model_type] = model_ids - found_ids
    end

    ids
  end

  # Bulk-inserts one `uploads` row per file. `created_at` is passed as the
  # literal string NOW() and excluded from quoting via `disable_quote`.
  def insert(files)
    rows = files.map { |file| file.to_h.merge(created_at: 'NOW()') }

    Gitlab::Database.bulk_insert('uploads', # rubocop:disable Gitlab/BulkInsert
                                 rows,
                                 disable_quote: :created_at)
  end

  # Drops the `untracked_files_for_uploads` table once it holds no more rows.
  def drop_temp_table_if_finished
    if Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup
      Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
                                                                                                              if_exists: true)
    end
  end
end
end
end