2017-11-27 01:57:21 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-11-07 15:53:24 -05:00
|
|
|
module Gitlab
|
|
|
|
module BackgroundMigration
|
2017-11-27 01:57:21 -05:00
|
|
|
# This class processes a batch of rows in `untracked_files_for_uploads` by
|
|
|
|
# adding each file to the `uploads` table if it does not exist.
|
|
|
|
class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
|
|
|
|
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
#
# Every derived value (uploader, model type, model ID, upload path) is
# obtained by matching the record's `path` against PATH_PATTERNS.
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
  self.table_name = 'untracked_files_for_uploads'

  # Ends with /:random_hex/:filename
  FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
  # Captures whatever precedes the /:random_hex/:filename suffix.
  FULL_PATH_CAPTURE = %r{\A(.+)#{FILE_UPLOADER_PATH}}

  # These regex patterns are tested against a relative path, relative to
  # the upload directory.
  # For convenience, if there exists a capture group in the pattern, then
  # it indicates the model_id.
  # Order matters: the first matching entry wins, and the FileUploader
  # entry (which has no capture group) is tried last.
  PATH_PATTERNS = [
    {
      pattern: %r{\A-/system/appearance/logo/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Appearance'
    },
    {
      pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Appearance'
    },
    {
      pattern: %r{\A-/system/note/attachment/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Note'
    },
    {
      pattern: %r{\A-/system/user/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'User'
    },
    {
      pattern: %r{\A-/system/group/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'Namespace'
    },
    {
      pattern: %r{\A-/system/project/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'Project'
    },
    {
      pattern: FILE_UPLOADER_PATH,
      uploader: 'FileUploader',
      model_type: 'Project'
    }
  ].freeze

  # Returns the memoized Hash of attributes for a row in `uploads`.
  #
  # Building it matches the path against PATH_PATTERNS and reads the file
  # on disk (size and SHA256 checksum), so the first call raises when the
  # path matches no pattern.
  def to_h
    @upload_hash ||= {
      path: upload_path,
      uploader: uploader,
      model_type: model_type,
      model_id: model_id,
      size: file_size,
      checksum: checksum
    }
  end

  # Returns the value to store in the upload's `path` attribute.
  def upload_path
    # The stored form depends on the uploader: FileUploader keeps only the
    # trailing :random_hex/:filename, every other uploader keeps `path`.
    @upload_path ||=
      if uploader == 'FileUploader'
        # Path relative to project directory in uploads
        matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
        matchd[0].sub(%r{\A/}, '') # remove leading slash
      else
        path
      end
  end

  # Returns the uploader class name (a String) of the matching pattern.
  def uploader
    matching_pattern_map[:uploader]
  end

  # Returns the model class name (a String) of the matching pattern.
  def model_type
    matching_pattern_map[:model_type]
  end

  # Returns the ID of the model owning the file, or nil when the file is a
  # FileUploader upload whose project cannot be found.
  def model_id
    # `defined?` rather than `||=` because nil is a legitimate result that
    # must not trigger another lookup.
    return @model_id if defined?(@model_id)

    pattern = matching_pattern_map[:pattern]
    matchd = path_relative_to_upload_dir.match(pattern)

    # If something is captured (matchd[1] is not nil), it is a model_id
    # Only the FileUploader pattern will not match an ID
    @model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id
  end

  # Returns the size in bytes of the file on disk.
  def file_size
    File.size(absolute_path)
  end

  # Returns the hex-encoded SHA256 digest of the file on disk.
  def checksum
    Digest::SHA256.file(absolute_path).hexdigest
  end

  private

  # Returns the first PATH_PATTERNS entry whose pattern matches the path
  # relative to the upload directory.
  #
  # Raises RuntimeError when no pattern matches.
  def matching_pattern_map
    @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
      path_relative_to_upload_dir.match(path_pattern_map[:pattern])
    end

    unless @matching_pattern_map
      raise "Unknown upload path pattern \"#{path}\""
    end

    @matching_pattern_map
  end

  # Resolves the project ID for a FileUploader path by looking the project
  # up through the full path that precedes /:random_hex/:filename.
  #
  # Returns nil when no such project is found.
  # Raises RuntimeError when the full path cannot be captured.
  def file_uploader_model_id
    matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)

    # The message is only built on the failure path.
    unless matchd
      raise <<~MSG
        Could not capture project full_path from a FileUploader path:
        "#{path_relative_to_upload_dir}"
      MSG
    end

    Project.find_by_full_path(matchd[1])&.id
  end

  # Not including a leading slash
  def path_relative_to_upload_dir
    # The whole computation sits inside the memo: this method is called
    # once per candidate pattern (and again by several callers), so the
    # prefix regexp must not be rebuilt on every call.
    @path_relative_to_upload_dir ||= begin
      upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
      base = %r{\A#{Regexp.escape(upload_dir)}/}

      path.sub(base, '')
    end
  end

  # Location of the file on disk: `path` joined onto CarrierWave.root.
  def absolute_path
    File.join(CarrierWave.root, path)
  end
end
|
2017-11-07 22:54:28 -05:00
|
|
|
|
2017-11-27 01:57:21 -05:00
|
|
|
# Minimal model bound to the `uploads` table. It declares nothing beyond
# the table name and exists so this migration can query `uploads`.
class Upload < ActiveRecord::Base
  self.table_name = 'uploads'
end
|
|
|
|
|
|
|
|
# Processes the untracked files whose IDs fall within start_id..end_id.
#
# Does nothing (and returns nil) unless `migrate?` is true. Otherwise the
# batch is handed to `insert_uploads_if_needed`, the rows it reports back
# are deleted, and `drop_temp_table_if_finished` runs last; its result is
# the return value.
def perform(start_id, end_id)
  return unless migrate?

  batch = UntrackedFile.where(id: (start_id..end_id))
  insert_uploads_if_needed(batch).delete_all

  drop_temp_table_if_finished
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# True only when both tables this migration works with exist. The
# `uploads` table is not checked if the untracked-files table is missing.
def migrate?
  [UntrackedFile, Upload].all?(&:table_exists?)
end
|
2017-11-15 05:36:25 -05:00
|
|
|
|
2017-11-24 02:12:24 -05:00
|
|
|
# Inserts the files from the batch that still need tracking.
#
# Files whose attributes cannot be built are set aside; the rest are
# narrowed by `filter_existing_uploads` and `filter_deleted_models`
# before being inserted.
#
# Returns `files` scoped to exclude the IDs of the files that errored.
def insert_uploads_if_needed(files)
  parseable, failed = filter_error_files(files)
  insert(filter_deleted_models(filter_existing_uploads(parseable)))

  files.where.not(id: failed.map(&:id))
end
|
|
|
|
|
|
|
|
# Splits `files` into two arrays: first the files whose `to_h` succeeds,
# then the files whose `to_h` raises.
def filter_error_files(files)
  files.partition { |file| attributes_buildable?(file) }
end

# Returns true when `file.to_h` completes. When it raises a StandardError
# the failure (path, message and backtrace) is written to the Rails error
# log and false is returned.
def attributes_buildable?(file)
  file.to_h
  true
rescue => error
  Rails.logger.error(<<~MSG)
    Error parsing path "#{file.path}":
    #{error.message}
    #{error.backtrace.join("\n ")}
  MSG
  false
end
|
|
|
|
|
|
|
|
# Returns the files whose upload path is not already present in the
# `uploads` table. Existing paths are fetched with a single query and kept
# in a Set for the membership checks.
def filter_existing_uploads(files)
  candidate_paths = files.map(&:upload_path)
  tracked = Upload.where(path: candidate_paths).pluck(:path).to_set

  files.reject { |file| tracked.include?(file.upload_path) }
end
|
|
|
|
|
2017-11-24 03:49:04 -05:00
|
|
|
# Some files remain on disk without a row in `uploads` because the model
# they belonged to was deleted and the file was never removed.
#
# Returns `files` without those whose model ID is listed by
# `deleted_model_ids` under the file's model type.
def filter_deleted_models(files)
  missing = deleted_model_ids(files)

  files.reject do |file|
    missing_ids = missing[file.model_type]
    missing_ids.include?(file.model_id)
  end
end
|
|
|
|
|
|
|
|
# Returns a Hash mapping each model type name ('Appearance', 'Namespace',
# 'Note', 'Project', 'User') to the model IDs referenced by `files` for
# which no record of that type was found.
#
# One query is issued per model type, even for types no file refers to.
def deleted_model_ids(files)
  model_types = %w[Appearance Namespace Note Project User]
  referenced = model_types.each_with_object({}) do |model_type, grouped|
    grouped[model_type] = []
  end

  # group model IDs by model type
  files.each { |file| referenced[file.model_type] << file.model_id }

  referenced.each_with_object({}) do |(model_type, model_ids), deleted|
    model_class = Object.const_get(model_type)
    found_ids = model_class.where(id: model_ids.uniq).pluck(:id)

    # Whatever was referenced but not found belongs to a deleted model.
    deleted[model_type] = model_ids - found_ids
  end
end
|
|
|
|
|
|
|
|
# Bulk-inserts one `uploads` row per file.
#
# Each row is the file's attribute Hash plus `created_at: 'NOW()'`; the
# `created_at` value is passed through unquoted.
def insert(files)
  timestamp = { created_at: 'NOW()' }
  rows = files.map { |file| file.to_h.merge(timestamp) }

  Gitlab::Database.bulk_insert(
    'uploads', rows, disable_quote: :created_at
  )
end
|
|
|
|
|
2017-11-15 05:36:25 -05:00
|
|
|
# Drops the `untracked_files_for_uploads` table once it holds no rows.
# Returns nil without doing anything while rows remain.
def drop_temp_table_if_finished
  return unless UntrackedFile.all.empty?

  UntrackedFile.connection.drop_table(
    :untracked_files_for_uploads, if_exists: true
  )
end
|
2017-11-07 15:53:24 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|