# gitlab-org--gitlab-foss/lib/gitlab/background_migration/populate_untracked_uploads.rb

# 186 lines
# 5.5 KiB
# Ruby
# Raw Normal View History

module Gitlab
module BackgroundMigration
class PopulateUntrackedUploads
class UntrackedFile < ActiveRecord::Base
self.table_name = 'untracked_files_for_uploads'
# 2017-11-07 22:08:02 -05:00
# Matches the tail of a FileUploader path: /:random_hex/:filename
FILE_UPLOADER_PATH_PATTERN = %r{/\h+/[^/]+\z}
# Captures everything that precedes that tail; the capture is looked up as a
# project full path by file_uploader_model_id.
FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN = %r{\A(.+)#{FILE_UPLOADER_PATH_PATTERN}}

# These regex patterns are tested against a relative path, relative to
# the upload directory.
# For convenience, if there exists a capture group in the pattern, then
# it indicates the model_id.
# Order matters: the first matching entry wins, and the FileUploader
# pattern is the least specific, so it must stay last.
PATH_PATTERNS = [
  {
    pattern: %r{\A-/system/appearance/logo/(\d+)/},
    uploader: 'AttachmentUploader',
    model_type: 'Appearance'
  },
  {
    pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
    uploader: 'AttachmentUploader',
    model_type: 'Appearance'
  },
  {
    pattern: %r{\A-/system/note/attachment/(\d+)/},
    uploader: 'AttachmentUploader',
    model_type: 'Note'
  },
  {
    pattern: %r{\A-/system/user/avatar/(\d+)/},
    uploader: 'AvatarUploader',
    model_type: 'User'
  },
  {
    pattern: %r{\A-/system/group/avatar/(\d+)/},
    uploader: 'AvatarUploader',
    model_type: 'Namespace'
  },
  {
    pattern: %r{\A-/system/project/avatar/(\d+)/},
    uploader: 'AvatarUploader',
    model_type: 'Project'
  },
  {
    pattern: FILE_UPLOADER_PATH_PATTERN,
    uploader: 'FileUploader',
    model_type: 'Project'
  }
].freeze
# 2017-11-07 22:08:02 -05:00
# 2017-11-24 02:12:24 -05:00
# Builds the attribute hash for this file's row in the uploads table
# (created_at is added by the caller at insert time).
#
# Note that :size and :checksum read the file from disk.
def to_h
  {
    path: upload_path,
    uploader: uploader,
    model_type: model_type,
    model_id: model_id,
    size: file_size,
    checksum: checksum
  }
end
# Path to store in Upload#path (memoized).
#
# UntrackedFile#path is relative to CarrierWave.root (see absolute_path),
# but the path stored on an upload depends on the uploader:
# * FileUploader: only the trailing ":random_hex/:filename" part
# * any other uploader: the untracked file's path, unchanged
def upload_path
  @upload_path ||=
    if uploader == 'FileUploader'
      # Path relative to project directory in uploads
      matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
      matchd[0].sub(%r{\A/}, '') # remove leading slash
    else
      path
    end
end
# Uploader class name (e.g. 'AvatarUploader') taken from the PATH_PATTERNS
# entry that matches this file's path.
# Raises if no pattern matches (see matching_pattern_map).
def uploader
  matching_pattern_map[:uploader]
end
# Model class name (e.g. 'Project') taken from the PATH_PATTERNS entry that
# matches this file's path.
# Raises if no pattern matches (see matching_pattern_map).
def model_type
  matching_pattern_map[:model_type]
end
# ID of the model that owns this file, as a String.
#
# Most patterns capture the ID directly from the path; the FileUploader
# pattern has no capture group, so the ID is resolved from the project's
# full path instead.
def model_id
  matchd = path_relative_to_upload_dir.match(matching_pattern_map[:pattern])

  # If something is captured (matchd[1] is not nil), it is a model_id
  return matchd[1] if matchd[1]

  # Only the FileUploader pattern will not match an ID
  file_uploader_model_id
end
# Size in bytes of the file on disk (not memoized; hits the filesystem on
# every call).
def file_size
  File.size(absolute_path)
end
# 2017-11-24 02:49:16 -05:00
# Hex-encoded SHA256 digest of the file's contents on disk.
def checksum
  sha256 = Digest::SHA256.file(absolute_path)
  sha256.hexdigest
end
# 2017-11-07 22:08:02 -05:00
# The file's path with the leading "<RELATIVE_UPLOAD_DIR>/" prefix removed
# (memoized). Not including a leading slash.
# A path that does not start with that prefix is returned unchanged.
def path_relative_to_upload_dir
  @path_relative_to_upload_dir ||= begin
    # Build the prefix regexp only when the value is not memoized yet.
    upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR
    path.sub(%r{\A#{Regexp.escape(upload_dir)}/}, '')
  end
end
private
# 2017-11-07 23:15:28 -05:00
# First PATH_PATTERNS entry whose :pattern matches the path relative to the
# upload directory (memoized). Raises a RuntimeError when nothing matches.
def matching_pattern_map
  @matching_pattern_map ||= PATH_PATTERNS.find do |candidate|
    path_relative_to_upload_dir.match(candidate[:pattern])
  end

  @matching_pattern_map || raise("Unknown upload path pattern \"#{path}\"")
end
# 2017-11-07 22:08:02 -05:00
# Resolves the owning project's ID (as a String) for a FileUploader path by
# looking the project up by the full path captured from the file path.
#
# Raises a RuntimeError when the full path cannot be captured, or when no
# project with that full path exists (previously the latter surfaced as an
# opaque NoMethodError on nil).
def file_uploader_model_id
  matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN)
  raise "Could not capture project full_path from a FileUploader path: \"#{path_relative_to_upload_dir}\"" unless matchd

  full_path = matchd[1]
  project = Project.find_by_full_path(full_path)
  raise "Could not find a project with full_path \"#{full_path}\" for FileUploader path: \"#{path_relative_to_upload_dir}\"" unless project

  project.id.to_s
end
# Location of the file on disk: the stored path joined onto CarrierWave.root.
def absolute_path
  File.join(CarrierWave.root, path)
end
# 2017-11-24 02:49:16 -05:00
end
# 2017-11-24 02:49:16 -05:00
# Bare model over the `uploads` table, local to this migration. It is used
# below to check that the table exists and to look up upload paths that are
# already tracked.
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
end
# Processes one batch of untracked files: inserts an uploads row for each
# file that is not tracked yet, deletes the batch from the temporary table,
# and drops that table once it is empty.
#
# start_id, end_id - inclusive ID range of UntrackedFile rows to process.
#
# Does nothing when either table is missing.
def perform(start_id, end_id)
  return unless migrate?

  files = UntrackedFile.where(id: start_id..end_id)
  insert_uploads_if_needed(files)
  files.delete_all

  drop_temp_table_if_finished
end
private
# True only when both the temporary untracked-files table and the uploads
# table exist; the uploads table is not checked if the first is missing.
def migrate?
  [UntrackedFile, Upload].all?(&:table_exists?)
end
# 2017-11-24 02:12:24 -05:00
# Runs the batch through both filters (already-tracked uploads first, then
# deleted models) and inserts whatever remains.
def insert_uploads_if_needed(files)
  insert(filter_deleted_models(filter_existing_uploads(files)))
end
# Returns the files whose upload_path has no row in the uploads table yet.
# All paths of the batch are looked up with a single query.
def filter_existing_uploads(files)
  tracked_paths = Upload.where(path: files.map(&:upload_path)).pluck(:path).to_set

  files.reject { |file| tracked_paths.include?(file.upload_path) }
end
# Placeholder: currently a pass-through that returns the given files untouched.
def filter_deleted_models(files)
  # TODO
  files
end
# Bulk-inserts one uploads row per file, using each file's to_h attributes
# plus a created_at value.
#
# NOTE(review): created_at is passed as the string 'NOW()'; whether that is
# evaluated as SQL or quoted as a literal depends on
# Gitlab::Database.bulk_insert, which is not visible here — confirm.
def insert(files)
  rows = files.map { |file| file.to_h.merge(created_at: 'NOW()') }

  Gitlab::Database.bulk_insert('uploads', rows)
end
# Drops the temporary untracked_files_for_uploads table once it holds no
# more rows; otherwise leaves it alone.
def drop_temp_table_if_finished
  return unless UntrackedFile.all.empty?

  UntrackedFile.connection.drop_table(:untracked_files_for_uploads)
end
end
end
end