2017-11-07 15:53:24 -05:00
|
|
|
module Gitlab
|
|
|
|
module BackgroundMigration
|
|
|
|
class PopulateUntrackedUploads
|
2017-11-09 20:17:56 -05:00
|
|
|
# ActiveRecord model over the temporary `untracked_files_for_uploads` table.
#
# Each row is read for two attributes here: `path` (location of a file on
# disk) and `tracked` (whether a matching `uploads` row is known to exist).
# From `path` alone the model derives the uploader, model type and model id
# needed to create the corresponding Upload record.
class UntrackedFile < ActiveRecord::Base
  self.table_name = 'untracked_files_for_uploads'

  # Ends with /:random_hex/:filename
  FILE_UPLOADER_PATH_PATTERN = %r{/\h+/[^/]+\z}
  # Same as above, but captures everything before /:random_hex/:filename
  # (used as the project full_path in #file_uploader_model_id).
  FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN = %r{\A(.+)#{FILE_UPLOADER_PATH_PATTERN}}

  # These regex patterns are tested against a relative path, relative to
  # the upload directory.
  # For convenience, if there exists a capture group in the pattern, then
  # it indicates the model_id.
  #
  # Order matters: #matching_pattern_map returns the first entry that
  # matches, so the catch-all FileUploader pattern must stay last.
  PATH_PATTERNS = [
    {
      pattern: %r{\A-/system/appearance/logo/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Appearance'
    },
    {
      pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Appearance'
    },
    {
      pattern: %r{\A-/system/note/attachment/(\d+)/},
      uploader: 'AttachmentUploader',
      model_type: 'Note'
    },
    {
      pattern: %r{\A-/system/user/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'User'
    },
    {
      pattern: %r{\A-/system/group/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'Namespace'
    },
    {
      pattern: %r{\A-/system/project/avatar/(\d+)/},
      uploader: 'AvatarUploader',
      model_type: 'Project'
    },
    {
      pattern: FILE_UPLOADER_PATH_PATTERN,
      uploader: 'FileUploader',
      model_type: 'Project'
    }
  ].freeze

  # Rows that have not been marked as tracked yet.
  scope :untracked, -> { where(tracked: false) }

  # Creates the Upload row for this file if none exists for its upload
  # path, then flags this row as tracked. No-op for rows that are already
  # persisted and tracked.
  #
  # Raises whatever the helpers raise (unknown path pattern, unresolvable
  # project, validation failure in create!/update!, missing file).
  def ensure_tracked!
    return if persisted? && tracked?

    add_to_uploads unless in_uploads?
    mark_as_tracked
  end

  # Whether an Upload row with this file's upload path already exists.
  def in_uploads?
    # Even though we are checking relative paths, path is enough to
    # uniquely identify uploads. There is no ambiguity between
    # FileUploader paths and other Uploader paths because we use the /-/
    # separator kind of like an escape character. Project full_path will
    # never conflict with an upload path starting with "uploads/-/".
    Upload.exists?(path: upload_path)
  end

  # Inserts the Upload row derived from this file's path.
  def add_to_uploads
    Upload.create!(
      path: upload_path,
      uploader: uploader,
      model_type: model_type,
      model_id: model_id,
      size: file_size
    )
  end

  # Flags this row as tracked; raises if the update fails.
  def mark_as_tracked
    update!(tracked: true)
  end

  # The value stored in Upload#path, which depends on the uploader.
  #
  # NOTE(review): #file_size joins CarrierWave.root with `path`, so `path`
  # appears to be relative to CarrierWave.root rather than absolute -
  # confirm against the code that fills this table.
  def upload_path
    if uploader == 'FileUploader'
      # Path relative to project directory in uploads, i.e. only the
      # trailing ":random_hex/:filename" part.
      matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
      matchd[0].sub(%r{\A/}, '') # remove leading slash
    else
      path
    end
  end

  # Uploader class name (String) from the matching PATH_PATTERNS entry.
  def uploader
    matching_pattern_map[:uploader]
  end

  # Model class name (String) from the matching PATH_PATTERNS entry.
  def model_type
    matching_pattern_map[:model_type]
  end

  # Model id as a String: either the digits captured by the matching
  # pattern or, for FileUploader paths, the id of the project looked up by
  # its full_path.
  def model_id
    matchd = path_relative_to_upload_dir.match(matching_pattern_map[:pattern])

    # If something is captured (matchd[1] is not nil), it is a model_id
    return matchd[1] if matchd[1]

    # Only the FileUploader pattern will not match an ID
    file_uploader_model_id
  end

  # Size in bytes of the file found at CarrierWave.root joined with `path`.
  # File.size raises (e.g. Errno::ENOENT) when the file is not there.
  def file_size
    absolute_path = File.join(CarrierWave.root, path)
    File.size(absolute_path)
  end

  # `path` with the leading upload directory (and its trailing slash)
  # removed. Not including a leading slash. Memoized; the regexp is only
  # built the first time.
  def path_relative_to_upload_dir
    @path_relative_to_upload_dir ||= begin
      upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR
      path.sub(%r{\A#{Regexp.escape(upload_dir)}/}, '')
    end
  end

  private

  # First PATH_PATTERNS entry whose pattern matches the relative path
  # (memoized). Raises a RuntimeError when no entry matches.
  def matching_pattern_map
    @matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
      path_relative_to_upload_dir.match(path_pattern_map[:pattern])
    end

    raise "Unknown upload path pattern \"#{path}\"" unless @matching_pattern_map

    @matching_pattern_map
  end

  # Resolves the project id (as a String) for a FileUploader path by
  # treating everything before "/:random_hex/:filename" as the project's
  # full_path.
  #
  # Raises a RuntimeError when the full_path cannot be captured or when no
  # project is found for it.
  def file_uploader_model_id
    matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN)
    raise "Could not capture project full_path from a FileUploader path: \"#{path_relative_to_upload_dir}\"" unless matchd

    full_path = matchd[1]
    project = Project.find_by_full_path(full_path)

    # Without this guard a missing project surfaces as an opaque
    # "undefined method `id' for nil" that hides which file is affected.
    raise "Could not find project with full_path \"#{full_path}\" for FileUploader path: \"#{path_relative_to_upload_dir}\"" unless project

    project.id.to_s
  end
end
|
|
|
|
|
2017-11-07 22:54:28 -05:00
|
|
|
# Copy-pasted class for less fragile migration
|
2017-11-07 15:53:24 -05:00
|
|
|
# Model over the `uploads` table, local to this migration.
#
# Files up to CHECKSUM_THRESHOLD get their SHA256 checksum computed in a
# before_save callback; larger ones are handed to UploadChecksumWorker after
# commit.
class Upload < ActiveRecord::Base
  self.table_name = 'uploads' # This is the only line different from copy-paste

  # Upper limit for foreground checksum processing
  CHECKSUM_THRESHOLD = 100.megabytes

  belongs_to :model, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations

  before_save :calculate_checksum, if: :foreground_checksum?
  after_commit :schedule_checksum, unless: :foreground_checksum?

  # Location of the file on disk. A stored path that already starts with
  # "/" is returned untouched; otherwise the uploader class resolves it.
  def absolute_path
    if relative_path?
      uploader_class.absolute_path(self)
    else
      path
    end
  end

  # Stores the SHA256 hex digest of the file in `checksum` when the file
  # exists. Any StandardError raised along the way (including by the
  # existence check) falls back to scheduling the checksum job instead.
  def calculate_checksum
    if exist?
      digest = Digest::SHA256.file(absolute_path)
      self.checksum = digest.hexdigest
    end
  rescue StandardError
    schedule_checksum
  end

  # Whether a file is present at #absolute_path.
  def exist?
    location = absolute_path
    File.exist?(location)
  end

  private

  # True when the file is small enough to checksum inline.
  def foreground_checksum?
    size <= CHECKSUM_THRESHOLD
  end

  # Enqueues UploadChecksumWorker with this record's id.
  def schedule_checksum
    UploadChecksumWorker.perform_async(id)
  end

  # A path is considered relative unless it begins with "/".
  def relative_path?
    absolute = path.start_with?('/')
    !absolute
  end

  # Looks up the uploader constant named by the `uploader` attribute.
  def uploader_class
    Object.const_get(uploader)
  end
end
|
|
|
|
|
|
|
|
# Ensures every still-untracked file with an id in start_id..end_id has an
# Upload row, then drops the temporary table once nothing is left untracked.
# Does nothing when #migrate? is false.
#
# A failure on one file is logged as a warning and does not stop the batch.
def perform(start_id, end_id)
  return unless migrate?

  UntrackedFile.untracked.where(id: start_id..end_id).each do |file|
    begin
      file.ensure_tracked!
    rescue StandardError => error
      # The row is left untracked in the DB on purpose: afterwards we can
      # see which files failed to become tracked and decide what to do.
      Rails.logger.warn "Failed to add untracked file to uploads: #{error.message}"
    end
  end

  drop_temp_table_if_finished
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Only migrate when both the temporary table and the uploads table exist.
# Checked in this order, stopping at the first missing table.
def migrate?
  [UntrackedFile, Upload].all?(&:table_exists?)
end
|
2017-11-15 05:36:25 -05:00
|
|
|
|
|
|
|
# Drops the temporary table once no untracked rows remain in it; leaves it
# in place otherwise.
def drop_temp_table_if_finished
  return unless UntrackedFile.untracked.empty?

  UntrackedFile.connection.drop_table(:untracked_files_for_uploads)
end
|
2017-11-07 15:53:24 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|