Bulk insert uploads

This commit is contained in:
Michael Kozono 2017-11-23 23:49:16 -08:00
parent 908aacddda
commit a9c868d111

View file

@ -56,7 +56,8 @@ module Gitlab
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size
size: file_size,
checksum: checksum
}
end
@ -90,10 +91,13 @@ module Gitlab
end
def file_size
absolute_path = File.join(CarrierWave.root, path)
File.size(absolute_path)
end
def checksum
Digest::SHA256.file(absolute_path).hexdigest
end
# Not including a leading slash
def path_relative_to_upload_dir
base = %r{\A#{Regexp.escape(Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR)}/}
@ -120,55 +124,14 @@ module Gitlab
project = Project.find_by_full_path(full_path)
project.id.to_s
end
end
# Copy-pasted class for less fragile migration
class Upload < ActiveRecord::Base
self.table_name = 'uploads' # This is the only line different from copy-paste
# Upper limit for foreground checksum processing
CHECKSUM_THRESHOLD = 100.megabytes
belongs_to :model, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
before_save :calculate_checksum, if: :foreground_checksum?
after_commit :schedule_checksum, unless: :foreground_checksum?
def absolute_path
return path unless relative_path?
uploader_class.absolute_path(self)
File.join(CarrierWave.root, path)
end
end
def calculate_checksum
return unless exist?
self.checksum = Digest::SHA256.file(absolute_path).hexdigest
rescue StandardError
schedule_checksum
end
def exist?
File.exist?(absolute_path)
end
private
def foreground_checksum?
size <= CHECKSUM_THRESHOLD
end
def schedule_checksum
UploadChecksumWorker.perform_async(id)
end
def relative_path?
!path.start_with?('/')
end
def uploader_class
Object.const_get(uploader)
end
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
end
def perform(start_id, end_id)
@ -207,9 +170,11 @@ module Gitlab
end
def insert(files)
files.each do |file|
Upload.create!(file.to_h)
rows = files.map do |file|
file.to_h.merge(created_at: 'NOW()')
end
Gitlab::Database.bulk_insert('uploads', rows)
end
def drop_temp_table_if_finished