gitlab-org--gitlab-foss/app/models/upload.rb

182 lines
4.9 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
class Upload < ApplicationRecord
include Checksummable
# Upper limit for foreground checksum processing
CHECKSUM_THRESHOLD = 100.megabytes
belongs_to :model, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :size, presence: true
validates :path, presence: true
validates :model, presence: true
validates :uploader, presence: true
Enhance performance of counting local Uploads Add an index to the `store` column on `uploads`. This makes counting local uploads faster. Also, there is no longer need to check for objects with `store = NULL`. See https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/18557 --- ### Query plans Query: ```sql SELECT COUNT(*) FROM "uploads" WHERE ("uploads"."store" = ? OR "uploads"."store" IS NULL) ``` #### Without index ``` gitlabhq_production=# EXPLAIN ANALYZE SELECT uploads.* FROM uploads WHERE (uploads.store = 1 OR uploads.store IS NULL); QUERY PLAN --------------------------------------------------------------------------------------------------------------- Seq Scan on uploads (cost=0.00..601729.54 rows=578 width=272) (actual time=6.170..2308.256 rows=545 loops=1) Filter: ((store = 1) OR (store IS NULL)) Rows Removed by Filter: 4411957 Planning time: 38.652 ms Execution time: 2308.454 ms (5 rows) ``` #### Add index ``` gitlabhq_production=# create index uploads_tmp1 on uploads (store); CREATE INDEX ``` #### With index ``` gitlabhq_production=# EXPLAIN ANALYZE SELECT uploads.* FROM uploads WHERE (uploads.store = 1 OR uploads.store IS NULL); QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------- Bitmap Heap Scan on uploads (cost=11.46..1238.88 rows=574 width=272) (actual time=0.155..0.577 rows=545 loops=1) Recheck Cond: ((store = 1) OR (store IS NULL)) Heap Blocks: exact=217 -> BitmapOr (cost=11.46..11.46 rows=574 width=0) (actual time=0.116..0.116 rows=0 loops=1) -> Bitmap Index Scan on uploads_tmp1 (cost=0.00..8.74 rows=574 width=0) (actual time=0.095..0.095 rows=545 loops=1) Index Cond: (store = 1) -> Bitmap Index Scan on uploads_tmp1 (cost=0.00..2.44 rows=1 width=0) (actual time=0.020..0.020 rows=0 loops=1) Index Cond: (store IS NULL) Planning time: 0.274 ms Execution time: 0.637 ms (10 rows) ``` Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/6070
2018-10-05 09:59:58 -04:00
scope :with_files_stored_locally, -> { where(store: ObjectStorage::Store::LOCAL) }
scope :with_files_stored_remotely, -> { where(store: ObjectStorage::Store::REMOTE) }
2018-02-21 11:43:21 -05:00
before_save :calculate_checksum!, if: :foreground_checksummable?
after_commit :schedule_checksum, if: :needs_checksum?
after_commit :update_project_statistics, on: [:create, :destroy], if: :project?
# as the FileUploader is not mounted, the default CarrierWave ActiveRecord
# hooks are not executed and the file will not be deleted
2018-01-31 10:59:35 -05:00
after_destroy :delete_file!, if: -> { uploader_class <= FileUploader }
class << self
def inner_join_local_uploads_projects
upload_table = Upload.arel_table
project_table = Project.arel_table
join_statement = upload_table.project(upload_table[Arel.star])
.join(project_table)
.on(
upload_table[:model_type].eq('Project')
.and(upload_table[:model_id].eq(project_table[:id]))
.and(upload_table[:store].eq(ObjectStorage::Store::LOCAL))
)
joins(join_statement.join_sources)
end
##
# FastDestroyAll concerns
def begin_fast_destroy
{
Uploads::Local => Uploads::Local.new.keys(with_files_stored_locally),
Uploads::Fog => Uploads::Fog.new.keys(with_files_stored_remotely)
}
end
##
# FastDestroyAll concerns
def finalize_fast_destroy(items_to_remove)
items_to_remove.each do |store_class, keys|
store_class.new.delete_keys_async(keys)
end
end
end
def absolute_path
raise ObjectStorage::RemoteStoreError, _("Remote object has no absolute path.") unless local?
return path unless relative_path?
uploader_class.absolute_path(self)
end
def relative_path
uploader_class.relative_path(self)
end
def calculate_checksum!
self.checksum = nil
return unless needs_checksum?
self.checksum = self.class.sha256_hexdigest(absolute_path)
end
# Initialize the associated Uploader class with current model
#
# @param [String] mounted_as
# @return [GitlabUploader] one of the subclasses, defined at the model's uploader attribute
2018-02-21 11:43:21 -05:00
def build_uploader(mounted_as = nil)
uploader_class.new(model, mounted_as || mount_point).tap do |uploader|
uploader.upload = self
end
end
# Initialize the associated Uploader class with current model and
# retrieve existing file from the store to a local cache
#
# @param [String] mounted_as
# @return [GitlabUploader] one of the subclasses, defined at the model's uploader attribute
def retrieve_uploader(mounted_as = nil)
build_uploader(mounted_as).tap do |uploader|
uploader.retrieve_from_store!(identifier)
end
end
# This checks for existence of the upload on storage
#
# @return [Boolean] whether upload exists on storage
def exist?
exist = if local?
File.exist?(absolute_path)
else
retrieve_uploader.exists?
end
# Help sysadmins find missing upload files
if persisted? && !exist
exception = RuntimeError.new("Uploaded file does not exist")
Gitlab::ErrorTracking.track_exception(exception, self.attributes)
Gitlab::Metrics.counter(:upload_file_does_not_exist_total, _('The number of times an upload record could not find its file')).increment
end
exist
end
def uploader_context
{
identifier: identifier,
secret: secret
}.compact
end
2018-02-21 11:43:21 -05:00
def local?
store == ObjectStorage::Store::LOCAL
end
# Returns whether generating checksum is needed
#
# This takes into account whether file exists, if any checksum exists
# or if the storage has checksum generation code implemented
#
# @return [Boolean] whether generating a checksum is needed
def needs_checksum?
checksum.nil? && local? && exist?
end
private
def delete_file!
retrieve_uploader.remove!
end
def foreground_checksummable?
needs_checksum? && size <= CHECKSUM_THRESHOLD
end
def schedule_checksum
UploadChecksumWorker.perform_async(id)
end
def relative_path?
!path.start_with?('/')
end
def uploader_class
Object.const_get(uploader, false)
end
def identifier
File.basename(path)
end
def mount_point
super&.to_sym
end
def project?
model_type == "Project"
end
def update_project_statistics
ProjectCacheWorker.perform_async(model_id, [], [:uploads_size])
end
end
Upload.prepend_mod_with('Upload')