# frozen_string_literal: true

# A blob is a record that contains the metadata about a file and a key for where that file resides on the service.
# Blobs can be created in two ways:
#
# 1. Ahead of the file being uploaded server-side to the service, via <tt>create_and_upload!</tt>. A rewindable
#    <tt>io</tt> with the file contents must be available at the server for this operation.
# 2. Ahead of the file being directly uploaded client-side to the service, via <tt>create_before_direct_upload!</tt>.
#
# The first option doesn't require any client-side JavaScript integration, and can be used by any other back-end
# service that deals with files. The second option is faster, since you're not using your own server as a staging
# point for uploads, and can work with deployments like Heroku that do not provide large amounts of disk space.
#
# Blobs are intended to be immutable insofar as their reference to a specific file goes. You're allowed to
# update a blob's metadata on a subsequent pass, but you should not update the key or change the uploaded file.
# If you need to create a derivative or otherwise change the blob, simply create a new blob and purge the old one.
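#
# For example, to "replace" a file, build a fresh blob and retire the old one (a sketch;
# +old_blob+ and +attachment+ are hypothetical, and +io+ holds the new file's contents):
#
#   new_blob = ActiveStorage::Blob.create_and_upload!(io: io, filename: old_blob.filename.to_s)
#   attachment.update!(blob: new_blob)
#   old_blob.purge_later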
class ActiveStorage::Blob < ActiveRecord::Base
  # We use constant paths in the following include calls to avoid a gotcha of
  # classic mode: If the parent application defines a top-level Analyzable, for
  # example, and ActiveStorage::Blob::Analyzable is not yet loaded, a bare
  #
  #   include Analyzable
  #
  # would resolve to the top-level one, const_missing would not be triggered,
  # and therefore ActiveStorage::Blob::Analyzable would not be autoloaded.
  #
  # By using qualified names, we ensure const_missing is invoked if needed.
  # Please note that Ruby 2.5 or newer is required, so Object is not checked
  # when looking up the ancestors of ActiveStorage::Blob.
  #
  # Zeitwerk mode does not have this gotcha. If we ever drop classic mode, this
  # can be simplified: bare constant names would just work.
  include ActiveStorage::Blob::Analyzable
  include ActiveStorage::Blob::Identifiable
  include ActiveStorage::Blob::Representable

  self.table_name = "active_storage_blobs"

  self.signed_id_verifier = ActiveStorage.verifier

  MINIMUM_TOKEN_LENGTH = 28

  has_secure_token :key, length: MINIMUM_TOKEN_LENGTH
  store :metadata, accessors: [ :analyzed, :identified ], coder: ActiveRecord::Coders::JSON

  class_attribute :services, default: {}
  class_attribute :service, instance_accessor: false

  has_many :attachments
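
  # For example, blobs that were never attached to a record can be swept up and purged
  # (a sketch; the two-day grace period is an illustrative choice):
  #
  #   ActiveStorage::Blob.unattached.where("active_storage_blobs.created_at <= ?", 2.days.ago).find_each(&:purge_later)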
  scope :unattached, -> { where.missing(:attachments) }

  after_initialize do
    self.service_name ||= self.class.service.name
  end

  after_update_commit :update_service_metadata, if: :content_type_previously_changed?

  before_destroy(prepend: true) do
    raise ActiveRecord::InvalidForeignKey if attachments.exists?
  end

  validates :service_name, presence: true

  validate do
    if service_name_changed? && service_name.present?
      services.fetch(service_name) do
        errors.add(:service_name, :invalid)
      end
    end
  end

  class << self
    # You can use the signed ID of a blob to refer to it on the client side without fear of tampering.
    # This is particularly helpful for direct uploads where the client-side needs to refer to the blob
    # that was created ahead of the upload itself on form submission.
    #
    # The signed ID is also used to create stable URLs for the blob through the BlobsController.
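    #
    # A minimal round-trip sketch:
    #
    #   signed_id = blob.signed_id
    #   ActiveStorage::Blob.find_signed!(signed_id) # => blob, or raises if the signature is invalid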
    def find_signed!(id, record: nil)
      super(id, purpose: :blob_id)
    end

    def build_after_upload(io:, filename:, content_type: nil, metadata: nil, service_name: nil, identify: true, record: nil) #:nodoc:
      new(filename: filename, content_type: content_type, metadata: metadata, service_name: service_name).tap do |blob|
        blob.upload(io, identify: identify)
      end
    end
    deprecate :build_after_upload

    def build_after_unfurling(key: nil, io:, filename:, content_type: nil, metadata: nil, service_name: nil, identify: true, record: nil) #:nodoc:
      new(key: key, filename: filename, content_type: content_type, metadata: metadata, service_name: service_name).tap do |blob|
        blob.unfurl(io, identify: identify)
      end
    end

    def create_after_unfurling!(key: nil, io:, filename:, content_type: nil, metadata: nil, service_name: nil, identify: true, record: nil) #:nodoc:
      build_after_unfurling(key: key, io: io, filename: filename, content_type: content_type, metadata: metadata, service_name: service_name, identify: identify).tap(&:save!)
    end

    # Creates a new blob instance and then uploads the contents of
    # the given <tt>io</tt> to the service. The blob instance is going to
    # be saved before the upload begins to prevent the upload clobbering another due to key collisions.
    # When providing a content type, pass <tt>identify: false</tt> to bypass
    # automatic content type inference.
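    #
    # A minimal sketch (the file path is illustrative):
    #
    #   blob = ActiveStorage::Blob.create_and_upload!(
    #     io: File.open("/path/to/report.pdf"),
    #     filename: "report.pdf",
    #     content_type: "application/pdf"
    #   )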
    def create_and_upload!(key: nil, io:, filename:, content_type: nil, metadata: nil, service_name: nil, identify: true, record: nil)
      create_after_unfurling!(key: key, io: io, filename: filename, content_type: content_type, metadata: metadata, service_name: service_name, identify: identify).tap do |blob|
        blob.upload_without_unfurling(io)
      end
    end

    alias_method :create_after_upload!, :create_and_upload!
    deprecate create_after_upload!: :create_and_upload!

    # Returns a saved blob _without_ uploading a file to the service. This blob will point to a key where there is
    # no file yet. It's intended to be used together with a client-side upload, which will first create the blob
    # in order to produce the signed URL for uploading. This signed URL points to the key generated by the blob.
    # Once the form using the direct upload is submitted, the blob can be associated with the right record using
    # the signed ID.
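    #
    # A minimal sketch (the values are illustrative; the checksum is a base64-encoded MD5 digest
    # of the file contents, computed client-side):
    #
    #   blob = ActiveStorage::Blob.create_before_direct_upload!(
    #     filename: "avatar.png",
    #     byte_size: 1024,
    #     checksum: "5Eiw9DdFOTKtdYa4eDQkIg==",
    #     content_type: "image/png"
    #   )
    #   blob.service_url_for_direct_upload # => short-lived URL the client uploads the file to
    #   blob.signed_id                     # => reference the form submits to attach the blob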
    def create_before_direct_upload!(key: nil, filename:, byte_size:, checksum:, content_type: nil, metadata: nil, service_name: nil, record: nil)
      create! key: key, filename: filename, byte_size: byte_size, checksum: checksum, content_type: content_type, metadata: metadata, service_name: service_name
    end

    # To prevent problems with case-insensitive filesystems, especially in combination
    # with databases which treat indices as case-sensitive, all blob keys generated are going
    # to only contain the base-36 character alphabet and will therefore be lowercase. To maintain
    # the same or higher amount of entropy as in the base-58 encoding used by +has_secure_token+,
    # the number of bytes used is increased to 28 from the standard 24.
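    #
    # For example (the returned token is illustrative):
    #
    #   ActiveStorage::Blob.generate_unique_secure_token # => "xtapjjcjiudrlk3tmwyjgpuobabd"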
    def generate_unique_secure_token(length: MINIMUM_TOKEN_LENGTH)
      SecureRandom.base36(length)
    end

    # Customize signed ID purposes for backwards compatibility.
    def combine_signed_id_purposes(purpose)
      purpose.to_s
    end
  end

  # Returns a signed ID for this blob that's suitable for reference on the client-side without fear of tampering.
  def signed_id
    super(purpose: :blob_id)
  end

  # Returns the key pointing to the file on the service that's associated with this blob. The key is the
  # secure-token format from Rails in lower case. So it'll look like: xtapjjcjiudrlk3tmwyjgpuobabd.
  # This key is not intended to be revealed directly to the user.
  # Always refer to blobs using the signed_id or a verified form of the key.
  def key
    # We can't wait until the record is first saved to have a key for it
    self[:key] ||= self.class.generate_unique_secure_token(length: MINIMUM_TOKEN_LENGTH)
  end

  # Returns an ActiveStorage::Filename instance of the filename that can be
  # queried for basename, extension, and a sanitized version of the filename
  # that's safe to use in URLs.
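  #
  # An illustrative sketch:
  #
  #   blob = ActiveStorage::Blob.new(filename: "racecar.jpg")
  #   blob.filename.base                     # => "racecar"
  #   blob.filename.extension_with_delimiter # => ".jpg"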
  def filename
    ActiveStorage::Filename.new(self[:filename])
  end

  # Returns true if the content_type of this blob is in the image range, like image/png.
  def image?
    content_type.start_with?("image")
  end

  # Returns true if the content_type of this blob is in the audio range, like audio/mpeg.
  def audio?
    content_type.start_with?("audio")
  end

  # Returns true if the content_type of this blob is in the video range, like video/mp4.
  def video?
    content_type.start_with?("video")
  end

  # Returns true if the content_type of this blob is in the text range, like text/plain.
  def text?
    content_type.start_with?("text")
  end

  # Returns the URL of the blob on the service. This returns a permanent URL for public files, and returns a
  # short-lived URL for private files. Private files are signed, and not for public use. Instead,
  # the URL should only be exposed as a redirect from a stable, possibly authenticated URL. Hiding the
  # URL behind a redirect also allows you to change services without updating all URLs.
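  #
  # Illustrative calls:
  #
  #   blob.url                           # => short-lived, signed URL to the file on the service
  #   blob.url(disposition: :attachment) # => same, but asking the browser to download the file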
  def url(expires_in: ActiveStorage.service_urls_expire_in, disposition: :inline, filename: nil, **options)
    filename = ActiveStorage::Filename.wrap(filename || self.filename)

    service.url key, expires_in: expires_in, filename: filename, content_type: content_type_for_service_url,
      disposition: forced_disposition_for_service_url || disposition, **options
  end

  alias_method :service_url, :url
  deprecate service_url: :url

  # Returns a URL that can be used to directly upload a file for this blob on the service. This URL is intended to be
  # short-lived for security and only generated on-demand by the client-side JavaScript responsible for doing the uploading.
  def service_url_for_direct_upload(expires_in: ActiveStorage.service_urls_expire_in)
    service.url_for_direct_upload key, expires_in: expires_in, content_type: content_type, content_length: byte_size, checksum: checksum
  end

  # Returns a Hash of headers for +service_url_for_direct_upload+ requests.
  def service_headers_for_direct_upload
    service.headers_for_direct_upload key, filename: filename, content_type: content_type, content_length: byte_size, checksum: checksum
  end

  # Uploads the +io+ to the service on the +key+ for this blob. Blobs are intended to be immutable, so you shouldn't be
  # using this method after a file has already been uploaded for a blob. If you want to create a derivative blob,
  # you should instead simply create a new blob based on the old one.
  #
  # Prior to uploading, we compute the checksum, which is sent to the service for transit integrity validation. If the
  # checksum does not match what the service receives, an exception will be raised. We also measure the size of the +io+
  # and store that in +byte_size+ on the blob record. The content type is automatically extracted from the +io+ unless
  # you specify a +content_type+ and pass +identify+ as false.
  #
  # Normally, you do not have to call this method directly at all. Use the +create_and_upload!+ class method instead.
  # If you do use this method directly, make sure you are using it on a persisted Blob, as otherwise another blob's
  # data might get overwritten on the service.
  def upload(io, identify: true)
    unfurl io, identify: identify
    upload_without_unfurling io
  end

  def unfurl(io, identify: true) #:nodoc:
    self.checksum = compute_checksum_in_chunks(io)
    self.content_type = extract_content_type(io) if content_type.nil? || identify
    self.byte_size = io.size
    self.identified = true
  end

  def upload_without_unfurling(io) #:nodoc:
    service.upload key, io, checksum: checksum, **service_metadata
  end

  # Downloads the file associated with this blob. If no block is given, the entire file is read into memory and returned.
  # That'll use a lot of RAM for very large files. If a block is given, then the download is streamed and yielded in chunks.
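  #
  # A streaming sketch (the destination path is illustrative):
  #
  #   File.open("/tmp/blob-copy", "wb") do |file|
  #     blob.download { |chunk| file.write(chunk) }
  #   end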
  def download(&block)
    service.download key, &block
  end

  # Downloads the blob to a tempfile on disk. Yields the tempfile.
  #
  # The tempfile's name is prefixed with +ActiveStorage-+ and the blob's ID. Its extension matches that of the blob.
  #
  # By default, the tempfile is created in <tt>Dir.tmpdir</tt>. Pass +tmpdir:+ to create it in a different directory:
  #
  #   blob.open(tmpdir: "/path/to/tmp") do |file|
  #     # ...
  #   end
  #
  # The tempfile is automatically closed and unlinked after the given block is executed.
  #
  # Raises ActiveStorage::IntegrityError if the downloaded data does not match the blob's checksum.
  def open(tmpdir: nil, &block)
    service.open key, checksum: checksum,
      name: [ "ActiveStorage-#{id}-", filename.extension_with_delimiter ], tmpdir: tmpdir, &block
  end

  def mirror_later #:nodoc:
    ActiveStorage::MirrorJob.perform_later(key, checksum: checksum) if service.respond_to?(:mirror)
  end

  # Deletes the files on the service associated with the blob. This should only be done if the blob is going to be
  # deleted as well, or you will essentially have a dead reference. It's recommended to use the #purge and #purge_later
  # methods in most circumstances.
  def delete
    service.delete(key)
    service.delete_prefixed("variants/#{key}/") if image?
  end

  # Destroys the blob record and then deletes the file on the service. This is the recommended way to dispose of unwanted
  # blobs. Note, though, that deleting the file off the service will initiate an HTTP connection to the service, which may
  # be slow or prevented, so you should not use this method inside a transaction or in callbacks. Use #purge_later instead.
  def purge
    destroy
    delete
  rescue ActiveRecord::InvalidForeignKey
  end

  # Enqueues an ActiveStorage::PurgeJob to call #purge. This is the recommended way to purge blobs from a transaction,
  # an Active Record callback, or in any other real-time scenario.
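  #
  # For example:
  #
  #   blob.purge_later # enqueues a job; safe inside a transaction or an Active Record callback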
  def purge_later
    ActiveStorage::PurgeJob.perform_later(self)
  end

  # Returns an instance of service, which can be configured globally or per attachment.
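  #
  # A sketch (the service name depends on your config/storage.yml; "amazon" is illustrative):
  #
  #   blob.service_name # => "amazon"
  #   blob.service      # => the ActiveStorage::Service instance registered under that name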
  def service
    services.fetch(service_name)
  end

  private
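    # Computes a base64-encoded MD5 digest of the +io+, reading it in 5 MB chunks so the whole
    # file never has to fit in memory, and rewinds the +io+ so it can still be uploaded afterwards.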
    def compute_checksum_in_chunks(io)
      Digest::MD5.new.tap do |checksum|
        while chunk = io.read(5.megabytes)
          checksum << chunk
        end

        io.rewind
      end.base64digest
    end

    def extract_content_type(io)
      Marcel::MimeType.for io, name: filename.to_s, declared_type: content_type
    end

    def forcibly_serve_as_binary?
      ActiveStorage.content_types_to_serve_as_binary.include?(content_type)
    end

    def allowed_inline?
      ActiveStorage.content_types_allowed_inline.include?(content_type)
    end

    def content_type_for_service_url
      forcibly_serve_as_binary? ? ActiveStorage.binary_content_type : content_type
    end

    def forced_disposition_for_service_url
      if forcibly_serve_as_binary? || !allowed_inline?
        :attachment
      end
    end

    def service_metadata
      if forcibly_serve_as_binary?
        { content_type: ActiveStorage.binary_content_type, disposition: :attachment, filename: filename }
      elsif !allowed_inline?
        { content_type: content_type, disposition: :attachment, filename: filename }
      else
        { content_type: content_type }
      end
    end

    def update_service_metadata
      service.update_metadata key, **service_metadata if service_metadata.any?
    end
end

ActiveSupport.run_load_hooks :active_storage_blob, ActiveStorage::Blob