1
0
Fork 0
mirror of https://github.com/rails/rails.git synced 2022-11-09 12:12:34 -05:00
rails--rails/activestorage/test/models/blob_test.rb

213 lines
7.3 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
2017-07-01 06:06:08 -04:00
require "test_helper"
2017-07-01 06:47:13 -04:00
require "database/setup"
require "active_support/testing/method_call_assertions"
2017-07-01 06:47:13 -04:00
class ActiveStorage::BlobTest < ActiveSupport::TestCase
include ActiveSupport::Testing::MethodCallAssertions
# A blob stays in the unattached scope only until a record uses it as an avatar.
test "unattached scope" do
  first_blob = create_blob(filename: "funky.jpg")
  second_blob = create_blob(filename: "town.jpg")

  User.create! name: "DHH", avatar: first_blob
  assert_includes ActiveStorage::Blob.unattached, second_blob
  assert_not_includes ActiveStorage::Blob.unattached, first_blob

  User.create! name: "Jason", avatar: second_blob
  assert_not_includes ActiveStorage::Blob.unattached, second_blob
end
# A blob created from raw data is expected to download the same data and to
# report its size and base64-encoded MD5 digest.
test "create after upload sets byte size and checksum" do
  data = "Hello world!"
  blob = create_blob data: data

  assert_equal data, blob.download
  # byte_size counts bytes, so compare with bytesize rather than the character
  # count (the two only coincide for single-byte characters).
  assert_equal data.bytesize, blob.byte_size
  assert_equal Digest::MD5.base64digest(data), blob.checksum
end
2017-07-03 14:14:28 -04:00
2018-01-15 13:06:17 -05:00
# The blob is created with a generic binary content type and is expected to
# report image/jpeg instead (presumably the default fixture file is a JPEG).
test "create after upload extracts content type from data" do
  assert_equal "image/jpeg", create_file_blob(content_type: "application/octet-stream").content_type
end
# The blob is created with a generic binary content type and is expected to
# report text/plain instead (presumably derived from create_blob's default filename).
test "create after upload extracts content type from filename" do
  assert_equal "text/plain", create_blob(content_type: "application/octet-stream").content_type
end
# With no content type supplied and identify: false, the blob is still
# expected to end up as text/plain.
test "create after upload extracts content_type from io when no content_type given and identify: false" do
  assert_equal "text/plain", create_blob(content_type: nil, identify: false).content_type
end
# With identify: false, the explicitly supplied content type is expected to be kept.
test "create after upload uses content_type when identify: false" do
  csv_blob = create_blob(
    data: "Article,dates,analysis\n1, 2, 3",
    filename: "table.csv",
    content_type: "text/csv",
    identify: false
  )

  assert_equal "text/csv", csv_blob.content_type
end
# The generated key must consist of exactly 28 lowercase alphanumeric characters.
test "create after upload generates a 28-character base36 key" do
  # \A and \z anchor the whole string; ^ and $ only anchor a line, which would
  # let a multi-line key with one matching line slip through.
  assert_match(/\A[a-z0-9]{28}\z/, create_blob.key)
end
# Passing a record: option to create_blob must not raise.
test "create after upload accepts a record for overrides" do
  assert_nothing_raised { create_blob(record: User.new) }
end
# A blob created from racecar.jpg answers true to image? and false to audio?.
test "image?" do
  create_file_blob(filename: "racecar.jpg").tap do |jpeg_blob|
    assert_predicate jpeg_blob, :image?
    assert_not_predicate jpeg_blob, :audio?
  end
end
# A blob created from video.mp4 with a video/mp4 content type answers true to
# video? and false to audio?.
test "video?" do
  create_file_blob(filename: "video.mp4", content_type: "video/mp4").tap do |mp4_blob|
    assert_predicate mp4_blob, :video?
    assert_not_predicate mp4_blob, :audio?
  end
end
# A blob created from a plain string answers true to text? and false to audio?.
test "text?" do
  create_blob(data: "Hello world!").tap do |text_blob|
    assert_predicate text_blob, :text?
    assert_not_predicate text_blob, :audio?
  end
end
2017-07-10 16:17:48 -04:00
# Downloading with a block is expected to yield the data in two pieces:
# 5 megabytes first, then the remaining 64 kilobytes.
test "download yields chunks" do
  blob = create_blob data: "a" * 5.0625.megabytes

  received = []
  blob.download { |chunk| received << chunk }

  assert_equal 2, received.size
  assert_equal "a" * 5.megabytes, received.first
  assert_equal "a" * 64.kilobytes, received.second
end
# Opening a blob yields a binary-mode file positioned at the start, named after
# the blob id with the original extension, whose content matches the fixture.
test "open with integrity" do
  blob = create_file_blob(filename: "racecar.jpg")

  blob.open do |file|
    assert file.binmode?
    assert_equal 0, file.pos

    assert File.basename(file.path).starts_with?("ActiveStorage-#{blob.id}-")
    assert file.path.ends_with?(".jpg")

    assert_equal file_fixture("racecar.jpg").binread, file.read, "Expected downloaded file to match fixture file"
  end
end
# Once the stored checksum is overwritten with the digest of different data,
# opening the blob must raise an IntegrityError without yielding the file.
test "open without integrity" do
  blob = create_blob(data: "Hello, world!")
  blob.update! checksum: Digest::MD5.base64digest("Goodbye, world!")

  assert_raises ActiveStorage::IntegrityError do
    blob.open { |file| flunk "Expected integrity check to fail" }
  end
end
2019-03-28 18:47:42 -04:00
# Opening a blob with a tmpdir: option must place the downloaded file inside
# that directory, keeping the binary mode, start position, extension, and content.
test "open in a custom tmpdir" do
  # The block form of Dir.mktmpdir removes the directory when the block exits,
  # so the test no longer leaves a temporary directory behind.
  Dir.mktmpdir do |tmpdir|
    create_file_blob(filename: "racecar.jpg").open(tmpdir: tmpdir) do |file|
      assert file.binmode?
      assert_equal 0, file.pos
      assert_match(/\.jpg\z/, file.path)
      assert file.path.starts_with?(tmpdir)
      assert_equal file_fixture("racecar.jpg").binread, file.read, "Expected downloaded file to match fixture file"
    end
  end
end
# service_url must match the expected URL, both by default and with an explicit
# attachment disposition. The clock is frozen while both URLs are built
# (presumably so that expiry-dependent signatures are identical).
test "URLs expiring in 5 minutes" do
  create_blob.tap do |blob|
    freeze_time do
      assert_equal expected_url_for(blob), blob.service_url
      assert_equal expected_url_for(blob, disposition: :attachment), blob.service_url(disposition: :attachment)
    end
  end
end
# A text/html blob must be served as an application/octet-stream attachment,
# even when an inline disposition is requested.
test "URLs force content_type to binary and attachment as content disposition for content types served as binary" do
  blob = create_blob(content_type: "text/html")

  freeze_time do
    binary_attachment_url = expected_url_for(blob, disposition: :attachment, content_type: "application/octet-stream")

    assert_equal binary_attachment_url, blob.service_url
    assert_equal binary_attachment_url, blob.service_url(disposition: :inline)
  end
end
# An application/zip blob keeps its content type but must be served as an
# attachment, even when an inline disposition is requested.
test "URLs force attachment as content disposition when the content type is not allowed inline" do
  blob = create_blob(content_type: "application/zip")

  freeze_time do
    zip_attachment_url = expected_url_for(blob, disposition: :attachment, content_type: "application/zip")

    assert_equal zip_attachment_url, blob.service_url
    assert_equal zip_attachment_url, blob.service_url(disposition: :inline)
  end
end
# service_url accepts a filename override as an ActiveStorage::Filename or a
# plain string, and uses the blob's own filename when given nil.
test "URLs allow for custom filename" do
  blob = create_blob(filename: "original.txt")
  renamed = ActiveStorage::Filename.new("new.txt")

  freeze_time do
    renamed_url = expected_url_for(blob, filename: renamed)

    assert_equal expected_url_for(blob), blob.service_url
    assert_equal renamed_url, blob.service_url(filename: renamed)
    assert_equal renamed_url, blob.service_url(filename: "new.txt")
    assert_equal expected_url_for(blob, filename: blob.filename), blob.service_url(filename: nil)
  end
end
# Extra options given to service_url must be passed to the service's url method
# together with the blob key and the standard options.
test "URLs allow for custom options" do
  blob = create_blob(filename: "original.txt")

  expected_options = {
    expires_in: ActiveStorage.service_urls_expire_in,
    disposition: :attachment,
    content_type: blob.content_type,
    filename: blob.filename,
    thumb_size: "300x300",
    thumb_mode: "crop"
  }

  assert_called_with(blob.service, :url, [ blob.key, expected_options ]) do
    blob.service_url(thumb_size: "300x300", thumb_mode: "crop")
  end
end
2017-12-02 22:43:28 -05:00
# After purge, the service must no longer hold a file under the blob's key.
test "purge deletes file from external service" do
  purged_blob = create_blob
  purged_blob.purge

  assert_not ActiveStorage::Blob.service.exist?(purged_blob.key)
end
2017-12-02 22:43:28 -05:00
# After the blob is purged, the service must no longer hold a file under the
# key of a variant that was processed beforehand.
test "purge deletes variants from external service" do
  blob = create_file_blob
  resized = blob.variant(resize: "100>").processed

  blob.purge

  assert_not ActiveStorage::Blob.service.exist?(resized.key)
end
# Purging a blob that is attached to a record must leave both the blob row and
# the stored file in place.
test "purge does nothing when attachments exist" do
  blob = create_blob
  User.create! name: "DHH", avatar: blob

  assert_no_difference(-> { ActiveStorage::Blob.count }) do
    blob.purge
  end
  assert ActiveStorage::Blob.service.exist?(blob.key)
end
2017-07-03 14:14:28 -04:00
private
Prevent content type and disposition bypass in storage service URLs * Force content-type to binary on service urls for relevant content types We have a list of content types that must be forcibly served as binary, but in practice this only means to serve them as attachment always. We should also set the Content-Type to the configured binary type. As a bonus: add text/cache-manifest to the list of content types to be served as binary by default. * Store content-disposition and content-type in GCS Forcing these in the service_url when serving the file works fine for S3 and Azure, since these services include params in the signature. However, GCS specifically excludes response-content-disposition and response-content-type from the signature, which means an attacker can modify these and have files that should be served as text/plain attachments served as inline HTML for example. This makes our attempt to force specific files to be served as binary and as attachment can be easily bypassed. The only way this can be forced in GCS is by storing content-disposition and content-type in the object metadata. * Update GCS object metadata after identifying blob In some cases we create the blob and upload the data before identifying the content-type, which means we can't store that in GCS right when uploading. In these, after creating the attachment, we enqueue a job to identify the blob, and set the content-type. In other cases, files are uploaded to the storage service via direct upload link. We create the blob before the direct upload, which happens independently from the blob creation itself. We then mark the blob as identified, but we have already the content-type we need without having put it in the service. In these two cases, then, we need to update the metadata in the GCS service. 
* Include content-type and disposition in the verified key for disk service This prevents an attacker from modifying these params in the service signed URL, which is particularly important when we want to force them to have specific values for security reasons. * Allow only a list of specific content types to be served inline This is different from the content types that must be served as binary in the sense that any content type not in this list will be always served as attachment but with its original content type. Only types in this list are allowed to be served either inline or as attachment. Apart from forcing this in the service URL, for GCS we need to store the disposition in the metadata. Fix CVE-2018-16477.
2018-09-06 10:52:52 -04:00
# Builds the disk-service URL the tests expect for +blob+.
#
# disposition  - content disposition type (defaults to :attachment).
# filename     - filename to use; falls back to blob.filename when nil.
# content_type - content type to use; falls back to blob.content_type when nil.
#
# The path embeds a verifier-generated token covering the blob key, the
# formatted disposition, and the content type, valid for 5 minutes with the
# :blob_key purpose. The same disposition and content type are repeated as
# query parameters.
def expected_url_for(blob, disposition: :attachment, filename: nil, content_type: nil)
  filename ||= blob.filename
  content_type ||= blob.content_type

  content_disposition = ActionDispatch::Http::ContentDisposition.format(disposition: disposition, filename: filename.sanitized)
  query = { disposition: content_disposition, content_type: content_type }
  signed_key = ActiveStorage.verifier.generate({ key: blob.key }.merge(query), expires_in: 5.minutes, purpose: :blob_key)

  "https://example.com/rails/active_storage/disk/#{signed_key}/#{filename}?#{query.to_param}"
end
2017-06-30 13:12:58 -04:00
end