S3: permit uploading files larger than 5 GB

Use multipart uploads for files larger than 100 MB. Dynamically calculate part size based on total object size and maximum part count.
This commit is contained in:
George Claghorn 2019-05-16 10:58:33 -04:00 committed by GitHub
parent c8396e030f
commit 9c5135ce6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 6 deletions

View File

@@ -1,3 +1,24 @@
* The S3 service now permits uploading files larger than 5 gigabytes.
When uploading a file greater than 100 megabytes in size, the service
transparently switches to [multipart uploads](https://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html)
using a part size computed from the file's total size and S3's part count limit.
No application changes are necessary to take advantage of this feature. You
can customize the default 100 MB multipart upload threshold in your S3
service's configuration:
```yaml
production:
service: s3
access_key_id: <%= Rails.application.credentials.dig(:aws, :access_key_id) %>
secret_access_key: <%= Rails.application.credentials.dig(:aws, :secret_access_key) %>
region: us-east-1
bucket: my-bucket
upload:
multipart_threshold: <%= 250.megabytes %>
```
*George Claghorn*
Please check [6-0-stable](https://github.com/rails/rails/blob/6-0-stable/activestorage/CHANGELOG.md) for previous changes.

View File

@ -1,5 +1,7 @@
# frozen_string_literal: true
gem "aws-sdk-s3", "~> 1.14"
require "aws-sdk-s3"
require "active_support/core_ext/numeric/bytes"
@@ -7,20 +9,24 @@ module ActiveStorage
# Wraps the Amazon Simple Storage Service (S3) as an Active Storage service.
# See ActiveStorage::Service for the generic API documentation that applies to all services.
class Service::S3Service < Service
attr_reader :client, :bucket, :upload_options
attr_reader :client, :bucket
attr_reader :multipart_upload_threshold, :upload_options
# Sets up the S3 client and bucket, the multipart-upload threshold, and
# the extra options forwarded to every upload call.
#
# bucket:  name of the S3 bucket to operate on
# upload:  upload options hash; its :multipart_threshold entry (default
#          100 MB) decides when uploads switch to the multipart path
# options: forwarded verbatim to Aws::S3::Resource.new
def initialize(bucket:, upload: {}, **options)
  @upload_options = upload
  @multipart_upload_threshold = upload.fetch(:multipart_threshold, 100.megabytes)

  @client = Aws::S3::Resource.new(**options)
  @bucket = @client.bucket(bucket)
end
# Uploads +io+ to S3 under +key+, instrumented as an :upload event.
#
# Objects smaller than the configured multipart_upload_threshold are sent
# with a single PUT, verified against the optional MD5 +checksum+. Larger
# objects go through a multipart upload instead; note that path takes no
# checksum (see upload_with_multipart).
#
# NOTE: the stripped diff had left the removed single-PUT lines (put /
# rescue / raise) interleaved with the new threshold branch; this is the
# reconstructed post-commit body.
def upload(key, io, checksum: nil, content_type: nil, **)
  instrument :upload, key: key, checksum: checksum do
    if io.size < multipart_upload_threshold
      upload_with_single_part key, io, checksum: checksum, content_type: content_type
    else
      upload_with_multipart key, io, content_type: content_type
    end
  end
end
@@ -94,6 +100,24 @@ module ActiveStorage
end
private
MAXIMUM_UPLOAD_PARTS_COUNT = 10000
MINIMUM_UPLOAD_PART_SIZE = 5.megabytes
# PUTs +io+ as one S3 object, translating S3's checksum-mismatch error
# (BadDigest) into ActiveStorage::IntegrityError for callers.
def upload_with_single_part(key, io, checksum: nil, content_type: nil)
  # upload_options is merged last so configured options keep precedence,
  # exactly as the trailing **upload_options splat did.
  attributes = { body: io, content_md5: checksum, content_type: content_type }.merge(upload_options)

  begin
    object_for(key).put(**attributes)
  rescue Aws::S3::Errors::BadDigest
    raise ActiveStorage::IntegrityError
  end
end
# Streams +io+ to S3 as a multipart upload. The part size spreads the
# total size across the maximum allowed part count, floored at S3's
# 5 MB minimum part size.
def upload_with_multipart(key, io, content_type: nil)
  computed_size = io.size.fdiv(MAXIMUM_UPLOAD_PARTS_COUNT).ceil
  part_size = computed_size > MINIMUM_UPLOAD_PART_SIZE ? computed_size : MINIMUM_UPLOAD_PART_SIZE

  object_for(key).upload_stream(content_type: content_type, part_size: part_size, **upload_options) do |stream|
    IO.copy_stream(io, stream)
  end
end
# Returns the Aws::S3::Object handle for +key+ in the configured bucket.
def object_for(key)
  bucket.object(key)
end

View File

@@ -46,8 +46,7 @@ if SERVICE_CONFIGURATIONS[:s3]
end
test "uploading with server-side encryption" do
config = SERVICE_CONFIGURATIONS.deep_merge(s3: { upload: { server_side_encryption: "AES256" } })
service = ActiveStorage::Service.configure(:s3, config)
service = build_service(upload: { server_side_encryption: "AES256" })
begin
key = SecureRandom.base58(24)
@@ -77,6 +76,25 @@ if SERVICE_CONFIGURATIONS[:s3]
ensure
@service.delete key
end
test "uploading a large object in multiple parts" do
service = build_service(upload: { multipart_threshold: 5.megabytes })
begin
key = SecureRandom.base58(24)
data = SecureRandom.bytes(8.megabytes)
service.upload key, StringIO.new(data), checksum: Digest::MD5.base64digest(data)
assert data == service.download(key)
ensure
service.delete key
end
end
private
# Builds a fresh S3 service whose settings overlay +configuration+
# onto the shared SERVICE_CONFIGURATIONS fixture.
def build_service(configuration)
  merged = SERVICE_CONFIGURATIONS.deep_merge(s3: configuration)
  ActiveStorage::Service.configure(:s3, merged)
end
end
else
puts "Skipping S3 Service tests because no S3 configuration was supplied"