1
0
Fork 0
mirror of https://github.com/fog/fog-aws.git synced 2022-11-09 13:50:52 -05:00

Add storage option to configure multipart put/copy

Amazon S3 limits the maximum chunk size that can be uploaded/copied in a single request to 5GB.
Other S3-compatible storage systems (such as Ceph) have no such limit.
Ceph shows much better performance when a file is copied as a whole, in a single request.

This commit adds `max_put_chunk_size`/`max_copy_chunk_size` storage options
that allow the user to configure the respective chunk sizes.
Using a non-positive value tells fog-aws to use a single put/copy request regardless of file size.

This commit preserves backward compatibility:
if the user explicitly sets `multipart_chunk_size` on a `Fog::AWS::Storage::File` instance,
that value takes precedence.
This commit is contained in:
Marat Radchenko 2021-07-26 18:42:34 +03:00
parent d7ec997c7b
commit 04b3cd7cbf
2 changed files with 48 additions and 11 deletions

View file

@ -4,8 +4,11 @@ module Fog
module AWS
class Storage
class File < Fog::Model
MIN_MULTIPART_CHUNK_SIZE = 5242880
MAX_SINGLE_PUT_SIZE = 5368709120
# @deprecated use {Fog::AWS::Storage::MIN_MULTIPART_CHUNK_SIZE} instead
MIN_MULTIPART_CHUNK_SIZE = Fog::AWS::Storage::MIN_MULTIPART_CHUNK_SIZE
# @deprecated use {Fog::AWS::Storage::MAX_SINGLE_PUT_SIZE} instead
MAX_SINGLE_PUT_SIZE = Fog::AWS::Storage::MAX_SINGLE_PUT_SIZE
# @deprecated not used for anything
MULTIPART_COPY_THRESHOLD = 15728640
# @see AWS Object docs http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectOps.html
@ -65,7 +68,7 @@ module Fog
# Use small chunk sizes to minimize memory. E.g. 5242880 = 5mb
attr_reader :multipart_chunk_size
# Sets the chunk size used for multipart operations on this file.
#
# @param mp_chunk_size [Integer] requested chunk size, in bytes
# @raise [ArgumentError] if mp_chunk_size is smaller than MIN_MULTIPART_CHUNK_SIZE
def multipart_chunk_size=(mp_chunk_size)
# NOTE(review): this guard rejects every value below the minimum, including
# non-positive ones, before the service-level check below is reached. The
# validate_chunk_size shown in this file accepts value <= 0 — confirm whether
# both checks are meant to be active at the same time.
raise ArgumentError.new("minimum multipart_chunk_size is #{MIN_MULTIPART_CHUNK_SIZE}") if mp_chunk_size < MIN_MULTIPART_CHUNK_SIZE
# Second validation, delegated to the service; the string names the setting in its error message.
service.validate_chunk_size(mp_chunk_size, 'multipart_chunk_size')
@multipart_chunk_size = mp_chunk_size
end
@ -145,10 +148,9 @@ module Fog
def copy(target_directory_key, target_file_key, options = {})
requires :directory, :key
# With a single PUT operation you can upload objects up to 5 GB in size. Automatically set MP for larger objects.
self.multipart_chunk_size = MIN_MULTIPART_CHUNK_SIZE * 2 if !multipart_chunk_size && self.content_length.to_i > MAX_SINGLE_PUT_SIZE
self.multipart_chunk_size = service.max_copy_chunk_size if multipart_chunk_size.nil?
if multipart_chunk_size && self.content_length.to_i >= multipart_chunk_size
if multipart_chunk_size > 0 && self.content_length.to_i >= multipart_chunk_size
upload_part_options = options.select { |key, _| ALLOWED_UPLOAD_PART_OPTIONS.include?(key.to_sym) }
upload_part_options = upload_part_options.merge({ 'x-amz-copy-source' => "#{directory.key}/#{key}" })
multipart_copy(options, upload_part_options, target_directory_key, target_file_key)
@ -271,10 +273,8 @@ module Fog
options['x-amz-website-redirect-location'] = website_redirect_location if website_redirect_location
options.merge!(encryption_headers)
# With a single PUT operation you can upload objects up to 5 GB in size. Automatically set MP for larger objects.
self.multipart_chunk_size = MIN_MULTIPART_CHUNK_SIZE if !multipart_chunk_size && Fog::Storage.get_body_size(body) > MAX_SINGLE_PUT_SIZE
if multipart_chunk_size && Fog::Storage.get_body_size(body) >= multipart_chunk_size && body.respond_to?(:read)
self.multipart_chunk_size = service.max_put_chunk_size if multipart_chunk_size.nil?
if multipart_chunk_size > 0 && Fog::Storage.get_body_size(body) >= multipart_chunk_size && body.respond_to?(:read)
data = multipart_save(options)
merge_attributes(data.body)
else

View file

@ -14,6 +14,9 @@ module Fog
'https' => 443
}
MIN_MULTIPART_CHUNK_SIZE = 5242880
MAX_SINGLE_PUT_SIZE = 5368709120
VALID_QUERY_KEYS = %w[
acl
cors
@ -43,7 +46,7 @@ module Fog
]
requires :aws_access_key_id, :aws_secret_access_key
recognizes :endpoint, :region, :host, :port, :scheme, :persistent, :use_iam_profile, :aws_session_token, :aws_credentials_expire_at, :path_style, :acceleration, :instrumentor, :instrumentor_name, :aws_signature_version, :enable_signature_v4_streaming, :virtual_host, :cname
recognizes :endpoint, :region, :host, :port, :scheme, :persistent, :use_iam_profile, :aws_session_token, :aws_credentials_expire_at, :path_style, :acceleration, :instrumentor, :instrumentor_name, :aws_signature_version, :enable_signature_v4_streaming, :virtual_host, :cname, :max_put_chunk_size, :max_copy_chunk_size
secrets :aws_secret_access_key, :hmac
@ -117,6 +120,17 @@ module Fog
module Utils
attr_accessor :region
# Amazon S3 limits the maximum chunk size that can be uploaded/copied in a single request to 5GB.
# Other S3-compatible storage systems (such as Ceph) have no such limit.
# Ceph shows much better performance when a file is copied as a whole, in a single request.
# A fog-aws user can use these settings to configure chunk sizes.
# A non-positive value tells fog-aws to use a single put/copy request regardless of file size.
#
# @return [Integer]
# @see https://docs.aws.amazon.com/AmazonS3/latest/userguide/copy-object.html
attr_reader :max_put_chunk_size
attr_reader :max_copy_chunk_size
def cdn
@cdn ||= Fog::AWS::CDN.new(
:aws_access_key_id => @aws_access_key_id,
@ -171,6 +185,12 @@ module Fog
params_to_url(params)
end
# Checks that a chunk size is acceptable.
#
# A value passes when it is non-positive or when it is at least
# MIN_MULTIPART_CHUNK_SIZE; any other value raises.
#
# @param value [Integer] chunk size to check
# @param description [String] name of the setting, used as the prefix of the error message
# @raise [RuntimeError] if value is neither non-positive nor at least MIN_MULTIPART_CHUNK_SIZE
# @return [nil]
def validate_chunk_size(value, description)
  return if value <= 0
  return if value >= MIN_MULTIPART_CHUNK_SIZE

  raise "#{description} (#{value}) is less than minimum #{MIN_MULTIPART_CHUNK_SIZE}"
end
private
def validate_signature_version!
@ -179,6 +199,16 @@ module Fog
end
end
# Stores the :max_put_chunk_size option in @max_put_chunk_size and validates it.
#
# Falls back to MAX_SINGLE_PUT_SIZE when the option key is absent. The
# instance variable is assigned before validation runs.
#
# @param options [Hash] service options; only :max_put_chunk_size is read
# @return whatever validate_chunk_size returns
def init_max_put_chunk_size!(options = {})
  configured = options.fetch(:max_put_chunk_size, MAX_SINGLE_PUT_SIZE)
  @max_put_chunk_size = configured
  validate_chunk_size(configured, 'max_put_chunk_size')
end
# Stores the :max_copy_chunk_size option in @max_copy_chunk_size and validates it.
#
# Falls back to MAX_SINGLE_PUT_SIZE when the option key is absent. The
# instance variable is assigned before validation runs.
#
# @param options [Hash] service options; only :max_copy_chunk_size is read
# @return whatever validate_chunk_size returns
def init_max_copy_chunk_size!(options = {})
  configured = options.fetch(:max_copy_chunk_size, MAX_SINGLE_PUT_SIZE)
  @max_copy_chunk_size = configured
  validate_chunk_size(configured, 'max_copy_chunk_size')
end
def v4_signed_params_for_url(params, expires)
now = Fog::Time.now
@ -452,6 +482,10 @@ module Fog
@path_style = options[:path_style] || false
init_max_put_chunk_size!(options)
init_max_copy_chunk_size!(options)
@signature_version = options.fetch(:aws_signature_version, 4)
validate_signature_version!
setup_credentials(options)
@ -515,6 +549,9 @@ module Fog
validate_signature_version!
@path_style = options[:path_style] || false
init_max_put_chunk_size!(options)
init_max_copy_chunk_size!(options)
@region = options[:region] || DEFAULT_REGION
if @endpoint = options[:endpoint]