# frozen_string_literal: true

# File Download Service downloads a remote file into a temporary directory.
#
# @param configuration [BulkImports::Configuration] Config object containing url and access token
# @param relative_url [String] Relative URL to download the file from
# @param tmpdir [String] Temp directory to store downloaded file to. Must be located under `Dir.tmpdir`.
# @param file_size_limit [Integer] Maximum allowed file size
# @param allowed_content_types [Array<String>] Allowed file content types
# @param filename [String] Name of the file to download, if known. Use remote filename if none given.
module BulkImports
|
|
|
|
class FileDownloadService
|
2022-08-30 23:10:23 -04:00
|
|
|
# Mixins. NOTE(review): presumably these supply the helpers this class calls
# but does not define itself (`remote_filename`, `validate_filepath`,
# `validate_content_type`, `validate_content_length`, `validate_size!`,
# `validate_symlink`) -- confirm against the modules.
include ::BulkImports::FileDownloads::FilenameFetch
include ::BulkImports::FileDownloads::Validations

# Error raised by this service, e.g. when a streamed chunk reports a non-200 status.
ServiceError = Class.new(StandardError)

# Default for the `file_size_limit:` constructor argument.
DEFAULT_FILE_SIZE_LIMIT = 5.gigabytes

# Default for the `allowed_content_types:` constructor argument.
DEFAULT_ALLOWED_CONTENT_TYPES = %w(application/gzip application/octet-stream).freeze
|
|
|
|
|
|
|
|
# Captures the download settings. Nothing is validated or fetched here;
# all work happens in #execute.
#
# @param configuration [BulkImports::Configuration] Config object containing url and access token
# @param relative_url [String] Relative URL to download the file from
# @param tmpdir [String] Temp directory to store downloaded file to. Must be located under `Dir.tmpdir`.
# @param file_size_limit [Integer] Maximum allowed file size
# @param allowed_content_types [Array<String>] Allowed file content types
# @param filename [String, nil] Name of the file to download, if known. The remote filename is used if none given.
def initialize(
  configuration:,
  relative_url:,
  tmpdir:,
  file_size_limit: DEFAULT_FILE_SIZE_LIMIT,
  allowed_content_types: DEFAULT_ALLOWED_CONTENT_TYPES,
  filename: nil)
  @configuration = configuration
  @relative_url = relative_url
  @filename = filename
  @tmpdir = tmpdir
  @file_size_limit = file_size_limit
  @allowed_content_types = allowed_content_types
end
|
|
|
|
|
|
|
|
# Validates the request, downloads the file and returns where it was stored.
#
# Steps run in this order: tmpdir, filepath, URL, content type and content
# length checks, then the download, then a symlink check on the result.
# Any step may raise, which aborts the remaining steps.
#
# @return [String] path of the downloaded file (see #filepath)
def execute
  # Pre-flight checks: nothing is written to disk until all of them pass.
  validate_tmpdir
  validate_filepath
  validate_url
  validate_content_type
  validate_content_length

  download_file

  # Runs after the download, i.e. against the file that now exists on disk.
  validate_symlink

  filepath
end
|
|
|
|
|
|
|
|
private

# Readers for the settings captured in #initialize (private: internal use only).
attr_reader :configuration, :relative_url, :tmpdir, :file_size_limit, :allowed_content_types
|
2021-05-20 14:10:33 -04:00
|
|
|
|
|
|
|
# Streams the remote file into `filepath`, checking the running size as data arrives.
#
# Chunks that report a redirect status are skipped as long as no bytes have
# been counted yet. Every other chunk is added to the running total, passed
# to `validate_size!`, and must report HTTP 200 to be written.
#
# If anything raises a StandardError, the file at `filepath` is deleted (when
# it exists) and the same error is re-raised.
#
# @raise [ServiceError] when a chunk reports a status other than 200
# @return [Object] the value of the `http_client.stream` call; #execute ignores it
def download_file
  # Built once instead of once per chunk.
  redirect_codes = [301, 302, 303, 307, 308]

  File.open(filepath, 'wb') do |file|
    bytes_downloaded = 0

    http_client.stream(relative_url) do |chunk|
      next if bytes_downloaded == 0 && redirect_codes.include?(chunk.code)

      bytes_downloaded += chunk.size

      # Checked before the status so the limit applies to every counted chunk.
      validate_size!(bytes_downloaded)

      raise(ServiceError, "File download error #{chunk.code}") unless chunk.code == 200

      file.write(chunk)
    end
  end
rescue StandardError
  # Do not leave a partial download behind.
  File.delete(filepath) if File.exist?(filepath)

  raise
end
|
|
|
|
|
2022-08-30 23:10:23 -04:00
|
|
|
# Raises a ServiceError carrying the given message.
#
# NOTE(review): nothing in this class calls it directly; presumably it is the
# hook the included FilenameFetch/Validations helpers use to report failures -- confirm.
#
# @param message [String] error message
# @raise [ServiceError] always
def raise_error(message)
  raise ServiceError, message
end
|
|
|
|
|
2021-05-20 14:10:33 -04:00
|
|
|
# HTTP client built from the configuration's url and access token.
# Memoized, so every request made by this service shares one client instance.
#
# @return [BulkImports::Clients::HTTP]
def http_client
  @http_client ||= BulkImports::Clients::HTTP.new(
    url: configuration.url,
    token: configuration.access_token
  )
end
|
|
|
|
|
|
|
|
# Whether requests to local addresses are permitted, as reported by the
# `allow_local_requests_from_web_hooks_and_services?` application setting.
#
# @return [Boolean] the setting's value (passed through unchanged)
def allow_local_requests?
  ::Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services?
end
|
|
|
|
|
2022-08-30 23:10:23 -04:00
|
|
|
# Headers returned by a HEAD request to `relative_url`.
# Memoized, so the HEAD request is issued at most once per service instance
# (as long as the headers object is truthy).
#
# NOTE(review): not called within this class; presumably consumed by the
# included filename/content validations -- confirm.
#
# @return [Object] the `headers` of the HEAD response
def response_headers
  @response_headers ||= http_client.head(relative_url).headers
end
|
|
|
|
|
|
|
|
# Checks `tmpdir` against an allow-list containing only the system temp
# directory (`Dir.tmpdir`), delegating to `Gitlab::Utils.check_allowed_absolute_path!`.
#
# NOTE(review): the bang name suggests the helper raises on a disallowed
# path -- confirm the exception type against Gitlab::Utils.
def validate_tmpdir
  Gitlab::Utils.check_allowed_absolute_path!(tmpdir, [Dir.tmpdir])
end
|
|
|
|
|
2022-08-30 23:10:23 -04:00
|
|
|
# Destination path of the download: `filename` joined onto the temp directory.
# Memoized, so the filename is resolved only once.
#
# @return [String]
def filepath
  @filepath ||= File.join(@tmpdir, filename)
end
|
2021-05-20 14:10:33 -04:00
|
|
|
|
2022-08-30 23:10:23 -04:00
|
|
|
# Name to store the download under: the filename given to the constructor
# when it is present, otherwise whatever `remote_filename` returns.
#
# @return [String]
def filename
  @filename.presence || remote_filename
end
|
|
|
|
|
|
|
|
# Passes the fully resolved download URL to `Gitlab::UrlBlocker.validate!`.
#
# Only the http and https schemes are allowed. Localhost and local-network
# targets are allowed exactly when #allow_local_requests? says so.
def validate_url
  ::Gitlab::UrlBlocker.validate!(
    http_client.resource_url(relative_url),
    allow_localhost: allow_local_requests?,
    allow_local_network: allow_local_requests?,
    schemes: %w(http https)
  )
end
|
|
|
|
end
|
|
|
|
end
|