# frozen_string_literal: true

##
# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html)
# source: https://gitlab.com/snippets/1685610
module Gitlab
|
|
|
|
class HttpIO
|
|
|
|
# Chunk size: the length passed to every ranged GET and the alignment unit
# used to map a stream position onto a chunk (see chunk_start / chunk_offset).
BUFFER_SIZE = 128.kilobytes

# Raised by #initialize when ::Gitlab::UrlSanitizer rejects the URL.
class InvalidURLError < StandardError; end

# Raised by get_chunk when the response code is neither 200 nor 206.
class FailedToGetChunkError < StandardError; end

# uri         - URI built from the URL given to #initialize
# size        - file size given to #initialize
# tell        - current read position
# chunk       - body of the most recently fetched chunk
# chunk_range - range of positions covered by +chunk+
attr_reader :uri, :size, :tell, :chunk, :chunk_range

# IO exposes the current position under both names.
alias_method :pos, :tell
|
|
|
|
|
|
|
|
# Sets up a read-only stream over the file located at +url+.
#
# url  - location of the file; must pass ::Gitlab::UrlSanitizer.valid?
# size - total size of the file in bytes, as known by the caller; used as
#        the end-of-file boundary by #eof?, #seek and #read
#
# Raises InvalidURLError when the URL does not pass validation.
def initialize(url, size)
  url_accepted = ::Gitlab::UrlSanitizer.valid?(url)
  raise InvalidURLError unless url_accepted

  @uri = URI(url)
  @size = size
  # Reading always starts at the beginning of the file.
  @tell = 0
end
|
|
|
|
|
|
|
|
# Intentional no-op kept for IO interface compatibility. Returns nil.
def close
  nil
end
|
|
|
|
|
|
|
|
# Intentional no-op kept for IO interface compatibility (see #binmode?,
# which always reports true). Returns nil.
def binmode
  nil
end
|
|
|
|
|
|
|
|
def binmode?
|
|
|
|
true
|
|
|
|
end
|
|
|
|
|
|
|
|
# Always nil: this stream reports no filesystem path (the location is
# available through #url instead).
def path
  nil
end
|
|
|
|
|
|
|
|
# Returns the String form of the URI this stream reads from.
def url
  @uri.to_s
end
|
|
|
|
|
|
|
|
# Moves the read position.
#
# pos   - offset, interpreted according to +where+
# where - IO::SEEK_SET (absolute, default), IO::SEEK_CUR (relative to the
#         current position) or IO::SEEK_END (relative to +size+)
#
# Returns the new position.
# Raises RuntimeError when the resulting position is negative or greater
# than +size+; an unrecognised +where+ is mapped to -1 and therefore raises
# the same error.
def seek(pos, where = IO::SEEK_SET)
  target =
    if IO::SEEK_SET == where
      pos
    elsif IO::SEEK_CUR == where
      tell + pos
    elsif IO::SEEK_END == where
      size + pos
    else
      -1
    end

  within_file = target >= 0 && target <= size
  raise 'new position is outside of file' unless within_file

  @tell = target
end
|
|
|
|
|
|
|
|
# True when the read position equals the file size given at construction.
def eof?
  tell == size
end
|
|
|
|
|
|
|
|
# Yields each remaining line (as produced by #readline) to the given block
# until the end of the file is reached or #readline returns nil.
#
# Returns nil.
def each_line
  while !eof?
    current = readline
    return if current.nil?

    yield(current)
  end
end
|
|
|
|
|
2018-10-22 03:00:50 -04:00
|
|
|
# Reads up to +length+ bytes from the current position and advances the
# position by the number of bytes actually returned.
#
# length - maximum number of bytes to read; nil means everything from the
#          current position up to +size+
# outbuf - optional String whose content is replaced with the bytes read
#          (this is what makes IO.copy_stream work with this class)
#
# Returns the bytes read as a String; the String is empty when nothing was
# read (non-positive length, already at EOF, or an empty chunk).
def read(length = nil, outbuf = nil)
  pieces = []
  length ||= size - tell

  while length > 0 && !eof?
    data = get_chunk
    break if data.empty?

    # Never take more than what is left of the current BUFFER_SIZE-aligned
    # chunk; the next iteration fetches the following chunk.
    left_in_chunk = BUFFER_SIZE - chunk_offset
    wanted = length < left_in_chunk ? length : left_in_chunk
    piece = data.byteslice(0, wanted)

    pieces << piece
    consumed = piece.bytesize
    @tell += consumed
    length -= consumed
  end

  result = pieces.join
  outbuf.replace(result) if outbuf
  result
end
|
|
|
|
|
|
|
|
# Reads from the current position up to and including the next "\n", or up
# to the end of the file when no newline remains, advancing the position by
# the number of bytes returned.
#
# Returns the line as a String (empty when already at EOF or when no more
# data can be obtained).
def readline
  out = []

  until eof?
    data = get_chunk

    # Same guard #read uses: when no bytes come back even though the position
    # is still short of +size+ (e.g. the remote object is shorter than the
    # declared size), stop instead of spinning forever without advancing.
    break if data.nil? || data.empty?

    new_line = data.index("\n")

    if new_line
      line_end = data[0..new_line]
      out << line_end
      # Advance by what was actually consumed, measured in bytes like #read.
      @tell += line_end.bytesize
      break
    end

    # No newline in the rest of this chunk: take all of it and continue with
    # the next chunk.
    out << data
    @tell += data.bytesize
  end

  out.join
end
|
|
|
|
|
|
|
|
# Writing is not supported by this read-only stream.
#
# Raises NotImplementedError unconditionally.
def write(data)
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Truncation is not supported by this read-only stream.
#
# Raises NotImplementedError unconditionally.
def truncate(offset)
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Flushing is not supported by this read-only stream.
#
# Raises NotImplementedError unconditionally.
def flush
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Always true.
# NOTE(review): presumably defined so presence checks on the stream never
# have to inspect the remote file - confirm against callers.
def present?
  true
end
|
|
|
|
|
|
|
|
private

##
# The below methods are not implemented in IO class
#

# Tells whether the current position falls inside the chunk that is already
# held in memory.
#
# Returns nil when no chunk has been fetched yet, otherwise true/false.
def in_range?
  cached_range = @chunk_range
  cached_range.nil? ? nil : cached_range.include?(tell)
end
|
|
|
|
|
|
|
|
# Returns the bytes of the current chunk from the current position onwards,
# fetching the chunk over HTTP first when the position is not covered by
# the chunk held in memory.
#
# Returns a binary String; it is empty when the chunk holds no bytes at or
# after the current position.
# Raises FailedToGetChunkError when the response code is neither 200 nor 206.
def get_chunk
  unless in_range?
    response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http|
      http.request(request)
    end

    unless %w[200 206].include?(response.code)
      raise FailedToGetChunkError, "Unexpected response code: #{response.code}"
    end

    @chunk = response.body.force_encoding(Encoding::BINARY)
    @chunk_range = response.content_range

    ##
    # Note: If provider does not return content_range, then we set it as we requested
    # Provider: minio
    # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
    # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
    # Provider: AWS
    # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
    # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
    # Provider: GCS
    # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
    # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPOK 200
    @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize))
  end

  # String#[] yields nil when the offset lies beyond the bytes received
  # (e.g. the remote object is shorter than expected). Callers invoke String
  # methods on the result, so hand back an empty binary string instead.
  @chunk[chunk_offset..BUFFER_SIZE] || ''.b
end
|
|
|
|
|
|
|
|
# Builds the GET request for the chunk containing the current position.
#
# The 'accept-encoding' header is passed explicitly as nil and a byte range
# of BUFFER_SIZE bytes starting at chunk_start is requested.
#
# Returns a Net::HTTP::Get.
def request
  ranged_get = Net::HTTP::Get.new(uri, { 'accept-encoding' => nil })
  ranged_get.set_range(chunk_start, BUFFER_SIZE)
  ranged_get
end
|
|
|
|
|
|
|
|
# Offset of the current position inside its BUFFER_SIZE-aligned chunk.
def chunk_offset
  tell.modulo(BUFFER_SIZE)
end
|
|
|
|
|
|
|
|
# Position of the first byte of the BUFFER_SIZE-aligned chunk that contains
# the current position (the position rounded down to a multiple of
# BUFFER_SIZE).
def chunk_start
  full_chunks_before = tell.div(BUFFER_SIZE)
  full_chunks_before * BUFFER_SIZE
end
|
|
|
|
|
|
|
|
# End position of the current chunk: one BUFFER_SIZE past chunk_start,
# capped at the file size.
def chunk_end
  uncapped_end = chunk_start + BUFFER_SIZE
  uncapped_end < size ? uncapped_end : size
end
|
|
|
|
end
|
|
|
|
end
|