mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
90ccc5674a
The ignore_eof setting on HTTPResponse makes it so an EOFError is raised when reading bodies with a defined Content-Length, if the body read was truncated due to the socket being closed. The ignore_eof setting on HTTP sets the value used in responses that are created by the object. For backwards compatibility, the default for both settings is true. However, unless you are specifically testing for and handling truncated responses, it's a good idea to set ignore_eof to false so that errors are raised for truncated responses, instead of those errors silently being ignored. Fixes [Bug #14972] https://github.com/ruby/net-http/commit/4d47e34995
602 lines
15 KiB
Ruby
602 lines
15 KiB
Ruby
# frozen_string_literal: false
|
|
# HTTP response class.
|
|
#
|
|
# This class wraps together the response header and the response body (the
|
|
# entity requested).
|
|
#
|
|
# It mixes in the HTTPHeader module, which provides access to response
|
|
# header values both via hash-like methods and via individual readers.
|
|
#
|
|
# Note that each possible HTTP response code defines its own
|
|
# HTTPResponse subclass. All classes are defined under the Net module.
|
|
# Indentation indicates inheritance. For a list of the classes see Net::HTTP.
|
|
#
|
|
# Correspondence <code>HTTP code => class</code> is stored in CODE_TO_OBJ
|
|
# constant:
|
|
#
|
|
# Net::HTTPResponse::CODE_TO_OBJ['404'] #=> Net::HTTPNotFound
|
|
#
|
|
class Net::HTTPResponse
|
|
class << self
|
|
# true if the response has a body.
|
|
def body_permitted?
  # The answer is whatever the receiving class declares in its HAS_BODY
  # constant.
  self::HAS_BODY
end
|
|
|
|
# Returns the EXCEPTION_TYPE constant declared on the receiving class.
def exception_type # :nodoc: internal use only
  self::EXCEPTION_TYPE
end
|
|
|
|
# Builds a response object from +sock+: parses the status line, instantiates
# the class selected by response_class for the status code, then copies every
# header yielded by each_response_header into it with add_field.
#
# Returns the new response; the body is not read here.
def read_new(sock) #:nodoc: internal use only
  http_version, status, reason = read_status_line(sock)
  response = response_class(status).new(http_version, status, reason)
  each_response_header(sock) { |name, content| response.add_field(name, content) }
  response
end
|
|
|
|
private
|
|
|
|
# Reads one line from +sock+ and returns the captures
# [http_version, code, message]. The version and the message are optional in
# the pattern, so either may be nil.
#
# Raises Net::HTTPBadResponse when the line is not a status line.
def read_status_line(sock)
  status_line = sock.readline
  matched = /\AHTTP(?:\/(\d+\.\d+))?\s+(\d\d\d)(?:\s+(.*))?\z/in.match(status_line)
  raise Net::HTTPBadResponse, "wrong status line: #{status_line.dump}" unless matched
  matched.captures
end
|
|
|
|
# Picks the response class for the status +code+ string: an exact entry in
# CODE_TO_OBJ wins, then the entry for the first digit in CODE_CLASS_TO_OBJ,
# and finally Net::HTTPUnknownResponse.
def response_class(code)
  CODE_TO_OBJ[code] || CODE_CLASS_TO_OBJ[code[0, 1]] || Net::HTTPUnknownResponse
end
|
|
|
|
# Reads header lines from +sock+ up to the first blank line and yields each
# header as (name, value).
#
# A line that starts with a space or a tab while a header is pending is a
# continuation: its stripped text is appended to the pending value, separated
# by a single space. A header is only yielded once the following line shows
# that it is complete.
#
# Raises Net::HTTPBadResponse for a line that has no ':' separator.
def each_response_header(sock)
  name = content = nil
  while true
    text = sock.readuntil("\n", true).sub(/\s+\z/, '')
    break if text.empty?
    folded = (text[0] == ?\s || text[0] == ?\t) && content
    if folded
      content << ' ' unless content.empty?
      content << text.strip
    else
      # A new header starts here, so flush the one collected so far.
      yield name, content if name
      name, content = text.strip.split(/\s*:\s*/, 2)
      raise Net::HTTPBadResponse, 'wrong header line format' if content.nil?
    end
  end
  yield name, content if name
end
|
|
end
|
|
|
|
# next is to fix bug in RDoc, where the private inside class << self
|
|
# spills out.
|
|
public
|
|
|
|
include Net::HTTPHeader
|
|
|
|
# Stores the parsed status line parts and resets all body-related state.
#
# +httpv+, +code+ and +msg+ are kept as given in @http_version, @code and
# @message. The header storage is initialised through initialize_http_header.
def initialize(httpv, code, msg) #:nodoc: internal use only
  @http_version, @code, @message = httpv, code, msg
  initialize_http_header nil

  # Nothing has been read from the socket yet.
  @body = nil
  @read = false
  @uri = nil

  # Decoding options; callers may change them before the body is read.
  @decode_content = false
  @body_encoding = false
  @ignore_eof = true
end
|
|
|
|
# The HTTP version supported by the server.
|
|
attr_reader :http_version
|
|
|
|
# The HTTP result code string. For example, '302'. You can also
|
|
# determine the response type by examining which response subclass
|
|
# the response object is an instance of.
|
|
attr_reader :code
|
|
|
|
# The HTTP result message sent by the server. For example, 'Not Found'.
|
|
attr_reader :message
|
|
alias msg message # :nodoc: obsolete
|
|
|
|
# The URI used to fetch this response. The response URI is only available
|
|
# if a URI was used to create the request.
|
|
attr_reader :uri
|
|
|
|
# Set to true automatically when the request did not contain an
|
|
# Accept-Encoding header from the user.
|
|
attr_accessor :decode_content
|
|
|
|
# The encoding to use for the response body. If Encoding, use that encoding.
|
|
# If other true value, attempt to detect the appropriate encoding, and use
|
|
# that.
|
|
attr_reader :body_encoding
|
|
|
|
# Set the encoding to use for the response body. If given a String, find
|
|
# the related Encoding.
|
|
def body_encoding=(value)
  # A String is treated as an encoding name and resolved with Encoding.find
  # (which raises for unknown names); every other value is stored untouched.
  @body_encoding = value.is_a?(String) ? Encoding.find(value) : value
end
|
|
|
|
# Whether to ignore EOF when reading bodies with a specified Content-Length
|
|
# header.
|
|
attr_accessor :ignore_eof
|
|
|
|
# Short debugging form: class name, status code, status message and whether
# the body has been read.
def inspect
  "#<#{self.class} #{@code} #{@message} readbody=#{@read}>"
end
|
|
|
|
#
|
|
# response <-> exception relationship
|
|
#
|
|
|
|
# Returns the class of this response object.
def code_type #:nodoc:
  self.class
end
|
|
|
|
# Raises the exception class returned by error_type. The exception receives
# the status code, followed by the dumped status message when one is present,
# and this response object.
def error! #:nodoc:
  description = @message ? @code + ' ' + @message.dump : @code
  raise error_type.new(description, self)
end
|
|
|
|
# Returns the EXCEPTION_TYPE constant of this response's class.
def error_type #:nodoc:
  self.class::EXCEPTION_TYPE
end
|
|
|
|
# Raises an HTTP error if the response is not 2xx (success).
|
|
def value
  # Success responses pass through silently (nil); everything else is
  # handed to error!.
  return if kind_of?(Net::HTTPSuccess)
  error!
end
|
|
|
|
# Stores a copy of +uri+. A nil/false +uri+ leaves the current value as is.
def uri=(uri) # :nodoc:
  return unless uri
  @uri = uri.dup
end
|
|
|
|
#
|
|
# header (for backward compatibility only; DO NOT USE)
|
|
#
|
|
|
|
# Obsolete accessor that returns the response itself. Emits a warning when
# $VERBOSE is set.
def response #:nodoc:
  if $VERBOSE
    warn "Net::HTTPResponse#response is obsolete", uplevel: 1
  end
  self
end
|
|
|
|
# Obsolete accessor that returns the response itself. Emits a warning when
# $VERBOSE is set.
def header #:nodoc:
  if $VERBOSE
    warn "Net::HTTPResponse#header is obsolete", uplevel: 1
  end
  self
end
|
|
|
|
# Obsolete accessor that returns the response itself. Emits a warning when
# $VERBOSE is set.
def read_header #:nodoc:
  if $VERBOSE
    warn "Net::HTTPResponse#read_header is obsolete", uplevel: 1
  end
  self
end
|
|
|
|
#
|
|
# body
|
|
#
|
|
|
|
# Attaches +sock+ for the duration of the given block so that the body can be
# read, then detaches it again.
#
# A body is only expected when +reqmethodallowbody+ is true and the response
# class permits one. After the block returns, the body is read if the block
# has not already done so. The socket reference is cleared even when the
# block or the read raises.
def reading_body(sock, reqmethodallowbody) #:nodoc: internal use only
  @socket = sock
  @body_exist = reqmethodallowbody && self.class.body_permitted?
  begin
    yield
    body # ensure to read body
  ensure
    @socket = nil
  end
end
|
|
|
|
# Gets the entity body returned by the remote HTTP server.
|
|
#
|
|
# If a block is given, the body is passed to the block, and
|
|
# the body is provided in fragments, as it is read in from the socket.
|
|
#
|
|
# If +dest+ argument is given, response is read into that variable,
|
|
# with <code>dest#<<</code> method (it could be String or IO, or any
|
|
# other object responding to <code><<</code>).
|
|
#
|
|
# Calling this method a second or subsequent time for the same
|
|
# HTTPResponse object will return the value already read.
|
|
#
|
|
# http.request_get('/index.html') {|res|
|
|
# puts res.read_body
|
|
# }
|
|
#
|
|
# http.request_get('/index.html') {|res|
|
|
# p res.read_body.object_id # 538149362
|
|
# p res.read_body.object_id # 538149362
|
|
# }
|
|
#
|
|
# # using iterator
|
|
# http.request_get('/index.html') {|res|
|
|
# res.read_body do |segment|
|
|
# print segment
|
|
# end
|
|
# }
|
|
#
|
|
def read_body(dest = nil, &block)
  # Second and later calls return the cached body; asking for a new
  # destination at that point cannot be honoured.
  if @read
    raise IOError, "#{self.class}\#read_body called twice" if dest or block
    return @body
  end
  to = procdest(dest, block)
  stream_check
  if @body_exist
    read_body_0 to
    @body = to
  else
    @body = nil
  end
  @read = true

  # Encoding handling only applies to string-like bodies. A response without
  # a body leaves @body nil, and +dest+ may be an IO or a block adapter; none
  # of those can be re-tagged, so return them as they are.
  return @body unless @body.respond_to?(:force_encoding)

  case enc = @body_encoding
  when Encoding, false, nil
    # Encoding: force given encoding
    # false/nil: do not force encoding
  else
    # other value: detect encoding from body
    enc = detect_encoding(@body)
  end

  @body.force_encoding(enc) if enc

  @body
end
|
|
|
|
# Returns the full entity body.
|
|
#
|
|
# Calling this method a second or subsequent time will return the
|
|
# string already read.
|
|
#
|
|
# http.request_get('/index.html') {|res|
|
|
# puts res.body
|
|
# }
|
|
#
|
|
# http.request_get('/index.html') {|res|
|
|
# p res.body.object_id # 538149362
|
|
# p res.body.object_id # 538149362
|
|
# }
|
|
#
|
|
def body
  # Reads with no destination and no block, so read_body either performs the
  # read or returns what it has cached.
  read_body
end
|
|
|
|
# Because it may be necessary to modify the body, Eg, decompression
|
|
# this method facilitates that.
|
|
def body=(value)
  # Replaces the stored body only; the @read flag is not touched.
  @body = value
end
|
|
|
|
alias entity body #:nodoc: obsolete
|
|
|
|
private
|
|
|
|
# :nodoc:
|
|
# Works out the encoding of the body +str+. Sources are consulted in this
# order, and the first one that gives an answer wins:
#
# 1. the +encoding+ argument,
# 2. the charset parameter of the Content-Type header,
# 3. a byte order mark at the start of +str+,
# 4. for XML content types, the encoding named in the XML declaration
#    (UTF-8 when there is no declaration or it names no encoding),
# 5. for text/html, the result of sniff_encoding.
#
# Returns an Encoding, an encoding name String, or nil when nothing applies.
def detect_encoding(str, encoding=nil)
  return encoding if encoding

  declared = type_params['charset']
  return declared if declared

  bom = check_bom(str)
  return bom if bom

  case content_type&.downcase
  when %r{text/x(?:ht)?ml|application/(?:[^+]+\+)?xml}
    # An XML declaration starts with "<?xml"; the '?' has to be matched
    # literally or the declared encoding is never found.
    declaration = /\A<\?xml[ \t\r\n]+
      version[ \t\r\n]*=[ \t\r\n]*(?:"[0-9.]+"|'[0-9.]*')[ \t\r\n]+
      encoding[ \t\r\n]*=[ \t\r\n]*
      (?:"([A-Za-z][\-A-Za-z0-9._]*)"|'([A-Za-z][\-A-Za-z0-9._]*)')/x.match(str)
    declaration ? (declaration[1] || declaration[2]) : Encoding::UTF_8
  when %r{text/html.*}
    sniff_encoding(str)
  end
end
|
|
|
|
# :nodoc:
|
|
# Guesses the encoding of an HTML body, loosely following the encoding
# sniffing algorithm at
# http://www.w3.org/TR/html5/parsing.html#determining-the-character-encoding
#
# A charset found by scanning_meta wins. Otherwise the body is reported as
# US-ASCII when it is ASCII only, as UTF-8 when its bytes are valid UTF-8,
# and as nil when neither holds. The +encoding+ argument is not used.
def sniff_encoding(str, encoding=nil)
  from_meta = scanning_meta(str)
  return from_meta if from_meta

  # 6. last visited page or something
  # 7. frequency
  return Encoding::US_ASCII if str.ascii_only?
  return Encoding::UTF_8 if str.dup.force_encoding(Encoding::UTF_8).valid_encoding?

  # 8. implementation-defined or user-specified
  nil
end
|
|
|
|
# :nodoc:
|
|
# Returns the encoding announced by a byte order mark at the start of +str+:
# UTF-16BE for FE FF, UTF-16LE for FF FE, UTF-8 for EF BB BF, and nil when
# +str+ does not start with any of them.
def check_bom(str)
  # Compare raw bytes. String equality is false for non-ASCII content when
  # the two strings carry different encodings, so comparing a body tagged as
  # ASCII-8BIT against the literals below directly would never match.
  head = str.byteslice(0, 3).b
  if head.start_with?("\xEF\xBB\xBF".b)
    Encoding::UTF_8
  elsif head.start_with?("\xFE\xFF".b)
    Encoding::UTF_16BE
  elsif head.start_with?("\xFF\xFE".b)
    Encoding::UTF_16LE
  end
end
|
|
|
|
# :nodoc:
|
|
# Looks at the first "<meta" tag in +str+ and returns the Encoding it
# declares, or nil.
#
# A +charset+ attribute is used directly. A +content+ attribute only counts
# when the same tag also carries http-equiv="content-type". Repeated
# attribute names are ignored after their first occurrence. Unknown charset
# names give nil, and a declared UTF-16 is reported as UTF-8.
def scanning_meta(str)
  require 'strscan'
  scanner = StringScanner.new(str)
  return nil unless scanner.scan_until(/<meta[\t\n\f\r ]*/)

  seen = {} # attribute names handled so far
  got_pragma = false
  need_pragma = nil
  charset = nil

  # step: Attributes
  while (pair = get_attribute(scanner))
    name, value = pair
    next if seen[name]
    seen[name] = true
    case name
    when 'http-equiv'
      got_pragma = true if value == 'content-type'
    when 'content'
      candidate = extracting_encodings_from_meta_elements(value)
      charset ||= candidate
      need_pragma = true
    when 'charset'
      need_pragma = false
      charset = value
    end
  end

  # step: Processing
  return if need_pragma.nil?
  return if need_pragma && !got_pragma

  charset = Encoding.find(charset) rescue nil
  return unless charset
  charset = Encoding::UTF_8 if charset == Encoding::UTF_16
  charset # tentative
end
|
|
|
|
# Reads the next attribute of the tag that the StringScanner +ss+ is
# positioned in and returns it as [name, value], both lower-cased. An
# attribute without a value gets ''.
#
# Returns nil at the end of the tag (the closing '>' is consumed) or at the
# end of the input. Malformed input never raises:
#
# * an empty value (<tt>content=""</tt>, or <tt>name=</tt> at the end of the
#   input) gives '';
# * the character after a value-less attribute name is left in place, so a
#   following '>' or attribute is still seen by the next call;
# * a leading '=' is taken as part of the attribute name.
def get_attribute(ss)
  ss.scan(/[\t\n\f\r \/]*/)
  return nil if ss.eos? # input ended inside the tag
  if ss.peek(1) == '>'
    ss.getch
    return nil
  end

  # Non-empty here: the next character is not whitespace, '/', '>' or EOS.
  name = ss.scan(/=?[^=\t\n\f\r \/>]*/).downcase
  ss.skip(/[\t\n\f\r ]*/)
  # Only peek: whatever follows a value-less attribute belongs to the caller.
  return [name, ''] unless ss.peek(1) == '='
  ss.getch
  ss.skip(/[\t\n\f\r ]*/)

  value =
    case ss.peek(1)
    when '"'
      ss.getch
      quoted = ss.scan(/[^"]*/)
      ss.getch # closing quote, if any
      quoted
    when "'"
      ss.getch
      quoted = ss.scan(/[^']*/)
      ss.getch # closing quote, if any
      quoted
    when '>'
      ''
    else
      # scan gives nil when the input ends right after '='
      ss.scan(/[^\t\n\f\r >]+/) || ''
    end
  [name, value.downcase]
end
|
|
|
|
# Pulls the charset name out of the +content+ attribute text of a meta tag,
# e.g. "text/html; charset=utf-8". The name may be quoted with single or
# double quotes. Returns nil when there is no charset, when it is empty, or
# when its quoting is unbalanced.
#
# http://dev.w3.org/html5/spec/fetching-resources.html#algorithm-for-extracting-an-encoding-from-a-meta-element
def extracting_encodings_from_meta_elements(value)
  found = /charset[\t\n\f\r ]*=(?:"([^"]*)"|'([^']*)'|["']|\z|([^\t\n\f\r ;]+))/i.match(value)
  return nil unless found
  found[1] || found[2] || found[3]
end
|
|
|
|
##
|
|
# Checks for a supported Content-Encoding header and yields an Inflate
|
|
# wrapper for this response's socket when zlib is present. If the
|
|
# Content-Encoding is not supported or zlib is missing, the plain socket is
|
|
# yielded.
|
|
#
|
|
# If a Content-Range header is present, a plain socket is yielded as the
|
|
# bytes in the range may not be a complete deflate block.
|
|
|
|
def inflater # :nodoc:
  # Pass the raw socket through when decoding is unavailable, not requested,
  # or the response is a byte range.
  passthrough = !Net::HTTP::HAVE_ZLIB || !@decode_content || self['content-range']
  return yield @socket if passthrough

  case self['content-encoding']&.downcase
  when 'deflate', 'gzip', 'x-gzip'
    # The caller sees decoded data, so the header no longer applies.
    delete 'content-encoding'

    inflate_body_io = Inflater.new(@socket)
    completed = false

    begin
      yield inflate_body_io
      completed = true
    ensure
      begin
        inflate_body_io.finish
        # Report the decoded size in place of the transferred size.
        if self['content-length']
          self['content-length'] = inflate_body_io.bytes_inflated.to_s
        end
      rescue => err
        # Ignore #finish's error if there is an exception from yield
        raise err if completed
      end
    end
  when 'none', 'identity'
    delete 'content-encoding'

    yield @socket
  else
    yield @socket
  end
end
|
|
|
|
# Reads the whole body into +dest+ using the framing the headers announce:
# chunked transfer, then Content-Length, then the length of a Content-Range,
# and otherwise everything up to the end of the stream.
#
# Each +return+ below leaves this method from inside the block, so control
# does not come back to the statement after +yield+ in #inflater on those
# paths.
def read_body_0(dest)
  inflater do |body_io|
    if chunked?
      read_chunked(dest, body_io)
      return
    end

    @socket = body_io

    if (length = content_length)
      @socket.read(length, dest, @ignore_eof)
      return
    end

    if (length = range_length)
      @socket.read(length, dest)
      return
    end

    @socket.read_all(dest)
  end
end
|
|
|
|
##
|
|
# read_chunked reads from +@socket+ for chunk-size, chunk-extension, CRLF,
|
|
# etc. and +chunk_data_io+ for chunk-data which may be deflate or gzip
|
|
# encoded.
|
|
#
|
|
# See RFC 2616 section 3.6.1 for definitions
|
|
|
|
def read_chunked(dest, chunk_data_io) # :nodoc:
  while true
    # The chunk size is the first run of hex digits on the line; anything
    # after it (chunk extensions) is ignored.
    line = @socket.readline
    hexlen = line.slice(/[0-9a-fA-F]+/) or
      raise Net::HTTPBadResponse, "wrong chunk size line: #{line}"
    len = hexlen.hex
    break if len == 0 # last-chunk
    begin
      chunk_data_io.read len, dest
    ensure
      # Consume the line terminator after the chunk data even if reading or
      # decoding the data raised.
      @socket.read 2 # \r\n
    end
  end
  # Discard trailer lines up to and including the final blank line.
  until @socket.readline.empty?
    # none
  end
end
|
|
|
|
# Raises IOError unless a usable socket is attached.
#
# @socket is nil before #reading_body has attached one and again after it
# has finished, so nil is treated like a closed socket instead of failing
# with NoMethodError.
def stream_check
  raise IOError, 'attempt to read body out of block' if @socket.nil? || @socket.closed?
end
|
|
|
|
# Chooses the object the body is written to: a Net::ReadAdapter around
# +block+ when a block was given, else +dest+, else a new empty String.
#
# Raises ArgumentError when both +dest+ and +block+ are given.
def procdest(dest, block)
  if dest and block
    raise ArgumentError, 'both arg and block given for HTTP method'
  end
  return Net::ReadAdapter.new(block) if block
  dest || ''
end
|
|
|
|
##
|
|
# Inflater is a wrapper around Net::BufferedIO that transparently inflates
|
|
# zlib and gzip streams.
|
|
|
|
class Inflater # :nodoc:

  ##
  # Wraps +socket+; everything read through this object is inflated before
  # it reaches the destination.

  def initialize(socket)
    @socket = socket
    # zlib with automatic gzip detection
    @inflate = Zlib::Inflate.new(32 + Zlib::MAX_WBITS)
  end

  ##
  # Finishes the inflate stream. Does nothing when no input was ever fed to
  # it.

  def finish
    return if @inflate.total_in == 0
    @inflate.finish
  end

  ##
  # Total number of bytes produced so far; used to update the Content-Length
  # of the response.

  def bytes_inflated
    @inflate.total_out
  end

  ##
  # Builds a Net::ReadAdapter that inflates every chunk appended to it and
  # appends the result to +dest+.
  #
  # This allows a large response body to be inflated without storing the
  # entire body in memory. +dest+ is switched to binary encoding first when
  # it supports that.

  def inflate_adapter(dest)
    if dest.respond_to?(:set_encoding)
      dest.set_encoding(Encoding::ASCII_8BIT)
    elsif dest.respond_to?(:force_encoding)
      dest.force_encoding(Encoding::ASCII_8BIT)
    end

    on_chunk = proc do |compressed_chunk|
      @inflate.inflate(compressed_chunk) do |inflated|
        # Empty the compressed input once output starts arriving.
        compressed_chunk.clear
        dest << inflated
      end
    end

    Net::ReadAdapter.new(on_chunk)
  end

  ##
  # Reads +clen+ bytes from the socket, inflates them and writes the result
  # to +dest+. +ignore_eof+ is handed on to the socket's read.
  #
  # +clen+ counts compressed bytes, so unlike Net::BufferedIO#read this may
  # deliver more than +clen+ bytes to +dest+. At this time there is no way
  # for a user of Net::HTTPResponse to read a specific number of bytes from
  # the HTTP response body, so this internal API does not return the same
  # number of bytes as were requested.
  #
  # See https://bugs.ruby-lang.org/issues/6492 for further discussion.

  def read(clen, dest, ignore_eof = false)
    @socket.read(clen, inflate_adapter(dest), ignore_eof)
  end

  ##
  # Reads the rest of the socket, inflates it and writes the result to
  # +dest+.

  def read_all(dest)
    @socket.read_all(inflate_adapter(dest))
  end

end
|
|
|
|
end
|
|
|