1
0
Fork 0
mirror of https://github.com/puma/puma.git synced 2022-11-09 13:48:40 -05:00
puma--puma/lib/puma/client.rb

597 lines
15 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true

class IO
  # We need to use this for a jruby work around on both 1.8 and 1.9.
  # So this either creates the constant (on 1.8), or harmlessly
  # reopens it (on 1.9).
  module WaitReadable
  end
end
require 'puma/detect'
require 'tempfile'
require 'forwardable'

if Puma::IS_JRUBY
  # We have to work around some OpenSSL buffer/io-readiness bugs
  # so we pull it in regardless of if the user is binding
  # to an SSL socket
  require 'openssl'
end
module Puma
class ConnectionError < RuntimeError; end
class HttpParserError501 < IOError; end
2018-05-01 15:42:05 -05:00
# An instance of this class represents a unique request from a client.
# For example, this could be a web request from a browser or from CURL.
2018-05-01 15:42:05 -05:00
#
# An instance of `Puma::Client` can be used as if it were an IO object
# by the reactor. The reactor is expected to call `#to_io`
# on any non-IO objects it polls. For example, nio4r internally calls
# `IO::try_convert` (which may call `#to_io`) when a new socket is
# registered.
2018-05-01 15:42:05 -05:00
#
# Instances of this class are responsible for knowing if
# the header and body are fully buffered via the `try_to_finish` method.
# They can be used to "time out" a response via the `timeout_at` reader.
#
class Client
# this tests all values but the last, which must be chunked
ALLOWED_TRANSFER_ENCODING = %w[compress deflate gzip].freeze
# chunked body validation
CHUNK_SIZE_INVALID = /[^\h]/.freeze
CHUNK_VALID_ENDING = "\r\n".freeze
# Content-Length header value validation
CONTENT_LENGTH_VALUE_INVALID = /[^\d]/.freeze
TE_ERR_MSG = 'Invalid Transfer-Encoding'
2019-09-20 13:30:22 +02:00
# The object used for a request with no body. All requests with
# no body share this one object since it has no state.
EmptyBody = NullIO.new
include Puma::Const
extend Forwardable
# Build a client for one socket connection.
#
# io  - the accepted socket (or MiniSSL socket); #to_io must yield the raw IO
# env - an optional prototype rack env hash; duplicated per request so the
#       prototype itself is never mutated
def initialize(io, env=nil)
  @io = io
  @to_io = io.to_io
  @proto_env = env
  # Keep the prototype untouched; each request works on its own copy.
  @env = env ? env.dup : nil

  @parser = HttpParser.new
  @parsed_bytes = 0
  @read_header = true
  @read_proxy = false
  @ready = false

  # Body buffering state
  @body = nil
  @body_read_start = nil
  @buffer = nil
  @tempfile = nil

  @timeout_at = nil

  @requests_served = 0
  @hijacked = false

  # Peer/address bookkeeping
  @peerip = nil
  @peer_family = nil
  @listener = nil
  @remote_addr_header = nil
  @expect_proxy_proto = false

  @body_remain = 0

  # Tracks whether we've seen the terminating zero-length chunk of a
  # chunked body but are still waiting on its trailing CRLF (see #1812).
  @in_last_chunk = false
end
attr_reader :env, :to_io, :body, :io, :timeout_at, :ready, :hijacked,
  :tempfile

attr_writer :peerip

attr_accessor :remote_addr_header, :listener

# closed? is answered by the underlying io object
def_delegators :@io, :closed?
# Test to see if io meets a bare minimum of functioning, @to_io needs to be
# used for MiniSSL::Socket
def io_ok?
  return false unless ::BasicSocket === @to_io

  !closed?
end
# @!attribute [r] inspect
# Compact debug representation showing identity and readiness.
def inspect
  format "#<Puma::Client:0x%x @ready=%s>", object_id, @ready.inspect
end
# For the hijack protocol (allows us to just put the Client object
# into the env). Marks this client as hijacked so Puma stops managing
# the connection, and exposes the raw IO to the app under the rack
# hijack key (only if the app has not already stored one there).
def call
@hijacked = true
env[HIJACK_IO] ||= @io
end
# @!attribute [r] in_data_phase
# True once the headers (and any PROXY protocol preamble) have been
# fully consumed, i.e. we are now reading the request body.
def in_data_phase
  !(@read_header || @read_proxy)
end
# Arm the client deadline: the connection must make progress within
# +val+ seconds, measured on the monotonic clock.
def set_timeout(val)
  now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  @timeout_at = now + val
end
# Number of seconds until the timeout elapses; clamped so it is never
# negative once the deadline has already passed.
def timeout
  remaining = @timeout_at - Process.clock_gettime(Process::CLOCK_MONOTONIC)
  remaining > 0 ? remaining : 0
end
# Prepare this object to handle the next request on a persistent
# connection. Clears all per-request parser/body state, then — if
# pipelined data is already buffered — immediately tries to parse it.
#
# fast_check - when true and no data is buffered, briefly poll the
#              socket (FAST_TRACK_KA_TIMEOUT) to fast-track keep-alive
#              requests that arrive back-to-back
#
# Returns true when a complete request is ready, false/nil otherwise.
def reset(fast_check=true)
  @parser.reset
  @read_header = true
  @read_proxy = !!@expect_proxy_proto
  @env = @proto_env.dup
  @body = nil
  @tempfile = nil
  @parsed_bytes = 0
  @ready = false
  @body_remain = 0
  @peerip = nil if @remote_addr_header
  @in_last_chunk = false

  if @buffer
    return false unless try_to_parse_proxy_protocol

    @parsed_bytes = @parser.execute(@env, @buffer, @parsed_bytes)

    if @parser.finished?
      return setup_body
    elsif @parsed_bytes >= MAX_HEADER
      raise HttpParserError,
        "HEADER is longer than allowed, aborting client early."
    end

    return false
  else
    begin
      if fast_check && @to_io.wait_readable(FAST_TRACK_KA_TIMEOUT)
        return try_to_finish
      end
    rescue IOError
      # swallow it
    end
  end
end
# Close the underlying IO, ignoring errors from a socket that is
# already dead; clears any interrupt left behind by the close.
def close
  begin
    @io.close
  rescue IOError, Errno::EBADF
    Puma::Util.purge_interrupt_queue
  end
end
# If necessary, read the PROXY protocol from the buffer. Returns
# false if more data is needed.
def try_to_parse_proxy_protocol
  # Nothing to do unless we are expecting a v1 PROXY preamble.
  return true unless @read_proxy && @expect_proxy_proto == :v1
  # Need at least one complete line before we can decide anything.
  return false unless @buffer.include?("\r\n")

  md = PROXY_PROTOCOL_V1_REGEX.match(@buffer)
  if md
    @peerip = md[1].split(" ").first if md[1]
    @buffer = md.post_match
  end
  # if the buffer has a \r\n but doesn't have a PROXY protocol
  # request, this is just HTTP from a non-PROXY client; move on
  @read_proxy = false
  !@buffer.empty?
end
# Non-blocking attempt to finish buffering the request. In the header
# phase this reads more bytes and feeds the parser; in the body phase
# it delegates to read_body.
#
# Returns true when the request is fully buffered, false when more
# data is needed. Raises EOFError on a closed socket and
# ConnectionError on read failures.
def try_to_finish
  return read_body if in_data_phase

  begin
    data = @io.read_nonblock(CHUNK_SIZE)
  rescue IO::WaitReadable
    return false
  rescue EOFError
    # Swallow error, don't log
  rescue SystemCallError, IOError
    raise ConnectionError, "Connection error detected during read"
  end

  # No data means a closed socket
  unless data
    @buffer = nil
    set_ready
    raise EOFError
  end

  if @buffer
    @buffer << data
  else
    @buffer = data
  end

  return false unless try_to_parse_proxy_protocol

  @parsed_bytes = @parser.execute(@env, @buffer, @parsed_bytes)

  if @parser.finished?
    return setup_body
  elsif @parsed_bytes >= MAX_HEADER
    raise HttpParserError,
      "HEADER is longer than allowed, aborting client early."
  end

  false
end
# Opportunistically finish the request without blocking: returns true
# immediately when already buffered, otherwise polls the socket once
# and, only if readable, attempts another parse pass.
def eagerly_finish
  @ready || (@to_io.wait_readable(0) ? try_to_finish : false)
end
# Block until the request is fully buffered, waiting up to +timeout+
# seconds for each round of data; invokes timeout! (which raises)
# when the socket stays quiet too long.
def finish(timeout)
  return if @ready

  until try_to_finish
    @to_io.wait_readable(timeout) || timeout!
  end
end
# Give up on this client: send a 408 Request Timeout if we were in the
# middle of reading a body, then abort the connection.
def timeout!
  if in_data_phase
    write_error(408)
  end

  raise ConnectionError
end
# Best-effort write of a canned error response for +status_code+.
# Any failure (including a dead socket) is ignored, since the
# connection is being abandoned anyway.
def write_error(status_code)
  begin
    @io << ERROR_RESPONSE[status_code]
  rescue StandardError
  end
end
# Resolve and memoize the client's IP address. When a trusted
# remote-address header is configured (e.g. X-Forwarded-For), the
# first address listed there wins; otherwise fall back to the
# socket's peer address.
def peerip
  return @peerip if @peerip

  if @remote_addr_header
    hdr = (@env[@remote_addr_header] || @io.peeraddr.last).split(/[\s,]/).first
    @peerip = hdr
    return hdr
  end

  @peerip ||= @io.peeraddr.last
end
# Memoized address family of the local socket (AF_INET / AF_INET6),
# defaulting to AF_INET when the socket can't tell us.
def peer_family
  @peer_family ||=
    begin
      @io.local_address.afamily
    rescue
      Socket::AF_INET
    end
end
# Returns true if the persistent connection can be closed immediately
# without waiting for the configured idle/shutdown timeout.
# @version 5.0.0
#
def can_close?
  # Closing is only safe when no partial request has been parsed yet.
  @parsed_bytes.zero?
end
# Configure whether a PROXY protocol preamble is expected on this
# connection. Enabling it only flips @read_proxy while we are still in
# the header phase; disabling always clears it.
def expect_proxy_proto=(val)
  if val
    @read_proxy = true if @read_header
  else
    @read_proxy = false
  end

  @expect_proxy_proto = val
end
private

# Called once the parser has finished with the headers. Validates
# Transfer-Encoding / Content-Length, answers 100-continue when the
# client asked for it, and decides how the body will be buffered:
# shared EmptyBody, in-memory StringIO, or an unlinked Tempfile for
# bodies larger than MAX_BODY.
#
# Returns true when the request (including body) is already complete,
# false when more body data must still be read.
def setup_body
  @body_read_start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)

  if @env[HTTP_EXPECT] == CONTINUE
    # TODO allow a hook here to check the headers before
    # going forward
    @io << HTTP_11_100
    @io.flush
  end

  @read_header = false

  body = @parser.body

  te = @env[TRANSFER_ENCODING2]
  if te
    te_lwr = te.downcase
    if te.include? ','
      # Multiple encodings: all but the last must be an allowed
      # non-chunked coding, and exactly the last must be chunked.
      te_ary = te_lwr.split ','
      te_count = te_ary.count CHUNKED
      te_valid = te_ary[0..-2].all? { |e| ALLOWED_TRANSFER_ENCODING.include? e }
      if te_ary.last == CHUNKED && te_count == 1 && te_valid
        @env.delete TRANSFER_ENCODING2
        return setup_chunked_body body
      elsif te_count >= 1
        raise HttpParserError   , "#{TE_ERR_MSG}, multiple chunked: '#{te}'"
      elsif !te_valid
        raise HttpParserError501, "#{TE_ERR_MSG}, unknown value: '#{te}'"
      end
    elsif te_lwr == CHUNKED
      @env.delete TRANSFER_ENCODING2
      return setup_chunked_body body
    elsif ALLOWED_TRANSFER_ENCODING.include? te_lwr
      raise HttpParserError   , "#{TE_ERR_MSG}, single value must be chunked: '#{te}'"
    else
      raise HttpParserError501, "#{TE_ERR_MSG}, unknown value: '#{te}'"
    end
  end

  @chunked_body = false

  cl = @env[CONTENT_LENGTH]

  if cl
    # cannot contain characters that are not \d
    if cl =~ CONTENT_LENGTH_VALUE_INVALID
      raise HttpParserError, "Invalid Content-Length: #{cl.inspect}"
    end
  else
    # No Content-Length and not chunked: there is no body. Anything
    # already read beyond the headers is a pipelined next request.
    @buffer = body.empty? ? nil : body
    @body = EmptyBody
    set_ready
    return true
  end

  remain = cl.to_i - body.bytesize

  if remain <= 0
    @body = StringIO.new(body)
    @buffer = nil
    set_ready
    return true
  end

  if remain > MAX_BODY
    @body = Tempfile.new(Const::PUMA_TMP_BASE)
    # Unlink immediately so the OS reclaims the file even if this
    # process is SIGKILLed mid-upload (POSIX unlink-after-create).
    @body.unlink
    @body.binmode
    @tempfile = @body
  else
    # The body[0,0] trick is to get an empty string in the same
    # encoding as body.
    @body = StringIO.new body[0,0]
  end

  @body.write body

  @body_remain = remain

  false
end
# Read more of a fixed-length (Content-Length) body without blocking;
# chunked bodies are delegated to read_chunked_body.
#
# Returns true when the body is complete, false when more data is
# needed. Raises EOFError on a closed socket and ConnectionError on
# read failures.
def read_body
  if @chunked_body
    return read_chunked_body
  end

  # Read an odd sized chunk so we can read even sized ones
  # after this
  remain = @body_remain

  if remain > CHUNK_SIZE
    want = CHUNK_SIZE
  else
    want = remain
  end

  begin
    chunk = @io.read_nonblock(want)
  rescue IO::WaitReadable
    return false
  rescue SystemCallError, IOError
    raise ConnectionError, "Connection error detected during read"
  end

  # No chunk means a closed socket
  unless chunk
    @body.close
    @buffer = nil
    set_ready
    raise EOFError
  end

  remain -= @body.write(chunk)

  if remain <= 0
    @body.rewind
    @buffer = nil
    set_ready
    return true
  end

  @body_remain = remain

  false
end
# Drain available chunked-encoding data from the socket, decoding as
# we go. Returns true once the terminating zero-length chunk (and its
# trailing CRLF) has been consumed, false when the socket would block.
def read_chunked_body
  while true
    begin
      chunk = @io.read_nonblock(4096)
    rescue IO::WaitReadable
      return false
    rescue SystemCallError, IOError
      raise ConnectionError, "Connection error detected during read"
    end

    # No chunk means a closed socket
    unless chunk
      @body.close
      @buffer = nil
      set_ready
      raise EOFError
    end

    if decode_chunk(chunk)
      # Body complete: expose the reconstructed length to the app.
      @env[CONTENT_LENGTH] = @chunked_content_length.to_s
      return true
    end
  end
end
# Initialize state for a chunked (Transfer-Encoding: chunked) body and
# decode whatever body bytes arrived with the headers. The body is
# spooled to an immediately-unlinked Tempfile so the OS reclaims it
# even if the process dies mid-upload.
#
# Returns true when the whole body was already present, nil otherwise.
def setup_chunked_body(body)
  @chunked_body = true
  @partial_part_left = 0
  @prev_chunk = ""

  @body = Tempfile.new(Const::PUMA_TMP_BASE)
  @body.unlink
  @body.binmode
  @tempfile = @body
  @chunked_content_length = 0

  if decode_chunk(body)
    @env[CONTENT_LENGTH] = @chunked_content_length.to_s
    return true
  end
end
# Append a decoded chunk payload to the spooled body, keeping a
# running total of the reconstructed content length.
# @version 5.0.0
def write_chunk(str)
  @chunked_content_length += @body.write(str)
end
# Incrementally decode chunked transfer-encoding data. Handles chunk
# payloads split across reads (@partial_part_left), size lines split
# across reads (@prev_chunk), and the terminating zero-length chunk
# whose trailing CRLF may arrive in a later read (@in_last_chunk).
#
# Returns true when the entire body (including the final CRLF) has
# been decoded, false when more data is needed. Raises HttpParserError
# on malformed chunk sizes or payloads.
def decode_chunk(chunk)
  # First, finish off a chunk payload left incomplete by a prior read.
  if @partial_part_left > 0
    if @partial_part_left <= chunk.size
      if @partial_part_left > 2
        write_chunk(chunk[0..(@partial_part_left-3)]) # skip the \r\n
      end
      chunk = chunk[@partial_part_left..-1]
      @partial_part_left = 0
    else
      if @partial_part_left > 2
        if @partial_part_left == chunk.size + 1
          # Don't include the last \r
          write_chunk(chunk[0..(@partial_part_left-3)])
        else
          # don't include the last \r\n
          write_chunk(chunk)
        end
      end
      @partial_part_left -= chunk.size
      return false
    end
  end

  # Prepend any incomplete size line carried over from the last read.
  if @prev_chunk.empty?
    io = StringIO.new(chunk)
  else
    io = StringIO.new(@prev_chunk+chunk)
    @prev_chunk = ""
  end

  while !io.eof?
    line = io.gets
    if line.end_with?("\r\n")
      # Puma doesn't process chunk extensions, but should parse if they're
      # present, which is the reason for the semicolon regex
      chunk_hex = line.strip[/\A[^;]+/]
      if chunk_hex =~ CHUNK_SIZE_INVALID
        raise HttpParserError, "Invalid chunk size: '#{chunk_hex}'"
      end
      len = chunk_hex.to_i(16)
      if len == 0
        # Zero-length chunk terminates the body; we still owe the
        # final CRLF, which may not have arrived yet (see #1812).
        @in_last_chunk = true
        @body.rewind
        rest = io.read
        last_crlf_size = "\r\n".bytesize
        if rest.bytesize < last_crlf_size
          @buffer = nil
          @partial_part_left = last_crlf_size - rest.bytesize
          return false
        else
          # Anything after the final CRLF is a pipelined next request.
          @buffer = rest[last_crlf_size..-1]
          @buffer = nil if @buffer.empty?
          set_ready
          return true
        end
      end

      # Account for the CRLF that terminates the chunk payload.
      len += 2

      part = io.read(len)

      unless part
        @partial_part_left = len
        next
      end

      got = part.size
      case
      when got == len
        # proper chunked segment must end with "\r\n"
        if part.end_with? CHUNK_VALID_ENDING
          write_chunk(part[0..-3]) # to skip the ending \r\n
        else
          raise HttpParserError, "Chunk size mismatch"
        end
      when got <= len - 2
        write_chunk(part)
        @partial_part_left = len - part.size
      when got == len - 1 # edge where we get just \r but not \n
        write_chunk(part[0..-2])
        @partial_part_left = len - part.size
      end
    else
      # Incomplete size line; stash it and wait for more data.
      @prev_chunk = line
      return false
    end
  end

  if @in_last_chunk
    set_ready
    true
  else
    false
  end
end
# Mark the request as fully buffered and ready to hand to the app,
# recording (in milliseconds) how long the body took to read.
def set_ready
  if @body_read_start
    @env['puma.request_body_wait'] = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - @body_read_start
  end
  @requests_served += 1
  @ready = true
end
end
end