1
0
Fork 0
mirror of https://github.com/puma/puma.git synced 2022-11-09 13:48:40 -05:00
puma--puma/lib/mongrel.rb
zedshaw b3c3a3b7a8 sendfile is banished until it can get sorted out
git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@301 19e92222-5c0b-0410-8929-a290d50e31e9
2006-08-03 21:28:25 +00:00

731 lines
27 KiB
Ruby

# Copyright (c) 2005 Zed A. Shaw
# You can redistribute it and/or modify it under the same terms as Ruby.
#
# Additional work donated by contributors. See http://mongrel.rubyforge.org/attributions.html
# for more information.
require 'socket'
require 'http11'
require 'tempfile'
require 'thread'
require 'stringio'
require 'mongrel/cgi'
require 'mongrel/handlers'
require 'mongrel/command'
require 'mongrel/tcphack'
require 'yaml'
require 'mongrel/configurator'
require 'time'
require 'rubygems'
require 'etc'
# Mongrel module containing all of the classes (include C extensions) for running
# a Mongrel web server. It contains a minimalist HTTP server with just enough
# functionality to service web application requests fast as possible.
module Mongrel
class URIClassifier
attr_reader :handler_map
# Returns the URIs that have been registered with this classifier so far.
# The URIs returned should not be modified as this will cause a memory leak.
# You can use this to inspect the contents of the URIClassifier.
def uris
@handler_map.keys
end
# Simply does an inspect that looks like a Hash inspect.
def inspect
@handler_map.inspect
end
end
# Used to stop the HttpServer via Thread.raise.
class StopServer < Exception; end
# Thrown at a thread when it is timed out.
class TimeoutError < Exception; end
# Every standard HTTP code mapped to the appropriate message. These are
# used so frequently that they are placed directly in Mongrel for easy
# access rather than Mongrel::Const.
HTTP_STATUS_CODES = {
100 => 'Continue',
101 => 'Switching Protocols',
200 => 'OK',
201 => 'Created',
202 => 'Accepted',
203 => 'Non-Authoritative Information',
204 => 'No Content',
205 => 'Reset Content',
206 => 'Partial Content',
300 => 'Multiple Choices',
301 => 'Moved Permanently',
302 => 'Moved Temporarily',
303 => 'See Other',
304 => 'Not Modified',
305 => 'Use Proxy',
400 => 'Bad Request',
401 => 'Unauthorized',
402 => 'Payment Required',
403 => 'Forbidden',
404 => 'Not Found',
405 => 'Method Not Allowed',
406 => 'Not Acceptable',
407 => 'Proxy Authentication Required',
408 => 'Request Time-out',
409 => 'Conflict',
410 => 'Gone',
411 => 'Length Required',
412 => 'Precondition Failed',
413 => 'Request Entity Too Large',
414 => 'Request-URI Too Large',
415 => 'Unsupported Media Type',
500 => 'Internal Server Error',
501 => 'Not Implemented',
502 => 'Bad Gateway',
503 => 'Service Unavailable',
504 => 'Gateway Time-out',
505 => 'HTTP Version not supported'
}
# Frequently used constants when constructing requests or responses. Many times
# the constant just refers to a string with the same contents. Using these constants
# gave about a 3% to 10% performance improvement over using the strings directly.
# Symbols did not really improve things much compared to constants.
#
# While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
# REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
# too taxing on performance.
module Const
DATE = "Date".freeze
# This is the part of the path after the SCRIPT_NAME. URIClassifier will determine this.
PATH_INFO="PATH_INFO".freeze
# This is the initial part that your handler is identified as by URIClassifier.
SCRIPT_NAME="SCRIPT_NAME".freeze
# The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
REQUEST_URI='REQUEST_URI'.freeze
MONGREL_VERSION="0.3.13.4".freeze
# TODO: this use of a base for tempfiles needs to be looked at for security problems
MONGREL_TMP_BASE="mongrel".freeze
# The standard empty 404 response for bad requests. Use Error4040Handler for custom stuff.
ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Mongrel #{MONGREL_VERSION}\r\n\r\nNOT FOUND".freeze
CONTENT_LENGTH="CONTENT_LENGTH".freeze
# A common header for indicating the server is too busy. Not used yet.
ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
# The basic max request size we'll try to read.
CHUNK_SIZE=(16 * 1024)
# This is the maximum header that is allowed before a client is booted. The parser detects
# this, but we'd also like to do this as well.
MAX_HEADER=1024 * (80 + 32)
# Maximum request body size before it is moved out of memory and into a tempfile for reading.
MAX_BODY=MAX_HEADER
# A frozen format for this is about 15% faster
STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze
CONTENT_TYPE = "Content-Type".freeze
LAST_MODIFIED = "Last-Modified".freeze
ETAG = "ETag".freeze
SLASH = "/".freeze
REQUEST_METHOD="REQUEST_METHOD".freeze
GET="GET".freeze
HEAD="HEAD".freeze
# ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
ETAG_FORMAT="\"%x-%x-%x\"".freeze
HEADER_FORMAT="%s: %s\r\n".freeze
LINE_END="\r\n".freeze
REMOTE_ADDR="REMOTE_ADDR".freeze
HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze
HTTP_IF_MODIFIED_SINCE="HTTP_IF_MODIFIED_SINCE".freeze
HTTP_IF_NONE_MATCH="HTTP_IF_NONE_MATCH".freeze
REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
end
# Basically a Hash with one extra parameter for the HTTP body, mostly used internally.
class HttpParams < Hash
attr_accessor :http_body
end
# When a handler is found for a registered URI then this class is constructed
# and passed to your HttpHandler::process method. You should assume that
# *one* handler processes all requests. Included in the HttpRequest is a
# HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body
# which is a string containing the request body (raw for now).
#
# The HttpRequest.initialize method will convert any request that is larger than
# Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses
# a StringIO object. To be safe, you should assume it works like a file.
#
# The HttpHandler.request_notify system is implemented by having HttpRequest call
# HttpHandler.request_begins, HttpHandler.request_progress, HttpHandler.process during
# the IO processing. This adds a small amount of overhead but lets you implement
# finer controlled handlers and filters.
class HttpRequest
attr_reader :body, :params
# You don't really call this. It's made for you.
# Main thing it does is hook up the params, and store any remaining
# body data into the HttpRequest.body attribute.
#
# TODO: Implement tempfile removal when the request is done.
def initialize(params, socket, dispatcher)
@params = params
@socket = socket
content_length = params[Const::CONTENT_LENGTH].to_i
remain = content_length - params.http_body.length
dispatcher.request_begins(params) if dispatcher
if remain == 0
# we've got everything, pack it up
@body = StringIO.new
@body.write params.http_body
dispatcher.request_progress(params, 0, content_length) if dispatcher
elsif remain < 0
# ERROR, they're sending bad requests
raise HttpParserError.new("Sent body size #{params.http_body.length} but declared Content-Length: #{content_length}")
elsif remain > 0
# must read more data to complete body
if remain > Const::MAX_BODY
# huge body, put it in a tempfile
@body = Tempfile.new(Const::MONGREL_TMP_BASE)
@body.binmode
else
# small body, just use that
@body = StringIO.new
end
@body.write params.http_body
read_body(remain, content_length, dispatcher)
end
@body.rewind
end
# Does the heavy lifting of properly reading the larger body requests in
# small chunks. It expects @body to be an IO object, @socket to be valid,
# and will set @body = nil if the request fails. It also expects any initial
# part of the body that has been read to be in the @body already.
def read_body(remain, total, dispatcher)
begin
# write the odd sized chunk first
remain -= @body.write(@socket.read(remain % Const::CHUNK_SIZE))
dispatcher.request_progress(params, remain, total) if dispatcher
# then stream out nothing but perfectly sized chunks
until remain <= 0 or @socket.closed?
data = @socket.read(Const::CHUNK_SIZE)
# have to do it this way since @socket.eof? causes it to block
raise "Socket closed or read failure" if not data or data.length != Const::CHUNK_SIZE
remain -= @body.write(data)
# ASSUME: we are writing to a disk and these writes always write the requested amount
dispatcher.request_progress(params, remain, total) if dispatcher
end
rescue Object
STDERR.puts "ERROR reading http body: #$!"
$!.backtrace.join("\n")
# any errors means we should delete the file, including if the file is dumped
@socket.close unless @socket.closed?
@body.delete if @body.class == Tempfile
@body = nil # signals that there was a problem
end
end
# Performs URI escaping so that you can construct proper
# query strings faster. Use this rather than the cgi.rb
# version since it's faster. (Stolen from Camping).
def self.escape(s)
s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/n) {
'%'+$1.unpack('H2'*$1.size).join('%').upcase
}.tr(' ', '+')
end
# Unescapes a URI escaped string. (Stolen from Camping).
def self.unescape(s)
s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n){
[$1.delete('%')].pack('H*')
}
end
# Parses a query string by breaking it up at the '&'
# and ';' characters. You can also use this to parse
# cookies by changing the characters used in the second
# parameter (which defaults to '&;'.
def self.query_parse(qs, d = '&;')
params = {}
(qs||'').split(/[#{d}] */n).inject(params) { |h,p|
k, v=unescape(p).split('=',2)
if cur = params[k]
if cur.class == Array
params[k] << v
else
params[k] = [cur, v]
end
else
params[k] = v
end
}
return params
end
end
# This class implements a simple way of constructing the HTTP headers dynamically
# via a Hash syntax. Think of it as a write-only Hash. Refer to HttpResponse for
# information on how this is used.
#
# One consequence of this write-only nature is that you can write multiple headers
# by just doing them twice (which is sometimes needed in HTTP), but that the normal
# semantics for Hash (where doing an insert replaces) is not there.
class HeaderOut
attr_reader :out
def initialize(out)
@out = out
end
# Simply writes "#{key}: #{value}" to an output buffer.
def[]=(key,value)
@out.write(Const::HEADER_FORMAT % [key, value])
end
end
# Writes and controls your response to the client using the HTTP/1.1 specification.
# You use it by simply doing:
#
# response.start(200) do |head,out|
# head['Content-Type'] = 'text/plain'
# out.write("hello\n")
# end
#
# The parameter to start is the response code--which Mongrel will translate for you
# based on HTTP_STATUS_CODES. The head parameter is how you write custom headers.
# The out parameter is where you write your body. The default status code for
# HttpResponse.start is 200 so the above example is redundant.
#
# As you can see, it's just like using a Hash and as you do this it writes the proper
# header to the output on the fly. You can even intermix specifying headers and
# writing content. The HttpResponse class with write the things in the proper order
# once the HttpResponse.block is ended.
#
# You may also work the HttpResponse object directly using the various attributes available
# for the raw socket, body, header, and status codes. If you do this you're on your own.
# A design decision was made to force the client to not pipeline requests. HTTP/1.1
# pipelining really kills the performance due to how it has to be handled and how
# unclear the standard is. To fix this the HttpResponse gives a "Connection: close"
# header which forces the client to close right away. The bonus for this is that it
# gives a pretty nice speed boost to most clients since they can close their connection
# immediately.
#
# One additional caveat is that you don't have to specify the Content-length header
# as the HttpResponse will write this for you based on the out length.
class HttpResponse
attr_reader :socket
attr_reader :body
attr_writer :body
attr_reader :header
attr_reader :status
attr_writer :status
attr_reader :body_sent
attr_reader :header_sent
attr_reader :status_sent
def initialize(socket)
@socket = socket
@body = StringIO.new
@status = 404
@header = HeaderOut.new(StringIO.new)
@header[Const::DATE] = Time.now.httpdate
@body_sent = false
@header_sent = false
@status_sent = false
end
# Receives a block passing it the header and body for you to work with.
# When the block is finished it writes everything you've done to
# the socket in the proper order. This lets you intermix header and
# body content as needed. Handlers are able to modify pretty much
# any part of the request in the chain, and can stop further processing
# by simple passing "finalize=true" to the start method. By default
# all handlers run and then mongrel finalizes the request when they're
# all done.
def start(status=200, finalize=false)
@status = status.to_i
yield @header, @body
finished if finalize
end
# Primarily used in exception handling to reset the response output in order to write
# an alternative response. It will abort with an exception if you have already
# sent the header or the body. This is pretty catastrophic actually.
def reset
if @body_sent
raise "You have already sent the request body."
elsif @header_sent
raise "You have already sent the request headers."
else
@header.out.rewind
@body.rewind
end
end
def send_status(content_length=@body.length)
if not @status_sent
@header['Content-Length'] = content_length unless @status == 304
write(Const::STATUS_FORMAT % [@status, HTTP_STATUS_CODES[@status]])
@status_sent = true
end
end
def send_header
if not @header_sent
@header.out.rewind
write(@header.out.read + Const::LINE_END)
@header_sent = true
end
end
def send_body
if not @body_sent
@body.rewind
write(@body.read)
@body_sent = true
end
end
# Appends the contents of +path+ to the response stream. The file is opened for binary
# reading and written in chunks to the socket. If the
# <a href="http://rubyforge.org/projects/ruby-sendfile">sendfile</a> library is found,
# it is used to send the file, often with greater speed and less memory/cpu usage.
#
# The presence of ruby-sendfile is determined by @socket.response_to? :sendfile, which means
# that if you have your own sendfile implementation you can use it without changing this function,
# just make sure it follows the ruby-sendfile signature.
def send_file(path)
File.open(path, "rb") do |f|
while chunk = f.read(Const::CHUNK_SIZE) and chunk.length > 0
write(chunk)
end
@body_sent = true
end
end
def socket_error(details)
# ignore these since it means the client closed off early
@socket.close unless @socket.closed?
done = true
raise details
end
def write(data)
@socket.write(data)
rescue => details
socket_error(details)
end
# This takes whatever has been done to header and body and then writes it in the
# proper format to make an HTTP/1.1 response.
def finished
send_status
send_header
send_body
end
# Used during error conditions to mark the response as "done" so there isn't any more processing
# sent to the client.
def done=(val)
@status_sent = true
@header_sent = true
@body_sent = true
end
def done
(@status_sent and @header_sent and @body_sent)
end
end
# This is the main driver of Mongrel, while the Mongrel::HttpParser and Mongrel::URIClassifier
# make up the majority of how the server functions. It's a very simple class that just
# has a thread accepting connections and a simple HttpServer.process_client function
# to do the heavy lifting with the IO and Ruby.
#
# You use it by doing the following:
#
# server = HttpServer.new("0.0.0.0", 3000)
# server.register("/stuff", MyNiftyHandler.new)
# server.run.join
#
# The last line can be just server.run if you don't want to join the thread used.
# If you don't though Ruby will mysteriously just exit on you.
#
# Ruby's thread implementation is "interesting" to say the least. Experiments with
# *many* different types of IO processing simply cannot make a dent in it. Future
# releases of Mongrel will find other creative ways to make threads faster, but don't
# hold your breath until Ruby 1.9 is actually finally useful.
class HttpServer
attr_reader :acceptor
attr_reader :workers
attr_reader :classifier
attr_reader :host
attr_reader :port
attr_reader :timeout
attr_reader :num_processors
# Creates a working server on host:port (strange things happen if port isn't a Number).
# Use HttpServer::run to start the server and HttpServer.acceptor.join to
# join the thread that's processing incoming requests on the socket.
#
# The num_processors optional argument is the maximum number of concurrent
# processors to accept, anything over this is closed immediately to maintain
# server processing performance. This may seem mean but it is the most efficient
# way to deal with overload. Other schemes involve still parsing the client's request
# which defeats the point of an overload handling system.
#
# The timeout parameter is a sleep timeout (in hundredths of a second) that is placed between
# socket.accept calls in order to give the server a cheap throttle time. It defaults to 0 and
# actually if it is 0 then the sleep is not done at all.
#
# TODO: Find out if anyone actually uses the timeout option since it seems to cause problems on FBSD.
def initialize(host, port, num_processors=(2**30-1), timeout=0)
@socket = TCPServer.new(host, port)
@classifier = URIClassifier.new
@host = host
@port = port
@workers = ThreadGroup.new
@timeout = timeout
@num_processors = num_processors
@death_time = 60
end
# Does the majority of the IO processing. It has been written in Ruby using
# about 7 different IO processing strategies and no matter how it's done
# the performance just does not improve. It is currently carefully constructed
# to make sure that it gets the best possible performance, but anyone who
# thinks they can make it faster is more than welcome to take a crack at it.
def process_client(client)
begin
parser = HttpParser.new
params = HttpParams.new
request = nil
data = client.readpartial(Const::CHUNK_SIZE)
nparsed = 0
# Assumption: nparsed will always be less since data will get filled with more
# after each parsing. If it doesn't get more then there was a problem
# with the read operation on the client socket. Effect is to stop processing when the
# socket can't fill the buffer for further parsing.
while nparsed < data.length
nparsed = parser.execute(params, data, nparsed)
if parser.finished?
script_name, path_info, handlers = @classifier.resolve(params[Const::REQUEST_URI])
if handlers
params[Const::PATH_INFO] = path_info
params[Const::SCRIPT_NAME] = script_name
params[Const::REMOTE_ADDR] = params[Const::HTTP_X_FORWARDED_FOR] || client.peeraddr.last
notifier = handlers[0].request_notify ? handlers[0] : nil
request = HttpRequest.new(params, client, notifier)
# in the case of large file uploads the user could close the socket, so skip those requests
break if request.body == nil # nil signals from HttpRequest::initialize that the request was aborted
# request is good so far, continue processing the response
response = HttpResponse.new(client)
# Process each handler in registered order until we run out or one finalizes the response.
handlers.each do |handler|
handler.process(request, response)
break if response.done or client.closed?
end
# And finally, if nobody closed the response off, we finalize it.
unless response.done or client.closed?
response.finished
end
else
# Didn't find it, return a stock 404 response.
client.write(Const::ERROR_404_RESPONSE)
end
break #done
else
# Parser is not done, queue up more data to read and continue parsing
data << client.readpartial(Const::CHUNK_SIZE)
if data.length >= Const::MAX_HEADER
raise HttpParserError.new("HEADER is longer than allowed, aborting client early.")
end
end
end
rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
# ignored
rescue HttpParserError
STDERR.puts "#{Time.now}: BAD CLIENT (#{params[Const::HTTP_X_FORWARDED_FOR] || client.peeraddr.last}): #$!"
rescue Errno::EMFILE
reap_dead_workers('too many files')
rescue Object
STDERR.puts "#{Time.now}: ERROR: #$!"
STDERR.puts $!.backtrace.join("\n")
ensure
client.close unless client.closed?
request.body.delete if request and request.body.class == Tempfile
end
end
# Used internally to kill off any worker threads that have taken too long
# to complete processing. Only called if there are too many processors
# currently servicing. It returns the count of workers still active
# after the reap is done. It only runs if there are workers to reap.
def reap_dead_workers(reason='unknown')
if @workers.list.length > 0
STDERR.puts "#{Time.now}: Reaping #{@workers.list.length} threads for slow workers because of '#{reason}'"
mark = Time.now
@workers.list.each do |w|
w[:started_on] = Time.now if not w[:started_on]
if mark - w[:started_on] > @death_time + @timeout
STDERR.puts "Thread #{w.inspect} is too old, killing."
w.raise(TimeoutError.new("Timed out thread."))
end
end
end
return @workers.list.length
end
# Performs a wait on all the currently running threads and kills any that take
# too long. Right now it just waits 60 seconds, but will expand this to
# allow setting. The @timeout setting does extend this waiting period by
# that much longer.
def graceful_shutdown
while reap_dead_workers("shutdown") > 0
STDERR.print "Waiting for #{@workers.list.length} requests to finish, could take #{@death_time + @timeout} seconds."
sleep @death_time / 10
end
end
def configure_socket_options
if /linux/ === RUBY_PLATFORM
# 9 is currently TCP_DEFER_ACCEPT
$tcp_defer_accept_opts = [9,1]
$tcp_cork_opts = [3,1]
end
end
# Runs the thing. It returns the thread used so you can "join" it. You can also
# access the HttpServer::acceptor attribute to get the thread later.
def run
BasicSocket.do_not_reverse_lookup=true
configure_socket_options
@socket.setsockopt(Socket::SOL_TCP, $tcp_defer_accept_opts[0], $tcp_defer_accept_opts[1]) if $tcp_defer_accept_opts
@acceptor = Thread.new do
while true
begin
client = @socket.accept
client.setsockopt(Socket::SOL_TCP, $tcp_cork_opts[0], $tcp_cork_opts[1]) if $tcp_cork_opts
worker_list = @workers.list
if worker_list.length >= @num_processors
STDERR.puts "Server overloaded with #{worker_list.length} processors (#@num_processors max). Dropping connection."
client.close
reap_dead_workers("max processors")
else
thread = Thread.new { process_client(client) }
thread.abort_on_exception = true
thread[:started_on] = Time.now
@workers.add(thread)
sleep @timeout/100 if @timeout > 0
end
rescue StopServer
@socket.close if not @socket.closed?
break
rescue Errno::EMFILE
reap_dead_workers("too many open files")
sleep 0.5
rescue Errno::ECONNABORTED
# client closed the socket even before accept
client.close if not client.closed?
end
end
graceful_shutdown
end
return @acceptor
end
# Simply registers a handler with the internal URIClassifier. When the URI is
# found in the prefix of a request then your handler's HttpHandler::process method
# is called. See Mongrel::URIClassifier#register for more information.
#
# If you set in_front=true then the passed in handler will be put in front in the list.
# Otherwise it's placed at the end of the list.
def register(uri, handler, in_front=false)
script_name, path_info, handlers = @classifier.resolve(uri)
if not handlers
@classifier.register(uri, [handler])
else
if path_info.length == 0 or (script_name == Const::SLASH and path_info == Const::SLASH)
if in_front
handlers.unshift(handler)
else
handlers << handler
end
else
@classifier.register(uri, [handler])
end
end
handler.listener = self
end
# Removes any handlers registered at the given URI. See Mongrel::URIClassifier#unregister
# for more information. Remember this removes them *all* so the entire
# processing chain goes away.
def unregister(uri)
@classifier.unregister(uri)
end
# Stops the acceptor thread and then causes the worker threads to finish
# off the request queue before finally exiting.
def stop
stopper = Thread.new do
exc = StopServer.new
@acceptor.raise(exc)
end
stopper.priority = 10
end
end
end