1
0
Fork 0
mirror of https://github.com/puma/puma.git synced 2022-11-09 13:48:40 -05:00
puma--puma/lib/mongrel.rb
zedshaw edaaab5b63 Tried out using a ThreadGroup and the performance wasn't as good as with a fixed set of thread processors.
git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@25 19e92222-5c0b-0410-8929-a290d50e31e9
2006-02-10 03:56:16 +00:00

550 lines
20 KiB
Ruby

require 'socket'
require 'http11'
require 'thread'
require 'stringio'
# Mongrel module containing all of the classes (include C extensions) for running
# a Mongrel web server. It contains a minimalist HTTP server with just enough
# functionality to service web application requests fast as possible.
module Mongrel
# Every standard HTTP code mapped to the appropriate message. These are
# used so frequently that they are placed directly in Mongrel for easy
# access rather than Mongrel::Const.
HTTP_STATUS_CODES = {
100 => 'Continue',
101 => 'Switching Protocols',
200 => 'OK',
201 => 'Created',
202 => 'Accepted',
203 => 'Non-Authoritative Information',
204 => 'No Content',
205 => 'Reset Content',
206 => 'Partial Content',
300 => 'Multiple Choices',
301 => 'Moved Permanently',
302 => 'Moved Temporarily',
303 => 'See Other',
304 => 'Not Modified',
305 => 'Use Proxy',
400 => 'Bad Request',
401 => 'Unauthorized',
402 => 'Payment Required',
403 => 'Forbidden',
404 => 'Not Found',
405 => 'Method Not Allowed',
406 => 'Not Acceptable',
407 => 'Proxy Authentication Required',
408 => 'Request Time-out',
409 => 'Conflict',
410 => 'Gone',
411 => 'Length Required',
412 => 'Precondition Failed',
413 => 'Request Entity Too Large',
414 => 'Request-URI Too Large',
415 => 'Unsupported Media Type',
500 => 'Internal Server Error',
501 => 'Not Implemented',
502 => 'Bad Gateway',
503 => 'Service Unavailable',
504 => 'Gateway Time-out',
505 => 'HTTP Version not supported'
}
# Frequently used constants when constructing requests or responses. Many times
# the constant just refers to a string with the same contents. Using these constants
# gave about a 3% to 10% performance improvement over using the strings directly.
# Symbols did not really improve things much compared to constants.
#
# While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
# REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
# too taxing on performance.
module Const
# This is the part of the path after the SCRIPT_NAME. URIClassifier will determine this.
PATH_INFO="PATH_INFO"
# This is the intial part that your handler is identified as by URIClassifier.
SCRIPT_NAME="SCRIPT_NAME"
# The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
REQUEST_URI='REQUEST_URI'
# Content length (also available as HTTP_CONTENT_LENGTH).
CONTENT_LENGTH='CONTENT_LENGTH'
# Content length (also available as CONTENT_LENGTH).
HTTP_CONTENT_LENGTH='HTTP_CONTENT_LENGTH'
# Content type (also available as HTTP_CONTENT_TYPE).
CONTENT_TYPE='CONTENT_TYPE'
# Content type (also available as CONTENT_TYPE).
HTTP_CONTENT_TYPE='HTTP_CONTENT_TYPE'
# Gateway interface key in the HttpRequest parameters.
GATEWAY_INTERFACE='GATEWAY_INTERFACE'
# We claim to support CGI/1.2.
GATEWAY_INTERFACE_VALUE='CGI/1.2'
# Hosts remote IP address. Mongrel does not do DNS resolves since that slows
# processing down considerably.
REMOTE_ADDR='REMOTE_ADDR'
# This is not given since Mongrel does not do DNS resolves. It is only here for
# completeness for the CGI standard.
REMOTE_HOST='REMOTE_HOST'
# The name/host of our server as given by the HttpServer.new(host,port) call.
SERVER_NAME='SERVER_NAME'
# The port of our server as given by the HttpServer.new(host,port) call.
SERVER_PORT='SERVER_PORT'
# Official server protocol key in the HttpRequest parameters.
SERVER_PROTOCOL='SERVER_PROTOCOL'
# Mongrel claims to support HTTP/1.1.
SERVER_PROTOCOL_VALUE='HTTP/1.1'
# The actual server software being used (it's Mongrel man).
SERVER_SOFTWARE='SERVER_SOFTWARE'
# Current Mongrel version (used for SERVER_SOFTWARE and other response headers).
MONGREL_VERSION='Mongrel 0.2.2'
# The standard empty 404 response for bad requests. Use Error4040Handler for custom stuff.
ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{MONGREL_VERSION}\r\n\r\nNOT FOUND"
# A common header for indicating the server is too busy. Not used yet.
ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY"
# The basic max request size we'll try to read.
CHUNK_SIZE=(16 * 1024)
end
# When a handler is found for a registered URI then this class is constructed
# and passed to your HttpHandler::process method. You should assume that
# *one* handler processes all requests. Included in the HttpReqeust is a
# HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body
# which is a string containing the request body (raw for now).
#
# Mongrel really only supports small-ish request bodies right now since really
# huge ones have to be completely read off the wire and put into a string.
# Later there will be several options for efficiently handling large file
# uploads.
class HttpRequest
attr_reader :body, :params
# You don't really call this. It's made for you.
# Main thing it does is hook up the params, and store any remaining
# body data into the HttpRequest.body attribute.
def initialize(params, initial_body, socket)
@body = initial_body || ""
@params = params
@socket = socket
# fix up the CGI requirements
params[Const::CONTENT_LENGTH] = params[Const::HTTP_CONTENT_LENGTH] || 0
params[Const::CONTENT_TYPE] ||= params[Const::HTTP_CONTENT_TYPE]
# now, if the initial_body isn't long enough for the content length we have to fill it
# TODO: adapt for big ass stuff by writing to a temp file
clen = params[Const::HTTP_CONTENT_LENGTH].to_i
if @body.length < clen
@body << @socket.read(clen - @body.length)
end
end
end
# This class implements a simple way of constructing the HTTP headers dynamically
# via a Hash syntax. Think of it as a write-only Hash. Refer to HttpResponse for
# information on how this is used.
#
# One consequence of this write-only nature is that you can write multiple headers
# by just doing them twice (which is sometimes needed in HTTP), but that the normal
# semantics for Hash (where doing an insert replaces) is not there.
class HeaderOut
attr_reader :out
def initialize(out)
@out = out
end
# Simply writes "#{key}: #{value}" to an output buffer.
def[]=(key,value)
@out.write(key)
@out.write(": ")
@out.write(value)
@out.write("\r\n")
end
end
# Writes and controls your response to the client using the HTTP/1.1 specification.
# You use it by simply doing:
#
# response.start(200) do |head,out|
# head['Content-Type'] = 'text/plain'
# out.write("hello\n")
# end
#
# The parameter to start is the response code--which Mongrel will translate for you
# based on HTTP_STATUS_CODES. The head parameter is how you write custom headers.
# The out parameter is where you write your body. The default status code for
# HttpResponse.start is 200 so the above example is redundant.
#
# As you can see, it's just like using a Hash and as you do this it writes the proper
# header to the output on the fly. You can even intermix specifying headers and
# writing content. The HttpResponse class with write the things in the proper order
# once the HttpResponse.block is ended.
#
# You may also work the HttpResponse object directly using the various attributes available
# for the raw socket, body, header, and status codes. If you do this you're on your own.
# A design decision was made to force the client to not pipeline requests. HTTP/1.1
# pipelining really kills the performance due to how it has to be handled and how
# unclear the standard is. To fix this the HttpResponse gives a "Connection: close"
# header which forces the client to close right away. The bonus for this is that it
# gives a pretty nice speed boost to most clients since they can close their connection
# immediately.
#
# One additional caveat is that you don't have to specify the Content-length header
# as the HttpResponse will write this for you based on the out length.
class HttpResponse
attr_reader :socket
attr_reader :body
attr_reader :header
attr_reader :status
attr_writer :status
def initialize(socket)
@socket = socket
@body = StringIO.new
@status = 404
@header = HeaderOut.new(StringIO.new)
end
# Receives a block passing it the header and body for you to work with.
# When the block is finished it writes everything you've done to
# the socket in the proper order. This lets you intermix header and
# body content as needed.
def start(status=200)
@status = status
yield @header, @body
finished
end
# Primarily used in exception handling to reset the response output in order to write
# an alternative response.
def reset
@header.out.rewind
@body.rewind
end
def send_status
@socket.write("HTTP/1.1 #{@status} #{HTTP_STATUS_CODES[@status]}\r\nContent-Length: #{@body.length}\r\nConnection: close\r\n")
end
def send_header
@header.out.rewind
@socket.write(@header.out.read)
@socket.write("\r\n")
end
def send_body
@body.rewind
# connection: close is also added to ensure that the client does not pipeline.
@socket.write(@body.read)
end
# This takes whatever has been done to header and body and then writes it in the
# proper format to make an HTTP/1.1 response.
def finished
send_status
send_header
send_body
end
end
# You implement your application handler with this. It's very light giving
# just the minimum necessary for you to handle a request and shoot back
# a response. Look at the HttpRequest and HttpResponse objects for how
# to use them.
class HttpHandler
def process(request, response)
end
end
# This is the main driver of Mongrel, while the Mognrel::HttpParser and Mongrel::URIClassifier
# make up the majority of how the server functions. It's a very simple class that just
# has a thread accepting connections and a simple HttpServer.process_client function
# to do the heavy lifting with the IO and Ruby.
#
# You use it by doing the following:
#
# server = HttpServer.new("0.0.0.0", 3000)
# server.register("/stuff", MyNifterHandler.new)
# server.run.join
#
# The last line can be just server.run if you don't want to join the thread used.
# If you don't though Ruby will mysteriously just exit on you.
#
# Ruby's thread implementation is "interesting" to say the least. Experiments with
# *many* different types of IO processing simply cannot make a dent in it. Future
# releases of Mongrel will find other creative ways to make threads faster, but don't
# hold your breath until Ruby 1.9 is actually finally useful.
class HttpServer
attr_reader :acceptor
# Creates a working server on host:port (strange things happen if port isn't a Number).
# Use HttpServer::run to start the server.
#
# The num_processors variable has varying affects on how requests are processed. You'd
# think adding more processing threads (processors) would make the server faster, but
# that's just not true. There's actually an effect of how Ruby does threads such that
# the more processors waiting on the request queue, the slower the system is to handle
# each request. But, the lower the number of processors the fewer concurrent responses
# the server can make.
#
# 20 is the default number of processors and is based on experimentation on a few
# systems. If you find that you overload Mongrel too much
# try changing it higher. If you find that responses are way too slow
# try lowering it (after you've tuned your stuff of course).
# Future versions of Mongrel will make this more dynamic (hopefully).
def initialize(host, port, num_processors=20)
@socket = TCPServer.new(host, port)
@classifier = URIClassifier.new
@req_queue = Queue.new
@host = host
@port = port
@num_procesors = num_processors
@num_processors.times {|i| Thread.new do
while client = @req_queue.deq
process_client(client)
end
end
}
end
# Does the majority of the IO processing. It has been written in Ruby using
# about 7 different IO processing strategies and no matter how it's done
# the performance just does not improve. It is currently carefully constructed
# to make sure that it gets the best possible performance, but anyone who
# thinks they can make it faster is more than welcome to take a crack at it.
def process_client(client)
begin
parser = HttpParser.new
params = {}
data = client.readpartial(Const::CHUNK_SIZE)
while true
nread = parser.execute(params, data)
if parser.finished?
script_name, path_info, handler = @classifier.resolve(params[Const::REQUEST_URI])
if handler
params[Const::PATH_INFO] = path_info
params[Const::SCRIPT_NAME] = script_name
params[Const::GATEWAY_INTERFACE]=Const::GATEWAY_INTERFACE_VALUE
params[Const::REMOTE_ADDR]=client.peeraddr[3]
params[Const::SERVER_NAME]=@host
params[Const::SERVER_PORT]=@port
params[Const::SERVER_PROTOCOL]=Const::SERVER_PROTOCOL_VALUE
params[Const::SERVER_SOFTWARE]=Const::MONGREL_VERSION
request = HttpRequest.new(params, data[nread ... data.length], client)
response = HttpResponse.new(client)
handler.process(request, response)
else
client.write(Const::ERROR_404_RESPONSE)
end
break
else
# gotta stream and read again until we can get the parser to be character safe
# TODO: make this more efficient since this means we're parsing a lot repeatedly
parser.reset
data << client.readpartial(Const::CHUNK_SIZE)
end
end
rescue EOFError
# ignored
rescue Errno::ECONNRESET
# ignored
rescue Errno::EPIPE
# ignored
rescue => details
STDERR.puts "ERROR(#{details.class}): #{details}"
STDERR.puts details.backtrace.join("\n")
ensure
client.close
end
end
# Runs the thing. It returns the thread used so you can "join" it. You can also
# access the HttpServer::acceptor attribute to get the thread later.
def run
BasicSocket.do_not_reverse_lookup=true
@acceptor = Thread.new do
while true
@req_queue << @socket.accept
end
end
end
# Simply registers a handler with the internal URIClassifier. When the URI is
# found in the prefix of a request then your handler's HttpHandler::process method
# is called. See Mongrel::URIClassifier#register for more information.
def register(uri, handler)
@classifier.register(uri, handler)
end
# Removes any handler registered at the given URI. See Mongrel::URIClassifier#unregister
# for more information.
def unregister(uri)
@classifier.unregister(uri)
end
end
# The server normally returns a 404 response if a URI is requested, but it
# also returns a lame empty message. This lets you do a 404 response
# with a custom message for special URIs.
class Error404Handler < HttpHandler
# Sets the message to return. This is constructed once for the handler
# so it's pretty efficient.
def initialize(msg)
@response = Const::ERROR_404_RESPONSE + msg
end
# Just kicks back the standard 404 response with your special message.
def process(request, response)
response.socket.write(@response)
end
end
# Serves the contents of a directory. You give it the path to the root
# where the files are located, and it tries to find the files based on
# the PATH_INFO inside the directory. If the requested path is a
# directory then it returns a simple directory listing.
#
# It does a simple protection against going outside it's root path by
# converting all paths to an absolute expanded path, and then making sure
# that the final expanded path includes the root path. If it doesn't
# than it simply gives a 404.
class DirHandler < HttpHandler
attr_reader :path
# You give it the path to the directory root and an (optional)
def initialize(path, listing_allowed=true, index_html="index.html")
@path = File.expand_path(path)
@listing_allowed=listing_allowed
@index_html = index_html
end
# Checks if the given path can be served and returns the full path (or nil if not).
def can_serve(path_info)
req = File.expand_path(File.join(@path,path_info), @path)
if req.index(@path) == 0 and File.exist? req
# it exists and it's in the right location
if File.directory? req
# the request is for a directory
index = File.join(req, @index_html)
if File.exist? index
# serve the index
return index
elsif @listing_allows
# serve the directory
req
else
# do not serve anything
return nil
end
else
# it's a file and it's there
return req
end
end
end
# Returns a simplistic directory listing if they're enabled, otherwise a 403.
# Base is the base URI from the REQUEST_URI, dir is the directory to serve
# on the file system (comes from can_serve()), and response is the HttpResponse
# object to send the results on.
def send_dir_listing(base, dir, response)
# take off any trailing / so the links come out right
base.chop! if base[-1] == "/"[-1]
if @listing_allowed
response.start(200) do |head,out|
head['Content-Type'] = "text/html"
out << "<html><head><title>Directory Listing</title></head><body>"
Dir.entries(dir).each do |child|
next if child == "."
if child == ".."
out << "<a href=\"#{base}/#{child}\">Up to parent..</a><br/>"
else
out << "<a href=\"#{base}/#{child}\">#{child}</a><br/>"
end
end
out << "</body></html>"
end
else
response.start(403) do |head,out|
out.write("Directory listings not allowed")
end
end
end
# Sends the contents of a file back to the user. Not terribly efficient since it's
# opening and closing the file for each read.
def send_file(req, response)
response.start(200) do |head,out|
open(req, "r") do |f|
out.write(f.read)
end
end
end
# Process the request to either serve a file or a directory listing
# if allowed (based on the listing_allowed paramter to the constructor).
def process(request, response)
req = can_serve request.params['PATH_INFO']
if not req
# not found, return a 404
response.start(404) do |head,out|
out << "File not found"
end
else
begin
if File.directory? req
send_dir_listing(request.params["REQUEST_URI"],req, response)
else
send_file(req, response)
end
rescue => details
response.reset
response.start(403) do |head,out|
out << "Error accessing file: #{details}"
end
end
end
end
end
end