2006-01-28 14:03:53 -05:00
require 'socket'
require 'http11'
2006-04-10 21:46:13 -04:00
require 'tempfile'
2006-01-28 14:03:53 -05:00
require 'thread'
2006-01-28 14:34:12 -05:00
require 'stringio'
2006-02-20 19:55:39 -05:00
require 'mongrel/cgi'
require 'mongrel/handlers'
2006-02-28 02:04:41 -05:00
require 'mongrel/command'
2006-03-08 23:38:02 -05:00
require 'mongrel/tcphack'
2006-03-21 21:02:34 -05:00
require 'yaml'
2006-04-08 14:00:35 -04:00
require 'time'
2006-04-20 00:57:02 -04:00
require 'rubygems'
2006-05-16 00:26:03 -04:00
2006-01-28 14:03:53 -05:00
2006-04-10 20:00:52 -04:00
# Optional acceleration: if the ruby-sendfile extension is installed,
# HttpResponse#send_file will use socket.sendfile to serve files with
# less CPU and memory.  Its absence is perfectly fine.
begin
  require 'sendfile'
  STDERR.puts "** You have sendfile installed, will use that to serve files."
rescue LoadError
  # sendfile gem not present -- fall back to plain chunked socket writes.
  # (Was `rescue Object`, which also hid unrelated load-time errors.)
end
2006-05-11 15:10:34 -04:00
2006-01-28 14:03:53 -05:00
# Mongrel module containing all of the classes (include C extensions) for running
# a Mongrel web server. It contains a minimalist HTTP server with just enough
# functionality to service web application requests fast as possible.
module Mongrel
2006-02-28 00:17:23 -05:00
class URIClassifier
  # All URIs registered with this classifier so far.  Treat the returned
  # array as read-only: the underlying map is shared with the C extension
  # and mutating it can cause a memory leak.  Handy for inspecting what
  # has been registered.
  def uris
    @handler_map.keys
  end

  # Renders the registered URI -> handler map exactly like Hash#inspect.
  def inspect
    @handler_map.inspect
  end
end
2006-02-20 17:39:37 -05:00
# Used to stop the HttpServer via Thread.raise.
# NOTE: deliberately subclasses Exception (not StandardError) so a plain
# `rescue` inside request-handling code cannot swallow it; only the accept
# loop's explicit `rescue StopServer` is meant to catch it.
class StopServer < Exception
end
2006-02-28 02:04:41 -05:00
2006-02-03 00:42:08 -05:00
# Every standard HTTP code mapped to the appropriate message.  These are
# used so frequently that they are placed directly in Mongrel for easy
# access rather than Mongrel::Const.  Frozen because it is shared,
# read-only lookup data.
HTTP_STATUS_CODES = {
  100 => 'Continue',
  101 => 'Switching Protocols',
  200 => 'OK',
  201 => 'Created',
  202 => 'Accepted',
  203 => 'Non-Authoritative Information',
  204 => 'No Content',
  205 => 'Reset Content',
  206 => 'Partial Content',
  300 => 'Multiple Choices',
  301 => 'Moved Permanently',
  302 => 'Moved Temporarily',
  303 => 'See Other',
  304 => 'Not Modified',
  305 => 'Use Proxy',
  400 => 'Bad Request',
  401 => 'Unauthorized',
  402 => 'Payment Required',
  403 => 'Forbidden',
  404 => 'Not Found',
  405 => 'Method Not Allowed',
  406 => 'Not Acceptable',
  407 => 'Proxy Authentication Required',
  408 => 'Request Time-out',
  409 => 'Conflict',
  410 => 'Gone',
  411 => 'Length Required',
  412 => 'Precondition Failed',
  413 => 'Request Entity Too Large',
  414 => 'Request-URI Too Large',
  415 => 'Unsupported Media Type',
  500 => 'Internal Server Error',
  501 => 'Not Implemented',
  502 => 'Bad Gateway',
  503 => 'Service Unavailable',
  504 => 'Gateway Time-out',
  505 => 'HTTP Version not supported'
}.freeze
2006-05-19 22:56:30 -04:00
2006-02-09 21:38:18 -05:00
2006-02-03 00:42:08 -05:00
# Frequently used constants when constructing requests or responses.  Many times
# the constant just refers to a string with the same contents.  Using these constants
# gave about a 3% to 10% performance improvement over using the strings directly.
# Symbols did not really improve things much compared to constants.
#
# While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the
# REMOTE_IDENT, REMOTE_USER, or REMOTE_HOST parameters since those are either
# a security problem or too taxing on performance.
module Const
  DATE = "Date".freeze

  # The part of the path after the SCRIPT_NAME; URIClassifier determines this.
  PATH_INFO = "PATH_INFO".freeze

  # The initial part that your handler is identified as by URIClassifier.
  SCRIPT_NAME = "SCRIPT_NAME".freeze

  # The original URI requested by the client.  Fed to URIClassifier to
  # build PATH_INFO and SCRIPT_NAME.
  REQUEST_URI = 'REQUEST_URI'.freeze

  MONGREL_VERSION = "0.3.13".freeze

  # The standard empty 404 response for bad requests.  Use Error404Handler for custom stuff.
  ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{MONGREL_VERSION}\r\n\r\nNOT FOUND".freeze

  CONTENT_LENGTH = "CONTENT_LENGTH".freeze

  # A common header for indicating the server is too busy.  Not used yet.
  ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze

  # The basic max request size we'll try to read per pass.
  CHUNK_SIZE = (4 * 1024)

  # Maximum header size before a client is booted.  The C parser enforces
  # this too; checking here stops us buffering forever.
  MAX_HEADER = 1024 * (80 + 32)

  # Maximum request body size before it is moved out of memory and into a
  # tempfile for reading (see HttpRequest#initialize).
  MAX_BODY = MAX_HEADER

  # A frozen format string for the status line is about 15% faster.
  STATUS_FORMAT = "HTTP/1.1 %d %s\r\nContent-Length: %d\r\nConnection: close\r\n".freeze
  CONTENT_TYPE = "Content-Type".freeze
  LAST_MODIFIED = "Last-Modified".freeze
  ETAG = "ETag".freeze
  SLASH = "/".freeze
  REQUEST_METHOD = "REQUEST_METHOD".freeze
  GET = "GET".freeze
  HEAD = "HEAD".freeze
  # ETag follows the Apache convention: hex mtime-size-inode (inode 0 on win32).
  ETAG_FORMAT = "\"%x-%x-%x\"".freeze
  HEADER_FORMAT = "%s: %s\r\n".freeze
  LINE_END = "\r\n".freeze

  REMOTE_ADDR = "REMOTE_ADDR".freeze
  HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze

  HTTP_IF_UNMODIFIED_SINCE = "HTTP_IF_UNMODIFIED_SINCE".freeze
  HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
end
2006-01-28 14:03:53 -05:00
# When a handler is found for a registered URI then this class is constructed
# and passed to your HttpHandler::process method. You should assume that
2006-03-30 04:31:14 -05:00
# *one* handler processes all requests. Included in the HttpRequest is a
2006-01-28 14:03:53 -05:00
# HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body
# which is a string containing the request body (raw for now).
#
2006-05-19 22:56:30 -04:00
# The HttpRequest.initialize method will convert any request that is larger than
2006-04-20 00:57:02 -04:00
# Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses
# a StringIO object. To be safe, you should assume it works like a file.
2006-01-28 14:03:53 -05:00
class HttpRequest
  attr_reader :body, :params

  # Builds the request from a parsed +params+ hash, the body bytes that
  # arrived along with the header (+initial_body+), and the client
  # +socket+ from which any remaining body is streamed.  Bodies larger
  # than Const::MAX_BODY are spooled into a Tempfile; smaller ones go
  # into a StringIO.  Either way, treat HttpRequest#body as a rewound,
  # file-like object.  On a read failure the partial body is discarded
  # and #body is left nil to signal the abort.
  #
  # TODO: Implement tempfile removal when the request is done.
  def initialize(params, initial_body, socket)
    @params = params
    @socket = socket

    remaining = params[Const::CONTENT_LENGTH].to_i - initial_body.length

    @body = if remaining > Const::MAX_BODY
              spool = Tempfile.new(self.class.name)
              spool.binmode
              spool
            else
              StringIO.new
            end

    begin
      @body.write(initial_body)

      # Read the odd-sized remainder first so everything after streams
      # out in perfectly sized chunks.
      remaining -= @body.write(@socket.read(remaining % Const::CHUNK_SIZE))

      while remaining > 0
        chunk = @socket.read(Const::CHUNK_SIZE)
        # @socket.eof? would block, so detect closed/short reads this way.
        raise "Socket closed or read failure" if not chunk or chunk.length != Const::CHUNK_SIZE
        remaining -= @body.write(chunk)
        # ASSUME: we are writing to a disk and these writes always write
        # the requested amount.
      end

      # Rewind so handlers can read the body from the start.
      @body.rewind
    rescue Object
      # Any error means we drop the body (deleting a spooled tempfile).
      STDERR.puts "Error reading request: #$!"
      @body.delete if @body.class == Tempfile
      @body = nil # signals that there was a problem
    end
  end

  # Performs URI escaping so that you can construct proper query strings
  # faster.  Use this rather than the cgi.rb version since it's faster.
  # (Stolen from Camping.)
  def self.escape(s)
    s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/n) {
      '%' + $1.unpack('H2' * $1.size).join('%').upcase
    }.tr(' ', '+')
  end

  # Unescapes a URI escaped string.  (Stolen from Camping.)
  def self.unescape(s)
    s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n) {
      [$1.delete('%')].pack('H*')
    }
  end

  # Parses a query string by breaking it up at the '&' and ';' characters
  # (change the second parameter, which defaults to '&;', to parse cookies
  # instead).  Repeated keys are collected into an Array of values.
  def self.query_parse(qs, d = '&;')
    params = {}
    (qs || '').split(/[#{d}] */n).each do |pair|
      key, value = unescape(pair).split('=', 2)
      if (existing = params[key])
        if existing.class == Array
          params[key] << value
        else
          params[key] = [existing, value]
        end
      else
        params[key] = value
      end
    end
    params
  end
end
2006-01-28 14:34:12 -05:00
2006-02-03 00:42:08 -05:00
# A write-only, Hash-like builder for HTTP headers.  Each []= call appends
# "key: value\r\n" to the output buffer, so assigning the same key twice
# writes the header twice (sometimes needed in HTTP) -- unlike a real Hash,
# inserts never replace.  See HttpResponse for how it is used.
class HeaderOut
  attr_reader :out

  def initialize(out)
    @out = out
  end

  # Appends "#{key}: #{value}\r\n" to the output buffer.
  def []=(key, value)
    @out.write(Const::HEADER_FORMAT % [key, value])
  end
end
2006-02-03 00:42:08 -05:00
# Writes and controls your response to the client using the HTTP/1.1
# specification.  Typical use:
#
#   response.start(200) do |head,out|
#     head['Content-Type'] = 'text/plain'
#     out.write("hello\n")
#   end
#
# The parameter to start is the response code, which Mongrel translates via
# HTTP_STATUS_CODES.  +head+ writes custom headers, +out+ is the body.  200
# is the default status so the example's argument is redundant.  Headers and
# body writes may be freely intermixed; HttpResponse writes everything out
# in the proper order once the block (or #finished) ends.
#
# You may also drive the object directly through its socket/body/header/
# status attributes -- if so, you're on your own.  By design the response
# always sends "Connection: close" to forbid HTTP/1.1 pipelining (unclear
# spec, poor performance), which also lets clients disconnect immediately.
#
# You never need to set Content-Length yourself; it is derived from the
# body length.
class HttpResponse
  attr_reader :socket
  attr_reader :body
  attr_reader :header
  attr_reader :status
  attr_writer :status
  attr_reader :body_sent
  attr_reader :header_sent
  attr_reader :status_sent

  def initialize(socket)
    @socket = socket
    @body = StringIO.new
    @status = 404
    @header = HeaderOut.new(StringIO.new)
    @header[Const::DATE] = Time.now.httpdate
    @body_sent = false
    @header_sent = false
    @status_sent = false
  end

  # Yields [header, body] for you to fill in; writes everything to the
  # socket in proper order afterwards.  Handlers in a chain may each modify
  # the response; passing finalize=true stops further processing by sending
  # the response immediately.  By default Mongrel finalizes after all
  # handlers have run.
  def start(status = 200, finalize = false)
    @status = status.to_i
    yield @header, @body
    finished if finalize
  end

  # Used mainly by exception handling to discard the buffered output and
  # write an alternative response.  Aborts with an exception if the header
  # or body already went out -- which is pretty catastrophic.
  def reset
    if @body_sent
      raise "You have already sent the request body."
    elsif @header_sent
      raise "You have already sent the request headers."
    else
      @header.out.rewind
      @body.rewind
    end
  end

  # Writes the status line (once); Content-Length defaults to the buffered
  # body's length unless given explicitly.
  def send_status(content_length = nil)
    return if @status_sent
    content_length ||= @body.length
    @socket.write(Const::STATUS_FORMAT % [@status, HTTP_STATUS_CODES[@status], content_length])
    @status_sent = true
  end

  # Writes the buffered headers plus the blank separator line (once).
  def send_header
    return if @header_sent
    @header.out.rewind
    @socket.write(@header.out.read + Const::LINE_END)
    @header_sent = true
  end

  # Writes the buffered body (once).
  def send_body
    return if @body_sent
    @body.rewind
    @socket.write(@body.read)
    @body_sent = true
  end

  # Appends the contents of +path+ to the response stream.  The file is
  # opened for binary reading and written in chunks to the socket.  If the
  # <a href="http://rubyforge.org/projects/ruby-sendfile">sendfile</a>
  # library is available it is used instead, often with greater speed and
  # less memory/cpu usage.
  #
  # Presence of sendfile is detected via @socket.respond_to? :sendfile, so
  # any object following the ruby-sendfile signature works here.
  def send_file(path)
    File.open(path, "rb") do |f|
      if @socket.respond_to? :sendfile
        @socket.sendfile(f)
      else
        while chunk = f.read(Const::CHUNK_SIZE) and chunk.length > 0
          @socket.write(chunk)
        end
      end
    end
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE, Errno::EINVAL, Errno::EBADF
    # Ignorable: the client closed off early.
    STDERR.puts "Client closed socket early requesting file #{path}: #$!"
  end

  # Raw write straight to the client socket.
  def write(data)
    @socket.write(data)
  end

  # Takes whatever has been buffered in header and body and writes it in
  # the proper order to form an HTTP/1.1 response.
  def finished
    send_status
    send_header
    send_body
  end

  # True once status, headers, and body have all been sent.
  def done
    @status_sent and @header_sent and @body_sent
  end
end
2006-04-02 22:27:59 -04:00
2006-01-28 14:03:53 -05:00
# This is the main driver of Mongrel, while the Mongrel::HttpParser and
# Mongrel::URIClassifier make up the majority of how the server functions.
# It's a very simple class that just has a thread accepting connections and
# a simple HttpServer.process_client function to do the heavy lifting with
# the IO and Ruby.
#
# You use it by doing the following:
#
#   server = HttpServer.new("0.0.0.0", 3000)
#   server.register("/stuff", MyNifterHandler.new)
#   server.run.join
#
# The last line can be just server.run if you don't want to join the thread
# used.  If you don't though Ruby will mysteriously just exit on you.
#
# Ruby's thread implementation is "interesting" to say the least.
# Experiments with *many* different types of IO processing simply cannot
# make a dent in it.  Future releases of Mongrel will find other creative
# ways to make threads faster, but don't hold your breath until Ruby 1.9 is
# actually finally useful.
class HttpServer
  attr_reader :acceptor
  attr_reader :workers
  attr_reader :classifier
  attr_reader :host
  attr_reader :port

  # Creates a working server on host:port (strange things happen if port
  # isn't a Number).  Use HttpServer#run to start the server and
  # HttpServer#acceptor.join to join the thread that's processing incoming
  # requests on the socket.
  #
  # num_processors is the maximum number of concurrent processors; anything
  # over this is closed immediately to maintain server processing
  # performance.  This may seem mean but it is the most efficient way to
  # deal with overload -- other schemes still parse the client's request,
  # which defeats the point of overload handling.
  #
  # timeout is a sleep (in hundredths of a second) placed between
  # socket.accept calls as a cheap throttle; 0 (the default) skips the
  # sleep entirely.
  #
  # TODO: Find out if anyone actually uses the timeout option since it
  # seems to cause problems on FBSD.
  def initialize(host, port, num_processors = (2**30 - 1), timeout = 0)
    @socket = TCPServer.new(host, port)
    @classifier = URIClassifier.new
    @host = host
    @port = port
    @workers = ThreadGroup.new
    @timeout = timeout
    @num_processors = num_processors
    @death_time = 60
  end

  # Does the majority of the IO processing.  It has been written in Ruby
  # using about 7 different IO processing strategies and no matter how it's
  # done the performance just does not improve.  It is currently carefully
  # constructed for the best possible performance, but anyone who thinks
  # they can make it faster is more than welcome to take a crack at it.
  def process_client(client)
    begin
      parser = HttpParser.new
      params = {}

      data = client.readpartial(Const::CHUNK_SIZE)
      nparsed = 0

      # Assumption: nparsed stays below data.length because data grows
      # after each parse pass.  If the socket can't fill the buffer any
      # further, processing stops.
      while nparsed < data.length
        nparsed = parser.execute(params, data, nparsed)

        if parser.finished?
          script_name, path_info, handlers = @classifier.resolve(params[Const::REQUEST_URI])

          if handlers
            params[Const::PATH_INFO] = path_info
            params[Const::SCRIPT_NAME] = script_name
            params[Const::REMOTE_ADDR] = params[Const::HTTP_X_FORWARDED_FOR] || client.peeraddr.last

            # TODO: Find a faster/better way to carve out the leftover
            # range, preferably without copying.
            request = HttpRequest.new(params, data[nparsed...data.length] || "", client)

            # A nil body signals HttpRequest#initialize aborted (e.g. the
            # client closed mid-upload), so skip this request.
            break if request.body == nil

            response = HttpResponse.new(client)

            # Run each handler in registered order until one finalizes the
            # response.
            handlers.each do |handler|
              handler.process(request, response)
              break if response.done
            end

            # If nobody finalized the response, do it now.
            response.finished if not response.done
          else
            # No handler registered for this URI: stock 404 response.
            # TODO: custom 404 files (but really, use a real web server).
            client.write(Const::ERROR_404_RESPONSE)
          end

          break # done
        else
          # Parser isn't finished: read more data and continue parsing.
          data << client.readpartial(Const::CHUNK_SIZE)
          if data.length >= Const::MAX_HEADER
            raise HttpParserError.new("HEADER is longer than allowed, aborting client early.")
          end
        end
      end
    rescue EOFError, Errno::ECONNRESET, Errno::EPIPE, Errno::EINVAL, Errno::EBADF
      # Ignored: the client went away.
    rescue HttpParserError
      STDERR.puts "#{Time.now}: BAD CLIENT (#{params[Const::HTTP_X_FORWARDED_FOR] || client.peeraddr.last}): #$!"
    rescue => details
      STDERR.puts "#{Time.now}: ERROR: #$!"
      STDERR.puts details.backtrace.join("\n")
    ensure
      client.close
    end
  end

  # Used internally to kill off any worker threads that have taken too long
  # to complete processing.  Only called if there are too many processors
  # currently servicing.
  def reap_dead_workers(worker_list)
    mark = Time.now
    worker_list.each do |worker|
      worker[:started_on] = Time.now if not worker[:started_on]
      if mark - worker[:started_on] > @death_time + @timeout
        STDERR.puts "Thread #{worker.inspect} is too old, killing."
        worker.raise(StopServer.new("Timed out thread."))
      end
    end
  end

  # Runs the thing.  Returns the acceptor thread so you can "join" it; it is
  # also available later via HttpServer#acceptor.
  def run
    BasicSocket.do_not_reverse_lookup = true

    @acceptor = Thread.new do
      while true
        begin
          client = @socket.accept
          worker_list = @workers.list

          if worker_list.length >= @num_processors
            STDERR.puts "Server overloaded with #{worker_list.length} processors (#{@num_processors} max). Dropping connection."
            client.close
            reap_dead_workers(worker_list)
          else
            thread = Thread.new do
              process_client(client)
            end

            thread[:started_on] = Time.now
            thread.priority = 1
            @workers.add(thread)

            sleep @timeout / 100 if @timeout > 0
          end
        rescue StopServer
          STDERR.puts "Server stopped. Exiting."
          @socket.close if not @socket.closed?
          break
        rescue Errno::EMFILE
          STDERR.puts "Too many open files. Try increasing ulimits."
          sleep 0.5
        end
      end

      # Accept loop is done: wait out remaining workers, reaping stragglers.
      # TODO: Allow for death time to be set if people ask for it.
      @death_time = 10
      shutdown_start = Time.now

      while @workers.list.length > 0
        waited_for = (Time.now - shutdown_start).ceil
        STDERR.print "Shutdown waited #{waited_for} for #{@workers.list.length} requests, could take #{@death_time + @timeout} seconds.\r" if @workers.list.length > 0
        sleep 1
        reap_dead_workers(@workers.list)
      end
    end

    return @acceptor
  end

  # Simply registers a handler with the internal URIClassifier.  When the
  # URI is found in the prefix of a request then your handler's
  # HttpHandler#process method is called.  See Mongrel::URIClassifier#register
  # for more information.
  #
  # With in_front=true the handler goes to the front of the chain for that
  # URI; otherwise it is appended.
  def register(uri, handler, in_front = false)
    script_name, path_info, handlers = @classifier.resolve(uri)

    if not handlers
      @classifier.register(uri, [handler])
    else
      if path_info.length == 0 or (script_name == Const::SLASH and path_info == Const::SLASH)
        if in_front
          handlers.unshift(handler)
        else
          handlers << handler
        end
      else
        @classifier.register(uri, [handler])
      end
    end
  end

  # Removes any handlers registered at the given URI.  See
  # Mongrel::URIClassifier#unregister for more information.  Remember this
  # removes them *all*, so the entire processing chain goes away.
  def unregister(uri)
    @classifier.unregister(uri)
  end

  # Stops the acceptor thread (via StopServer) and then lets the worker
  # threads finish off the request queue before finally exiting.
  def stop
    stopper = Thread.new do
      @acceptor.raise(StopServer.new)
    end
    stopper.priority = 10
  end
end
2006-02-03 00:42:08 -05:00
2006-03-21 21:02:34 -05:00
# Implements a simple DSL for configuring a Mongrel server for your
# purposes. More used by framework implementers to setup Mongrel
# how they like, but could be used by regular folks to add more things
# to an existing mongrel configuration.
#
# It is used like this:
#
# require 'mongrel'
# config = Mongrel::Configurator.new :host => "127.0.0.1" do
# listener :port => 3000 do
# uri "/app", :handler => Mongrel::DirHandler.new(".", load_mime_map("mime.yaml"))
# end
# run
# end
#
# This will setup a simple DirHandler at the current directory and load additional
# mime types from mime.yaml. The :host => "127.0.0.1" is actually not
# specific to the servers but just a hash of default parameters that all
# server or uri calls receive.
#
# When you are inside the block after Mongrel::Configurator.new you can simply
# call functions that are part of Configurator (like server, uri, daemonize, etc)
# without having to refer to anything else. You can also call these functions on
# the resulting object directly for additional configuration.
#
# A major thing about Configurator is that it actually lets you configure
# multiple listeners for any hosts and ports you want. These are kept in a
# map config.listeners so you can get to them.
class Configurator
# Map of "host:port" name => HttpServer for every listener created so far.
attr_reader :listeners
# The defaults hash passed to Configurator.new, merged into every call's options.
attr_reader :defaults
2006-03-26 15:01:50 -05:00
# Restart flag; initialized to false in #initialize.
attr_reader :needs_restart
2006-03-21 21:02:34 -05:00
# You pass in initial defaults and then a block to continue configuring.
# Takes your initial default options plus an optional configuration
# block.  The block is rebound (via cloaker) so that inside it you can
# call Configurator methods directly, without a receiver.
def initialize(defaults = {}, &blk)
  @listener = nil
  @listener_name = nil
  @listeners = {}
  @defaults = defaults
  @needs_restart = false

  cloaker(&blk).bind(self).call if blk
end
2006-05-19 22:56:30 -04:00
# generates a class for cloaking the current self and making the DSL nicer
def cloaking_class
  # The singleton class of this instance -- used by cloaker to define a
  # throwaway method without polluting any shared class.
  class << self
    self
  end
end
2006-03-21 21:02:34 -05:00
# Do not call this. You were warned.
2006-05-19 22:56:30 -04:00
def cloaker(&blk)
  cloaking_class.class_eval do
    # Temporarily install the block as a method, capture its
    # UnboundMethod, then remove it.  The returned method object can be
    # re-bound later (bind(self).call) to run the block with a new self,
    # which is what gives the configuration DSL its implicit receiver.
    define_method :cloaker_, &blk
    meth = instance_method(:cloaker_)
    remove_method :cloaker_
    meth
  end
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# This will resolve the given options against the defaults.
# Normally just used internally.
def resolve_defaults ( options )
options . merge ( @defaults )
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Starts a listener block. This is the only one that actually takes
# a block and then you make Configurator.uri calls in order to setup
# your URIs and handlers. If you write your Handlers as GemPlugins
# then you can use load_plugins and plugin to load them.
#
# It expects the following options (or defaults):
#
# * :host => Host name to bind.
# * :port => Port to bind.
2006-03-26 15:01:50 -05:00
# * :num_processors => The maximum number of concurrent threads allowed. (950 default)
2006-05-19 22:56:30 -04:00
# * :timeout => 1/100th of a second timeout between requests. (10 is 1/10th, 0 is timeout)
2006-03-21 21:02:34 -05:00
#
def listener ( options = { } , & blk )
2006-05-19 22:56:30 -04:00
raise " Cannot call listener inside another listener block. " if ( @listener or @listener_name )
2006-03-21 21:02:34 -05:00
ops = resolve_defaults ( options )
2006-03-26 15:01:50 -05:00
ops [ :num_processors ] || = 950
ops [ :timeout ] || = 0
@listener = Mongrel :: HttpServer . new ( ops [ :host ] , ops [ :port ] . to_i , ops [ :num_processors ] . to_i , ops [ :timeout ] . to_i )
2006-03-21 21:02:34 -05:00
@listener_name = " #{ ops [ :host ] } : #{ ops [ :port ] } "
@listeners [ @listener_name ] = @listener
2006-05-19 22:56:30 -04:00
# Does the actual cloaking operation to give the new implicit self.
2006-03-21 21:02:34 -05:00
if blk
cloaker ( & blk ) . bind ( self ) . call
end
2006-05-19 22:56:30 -04:00
# all done processing this listener setup, reset implicit variables
2006-03-21 21:02:34 -05:00
@listener = nil
@listener_name = nil
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Called inside a Configurator.listener block in order to
# add URI->handler mappings for that listener. Use this as
# many times as you like. It expects the following options
# or defaults:
#
2006-05-19 22:56:30 -04:00
# * :handler => HttpHandler -- Handler to use for this location.
# * :in_front => true/false -- Rather than appending, it prepends this handler.
2006-03-21 21:02:34 -05:00
def uri ( location , options = { } )
ops = resolve_defaults ( options )
2006-04-04 01:24:55 -04:00
@listener . register ( location , ops [ :handler ] , in_front = ops [ :in_front ] )
2006-03-21 21:02:34 -05:00
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Daemonizes the current Ruby script turning all the
# listeners into an actual "server" or detached process.
# You must call this *before* frameworks that open files
# as otherwise the files will be closed by this function.
#
# Does not work for Win32 systems (the call is silently ignored).
#
# Requires the following options or defaults:
#
# * :cwd => Directory to change to.
# * :log_file => Where to write STDOUT and STDERR.
# * :pid_file => Where to write the process ID.
#
2006-05-19 22:56:30 -04:00
# It is safe to call this on win32 as it will only require the daemons
# gem/library if NOT win32.
2006-03-21 21:02:34 -05:00
def daemonize ( options = { } )
2006-03-26 15:01:50 -05:00
ops = resolve_defaults ( options )
2006-03-21 21:02:34 -05:00
# save this for later since daemonize will hose it
if RUBY_PLATFORM !~ / mswin /
require 'daemons/daemonize'
2006-05-19 22:56:30 -04:00
2006-03-26 15:01:50 -05:00
Daemonize . daemonize ( log_file = File . join ( ops [ :cwd ] , ops [ :log_file ] ) )
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# change back to the original starting directory
2006-03-26 15:01:50 -05:00
Dir . chdir ( ops [ :cwd ] )
2006-05-19 22:56:30 -04:00
2006-03-26 15:01:50 -05:00
open ( ops [ :pid_file ] , " w " ) { | f | f . write ( Process . pid ) }
else
log " WARNING: Win32 does not support daemon mode. "
2006-03-21 21:02:34 -05:00
end
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Uses the GemPlugin system to easily load plugins based on their
# gem dependencies. You pass in either an :includes => [] or
# :excludes => [] setting listing the names of plugins to include
2006-05-19 22:56:30 -04:00
# or exclude from the when determining the dependencies.
2006-03-21 21:02:34 -05:00
def load_plugins ( options = { } )
2006-03-26 15:01:50 -05:00
ops = resolve_defaults ( options )
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
load_settings = { }
2006-03-26 15:01:50 -05:00
if ops [ :includes ]
ops [ :includes ] . each do | plugin |
2006-03-21 21:02:34 -05:00
load_settings [ plugin ] = GemPlugin :: INCLUDE
end
end
2006-03-26 15:01:50 -05:00
if ops [ :excludes ]
ops [ :excludes ] . each do | plugin |
2006-03-21 21:02:34 -05:00
load_settings [ plugin ] = GemPlugin :: EXCLUDE
end
end
GemPlugin :: Manager . instance . load ( load_settings )
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Easy way to load a YAML file and apply default settings.
def load_yaml ( file , default = { } )
default . merge ( YAML . load_file ( file ) )
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Loads the MIME map file and checks that it is correct
# on loading. This is commonly passed to Mongrel::DirHandler
# or any framework handler that uses DirHandler to serve files.
# You can also include a set of default MIME types as additional
# settings. See Mongrel::DirHandler for how the MIME types map
# is organized.
def load_mime_map ( file , mime = { } )
# configure any requested mime map
mime = load_yaml ( file , mime )
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# check all the mime types to make sure they are the right format
2006-03-26 15:01:50 -05:00
mime . each { | k , v | log " WARNING: MIME type #{ k } must start with '.' " if k . index ( " . " ) != 0 }
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
return mime
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Loads and creates a plugin for you based on the given
# name and configured with the selected options. The options
# are merged with the defaults prior to passing them in.
def plugin ( name , options = { } )
ops = resolve_defaults ( options )
2006-03-25 16:15:30 -05:00
GemPlugin :: Manager . instance . create ( name , ops )
2006-03-21 21:02:34 -05:00
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Works like a meta run method which goes through all the
# configured listeners. Use the Configurator.join method
# to prevent Ruby from exiting until each one is done.
def run
@listeners . each { | name , s |
2006-03-26 15:01:50 -05:00
log " Running #{ name } listener. "
2006-03-21 21:02:34 -05:00
s . run
}
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
# Calls .stop on all the configured listeners so they
2006-05-19 22:56:30 -04:00
# stop processing requests (gracefully). By default it
# assumes that you don't want to restart and that the pid file
# should be unlinked on exit.
def stop ( needs_restart = false , unlink_pid_file = true )
2006-03-21 21:02:34 -05:00
@listeners . each { | name , s |
2006-03-26 15:01:50 -05:00
log " Stopping #{ name } listener. "
2006-03-21 21:02:34 -05:00
s . stop
}
2006-05-19 22:56:30 -04:00
@needs_restart = needs_restart
if unlink_pid_file
File . unlink @pid_file if ( @pid_file and File . exist? ( @pid_file ) )
end
2006-03-21 21:02:34 -05:00
end
# This method should actually be called *outside* of the
# Configurator block so that you can control it. In otherwords
# do it like: config.join.
def join
@listeners . values . each { | s | s . acceptor . join }
end
2006-03-26 15:01:50 -05:00
# Calling this before you register your URIs to the given location
# will setup a set of handlers that log open files, objects, and the
# parameters for each request. This helps you track common problems
# found in Rails applications that are either slow or become unresponsive
# after a little while.
2006-05-19 22:56:30 -04:00
#
# TODO: Document the optional selections from the what parameter
def debug ( location , what = [ :object , :rails , :files , :threads , :params ] )
2006-03-26 15:01:50 -05:00
require 'mongrel/debug'
2006-05-19 22:56:30 -04:00
handlers = {
:object = > " /handlers/requestlog::access " ,
:rails = > " /handlers/requestlog::files " ,
:files = > " /handlers/requestlog::objects " ,
:threads = > " /handlers/requestlog::threads " ,
:params = > " /handlers/requestlog::params "
}
# turn on the debugging infrastructure, and ObjectTracker is a pig
ObjectTracker . configure if what . include? :object
2006-03-26 15:01:50 -05:00
MongrelDbg . configure
2006-05-19 22:56:30 -04:00
# now we roll through each requested debug type, turn it on and load that plugin
what . each do | type |
MongrelDbg . begin_trace type
uri location , :handler = > plugin ( handlers [ type ] )
end
2006-03-26 15:01:50 -05:00
end
2006-04-01 03:43:30 -05:00
# Used to allow you to let users specify their own configurations
# inside your Configurator setup. You pass it a script name and
2006-05-19 22:56:30 -04:00
# it reads it in and does an eval on the contents passing in the right
2006-04-01 03:43:30 -05:00
# binding so they can put their own Configurator statements.
def run_config ( script )
open ( script ) { | f | eval ( f . read , proc { self } ) }
end
2006-03-26 15:01:50 -05:00
# Sets up the standard signal handlers that are used on most Ruby
# It only configures if the platform is not win32 and doesn't do
# a HUP signal since this is typically framework specific.
#
# Requires a :pid_file option to indicate a file to delete.
# It sets the MongrelConfig.needs_restart attribute if
# the start command should reload. It's up to you to detect this
# and do whatever is needed for a "restart".
#
# This command is safely ignored if the platform is win32 (with a warning)
def setup_signals ( options = { } )
ops = resolve_defaults ( options )
2006-05-19 22:56:30 -04:00
@pid_file = ops [ :pid_file ]
2006-03-26 15:01:50 -05:00
if RUBY_PLATFORM !~ / mswin /
# graceful shutdown
2006-05-19 22:56:30 -04:00
trap ( " TERM " ) { log " TERM signal received. " ; stop }
2006-03-26 15:01:50 -05:00
# restart
2006-05-19 22:56:30 -04:00
trap ( " USR2 " ) { log " USR2 signal received. " ; stop ( need_restart = true ) }
# forced shutdown, even if previously restarted (actually just like TERM but for CTRL-C)
trap ( " INT " ) { log " INT signal received. " ; stop ( need_restart = false ) }
2006-03-26 15:01:50 -05:00
log " Signals ready. TERM => stop. USR2 => restart. INT => stop (no restart). "
else
log " WARNING: Win32 does not have signals support. "
end
end
# Logs a simple message to STDERR (or the mongrel log if in daemon mode).
def log ( msg )
STDERR . print " ** " , msg , " \n "
end
2006-05-19 22:56:30 -04:00
2006-03-21 21:02:34 -05:00
end
2006-02-20 19:55:39 -05:00
end