From 14d8cca4f2f92858cc76f39403392d0e49fe587c Mon Sep 17 00:00:00 2001 From: evanweaver Date: Fri, 26 Oct 2007 09:23:10 +0000 Subject: [PATCH] break classes into their own files git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@766 19e92222-5c0b-0410-8929-a290d50e31e9 --- lib/mongrel.rb | 543 +--------------------------------- lib/mongrel/const.rb | 110 +++++++ lib/mongrel/header_out.rb | 28 ++ lib/mongrel/http_request.rb | 155 ++++++++++ lib/mongrel/http_response.rb | 163 ++++++++++ lib/mongrel/uri_classifier.rb | 76 +++++ 6 files changed, 542 insertions(+), 533 deletions(-) create mode 100644 lib/mongrel/const.rb create mode 100644 lib/mongrel/header_out.rb create mode 100644 lib/mongrel/http_request.rb create mode 100644 lib/mongrel/http_response.rb create mode 100644 lib/mongrel/uri_classifier.rb diff --git a/lib/mongrel.rb b/lib/mongrel.rb index 4c02a887..72685aeb 100644 --- a/lib/mongrel.rb +++ b/lib/mongrel.rb @@ -24,553 +24,29 @@ require 'mongrel/handlers' require 'mongrel/command' require 'mongrel/tcphack' require 'mongrel/configurator' +require 'mongrel/uri_classifier' +require 'mongrel/const' +require 'mongrel/http_request' +require 'mongrel/header_out' +require 'mongrel/http_response' # Mongrel module containing all of the classes (include C extensions) for running # a Mongrel web server. It contains a minimalist HTTP server with just enough # functionality to service web application requests fast as possible. module Mongrel - class URIClassifier - - class RegistrationError < RuntimeError - end - class UsageError < RuntimeError - end - - attr_reader :handler_map - - # Returns the URIs that have been registered with this classifier so far. - def uris - @handler_map.keys - end - - def initialize - @handler_map = {} - @matcher = // - @root_handler = nil - end - - # Register a handler object at a particular URI. The handler can be whatever - # you want, including an array. It's up to you what to do with it. 
- # - # Registering a handler is not necessarily threadsafe, so be careful if you go - # mucking around once the server is running. - def register(uri, handler) - raise RegistrationError, "#{uri.inspect} is already registered" if @handler_map[uri] - raise RegistrationError, "URI is empty" if !uri or uri.empty? - raise RegistrationError, "URI must begin with a \"#{Const::SLASH}\"" unless uri[0..0] == Const::SLASH - @handler_map[uri.dup] = handler - rebuild - end - - # Unregister a particular URI and its handler. - def unregister(uri) - handler = @handler_map.delete(uri) - raise RegistrationError, "#{uri.inspect} was not registered" unless handler - rebuild - handler - end - - # Resolve a request URI by finding the best partial match in the registered - # handler URIs. - def resolve(request_uri) - if @root_handler - # Optimization for the pathological case of only one handler on "/"; e.g. Rails - [Const::SLASH, request_uri, @root_handler] - elsif match = @matcher.match(request_uri) - uri = match.to_s - # A root mounted ("/") handler must resolve such that path info matches the original URI. - [uri, (uri == Const::SLASH ? request_uri : match.post_match), @handler_map[uri]] - else - [nil, nil, nil] - end - end - - private - - def rebuild - if @handler_map.size == 1 and @handler_map[Const::SLASH] - @root_handler = @handler_map.values.first - else - @root_handler = nil - routes = @handler_map.keys.sort.sort_by do |uri| - -uri.length - end - @matcher = Regexp.new(routes.map do |uri| - Regexp.new('^' + Regexp.escape(uri)) - end.join('|')) - end - end - - end - - # Used to stop the HttpServer via Thread.raise. class StopServer < Exception; end - # Thrown at a thread when it is timed out. class TimeoutError < Exception; end - - # Every standard HTTP code mapped to the appropriate message. These are - # used so frequently that they are placed directly in Mongrel for easy - # access rather than Mongrel::Const. 
- HTTP_STATUS_CODES = { - 100 => 'Continue', - 101 => 'Switching Protocols', - 200 => 'OK', - 201 => 'Created', - 202 => 'Accepted', - 203 => 'Non-Authoritative Information', - 204 => 'No Content', - 205 => 'Reset Content', - 206 => 'Partial Content', - 300 => 'Multiple Choices', - 301 => 'Moved Permanently', - 302 => 'Moved Temporarily', - 303 => 'See Other', - 304 => 'Not Modified', - 305 => 'Use Proxy', - 400 => 'Bad Request', - 401 => 'Unauthorized', - 402 => 'Payment Required', - 403 => 'Forbidden', - 404 => 'Not Found', - 405 => 'Method Not Allowed', - 406 => 'Not Acceptable', - 407 => 'Proxy Authentication Required', - 408 => 'Request Time-out', - 409 => 'Conflict', - 410 => 'Gone', - 411 => 'Length Required', - 412 => 'Precondition Failed', - 413 => 'Request Entity Too Large', - 414 => 'Request-URI Too Large', - 415 => 'Unsupported Media Type', - 500 => 'Internal Server Error', - 501 => 'Not Implemented', - 502 => 'Bad Gateway', - 503 => 'Service Unavailable', - 504 => 'Gateway Time-out', - 505 => 'HTTP Version not supported' - } - - - # Frequently used constants when constructing requests or responses. Many times - # the constant just refers to a string with the same contents. Using these constants - # gave about a 3% to 10% performance improvement over using the strings directly. - # Symbols did not really improve things much compared to constants. - # - # While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT, - # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or - # too taxing on performance. - module Const - DATE = "Date".freeze - - # This is the part of the path after the SCRIPT_NAME. URIClassifier will determine this. - PATH_INFO="PATH_INFO".freeze - - # This is the initial part that your handler is identified as by URIClassifier. - SCRIPT_NAME="SCRIPT_NAME".freeze - - # The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME. 
- REQUEST_URI='REQUEST_URI'.freeze - REQUEST_PATH='REQUEST_PATH'.freeze - - MONGREL_VERSION="1.0.2".freeze - - MONGREL_TMP_BASE="mongrel".freeze - - # The standard empty 404 response for bad requests. Use Error4040Handler for custom stuff. - ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Mongrel #{MONGREL_VERSION}\r\n\r\nNOT FOUND".freeze - - CONTENT_LENGTH="CONTENT_LENGTH".freeze - - # A common header for indicating the server is too busy. Not used yet. - ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze - - # The basic max request size we'll try to read. - CHUNK_SIZE=(16 * 1024) - - # This is the maximum header that is allowed before a client is booted. The parser detects - # this, but we'd also like to do this as well. - MAX_HEADER=1024 * (80 + 32) - - # Maximum request body size before it is moved out of memory and into a tempfile for reading. - MAX_BODY=MAX_HEADER - - # A frozen format for this is about 15% faster - STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze - CONTENT_TYPE = "Content-Type".freeze - LAST_MODIFIED = "Last-Modified".freeze - ETAG = "ETag".freeze - SLASH = "/".freeze - REQUEST_METHOD="REQUEST_METHOD".freeze - GET="GET".freeze - HEAD="HEAD".freeze - # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32) - ETAG_FORMAT="\"%x-%x-%x\"".freeze - HEADER_FORMAT="%s: %s\r\n".freeze - LINE_END="\r\n".freeze - REMOTE_ADDR="REMOTE_ADDR".freeze - HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze - HTTP_IF_MODIFIED_SINCE="HTTP_IF_MODIFIED_SINCE".freeze - HTTP_IF_NONE_MATCH="HTTP_IF_NONE_MATCH".freeze - REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze - HOST = "HOST".freeze - end - - - # Basically a Hash with one extra parameter for the HTTP body, mostly used internally. + # A Hash with one extra parameter for the HTTP body, used internally. 
class HttpParams < Hash attr_accessor :http_body end - # When a handler is found for a registered URI then this class is constructed - # and passed to your HttpHandler::process method. You should assume that - # *one* handler processes all requests. Included in the HttpRequest is a - # HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body - # which is a string containing the request body (raw for now). - # - # The HttpRequest.initialize method will convert any request that is larger than - # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses - # a StringIO object. To be safe, you should assume it works like a file. - # - # The HttpHandler.request_notify system is implemented by having HttpRequest call - # HttpHandler.request_begins, HttpHandler.request_progress, HttpHandler.process during - # the IO processing. This adds a small amount of overhead but lets you implement - # finer controlled handlers and filters. - class HttpRequest - attr_reader :body, :params - - # You don't really call this. It's made for you. - # Main thing it does is hook up the params, and store any remaining - # body data into the HttpRequest.body attribute. - def initialize(params, socket, dispatchers) - @params = params - @socket = socket - @dispatchers = dispatchers - content_length = @params[Const::CONTENT_LENGTH].to_i - remain = content_length - @params.http_body.length - - # tell all dispatchers the request has begun - @dispatchers.each do |dispatcher| - dispatcher.request_begins(@params) - end unless @dispatchers.nil? || @dispatchers.empty? - - # Some clients (like FF1.0) report 0 for body and then send a body. This will probably truncate them but at least the request goes through usually. 
- if remain <= 0 - # we've got everything, pack it up - @body = StringIO.new - @body.write @params.http_body - update_request_progress(0, content_length) - elsif remain > 0 - # must read more data to complete body - if remain > Const::MAX_BODY - # huge body, put it in a tempfile - @body = Tempfile.new(Const::MONGREL_TMP_BASE) - @body.binmode - else - # small body, just use that - @body = StringIO.new - end - - @body.write @params.http_body - read_body(remain, content_length) - end - - @body.rewind if @body - end - - # updates all dispatchers about our progress - def update_request_progress(clen, total) - return if @dispatchers.nil? || @dispatchers.empty? - @dispatchers.each do |dispatcher| - dispatcher.request_progress(@params, clen, total) - end - end - private :update_request_progress - - # Does the heavy lifting of properly reading the larger body requests in - # small chunks. It expects @body to be an IO object, @socket to be valid, - # and will set @body = nil if the request fails. It also expects any initial - # part of the body that has been read to be in the @body already. - def read_body(remain, total) - begin - # write the odd sized chunk first - @params.http_body = read_socket(remain % Const::CHUNK_SIZE) - - remain -= @body.write(@params.http_body) - - update_request_progress(remain, total) - - # then stream out nothing but perfectly sized chunks - until remain <= 0 or @socket.closed? 
- # ASSUME: we are writing to a disk and these writes always write the requested amount - @params.http_body = read_socket(Const::CHUNK_SIZE) - remain -= @body.write(@params.http_body) - - update_request_progress(remain, total) - end - rescue Object => e - STDERR.puts "#{Time.now}: Error reading HTTP body: #{e.inspect}" - STDERR.puts e.backtrace.join("\n") - # any errors means we should delete the file, including if the file is dumped - @socket.close rescue nil - @body.delete if @body.class == Tempfile - @body = nil # signals that there was a problem - end - end - - def read_socket(len) - if !@socket.closed? - data = @socket.read(len) - if !data - raise "Socket read return nil" - elsif data.length != len - raise "Socket read returned insufficient data: #{data.length}" - else - data - end - else - raise "Socket already closed when reading." - end - end - - # Performs URI escaping so that you can construct proper - # query strings faster. Use this rather than the cgi.rb - # version since it's faster. (Stolen from Camping). - def self.escape(s) - s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/n) { - '%'+$1.unpack('H2'*$1.size).join('%').upcase - }.tr(' ', '+') - end - - - # Unescapes a URI escaped string. (Stolen from Camping). - def self.unescape(s) - s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n){ - [$1.delete('%')].pack('H*') - } - end - - # Parses a query string by breaking it up at the '&' - # and ';' characters. You can also use this to parse - # cookies by changing the characters used in the second - # parameter (which defaults to '&;'. - def self.query_parse(qs, d = '&;') - params = {} - (qs||'').split(/[#{d}] */n).inject(params) { |h,p| - k, v=unescape(p).split('=',2) - if cur = params[k] - if cur.class == Array - params[k] << v - else - params[k] = [cur, v] - end - else - params[k] = v - end - } - - return params - end - end - - - # This class implements a simple way of constructing the HTTP headers dynamically - # via a Hash syntax. Think of it as a write-only Hash. 
Refer to HttpResponse for - # information on how this is used. - # - # One consequence of this write-only nature is that you can write multiple headers - # by just doing them twice (which is sometimes needed in HTTP), but that the normal - # semantics for Hash (where doing an insert replaces) is not there. - class HeaderOut - attr_reader :out - attr_accessor :allowed_duplicates - - def initialize(out) - @sent = {} - @allowed_duplicates = {"Set-Cookie" => true, "Set-Cookie2" => true, - "Warning" => true, "WWW-Authenticate" => true} - @out = out - end - - # Simply writes "#{key}: #{value}" to an output buffer. - def[]=(key,value) - if not @sent.has_key?(key) or @allowed_duplicates.has_key?(key) - @sent[key] = true - @out.write(Const::HEADER_FORMAT % [key, value]) - end - end - end - - - # Writes and controls your response to the client using the HTTP/1.1 specification. - # You use it by simply doing: - # - # response.start(200) do |head,out| - # head['Content-Type'] = 'text/plain' - # out.write("hello\n") - # end - # - # The parameter to start is the response code--which Mongrel will translate for you - # based on HTTP_STATUS_CODES. The head parameter is how you write custom headers. - # The out parameter is where you write your body. The default status code for - # HttpResponse.start is 200 so the above example is redundant. - # - # As you can see, it's just like using a Hash and as you do this it writes the proper - # header to the output on the fly. You can even intermix specifying headers and - # writing content. The HttpResponse class with write the things in the proper order - # once the HttpResponse.block is ended. - # - # You may also work the HttpResponse object directly using the various attributes available - # for the raw socket, body, header, and status codes. If you do this you're on your own. - # A design decision was made to force the client to not pipeline requests. 
HTTP/1.1 - # pipelining really kills the performance due to how it has to be handled and how - # unclear the standard is. To fix this the HttpResponse gives a "Connection: close" - # header which forces the client to close right away. The bonus for this is that it - # gives a pretty nice speed boost to most clients since they can close their connection - # immediately. - # - # One additional caveat is that you don't have to specify the Content-length header - # as the HttpResponse will write this for you based on the out length. - class HttpResponse - attr_reader :socket - attr_reader :body - attr_writer :body - attr_reader :header - attr_reader :status - attr_writer :status - attr_reader :body_sent - attr_reader :header_sent - attr_reader :status_sent - - def initialize(socket) - @socket = socket - @body = StringIO.new - @status = 404 - @reason = HTTP_STATUS_CODES[@status] - @header = HeaderOut.new(StringIO.new) - @header[Const::DATE] = Time.now.httpdate - @body_sent = false - @header_sent = false - @status_sent = false - end - - # Receives a block passing it the header and body for you to work with. - # When the block is finished it writes everything you've done to - # the socket in the proper order. This lets you intermix header and - # body content as needed. Handlers are able to modify pretty much - # any part of the request in the chain, and can stop further processing - # by simple passing "finalize=true" to the start method. By default - # all handlers run and then mongrel finalizes the request when they're - # all done. - def start(status=200, finalize=false, reason=HTTP_STATUS_CODES[status]) - @status = status.to_i - @reason = reason - yield @header, @body - finished if finalize - end - - # Primarily used in exception handling to reset the response output in order to write - # an alternative response. It will abort with an exception if you have already - # sent the header or the body. This is pretty catastrophic actually. 
- def reset - if @body_sent - raise "You have already sent the request body." - elsif @header_sent - raise "You have already sent the request headers." - else - @header.out.truncate(0) - @body.close - @body = StringIO.new - end - end - - def send_status(content_length=@body.length) - if not @status_sent - @header['Content-Length'] = content_length if content_length and @status != 304 - write(Const::STATUS_FORMAT % [@status, @reason]) - @status_sent = true - end - end - - def send_header - if not @header_sent - @header.out.rewind - write(@header.out.read + Const::LINE_END) - @header_sent = true - end - end - - def send_body - if not @body_sent - @body.rewind - write(@body.read) - @body_sent = true - end - end - - # Appends the contents of +path+ to the response stream. The file is opened for binary - # reading and written in chunks to the socket. - # - # Sendfile API support has been removed in 0.3.13.4 due to stability problems. - def send_file(path, small_file = false) - if small_file - File.open(path, "rb") {|f| @socket << f.read } - else - File.open(path, "rb") do |f| - while chunk = f.read(Const::CHUNK_SIZE) and chunk.length > 0 - begin - write(chunk) - rescue Object => exc - break - end - end - end - end - @body_sent = true - end - - def socket_error(details) - # ignore these since it means the client closed off early - @socket.close rescue nil - done = true - raise details - end - - def write(data) - @socket.write(data) - rescue => details - socket_error(details) - end - - # This takes whatever has been done to header and body and then writes it in the - # proper format to make an HTTP/1.1 response. - def finished - send_status - send_header - send_body - end - - # Used during error conditions to mark the response as "done" so there isn't any more processing - # sent to the client. 
- def done=(val) - @status_sent = true - @header_sent = true - @body_sent = true - end - - def done - (@status_sent and @header_sent and @body_sent) - end - - end - - # This is the main driver of Mongrel, while the Mongrel::HttpParser and Mongrel::URIClassifier # make up the majority of how the server functions. It's a very simple class that just # has a thread accepting connections and a simple HttpServer.process_client function @@ -866,10 +342,11 @@ module Mongrel end end -# Load experimental gem, if present. We put it here so it can override anything +# Load experimental library, if present. We put it here so it can override anything # in regular Mongrel. -begin - require 'mongrel_experimental', '=1.1' +begin + gem 'mongrel_experimental', '=1.1' if respond_to? 'gem' + require 'mongrel_experimental' rescue LoadError end diff --git a/lib/mongrel/const.rb b/lib/mongrel/const.rb new file mode 100644 index 00000000..282763b2 --- /dev/null +++ b/lib/mongrel/const.rb @@ -0,0 +1,110 @@ + +module Mongrel + + # Every standard HTTP code mapped to the appropriate message. These are + # used so frequently that they are placed directly in Mongrel for easy + # access rather than Mongrel::Const itself. 
+ HTTP_STATUS_CODES = { + 100 => 'Continue', + 101 => 'Switching Protocols', + 200 => 'OK', + 201 => 'Created', + 202 => 'Accepted', + 203 => 'Non-Authoritative Information', + 204 => 'No Content', + 205 => 'Reset Content', + 206 => 'Partial Content', + 300 => 'Multiple Choices', + 301 => 'Moved Permanently', + 302 => 'Moved Temporarily', + 303 => 'See Other', + 304 => 'Not Modified', + 305 => 'Use Proxy', + 400 => 'Bad Request', + 401 => 'Unauthorized', + 402 => 'Payment Required', + 403 => 'Forbidden', + 404 => 'Not Found', + 405 => 'Method Not Allowed', + 406 => 'Not Acceptable', + 407 => 'Proxy Authentication Required', + 408 => 'Request Time-out', + 409 => 'Conflict', + 410 => 'Gone', + 411 => 'Length Required', + 412 => 'Precondition Failed', + 413 => 'Request Entity Too Large', + 414 => 'Request-URI Too Large', + 415 => 'Unsupported Media Type', + 500 => 'Internal Server Error', + 501 => 'Not Implemented', + 502 => 'Bad Gateway', + 503 => 'Service Unavailable', + 504 => 'Gateway Time-out', + 505 => 'HTTP Version not supported' + } + + # Frequently used constants when constructing requests or responses. Many times + # the constant just refers to a string with the same contents. Using these constants + # gave about a 3% to 10% performance improvement over using the strings directly. + # Symbols did not really improve things much compared to constants. + # + # While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT, + # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or + # too taxing on performance. + module Const + DATE = "Date".freeze + + # This is the part of the path after the SCRIPT_NAME. URIClassifier will determine this. + PATH_INFO="PATH_INFO".freeze + + # This is the initial part that your handler is identified as by URIClassifier. + SCRIPT_NAME="SCRIPT_NAME".freeze + + # The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME. 
+ REQUEST_URI='REQUEST_URI'.freeze + REQUEST_PATH='REQUEST_PATH'.freeze + + MONGREL_VERSION="1.0.2".freeze + + MONGREL_TMP_BASE="mongrel".freeze + + # The standard empty 404 response for bad requests. Use Error4040Handler for custom stuff. + ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Mongrel #{MONGREL_VERSION}\r\n\r\nNOT FOUND".freeze + + CONTENT_LENGTH="CONTENT_LENGTH".freeze + + # A common header for indicating the server is too busy. Not used yet. + ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze + + # The basic max request size we'll try to read. + CHUNK_SIZE=(16 * 1024) + + # This is the maximum header that is allowed before a client is booted. The parser detects + # this, but we'd also like to do this as well. + MAX_HEADER=1024 * (80 + 32) + + # Maximum request body size before it is moved out of memory and into a tempfile for reading. + MAX_BODY=MAX_HEADER + + # A frozen format for this is about 15% faster + STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze + CONTENT_TYPE = "Content-Type".freeze + LAST_MODIFIED = "Last-Modified".freeze + ETAG = "ETag".freeze + SLASH = "/".freeze + REQUEST_METHOD="REQUEST_METHOD".freeze + GET="GET".freeze + HEAD="HEAD".freeze + # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32) + ETAG_FORMAT="\"%x-%x-%x\"".freeze + HEADER_FORMAT="%s: %s\r\n".freeze + LINE_END="\r\n".freeze + REMOTE_ADDR="REMOTE_ADDR".freeze + HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze + HTTP_IF_MODIFIED_SINCE="HTTP_IF_MODIFIED_SINCE".freeze + HTTP_IF_NONE_MATCH="HTTP_IF_NONE_MATCH".freeze + REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze + HOST = "HOST".freeze + end +end \ No newline at end of file diff --git a/lib/mongrel/header_out.rb b/lib/mongrel/header_out.rb new file mode 100644 index 00000000..b34e95e9 --- /dev/null +++ b/lib/mongrel/header_out.rb @@ -0,0 +1,28 @@ +module Mongrel + # This class implements 
a simple way of constructing the HTTP headers dynamically + # via a Hash syntax. Think of it as a write-only Hash. Refer to HttpResponse for + # information on how this is used. + # + # One consequence of this write-only nature is that you can write multiple headers + # by just doing them twice (which is sometimes needed in HTTP), but that the normal + # semantics for Hash (where doing an insert replaces) is not there. + class HeaderOut + attr_reader :out + attr_accessor :allowed_duplicates + + def initialize(out) + @sent = {} + @allowed_duplicates = {"Set-Cookie" => true, "Set-Cookie2" => true, + "Warning" => true, "WWW-Authenticate" => true} + @out = out + end + + # Simply writes "#{key}: #{value}" to an output buffer. + def[]=(key,value) + if not @sent.has_key?(key) or @allowed_duplicates.has_key?(key) + @sent[key] = true + @out.write(Const::HEADER_FORMAT % [key, value]) + end + end + end +end \ No newline at end of file diff --git a/lib/mongrel/http_request.rb b/lib/mongrel/http_request.rb new file mode 100644 index 00000000..82ffe428 --- /dev/null +++ b/lib/mongrel/http_request.rb @@ -0,0 +1,155 @@ + +module Mongrel + # + # When a handler is found for a registered URI then this class is constructed + # and passed to your HttpHandler::process method. You should assume that + # *one* handler processes all requests. Included in the HttpRequest is a + # HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body + # which is a string containing the request body (raw for now). + # + # The HttpRequest.initialize method will convert any request that is larger than + # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses + # a StringIO object. To be safe, you should assume it works like a file. + # + # The HttpHandler.request_notify system is implemented by having HttpRequest call + # HttpHandler.request_begins, HttpHandler.request_progress, HttpHandler.process during + # the IO processing. 
This adds a small amount of overhead but lets you implement + # finer controlled handlers and filters. + # + class HttpRequest + attr_reader :body, :params + + # You don't really call this. It's made for you. + # Main thing it does is hook up the params, and store any remaining + # body data into the HttpRequest.body attribute. + def initialize(params, socket, dispatchers) + @params = params + @socket = socket + @dispatchers = dispatchers + content_length = @params[Const::CONTENT_LENGTH].to_i + remain = content_length - @params.http_body.length + + # tell all dispatchers the request has begun + @dispatchers.each do |dispatcher| + dispatcher.request_begins(@params) + end unless @dispatchers.nil? || @dispatchers.empty? + + # Some clients (like FF1.0) report 0 for body and then send a body. This will probably truncate them but at least the request goes through usually. + if remain <= 0 + # we've got everything, pack it up + @body = StringIO.new + @body.write @params.http_body + update_request_progress(0, content_length) + elsif remain > 0 + # must read more data to complete body + if remain > Const::MAX_BODY + # huge body, put it in a tempfile + @body = Tempfile.new(Const::MONGREL_TMP_BASE) + @body.binmode + else + # small body, just use that + @body = StringIO.new + end + + @body.write @params.http_body + read_body(remain, content_length) + end + + @body.rewind if @body + end + + # updates all dispatchers about our progress + def update_request_progress(clen, total) + return if @dispatchers.nil? || @dispatchers.empty? + @dispatchers.each do |dispatcher| + dispatcher.request_progress(@params, clen, total) + end + end + private :update_request_progress + + # Does the heavy lifting of properly reading the larger body requests in + # small chunks. It expects @body to be an IO object, @socket to be valid, + # and will set @body = nil if the request fails. It also expects any initial + # part of the body that has been read to be in the @body already. 
+ def read_body(remain, total) + begin + # write the odd sized chunk first + @params.http_body = read_socket(remain % Const::CHUNK_SIZE) + + remain -= @body.write(@params.http_body) + + update_request_progress(remain, total) + + # then stream out nothing but perfectly sized chunks + until remain <= 0 or @socket.closed? + # ASSUME: we are writing to a disk and these writes always write the requested amount + @params.http_body = read_socket(Const::CHUNK_SIZE) + remain -= @body.write(@params.http_body) + + update_request_progress(remain, total) + end + rescue Object => e + STDERR.puts "#{Time.now}: Error reading HTTP body: #{e.inspect}" + STDERR.puts e.backtrace.join("\n") + # any errors means we should delete the file, including if the file is dumped + @socket.close rescue nil + @body.delete if @body.class == Tempfile + @body = nil # signals that there was a problem + end + end + + def read_socket(len) + if !@socket.closed? + data = @socket.read(len) + if !data + raise "Socket read return nil" + elsif data.length != len + raise "Socket read returned insufficient data: #{data.length}" + else + data + end + else + raise "Socket already closed when reading." + end + end + + # Performs URI escaping so that you can construct proper + # query strings faster. Use this rather than the cgi.rb + # version since it's faster. (Stolen from Camping). + def self.escape(s) + s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/n) { + '%'+$1.unpack('H2'*$1.size).join('%').upcase + }.tr(' ', '+') + end + + + # Unescapes a URI escaped string. (Stolen from Camping). + def self.unescape(s) + s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n){ + [$1.delete('%')].pack('H*') + } + end + + # Parses a query string by breaking it up at the '&' + # and ';' characters. You can also use this to parse + # cookies by changing the characters used in the second + # parameter (which defaults to '&;'. 
# ---- lib/mongrel/http_request.rb (tail of the file) ----
#
# NOTE(review): only the end of this file is visible here. The enclosing class is
# presumably Mongrel::HttpRequest, and +unescape+ a class method defined earlier
# in it -- confirm against the full file.
module Mongrel
  class HttpRequest

    # Parses a query string into a Hash of parameter name => value.
    #
    # +qs+ is the raw (still URL-encoded) query string; nil is treated as empty.
    # +d+ is the set of pair delimiter characters. It is interpolated verbatim into
    # a regexp character class, so it must not come from untrusted input.
    #
    # A name that occurs more than once maps to an Array of its values in order of
    # appearance. A name with no "=" maps to nil. Empty pairs (as in "a=1&&b=2")
    # are skipped.
    #
    # Each pair is split on the first "=" BEFORE unescaping, so an encoded "%3D"
    # inside a name or value is never mistaken for the separator.
    def self.query_parse(qs, d = '&;')
      params = {}
      (qs || '').split(/[#{d}] */n).each do |pair|
        next if pair.empty?
        key, value = pair.split('=', 2).map { |part| unescape(part) }
        current = params[key]
        if current.is_a?(Array)
          current << value
        elsif current
          params[key] = [current, value]
        else
          params[key] = value
        end
      end
      params
    end
  end
end

# ---- lib/mongrel/http_response.rb ----
module Mongrel
  # Writes and controls your response to the client using the HTTP/1.1 specification.
  # You use it by simply doing:
  #
  #  response.start(200) do |head,out|
  #    head['Content-Type'] = 'text/plain'
  #    out.write("hello\n")
  #  end
  #
  # The parameter to start is the response code--which Mongrel will translate for you
  # based on HTTP_STATUS_CODES.  The head parameter is how you write custom headers.
  # The out parameter is where you write your body.  The default status code for
  # HttpResponse.start is 200 so the above example is redundant.
  #
  # As you can see, it's just like using a Hash and as you do this it writes the proper
  # header to the output on the fly.  You can even intermix specifying headers and
  # writing content.  The HttpResponse class will write the things in the proper order
  # once the HttpResponse.start block is ended.
  #
  # You may also work the HttpResponse object directly using the various attributes available
  # for the raw socket, body, header, and status codes.  If you do this you're on your own.
  # A design decision was made to force the client to not pipeline requests.  HTTP/1.1
  # pipelining really kills the performance due to how it has to be handled and how
  # unclear the standard is.  To fix this the HttpResponse gives a "Connection: close"
  # header which forces the client to close right away.  The bonus for this is that it
  # gives a pretty nice speed boost to most clients since they can close their connection
  # immediately.
  #
  # One additional caveat is that you don't have to specify the Content-length header
  # as the HttpResponse will write this for you based on the out length.
  class HttpResponse
    attr_reader :socket
    attr_accessor :body
    attr_reader :header
    attr_accessor :status
    attr_reader :body_sent
    attr_reader :header_sent
    attr_reader :status_sent

    # Wraps +socket+. The response starts out as a 404 with an empty body and a
    # header buffer that already contains the Date header.
    def initialize(socket)
      @socket = socket
      @body = StringIO.new
      @status = 404
      @reason = HTTP_STATUS_CODES[@status]
      @header = HeaderOut.new(StringIO.new)
      @header[Const::DATE] = Time.now.httpdate
      @body_sent = false
      @header_sent = false
      @status_sent = false
    end

    # Receives a block passing it the header and body for you to work with.
    # When the block is finished it writes everything you've done to
    # the socket in the proper order.  This lets you intermix header and
    # body content as needed.  Handlers are able to modify pretty much
    # any part of the request in the chain, and can stop further processing
    # by simply passing "finalize=true" to the start method.  By default
    # all handlers run and then mongrel finalizes the request when they're
    # all done.
    def start(status=200, finalize=false, reason=HTTP_STATUS_CODES[status])
      @status = status.to_i
      @reason = reason
      yield @header, @body
      finished if finalize
    end

    # Primarily used in exception handling to reset the response output in order to write
    # an alternative response.  It will abort with an exception if you have already
    # sent the header or the body.  This is pretty catastrophic actually.
    #
    # Everything buffered so far is discarded, including the Date header written by
    # the constructor.
    def reset
      if @body_sent
        raise "You have already sent the request body."
      elsif @header_sent
        raise "You have already sent the request headers."
      else
        @header.out.truncate(0)
        # truncate leaves the write position where it was; without the rewind the
        # next header written would be preceded by NUL padding.
        @header.out.rewind
        @body.close
        @body = StringIO.new
      end
    end

    # Writes the status line once. Unless +content_length+ is nil/false or the
    # status is 304, a Content-Length header is buffered first.
    def send_status(content_length=@body.length)
      unless @status_sent
        @header['Content-Length'] = content_length if content_length && @status != 304
        write(Const::STATUS_FORMAT % [@status, @reason])
        @status_sent = true
      end
    end

    # Writes the buffered headers followed by the blank line, once.
    def send_header
      unless @header_sent
        @header.out.rewind
        write(@header.out.read + Const::LINE_END)
        @header_sent = true
      end
    end

    # Writes the buffered body, once.
    def send_body
      unless @body_sent
        @body.rewind
        write(@body.read)
        @body_sent = true
      end
    end

    # Appends the contents of +path+ to the response stream.  The file is opened for binary
    # reading and written in chunks to the socket.  With +small_file+ set the whole file is
    # read and written in one call instead.
    #
    # In chunked mode a failed write (typically the client going away) ends the transfer
    # quietly.  Only StandardError is swallowed, so exceptions raised at the thread to stop
    # or time it out still propagate.  In small-file mode a failed write propagates to the
    # caller after the socket has been closed.
    #
    # Sendfile API support has been removed in 0.3.13.4 due to stability problems.
    def send_file(path, small_file = false)
      File.open(path, "rb") do |f|
        if small_file
          write(f.read)
        else
          while (chunk = f.read(Const::CHUNK_SIZE)) && chunk.length > 0
            begin
              write(chunk)
            rescue StandardError
              break
            end
          end
        end
      end
      @body_sent = true
    end

    # Closes the socket (best effort), marks the response as done and re-raises +details+.
    def socket_error(details)
      # ignore these since it means the client closed off early
      @socket.close rescue nil
      # Must go through the writer; a bare "done = true" only creates a local variable.
      self.done = true
      raise details
    end

    # Writes +data+ to the socket; any StandardError is routed through socket_error.
    def write(data)
      @socket.write(data)
    rescue => details
      socket_error(details)
    end

    # This takes whatever has been done to header and body and then writes it in the
    # proper format to make an HTTP/1.1 response.
    def finished
      send_status
      send_header
      send_body
    end

    # Used during error conditions to mark the response as "done" so there isn't any more processing
    # sent to the client.  The assigned value is ignored: any assignment marks the response done.
    def done=(val)
      @status_sent = true
      @header_sent = true
      @body_sent = true
    end

    # True once status, header and body have all been sent (or marked as sent).
    def done
      (@status_sent && @header_sent && @body_sent)
    end

  end
end

# ---- lib/mongrel/uri_classifier.rb ----
module Mongrel
  # Maps registered URI prefixes to handlers and resolves a request URI to the
  # longest registered prefix that it starts with.
  class URIClassifier

    class RegistrationError < RuntimeError
    end
    class UsageError < RuntimeError
    end

    attr_reader :handler_map

    # Returns the URIs that have been registered with this classifier so far.
    def uris
      @handler_map.keys
    end

    def initialize
      @handler_map = {}
      # Regexp.union with no patterns never matches, so nothing resolves until a
      # handler is registered.
      @matcher = Regexp.union
      @root_handler = nil
    end

    # Register a handler object at a particular URI. The handler can be whatever
    # you want, including an array. It's up to you what to do with it.
    #
    # Raises RegistrationError if the URI is nil or empty, does not begin with a
    # slash, or is already registered.
    #
    # Registering a handler is not necessarily threadsafe, so be careful if you go
    # mucking around once the server is running.
    def register(uri, handler)
      raise RegistrationError, "URI is empty" if !uri or uri.empty?
      raise RegistrationError, "URI must begin with a \"#{Const::SLASH}\"" unless uri[0..0] == Const::SLASH
      # has_key? rather than a truthiness test so a nil/false handler still counts.
      raise RegistrationError, "#{uri.inspect} is already registered" if @handler_map.has_key?(uri)
      @handler_map[uri.dup] = handler
      rebuild
    end

    # Unregister a particular URI and return its handler.
    # Raises RegistrationError if the URI was not registered.
    def unregister(uri)
      raise RegistrationError, "#{uri.inspect} was not registered" unless @handler_map.has_key?(uri)
      handler = @handler_map.delete(uri)
      rebuild
      handler
    end

    # Resolve a request URI by finding the best partial match in the registered
    # handler URIs.
    #
    # Returns [matched_uri, remaining_path, handler], or [nil, nil, nil] when no
    # registered URI is a prefix of +request_uri+.
    def resolve(request_uri)
      if @root_handler
        # Optimization for the pathological case of only one handler on "/"; e.g. Rails
        [Const::SLASH, request_uri, @root_handler]
      elsif match = @matcher.match(request_uri)
        uri = match.to_s
        # A root mounted ("/") handler must resolve such that path info matches the original URI.
        [uri, (uri == Const::SLASH ? request_uri : match.post_match), @handler_map[uri]]
      else
        [nil, nil, nil]
      end
    end

    private

    # Recomputes the lookup state after the handler map has changed.
    def rebuild
      root_only = @handler_map.size == 1 && @handler_map[Const::SLASH]
      @root_handler = root_only ? @handler_map[Const::SLASH] : nil

      # Longest prefixes first so the alternation prefers the most specific route;
      # the URI itself is the tie-breaker to keep the order deterministic.
      routes = @handler_map.keys.sort_by { |uri| [-uri.length, uri] }

      # \A anchors at the start of the string; ^ would also match after a newline.
      @matcher = Regexp.union(*routes.map { |uri| Regexp.new('\A' + Regexp.escape(uri)) })
    end

  end
end