mirror of
https://github.com/puma/puma.git
synced 2022-11-09 13:48:40 -05:00
Removed experimental mime carving (in branch now).
git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@413 19e92222-5c0b-0410-8929-a290d50e31e9
This commit is contained in:
parent
b60da7b70f
commit
1a16539f42
3 changed files with 0 additions and 489 deletions
|
@ -9,26 +9,11 @@
|
|||
#include "http11_parser.h"
|
||||
#include <ctype.h>
|
||||
#include "tst.h"
|
||||
#include <string.h>
|
||||
|
||||
typedef struct BMHSearchState {
|
||||
size_t occ[UCHAR_MAX+1];
|
||||
size_t htotal;
|
||||
unsigned char *needle;
|
||||
size_t nlen;
|
||||
size_t skip;
|
||||
size_t max_find;
|
||||
|
||||
size_t *found_at;
|
||||
size_t nfound;
|
||||
} BMHSearchState;
|
||||
|
||||
static VALUE mMongrel;
|
||||
static VALUE cHttpParser;
|
||||
static VALUE cBMHSearch;
|
||||
static VALUE cURIClassifier;
|
||||
static VALUE eHttpParserError;
|
||||
static VALUE eBMHSearchError;
|
||||
|
||||
#define id_handler_map rb_intern("@handler_map")
|
||||
#define id_http_body rb_intern("@http_body")
|
||||
|
@ -192,294 +177,6 @@ void header_done(void *data, const char *at, size_t length)
|
|||
}
|
||||
|
||||
|
||||
inline size_t max(size_t a, size_t b) { return a>b ? a : b; }
|
||||
|
||||
void BMHSearch_free(void *data) {
|
||||
BMHSearchState *S = (BMHSearchState *)data;
|
||||
|
||||
if(data) {
|
||||
if(S->needle) free(S->needle);
|
||||
if(S->found_at) free(S->found_at);
|
||||
free(data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
VALUE BMHSearch_alloc(VALUE klass)
|
||||
{
|
||||
VALUE obj;
|
||||
|
||||
BMHSearchState *S = ALLOC_N(BMHSearchState, 1);
|
||||
|
||||
obj = Data_Wrap_Struct(klass, NULL, BMHSearch_free, S);
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* BMHSearch.new(needle, max_find) -> searcher
|
||||
*
|
||||
* Prepares the needle for searching and allocates enough space to
|
||||
* retain max_find locations. BMHSearch will throw an exception
|
||||
* if you go over the max_find without calling BMHSearch#pop.
|
||||
*/
|
||||
VALUE BMHSearch_init(VALUE self, VALUE needle, VALUE max_find)
|
||||
{
|
||||
BMHSearchState *S = NULL;
|
||||
size_t a, b;
|
||||
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
|
||||
S->htotal = 0;
|
||||
S->nfound = 0;
|
||||
S->skip = 0;
|
||||
S->nlen = RSTRING(needle)->len;
|
||||
S->max_find = FIX2INT(max_find);
|
||||
S->needle = NULL;
|
||||
S->found_at = NULL;
|
||||
|
||||
if(S->nlen <= 0) rb_raise(eBMHSearchError, "Needle can't be 0 length.");
|
||||
|
||||
// copy it so we don't have to worry about Ruby messing it up
|
||||
S->needle = ALLOC_N(unsigned char, S->nlen);
|
||||
memcpy(S->needle, RSTRING(needle)->ptr, S->nlen);
|
||||
|
||||
// setup the number of finds they want
|
||||
S->found_at = ALLOC_N(size_t, S->max_find);
|
||||
|
||||
/* Preprocess */
|
||||
/* Initialize the table to default value */
|
||||
for(a=0; a<UCHAR_MAX+1; a=a+1) S->occ[a] = S->nlen;
|
||||
|
||||
/* Then populate it with the analysis of the needle */
|
||||
b=S->nlen;
|
||||
for(a=0; a < S->nlen; a=a+1)
|
||||
{
|
||||
b=b-1;
|
||||
S->occ[S->needle[a]] = b;
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.find(haystack) -> nfound
|
||||
*
|
||||
* This will look for the needle in the haystack and return the total
|
||||
* number found so far. The key ingredient is that you can pass in
|
||||
* as many haystacks as you want as long as they are longer than the
|
||||
* needle. Each time you call find, it adds to the total and number
|
||||
* found. The purpose is to allow you to read a large string in chunks
|
||||
* and find the needle.
|
||||
*
|
||||
* The main purpose of the nfound is so that you can periodically call
|
||||
* BMHSearch.pop to clear the list of found locations before you run
|
||||
* out of max_finds.
|
||||
*/
|
||||
VALUE BMHSearch_find(VALUE self, VALUE hay)
|
||||
{
|
||||
size_t hpos = 0, npos = 0, skip = 0;
|
||||
unsigned char* haystack = NULL;
|
||||
size_t hlen = 0, last = 0;
|
||||
BMHSearchState *S = NULL;
|
||||
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
|
||||
haystack = RSTRING(hay)->ptr;
|
||||
hlen = RSTRING(hay)->len;
|
||||
|
||||
/* Sanity checks on the parameters */
|
||||
if(S->nlen > hlen) {
|
||||
rb_raise(eBMHSearchError, "Haystack can't be smaller than needle.");
|
||||
} else if(S->nlen <= 0) {
|
||||
rb_raise(eBMHSearchError, "Needle can't be 0 length.");
|
||||
} else if(!haystack || !S->needle) {
|
||||
rb_raise(eBMHSearchError, "Corrupt search state. REALLY BAD!");
|
||||
}
|
||||
|
||||
/* Check for a trailing remainder, which is only possible if skip > 1 */
|
||||
if(S->skip) {
|
||||
// only scan for what should be the rest of the string
|
||||
size_t remainder = S->nlen - S->skip;
|
||||
if(!memcmp(haystack, S->needle + S->skip, remainder)) {
|
||||
// record a find
|
||||
S->found_at[S->nfound++] = S->htotal - S->skip;
|
||||
// move up by the amount found
|
||||
hpos += remainder;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Start searching from the end of S->needle (this is not a typo) */
|
||||
hpos = S->nlen-1;
|
||||
|
||||
while(hpos < hlen) {
|
||||
/* Compare the S->needle backwards, and stop when first mismatch is found */
|
||||
npos = S->nlen-1;
|
||||
|
||||
while(hpos < hlen && haystack[hpos] == S->needle[npos]) {
|
||||
if(npos == 0) {
|
||||
// found one, log it in found_at, but reduce by the skip if we're at the beginning
|
||||
S->found_at[S->nfound++] = hpos + S->htotal;
|
||||
last = hpos; // keep track of the last one we found in this chunk
|
||||
|
||||
if(S->nfound > S->max_find) {
|
||||
rb_raise(eBMHSearchError, "More than max requested needles found. Use pop.");
|
||||
}
|
||||
|
||||
/* continue at the next possible spot, from end of S->needle (not typo) */
|
||||
hpos += S->nlen + S->nlen - 1;
|
||||
npos = S->nlen - 1;
|
||||
continue;
|
||||
} else {
|
||||
hpos--;
|
||||
npos--;
|
||||
}
|
||||
}
|
||||
|
||||
// exhausted the string already, done
|
||||
if(hpos > hlen) break;
|
||||
|
||||
/* Find out how much ahead we can skip based on the byte that was found */
|
||||
skip = max(S->nlen - npos, S->occ[haystack[hpos]]);
|
||||
hpos += skip;
|
||||
}
|
||||
|
||||
|
||||
skip = 0; // skip defaults to 0
|
||||
|
||||
// don't bother if the string ends in the needle completely
|
||||
if(S->nfound == 0 || last != hlen - S->nlen) {
|
||||
// invert the occ array to figure out how much to jump back
|
||||
size_t back = S->nlen - S->occ[haystack[hlen-1]];
|
||||
|
||||
// the needle could have an ending char that's also repeated in the needle
|
||||
// in that case, we have to adjust to search for nlen-1 just in case
|
||||
if(haystack[hlen-1] == S->needle[S->nlen - 1]) {
|
||||
back = S->nlen - 1;
|
||||
}
|
||||
|
||||
int found_it = 0; // defaults to 0 for test at end of it
|
||||
if(back < S->nlen && back > 0) {
|
||||
// search for the longest possible prefix
|
||||
for(hpos = hlen - back; hpos < hlen; hpos++) {
|
||||
skip = hlen - hpos;
|
||||
if(!memcmp(haystack+hpos, S->needle, skip)) {
|
||||
found_it = 1; // make sure we actually found it
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// skip will be 1 if nothing was found, so we have to force it 0
|
||||
if(!found_it) skip = 0;
|
||||
}
|
||||
|
||||
// keep track of the skip, it's set to 0 by the code above
|
||||
S->skip = skip;
|
||||
// update the total processed so far so indexes will be in sync
|
||||
S->htotal += hlen;
|
||||
|
||||
return INT2FIX(S->nfound);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.pop -> [1, 2, 3, ..]
|
||||
*
|
||||
* Pops off the locations in the TOTAL string (over all haystacks processed)
|
||||
* clearing the internal buffer for more space and letting you use it.
|
||||
*/
|
||||
VALUE BMHSearch_pop(VALUE self)
|
||||
{
|
||||
int i = 0;
|
||||
BMHSearchState *S = NULL;
|
||||
VALUE result;
|
||||
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
|
||||
result = rb_ary_new();
|
||||
for(i = 0; i < S->nfound; i++) {
|
||||
rb_ary_push(result, INT2FIX(S->found_at[i]));
|
||||
}
|
||||
|
||||
S->nfound = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.nfound -> Fixed
|
||||
*
|
||||
* The number found so far.
|
||||
*/
|
||||
VALUE BMHSearch_nfound(VALUE self)
|
||||
{
|
||||
BMHSearchState *S = NULL;
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
return INT2FIX(S->nfound);
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.max_find -> Fixed
|
||||
*
|
||||
* The current maximum find setting (cannot be changed).
|
||||
*/
|
||||
VALUE BMHSearch_max_find(VALUE self)
|
||||
{
|
||||
BMHSearchState *S = NULL;
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
return INT2FIX(S->max_find);
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.total -> Fixed
|
||||
*
|
||||
* The total bytes processed so far.
|
||||
*/
|
||||
VALUE BMHSearch_total(VALUE self)
|
||||
{
|
||||
BMHSearchState *S = NULL;
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
return INT2FIX(S->htotal);
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.needle -> String
|
||||
*
|
||||
* A COPY of the internal needle string being used for searching.
|
||||
*/
|
||||
VALUE BMHSearch_needle(VALUE self)
|
||||
{
|
||||
BMHSearchState *S = NULL;
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
return rb_str_new(S->needle, S->nlen);
|
||||
}
|
||||
|
||||
/**
|
||||
* call-seq:
|
||||
* searcher.has_trailing? -> Boolean
|
||||
*
|
||||
* Tells you whether the last call to BMHSearch#find has possible
|
||||
* trailing matches. This is mostly for completeness but you
|
||||
* might use this to adjust the amount of data you're reading
|
||||
* or the buffer size.
|
||||
*/
|
||||
VALUE BMHSearch_has_trailing(VALUE self)
|
||||
{
|
||||
BMHSearchState *S = NULL;
|
||||
DATA_GET(self, BMHSearchState, S);
|
||||
return S->skip ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
|
||||
void HttpParser_free(void *data) {
|
||||
TRACE();
|
||||
|
||||
|
@ -864,18 +561,6 @@ void Init_http11()
|
|||
DEF_GLOBAL(port_80, "80");
|
||||
|
||||
eHttpParserError = rb_define_class_under(mMongrel, "HttpParserError", rb_eIOError);
|
||||
eBMHSearchError = rb_define_class_under(mMongrel, "BMHSearchError", rb_eIOError);
|
||||
|
||||
cBMHSearch = rb_define_class_under(mMongrel, "BMHSearch", rb_cObject);
|
||||
rb_define_alloc_func(cBMHSearch, BMHSearch_alloc);
|
||||
rb_define_method(cBMHSearch, "initialize", BMHSearch_init,2);
|
||||
rb_define_method(cBMHSearch, "find", BMHSearch_find,1);
|
||||
rb_define_method(cBMHSearch, "pop", BMHSearch_pop,0);
|
||||
rb_define_method(cBMHSearch, "nfound", BMHSearch_nfound,0);
|
||||
rb_define_method(cBMHSearch, "total", BMHSearch_total,0);
|
||||
rb_define_method(cBMHSearch, "needle", BMHSearch_needle,0);
|
||||
rb_define_method(cBMHSearch, "max_find", BMHSearch_max_find,0);
|
||||
rb_define_method(cBMHSearch, "has_trailing?", BMHSearch_has_trailing,0);
|
||||
|
||||
cHttpParser = rb_define_class_under(mMongrel, "HttpParser", rb_cObject);
|
||||
rb_define_alloc_func(cHttpParser, HttpParser_alloc);
|
||||
|
|
|
@ -199,20 +199,7 @@ module Mongrel
|
|||
content_length = @params[Const::CONTENT_LENGTH].to_i
|
||||
remain = content_length - @params.http_body.length
|
||||
|
||||
if @params.has_key? 'CONTENT_TYPE'
|
||||
# see if it's multipart, and carve out the boundary for later
|
||||
@mpart_type, @mpart_boundary = @params['CONTENT_TYPE'].split(/;\s*/)
|
||||
if @mpart_type and @mpart_boundary and @mpart_boundary.include? "="
|
||||
@mpart_boundary = @mpart_boundary.split("=")[1].strip
|
||||
STDERR.puts "boundary: #{@mpart_boundary}"
|
||||
@params['MULTIPART_TYPE'] = @mpart_type
|
||||
@params['MULTIPART_BOUNDARY'] = @mpart_boundary
|
||||
@search = BMHSearch.new(@mpart_boundary, 100)
|
||||
end
|
||||
end
|
||||
|
||||
dispatcher.request_begins(@params) if dispatcher
|
||||
@search.find @params.http_body if @search and @params.http_body.length > @mpart_boundary.length
|
||||
|
||||
# Some clients (like FF1.0) report 0 for body and then send a body. This will probably truncate them but at least the request goes through usually.
|
||||
if remain <= 0
|
||||
|
@ -235,15 +222,6 @@ module Mongrel
|
|||
read_body(remain, content_length, dispatcher)
|
||||
end
|
||||
|
||||
if @search and @body
|
||||
STDERR.puts "number of boundaries: #{@search.nfound}"
|
||||
@body.rewind
|
||||
@search.pop.each do |boundary|
|
||||
@body.seek(boundary - 2)
|
||||
STDERR.puts "BOUNDARY at #{boundary}: #{@body.read(@mpart_boundary.length + 2)}"
|
||||
end
|
||||
end
|
||||
|
||||
@body.rewind if @body
|
||||
end
|
||||
|
||||
|
@ -256,7 +234,6 @@ module Mongrel
|
|||
begin
|
||||
# write the odd sized chunk first
|
||||
@params.http_body = read_socket(remain % Const::CHUNK_SIZE)
|
||||
@search.find(@params.http_body) if @search
|
||||
|
||||
remain -= @body.write(@params.http_body)
|
||||
dispatcher.request_progress(@params, remain, total) if dispatcher
|
||||
|
@ -265,7 +242,6 @@ module Mongrel
|
|||
until remain <= 0 or @socket.closed?
|
||||
# ASSUME: we are writing to a disk and these writes always write the requested amount
|
||||
@params.http_body = read_socket(Const::CHUNK_SIZE)
|
||||
@search.find(@params.http_body) if @search
|
||||
remain -= @body.write(@params.http_body)
|
||||
dispatcher.request_progress(@params, remain, total) if dispatcher
|
||||
end
|
||||
|
|
|
@ -1,150 +0,0 @@
|
|||
# Copyright (c) 2005 Zed A. Shaw
|
||||
# You can redistribute it and/or modify it under the same terms as Ruby.
|
||||
#
|
||||
# Additional work donated by contributors. See http://mongrel.rubyforge.org/attributions.html
|
||||
# for more information.
|
||||
|
||||
require 'test/unit'
|
||||
require 'mongrel'
|
||||
require 'http11'
|
||||
require File.dirname(__FILE__) + "/testhelp.rb"
|
||||
|
||||
include Mongrel
|
||||
|
||||
class BMHSearchTest < Test::Unit::TestCase
|
||||
|
||||
def setup
|
||||
@needle = "needle"
|
||||
end
|
||||
|
||||
def teardown
|
||||
end
|
||||
|
||||
def test_operations
|
||||
to_find = "this has a needle and a need"
|
||||
s = BMHSearch.new(@needle, 10)
|
||||
assert s.needle == @needle, "internal needle does not match needle"
|
||||
assert_equal s.max_find, 10, "max_found isn't right"
|
||||
|
||||
n = s.find(to_find)
|
||||
assert_equal n,1,"wrong number found"
|
||||
assert_equal n,s.nfound, "returned found and nfound don't match"
|
||||
|
||||
f = s.pop
|
||||
assert f, "didn't get the locations"
|
||||
assert_equal f.length,n,"wrong number of returned finds"
|
||||
assert_equal f[0],11,"wrong location returned"
|
||||
|
||||
assert_equal to_find.length, s.total, "total doesn't match searched length"
|
||||
assert s.has_trailing?, "should have trailing data"
|
||||
|
||||
to_find2 = "le through a needle dude"
|
||||
n = s.find(to_find2)
|
||||
|
||||
assert_equal n,2,"second find not working"
|
||||
assert_equal n,s.nfound, "nfound don't match"
|
||||
|
||||
f = s.pop
|
||||
assert f, "pop after second find not working"
|
||||
assert_equal f[0],24,"first location of second find not right"
|
||||
assert_equal f[1],41,"second location of second find not right"
|
||||
|
||||
assert_equal to_find.length+to_find2.length,s.total,"wrong total length"
|
||||
assert !s.has_trailing?, "should not have trailing"
|
||||
|
||||
assert_raise BMHSearchError do
|
||||
11.times { s.find("this has a needle") }
|
||||
end
|
||||
end
|
||||
|
||||
def test_boundaries_over_chunks
|
||||
hay = ["zed is a tool ba",
|
||||
"ggiethis has the baggieb",
|
||||
"aggie of baggie-baggie tricks in another ",
|
||||
"baggie"]
|
||||
|
||||
total = "zed is a tool baggiethis has the baggiebaggie of baggie-baggie tricks in another baggie"
|
||||
|
||||
s = BMHSearch.new("baggie", 20)
|
||||
|
||||
hay.each {|h| s.find(h) }
|
||||
hay_found = s.pop
|
||||
|
||||
s = BMHSearch.new("baggie", 20)
|
||||
s.find(total)
|
||||
total_found = s.pop
|
||||
|
||||
assert_equal hay_found.length, total_found.length, "wrong number of needles found"
|
||||
|
||||
total_found.length.times do |i|
|
||||
assert_equal total_found[i], hay_found[i], "boundary doesn't match"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def test_fuzzing
|
||||
begin
|
||||
has_rfuzz = require 'rfuzz/random'
|
||||
rescue Object
|
||||
has_rfuzz = false
|
||||
end
|
||||
|
||||
if has_rfuzz
|
||||
r = RFuzz::RandomGenerator.new
|
||||
needles = r.base64(20, 64).collect {|n| "\r\n" + n.strip }
|
||||
needles.each do |needle|
|
||||
next if needle.length == 0
|
||||
|
||||
nchunks = r.num(10) + 10
|
||||
bmh = BMHSearch.new(needle, nchunks+1)
|
||||
total = "" # used to collect the full string for compare
|
||||
|
||||
# each needle is sprinkled into up to 100 chunks
|
||||
nchunks.times do
|
||||
# each chunk is up to 16k in size
|
||||
chunk = r.bytes(r.num(16 * 1024) + needle.length * 2)
|
||||
chunk.gsub! needle, ""
|
||||
|
||||
# make about 60% go across boundaries
|
||||
if r.num(10) < 6
|
||||
# this one gets cut in two
|
||||
cut_at = r.num(needle.length - 1) + 1
|
||||
n1 = needle[0 ... cut_at]
|
||||
n2 = needle[cut_at .. -1]
|
||||
|
||||
assert_equal n1+n2, needle, "oops, messed up breaking the needle"
|
||||
|
||||
last_nfound = bmh.nfound
|
||||
bmh.find(chunk + n1)
|
||||
assert bmh.has_trailing?, "should have trailing on #{n1}:#{n2} on chunk length: #{chunk.length}"
|
||||
assert_equal last_nfound, bmh.nfound, "shouldn't find it yet"
|
||||
|
||||
bmh.find(n2 + chunk)
|
||||
assert_equal last_nfound+1, bmh.nfound, "should have found the boundary for #{n1}:#{n2} on chunk length: #{chunk.length}"
|
||||
total << chunk + n1 + n2 + chunk
|
||||
else
|
||||
# this one is put in complete
|
||||
bmh.find(chunk + needle)
|
||||
bmh.find(chunk)
|
||||
|
||||
total << chunk + needle + chunk
|
||||
end
|
||||
end
|
||||
|
||||
tbmh = BMHSearch.new(needle, nchunks+1)
|
||||
tbmh.find(total)
|
||||
|
||||
assert_equal total.length, bmh.total, "totals don't match"
|
||||
assert_equal tbmh.nfound, bmh.nfound, "nfound don't match"
|
||||
|
||||
total_found = tbmh.pop
|
||||
hay_found = bmh.pop
|
||||
|
||||
total_found.length.times do |i|
|
||||
assert_equal total_found[i], hay_found[i], "boundary doesn't match"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Loading…
Reference in a new issue