1
0
Fork 0
mirror of https://github.com/puma/puma.git synced 2022-11-09 13:48:40 -05:00

pure ruby URIClassifier passes on C and JRuby both; now time to profile and see if it's worth keeping

git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@722 19e92222-5c0b-0410-8929-a290d50e31e9
This commit is contained in:
evanweaver 2007-10-22 02:58:02 +00:00
parent 76a4d04d1a
commit 27e2243e7c
18 changed files with 70 additions and 1396 deletions

View file

@ -8,11 +8,9 @@
#include <string.h>
#include "http11_parser.h"
#include <ctype.h>
#include "tst.h"
static VALUE mMongrel;
static VALUE cHttpParser;
static VALUE cURIClassifier;
static VALUE eHttpParserError;
#define id_handler_map rb_intern("@handler_map")
@ -363,191 +361,6 @@ VALUE HttpParser_nread(VALUE self)
return INT2FIX(http->nread);
}
void URIClassifier_free(void *data)
{
TRACE();
if(data) {
tst_cleanup((struct tst *)data);
}
}
VALUE URIClassifier_alloc(VALUE klass)
{
VALUE obj;
struct tst *tst = tst_init(TRIE_INCREASE);
TRACE();
assert(tst && "failed to initialize trie structure");
obj = Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst);
return obj;
}
/**
* call-seq:
* URIClassifier.new -> URIClassifier
*
* Initializes a new URIClassifier object that you can use to associate URI sequences
* with objects. You can actually use it with any string sequence and any objects,
* but it's mostly used with URIs.
*
* It uses TST from http://www.octavian.org/cs/software.html to build an ternary search
* trie to hold all of the URIs. It uses this to do an initial search for the a URI
* prefix, and then to break the URI into SCRIPT_NAME and PATH_INFO portions. It actually
* will do two searches most of the time in order to find the right handler for the
* registered prefix portion.
*
*/
VALUE URIClassifier_init(VALUE self)
{
VALUE hash;
/* we create an internal hash to protect stuff from the GC */
hash = rb_hash_new();
rb_ivar_set(self, id_handler_map, hash);
return self;
}
/**
* call-seq:
* uc.register("/someuri", SampleHandler.new) -> nil
*
* Registers the SampleHandler (one for all requests) with the "/someuri".
* When URIClassifier::resolve is called with "/someuri" it'll return
* SampleHandler immediately. When called with "/someuri/iwant" it'll also
* return SomeHandler immediatly, with no additional searches, but it will
* return path info with "/iwant".
*
* You actually can reuse this class to register nearly anything and
* quickly resolve it. This could be used for caching, fast mapping, etc.
* The downside is it uses much more memory than a Hash, but it can be
* a lot faster. It's main advantage is that it works on prefixes, which
* is damn hard to get right with a Hash.
*/
VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler)
{
int rc = 0;
void *ptr = NULL;
struct tst *tst = NULL;
DATA_GET(self, struct tst, tst);
rc = tst_insert((unsigned char *)StringValueCStr(uri), (void *)handler , tst, 0, &ptr);
if(rc == TST_DUPLICATE_KEY) {
rb_raise(rb_eStandardError, "Handler already registered with that name");
} else if(rc == TST_ERROR) {
rb_raise(rb_eStandardError, "Memory error registering handler");
} else if(rc == TST_NULL_KEY) {
rb_raise(rb_eStandardError, "URI was empty");
}
rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler);
return Qnil;
}
/**
* call-seq:
* uc.unregister("/someuri")
*
* Yep, just removes this uri and it's handler from the trie.
*/
VALUE URIClassifier_unregister(VALUE self, VALUE uri)
{
void *handler = NULL;
struct tst *tst = NULL;
DATA_GET(self, struct tst, tst);
handler = tst_delete((unsigned char *)StringValueCStr(uri), tst);
if(handler) {
rb_hash_delete(rb_ivar_get(self, id_handler_map), uri);
return (VALUE)handler;
} else {
return Qnil;
}
}
/**
* call-seq:
* uc.resolve("/someuri") -> "/someuri", "", handler
* uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler
* uc.resolve("/notfound/orhere") -> nil, nil, nil
* uc.resolve("/") -> "/", "/", handler # if uc.register("/", handler)
* uc.resolve("/path/from/root") -> "/", "/path/from/root", handler # if uc.register("/", handler)
*
* Attempts to resolve either the whole URI or at the longest prefix, returning
* the prefix (as script_info), path (as path_info), and registered handler
* (usually an HttpHandler). If it doesn't find a handler registered at the longest
* match then it returns nil,nil,nil.
*
* Because the resolver uses a trie you are able to register a handler at *any* character
* in the URI and it will be handled as long as it's the longest prefix. So, if you
* registered handler #1 at "/something/lik", and #2 at "/something/like/that", then a
* a search for "/something/like" would give you #1. A search for "/something/like/that/too"
* would give you #2.
*
* This is very powerful since it means you can also attach handlers to parts of the ;
* (semi-colon) separated path params, any part of the path, use off chars, anything really.
* It also means that it's very efficient to do this only taking as long as the URI has
* characters.
*
* A slight modification to the CGI 1.2 standard is given for handlers registered to "/".
* CGI expects all CGI scripts to be at some script path, so it doesn't really say anything
* about a script that handles the root. To make this work, the resolver will detect that
* the requested handler is at "/", and return that for script_name, and then simply return
* the full URI back as path_info.
*
* It expects strings with no embedded '\0' characters. Don't try other string-like stuff yet.
*/
VALUE URIClassifier_resolve(VALUE self, VALUE uri)
{
void *handler = NULL;
int pref_len = 0;
struct tst *tst = NULL;
VALUE result;
unsigned char *uri_str = NULL;
DATA_GET(self, struct tst, tst);
uri_str = (unsigned char *)StringValueCStr(uri);
handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len);
/* setup for multiple return values */
result = rb_ary_new();
if(handler) {
rb_ary_push(result, rb_str_substr (uri, 0, pref_len));
/* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */
if(pref_len == 1 && uri_str[0] == '/') {
/* matches the root URI so we have to use the whole URI as the path_info */
rb_ary_push(result, uri);
} else {
/* matches a script so process like normal */
rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
}
rb_ary_push(result, (VALUE)handler);
} else {
/* not found so push back nothing */
rb_ary_push(result, Qnil);
rb_ary_push(result, Qnil);
rb_ary_push(result, Qnil);
}
return result;
}
void Init_http11()
{
@ -586,11 +399,4 @@ void Init_http11()
rb_define_method(cHttpParser, "error?", HttpParser_has_error,0);
rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0);
rb_define_method(cHttpParser, "nread", HttpParser_nread,0);
cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject);
rb_define_alloc_func(cURIClassifier, URIClassifier_alloc);
rb_define_method(cURIClassifier, "initialize", URIClassifier_init, 0);
rb_define_method(cURIClassifier, "register", URIClassifier_register, 2);
rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1);
rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1);
}

View file

@ -1,40 +0,0 @@
struct node
{
unsigned char value;
struct node *left;
struct node *middle;
struct node *right;
};
struct tst
{
int node_line_width;
struct node_lines *node_lines;
struct node *free_list;
struct node *head[127];
};
struct node_lines
{
struct node *node_line;
struct node_lines *next;
};
enum tst_constants
{
TST_OK, TST_ERROR, TST_NULL_KEY, TST_DUPLICATE_KEY, TST_REPLACE, TST_LONGEST_MATCH
};
struct tst *tst_init(int node_line_width);
int tst_insert(unsigned char *key, void *data, struct tst *tst, int option, void **exist_ptr);
void *tst_search(const unsigned char *key, struct tst *tst, int option, unsigned int *match_len);
void *tst_delete(unsigned char *key, struct tst *tst);
void tst_cleanup(struct tst *tst);

View file

@ -1,23 +0,0 @@
#include "tst.h"
#include <stdio.h>
#include <stdlib.h>
void tst_cleanup(struct tst *tst)
{
struct node_lines *current_line;
struct node_lines *next_line;
next_line = tst->node_lines;
do
{
current_line = next_line;
next_line = current_line->next;
free(current_line->node_line);
free(current_line);
}
while(next_line != NULL);
free(tst);
}

View file

@ -1,146 +0,0 @@
#include "tst.h"
#include <stdio.h>
#include <stdlib.h>
void *tst_delete(unsigned char *key, struct tst *tst)
{
struct node *current_node;
struct node *current_node_parent;
struct node *last_branch;
struct node *last_branch_parent;
struct node *next_node;
struct node *last_branch_replacement;
struct node *last_branch_dangling_child;
int key_index;
if(key[0] == 0)
return NULL;
if(tst->head[(int)key[0]] == NULL)
return NULL;
last_branch = NULL;
last_branch_parent = NULL;
current_node = tst->head[(int)key[0]];
current_node_parent = NULL;
key_index = 1;
while(current_node != NULL)
{
if(key[key_index] == current_node->value)
{
if( (current_node->left != NULL) || (current_node->right != NULL) )
{
last_branch = current_node;
last_branch_parent = current_node_parent;
}
if(key[key_index] == 0)
break;
else
{
current_node_parent = current_node;
current_node = current_node->middle;
key_index++;
continue;
}
}
else if( ((current_node->value == 0) && (key[key_index] < 64)) ||
((current_node->value != 0) && (key[key_index] <
current_node->value)) )
{
last_branch_parent = current_node;
current_node_parent = current_node;
current_node = current_node->left;
last_branch = current_node;
continue;
}
else
{
last_branch_parent = current_node;
current_node_parent = current_node;
current_node = current_node->right;
last_branch = current_node;
continue;
}
}
if(current_node == NULL)
return NULL;
if(last_branch == NULL)
{
next_node = tst->head[(int)key[0]];
tst->head[(int)key[0]] = NULL;
}
else if( (last_branch->left == NULL) && (last_branch->right == NULL) )
{
if(last_branch_parent->left == last_branch)
last_branch_parent->left = NULL;
else
last_branch_parent->right = NULL;
next_node = last_branch;
}
else
{
if( (last_branch->left != NULL) && (last_branch->right != NULL) )
{
last_branch_replacement = last_branch->right;
last_branch_dangling_child = last_branch->left;
}
else if(last_branch->right != NULL)
{
last_branch_replacement = last_branch->right;
last_branch_dangling_child = NULL;
}
else
{
last_branch_replacement = last_branch->left;
last_branch_dangling_child = NULL;
}
if(last_branch_parent == NULL)
tst->head[(int)key[0]]=last_branch_replacement;
else
{
if (last_branch_parent->left == last_branch)
last_branch_parent->left = last_branch_replacement;
else if (last_branch_parent->right == last_branch)
last_branch_parent->right = last_branch_replacement;
else
last_branch_parent->middle = last_branch_replacement;
}
if(last_branch_dangling_child != NULL)
{
current_node = last_branch_replacement;
while (current_node->left != NULL)
current_node = current_node->left;
current_node->left = last_branch_dangling_child;
}
next_node = last_branch;
}
do
{
current_node = next_node;
next_node = current_node->middle;
current_node->left = NULL;
current_node->right = NULL;
current_node->middle = tst->free_list;
tst->free_list = current_node;
}
while(current_node->value != 0);
return next_node;
}

View file

@ -1,38 +0,0 @@
#include "tst.h"
#include <stdio.h>
#include <stdlib.h>
int tst_grow_node_free_list(struct tst *tst)
{
struct node *current_node;
struct node_lines *new_line;
int i;
if((new_line = (struct node_lines *) malloc(sizeof(struct node_lines))) == NULL)
return TST_ERROR;
if((new_line->node_line = (struct node *)
calloc(tst->node_line_width, sizeof(struct node))) == NULL)
{
free(new_line);
return TST_ERROR;
}
else
{
new_line->next = tst->node_lines;
tst->node_lines = new_line;
}
current_node = tst->node_lines->node_line;
tst->free_list = current_node;
for (i = 1; i < tst->node_line_width; i++)
{
current_node->middle = &(tst->node_lines->node_line[i]);
current_node = current_node->middle;
}
current_node->middle = NULL;
return 1;
}

View file

@ -1,41 +0,0 @@
#include "tst.h"
#include <stdio.h>
#include <stdlib.h>
struct tst *tst_init(int width)
{
struct tst *tst;
struct node *current_node;
int i;
if((tst = (struct tst *) calloc(1, sizeof(struct tst))) == NULL)
return NULL;
if ((tst->node_lines = (struct node_lines *) calloc(1, sizeof(struct node_lines))) == NULL)
{
free(tst);
return NULL;
}
tst->node_line_width = width;
tst->node_lines->next = NULL;
if ((tst->node_lines->node_line = (struct node *) calloc(width, sizeof(struct node))) == NULL)
{
free(tst->node_lines);
free(tst);
return NULL;
}
current_node = tst->node_lines->node_line;
tst->free_list = current_node;
for (i = 1; i < width; i++)
{
current_node->middle = &(tst->node_lines->node_line[i]);
current_node = current_node->middle;
}
current_node->middle = NULL;
return tst;
}

View file

@ -1,218 +0,0 @@
#include "tst.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int tst_grow_node_free_list(struct tst *tst);
int tst_insert(unsigned char *key, void *data, struct tst *tst, int option, void **exist_ptr)
{
struct node *current_node;
struct node *new_node_tree_begin = NULL;
struct node *new_node;
int key_index;
int perform_loop = 1;
if (key == NULL)
return TST_NULL_KEY;
if(key[0] == 0)
return TST_NULL_KEY;
if(tst->head[(int)key[0]] == NULL)
{
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
return TST_ERROR;
}
tst->head[(int)key[0]] = tst->free_list;
tst->free_list = tst->free_list->middle;
current_node = tst->head[(int)key[0]];
current_node->value = key[1];
if(key[1] == 0)
{
current_node->middle = data;
return TST_OK;
}
else
perform_loop = 0;
}
current_node = tst->head[(int)key[0]];
key_index = 1;
while(perform_loop == 1)
{
if(key[key_index] == current_node->value)
{
if(key[key_index] == 0)
{
if (option == TST_REPLACE)
{
if (exist_ptr != NULL)
*exist_ptr = current_node->middle;
current_node->middle = data;
return TST_OK;
}
else
{
if (exist_ptr != NULL)
*exist_ptr = current_node->middle;
return TST_DUPLICATE_KEY;
}
}
else
{
if(current_node->middle == NULL)
{
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
return TST_ERROR;
}
current_node->middle = tst->free_list;
tst->free_list = tst->free_list->middle;
new_node_tree_begin = current_node;
current_node = current_node->middle;
current_node->value = key[key_index];
break;
}
else
{
current_node = current_node->middle;
key_index++;
continue;
}
}
}
if(key[key_index] == 0)
{
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
return TST_ERROR;
}
new_node = tst->free_list;
tst->free_list = tst->free_list->middle;
memcpy((void*)new_node, (void*)current_node, sizeof(struct node));
current_node->value = 0;
if(new_node->value < 64)
{
current_node->left = new_node;
current_node->right = '\0';
}
else
{
current_node->left = '\0';
current_node->right = new_node;
}
current_node->middle = data;
return TST_OK;
}
if( ((current_node->value == 0) && (key[key_index] < 64)) ||
((current_node->value != 0) && (key[key_index] <
current_node->value)) )
{
if (current_node->left == NULL)
{
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
return TST_ERROR;
}
current_node->left = tst->free_list;
tst->free_list = tst->free_list->middle;
new_node_tree_begin = current_node;
current_node = current_node->left;
current_node->value = key[key_index];
if(key[key_index] == 0)
{
current_node->middle = data;
return TST_OK;
}
else
break;
}
else
{
current_node = current_node->left;
continue;
}
}
else
{
if (current_node->right == NULL)
{
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
return TST_ERROR;
}
current_node->right = tst->free_list;
tst->free_list = tst->free_list->middle;
new_node_tree_begin = current_node;
current_node = current_node->right;
current_node->value = key[key_index];
break;
}
else
{
current_node = current_node->right;
continue;
}
}
}
do
{
key_index++;
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
{
current_node = new_node_tree_begin->middle;
while (current_node->middle != NULL)
current_node = current_node->middle;
current_node->middle = tst->free_list;
tst->free_list = new_node_tree_begin->middle;
new_node_tree_begin->middle = NULL;
return TST_ERROR;
}
}
if(tst->free_list == NULL)
{
if(tst_grow_node_free_list(tst) != 1)
return TST_ERROR;
}
current_node->middle = tst->free_list;
tst->free_list = tst->free_list->middle;
current_node = current_node->middle;
current_node->value = key[key_index];
} while(key[key_index] !=0);
current_node->middle = data;
return TST_OK;
}

View file

@ -1,73 +0,0 @@
#include "tst.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
void *tst_search(const unsigned char *key, struct tst *tst, int option,
unsigned int *match_len)
{
struct node *current_node;
struct node *longest_match = NULL;
unsigned int longest_match_len = 0;
int key_index;
assert(key != NULL && "key can't be NULL");
assert(tst != NULL && "tst can't be NULL");
if (key[0] == 0)
return NULL;
if (tst->head[(int) key[0]] == NULL)
return NULL;
if (match_len)
*match_len = 0;
current_node = tst->head[(int) key[0]];
key_index = 1;
while (current_node != NULL) {
if (key[key_index] == current_node->value) {
if (current_node->value == 0) {
if (match_len)
*match_len = key_index;
return current_node->middle;
} else {
current_node = current_node->middle;
key_index++;
continue;
}
} else {
if (current_node->value == 0) {
if (option & TST_LONGEST_MATCH) {
longest_match = current_node->middle;
longest_match_len = key_index;
}
if (key[key_index] < 64) {
current_node = current_node->left;
continue;
} else {
current_node = current_node->right;
continue;
}
} else {
if (key[key_index] < current_node->value) {
current_node = current_node->left;
continue;
} else {
current_node = current_node->right;
continue;
}
}
}
}
if (match_len)
*match_len = longest_match_len;
return longest_match;
}

View file

@ -83,8 +83,6 @@ public class Http11 extends RubyObject {
cHttpParser.defineFastMethod("error?",cf.getFastMethod("has_error"));
cHttpParser.defineFastMethod("finished?",cf.getFastMethod("is_finished"));
cHttpParser.defineFastMethod("nread",cf.getFastMethod("nread"));
URIClassifier.createURIClassifier(runtime, mMongrel);
}
private Ruby runtime;

View file

@ -1,129 +0,0 @@
/***** BEGIN LICENSE BLOCK *****
* Version: CPL 1.0/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Common Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.eclipse.org/legal/cpl-v10.html
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* Copyright (C) 2007 Ola Bini <ola@ologix.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the CPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the CPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****/
package org.jruby.mongrel;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyHash;
import org.jruby.RubyModule;
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.runtime.Block;
import org.jruby.runtime.CallbackFactory;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.tst.TernarySearchTree;
/**
* @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
*/
public class URIClassifier extends RubyObject {
public final static int TRIE_INCREASE = 30;
private static ObjectAllocator ALLOCATOR = new ObjectAllocator() {
public IRubyObject allocate(Ruby runtime, RubyClass klass) {
return new URIClassifier(runtime, klass);
}
};
public static void createURIClassifier(Ruby runtime, RubyModule mMongrel) {
RubyClass cURIClassifier = mMongrel.defineClassUnder("URIClassifier",runtime.getObject(),ALLOCATOR);
CallbackFactory cf = runtime.callbackFactory(URIClassifier.class);
cURIClassifier.defineFastMethod("initialize",cf.getFastMethod("initialize"));
cURIClassifier.defineFastMethod("register",cf.getFastMethod("register",IRubyObject.class,IRubyObject.class));
cURIClassifier.defineFastMethod("unregister",cf.getFastMethod("unregister",IRubyObject.class));
cURIClassifier.defineFastMethod("resolve",cf.getFastMethod("resolve",IRubyObject.class));
}
private TernarySearchTree tst;
public URIClassifier(Ruby runtime, RubyClass clazz) {
super(runtime,clazz);
tst = new TernarySearchTree(TRIE_INCREASE);
}
public IRubyObject initialize() {
setInstanceVariable("@handler_map",RubyHash.newHash(getRuntime()));
return this;
}
public IRubyObject register(IRubyObject uri, IRubyObject handler) {
Object[] ptr = new Object[]{null};
int rc = 0;
rc = tst.insert(uri.toString(),handler,0,ptr);
if(rc == TernarySearchTree.TST_DUPLICATE_KEY) {
throw getRuntime().newStandardError("Handler already registered with that name");
} else if(rc == TernarySearchTree.TST_ERROR) {
throw getRuntime().newStandardError("Memory error registering handler");
} else if(rc == TernarySearchTree.TST_NULL_KEY) {
throw getRuntime().newStandardError("URI was empty");
}
((RubyHash)getInstanceVariable("@handler_map")).aset(uri,handler);
return getRuntime().getNil();
}
public IRubyObject unregister(IRubyObject uri) {
IRubyObject handler = (IRubyObject)tst.delete(uri.toString());
if(null != handler) {
((RubyHash)getInstanceVariable("@handler_map")).delete(uri,Block.NULL_BLOCK);
return handler;
}
return getRuntime().getNil();
}
public IRubyObject resolve(IRubyObject _ri) {
IRubyObject handler = null;
int[] pref_len = new int[]{0};
RubyArray result;
String uri_str;
RubyString uri = _ri.convertToString();
uri_str = uri.toString();
handler = (IRubyObject)tst.search(uri_str,pref_len);
result = getRuntime().newArray();
if(handler != null) {
result.append(uri.substr(0, pref_len[0]));
if(pref_len[0] == 1 && uri_str.startsWith("/")) {
result.append(uri);
} else {
result.append(uri.substr(pref_len[0],uri.getByteList().length()));
}
result.append(handler);
} else {
result.append(getRuntime().getNil());
result.append(getRuntime().getNil());
result.append(getRuntime().getNil());
}
return result;
}
}// URIClassifier

View file

@ -1,18 +0,0 @@
/*
* See LICENSE file.
*/
/**
* $Id: $
*/
package org.jruby.tst;
/**
* @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
* @version $Revision: $
*/
public class Node {
public char value;
public Node left;
public Object middle;
public Node right;
}// Node

View file

@ -1,16 +0,0 @@
/*
* See LICENSE file.
*/
/**
* $Id: $
*/
package org.jruby.tst;
/**
* @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
* @version $Revision: $
*/
public class NodeLines {
public Node[] node_line;
public NodeLines next;
}// NodeLines

View file

@ -1,433 +0,0 @@
/*
* See LICENSE file.
*/
/**
* $Id: $
*/
package org.jruby.tst;
/**
* @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
* @version $Revision: $
*/
public class TernarySearchTree {
public final static int TST_OK = 0x1;
public final static int TST_ERROR = 0x2;
public final static int TST_NULL_KEY = 0x4;
public final static int TST_DUPLICATE_KEY = 0x8;
public final static int TST_REPLACE = 0x10;
public int node_line_width;
public NodeLines node_lines;
public Node free_list;
public Node[] head = new Node[127];
public TernarySearchTree(final int width) {
this.node_lines = new NodeLines();
this.node_line_width = width;
this.node_lines.next = null;
this.node_lines.node_line = new Node[width];
for(int i=0;i<width;i++) {
this.node_lines.node_line[i] = new Node();
}
Node current_node = this.node_lines.node_line[0];
this.free_list = current_node;
for(int i=1; i<width; i++) {
current_node.middle = this.node_lines.node_line[i];
current_node = (Node)current_node.middle;
}
current_node.middle = null;
}
public int insert(final String key, final Object data, final int option, final Object[] exist_ptr) {
Node current_node = null;
Node new_node_tree_begin = null;
int key_index = 0;
boolean perform_loop = true;
if(null == key || key.length() == 0) {
return TST_NULL_KEY;
}
if(this.head[key.charAt(0)] == null) {
if(this.free_list == null) {
if(!grow_node_free_list()) {
return TST_ERROR;
}
}
this.head[key.charAt(0)] = this.free_list;
this.free_list = (Node)this.free_list.middle;
current_node = this.head[key.charAt(0)];
if(key.length() == 1) {
current_node.value = 0;
current_node.middle = data;
return TST_OK;
} else {
current_node.value = key.charAt(1);
perform_loop = false;
}
}
current_node = this.head[key.charAt(0)];
key_index = 1;
char curr = 0;
while(perform_loop) {
if(key_index < key.length()) {
curr = key.charAt(key_index);
} else {
curr = 0;
}
if(curr == current_node.value) {
if(curr == 0) {
if(option == TST_REPLACE) {
if(exist_ptr.length > 0) {
exist_ptr[0] = current_node.middle;
}
current_node.middle = data;
return TST_OK;
} else {
if(exist_ptr.length > 0) {
exist_ptr[0] = current_node.middle;
}
return TST_DUPLICATE_KEY;
}
} else {
if(current_node.middle == null) {
if(this.free_list == null) {
if(!grow_node_free_list()) {
return TST_ERROR;
}
}
current_node.middle = this.free_list;
this.free_list = (Node)this.free_list.middle;
new_node_tree_begin = current_node;
current_node = (Node)current_node.middle;
current_node.value = curr;
break;
} else {
current_node = (Node)current_node.middle;
key_index++;
continue;
}
}
}
if((current_node.value == 0 && curr < 64) ||
(current_node.value != 0 && curr < current_node.value)) {
if(current_node.left == null) {
if(this.free_list == null) {
if(!grow_node_free_list()) {
return TST_ERROR;
}
}
current_node.left = this.free_list;
this.free_list = (Node)this.free_list.middle;
new_node_tree_begin = current_node;
current_node = current_node.left;
current_node.value = curr;
if(curr == 0) {
current_node.middle = data;
return TST_OK;
} else {
break;
}
} else {
current_node = current_node.left;
continue;
}
} else {
if(current_node.right == null) {
if(this.free_list == null) {
if(!grow_node_free_list()) {
return TST_ERROR;
}
}
current_node.right = this.free_list;
this.free_list = (Node)this.free_list.middle;
new_node_tree_begin = current_node;
current_node = current_node.right;
current_node.value = curr;
break;
} else {
current_node = current_node.right;
continue;
}
}
}
do {
key_index++;
if(key_index < key.length()) {
curr = key.charAt(key_index);
} else {
curr = 0;
}
if(this.free_list == null) {
if(!grow_node_free_list()) {
current_node = (Node)new_node_tree_begin.middle;
while(current_node.middle != null) {
current_node = (Node)current_node.middle;
}
current_node.middle = this.free_list;
this.free_list = (Node)new_node_tree_begin.middle;
new_node_tree_begin.middle = null;
return TST_ERROR;
}
}
if(this.free_list == null) {
if(!grow_node_free_list()) {
return TST_ERROR;
}
}
current_node.middle = this.free_list;
this.free_list = (Node)this.free_list.middle;
current_node = (Node)current_node.middle;
current_node.value = curr;
} while(curr != 0);
current_node.middle = data;
return TST_OK;
}
public Object search(final String key, final int[] prefix_len) {
Node current_node = null;
Object longest_match = null;
int key_index = 0;
if(key == null) {
throw new IllegalArgumentException("key can't be null");
}
if(key.length() == 0 || this.head[key.charAt(0)] == null) {
return null;
}
if(prefix_len.length > 0) {
prefix_len[0] = 0;
}
current_node = this.head[key.charAt(0)];
key_index = 1;
char curr = 0;
while(null != current_node) {
if(key_index < key.length()) {
curr = key.charAt(key_index);
} else {
curr = 0;
}
if(curr == current_node.value) {
if(current_node.value == 0) {
if(prefix_len.length>0) {
prefix_len[0] = key_index;
}
return current_node.middle;
} else {
current_node = (Node)current_node.middle;
if(current_node != null && current_node.value == 0) {
if(prefix_len.length>0) {
prefix_len[0] = key_index+1;
}
longest_match = current_node.middle;
}
key_index++;
continue;
}
} else if((current_node.value == 0 && curr < 64) ||
(current_node.value != 0 && curr < current_node.value)) {
if(current_node.value == 0) {
if(prefix_len.length>0) {
prefix_len[0] = key_index;
}
longest_match = current_node.middle;
}
current_node = current_node.left;
continue;
} else {
if(current_node.value == 0) {
if(prefix_len.length>0) {
prefix_len[0] = key_index;
}
longest_match = current_node.middle;
}
current_node = current_node.right;
continue;
}
}
return longest_match;
}
public Object delete(final String key) {
Node current_node = null;
Node current_node_parent = null;
Node last_branch = null;
Node last_branch_parent = null;
Object next_node = null;
Node last_branch_replacement = null;
Node last_branch_dangling_child = null;
int key_index = 1;
if(key.length() == 0 || this.head[key.charAt(0)] == null) {
return null;
}
current_node = this.head[key.charAt(0)];
char curr = 0;
while(null != current_node) {
if(key_index < key.length()) {
curr = key.charAt(key_index);
} else {
curr = 0;
}
if(curr == current_node.value) {
if(current_node.left != null || current_node.right != null) {
last_branch = current_node;
last_branch_parent = current_node_parent;
}
if(curr == 0) {
break;
}
current_node_parent = current_node;
current_node = (Node)current_node.middle;
key_index++;
continue;
} else if((current_node.value == 0 && curr < 64) ||
(current_node.value != 0&& curr < current_node.value)) {
last_branch_parent = current_node;
current_node_parent = current_node;
current_node = current_node.left;
last_branch = current_node;
continue;
} else {
last_branch_parent = current_node;
current_node_parent = current_node;
current_node = current_node.right;
last_branch = current_node;
continue;
}
}
if(null == current_node) {
return null;
}
if(null == last_branch) {
next_node = this.head[key.charAt(0)];
this.head[key.charAt(0)] = null;
} else if(last_branch.left == null && last_branch.right == null) {
if(last_branch_parent.left == last_branch) {
last_branch_parent.left = null;
} else {
last_branch_parent.right = null;
}
next_node = last_branch;
} else {
if(last_branch.left != null && last_branch.right != null) {
last_branch_replacement = last_branch.right;
last_branch_dangling_child = last_branch.left;
} else if(last_branch.right != null) {
last_branch_replacement = last_branch.right;
last_branch_dangling_child = null;
} else {
last_branch_replacement = last_branch.left;
last_branch_dangling_child = null;
}
if(last_branch_parent == null) {
this.head[key.charAt(0)] = last_branch_replacement;
} else {
if(last_branch_parent.left == last_branch) {
last_branch_parent.left = last_branch_replacement;
} else if(last_branch_parent.right == last_branch) {
last_branch_parent.right = last_branch_replacement;
} else {
last_branch_parent.middle = last_branch_replacement;
}
}
if(last_branch_dangling_child != null) {
current_node = last_branch_replacement;
while(current_node.left != null) {
current_node = current_node.left;
}
current_node.left = last_branch_dangling_child;
}
next_node = last_branch;
}
do {
current_node = (Node)next_node;
next_node = current_node.middle;
current_node.left = null;
current_node.right = null;
current_node.middle = this.free_list;
this.free_list = current_node;
} while(current_node.value != 0);
return next_node;
}
public boolean grow_node_free_list() {
Node current_node = null;
NodeLines new_line = null;
new_line = new NodeLines();
new_line.node_line = new Node[this.node_line_width];
for(int i=0;i<this.node_line_width;i++) {
new_line.node_line[i] = new Node();
}
new_line.next = this.node_lines;
this.node_lines = new_line;
current_node = this.node_lines.node_line[0];
this.free_list = current_node;
for(int i=1;i<this.node_line_width;i++) {
current_node.middle = this.node_lines.node_line[i];
current_node = (Node)current_node.middle;
}
current_node.middle = null;
return true;
}
public static void main(final String[] args) {
final TernarySearchTree tst = new TernarySearchTree(30);
int ret = tst.insert("fOO","VAL1",0, new Object[0]);
System.err.println("ret: " + ret);
ret = tst.insert("bar","VAL2",0, new Object[0]);
System.err.println("ret: " + ret);
ret = tst.insert("baz","VAL3",0, new Object[0]);
System.err.println("ret: " + ret);
ret = tst.insert("zydsfgfd","VAL4",0, new Object[0]);
System.err.println("ret: " + ret);
ret = tst.insert("1242","VAL5",0, new Object[0]);
System.err.println("ret: " + ret);
Object val = tst.delete("fOO");
System.err.println("del: " + val);
int[] pref_len = new int[1];
pref_len[0] = 0;
val = tst.search("ba",pref_len);
System.err.println("search: " + val + " pref_len: " + pref_len[0]);
val = tst.search("bar",pref_len);
System.err.println("search: " + val + " pref_len: " + pref_len[0]);
}
}// TernarySearchTree

View file

@ -41,19 +41,71 @@ require 'uri'
module Mongrel
class URIClassifier
attr_reader :handler_map
attr_reader :routes
class RegistrationError < RuntimeError
end
class UsageError < RuntimeError
end
# Returns the URIs that have been registered with this classifier so far.
# The URIs returned should not be modified as this will cause a memory leak.
# You can use this to inspect the contents of the URIClassifier.
# The URIs returned should not be modified.
def uris
@handler_map.keys
@routes.keys
end
# Simply does an inspect that looks like a Hash inspect.
def inspect
@handler_map.inspect
def initialize
@routes = {}
@matcher = nil
end
def register(path, handler)
if @routes[path]
raise RegistrationError, "#{path.inspect} is already registered"
elsif !path or path.empty?
raise RegistrationError, "Path is empty"
end
@routes[path.dup] = handler
rebuild
end
def unregister(path)
handler = @routes.delete(path)
raise RegistrationError, "#{path.inspect} was not registered" unless handler
rebuild
handler
end
def resolve(request_uri)
raise UsageError, "No routes have been registered" unless @matcher
match = request_uri[@matcher, 0]
if match
path_info = request_uri[match.size..-1]
# A root mounted ("/") handler must resolve such that path info matches the original URI.
path_info = "#{Const::SLASH}#{path_info}" if match == Const::SLASH
[match, path_info, @routes[match]]
else
[nil, nil, nil]
end
end
alias :handler_map :routes # Legacy
private
def rebuild
routes = @routes.sort_by do |path, handler|
# Sort by name
path
end.sort_by do |path, handler|
# Then by descending length
-path.length
end
@matcher = Regexp.new(routes.map do |path, handler|
Regexp.new('^' + Regexp.escape(path))
end.join('|'))
end
end
@ -785,25 +837,16 @@ module Mongrel
# found in the prefix of a request then your handler's HttpHandler::process method
# is called. See Mongrel::URIClassifier#register for more information.
#
# If you set in_front=true then the passed in handler will be put in front in the list.
# Otherwise it's placed at the end of the list.
# If you set in_front=true then the passed in handler will be put in the front of the list
# for that particular URI. Otherwise it's placed at the end of the list.
def register(uri, handler, in_front=false)
script_name, path_info, handlers = @classifier.resolve(uri)
if not handlers
begin
@classifier.register(uri, [handler])
else
if path_info.length == 0 or (script_name == Const::SLASH and path_info == Const::SLASH)
if in_front
handlers.unshift(handler)
else
handlers << handler
end
else
@classifier.register(uri, [handler])
end
rescue URIClassifier::RegistrationError
handlers = @classifier.resolve(uri)[2]
method_name = in_front ? 'unshift' : 'push'
handlers.send(method_name, handler)
end
handler.listener = self
end

View file

@ -385,7 +385,7 @@ module Mongrel
end
results << "<h2>Registered Handlers</h2>"
uris = listener.classifier.handler_map
uris = listener.classifier.routes
results << table("handlers", uris.map {|uri,handlers|
[uri,
"<pre>" +

View file

@ -56,6 +56,7 @@ class ConfiguratorTest < Test::Unit::TestCase
end
end
# pp @config.listeners.values.first.classifier.routes
@config.listeners.each do |host,listener|
assert listener.classifier.uris.length == 3, "Wrong number of registered URIs"

View file

@ -117,6 +117,7 @@ class URIClassifierTest < Test::Unit::TestCase
uri_classifier.register(current, c)
end
# Try to resolve everything with no asserts as a fuzzing
tests.each do |prefix|
current = ""
@ -178,7 +179,6 @@ class URIClassifierTest < Test::Unit::TestCase
tests.each do |uri|
script_name, path_info, handler = uri_classifier.resolve(uri)
# p uri_classifier.resolve(uri)
assert_equal root, script_name, "#{uri} did not resolve to #{root}"
assert_equal uri, path_info
assert_equal 2, handler

View file

@ -20,6 +20,7 @@ require 'benchmark'
require 'digest/sha1'
require 'uri'
require 'stringio'
require 'pp'
require 'mongrel'
require 'mongrel/stats'