pure ruby URIClassifier passes on C and JRuby both; now time to profile and see if it's worth keeping

git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@722 19e92222-5c0b-0410-8929-a290d50e31e9
2022-11-09 13:48:40 -05:00 · 2007-10-22 02:58:02 +00:00 · 2007-10-22 02:58:02 +00:00 · 27e2243e7c
commit 27e2243e7c
parent 76a4d04d1a
18 changed files with 70 additions and 1396 deletions
--- a/ext/http11/http11.c
+++ b/ext/http11/http11.c
@ -8,11 +8,9 @@
 #include <string.h>
 #include "http11_parser.h"
 #include <ctype.h>
-#include "tst.h"

 static VALUE mMongrel;
 static VALUE cHttpParser;
-static VALUE cURIClassifier;
 static VALUE eHttpParserError;

 #define id_handler_map rb_intern("@handler_map")
@ -363,191 +361,6 @@ VALUE HttpParser_nread(VALUE self)
  return INT2FIX(http->nread);
 }

-
-void URIClassifier_free(void *data) 
-{
-  TRACE();
-
-  if(data) {
-    tst_cleanup((struct tst *)data);
-  }
-}
-
-
-
-VALUE URIClassifier_alloc(VALUE klass)
-{
-  VALUE obj;
-  struct tst *tst = tst_init(TRIE_INCREASE);
-  TRACE();
-  assert(tst && "failed to initialize trie structure");
-
-  obj = Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst);
-
-  return obj;
-}
-
-/**
- * call-seq:
- *    URIClassifier.new -> URIClassifier
- *
- * Initializes a new URIClassifier object that you can use to associate URI sequences
- * with objects.  You can actually use it with any string sequence and any objects,
- * but it's mostly used with URIs.
- *
- * It uses TST from http://www.octavian.org/cs/software.html to build an ternary search
- * trie to hold all of the URIs.  It uses this to do an initial search for the a URI
- * prefix, and then to break the URI into SCRIPT_NAME and PATH_INFO portions.  It actually
- * will do two searches most of the time in order to find the right handler for the
- * registered prefix portion.
- *
- */
-VALUE URIClassifier_init(VALUE self)
-{
-  VALUE hash;
-
-  /* we create an internal hash to protect stuff from the GC */
-  hash = rb_hash_new();
-  rb_ivar_set(self, id_handler_map, hash);
-
-  return self;
-}
-
-
-/**
- * call-seq:
- *    uc.register("/someuri", SampleHandler.new) -> nil
- *
- * Registers the SampleHandler (one for all requests) with the "/someuri".
- * When URIClassifier::resolve is called with "/someuri" it'll return
- * SampleHandler immediately.  When called with "/someuri/iwant" it'll also
- * return SomeHandler immediatly, with no additional searches, but it will
- * return path info with "/iwant".
- *
- * You actually can reuse this class to register nearly anything and 
- * quickly resolve it.  This could be used for caching, fast mapping, etc.
- * The downside is it uses much more memory than a Hash, but it can be
- * a lot faster.  It's main advantage is that it works on prefixes, which
- * is damn hard to get right with a Hash.
- */
-VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler)
-{
-  int rc = 0;
-  void *ptr = NULL;
-  struct tst *tst = NULL;
-  DATA_GET(self, struct tst, tst);
-
-  rc = tst_insert((unsigned char *)StringValueCStr(uri), (void *)handler , tst, 0, &ptr);
-
-  if(rc == TST_DUPLICATE_KEY) {
-    rb_raise(rb_eStandardError, "Handler already registered with that name");
-  } else if(rc == TST_ERROR) {
-    rb_raise(rb_eStandardError, "Memory error registering handler");
-  } else if(rc == TST_NULL_KEY) {
-    rb_raise(rb_eStandardError, "URI was empty");
-  }
-
-  rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler);
-
-  return Qnil;
-}
-
-
-/**
- * call-seq:
- *    uc.unregister("/someuri")
- *
- * Yep, just removes this uri and it's handler from the trie.
- */
-VALUE URIClassifier_unregister(VALUE self, VALUE uri)
-{
-  void *handler = NULL;
-  struct tst *tst = NULL;
-  DATA_GET(self, struct tst, tst);
-
-  handler = tst_delete((unsigned char *)StringValueCStr(uri), tst);
-
-  if(handler) {
-    rb_hash_delete(rb_ivar_get(self, id_handler_map), uri);
-
-    return (VALUE)handler;
-  } else {
-    return Qnil;
-  }
-}
-
-
-/**
- * call-seq:
- *    uc.resolve("/someuri") -> "/someuri", "", handler
- *    uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler
- *    uc.resolve("/notfound/orhere") -> nil, nil, nil
- *    uc.resolve("/") -> "/", "/", handler  # if uc.register("/", handler)
- *    uc.resolve("/path/from/root") -> "/", "/path/from/root", handler  # if uc.register("/", handler) 
- * 
- * Attempts to resolve either the whole URI or at the longest prefix, returning
- * the prefix (as script_info), path (as path_info), and registered handler
- * (usually an HttpHandler).  If it doesn't find a handler registered at the longest
- * match then it returns nil,nil,nil.
- *
- * Because the resolver uses a trie you are able to register a handler at *any* character
- * in the URI and it will be handled as long as it's the longest prefix.  So, if you 
- * registered handler #1 at "/something/lik", and #2 at "/something/like/that", then a
- * a search for "/something/like" would give you #1.  A search for "/something/like/that/too"
- * would give you #2.
- * 
- * This is very powerful since it means you can also attach handlers to parts of the ; 
- * (semi-colon) separated path params, any part of the path, use off chars, anything really.
- * It also means that it's very efficient to do this only taking as long as the URI has
- * characters.
- *
- * A slight modification to the CGI 1.2 standard is given for handlers registered to "/".
- * CGI expects all CGI scripts to be at some script path, so it doesn't really say anything
- * about a script that handles the root.  To make this work, the resolver will detect that
- * the requested handler is at "/", and return that for script_name, and then simply return
- * the full URI back as path_info.
- *
- * It expects strings with no embedded '\0' characters.  Don't try other string-like stuff yet.
- */
-VALUE URIClassifier_resolve(VALUE self, VALUE uri)
-{
-  void *handler = NULL;
-  int pref_len = 0;
-  struct tst *tst = NULL;
-  VALUE result;
-  unsigned char *uri_str = NULL;
-
-  DATA_GET(self, struct tst, tst);
-  uri_str = (unsigned char *)StringValueCStr(uri);
-
-  handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len);
-
-  /* setup for multiple return values */
-  result = rb_ary_new();
-
-  if(handler) {
-    rb_ary_push(result, rb_str_substr (uri, 0, pref_len));
-    /* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */
-    if(pref_len == 1 && uri_str[0] == '/') {
-      /* matches the root URI so we have to use the whole URI as the path_info */
-      rb_ary_push(result, uri);
-    } else {
-      /* matches a script so process like normal */
-      rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
-    }
-
-    rb_ary_push(result, (VALUE)handler);
-  } else {
-    /* not found so push back nothing */
-    rb_ary_push(result, Qnil);
-    rb_ary_push(result, Qnil);
-    rb_ary_push(result, Qnil);
-  }
-
-  return result;
-}
-
-
 void Init_http11()
 {

@ -586,11 +399,4 @@ void Init_http11()
  rb_define_method(cHttpParser, "error?", HttpParser_has_error,0);
  rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0);
  rb_define_method(cHttpParser, "nread", HttpParser_nread,0);
-
-  cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject);
-  rb_define_alloc_func(cURIClassifier, URIClassifier_alloc);
-  rb_define_method(cURIClassifier, "initialize", URIClassifier_init, 0);
-  rb_define_method(cURIClassifier, "register", URIClassifier_register, 2);
-  rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1);
-  rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1);
 }
--- a/ext/http11/tst.h
+++ b/ext/http11/tst.h
@ -1,40 +0,0 @@
-
-
-struct node
-{
-   unsigned char value;
-   struct node *left;
-   struct node *middle;
-   struct node *right;
-};
-
-struct tst
-{
-   int node_line_width;
-   struct node_lines *node_lines;
-   struct node *free_list;
-   struct node *head[127];
-};
-
-struct node_lines
-{
-   struct node *node_line;
-   struct node_lines *next;
-};
-
-enum tst_constants
-{
-   TST_OK, TST_ERROR, TST_NULL_KEY, TST_DUPLICATE_KEY, TST_REPLACE, TST_LONGEST_MATCH
-};
-
-struct tst *tst_init(int node_line_width);
-
-int tst_insert(unsigned char *key, void *data, struct tst *tst, int option, void **exist_ptr);
-
-void *tst_search(const unsigned char *key, struct tst *tst, int option, unsigned int *match_len);
-
-void *tst_delete(unsigned char *key, struct tst *tst);
-
-void tst_cleanup(struct tst *tst);
-
-
--- a/ext/http11/tst_cleanup.c
+++ b/ext/http11/tst_cleanup.c
@ -1,23 +0,0 @@
-
-#include "tst.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-void tst_cleanup(struct tst *tst)
-{
-   struct node_lines *current_line;
-   struct node_lines *next_line;
-
-   next_line = tst->node_lines;
-
-   do
-   {
-      current_line = next_line;
-      next_line = current_line->next;
-      free(current_line->node_line);
-      free(current_line);
-   }
-   while(next_line != NULL);
-
-   free(tst);
-}
--- a/ext/http11/tst_delete.c
+++ b/ext/http11/tst_delete.c
@ -1,146 +0,0 @@
-
-#include "tst.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-void *tst_delete(unsigned char *key, struct tst *tst)
-{
-   struct node *current_node;
-   struct node *current_node_parent;
-   struct node *last_branch;
-   struct node *last_branch_parent;
-   struct node *next_node;
-   struct node *last_branch_replacement;
-   struct node *last_branch_dangling_child;
-   int key_index;
-
-   
-   if(key[0] == 0)
-      return NULL;
-   if(tst->head[(int)key[0]] == NULL)
-      return NULL;
-   
-   last_branch = NULL;
-   last_branch_parent = NULL;
-   current_node = tst->head[(int)key[0]];
-   current_node_parent = NULL;
-   key_index = 1;
-   while(current_node != NULL)
-   {
-      if(key[key_index] == current_node->value)
-      {
-         
-         if( (current_node->left != NULL) || (current_node->right != NULL) )
-         {
-            last_branch = current_node;
-            last_branch_parent = current_node_parent;
-         }
-         if(key[key_index] == 0)
-            break;
-         else
-         {
-            current_node_parent = current_node;
-            current_node = current_node->middle;
-            key_index++;
-            continue;
-         }
-      }
-      else if( ((current_node->value == 0) && (key[key_index] < 64)) ||
-         ((current_node->value != 0) && (key[key_index] <
-         current_node->value)) )
-      {
-         last_branch_parent = current_node;
-         current_node_parent = current_node;
-         current_node = current_node->left;
-         last_branch = current_node;
-         continue;
-      }
-      else
-      {
-         last_branch_parent = current_node;
-         current_node_parent = current_node;
-         current_node = current_node->right;
-         last_branch = current_node;
-         continue;
-      }
-   
-   }
-   if(current_node == NULL)
-      return NULL;
-   
-   if(last_branch == NULL)
-   {
-      
-         next_node = tst->head[(int)key[0]];
-         tst->head[(int)key[0]] = NULL;
-   }
-   else if( (last_branch->left == NULL) && (last_branch->right == NULL) )
-   {
-      
-      if(last_branch_parent->left == last_branch)
-         last_branch_parent->left = NULL;
-      else
-         last_branch_parent->right = NULL;
-      
-      next_node = last_branch;
-   }
-   else
-   {
-      
-      if( (last_branch->left != NULL) && (last_branch->right != NULL) )
-      {
-         last_branch_replacement = last_branch->right;
-         last_branch_dangling_child = last_branch->left;
-      }
-      else if(last_branch->right != NULL)
-      {
-         last_branch_replacement = last_branch->right;
-         last_branch_dangling_child = NULL;
-      }
-      else
-      {
-         last_branch_replacement = last_branch->left;
-         last_branch_dangling_child = NULL;
-      }
-      
-      if(last_branch_parent == NULL)
-         tst->head[(int)key[0]]=last_branch_replacement;
-      else
-      {
-         if (last_branch_parent->left == last_branch)
-            last_branch_parent->left = last_branch_replacement;
-         else if (last_branch_parent->right == last_branch)
-            last_branch_parent->right = last_branch_replacement;
-         else
-            last_branch_parent->middle = last_branch_replacement;
-      }
-      
-      if(last_branch_dangling_child != NULL)
-      {
-         current_node = last_branch_replacement;
-      
-         while (current_node->left != NULL)
-            current_node = current_node->left;
-      
-         current_node->left = last_branch_dangling_child;
-      }
-      
-      next_node = last_branch;
-   }
-   
-   do
-   {
-      current_node = next_node;
-      next_node = current_node->middle;
-      
-      current_node->left = NULL;
-      current_node->right = NULL;
-      current_node->middle = tst->free_list;
-      tst->free_list = current_node;
-   }
-   while(current_node->value != 0);
-   
-   return next_node;
-   
-}
-
--- a/ext/http11/tst_grow_node_free_list.c
+++ b/ext/http11/tst_grow_node_free_list.c
@ -1,38 +0,0 @@
-
-#include "tst.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-int tst_grow_node_free_list(struct tst *tst)
-{
-   struct node *current_node;
-   struct node_lines *new_line;
-   int i;
-
-   
-   if((new_line = (struct node_lines *) malloc(sizeof(struct node_lines))) == NULL)
-      return TST_ERROR;
-   
-   if((new_line->node_line = (struct node *)
-   calloc(tst->node_line_width, sizeof(struct node))) == NULL)
-   {
-      free(new_line);
-      return TST_ERROR;
-   }
-   else
-   {
-      new_line->next = tst->node_lines;
-      tst->node_lines = new_line;
-   }
-   
-   current_node = tst->node_lines->node_line;
-   tst->free_list = current_node;
-   for (i = 1; i < tst->node_line_width; i++)
-   {
-      current_node->middle = &(tst->node_lines->node_line[i]);
-      current_node = current_node->middle;
-   }
-   current_node->middle = NULL;
-   return 1;
-}
-
--- a/ext/http11/tst_init.c
+++ b/ext/http11/tst_init.c
@ -1,41 +0,0 @@
-
-#include "tst.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-struct tst *tst_init(int width)
-{
-   struct tst *tst;
-   struct node *current_node;
-   int i;
-
-
-if((tst = (struct tst *) calloc(1, sizeof(struct tst))) == NULL)
-   return NULL;
-
-if ((tst->node_lines = (struct node_lines *) calloc(1, sizeof(struct node_lines))) == NULL)
-{
-   free(tst);
-   return NULL;
-}
-
-tst->node_line_width = width;
-tst->node_lines->next = NULL;
-if ((tst->node_lines->node_line = (struct node *) calloc(width, sizeof(struct node))) == NULL)
-{
-   free(tst->node_lines);
-   free(tst);
-   return NULL;
-}
-
-current_node = tst->node_lines->node_line;
-tst->free_list = current_node;
-for (i = 1; i < width; i++)
-{
-   current_node->middle = &(tst->node_lines->node_line[i]);
-   current_node = current_node->middle;
-}
-current_node->middle = NULL;
-return tst;
-}
-
--- a/ext/http11/tst_insert.c
+++ b/ext/http11/tst_insert.c
@ -1,218 +0,0 @@
-
-#include "tst.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h> 
-
-int tst_grow_node_free_list(struct tst *tst);
-int tst_insert(unsigned char *key, void *data, struct tst *tst, int option, void **exist_ptr)
-{
-   struct node *current_node;
-   struct node *new_node_tree_begin = NULL;
-   struct node *new_node;
-   int key_index;
-   int perform_loop = 1;
-   
-   if (key == NULL)
-      return TST_NULL_KEY;
-   
-   if(key[0] == 0)
-      return TST_NULL_KEY;
-   
-   if(tst->head[(int)key[0]] == NULL)
-   {
-      
-      if(tst->free_list == NULL)
-      {
-         if(tst_grow_node_free_list(tst) != 1)
-            return TST_ERROR;
-      }
-      tst->head[(int)key[0]] = tst->free_list;
-      
-      tst->free_list = tst->free_list->middle;
-      current_node = tst->head[(int)key[0]];
-      current_node->value = key[1];
-      if(key[1] == 0)
-      {
-         current_node->middle = data;
-         return TST_OK;
-      }
-      else
-         perform_loop = 0;
-   }
-   
-   current_node = tst->head[(int)key[0]];
-   key_index = 1;
-   while(perform_loop == 1)
-   {
-      if(key[key_index] == current_node->value)
-      {
-         
-         if(key[key_index] == 0)
-         {
-            if (option == TST_REPLACE)
-            {
-               if (exist_ptr != NULL)
-                  *exist_ptr = current_node->middle;
-         
-               current_node->middle = data;
-               return TST_OK;
-            }
-            else
-            {
-               if (exist_ptr != NULL)
-                  *exist_ptr = current_node->middle;
-               return TST_DUPLICATE_KEY;
-            }
-         }
-         else
-         {
-            if(current_node->middle == NULL)
-            {
-               
-               if(tst->free_list == NULL)
-               {
-                  if(tst_grow_node_free_list(tst) != 1)
-                     return TST_ERROR;
-               }
-               current_node->middle = tst->free_list;
-               
-               tst->free_list = tst->free_list->middle;
-               new_node_tree_begin = current_node;
-               current_node = current_node->middle;
-               current_node->value = key[key_index];
-               break;
-            }
-            else
-            {
-               current_node = current_node->middle;
-               key_index++;
-               continue;
-            }
-         }
-      }
-      if(key[key_index] == 0)
-      {
-      	 if(tst->free_list == NULL)
-	 {
-	     if(tst_grow_node_free_list(tst) != 1)
-		 return TST_ERROR;
-	 }
-	 new_node = tst->free_list;
-	 tst->free_list = tst->free_list->middle;
-
-	 memcpy((void*)new_node, (void*)current_node, sizeof(struct node));
-	 current_node->value = 0;
-	 if(new_node->value < 64)
-	 {
-	    current_node->left = new_node;
-	    current_node->right = '\0';
-	 }
-	 else
-	 {
-	    current_node->left = '\0';
-	    current_node->right = new_node;
-         }
-
-         current_node->middle = data;
-         return TST_OK;
-      }
-
-      if( ((current_node->value == 0) && (key[key_index] < 64)) ||
-         ((current_node->value != 0) && (key[key_index] <
-         current_node->value)) )
-      {
-         
-         if (current_node->left == NULL)
-         {
-            
-            if(tst->free_list == NULL)
-            {
-               if(tst_grow_node_free_list(tst) != 1)
-                  return TST_ERROR;
-            }
-            current_node->left = tst->free_list;
-            
-            tst->free_list = tst->free_list->middle;
-            new_node_tree_begin = current_node;
-            current_node = current_node->left;
-            current_node->value = key[key_index];
-            if(key[key_index] == 0)
-            {
-               current_node->middle = data;
-               return TST_OK;
-            }
-            else
-               break;
-         }
-         else
-         {
-            current_node = current_node->left;
-            continue;
-         }
-      }
-      else
-      {
-         
-         if (current_node->right == NULL)
-         {
-            
-            if(tst->free_list == NULL)
-            {
-               if(tst_grow_node_free_list(tst) != 1)
-                  return TST_ERROR;
-            }
-            current_node->right = tst->free_list;
-            
-            tst->free_list = tst->free_list->middle;
-            new_node_tree_begin = current_node;
-            current_node = current_node->right;
-            current_node->value = key[key_index];
-            break;
-         }
-         else
-         {
-            current_node = current_node->right;
-            continue;
-         }
-      }
-   }
-   
-   do
-   {
-      key_index++;
-   
-      if(tst->free_list == NULL)
-      {
-         if(tst_grow_node_free_list(tst) != 1)
-         {
-            current_node = new_node_tree_begin->middle;
-   
-            while (current_node->middle != NULL)
-               current_node = current_node->middle;
-   
-            current_node->middle = tst->free_list;
-            tst->free_list = new_node_tree_begin->middle;
-            new_node_tree_begin->middle = NULL;
-   
-            return TST_ERROR;
-         }
-      }
-   
-      
-      if(tst->free_list == NULL)
-      {
-         if(tst_grow_node_free_list(tst) != 1)
-            return TST_ERROR;
-      }
-      current_node->middle = tst->free_list;
-      
-      tst->free_list = tst->free_list->middle;
-      current_node = current_node->middle;
-      current_node->value = key[key_index];
-   } while(key[key_index] !=0);
-   
-   current_node->middle = data;
-   return TST_OK;
-}
-
--- a/ext/http11/tst_search.c
+++ b/ext/http11/tst_search.c
@ -1,73 +0,0 @@
-
-#include "tst.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-
-void *tst_search(const unsigned char *key, struct tst *tst, int option,
-		 unsigned int *match_len)
-{
-    struct node *current_node;
-    struct node *longest_match = NULL;
-    unsigned int longest_match_len = 0;
-    int key_index;
-
-    assert(key != NULL && "key can't be NULL");
-    assert(tst != NULL && "tst can't be NULL");
-
-    if (key[0] == 0)
-	return NULL;
-
-    if (tst->head[(int) key[0]] == NULL)
-	return NULL;
-
-    if (match_len)
-	*match_len = 0;
-
-    current_node = tst->head[(int) key[0]];
-    key_index = 1;
-
-    while (current_node != NULL) {
-	if (key[key_index] == current_node->value) {
-	    if (current_node->value == 0) {
-		if (match_len)
-		    *match_len = key_index;
-		return current_node->middle;
-	    } else {
-		current_node = current_node->middle;
-		key_index++;
-		continue;
-	    }
-	} else {
-	    if (current_node->value == 0) {
-		if (option & TST_LONGEST_MATCH) {
-		    longest_match = current_node->middle;
-		    longest_match_len = key_index;
-		}
-
-		if (key[key_index] < 64) {
-		    current_node = current_node->left;
-		    continue;
-		} else {
-		    current_node = current_node->right;
-		    continue;
-		}
-	    } else {
-		if (key[key_index] < current_node->value) {
-		    current_node = current_node->left;
-		    continue;
-		} else {
-		    current_node = current_node->right;
-		    continue;
-		}
-	    }
-	}
-    }
-
-    if (match_len)
-	*match_len = longest_match_len;
-
-    return longest_match;
-
-}
--- a/ext/http11_java/org/jruby/mongrel/Http11.java
+++ b/ext/http11_java/org/jruby/mongrel/Http11.java
@ -83,8 +83,6 @@ public class Http11 extends RubyObject {
        cHttpParser.defineFastMethod("error?",cf.getFastMethod("has_error"));
        cHttpParser.defineFastMethod("finished?",cf.getFastMethod("is_finished"));
        cHttpParser.defineFastMethod("nread",cf.getFastMethod("nread"));
-
-        URIClassifier.createURIClassifier(runtime, mMongrel);
    }

    private Ruby runtime;
--- a/ext/http11_java/org/jruby/mongrel/URIClassifier.java
+++ b/ext/http11_java/org/jruby/mongrel/URIClassifier.java
@ -1,129 +0,0 @@
-/***** BEGIN LICENSE BLOCK *****
- * Version: CPL 1.0/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Common Public
- * License Version 1.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.eclipse.org/legal/cpl-v10.html
- *
- * Software distributed under the License is distributed on an "AS
- * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * rights and limitations under the License.
- *
- * Copyright (C) 2007 Ola Bini <ola@ologix.com>
- * 
- * Alternatively, the contents of this file may be used under the terms of
- * either of the GNU General Public License Version 2 or later (the "GPL"),
- * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the CPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the CPL, the GPL or the LGPL.
- ***** END LICENSE BLOCK *****/
-package org.jruby.mongrel;
-
-import org.jruby.Ruby;
-import org.jruby.RubyArray;
-import org.jruby.RubyClass;
-import org.jruby.RubyHash;
-import org.jruby.RubyModule;
-import org.jruby.RubyObject;
-import org.jruby.RubyString;
-
-import org.jruby.runtime.Block;
-import org.jruby.runtime.CallbackFactory;
-import org.jruby.runtime.ObjectAllocator;
-import org.jruby.runtime.builtin.IRubyObject;
-
-import org.jruby.tst.TernarySearchTree;
-
-/**
- * @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
- */
-public class URIClassifier extends RubyObject {
-    public final static int TRIE_INCREASE = 30;
-
-    private static ObjectAllocator ALLOCATOR = new ObjectAllocator() {
-        public IRubyObject allocate(Ruby runtime, RubyClass klass) {
-            return new URIClassifier(runtime, klass);
-        }
-    };
-
-    public static void createURIClassifier(Ruby runtime, RubyModule mMongrel) {
-        RubyClass cURIClassifier = mMongrel.defineClassUnder("URIClassifier",runtime.getObject(),ALLOCATOR);
-        CallbackFactory cf = runtime.callbackFactory(URIClassifier.class);
-        cURIClassifier.defineFastMethod("initialize",cf.getFastMethod("initialize"));
-        cURIClassifier.defineFastMethod("register",cf.getFastMethod("register",IRubyObject.class,IRubyObject.class));
-        cURIClassifier.defineFastMethod("unregister",cf.getFastMethod("unregister",IRubyObject.class));
-        cURIClassifier.defineFastMethod("resolve",cf.getFastMethod("resolve",IRubyObject.class));
-    }
-
-    private TernarySearchTree tst;
-
-    public URIClassifier(Ruby runtime, RubyClass clazz) {
-        super(runtime,clazz);
-        tst = new TernarySearchTree(TRIE_INCREASE);
-    }
-
-    public IRubyObject initialize() {
-        setInstanceVariable("@handler_map",RubyHash.newHash(getRuntime()));
-        return this;
-    }
-
-    public IRubyObject register(IRubyObject uri, IRubyObject handler) {
-        Object[] ptr = new Object[]{null};
-        int rc = 0;
-        rc = tst.insert(uri.toString(),handler,0,ptr);
-        if(rc == TernarySearchTree.TST_DUPLICATE_KEY) {
-            throw getRuntime().newStandardError("Handler already registered with that name");
-        } else if(rc == TernarySearchTree.TST_ERROR) {
-            throw getRuntime().newStandardError("Memory error registering handler");
-        } else if(rc == TernarySearchTree.TST_NULL_KEY) {
-            throw getRuntime().newStandardError("URI was empty");
-        }
-        ((RubyHash)getInstanceVariable("@handler_map")).aset(uri,handler);
-        return getRuntime().getNil();
-    }
-
-    public IRubyObject unregister(IRubyObject uri) {
-        IRubyObject handler = (IRubyObject)tst.delete(uri.toString());
-        if(null != handler) {
-            ((RubyHash)getInstanceVariable("@handler_map")).delete(uri,Block.NULL_BLOCK);
-            return handler;
-        }
-        return getRuntime().getNil();
-    }
-
-    public IRubyObject resolve(IRubyObject _ri) {
-        IRubyObject handler = null;
-        int[] pref_len = new int[]{0};
-        RubyArray result;
-        String uri_str;
-        RubyString uri = _ri.convertToString();
-
-        uri_str = uri.toString();
-        handler = (IRubyObject)tst.search(uri_str,pref_len);
-        
-        result = getRuntime().newArray();
-        
-        if(handler != null) {
-            result.append(uri.substr(0, pref_len[0]));
-            if(pref_len[0] == 1 && uri_str.startsWith("/")) {
-                result.append(uri);
-            } else {
-                result.append(uri.substr(pref_len[0],uri.getByteList().length()));
-            }
-            result.append(handler);
-        } else {
-            result.append(getRuntime().getNil());
-            result.append(getRuntime().getNil());
-            result.append(getRuntime().getNil());
-        }
-        return result;
-    }
-}// URIClassifier
--- a/ext/http11_java/org/jruby/tst/Node.java
+++ b/ext/http11_java/org/jruby/tst/Node.java
@ -1,18 +0,0 @@
-/*
- * See LICENSE file.
- */
-/**
- * $Id: $
- */
-package org.jruby.tst;
-
-/**
- * @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
- * @version $Revision: $
- */
-public class Node {
-    public char value;
-    public Node left;
-    public Object middle;
-    public Node right;
-}// Node
--- a/ext/http11_java/org/jruby/tst/NodeLines.java
+++ b/ext/http11_java/org/jruby/tst/NodeLines.java
@ -1,16 +0,0 @@
-/*
- * See LICENSE file.
- */
-/**
- * $Id: $
- */
-package org.jruby.tst;
-
-/**
- * @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
- * @version $Revision: $
- */
-public class NodeLines {
-    public Node[] node_line;
-    public NodeLines next;
-}// NodeLines
--- a/ext/http11_java/org/jruby/tst/TernarySearchTree.java
+++ b/ext/http11_java/org/jruby/tst/TernarySearchTree.java
@ -1,433 +0,0 @@
-/*
- * See LICENSE file.
- */
-/**
- * $Id: $
- */
-package org.jruby.tst;
-
-/**
- * @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
- * @version $Revision: $
- */
-public class TernarySearchTree {
-    public final static int TST_OK = 0x1;
-    public final static int TST_ERROR = 0x2;
-    public final static int TST_NULL_KEY = 0x4;
-    public final static int TST_DUPLICATE_KEY = 0x8;
-    public final static int TST_REPLACE = 0x10;
-
-    public int node_line_width;
-    public NodeLines node_lines;
-    public Node free_list;
-    public Node[] head = new Node[127];
-
-    public TernarySearchTree(final int width) {
-        this.node_lines = new NodeLines();
-        this.node_line_width = width;
-
-        this.node_lines.next = null;
-        this.node_lines.node_line = new Node[width];
-        for(int i=0;i<width;i++) {
-            this.node_lines.node_line[i] = new Node();
-        }
-
-        Node current_node = this.node_lines.node_line[0];
-        this.free_list = current_node;
-
-        for(int i=1; i<width; i++) {
-            current_node.middle = this.node_lines.node_line[i];
-            current_node = (Node)current_node.middle;
-        }
-        current_node.middle = null;
-    }
-
-    public int insert(final String key, final Object data, final int option, final Object[] exist_ptr) {
-        Node current_node = null;
-        Node new_node_tree_begin = null;
-        int key_index = 0;
-        boolean perform_loop = true;
-
-        if(null == key || key.length() == 0) {
-            return TST_NULL_KEY;
-        }
-
-        if(this.head[key.charAt(0)] == null) {
-            if(this.free_list == null) {
-                if(!grow_node_free_list()) {
-                    return TST_ERROR;
-                }
-            }
-            this.head[key.charAt(0)] = this.free_list;
-            this.free_list = (Node)this.free_list.middle;
-            current_node = this.head[key.charAt(0)];
-            
-            if(key.length() == 1) {
-                current_node.value = 0;
-                current_node.middle = data;
-                return TST_OK;
-            } else {
-                current_node.value = key.charAt(1);
-                perform_loop = false;
-            }
-        }
-
-        current_node = this.head[key.charAt(0)];
-        key_index = 1;
-        char curr = 0;
-        while(perform_loop) {
-            if(key_index < key.length()) {
-                curr = key.charAt(key_index);
-            } else {
-                curr = 0;
-            }
-
-            if(curr == current_node.value) {
-                if(curr == 0) {
-                    if(option == TST_REPLACE) {
-                        if(exist_ptr.length > 0) {
-                            exist_ptr[0] = current_node.middle;
-                        }
-                        current_node.middle = data;
-                        return TST_OK;
-                    } else {
-                        if(exist_ptr.length > 0) {
-                            exist_ptr[0] = current_node.middle;
-                        }
-                        return TST_DUPLICATE_KEY;
-                    }
-
-                } else {
-                    if(current_node.middle == null) {
-                        if(this.free_list == null) {
-                            if(!grow_node_free_list()) {
-                                return TST_ERROR;
-                            }
-                        }
-                        current_node.middle = this.free_list;
-                        this.free_list = (Node)this.free_list.middle;
-                        new_node_tree_begin = current_node;
-                        current_node = (Node)current_node.middle;
-                        current_node.value = curr;
-                        break;
-                    } else {
-                        current_node = (Node)current_node.middle;
-                        key_index++;
-                        continue;
-                    }
-                }
-            }
-
-            if((current_node.value == 0 && curr < 64) ||
-               (current_node.value != 0 && curr < current_node.value)) {
-                if(current_node.left == null) {
-                    if(this.free_list == null) {
-                        if(!grow_node_free_list()) {
-                            return TST_ERROR;
-                        }
-                    }
-                    current_node.left = this.free_list;
-                    this.free_list = (Node)this.free_list.middle;
-                    new_node_tree_begin = current_node;
-                    current_node = current_node.left;
-                    current_node.value = curr;
-                    if(curr == 0) {
-                        current_node.middle = data;
-                        return TST_OK;
-                    } else {
-                        break;
-                    }
-                } else {
-                    current_node = current_node.left;
-                    continue;
-                }
-            } else {
-                if(current_node.right == null) {
-                    if(this.free_list == null) {
-                        if(!grow_node_free_list()) {
-                            return TST_ERROR;
-                        }
-                    }
-                    current_node.right = this.free_list;
-                    this.free_list = (Node)this.free_list.middle;
-                    new_node_tree_begin = current_node;
-                    current_node = current_node.right;
-                    current_node.value = curr;
-                    break;
-                } else {
-                    current_node = current_node.right;
-                    continue;
-                }
-            }
-        }
-        
-        do {
-            key_index++;
-            if(key_index < key.length()) {
-                curr = key.charAt(key_index);
-            } else {
-                curr = 0;
-            }
-
-            if(this.free_list == null) {
-                if(!grow_node_free_list()) {
-                    current_node = (Node)new_node_tree_begin.middle;
-                    while(current_node.middle != null) {
-                        current_node = (Node)current_node.middle;
-                    }
-                    current_node.middle = this.free_list;
-                    this.free_list = (Node)new_node_tree_begin.middle;
-                    new_node_tree_begin.middle = null;
-                    return TST_ERROR;
-                }
-            }
-            
-            if(this.free_list == null) {
-                if(!grow_node_free_list()) {
-                    return TST_ERROR;
-                }
-            }
-            current_node.middle = this.free_list;
-            this.free_list = (Node)this.free_list.middle;
-            current_node = (Node)current_node.middle;
-
-            current_node.value = curr;
-        } while(curr != 0);
-        
-        current_node.middle = data;
-        return TST_OK;
-    }
-
-    public Object search(final String key, final int[] prefix_len) {
-        Node current_node = null;
-        Object longest_match = null;
-        int key_index = 0;
-
-        if(key == null) {
-            throw new IllegalArgumentException("key can't be null");
-        }
-
-        if(key.length() == 0 || this.head[key.charAt(0)] == null) {
-            return null;
-        }
-
-        if(prefix_len.length > 0) {
-            prefix_len[0] = 0;
-        }
-
-        current_node = this.head[key.charAt(0)];
-        key_index = 1;
-
-        char curr = 0;
-        while(null != current_node) {
-            if(key_index < key.length()) {
-                curr = key.charAt(key_index);
-            } else {
-                curr = 0;
-            }
-
-            if(curr == current_node.value) {
-                if(current_node.value == 0) {
-                    if(prefix_len.length>0) {
-                        prefix_len[0] = key_index;
-                    }
-                    return current_node.middle;
-                } else {
-                    current_node = (Node)current_node.middle;
-                    if(current_node != null && current_node.value == 0) {
-                        if(prefix_len.length>0) {
-                            prefix_len[0] = key_index+1;
-                        }
-                        longest_match = current_node.middle;
-                    }
-                    key_index++;
-                    continue;
-                }
-            } else if((current_node.value == 0 && curr < 64) ||
-                      (current_node.value != 0 && curr < current_node.value)) {
-                if(current_node.value == 0) {
-                    if(prefix_len.length>0) {
-                        prefix_len[0] = key_index;
-                    }
-                    longest_match = current_node.middle;
-                }
-                current_node = current_node.left;
-                continue;
-            } else {
-                if(current_node.value == 0) {
-                    if(prefix_len.length>0) {
-                        prefix_len[0] = key_index;
-                    }
-                    longest_match = current_node.middle;
-                }
-                current_node = current_node.right;
-                continue;
-            }
-        }
-
-        return longest_match;
-    }
-
-    public Object delete(final String key) {
-        Node current_node = null;
-        Node current_node_parent = null;
-        Node last_branch = null;
-        Node last_branch_parent = null;
-        Object next_node = null;
-        Node last_branch_replacement = null;
-        Node last_branch_dangling_child = null;
-        int key_index = 1;
-        
-        if(key.length() == 0 || this.head[key.charAt(0)] == null) {
-            return null;
-        }
-   
-        current_node = this.head[key.charAt(0)];
-
-        char curr = 0;
-        while(null != current_node) {
-            if(key_index < key.length()) {
-                curr = key.charAt(key_index);
-            } else {
-                curr = 0;
-            }
-            if(curr == current_node.value) {
-                if(current_node.left != null || current_node.right != null) {
-                    last_branch = current_node;
-                    last_branch_parent = current_node_parent;
-                }
-                if(curr == 0) {
-                    break;
-                }
-                current_node_parent = current_node;
-                current_node = (Node)current_node.middle;
-                key_index++;
-                continue;
-            } else if((current_node.value == 0 && curr < 64) ||
-                      (current_node.value != 0&& curr < current_node.value)) {
-                last_branch_parent = current_node;
-                current_node_parent = current_node;
-                current_node = current_node.left;
-                last_branch = current_node;
-                continue;
-            } else {
-                last_branch_parent = current_node;
-                current_node_parent = current_node;
-                current_node = current_node.right;
-                last_branch = current_node;
-                continue;
-            }
-        }
-
-        if(null == current_node) {
-            return null;
-        }
-        
-        if(null == last_branch) {
-            next_node = this.head[key.charAt(0)];
-            this.head[key.charAt(0)] = null;
-        } else if(last_branch.left == null && last_branch.right == null) {
-            if(last_branch_parent.left == last_branch) {
-                last_branch_parent.left = null;
-            } else {
-                last_branch_parent.right = null;
-            }
-            next_node = last_branch;
-        } else {
-            if(last_branch.left != null && last_branch.right != null) {
-                last_branch_replacement = last_branch.right;
-                last_branch_dangling_child = last_branch.left;
-            } else if(last_branch.right != null) {
-                last_branch_replacement = last_branch.right;
-                last_branch_dangling_child = null;
-            } else {
-                last_branch_replacement = last_branch.left;
-                last_branch_dangling_child = null;
-            }
-
-            if(last_branch_parent == null) {
-                this.head[key.charAt(0)] = last_branch_replacement;
-            } else {
-                if(last_branch_parent.left == last_branch) {
-                    last_branch_parent.left = last_branch_replacement;
-                } else if(last_branch_parent.right == last_branch) {
-                    last_branch_parent.right = last_branch_replacement;
-                } else {
-                    last_branch_parent.middle = last_branch_replacement;
-                }
-            }
-            
-            if(last_branch_dangling_child != null) {
-                current_node = last_branch_replacement;
-                while(current_node.left != null) {
-                    current_node = current_node.left;
-                }
-                current_node.left = last_branch_dangling_child;
-            }
-
-            next_node = last_branch;
-        }
-   
-        do {
-            current_node = (Node)next_node;
-            next_node = current_node.middle;
-            current_node.left = null;
-            current_node.right = null;
-            current_node.middle = this.free_list;
-            this.free_list = current_node;
-        } while(current_node.value != 0);
-
-
-        return next_node;
-    }
-
-    public boolean grow_node_free_list() {
-        Node current_node = null;
-        NodeLines new_line = null;
-
-        new_line = new NodeLines();
-
-        new_line.node_line = new Node[this.node_line_width];
-        for(int i=0;i<this.node_line_width;i++) {
-            new_line.node_line[i] = new Node();
-        }
-
-        new_line.next = this.node_lines;
-        this.node_lines = new_line;
-
-        current_node = this.node_lines.node_line[0];
-        this.free_list = current_node;
-        for(int i=1;i<this.node_line_width;i++) {
-            current_node.middle = this.node_lines.node_line[i];
-            current_node = (Node)current_node.middle;
-        }
-        current_node.middle = null;
-
-        return true;
-    }
-
-
-    public static void main(final String[] args) {
-        final TernarySearchTree tst = new TernarySearchTree(30);
-        int ret = tst.insert("fOO","VAL1",0, new Object[0]);
-        System.err.println("ret: " + ret);
-        ret = tst.insert("bar","VAL2",0, new Object[0]);
-        System.err.println("ret: " + ret);
-        ret = tst.insert("baz","VAL3",0, new Object[0]);
-        System.err.println("ret: " + ret);
-        ret = tst.insert("zydsfgfd","VAL4",0, new Object[0]);
-        System.err.println("ret: " + ret);
-        ret = tst.insert("1242","VAL5",0, new Object[0]);
-        System.err.println("ret: " + ret);
-
-        Object val = tst.delete("fOO");
-        System.err.println("del: " + val);
-
-        int[] pref_len = new int[1];
-        pref_len[0] = 0;
-        val = tst.search("ba",pref_len);
-        System.err.println("search: " + val + " pref_len: " + pref_len[0]);
-        val = tst.search("bar",pref_len);
-        System.err.println("search: " + val + " pref_len: " + pref_len[0]);
-    }
-}// TernarySearchTree
--- a/lib/mongrel.rb
+++ b/lib/mongrel.rb
@ -41,19 +41,71 @@ require 'uri'
 module Mongrel

  class URIClassifier
-    attr_reader :handler_map
+    attr_reader :routes
+    
+    class RegistrationError < RuntimeError
+    end
+    class UsageError < RuntimeError
+    end

    # Returns the URIs that have been registered with this classifier so far.
-    # The URIs returned should not be modified as this will cause a memory leak.
-    # You can use this to inspect the contents of the URIClassifier.
+    # The URIs returned should not be modified.
    def uris
-      @handler_map.keys
+      @routes.keys
    end

-    # Simply does an inspect that looks like a Hash inspect.
-    def inspect
-      @handler_map.inspect
+    def initialize
+      @routes = {}
+      @matcher = nil
    end
+    
+    def register(path, handler)
+      if @routes[path]
+        raise RegistrationError, "#{path.inspect} is already registered"
+      elsif !path or path.empty?
+        raise RegistrationError, "Path is empty"
+      end      
+      @routes[path.dup] = handler
+      rebuild
+    end
+    
+    def unregister(path)
+      handler = @routes.delete(path)
+      raise RegistrationError, "#{path.inspect} was not registered" unless handler
+      rebuild
+      handler
+    end
+    
+    def resolve(request_uri)
+      raise UsageError, "No routes have been registered" unless @matcher
+      match = request_uri[@matcher, 0]
+      if match
+        path_info = request_uri[match.size..-1]
+        # A root mounted ("/") handler must resolve such that path info matches the original URI.
+        path_info = "#{Const::SLASH}#{path_info}" if match == Const::SLASH
+        [match, path_info, @routes[match]]
+      else
+        [nil, nil, nil]
+      end
+    end
+        
+    alias :handler_map :routes # Legacy
+    
+    private
+    
+    def rebuild
+      routes = @routes.sort_by do |path, handler|        
+        # Sort by name
+        path
+      end.sort_by do |path, handler|          
+        # Then by descending length
+        -path.length 
+      end
+      @matcher = Regexp.new(routes.map do |path, handler|
+        Regexp.new('^' + Regexp.escape(path))
+      end.join('|'))
+    end    
+    
  end


@ -785,25 +837,16 @@ module Mongrel
    # found in the prefix of a request then your handler's HttpHandler::process method
    # is called.  See Mongrel::URIClassifier#register for more information.
    #
-    # If you set in_front=true then the passed in handler will be put in front in the list.
-    # Otherwise it's placed at the end of the list.
+    # If you set in_front=true then the passed in handler will be put in the front of the list
+    # for that particular URI. Otherwise it's placed at the end of the list.
    def register(uri, handler, in_front=false)
-      script_name, path_info, handlers = @classifier.resolve(uri)
-
-      if not handlers
+      begin
        @classifier.register(uri, [handler])
-      else
-        if path_info.length == 0 or (script_name == Const::SLASH and path_info == Const::SLASH)
-          if in_front
-            handlers.unshift(handler)
-          else
-            handlers << handler
-          end
-        else
-          @classifier.register(uri, [handler])
-        end
+      rescue URIClassifier::RegistrationError
+        handlers = @classifier.resolve(uri)[2]
+        method_name = in_front ? 'unshift' : 'push'
+        handlers.send(method_name, handler)
      end
-
      handler.listener = self
    end

--- a/lib/mongrel/handlers.rb
+++ b/lib/mongrel/handlers.rb
@ -385,7 +385,7 @@ module Mongrel
      end

      results << "<h2>Registered Handlers</h2>"
-      uris = listener.classifier.handler_map
+      uris = listener.classifier.routes
      results << table("handlers", uris.map {|uri,handlers| 
        [uri, 
            "<pre>" + 
--- a/test/test_configurator.rb
+++ b/test/test_configurator.rb
@ -56,6 +56,7 @@ class ConfiguratorTest < Test::Unit::TestCase
      end
    end

+    # pp @config.listeners.values.first.classifier.routes

    @config.listeners.each do |host,listener| 
      assert listener.classifier.uris.length == 3, "Wrong number of registered URIs"
--- a/test/test_uriclassifier.rb
+++ b/test/test_uriclassifier.rb
@ -116,6 +116,7 @@ class URIClassifierTest < Test::Unit::TestCase
      current << c.chr
      uri_classifier.register(current, c)
    end
+    

    # Try to resolve everything with no asserts as a fuzzing
    tests.each do |prefix|
@ -178,7 +179,6 @@ class URIClassifierTest < Test::Unit::TestCase

    tests.each do |uri|
      script_name, path_info, handler = uri_classifier.resolve(uri)
-#      p uri_classifier.resolve(uri)
      assert_equal root, script_name, "#{uri} did not resolve to #{root}"
      assert_equal uri, path_info
      assert_equal 2, handler
--- a/test/testhelp.rb
+++ b/test/testhelp.rb
@ -20,6 +20,7 @@ require 'benchmark'
 require 'digest/sha1'
 require 'uri'
 require 'stringio'
+require 'pp'

 require 'mongrel'
 require 'mongrel/stats'