1
0
Fork 0
mirror of https://github.com/rails/rails.git synced 2022-11-09 12:12:34 -05:00

actionpack: Improve performance by allowing routes with custom regexes in the FSM.

The FSM used to find matching routes was previously limited to patterns
that contained parameters with the default regexp / no constraints. In
large route sets where many parameters are constrained by custom regexp,
these routes all fall back on a slow linear search over the route list.

These custom regexes were not previously able to be included in the FSM
because it transitioned between nodes using only fragments of the URI,
or path separators [/.?], but a custom regex may cross a path separator
boundary. To work around this, the TransitionTable is improved to
support remembering the point within the matching string at which
matching started, and continuing to attempt to match from that point up to the end of each
token. Only parameters not on a path separator boundary must still match
with a linear search after this change (e.g. `/foo-:bar/`).

This results in performance for constrained routes that matches that of
ones using the default regexp.

Benchmark:
https://gist.github.com/theojulienne/e91fc338d180e1710e29c81a5d701fab

Before:
```
Calculating -------------------------------------
    without params      6.466k (±12.7%) i/s -     31.648k in   5.009453s
params without constraints
                        5.867k (±12.9%) i/s -     28.842k in   5.032637s
params with constraints
                      909.661  (± 7.9%) i/s -      4.536k in   5.023534s
```

After:
```
Calculating -------------------------------------
    without params      6.387k (±11.9%) i/s -     31.728k in   5.068939s
params without constraints
                        5.824k (±13.2%) i/s -     28.650k in   5.043701s
params with constraints
                        5.406k (±11.7%) i/s -     26.931k in   5.076412s
```

For github.com, which has many constrained parameters, a random sampling
of 10 URL patterns can be matched approximately 2-4x faster than before.

This commit fixes symbols as constraints as tested in
6ab985da28
This commit is contained in:
Theo Julienne 2020-12-16 08:57:10 +11:00
parent 6ab985da28
commit 16a80882f9
12 changed files with 216 additions and 73 deletions

View file

@ -6,13 +6,13 @@ module ActionDispatch
module Journey # :nodoc: module Journey # :nodoc:
module GTG # :nodoc: module GTG # :nodoc:
class Builder # :nodoc: class Builder # :nodoc:
DUMMY = Nodes::Dummy.new DUMMY_END_NODE = Nodes::Dummy.new
attr_reader :root, :ast, :endpoints attr_reader :root, :ast, :endpoints
def initialize(root) def initialize(root)
@root = root @root = root
@ast = Nodes::Cat.new root, DUMMY @ast = Nodes::Cat.new root, DUMMY_END_NODE
@followpos = build_followpos @followpos = build_followpos
end end
@ -28,12 +28,12 @@ module ActionDispatch
marked[s] = true # mark s marked[s] = true # mark s
s.group_by { |state| symbol(state) }.each do |sym, ps| s.group_by { |state| symbol(state) }.each do |sym, ps|
u = ps.flat_map { |l| @followpos[l] } u = ps.flat_map { |l| @followpos[l] }.uniq
next if u.empty? next if u.empty?
from = state_id[s] from = state_id[s]
if u.all? { |pos| pos == DUMMY } if u.all? { |pos| pos == DUMMY_END_NODE }
to = state_id[Object.new] to = state_id[Object.new]
dtrans[from, to] = sym dtrans[from, to] = sym
dtrans.add_accepting(to) dtrans.add_accepting(to)
@ -43,9 +43,9 @@ module ActionDispatch
to = state_id[u] to = state_id[u]
dtrans[from, to] = sym dtrans[from, to] = sym
if u.include?(DUMMY) if u.include?(DUMMY_END_NODE)
ps.each do |state| ps.each do |state|
if @followpos[state].include?(DUMMY) if @followpos[state].include?(DUMMY_END_NODE)
dtrans.add_memo(to, state.memo) dtrans.add_memo(to, state.memo)
end end
end end
@ -66,7 +66,10 @@ module ActionDispatch
when Nodes::Group when Nodes::Group
true true
when Nodes::Star when Nodes::Star
true # the default star regex is /(.+)/ which is NOT nullable
# but since different constraints can be provided we must
# actually check if this is the case or not.
node.regexp.match?("")
when Nodes::Or when Nodes::Or
node.children.any? { |c| nullable?(c) } node.children.any? { |c| nullable?(c) }
when Nodes::Cat when Nodes::Cat
@ -104,7 +107,7 @@ module ActionDispatch
def lastpos(node) def lastpos(node)
case node case node
when Nodes::Star when Nodes::Star
firstpos(node.left) lastpos(node.left)
when Nodes::Or when Nodes::Or
node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!) node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!)
when Nodes::Cat when Nodes::Cat
@ -131,10 +134,6 @@ module ActionDispatch
lastpos(n.left).each do |i| lastpos(n.left).each do |i|
table[i] += firstpos(n.right) table[i] += firstpos(n.right)
end end
when Nodes::Star
lastpos(n).each do |i|
table[i] += firstpos(n)
end
end end
end end
table table

View file

@ -14,7 +14,7 @@ module ActionDispatch
end end
class Simulator # :nodoc: class Simulator # :nodoc:
INITIAL_STATE = [0].freeze INITIAL_STATE = [ [0, nil] ].freeze
attr_reader :tt attr_reader :tt
@ -25,13 +25,19 @@ module ActionDispatch
def memos(string) def memos(string)
input = StringScanner.new(string) input = StringScanner.new(string)
state = INITIAL_STATE state = INITIAL_STATE
start_index = 0
while sym = input.scan(%r([/.?]|[^/.?]+)) while sym = input.scan(%r([/.?]|[^/.?]+))
state = tt.move(state, sym) end_index = start_index + sym.length
state = tt.move(state, string, start_index, end_index)
start_index = end_index
end end
acceptance_states = state.each_with_object([]) do |s, memos| acceptance_states = state.each_with_object([]) do |s_d, memos|
memos.concat(tt.memo(s)) if tt.accepting?(s) s, idx = s_d
memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s)
end end
acceptance_states.empty? ? yield : acceptance_states acceptance_states.empty? ? yield : acceptance_states

View file

@ -10,11 +10,15 @@ module ActionDispatch
attr_reader :memos attr_reader :memos
DEFAULT_EXP = /[^.\/?]+/
DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/)
def initialize def initialize
@regexp_states = {} @stdparam_states = {}
@string_states = {} @regexp_states = {}
@accepting = {} @string_states = {}
@memos = Hash.new { |h, k| h[k] = [] } @accepting = {}
@memos = Hash.new { |h, k| h[k] = [] }
end end
def add_accepting(state) def add_accepting(state)
@ -41,22 +45,54 @@ module ActionDispatch
Array(t) Array(t)
end end
def move(t, a) def move(t, full_string, start_index, end_index)
return [] if t.empty? return [] if t.empty?
regexps = [] next_states = []
strings = []
t.each { |s| tok = full_string.slice(start_index, end_index - start_index)
if states = @regexp_states[s] token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok)
states.each { |re, v| regexps << v if re.match?(a) && !v.nil? }
t.each { |s, previous_start|
if previous_start.nil?
# In the simple case of a "default" param regex do this fast-path
# and add all next states.
if token_matches_default_component && states = @stdparam_states[s]
states.each { |re, v| next_states << [v, nil].freeze if !v.nil? }
end
# When we have a literal string, we can just pull the next state
if states = @string_states[s]
next_states << [states[tok], nil].freeze unless states[tok].nil?
end
end end
if states = @string_states[s] # For regexes that aren't the "default" style, they may potentially
strings << states[a] unless states[a].nil? # not be terminated by the first "token" [./?], so we need to continue
# to attempt to match this regexp as well as any successful paths that
# continue out of it. both paths could be valid.
if states = @regexp_states[s]
slice_start = if previous_start.nil?
start_index
else
previous_start
end
slice_length = end_index - slice_start
curr_slice = full_string.slice(slice_start, slice_length)
states.each { |re, v|
# if we match, we can try moving past this
next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice)
}
# and regardless, we must continue accepting tokens and retrying this regexp.
# we need to remember where we started as well so we can take bigger slices.
next_states << [s, slice_start].freeze
end end
} }
strings.concat regexps
next_states
end end
def as_json(options = nil) def as_json(options = nil)
@ -69,9 +105,10 @@ module ActionDispatch
end end
{ {
regexp_states: simple_regexp, regexp_states: simple_regexp,
string_states: @string_states, string_states: @string_states,
accepting: @accepting stdparam_states: @stdparam_states,
accepting: @accepting
} }
end end
@ -125,18 +162,29 @@ module ActionDispatch
def []=(from, to, sym) def []=(from, to, sym)
to_mappings = states_hash_for(sym)[from] ||= {} to_mappings = states_hash_for(sym)[from] ||= {}
case sym
when Regexp
# we must match the whole string to a token boundary
sym = Regexp.new(/\A#{sym}\Z/)
when Symbol
# account for symbols in the constraints the same as strings
sym = sym.to_s
end
to_mappings[sym] = to to_mappings[sym] = to
end end
def states def states
ss = @string_states.keys + @string_states.values.flat_map(&:values) ss = @string_states.keys + @string_states.values.flat_map(&:values)
ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values)
rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values) rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values)
(ss + rs).uniq (ss + ps + rs).uniq
end end
def transitions def transitions
@string_states.flat_map { |from, hash| @string_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] } hash.map { |s, to| [from, s, to] }
} + @stdparam_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
} + @regexp_states.flat_map { |from, hash| } + @regexp_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] } hash.map { |s, to| [from, s, to] }
} }
@ -145,10 +193,14 @@ module ActionDispatch
private private
def states_hash_for(sym) def states_hash_for(sym)
case sym case sym
when String when String, Symbol
@string_states @string_states
when Regexp when Regexp
@regexp_states if sym == DEFAULT_EXP
@stdparam_states
else
@regexp_states
end
else else
raise ArgumentError, "unknown symbol: %s" % sym.class raise ArgumentError, "unknown symbol: %s" % sym.class
end end

View file

@ -104,6 +104,15 @@ module ActionDispatch
end end
class Star < Unary # :nodoc: class Star < Unary # :nodoc:
attr_accessor :regexp
def initialize(left)
super(left)
# By default wildcard routes are non-greedy and must match something.
@regexp = /.+?/
end
def star?; true; end def star?; true; end
def type; :STAR; end def type; :STAR; end

View file

@ -39,6 +39,27 @@ module ActionDispatch
@spec @spec
end end
def requirements_anchored?
# each required param must not be surrounded by a literal, otherwise it isn't simple to chunk-match the url piecemeal
terminals = ast.find_all { |t| t.is_a?(Nodes::Terminal) }
terminals.each_with_index { |s, index|
next if index < 1
next unless s.symbol?
back = terminals[index - 1]
fwd = terminals[index + 1]
# we also don't support this yet, constraints must be regexps
return false if s.regexp.is_a?(Array)
return false if back.literal?
return false if !fwd.nil? && fwd.literal?
}
true
end
def names def names
@names ||= spec.find_all(&:symbol?).map(&:name) @names ||= spec.find_all(&:symbol?).map(&:name)
end end

View file

@ -84,6 +84,8 @@ module ActionDispatch
@decorated_ast ||= begin @decorated_ast ||= begin
decorated_ast = path.ast decorated_ast = path.ast
decorated_ast.find_all(&:terminal?).each { |n| n.memo = self } decorated_ast.find_all(&:terminal?).each { |n| n.memo = self }
# inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string.
decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? }
decorated_ast decorated_ast
end end
end end

View file

@ -85,7 +85,7 @@ module ActionDispatch
private private
def partitioned_routes def partitioned_routes
routes.partition { |r| routes.partition { |r|
r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? } r.path.anchored && r.path.requirements_anchored?
} }
end end

View file

@ -41,7 +41,7 @@ module ActionDispatch
end end
def partition_route(route) def partition_route(route)
if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?) if route.path.anchored && route.path.requirements_anchored?
anchored_routes << route anchored_routes << route
else else
custom_routes << route custom_routes << route

View file

@ -68,7 +68,7 @@ function highlight_state(index, color) {
} }
function highlight_finish(index) { function highlight_finish(index) {
svg_nodes[index].select('polygon') svg_nodes[index].select('ellipse')
.style("fill", "while") .style("fill", "while")
.transition().duration(500) .transition().duration(500)
.style("fill", "blue"); .style("fill", "blue");
@ -76,54 +76,79 @@ function highlight_finish(index) {
function match(input) { function match(input) {
reset_graph(); reset_graph();
var table = tt(); var table = tt();
var states = [0]; var states = [[0, null]];
var regexp_states = table['regexp_states']; var regexp_states = table['regexp_states'];
var string_states = table['string_states']; var string_states = table['string_states'];
var accepting = table['accepting']; var stdparam_states = table['stdparam_states'];
var accepting = table['accepting'];
var default_re = new RegExp("^[^.\/?]+$");
var start_index = 0;
highlight_state(0); highlight_state(0);
tokenize(input, function(token) { tokenize(input, function(token) {
var end_index = start_index + token.length;
var new_states = []; var new_states = [];
for(var key in states) { for(var key in states) {
var state = states[key]; var state_parts = states[key];
var state = state_parts[0];
var previous_start = state_parts[1];
if(string_states[state] && string_states[state][token]) { if(previous_start == null) {
var new_state = string_states[state][token]; if(string_states[state] && string_states[state][token]) {
highlight_edge(state, new_state); var new_state = string_states[state][token];
highlight_state(new_state); highlight_edge(state, new_state);
new_states.push(new_state); highlight_state(new_state);
} new_states.push([new_state, null]);
}
if(regexp_states[state]) { if(stdparam_states[state] && default_re.test(token)) {
for(var key in regexp_states[state]) { for(var key in stdparam_states[state]) {
var re = new RegExp("^" + key + "$"); var new_state = stdparam_states[state][key];
if(re.test(token)) {
var new_state = regexp_states[state][key];
highlight_edge(state, new_state); highlight_edge(state, new_state);
highlight_state(new_state); highlight_state(new_state);
new_states.push(new_state); new_states.push([new_state, null]);
} }
} }
} }
if(regexp_states[state]) {
var slice_start = previous_start != null ? previous_start : start_index;
for(var key in regexp_states[state]) {
var re = new RegExp("^" + key + "$");
var accumulation = input.slice(slice_start, end_index);
if(re.test(accumulation)) {
var new_state = regexp_states[state][key];
highlight_edge(state, new_state);
highlight_state(new_state);
new_states.push([new_state, null]);
}
// retry the same regexp with the accumulated data either way
new_states.push([state, slice_start]);
}
}
} }
if(new_states.length == 0) {
return;
}
states = new_states; states = new_states;
start_index = end_index;
}); });
for(var key in states) { for(var key in states) {
var state = states[key]; var state_parts = states[key];
var state = state_parts[0];
var slice_start = state_parts[1];
// we must ignore ones that are still accepting more data
if (slice_start != null) continue;
if(accepting[state]) { if(accepting[state]) {
for(var mkey in svg_edges[state]) { highlight_finish(state);
if(!regexp_states[mkey] && !string_states[mkey]) {
highlight_edge(state, mkey);
highlight_finish(mkey);
}
}
} else { } else {
highlight_state(state, "red"); highlight_state(state, "red");
} }

View file

@ -8,13 +8,13 @@ module ActionDispatch
class TestBuilder < ActiveSupport::TestCase class TestBuilder < ActiveSupport::TestCase
def test_following_states_multi def test_following_states_multi
table = tt ["a|a"] table = tt ["a|a"]
assert_equal 1, table.move([0], "a").length assert_equal 1, table.move([[0, nil]], "a", 0, 1).length
end end
def test_following_states_multi_regexp def test_following_states_multi_regexp
table = tt [":a|b"] table = tt [":a|b"]
assert_equal 1, table.move([0], "fooo").length assert_equal 1, table.move([[0, nil]], "fooo", 0, 4).length
assert_equal 2, table.move([0], "b").length assert_equal 2, table.move([[0, nil]], "b", 0, 1).length
end end
def test_multi_path def test_multi_path
@ -25,8 +25,8 @@ module ActionDispatch
[2, "b"], [2, "b"],
[2, "/"], [2, "/"],
[1, "c"], [1, "c"],
].inject([0]) { |state, (exp, sym)| ].inject([[0, nil]]) { |state, (exp, sym)|
new = table.move(state, sym) new = table.move(state, sym, 0, sym.length)
assert_equal exp, new.length assert_equal exp, new.length
new new
} }
@ -60,6 +60,23 @@ module ActionDispatch
assert_equal 2, memos.length assert_equal 2, memos.length
end end
def test_catchall
table = tt %w{
/
/*unmatched_route
}
sim = Simulator.new table
# matches just the /*unmatched_route
memos = sim.memos "/test"
assert_equal 1, memos.length
# matches just the /
memos = sim.memos "/"
assert_equal 1, memos.length
end
private private
def ast(strings) def ast(strings)
parser = Journey::Parser.new parser = Journey::Parser.new

View file

@ -89,7 +89,7 @@ module ActionDispatch
sim = GTG::Simulator.new builder.transition_table sim = GTG::Simulator.new builder.transition_table
memos = sim.memos "/articles/new" memos = sim.memos "/articles/new"
assert_equal [paths[1], paths[3]], memos assert_equal Set[paths[1], paths[3]], Set.new(memos)
end end
private private

View file

@ -45,12 +45,24 @@ module ActionDispatch
assert_equal 1, @routes.anchored_routes.length assert_equal 1, @routes.anchored_routes.length
assert_empty @routes.custom_routes assert_empty @routes.custom_routes
mapper.get "/hello/:who", to: "foo#bar", as: "bar", who: /\d/ mapper.get "/not_anchored/hello/:who-notanchored", to: "foo#bar", as: "bar", who: /\d/, anchor: false
assert_equal 1, @routes.custom_routes.length assert_equal 1, @routes.custom_routes.length
assert_equal 1, @routes.anchored_routes.length assert_equal 1, @routes.anchored_routes.length
end end
def test_custom_anchored_not_partition_route
mapper.get "/foo/:bar", to: "foo#bar", as: "aaron"
assert_equal 1, @routes.anchored_routes.length
assert_empty @routes.custom_routes
mapper.get "/:user/:repo", to: "foo#bar", as: "bar", repo: /[\w.]+/
assert_equal 2, @routes.anchored_routes.length
assert_empty @routes.custom_routes
end
def test_first_name_wins def test_first_name_wins
mapper.get "/hello", to: "foo#bar", as: "aaron" mapper.get "/hello", to: "foo#bar", as: "aaron"
assert_raise(ArgumentError) do assert_raise(ArgumentError) do