actionpack: Improve performance by allowing routes with custom regexes in the FSM.

The FSM used to find matching routes was previously limited to patterns
that contained parameters with the default regexp / no constraints. In
large route sets where many parameters are constrained by custom regexp,
these routes all fall back on a slow linear search over the route list.

These custom regexes were not previously able to be included in the FSM
because it transitioned between nodes using only fragments of the URI,
or path separators [/.?], but a custom regex may cross a path separator
boundary. To work around this, the TransitionTable is improved to
support remembering the point within the matching string where we started,
and continuing to attempt to match from that point up to the end of each
token. Only parameters not on a path separator boundary must still match
with a linear search after this change (e.g. `/foo-:bar/`).

This results in performance for constrained routes that matches that of
ones using the default regexp.

Benchmark:
https://gist.github.com/theojulienne/e91fc338d180e1710e29c81a5d701fab

Before:
```
Calculating -------------------------------------
    without params      6.466k (±12.7%) i/s -     31.648k in   5.009453s
params without constraints
                        5.867k (±12.9%) i/s -     28.842k in   5.032637s
params with constraints
                      909.661  (± 7.9%) i/s -      4.536k in   5.023534s
```

After:
```
Calculating -------------------------------------
    without params      6.387k (±11.9%) i/s -     31.728k in   5.068939s
params without constraints
                        5.824k (±13.2%) i/s -     28.650k in   5.043701s
params with constraints
                        5.406k (±11.7%) i/s -     26.931k in   5.076412s
```

For github.com, which has many constrained parameters, a random sampling
of 10 URL patterns can be matched approximately 2-4x faster than before.

This commit fixes symbols as constraints, as tested in
6ab985da28
This commit is contained in:
Theo Julienne 2020-12-16 08:57:10 +11:00
parent 6ab985da28
commit 16a80882f9
12 changed files with 216 additions and 73 deletions

View File

@ -6,13 +6,13 @@ module ActionDispatch
module Journey # :nodoc:
module GTG # :nodoc:
class Builder # :nodoc:
DUMMY = Nodes::Dummy.new
DUMMY_END_NODE = Nodes::Dummy.new
attr_reader :root, :ast, :endpoints
def initialize(root)
@root = root
@ast = Nodes::Cat.new root, DUMMY
@ast = Nodes::Cat.new root, DUMMY_END_NODE
@followpos = build_followpos
end
@ -28,12 +28,12 @@ module ActionDispatch
marked[s] = true # mark s
s.group_by { |state| symbol(state) }.each do |sym, ps|
u = ps.flat_map { |l| @followpos[l] }
u = ps.flat_map { |l| @followpos[l] }.uniq
next if u.empty?
from = state_id[s]
if u.all? { |pos| pos == DUMMY }
if u.all? { |pos| pos == DUMMY_END_NODE }
to = state_id[Object.new]
dtrans[from, to] = sym
dtrans.add_accepting(to)
@ -43,9 +43,9 @@ module ActionDispatch
to = state_id[u]
dtrans[from, to] = sym
if u.include?(DUMMY)
if u.include?(DUMMY_END_NODE)
ps.each do |state|
if @followpos[state].include?(DUMMY)
if @followpos[state].include?(DUMMY_END_NODE)
dtrans.add_memo(to, state.memo)
end
end
@ -66,7 +66,10 @@ module ActionDispatch
when Nodes::Group
true
when Nodes::Star
true
# the default star regex is /(.+)/ which is NOT nullable
# but since different constraints can be provided we must
# actually check if this is the case or not.
node.regexp.match?("")
when Nodes::Or
node.children.any? { |c| nullable?(c) }
when Nodes::Cat
@ -104,7 +107,7 @@ module ActionDispatch
def lastpos(node)
case node
when Nodes::Star
firstpos(node.left)
lastpos(node.left)
when Nodes::Or
node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!)
when Nodes::Cat
@ -131,10 +134,6 @@ module ActionDispatch
lastpos(n.left).each do |i|
table[i] += firstpos(n.right)
end
when Nodes::Star
lastpos(n).each do |i|
table[i] += firstpos(n)
end
end
end
table

View File

@ -14,7 +14,7 @@ module ActionDispatch
end
class Simulator # :nodoc:
INITIAL_STATE = [0].freeze
INITIAL_STATE = [ [0, nil] ].freeze
attr_reader :tt
@ -25,13 +25,19 @@ module ActionDispatch
def memos(string)
input = StringScanner.new(string)
state = INITIAL_STATE
start_index = 0
while sym = input.scan(%r([/.?]|[^/.?]+))
state = tt.move(state, sym)
end_index = start_index + sym.length
state = tt.move(state, string, start_index, end_index)
start_index = end_index
end
acceptance_states = state.each_with_object([]) do |s, memos|
memos.concat(tt.memo(s)) if tt.accepting?(s)
acceptance_states = state.each_with_object([]) do |s_d, memos|
s, idx = s_d
memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s)
end
acceptance_states.empty? ? yield : acceptance_states

View File

@ -10,11 +10,15 @@ module ActionDispatch
attr_reader :memos
DEFAULT_EXP = /[^.\/?]+/
DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/)
def initialize
@regexp_states = {}
@string_states = {}
@accepting = {}
@memos = Hash.new { |h, k| h[k] = [] }
@stdparam_states = {}
@regexp_states = {}
@string_states = {}
@accepting = {}
@memos = Hash.new { |h, k| h[k] = [] }
end
def add_accepting(state)
@ -41,22 +45,54 @@ module ActionDispatch
Array(t)
end
def move(t, a)
def move(t, full_string, start_index, end_index)
return [] if t.empty?
regexps = []
strings = []
next_states = []
t.each { |s|
if states = @regexp_states[s]
states.each { |re, v| regexps << v if re.match?(a) && !v.nil? }
tok = full_string.slice(start_index, end_index - start_index)
token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok)
t.each { |s, previous_start|
if previous_start.nil?
# In the simple case of a "default" param regex do this fast-path
# and add all next states.
if token_matches_default_component && states = @stdparam_states[s]
states.each { |re, v| next_states << [v, nil].freeze if !v.nil? }
end
# When we have a literal string, we can just pull the next state
if states = @string_states[s]
next_states << [states[tok], nil].freeze unless states[tok].nil?
end
end
if states = @string_states[s]
strings << states[a] unless states[a].nil?
# For regexes that aren't the "default" style, they may potentially
# not be terminated by the first "token" [./?], so we need to continue
# to attempt to match this regexp as well as any successful paths that
# continue out of it. both paths could be valid.
if states = @regexp_states[s]
slice_start = if previous_start.nil?
start_index
else
previous_start
end
slice_length = end_index - slice_start
curr_slice = full_string.slice(slice_start, slice_length)
states.each { |re, v|
# if we match, we can try moving past this
next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice)
}
# and regardless, we must continue accepting tokens and retrying this regexp.
# we need to remember where we started as well so we can take bigger slices.
next_states << [s, slice_start].freeze
end
}
strings.concat regexps
next_states
end
def as_json(options = nil)
@ -69,9 +105,10 @@ module ActionDispatch
end
{
regexp_states: simple_regexp,
string_states: @string_states,
accepting: @accepting
regexp_states: simple_regexp,
string_states: @string_states,
stdparam_states: @stdparam_states,
accepting: @accepting
}
end
@ -125,18 +162,29 @@ module ActionDispatch
def []=(from, to, sym)
to_mappings = states_hash_for(sym)[from] ||= {}
case sym
when Regexp
# we must match the whole string to a token boundary
sym = Regexp.new(/\A#{sym}\Z/)
when Symbol
# account for symbols in the constraints the same as strings
sym = sym.to_s
end
to_mappings[sym] = to
end
def states
ss = @string_states.keys + @string_states.values.flat_map(&:values)
ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values)
rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values)
(ss + rs).uniq
(ss + ps + rs).uniq
end
def transitions
@string_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
} + @stdparam_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
} + @regexp_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
}
@ -145,10 +193,14 @@ module ActionDispatch
private
def states_hash_for(sym)
case sym
when String
when String, Symbol
@string_states
when Regexp
@regexp_states
if sym == DEFAULT_EXP
@stdparam_states
else
@regexp_states
end
else
raise ArgumentError, "unknown symbol: %s" % sym.class
end

View File

@ -104,6 +104,15 @@ module ActionDispatch
end
class Star < Unary # :nodoc:
attr_accessor :regexp
def initialize(left)
super(left)
# By default wildcard routes are non-greedy and must match something.
@regexp = /.+?/
end
def star?; true; end
def type; :STAR; end

View File

@ -39,6 +39,27 @@ module ActionDispatch
@spec
end
def requirements_anchored?
# each required param must not be surrounded by a literal, otherwise it isn't simple to chunk-match the url piecemeal
terminals = ast.find_all { |t| t.is_a?(Nodes::Terminal) }
terminals.each_with_index { |s, index|
next if index < 1
next unless s.symbol?
back = terminals[index - 1]
fwd = terminals[index + 1]
# we also don't support this yet, constraints must be regexps
return false if s.regexp.is_a?(Array)
return false if back.literal?
return false if !fwd.nil? && fwd.literal?
}
true
end
def names
@names ||= spec.find_all(&:symbol?).map(&:name)
end

View File

@ -84,6 +84,8 @@ module ActionDispatch
@decorated_ast ||= begin
decorated_ast = path.ast
decorated_ast.find_all(&:terminal?).each { |n| n.memo = self }
# inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string.
decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? }
decorated_ast
end
end

View File

@ -85,7 +85,7 @@ module ActionDispatch
private
def partitioned_routes
routes.partition { |r|
r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? }
r.path.anchored && r.path.requirements_anchored?
}
end

View File

@ -41,7 +41,7 @@ module ActionDispatch
end
def partition_route(route)
if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?)
if route.path.anchored && route.path.requirements_anchored?
anchored_routes << route
else
custom_routes << route

View File

@ -68,7 +68,7 @@ function highlight_state(index, color) {
}
function highlight_finish(index) {
svg_nodes[index].select('polygon')
svg_nodes[index].select('ellipse')
.style("fill", "while")
.transition().duration(500)
.style("fill", "blue");
@ -76,54 +76,79 @@ function highlight_finish(index) {
function match(input) {
reset_graph();
var table = tt();
var states = [0];
var regexp_states = table['regexp_states'];
var string_states = table['string_states'];
var accepting = table['accepting'];
var table = tt();
var states = [[0, null]];
var regexp_states = table['regexp_states'];
var string_states = table['string_states'];
var stdparam_states = table['stdparam_states'];
var accepting = table['accepting'];
var default_re = new RegExp("^[^.\/?]+$");
var start_index = 0;
highlight_state(0);
tokenize(input, function(token) {
var end_index = start_index + token.length;
var new_states = [];
for(var key in states) {
var state = states[key];
var state_parts = states[key];
var state = state_parts[0];
var previous_start = state_parts[1];
if(string_states[state] && string_states[state][token]) {
var new_state = string_states[state][token];
highlight_edge(state, new_state);
highlight_state(new_state);
new_states.push(new_state);
}
if(previous_start == null) {
if(string_states[state] && string_states[state][token]) {
var new_state = string_states[state][token];
highlight_edge(state, new_state);
highlight_state(new_state);
new_states.push([new_state, null]);
}
if(regexp_states[state]) {
for(var key in regexp_states[state]) {
var re = new RegExp("^" + key + "$");
if(re.test(token)) {
var new_state = regexp_states[state][key];
if(stdparam_states[state] && default_re.test(token)) {
for(var key in stdparam_states[state]) {
var new_state = stdparam_states[state][key];
highlight_edge(state, new_state);
highlight_state(new_state);
new_states.push(new_state);
new_states.push([new_state, null]);
}
}
}
if(regexp_states[state]) {
var slice_start = previous_start != null ? previous_start : start_index;
for(var key in regexp_states[state]) {
var re = new RegExp("^" + key + "$");
var accumulation = input.slice(slice_start, end_index);
if(re.test(accumulation)) {
var new_state = regexp_states[state][key];
highlight_edge(state, new_state);
highlight_state(new_state);
new_states.push([new_state, null]);
}
// retry the same regexp with the accumulated data either way
new_states.push([state, slice_start]);
}
}
}
if(new_states.length == 0) {
return;
}
states = new_states;
start_index = end_index;
});
for(var key in states) {
var state = states[key];
var state_parts = states[key];
var state = state_parts[0];
var slice_start = state_parts[1];
// we must ignore ones that are still accepting more data
if (slice_start != null) continue;
if(accepting[state]) {
for(var mkey in svg_edges[state]) {
if(!regexp_states[mkey] && !string_states[mkey]) {
highlight_edge(state, mkey);
highlight_finish(mkey);
}
}
highlight_finish(state);
} else {
highlight_state(state, "red");
}

View File

@ -8,13 +8,13 @@ module ActionDispatch
class TestBuilder < ActiveSupport::TestCase
def test_following_states_multi
table = tt ["a|a"]
assert_equal 1, table.move([0], "a").length
assert_equal 1, table.move([[0, nil]], "a", 0, 1).length
end
def test_following_states_multi_regexp
table = tt [":a|b"]
assert_equal 1, table.move([0], "fooo").length
assert_equal 2, table.move([0], "b").length
assert_equal 1, table.move([[0, nil]], "fooo", 0, 4).length
assert_equal 2, table.move([[0, nil]], "b", 0, 1).length
end
def test_multi_path
@ -25,8 +25,8 @@ module ActionDispatch
[2, "b"],
[2, "/"],
[1, "c"],
].inject([0]) { |state, (exp, sym)|
new = table.move(state, sym)
].inject([[0, nil]]) { |state, (exp, sym)|
new = table.move(state, sym, 0, sym.length)
assert_equal exp, new.length
new
}
@ -60,6 +60,23 @@ module ActionDispatch
assert_equal 2, memos.length
end
def test_catchall
table = tt %w{
/
/*unmatched_route
}
sim = Simulator.new table
# matches just the /*unmatched_route
memos = sim.memos "/test"
assert_equal 1, memos.length
# matches just the /
memos = sim.memos "/"
assert_equal 1, memos.length
end
private
def ast(strings)
parser = Journey::Parser.new

View File

@ -89,7 +89,7 @@ module ActionDispatch
sim = GTG::Simulator.new builder.transition_table
memos = sim.memos "/articles/new"
assert_equal [paths[1], paths[3]], memos
assert_equal Set[paths[1], paths[3]], Set.new(memos)
end
private

View File

@ -45,12 +45,24 @@ module ActionDispatch
assert_equal 1, @routes.anchored_routes.length
assert_empty @routes.custom_routes
mapper.get "/hello/:who", to: "foo#bar", as: "bar", who: /\d/
mapper.get "/not_anchored/hello/:who-notanchored", to: "foo#bar", as: "bar", who: /\d/, anchor: false
assert_equal 1, @routes.custom_routes.length
assert_equal 1, @routes.anchored_routes.length
end
def test_custom_anchored_not_partition_route
mapper.get "/foo/:bar", to: "foo#bar", as: "aaron"
assert_equal 1, @routes.anchored_routes.length
assert_empty @routes.custom_routes
mapper.get "/:user/:repo", to: "foo#bar", as: "bar", repo: /[\w.]+/
assert_equal 2, @routes.anchored_routes.length
assert_empty @routes.custom_routes
end
def test_first_name_wins
mapper.get "/hello", to: "foo#bar", as: "aaron"
assert_raise(ArgumentError) do