1
0
Fork 0
mirror of https://github.com/rails/rails.git synced 2022-11-09 12:12:34 -05:00

actionpack: Improve performance by allowing routes with custom regexes in the FSM.

The FSM used to find matching routes was previously limited to patterns
that contained parameters with the default regexp / no constraints. In
large route sets where many parameters are constrained by custom regexp,
these routes all fall back on a slow linear search over the route list.

These custom regexes were not previously able to be included in the FSM
because it transitioned between nodes using only fragments of the URI,
or path separators [/.?], but a custom regex may cross a path separator
boundary. To work around this, the TransitionTable is improved to
support remembering the point within the matching string at which
matching started, and continuing to attempt to match from that point up to the end of each
token. Only parameters not on a path separator boundary must still match
with a linear search after this change (e.g. `/foo-:bar/`).

This results in performance for constrained routes that matches that of
ones using the default regexp.

Benchmark:
https://gist.github.com/theojulienne/e91fc338d180e1710e29c81a5d701fab

Before:
```
Calculating -------------------------------------
    without params      6.466k (±12.7%) i/s -     31.648k in   5.009453s
params without constraints
                        5.867k (±12.9%) i/s -     28.842k in   5.032637s
params with constraints
                      909.661  (± 7.9%) i/s -      4.536k in   5.023534s
```

After:
```
Calculating -------------------------------------
    without params      6.387k (±11.9%) i/s -     31.728k in   5.068939s
params without constraints
                        5.824k (±13.2%) i/s -     28.650k in   5.043701s
params with constraints
                        5.406k (±11.7%) i/s -     26.931k in   5.076412s
```

For github.com, which has many constrained parameters, a random sampling
of 10 URL patterns can be matched approximately 2-4x faster than before.

This commit fixes symbols as constraints as tested in
6ab985da28
This commit is contained in:
Theo Julienne 2020-12-16 08:57:10 +11:00
parent 6ab985da28
commit 16a80882f9
12 changed files with 216 additions and 73 deletions

View file

@ -6,13 +6,13 @@ module ActionDispatch
module Journey # :nodoc: module Journey # :nodoc:
module GTG # :nodoc: module GTG # :nodoc:
class Builder # :nodoc: class Builder # :nodoc:
DUMMY = Nodes::Dummy.new DUMMY_END_NODE = Nodes::Dummy.new
attr_reader :root, :ast, :endpoints attr_reader :root, :ast, :endpoints
def initialize(root) def initialize(root)
@root = root @root = root
@ast = Nodes::Cat.new root, DUMMY @ast = Nodes::Cat.new root, DUMMY_END_NODE
@followpos = build_followpos @followpos = build_followpos
end end
@ -28,12 +28,12 @@ module ActionDispatch
marked[s] = true # mark s marked[s] = true # mark s
s.group_by { |state| symbol(state) }.each do |sym, ps| s.group_by { |state| symbol(state) }.each do |sym, ps|
u = ps.flat_map { |l| @followpos[l] } u = ps.flat_map { |l| @followpos[l] }.uniq
next if u.empty? next if u.empty?
from = state_id[s] from = state_id[s]
if u.all? { |pos| pos == DUMMY } if u.all? { |pos| pos == DUMMY_END_NODE }
to = state_id[Object.new] to = state_id[Object.new]
dtrans[from, to] = sym dtrans[from, to] = sym
dtrans.add_accepting(to) dtrans.add_accepting(to)
@ -43,9 +43,9 @@ module ActionDispatch
to = state_id[u] to = state_id[u]
dtrans[from, to] = sym dtrans[from, to] = sym
if u.include?(DUMMY) if u.include?(DUMMY_END_NODE)
ps.each do |state| ps.each do |state|
if @followpos[state].include?(DUMMY) if @followpos[state].include?(DUMMY_END_NODE)
dtrans.add_memo(to, state.memo) dtrans.add_memo(to, state.memo)
end end
end end
@ -66,7 +66,10 @@ module ActionDispatch
when Nodes::Group when Nodes::Group
true true
when Nodes::Star when Nodes::Star
true # the default star regex is /(.+)/ which is NOT nullable
# but since different constraints can be provided we must
# actually check if this is the case or not.
node.regexp.match?("")
when Nodes::Or when Nodes::Or
node.children.any? { |c| nullable?(c) } node.children.any? { |c| nullable?(c) }
when Nodes::Cat when Nodes::Cat
@ -104,7 +107,7 @@ module ActionDispatch
def lastpos(node) def lastpos(node)
case node case node
when Nodes::Star when Nodes::Star
firstpos(node.left) lastpos(node.left)
when Nodes::Or when Nodes::Or
node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!) node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!)
when Nodes::Cat when Nodes::Cat
@ -131,10 +134,6 @@ module ActionDispatch
lastpos(n.left).each do |i| lastpos(n.left).each do |i|
table[i] += firstpos(n.right) table[i] += firstpos(n.right)
end end
when Nodes::Star
lastpos(n).each do |i|
table[i] += firstpos(n)
end
end end
end end
table table

View file

@ -14,7 +14,7 @@ module ActionDispatch
end end
class Simulator # :nodoc: class Simulator # :nodoc:
INITIAL_STATE = [0].freeze INITIAL_STATE = [ [0, nil] ].freeze
attr_reader :tt attr_reader :tt
@ -25,13 +25,19 @@ module ActionDispatch
def memos(string) def memos(string)
input = StringScanner.new(string) input = StringScanner.new(string)
state = INITIAL_STATE state = INITIAL_STATE
start_index = 0
while sym = input.scan(%r([/.?]|[^/.?]+)) while sym = input.scan(%r([/.?]|[^/.?]+))
state = tt.move(state, sym) end_index = start_index + sym.length
state = tt.move(state, string, start_index, end_index)
start_index = end_index
end end
acceptance_states = state.each_with_object([]) do |s, memos| acceptance_states = state.each_with_object([]) do |s_d, memos|
memos.concat(tt.memo(s)) if tt.accepting?(s) s, idx = s_d
memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s)
end end
acceptance_states.empty? ? yield : acceptance_states acceptance_states.empty? ? yield : acceptance_states

View file

@ -10,11 +10,15 @@ module ActionDispatch
attr_reader :memos attr_reader :memos
DEFAULT_EXP = /[^.\/?]+/
DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/)
def initialize def initialize
@regexp_states = {} @stdparam_states = {}
@string_states = {} @regexp_states = {}
@accepting = {} @string_states = {}
@memos = Hash.new { |h, k| h[k] = [] } @accepting = {}
@memos = Hash.new { |h, k| h[k] = [] }
end end
def add_accepting(state) def add_accepting(state)
@ -41,22 +45,54 @@ module ActionDispatch
Array(t) Array(t)
end end
def move(t, a) def move(t, full_string, start_index, end_index)
return [] if t.empty? return [] if t.empty?
regexps = [] next_states = []
strings = []
t.each { |s| tok = full_string.slice(start_index, end_index - start_index)
if states = @regexp_states[s] token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok)
states.each { |re, v| regexps << v if re.match?(a) && !v.nil? }
t.each { |s, previous_start|
if previous_start.nil?
# In the simple case of a "default" param regex do this fast-path
# and add all next states.
if token_matches_default_component && states = @stdparam_states[s]
states.each { |re, v| next_states << [v, nil].freeze if !v.nil? }
end
# When we have a literal string, we can just pull the next state
if states = @string_states[s]
next_states << [states[tok], nil].freeze unless states[tok].nil?
end
end end
if states = @string_states[s] # For regexes that aren't the "default" style, they may potentially
strings << states[a] unless states[a].nil? # not be terminated by the first "token" [./?], so we need to continue
# to attempt to match this regexp as well as any successful paths that
# continue out of it. both paths could be valid.
if states = @regexp_states[s]
slice_start = if previous_start.nil?
start_index
else
previous_start
end
slice_length = end_index - slice_start
curr_slice = full_string.slice(slice_start, slice_length)
states.each { |re, v|
# if we match, we can try moving past this
next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice)
}
# and regardless, we must continue accepting tokens and retrying this regexp.
# we need to remember where we started as well so we can take bigger slices.
next_states << [s, slice_start].freeze
end end
} }
strings.concat regexps
next_states
end end
def as_json(options = nil) def as_json(options = nil)
@ -69,9 +105,10 @@ module ActionDispatch
end end
{ {
regexp_states: simple_regexp, regexp_states: simple_regexp,
string_states: @string_states, string_states: @string_states,
accepting: @accepting stdparam_states: @stdparam_states,
accepting: @accepting
} }
end end
@ -125,18 +162,29 @@ module ActionDispatch
def []=(from, to, sym) def []=(from, to, sym)
to_mappings = states_hash_for(sym)[from] ||= {} to_mappings = states_hash_for(sym)[from] ||= {}
case sym
when Regexp
# we must match the whole string to a token boundary
sym = Regexp.new(/\A#{sym}\Z/)
when Symbol
# account for symbols in the constraints the same as strings
sym = sym.to_s
end
to_mappings[sym] = to to_mappings[sym] = to
end end
def states def states
ss = @string_states.keys + @string_states.values.flat_map(&:values) ss = @string_states.keys + @string_states.values.flat_map(&:values)
ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values)
rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values) rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values)
(ss + rs).uniq (ss + ps + rs).uniq
end end
def transitions def transitions
@string_states.flat_map { |from, hash| @string_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] } hash.map { |s, to| [from, s, to] }
} + @stdparam_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
} + @regexp_states.flat_map { |from, hash| } + @regexp_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] } hash.map { |s, to| [from, s, to] }
} }
@ -145,10 +193,14 @@ module ActionDispatch
private private
def states_hash_for(sym) def states_hash_for(sym)
case sym case sym
when String when String, Symbol
@string_states @string_states
when Regexp when Regexp
@regexp_states if sym == DEFAULT_EXP
@stdparam_states
else
@regexp_states
end
else else
raise ArgumentError, "unknown symbol: %s" % sym.class raise ArgumentError, "unknown symbol: %s" % sym.class
end end

View file

@ -104,6 +104,15 @@ module ActionDispatch
end end
class Star < Unary # :nodoc: class Star < Unary # :nodoc:
attr_accessor :regexp
def initialize(left)
super(left)
# By default wildcard routes are non-greedy and must match something.
@regexp = /.+?/
end
def star?; true; end def star?; true; end
def type; :STAR; end def type; :STAR; end

View file

@ -39,6 +39,27 @@ module ActionDispatch
@spec @spec
end end
def requirements_anchored?
# each required param must not be surrounded by a literal, otherwise it isn't simple to chunk-match the url piecemeal
terminals = ast.find_all { |t| t.is_a?(Nodes::Terminal) }
terminals.each_with_index { |s, index|
next if index < 1
next unless s.symbol?
back = terminals[index - 1]
fwd = terminals[index + 1]
# we also don't support this yet, constraints must be regexps
return false if s.regexp.is_a?(Array)
return false if back.literal?
return false if !fwd.nil? && fwd.literal?
}
true
end
def names def names
@names ||= spec.find_all(&:symbol?).map(&:name) @names ||= spec.find_all(&:symbol?).map(&:name)
end end

View file

@ -84,6 +84,8 @@ module ActionDispatch
@decorated_ast ||= begin @decorated_ast ||= begin
decorated_ast = path.ast decorated_ast = path.ast
decorated_ast.find_all(&:terminal?).each { |n| n.memo = self } decorated_ast.find_all(&:terminal?).each { |n| n.memo = self }
# inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string.
decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? }
decorated_ast decorated_ast
end end
end end

View file

@ -85,7 +85,7 @@ module ActionDispatch
private private
def partitioned_routes def partitioned_routes
routes.partition { |r| routes.partition { |r|
r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? } r.path.anchored && r.path.requirements_anchored?
} }
end end

View file

@ -41,7 +41,7 @@ module ActionDispatch
end end
def partition_route(route) def partition_route(route)
if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?) if route.path.anchored && route.path.requirements_anchored?
anchored_routes << route anchored_routes << route
else else
custom_routes << route custom_routes << route

View file

@ -68,7 +68,7 @@ function highlight_state(index, color) {
} }
function highlight_finish(index) { function highlight_finish(index) {
svg_nodes[index].select('polygon') svg_nodes[index].select('ellipse')
.style("fill", "while") .style("fill", "while")
.transition().duration(500) .transition().duration(500)
.style("fill", "blue"); .style("fill", "blue");
@ -76,54 +76,79 @@ function highlight_finish(index) {
function match(input) { function match(input) {
reset_graph(); reset_graph();
var table = tt(); var table = tt();
var states = [0]; var states = [[0, null]];
var regexp_states = table['regexp_states']; var regexp_states = table['regexp_states'];
var string_states = table['string_states']; var string_states = table['string_states'];
var accepting = table['accepting']; var stdparam_states = table['stdparam_states'];
var accepting = table['accepting'];
var default_re = new RegExp("^[^.\/?]+$");
var start_index = 0;
highlight_state(0); highlight_state(0);
tokenize(input, function(token) { tokenize(input, function(token) {
var end_index = start_index + token.length;
var new_states = []; var new_states = [];
for(var key in states) { for(var key in states) {
var state = states[key]; var state_parts = states[key];
var state = state_parts[0];
var previous_start = state_parts[1];
if(string_states[state] && string_states[state][token]) { if(previous_start == null) {
var new_state = string_states[state][token]; if(string_states[state] && string_states[state][token]) {
highlight_edge(state, new_state); var new_state = string_states[state][token];
highlight_state(new_state); highlight_edge(state, new_state);
new_states.push(new_state); highlight_state(new_state);
} new_states.push([new_state, null]);
}
if(regexp_states[state]) { if(stdparam_states[state] && default_re.test(token)) {
for(var key in regexp_states[state]) { for(var key in stdparam_states[state]) {
var re = new RegExp("^" + key + "$"); var new_state = stdparam_states[state][key];
if(re.test(token)) {
var new_state = regexp_states[state][key];
highlight_edge(state, new_state); highlight_edge(state, new_state);
highlight_state(new_state); highlight_state(new_state);
new_states.push(new_state); new_states.push([new_state, null]);
} }
} }
} }
if(regexp_states[state]) {
var slice_start = previous_start != null ? previous_start : start_index;
for(var key in regexp_states[state]) {
var re = new RegExp("^" + key + "$");
var accumulation = input.slice(slice_start, end_index);
if(re.test(accumulation)) {
var new_state = regexp_states[state][key];
highlight_edge(state, new_state);
highlight_state(new_state);
new_states.push([new_state, null]);
}
// retry the same regexp with the accumulated data either way
new_states.push([state, slice_start]);
}
}
} }
if(new_states.length == 0) {
return;
}
states = new_states; states = new_states;
start_index = end_index;
}); });
for(var key in states) { for(var key in states) {
var state = states[key]; var state_parts = states[key];
var state = state_parts[0];
var slice_start = state_parts[1];
// we must ignore ones that are still accepting more data
if (slice_start != null) continue;
if(accepting[state]) { if(accepting[state]) {
for(var mkey in svg_edges[state]) { highlight_finish(state);
if(!regexp_states[mkey] && !string_states[mkey]) {
highlight_edge(state, mkey);
highlight_finish(mkey);
}
}
} else { } else {
highlight_state(state, "red"); highlight_state(state, "red");
} }

View file

@ -8,13 +8,13 @@ module ActionDispatch
class TestBuilder < ActiveSupport::TestCase class TestBuilder < ActiveSupport::TestCase
def test_following_states_multi def test_following_states_multi
table = tt ["a|a"] table = tt ["a|a"]
assert_equal 1, table.move([0], "a").length assert_equal 1, table.move([[0, nil]], "a", 0, 1).length
end end
def test_following_states_multi_regexp def test_following_states_multi_regexp
table = tt [":a|b"] table = tt [":a|b"]
assert_equal 1, table.move([0], "fooo").length assert_equal 1, table.move([[0, nil]], "fooo", 0, 4).length
assert_equal 2, table.move([0], "b").length assert_equal 2, table.move([[0, nil]], "b", 0, 1).length
end end
def test_multi_path def test_multi_path
@ -25,8 +25,8 @@ module ActionDispatch
[2, "b"], [2, "b"],
[2, "/"], [2, "/"],
[1, "c"], [1, "c"],
].inject([0]) { |state, (exp, sym)| ].inject([[0, nil]]) { |state, (exp, sym)|
new = table.move(state, sym) new = table.move(state, sym, 0, sym.length)
assert_equal exp, new.length assert_equal exp, new.length
new new
} }
@ -60,6 +60,23 @@ module ActionDispatch
assert_equal 2, memos.length assert_equal 2, memos.length
end end
def test_catchall
table = tt %w{
/
/*unmatched_route
}
sim = Simulator.new table
# matches just the /*unmatched_route
memos = sim.memos "/test"
assert_equal 1, memos.length
# matches just the /
memos = sim.memos "/"
assert_equal 1, memos.length
end
private private
def ast(strings) def ast(strings)
parser = Journey::Parser.new parser = Journey::Parser.new

View file

@ -89,7 +89,7 @@ module ActionDispatch
sim = GTG::Simulator.new builder.transition_table sim = GTG::Simulator.new builder.transition_table
memos = sim.memos "/articles/new" memos = sim.memos "/articles/new"
assert_equal [paths[1], paths[3]], memos assert_equal Set[paths[1], paths[3]], Set.new(memos)
end end
private private

View file

@ -45,12 +45,24 @@ module ActionDispatch
assert_equal 1, @routes.anchored_routes.length assert_equal 1, @routes.anchored_routes.length
assert_empty @routes.custom_routes assert_empty @routes.custom_routes
mapper.get "/hello/:who", to: "foo#bar", as: "bar", who: /\d/ mapper.get "/not_anchored/hello/:who-notanchored", to: "foo#bar", as: "bar", who: /\d/, anchor: false
assert_equal 1, @routes.custom_routes.length assert_equal 1, @routes.custom_routes.length
assert_equal 1, @routes.anchored_routes.length assert_equal 1, @routes.anchored_routes.length
end end
def test_custom_anchored_not_partition_route
mapper.get "/foo/:bar", to: "foo#bar", as: "aaron"
assert_equal 1, @routes.anchored_routes.length
assert_empty @routes.custom_routes
mapper.get "/:user/:repo", to: "foo#bar", as: "bar", repo: /[\w.]+/
assert_equal 2, @routes.anchored_routes.length
assert_empty @routes.custom_routes
end
def test_first_name_wins def test_first_name_wins
mapper.get "/hello", to: "foo#bar", as: "aaron" mapper.get "/hello", to: "foo#bar", as: "aaron"
assert_raise(ArgumentError) do assert_raise(ArgumentError) do