mirror of
https://github.com/rails/rails.git
synced 2022-11-09 12:12:34 -05:00
actionpack: Improve performance by allowing routes with custom regexes in the FSM.
The FSM used to find matching routes was previously limited to patterns
that contained parameters with the default regexp / no constraints. In
large route sets where many parameters are constrained by custom regexp,
these routes all fall back on a slow linear search over the route list.
These custom regexes were not previously able to be included in the FSM
because it transitioned between nodes using only fragments of the URI,
or path separators [/.?], but a custom regex may cross a path separator
boundary. To work around this, the TransitionTable is improved to
support remembering a point within the matching string that we started,
and continuing to attempt to match from that point up to the end of each
token. Only parameters not on a path separator boundary must still match
with a linear search after this change (e.g. `/foo-:bar/`).
This results in performance for constrainted routes that matches that of
ones using the default regexp.
Benchmark:
https://gist.github.com/theojulienne/e91fc338d180e1710e29c81a5d701fab
Before:
```
Calculating -------------------------------------
without params 6.466k (±12.7%) i/s - 31.648k in 5.009453s
params without constraints
5.867k (±12.9%) i/s - 28.842k in 5.032637s
params with constraints
909.661 (± 7.9%) i/s - 4.536k in 5.023534s
```
After:
```
Calculating -------------------------------------
without params 6.387k (±11.9%) i/s - 31.728k in 5.068939s
params without constraints
5.824k (±13.2%) i/s - 28.650k in 5.043701s
params with constraints
5.406k (±11.7%) i/s - 26.931k in 5.076412s
```
For github.com which has many constrainted parameters, a random sampling
of 10 URL patterns can be matched approximately 2-4x faster than before.
This commit fixes symbols as constrains as tested in
6ab985da28
This commit is contained in:
parent
6ab985da28
commit
16a80882f9
12 changed files with 216 additions and 73 deletions
|
@ -6,13 +6,13 @@ module ActionDispatch
|
|||
module Journey # :nodoc:
|
||||
module GTG # :nodoc:
|
||||
class Builder # :nodoc:
|
||||
DUMMY = Nodes::Dummy.new
|
||||
DUMMY_END_NODE = Nodes::Dummy.new
|
||||
|
||||
attr_reader :root, :ast, :endpoints
|
||||
|
||||
def initialize(root)
|
||||
@root = root
|
||||
@ast = Nodes::Cat.new root, DUMMY
|
||||
@ast = Nodes::Cat.new root, DUMMY_END_NODE
|
||||
@followpos = build_followpos
|
||||
end
|
||||
|
||||
|
@ -28,12 +28,12 @@ module ActionDispatch
|
|||
marked[s] = true # mark s
|
||||
|
||||
s.group_by { |state| symbol(state) }.each do |sym, ps|
|
||||
u = ps.flat_map { |l| @followpos[l] }
|
||||
u = ps.flat_map { |l| @followpos[l] }.uniq
|
||||
next if u.empty?
|
||||
|
||||
from = state_id[s]
|
||||
|
||||
if u.all? { |pos| pos == DUMMY }
|
||||
if u.all? { |pos| pos == DUMMY_END_NODE }
|
||||
to = state_id[Object.new]
|
||||
dtrans[from, to] = sym
|
||||
dtrans.add_accepting(to)
|
||||
|
@ -43,9 +43,9 @@ module ActionDispatch
|
|||
to = state_id[u]
|
||||
dtrans[from, to] = sym
|
||||
|
||||
if u.include?(DUMMY)
|
||||
if u.include?(DUMMY_END_NODE)
|
||||
ps.each do |state|
|
||||
if @followpos[state].include?(DUMMY)
|
||||
if @followpos[state].include?(DUMMY_END_NODE)
|
||||
dtrans.add_memo(to, state.memo)
|
||||
end
|
||||
end
|
||||
|
@ -66,7 +66,10 @@ module ActionDispatch
|
|||
when Nodes::Group
|
||||
true
|
||||
when Nodes::Star
|
||||
true
|
||||
# the default star regex is /(.+)/ which is NOT nullable
|
||||
# but since different constraints can be provided we must
|
||||
# actually check if this is the case or not.
|
||||
node.regexp.match?("")
|
||||
when Nodes::Or
|
||||
node.children.any? { |c| nullable?(c) }
|
||||
when Nodes::Cat
|
||||
|
@ -104,7 +107,7 @@ module ActionDispatch
|
|||
def lastpos(node)
|
||||
case node
|
||||
when Nodes::Star
|
||||
firstpos(node.left)
|
||||
lastpos(node.left)
|
||||
when Nodes::Or
|
||||
node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!)
|
||||
when Nodes::Cat
|
||||
|
@ -131,10 +134,6 @@ module ActionDispatch
|
|||
lastpos(n.left).each do |i|
|
||||
table[i] += firstpos(n.right)
|
||||
end
|
||||
when Nodes::Star
|
||||
lastpos(n).each do |i|
|
||||
table[i] += firstpos(n)
|
||||
end
|
||||
end
|
||||
end
|
||||
table
|
||||
|
|
|
@ -14,7 +14,7 @@ module ActionDispatch
|
|||
end
|
||||
|
||||
class Simulator # :nodoc:
|
||||
INITIAL_STATE = [0].freeze
|
||||
INITIAL_STATE = [ [0, nil] ].freeze
|
||||
|
||||
attr_reader :tt
|
||||
|
||||
|
@ -25,13 +25,19 @@ module ActionDispatch
|
|||
def memos(string)
|
||||
input = StringScanner.new(string)
|
||||
state = INITIAL_STATE
|
||||
start_index = 0
|
||||
|
||||
while sym = input.scan(%r([/.?]|[^/.?]+))
|
||||
state = tt.move(state, sym)
|
||||
end_index = start_index + sym.length
|
||||
|
||||
state = tt.move(state, string, start_index, end_index)
|
||||
|
||||
start_index = end_index
|
||||
end
|
||||
|
||||
acceptance_states = state.each_with_object([]) do |s, memos|
|
||||
memos.concat(tt.memo(s)) if tt.accepting?(s)
|
||||
acceptance_states = state.each_with_object([]) do |s_d, memos|
|
||||
s, idx = s_d
|
||||
memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s)
|
||||
end
|
||||
|
||||
acceptance_states.empty? ? yield : acceptance_states
|
||||
|
|
|
@ -10,7 +10,11 @@ module ActionDispatch
|
|||
|
||||
attr_reader :memos
|
||||
|
||||
DEFAULT_EXP = /[^.\/?]+/
|
||||
DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/)
|
||||
|
||||
def initialize
|
||||
@stdparam_states = {}
|
||||
@regexp_states = {}
|
||||
@string_states = {}
|
||||
@accepting = {}
|
||||
|
@ -41,22 +45,54 @@ module ActionDispatch
|
|||
Array(t)
|
||||
end
|
||||
|
||||
def move(t, a)
|
||||
def move(t, full_string, start_index, end_index)
|
||||
return [] if t.empty?
|
||||
|
||||
regexps = []
|
||||
strings = []
|
||||
next_states = []
|
||||
|
||||
t.each { |s|
|
||||
if states = @regexp_states[s]
|
||||
states.each { |re, v| regexps << v if re.match?(a) && !v.nil? }
|
||||
tok = full_string.slice(start_index, end_index - start_index)
|
||||
token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok)
|
||||
|
||||
t.each { |s, previous_start|
|
||||
if previous_start.nil?
|
||||
# In the simple case of a "default" param regex do this fast-path
|
||||
# and add all next states.
|
||||
if token_matches_default_component && states = @stdparam_states[s]
|
||||
states.each { |re, v| next_states << [v, nil].freeze if !v.nil? }
|
||||
end
|
||||
|
||||
# When we have a literal string, we can just pull the next state
|
||||
if states = @string_states[s]
|
||||
strings << states[a] unless states[a].nil?
|
||||
next_states << [states[tok], nil].freeze unless states[tok].nil?
|
||||
end
|
||||
end
|
||||
|
||||
# For regexes that aren't the "default" style, they may potentially
|
||||
# not be terminated by the first "token" [./?], so we need to continue
|
||||
# to attempt to match this regexp as well as any successful paths that
|
||||
# continue out of it. both paths could be valid.
|
||||
if states = @regexp_states[s]
|
||||
slice_start = if previous_start.nil?
|
||||
start_index
|
||||
else
|
||||
previous_start
|
||||
end
|
||||
|
||||
slice_length = end_index - slice_start
|
||||
curr_slice = full_string.slice(slice_start, slice_length)
|
||||
|
||||
states.each { |re, v|
|
||||
# if we match, we can try moving past this
|
||||
next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice)
|
||||
}
|
||||
|
||||
# and regardless, we must continue accepting tokens and retrying this regexp.
|
||||
# we need to remember where we started as well so we can take bigger slices.
|
||||
next_states << [s, slice_start].freeze
|
||||
end
|
||||
}
|
||||
strings.concat regexps
|
||||
|
||||
next_states
|
||||
end
|
||||
|
||||
def as_json(options = nil)
|
||||
|
@ -71,6 +107,7 @@ module ActionDispatch
|
|||
{
|
||||
regexp_states: simple_regexp,
|
||||
string_states: @string_states,
|
||||
stdparam_states: @stdparam_states,
|
||||
accepting: @accepting
|
||||
}
|
||||
end
|
||||
|
@ -125,18 +162,29 @@ module ActionDispatch
|
|||
|
||||
def []=(from, to, sym)
|
||||
to_mappings = states_hash_for(sym)[from] ||= {}
|
||||
case sym
|
||||
when Regexp
|
||||
# we must match the whole string to a token boundary
|
||||
sym = Regexp.new(/\A#{sym}\Z/)
|
||||
when Symbol
|
||||
# account for symbols in the constraints the same as strings
|
||||
sym = sym.to_s
|
||||
end
|
||||
to_mappings[sym] = to
|
||||
end
|
||||
|
||||
def states
|
||||
ss = @string_states.keys + @string_states.values.flat_map(&:values)
|
||||
ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values)
|
||||
rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values)
|
||||
(ss + rs).uniq
|
||||
(ss + ps + rs).uniq
|
||||
end
|
||||
|
||||
def transitions
|
||||
@string_states.flat_map { |from, hash|
|
||||
hash.map { |s, to| [from, s, to] }
|
||||
} + @stdparam_states.flat_map { |from, hash|
|
||||
hash.map { |s, to| [from, s, to] }
|
||||
} + @regexp_states.flat_map { |from, hash|
|
||||
hash.map { |s, to| [from, s, to] }
|
||||
}
|
||||
|
@ -145,10 +193,14 @@ module ActionDispatch
|
|||
private
|
||||
def states_hash_for(sym)
|
||||
case sym
|
||||
when String
|
||||
when String, Symbol
|
||||
@string_states
|
||||
when Regexp
|
||||
if sym == DEFAULT_EXP
|
||||
@stdparam_states
|
||||
else
|
||||
@regexp_states
|
||||
end
|
||||
else
|
||||
raise ArgumentError, "unknown symbol: %s" % sym.class
|
||||
end
|
||||
|
|
|
@ -104,6 +104,15 @@ module ActionDispatch
|
|||
end
|
||||
|
||||
class Star < Unary # :nodoc:
|
||||
attr_accessor :regexp
|
||||
|
||||
def initialize(left)
|
||||
super(left)
|
||||
|
||||
# By default wildcard routes are non-greedy and must match something.
|
||||
@regexp = /.+?/
|
||||
end
|
||||
|
||||
def star?; true; end
|
||||
def type; :STAR; end
|
||||
|
||||
|
|
|
@ -39,6 +39,27 @@ module ActionDispatch
|
|||
@spec
|
||||
end
|
||||
|
||||
def requirements_anchored?
|
||||
# each required param must not be surrounded by a literal, otherwise it isn't simple to chunk-match the url piecemeal
|
||||
terminals = ast.find_all { |t| t.is_a?(Nodes::Terminal) }
|
||||
|
||||
terminals.each_with_index { |s, index|
|
||||
next if index < 1
|
||||
next unless s.symbol?
|
||||
|
||||
back = terminals[index - 1]
|
||||
fwd = terminals[index + 1]
|
||||
|
||||
# we also don't support this yet, constraints must be regexps
|
||||
return false if s.regexp.is_a?(Array)
|
||||
|
||||
return false if back.literal?
|
||||
return false if !fwd.nil? && fwd.literal?
|
||||
}
|
||||
|
||||
true
|
||||
end
|
||||
|
||||
def names
|
||||
@names ||= spec.find_all(&:symbol?).map(&:name)
|
||||
end
|
||||
|
|
|
@ -84,6 +84,8 @@ module ActionDispatch
|
|||
@decorated_ast ||= begin
|
||||
decorated_ast = path.ast
|
||||
decorated_ast.find_all(&:terminal?).each { |n| n.memo = self }
|
||||
# inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string.
|
||||
decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? }
|
||||
decorated_ast
|
||||
end
|
||||
end
|
||||
|
|
|
@ -85,7 +85,7 @@ module ActionDispatch
|
|||
private
|
||||
def partitioned_routes
|
||||
routes.partition { |r|
|
||||
r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? }
|
||||
r.path.anchored && r.path.requirements_anchored?
|
||||
}
|
||||
end
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ module ActionDispatch
|
|||
end
|
||||
|
||||
def partition_route(route)
|
||||
if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?)
|
||||
if route.path.anchored && route.path.requirements_anchored?
|
||||
anchored_routes << route
|
||||
else
|
||||
custom_routes << route
|
||||
|
|
|
@ -68,7 +68,7 @@ function highlight_state(index, color) {
|
|||
}
|
||||
|
||||
function highlight_finish(index) {
|
||||
svg_nodes[index].select('polygon')
|
||||
svg_nodes[index].select('ellipse')
|
||||
.style("fill", "while")
|
||||
.transition().duration(500)
|
||||
.style("fill", "blue");
|
||||
|
@ -77,53 +77,78 @@ function highlight_finish(index) {
|
|||
function match(input) {
|
||||
reset_graph();
|
||||
var table = tt();
|
||||
var states = [0];
|
||||
var states = [[0, null]];
|
||||
var regexp_states = table['regexp_states'];
|
||||
var string_states = table['string_states'];
|
||||
var stdparam_states = table['stdparam_states'];
|
||||
var accepting = table['accepting'];
|
||||
var default_re = new RegExp("^[^.\/?]+$");
|
||||
var start_index = 0;
|
||||
|
||||
highlight_state(0);
|
||||
|
||||
tokenize(input, function(token) {
|
||||
var end_index = start_index + token.length;
|
||||
|
||||
var new_states = [];
|
||||
for(var key in states) {
|
||||
var state = states[key];
|
||||
var state_parts = states[key];
|
||||
var state = state_parts[0];
|
||||
var previous_start = state_parts[1];
|
||||
|
||||
if(previous_start == null) {
|
||||
if(string_states[state] && string_states[state][token]) {
|
||||
var new_state = string_states[state][token];
|
||||
highlight_edge(state, new_state);
|
||||
highlight_state(new_state);
|
||||
new_states.push(new_state);
|
||||
new_states.push([new_state, null]);
|
||||
}
|
||||
|
||||
if(stdparam_states[state] && default_re.test(token)) {
|
||||
for(var key in stdparam_states[state]) {
|
||||
var new_state = stdparam_states[state][key];
|
||||
highlight_edge(state, new_state);
|
||||
highlight_state(new_state);
|
||||
new_states.push([new_state, null]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(regexp_states[state]) {
|
||||
var slice_start = previous_start != null ? previous_start : start_index;
|
||||
|
||||
for(var key in regexp_states[state]) {
|
||||
var re = new RegExp("^" + key + "$");
|
||||
if(re.test(token)) {
|
||||
|
||||
var accumulation = input.slice(slice_start, end_index);
|
||||
|
||||
if(re.test(accumulation)) {
|
||||
var new_state = regexp_states[state][key];
|
||||
highlight_edge(state, new_state);
|
||||
highlight_state(new_state);
|
||||
new_states.push(new_state);
|
||||
new_states.push([new_state, null]);
|
||||
}
|
||||
|
||||
// retry the same regexp with the accumulated data either way
|
||||
new_states.push([state, slice_start]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(new_states.length == 0) {
|
||||
return;
|
||||
}
|
||||
states = new_states;
|
||||
start_index = end_index;
|
||||
});
|
||||
|
||||
for(var key in states) {
|
||||
var state = states[key];
|
||||
var state_parts = states[key];
|
||||
var state = state_parts[0];
|
||||
var slice_start = state_parts[1];
|
||||
|
||||
// we must ignore ones that are still accepting more data
|
||||
if (slice_start != null) continue;
|
||||
|
||||
if(accepting[state]) {
|
||||
for(var mkey in svg_edges[state]) {
|
||||
if(!regexp_states[mkey] && !string_states[mkey]) {
|
||||
highlight_edge(state, mkey);
|
||||
highlight_finish(mkey);
|
||||
}
|
||||
}
|
||||
highlight_finish(state);
|
||||
} else {
|
||||
highlight_state(state, "red");
|
||||
}
|
||||
|
|
|
@ -8,13 +8,13 @@ module ActionDispatch
|
|||
class TestBuilder < ActiveSupport::TestCase
|
||||
def test_following_states_multi
|
||||
table = tt ["a|a"]
|
||||
assert_equal 1, table.move([0], "a").length
|
||||
assert_equal 1, table.move([[0, nil]], "a", 0, 1).length
|
||||
end
|
||||
|
||||
def test_following_states_multi_regexp
|
||||
table = tt [":a|b"]
|
||||
assert_equal 1, table.move([0], "fooo").length
|
||||
assert_equal 2, table.move([0], "b").length
|
||||
assert_equal 1, table.move([[0, nil]], "fooo", 0, 4).length
|
||||
assert_equal 2, table.move([[0, nil]], "b", 0, 1).length
|
||||
end
|
||||
|
||||
def test_multi_path
|
||||
|
@ -25,8 +25,8 @@ module ActionDispatch
|
|||
[2, "b"],
|
||||
[2, "/"],
|
||||
[1, "c"],
|
||||
].inject([0]) { |state, (exp, sym)|
|
||||
new = table.move(state, sym)
|
||||
].inject([[0, nil]]) { |state, (exp, sym)|
|
||||
new = table.move(state, sym, 0, sym.length)
|
||||
assert_equal exp, new.length
|
||||
new
|
||||
}
|
||||
|
@ -60,6 +60,23 @@ module ActionDispatch
|
|||
assert_equal 2, memos.length
|
||||
end
|
||||
|
||||
def test_catchall
|
||||
table = tt %w{
|
||||
/
|
||||
/*unmatched_route
|
||||
}
|
||||
|
||||
sim = Simulator.new table
|
||||
|
||||
# matches just the /*unmatched_route
|
||||
memos = sim.memos "/test"
|
||||
assert_equal 1, memos.length
|
||||
|
||||
# matches just the /
|
||||
memos = sim.memos "/"
|
||||
assert_equal 1, memos.length
|
||||
end
|
||||
|
||||
private
|
||||
def ast(strings)
|
||||
parser = Journey::Parser.new
|
||||
|
|
|
@ -89,7 +89,7 @@ module ActionDispatch
|
|||
sim = GTG::Simulator.new builder.transition_table
|
||||
|
||||
memos = sim.memos "/articles/new"
|
||||
assert_equal [paths[1], paths[3]], memos
|
||||
assert_equal Set[paths[1], paths[3]], Set.new(memos)
|
||||
end
|
||||
|
||||
private
|
||||
|
|
|
@ -45,12 +45,24 @@ module ActionDispatch
|
|||
assert_equal 1, @routes.anchored_routes.length
|
||||
assert_empty @routes.custom_routes
|
||||
|
||||
mapper.get "/hello/:who", to: "foo#bar", as: "bar", who: /\d/
|
||||
mapper.get "/not_anchored/hello/:who-notanchored", to: "foo#bar", as: "bar", who: /\d/, anchor: false
|
||||
|
||||
assert_equal 1, @routes.custom_routes.length
|
||||
assert_equal 1, @routes.anchored_routes.length
|
||||
end
|
||||
|
||||
def test_custom_anchored_not_partition_route
|
||||
mapper.get "/foo/:bar", to: "foo#bar", as: "aaron"
|
||||
|
||||
assert_equal 1, @routes.anchored_routes.length
|
||||
assert_empty @routes.custom_routes
|
||||
|
||||
mapper.get "/:user/:repo", to: "foo#bar", as: "bar", repo: /[\w.]+/
|
||||
|
||||
assert_equal 2, @routes.anchored_routes.length
|
||||
assert_empty @routes.custom_routes
|
||||
end
|
||||
|
||||
def test_first_name_wins
|
||||
mapper.get "/hello", to: "foo#bar", as: "aaron"
|
||||
assert_raise(ArgumentError) do
|
||||
|
|
Loading…
Reference in a new issue