From 6ab985da28fd4e2b5c93b50ed4952f43459f8bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?= Date: Tue, 5 Jan 2021 22:20:21 +0000 Subject: [PATCH] Revert "actionpack: Improve performance by allowing routes with custom regexes in the FSM." This reverts commit c67c764aabb7aaabe4034245481842b0df1480bc. This broken constaints using symbols as values. Test added on this commit. --- .../action_dispatch/journey/gtg/builder.rb | 23 ++--- .../action_dispatch/journey/gtg/simulator.rb | 14 +--- .../journey/gtg/transition_table.rb | 84 ++++--------------- .../lib/action_dispatch/journey/nodes/node.rb | 9 -- .../action_dispatch/journey/path/pattern.rb | 21 ----- .../lib/action_dispatch/journey/route.rb | 2 - .../lib/action_dispatch/journey/router.rb | 2 +- .../lib/action_dispatch/journey/routes.rb | 2 +- .../action_dispatch/journey/visualizer/fsm.js | 73 ++++++---------- actionpack/test/controller/routing_test.rb | 12 +++ actionpack/test/journey/gtg/builder_test.rb | 27 ++---- .../test/journey/gtg/transition_table_test.rb | 2 +- actionpack/test/journey/routes_test.rb | 14 +--- 13 files changed, 79 insertions(+), 206 deletions(-) diff --git a/actionpack/lib/action_dispatch/journey/gtg/builder.rb b/actionpack/lib/action_dispatch/journey/gtg/builder.rb index e83cda55a9..794cfb7de5 100644 --- a/actionpack/lib/action_dispatch/journey/gtg/builder.rb +++ b/actionpack/lib/action_dispatch/journey/gtg/builder.rb @@ -6,13 +6,13 @@ module ActionDispatch module Journey # :nodoc: module GTG # :nodoc: class Builder # :nodoc: - DUMMY_END_NODE = Nodes::Dummy.new + DUMMY = Nodes::Dummy.new attr_reader :root, :ast, :endpoints def initialize(root) @root = root - @ast = Nodes::Cat.new root, DUMMY_END_NODE + @ast = Nodes::Cat.new root, DUMMY @followpos = build_followpos end @@ -28,12 +28,12 @@ module ActionDispatch marked[s] = true # mark s s.group_by { |state| symbol(state) }.each do |sym, ps| - u = ps.flat_map { |l| @followpos[l] }.uniq + u = ps.flat_map { |l| @followpos[l] } next if u.empty? from = state_id[s] - if u.all? { |pos| pos == DUMMY_END_NODE } + if u.all? { |pos| pos == DUMMY } to = state_id[Object.new] dtrans[from, to] = sym dtrans.add_accepting(to) @@ -43,9 +43,9 @@ module ActionDispatch to = state_id[u] dtrans[from, to] = sym - if u.include?(DUMMY_END_NODE) + if u.include?(DUMMY) ps.each do |state| - if @followpos[state].include?(DUMMY_END_NODE) + if @followpos[state].include?(DUMMY) dtrans.add_memo(to, state.memo) end end @@ -66,10 +66,7 @@ module ActionDispatch when Nodes::Group true when Nodes::Star - # the default star regex is /(.+)/ which is NOT nullable - # but since different constraints can be provided we must - # actually check if this is the case or not. - node.regexp.match?("") + true when Nodes::Or node.children.any? { |c| nullable?(c) } when Nodes::Cat @@ -107,7 +104,7 @@ module ActionDispatch def lastpos(node) case node when Nodes::Star - lastpos(node.left) + firstpos(node.left) when Nodes::Or node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!) when Nodes::Cat @@ -134,6 +131,10 @@ module ActionDispatch lastpos(n.left).each do |i| table[i] += firstpos(n.right) end + when Nodes::Star + lastpos(n).each do |i| + table[i] += firstpos(n) + end end end table diff --git a/actionpack/lib/action_dispatch/journey/gtg/simulator.rb b/actionpack/lib/action_dispatch/journey/gtg/simulator.rb index 67aa399c88..6038cff3c1 100644 --- a/actionpack/lib/action_dispatch/journey/gtg/simulator.rb +++ b/actionpack/lib/action_dispatch/journey/gtg/simulator.rb @@ -14,7 +14,7 @@ module ActionDispatch end class Simulator # :nodoc: - INITIAL_STATE = [ [0, nil] ].freeze + INITIAL_STATE = [0].freeze attr_reader :tt @@ -25,19 +25,13 @@ module ActionDispatch def memos(string) input = StringScanner.new(string) state = INITIAL_STATE - start_index = 0 while sym = input.scan(%r([/.?]|[^/.?]+)) - end_index = start_index + sym.length - - state = tt.move(state, string, start_index, end_index) - - start_index = end_index + state = tt.move(state, sym) end - acceptance_states = state.each_with_object([]) do |s_d, memos| - s, idx = s_d - memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s) + acceptance_states = state.each_with_object([]) do |s, memos| + memos.concat(tt.memo(s)) if tt.accepting?(s) end acceptance_states.empty? ? yield : acceptance_states diff --git a/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb b/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb index eca9a9d24e..2db6523e2f 100644 --- a/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb +++ b/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb @@ -10,15 +10,11 @@ module ActionDispatch attr_reader :memos - DEFAULT_EXP = /[^.\/?]+/ - DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/) - def initialize - @stdparam_states = {} - @regexp_states = {} - @string_states = {} - @accepting = {} - @memos = Hash.new { |h, k| h[k] = [] } + @regexp_states = {} + @string_states = {} + @accepting = {} + @memos = Hash.new { |h, k| h[k] = [] } end def add_accepting(state) @@ -45,54 +41,22 @@ module ActionDispatch Array(t) end - def move(t, full_string, start_index, end_index) + def move(t, a) return [] if t.empty? - next_states = [] + regexps = [] + strings = [] - tok = full_string.slice(start_index, end_index - start_index) - token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok) - - t.each { |s, previous_start| - if previous_start.nil? - # In the simple case of a "default" param regex do this fast-path - # and add all next states. - if token_matches_default_component && states = @stdparam_states[s] - states.each { |re, v| next_states << [v, nil].freeze if !v.nil? } - end - - # When we have a literal string, we can just pull the next state - if states = @string_states[s] - next_states << [states[tok], nil].freeze unless states[tok].nil? - end + t.each { |s| + if states = @regexp_states[s] + states.each { |re, v| regexps << v if re.match?(a) && !v.nil? } end - # For regexes that aren't the "default" style, they may potentially - # not be terminated by the first "token" [./?], so we need to continue - # to attempt to match this regexp as well as any successful paths that - # continue out of it. both paths could be valid. - if states = @regexp_states[s] - slice_start = if previous_start.nil? - start_index - else - previous_start - end - - slice_length = end_index - slice_start - curr_slice = full_string.slice(slice_start, slice_length) - - states.each { |re, v| - # if we match, we can try moving past this - next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice) - } - - # and regardless, we must continue accepting tokens and retrying this regexp. - # we need to remember where we started as well so we can take bigger slices. - next_states << [s, slice_start].freeze + if states = @string_states[s] + strings << states[a] unless states[a].nil? end } - - next_states + strings.concat regexps end def as_json(options = nil) @@ -105,10 +69,9 @@ module ActionDispatch end { - regexp_states: simple_regexp, - string_states: @string_states, - stdparam_states: @stdparam_states, - accepting: @accepting + regexp_states: simple_regexp, + string_states: @string_states, + accepting: @accepting } end @@ -162,25 +125,18 @@ module ActionDispatch def []=(from, to, sym) to_mappings = states_hash_for(sym)[from] ||= {} - if sym.is_a? Regexp - # we must match the whole string to a token boundary - sym = Regexp.new(/\A#{sym}\Z/) - end to_mappings[sym] = to end def states ss = @string_states.keys + @string_states.values.flat_map(&:values) - ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values) rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values) - (ss + ps + rs).uniq + (ss + rs).uniq end def transitions @string_states.flat_map { |from, hash| hash.map { |s, to| [from, s, to] } - } + @stdparam_states.flat_map { |from, hash| - hash.map { |s, to| [from, s, to] } } + @regexp_states.flat_map { |from, hash| hash.map { |s, to| [from, s, to] } } @@ -192,11 +148,7 @@ module ActionDispatch when String @string_states when Regexp - if sym == DEFAULT_EXP - @stdparam_states - else - @regexp_states - end + @regexp_states else raise ArgumentError, "unknown symbol: %s" % sym.class end diff --git a/actionpack/lib/action_dispatch/journey/nodes/node.rb b/actionpack/lib/action_dispatch/journey/nodes/node.rb index ea0a6835e9..bfc9fbfb5e 100644 --- a/actionpack/lib/action_dispatch/journey/nodes/node.rb +++ b/actionpack/lib/action_dispatch/journey/nodes/node.rb @@ -104,15 +104,6 @@ module ActionDispatch end class Star < Unary # :nodoc: - attr_accessor :regexp - - def initialize(left) - super(left) - - # By default wildcard routes are non-greedy and must match something. - @regexp = /.+?/ - end - def star?; true; end def type; :STAR; end diff --git a/actionpack/lib/action_dispatch/journey/path/pattern.rb b/actionpack/lib/action_dispatch/journey/path/pattern.rb index 7d47800932..36b205cee6 100644 --- a/actionpack/lib/action_dispatch/journey/path/pattern.rb +++ b/actionpack/lib/action_dispatch/journey/path/pattern.rb @@ -39,27 +39,6 @@ module ActionDispatch @spec end - def requirements_anchored? - # each required param must not be surrounded by a literal, otherwise it isn't simple to chunk-match the url piecemeal - terminals = ast.find_all { |t| t.is_a?(Nodes::Terminal) } - - terminals.each_with_index { |s, index| - next if index < 1 - next unless s.symbol? - - back = terminals[index - 1] - fwd = terminals[index + 1] - - # we also don't support this yet, constraints must be regexps - return false if s.regexp.is_a?(Array) - - return false if back.literal? - return false if !fwd.nil? && fwd.literal? - } - - true - end - def names @names ||= spec.find_all(&:symbol?).map(&:name) end diff --git a/actionpack/lib/action_dispatch/journey/route.rb b/actionpack/lib/action_dispatch/journey/route.rb index 38dfcea956..21ee766e5c 100644 --- a/actionpack/lib/action_dispatch/journey/route.rb +++ b/actionpack/lib/action_dispatch/journey/route.rb @@ -84,8 +84,6 @@ module ActionDispatch @decorated_ast ||= begin decorated_ast = path.ast decorated_ast.find_all(&:terminal?).each { |n| n.memo = self } - # inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string. - decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? } decorated_ast end end diff --git a/actionpack/lib/action_dispatch/journey/router.rb b/actionpack/lib/action_dispatch/journey/router.rb index 27671b65da..b6b71720db 100644 --- a/actionpack/lib/action_dispatch/journey/router.rb +++ b/actionpack/lib/action_dispatch/journey/router.rb @@ -85,7 +85,7 @@ module ActionDispatch private def partitioned_routes routes.partition { |r| - r.path.anchored && r.path.requirements_anchored? + r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? } } end diff --git a/actionpack/lib/action_dispatch/journey/routes.rb b/actionpack/lib/action_dispatch/journey/routes.rb index 9dfea744e8..3f055db66d 100644 --- a/actionpack/lib/action_dispatch/journey/routes.rb +++ b/actionpack/lib/action_dispatch/journey/routes.rb @@ -41,7 +41,7 @@ module ActionDispatch end def partition_route(route) - if route.path.anchored && route.path.requirements_anchored? + if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?) anchored_routes << route else custom_routes << route diff --git a/actionpack/lib/action_dispatch/journey/visualizer/fsm.js b/actionpack/lib/action_dispatch/journey/visualizer/fsm.js index 6daf0cb1c1..d9bcaef928 100644 --- a/actionpack/lib/action_dispatch/journey/visualizer/fsm.js +++ b/actionpack/lib/action_dispatch/journey/visualizer/fsm.js @@ -68,7 +68,7 @@ function highlight_state(index, color) { } function highlight_finish(index) { - svg_nodes[index].select('ellipse') + svg_nodes[index].select('polygon') .style("fill", "while") .transition().duration(500) .style("fill", "blue"); @@ -76,79 +76,54 @@ function highlight_finish(index) { function match(input) { reset_graph(); - var table = tt(); - var states = [[0, null]]; - var regexp_states = table['regexp_states']; - var string_states = table['string_states']; - var stdparam_states = table['stdparam_states']; - var accepting = table['accepting']; - var default_re = new RegExp("^[^.\/?]+$"); - var start_index = 0; + var table = tt(); + var states = [0]; + var regexp_states = table['regexp_states']; + var string_states = table['string_states']; + var accepting = table['accepting']; highlight_state(0); tokenize(input, function(token) { - var end_index = start_index + token.length; - var new_states = []; for(var key in states) { - var state_parts = states[key]; - var state = state_parts[0]; - var previous_start = state_parts[1]; + var state = states[key]; - if(previous_start == null) { - if(string_states[state] && string_states[state][token]) { - var new_state = string_states[state][token]; - highlight_edge(state, new_state); - highlight_state(new_state); - new_states.push([new_state, null]); - } - - if(stdparam_states[state] && default_re.test(token)) { - for(var key in stdparam_states[state]) { - var new_state = stdparam_states[state][key]; - highlight_edge(state, new_state); - highlight_state(new_state); - new_states.push([new_state, null]); - } - } + if(string_states[state] && string_states[state][token]) { + var new_state = string_states[state][token]; + highlight_edge(state, new_state); + highlight_state(new_state); + new_states.push(new_state); } if(regexp_states[state]) { - var slice_start = previous_start != null ? previous_start : start_index; - for(var key in regexp_states[state]) { var re = new RegExp("^" + key + "$"); - - var accumulation = input.slice(slice_start, end_index); - - if(re.test(accumulation)) { + if(re.test(token)) { var new_state = regexp_states[state][key]; highlight_edge(state, new_state); highlight_state(new_state); - new_states.push([new_state, null]); + new_states.push(new_state); } - - // retry the same regexp with the accumulated data either way - new_states.push([state, slice_start]); } } } + if(new_states.length == 0) { + return; + } states = new_states; - start_index = end_index; }); for(var key in states) { - var state_parts = states[key]; - var state = state_parts[0]; - var slice_start = state_parts[1]; - - // we must ignore ones that are still accepting more data - if (slice_start != null) continue; - + var state = states[key]; if(accepting[state]) { - highlight_finish(state); + for(var mkey in svg_edges[state]) { + if(!regexp_states[mkey] && !string_states[mkey]) { + highlight_edge(state, mkey); + highlight_finish(mkey); + } + } } else { highlight_state(state, "red"); } diff --git a/actionpack/test/controller/routing_test.rb b/actionpack/test/controller/routing_test.rb index 7ab249b907..3da7ab2799 100644 --- a/actionpack/test/controller/routing_test.rb +++ b/actionpack/test/controller/routing_test.rb @@ -244,6 +244,18 @@ class LegacyRouteSetTests < ActiveSupport::TestCase assert_equal "clients", get(URI("http://clients.example.org/")) end + def test_format_symbol_constraints + rs.draw do + get "/api", constraints: { format: :json }, + to: lambda { |env| [200, {}, %w{json}] } + get "/api", constraints: { format: :xml }, + to: lambda { |env| [200, {}, %w{xml}] } + end + + assert_equal "json", get(URI("http://www.example.org/api.json")) + assert_equal "xml", get(URI("http://clients.example.org/api.xml")) + end + def test_lambda_constraints rs.draw do get "/", constraints: lambda { |req| diff --git a/actionpack/test/journey/gtg/builder_test.rb b/actionpack/test/journey/gtg/builder_test.rb index 719c3c9959..9dc786e261 100644 --- a/actionpack/test/journey/gtg/builder_test.rb +++ b/actionpack/test/journey/gtg/builder_test.rb @@ -8,13 +8,13 @@ module ActionDispatch class TestBuilder < ActiveSupport::TestCase def test_following_states_multi table = tt ["a|a"] - assert_equal 1, table.move([[0, nil]], "a", 0, 1).length + assert_equal 1, table.move([0], "a").length end def test_following_states_multi_regexp table = tt [":a|b"] - assert_equal 1, table.move([[0, nil]], "fooo", 0, 4).length - assert_equal 2, table.move([[0, nil]], "b", 0, 1).length + assert_equal 1, table.move([0], "fooo").length + assert_equal 2, table.move([0], "b").length end def test_multi_path @@ -25,8 +25,8 @@ module ActionDispatch [2, "b"], [2, "/"], [1, "c"], - ].inject([[0, nil]]) { |state, (exp, sym)| - new = table.move(state, sym, 0, sym.length) + ].inject([0]) { |state, (exp, sym)| + new = table.move(state, sym) assert_equal exp, new.length new } @@ -60,23 +60,6 @@ module ActionDispatch assert_equal 2, memos.length end - def test_catchall - table = tt %w{ - / - /*unmatched_route - } - - sim = Simulator.new table - - # matches just the /*unmatched_route - memos = sim.memos "/test" - assert_equal 1, memos.length - - # matches just the / - memos = sim.memos "/" - assert_equal 1, memos.length - end - private def ast(strings) parser = Journey::Parser.new diff --git a/actionpack/test/journey/gtg/transition_table_test.rb b/actionpack/test/journey/gtg/transition_table_test.rb index 7394764a4c..9044934f05 100644 --- a/actionpack/test/journey/gtg/transition_table_test.rb +++ b/actionpack/test/journey/gtg/transition_table_test.rb @@ -89,7 +89,7 @@ module ActionDispatch sim = GTG::Simulator.new builder.transition_table memos = sim.memos "/articles/new" - assert_equal Set[paths[1], paths[3]], Set.new(memos) + assert_equal [paths[1], paths[3]], memos end private diff --git a/actionpack/test/journey/routes_test.rb b/actionpack/test/journey/routes_test.rb index 1b23d72849..d5c81a8421 100644 --- a/actionpack/test/journey/routes_test.rb +++ b/actionpack/test/journey/routes_test.rb @@ -45,24 +45,12 @@ module ActionDispatch assert_equal 1, @routes.anchored_routes.length assert_empty @routes.custom_routes - mapper.get "/not_anchored/hello/:who-notanchored", to: "foo#bar", as: "bar", who: /\d/, anchor: false + mapper.get "/hello/:who", to: "foo#bar", as: "bar", who: /\d/ assert_equal 1, @routes.custom_routes.length assert_equal 1, @routes.anchored_routes.length end - def test_custom_anchored_not_partition_route - mapper.get "/foo/:bar", to: "foo#bar", as: "aaron" - - assert_equal 1, @routes.anchored_routes.length - assert_empty @routes.custom_routes - - mapper.get "/:user/:repo", to: "foo#bar", as: "bar", repo: /[\w.]+/ - - assert_equal 2, @routes.anchored_routes.length - assert_empty @routes.custom_routes - end - def test_first_name_wins mapper.get "/hello", to: "foo#bar", as: "aaron" assert_raise(ArgumentError) do