diff --git a/actionpack/lib/action_dispatch/journey/gtg/builder.rb b/actionpack/lib/action_dispatch/journey/gtg/builder.rb index 794cfb7de5..e83cda55a9 100644 --- a/actionpack/lib/action_dispatch/journey/gtg/builder.rb +++ b/actionpack/lib/action_dispatch/journey/gtg/builder.rb @@ -6,13 +6,13 @@ module ActionDispatch module Journey # :nodoc: module GTG # :nodoc: class Builder # :nodoc: - DUMMY = Nodes::Dummy.new + DUMMY_END_NODE = Nodes::Dummy.new attr_reader :root, :ast, :endpoints def initialize(root) @root = root - @ast = Nodes::Cat.new root, DUMMY + @ast = Nodes::Cat.new root, DUMMY_END_NODE @followpos = build_followpos end @@ -28,12 +28,12 @@ module ActionDispatch marked[s] = true # mark s s.group_by { |state| symbol(state) }.each do |sym, ps| - u = ps.flat_map { |l| @followpos[l] } + u = ps.flat_map { |l| @followpos[l] }.uniq next if u.empty? from = state_id[s] - if u.all? { |pos| pos == DUMMY } + if u.all? { |pos| pos == DUMMY_END_NODE } to = state_id[Object.new] dtrans[from, to] = sym dtrans.add_accepting(to) @@ -43,9 +43,9 @@ module ActionDispatch to = state_id[u] dtrans[from, to] = sym - if u.include?(DUMMY) + if u.include?(DUMMY_END_NODE) ps.each do |state| - if @followpos[state].include?(DUMMY) + if @followpos[state].include?(DUMMY_END_NODE) dtrans.add_memo(to, state.memo) end end @@ -66,7 +66,10 @@ module ActionDispatch when Nodes::Group true when Nodes::Star - true + # the default star regex is /(.+)/ which is NOT nullable + # but since different constraints can be provided we must + # actually check if this is the case or not. + node.regexp.match?("") when Nodes::Or node.children.any? { |c| nullable?(c) } when Nodes::Cat @@ -104,7 +107,7 @@ module ActionDispatch def lastpos(node) case node when Nodes::Star - firstpos(node.left) + lastpos(node.left) when Nodes::Or node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!) when Nodes::Cat @@ -131,10 +134,6 @@ module ActionDispatch lastpos(n.left).each do |i| table[i] += firstpos(n.right) end - when Nodes::Star - lastpos(n).each do |i| - table[i] += firstpos(n) - end end end table diff --git a/actionpack/lib/action_dispatch/journey/gtg/simulator.rb b/actionpack/lib/action_dispatch/journey/gtg/simulator.rb index 6038cff3c1..67aa399c88 100644 --- a/actionpack/lib/action_dispatch/journey/gtg/simulator.rb +++ b/actionpack/lib/action_dispatch/journey/gtg/simulator.rb @@ -14,7 +14,7 @@ module ActionDispatch end class Simulator # :nodoc: - INITIAL_STATE = [0].freeze + INITIAL_STATE = [ [0, nil] ].freeze attr_reader :tt @@ -25,13 +25,19 @@ module ActionDispatch def memos(string) input = StringScanner.new(string) state = INITIAL_STATE + start_index = 0 while sym = input.scan(%r([/.?]|[^/.?]+)) - state = tt.move(state, sym) + end_index = start_index + sym.length + + state = tt.move(state, string, start_index, end_index) + + start_index = end_index end - acceptance_states = state.each_with_object([]) do |s, memos| - memos.concat(tt.memo(s)) if tt.accepting?(s) + acceptance_states = state.each_with_object([]) do |s_d, memos| + s, idx = s_d + memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s) end acceptance_states.empty? ? yield : acceptance_states diff --git a/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb b/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb index 2db6523e2f..f3c93d5340 100644 --- a/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb +++ b/actionpack/lib/action_dispatch/journey/gtg/transition_table.rb @@ -10,11 +10,15 @@ module ActionDispatch attr_reader :memos + DEFAULT_EXP = /[^.\/?]+/ + DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/) + def initialize - @regexp_states = {} - @string_states = {} - @accepting = {} - @memos = Hash.new { |h, k| h[k] = [] } + @stdparam_states = {} + @regexp_states = {} + @string_states = {} + @accepting = {} + @memos = Hash.new { |h, k| h[k] = [] } end def add_accepting(state) @@ -41,22 +45,54 @@ module ActionDispatch Array(t) end - def move(t, a) + def move(t, full_string, start_index, end_index) return [] if t.empty? - regexps = [] - strings = [] + next_states = [] - t.each { |s| - if states = @regexp_states[s] - states.each { |re, v| regexps << v if re.match?(a) && !v.nil? } + tok = full_string.slice(start_index, end_index - start_index) + token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok) + + t.each { |s, previous_start| + if previous_start.nil? + # In the simple case of a "default" param regex do this fast-path + # and add all next states. + if token_matches_default_component && states = @stdparam_states[s] + states.each { |re, v| next_states << [v, nil].freeze if !v.nil? } + end + + # When we have a literal string, we can just pull the next state + if states = @string_states[s] + next_states << [states[tok], nil].freeze unless states[tok].nil? + end end - if states = @string_states[s] - strings << states[a] unless states[a].nil? + # For regexes that aren't the "default" style, they may potentially + # not be terminated by the first "token" [./?], so we need to continue + # to attempt to match this regexp as well as any successful paths that + # continue out of it. both paths could be valid. + if states = @regexp_states[s] + slice_start = if previous_start.nil? + start_index + else + previous_start + end + + slice_length = end_index - slice_start + curr_slice = full_string.slice(slice_start, slice_length) + + states.each { |re, v| + # if we match, we can try moving past this + next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice) + } + + # and regardless, we must continue accepting tokens and retrying this regexp. + # we need to remember where we started as well so we can take bigger slices. + next_states << [s, slice_start].freeze end } - strings.concat regexps + + next_states end def as_json(options = nil) @@ -69,9 +105,10 @@ module ActionDispatch end { - regexp_states: simple_regexp, - string_states: @string_states, - accepting: @accepting + regexp_states: simple_regexp, + string_states: @string_states, + stdparam_states: @stdparam_states, + accepting: @accepting } end @@ -125,18 +162,29 @@ module ActionDispatch def []=(from, to, sym) to_mappings = states_hash_for(sym)[from] ||= {} + case sym + when Regexp + # we must match the whole string to a token boundary + sym = Regexp.new(/\A#{sym}\Z/) + when Symbol + # account for symbols in the constraints the same as strings + sym = sym.to_s + end to_mappings[sym] = to end def states ss = @string_states.keys + @string_states.values.flat_map(&:values) + ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values) rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values) - (ss + rs).uniq + (ss + ps + rs).uniq end def transitions @string_states.flat_map { |from, hash| hash.map { |s, to| [from, s, to] } + } + @stdparam_states.flat_map { |from, hash| + hash.map { |s, to| [from, s, to] } } + @regexp_states.flat_map { |from, hash| hash.map { |s, to| [from, s, to] } } @@ -145,10 +193,14 @@ module ActionDispatch private def states_hash_for(sym) case sym - when String + when String, Symbol @string_states when Regexp - @regexp_states + if sym == DEFAULT_EXP + @stdparam_states + else + @regexp_states + end else raise ArgumentError, "unknown symbol: %s" % sym.class end diff --git a/actionpack/lib/action_dispatch/journey/nodes/node.rb b/actionpack/lib/action_dispatch/journey/nodes/node.rb index bfc9fbfb5e..ea0a6835e9 100644 --- a/actionpack/lib/action_dispatch/journey/nodes/node.rb +++ b/actionpack/lib/action_dispatch/journey/nodes/node.rb @@ -104,6 +104,15 @@ module ActionDispatch end class Star < Unary # :nodoc: + attr_accessor :regexp + + def initialize(left) + super(left) + + # By default wildcard routes are non-greedy and must match something. + @regexp = /.+?/ + end + def star?; true; end def type; :STAR; end diff --git a/actionpack/lib/action_dispatch/journey/path/pattern.rb b/actionpack/lib/action_dispatch/journey/path/pattern.rb index 36b205cee6..7d47800932 100644 --- a/actionpack/lib/action_dispatch/journey/path/pattern.rb +++ b/actionpack/lib/action_dispatch/journey/path/pattern.rb @@ -39,6 +39,27 @@ module ActionDispatch @spec end + def requirements_anchored? + # each required param must not be surrounded by a literal, otherwise it isn't simple to chunk-match the url piecemeal + terminals = ast.find_all { |t| t.is_a?(Nodes::Terminal) } + + terminals.each_with_index { |s, index| + next if index < 1 + next unless s.symbol? + + back = terminals[index - 1] + fwd = terminals[index + 1] + + # we also don't support this yet, constraints must be regexps + return false if s.regexp.is_a?(Array) + + return false if back.literal? + return false if !fwd.nil? && fwd.literal? + } + + true + end + def names @names ||= spec.find_all(&:symbol?).map(&:name) end diff --git a/actionpack/lib/action_dispatch/journey/route.rb b/actionpack/lib/action_dispatch/journey/route.rb index 21ee766e5c..38dfcea956 100644 --- a/actionpack/lib/action_dispatch/journey/route.rb +++ b/actionpack/lib/action_dispatch/journey/route.rb @@ -84,6 +84,8 @@ module ActionDispatch @decorated_ast ||= begin decorated_ast = path.ast decorated_ast.find_all(&:terminal?).each { |n| n.memo = self } + # inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string. + decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? } decorated_ast end end diff --git a/actionpack/lib/action_dispatch/journey/router.rb b/actionpack/lib/action_dispatch/journey/router.rb index b6b71720db..27671b65da 100644 --- a/actionpack/lib/action_dispatch/journey/router.rb +++ b/actionpack/lib/action_dispatch/journey/router.rb @@ -85,7 +85,7 @@ module ActionDispatch private def partitioned_routes routes.partition { |r| - r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? } + r.path.anchored && r.path.requirements_anchored? } end diff --git a/actionpack/lib/action_dispatch/journey/routes.rb b/actionpack/lib/action_dispatch/journey/routes.rb index 3f055db66d..9dfea744e8 100644 --- a/actionpack/lib/action_dispatch/journey/routes.rb +++ b/actionpack/lib/action_dispatch/journey/routes.rb @@ -41,7 +41,7 @@ module ActionDispatch end def partition_route(route) - if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?) + if route.path.anchored && route.path.requirements_anchored? anchored_routes << route else custom_routes << route diff --git a/actionpack/lib/action_dispatch/journey/visualizer/fsm.js b/actionpack/lib/action_dispatch/journey/visualizer/fsm.js index d9bcaef928..6daf0cb1c1 100644 --- a/actionpack/lib/action_dispatch/journey/visualizer/fsm.js +++ b/actionpack/lib/action_dispatch/journey/visualizer/fsm.js @@ -68,7 +68,7 @@ function highlight_state(index, color) { } function highlight_finish(index) { - svg_nodes[index].select('polygon') + svg_nodes[index].select('ellipse') .style("fill", "while") .transition().duration(500) .style("fill", "blue"); @@ -76,54 +76,79 @@ function highlight_finish(index) { function match(input) { reset_graph(); - var table = tt(); - var states = [0]; - var regexp_states = table['regexp_states']; - var string_states = table['string_states']; - var accepting = table['accepting']; + var table = tt(); + var states = [[0, null]]; + var regexp_states = table['regexp_states']; + var string_states = table['string_states']; + var stdparam_states = table['stdparam_states']; + var accepting = table['accepting']; + var default_re = new RegExp("^[^.\/?]+$"); + var start_index = 0; highlight_state(0); tokenize(input, function(token) { + var end_index = start_index + token.length; + var new_states = []; for(var key in states) { - var state = states[key]; + var state_parts = states[key]; + var state = state_parts[0]; + var previous_start = state_parts[1]; - if(string_states[state] && string_states[state][token]) { - var new_state = string_states[state][token]; - highlight_edge(state, new_state); - highlight_state(new_state); - new_states.push(new_state); - } + if(previous_start == null) { + if(string_states[state] && string_states[state][token]) { + var new_state = string_states[state][token]; + highlight_edge(state, new_state); + highlight_state(new_state); + new_states.push([new_state, null]); + } - if(regexp_states[state]) { - for(var key in regexp_states[state]) { - var re = new RegExp("^" + key + "$"); - if(re.test(token)) { - var new_state = regexp_states[state][key]; + if(stdparam_states[state] && default_re.test(token)) { + for(var key in stdparam_states[state]) { + var new_state = stdparam_states[state][key]; highlight_edge(state, new_state); highlight_state(new_state); - new_states.push(new_state); + new_states.push([new_state, null]); } } } + + if(regexp_states[state]) { + var slice_start = previous_start != null ? previous_start : start_index; + + for(var key in regexp_states[state]) { + var re = new RegExp("^" + key + "$"); + + var accumulation = input.slice(slice_start, end_index); + + if(re.test(accumulation)) { + var new_state = regexp_states[state][key]; + highlight_edge(state, new_state); + highlight_state(new_state); + new_states.push([new_state, null]); + } + + // retry the same regexp with the accumulated data either way + new_states.push([state, slice_start]); + } + } } - if(new_states.length == 0) { - return; - } states = new_states; + start_index = end_index; }); for(var key in states) { - var state = states[key]; + var state_parts = states[key]; + var state = state_parts[0]; + var slice_start = state_parts[1]; + + // we must ignore ones that are still accepting more data + if (slice_start != null) continue; + if(accepting[state]) { - for(var mkey in svg_edges[state]) { - if(!regexp_states[mkey] && !string_states[mkey]) { - highlight_edge(state, mkey); - highlight_finish(mkey); - } - } + highlight_finish(state); } else { highlight_state(state, "red"); } diff --git a/actionpack/test/journey/gtg/builder_test.rb b/actionpack/test/journey/gtg/builder_test.rb index 9dc786e261..719c3c9959 100644 --- a/actionpack/test/journey/gtg/builder_test.rb +++ b/actionpack/test/journey/gtg/builder_test.rb @@ -8,13 +8,13 @@ module ActionDispatch class TestBuilder < ActiveSupport::TestCase def test_following_states_multi table = tt ["a|a"] - assert_equal 1, table.move([0], "a").length + assert_equal 1, table.move([[0, nil]], "a", 0, 1).length end def test_following_states_multi_regexp table = tt [":a|b"] - assert_equal 1, table.move([0], "fooo").length - assert_equal 2, table.move([0], "b").length + assert_equal 1, table.move([[0, nil]], "fooo", 0, 4).length + assert_equal 2, table.move([[0, nil]], "b", 0, 1).length end def test_multi_path @@ -25,8 +25,8 @@ module ActionDispatch [2, "b"], [2, "/"], [1, "c"], - ].inject([0]) { |state, (exp, sym)| - new = table.move(state, sym) + ].inject([[0, nil]]) { |state, (exp, sym)| + new = table.move(state, sym, 0, sym.length) assert_equal exp, new.length new } @@ -60,6 +60,23 @@ module ActionDispatch assert_equal 2, memos.length end + def test_catchall + table = tt %w{ + / + /*unmatched_route + } + + sim = Simulator.new table + + # matches just the /*unmatched_route + memos = sim.memos "/test" + assert_equal 1, memos.length + + # matches just the / + memos = sim.memos "/" + assert_equal 1, memos.length + end + private def ast(strings) parser = Journey::Parser.new diff --git a/actionpack/test/journey/gtg/transition_table_test.rb b/actionpack/test/journey/gtg/transition_table_test.rb index 9044934f05..7394764a4c 100644 --- a/actionpack/test/journey/gtg/transition_table_test.rb +++ b/actionpack/test/journey/gtg/transition_table_test.rb @@ -89,7 +89,7 @@ module ActionDispatch sim = GTG::Simulator.new builder.transition_table memos = sim.memos "/articles/new" - assert_equal [paths[1], paths[3]], memos + assert_equal Set[paths[1], paths[3]], Set.new(memos) end private diff --git a/actionpack/test/journey/routes_test.rb b/actionpack/test/journey/routes_test.rb index d5c81a8421..1b23d72849 100644 --- a/actionpack/test/journey/routes_test.rb +++ b/actionpack/test/journey/routes_test.rb @@ -45,12 +45,24 @@ module ActionDispatch assert_equal 1, @routes.anchored_routes.length assert_empty @routes.custom_routes - mapper.get "/hello/:who", to: "foo#bar", as: "bar", who: /\d/ + mapper.get "/not_anchored/hello/:who-notanchored", to: "foo#bar", as: "bar", who: /\d/, anchor: false assert_equal 1, @routes.custom_routes.length assert_equal 1, @routes.anchored_routes.length end + def test_custom_anchored_not_partition_route + mapper.get "/foo/:bar", to: "foo#bar", as: "aaron" + + assert_equal 1, @routes.anchored_routes.length + assert_empty @routes.custom_routes + + mapper.get "/:user/:repo", to: "foo#bar", as: "bar", repo: /[\w.]+/ + + assert_equal 2, @routes.anchored_routes.length + assert_empty @routes.custom_routes + end + def test_first_name_wins mapper.get "/hello", to: "foo#bar", as: "aaron" assert_raise(ArgumentError) do