From 549a8fbfa88303e43a08d2ac8031dd8ca6fe8cb1 Mon Sep 17 00:00:00 2001 From: Thomas Walpole Date: Tue, 30 Oct 2018 12:04:40 -0700 Subject: [PATCH] Optionally process alternation in the regexp disassembler --- .../selector/builders/xpath_builder.rb | 6 +- lib/capybara/selector/regexp_disassembler.rb | 69 ++++++++++++++++--- lib/capybara/spec/session/all_spec.rb | 5 ++ lib/capybara/spec/views/with_html.erb | 2 +- spec/regexp_dissassembler_spec.rb | 52 ++++++++++++-- 5 files changed, 115 insertions(+), 19 deletions(-) diff --git a/lib/capybara/selector/builders/xpath_builder.rb b/lib/capybara/selector/builders/xpath_builder.rb index a491110e..60a6acb5 100644 --- a/lib/capybara/selector/builders/xpath_builder.rb +++ b/lib/capybara/selector/builders/xpath_builder.rb @@ -44,9 +44,9 @@ module Capybara def regexp_to_xpath_conditions(regexp) condition = XPath.current condition = condition.uppercase if regexp.casefold? - Selector::RegexpDisassembler.new(regexp).substrings.map do |str| - condition.contains(str) - end.reduce(:&) + Selector::RegexpDisassembler.new(regexp).alternated_substrings.map do |strs| + strs.map { |str| condition.contains(str) }.reduce(:&) + end.reduce(:|) end end end diff --git a/lib/capybara/selector/regexp_disassembler.rb b/lib/capybara/selector/regexp_disassembler.rb index 97e2f69a..5de51a52 100644 --- a/lib/capybara/selector/regexp_disassembler.rb +++ b/lib/capybara/selector/regexp_disassembler.rb @@ -10,16 +10,28 @@ module Capybara @regexp = regexp end + def alternated_substrings + @options ||= begin + process(alternation: true) + end + end + def substrings @substrings ||= begin - strs = extract_strings(Regexp::Parser.parse(@regexp), [+'']) - strs.map!(&:upcase) if @regexp.casefold? - strs.reject(&:empty?).uniq + process(alternation: false).first end end private + def process(alternation:) + strs = extract_strings(Regexp::Parser.parse(@regexp), [''], alternation: alternation) + strs = collapse(combine(strs).map &:flatten) + strs.each { |str| str.map!(&:upcase) } if @regexp.casefold? + strs + end + + def min_repeat(exp) exp.quantifier&.min || 1 end @@ -32,31 +44,66 @@ module Capybara min_repeat(exp).zero? end - def extract_strings(expression, strings) + + def combine(strs) + suffixes = [[]] + strs.reverse_each do |str| + if str.is_a? Set + prefixes = str.each_with_object([]) { |s, memo| memo.concat combine(s) } + + result = [] + prefixes.product(suffixes) { |pair| result << pair.flatten(1) } + suffixes = result + else + suffixes.each do |arr| + arr.unshift str + end + end + end + suffixes + end + + def collapse(strs) + strs.map do |substrings| + substrings.slice_before { |str| str.empty? }.map(&:join).reject(&:empty?).uniq + end + end + + def extract_strings(expression, strings, alternation: false) expression.each do |exp| if optional?(exp) - strings.push(+'') + strings.push('') + next + end + + if %i[meta].include?(exp.type) && !exp.terminal? && alternation + alternatives = exp.alternatives.map { |sub_exp| extract_strings(sub_exp, [], alternation: true) } + if alternatives.all? { |alt| alt.any? { |a| !a.empty? } } + strings.push(Set.new(alternatives)) + else + strings.push('') + end next end if %i[meta set].include?(exp.type) - strings.push(+'') + strings.push('') next end if exp.terminal? case exp.type when :literal - strings.last << (exp.text * min_repeat(exp)) + strings.push (exp.text * min_repeat(exp)) when :escape - strings.last << (exp.char * min_repeat(exp)) + strings.push (exp.char * min_repeat(exp)) else - strings.push(+'') + strings.push('') end else - min_repeat(exp).times { extract_strings(exp, strings) } + min_repeat(exp).times { extract_strings(exp, strings, alternation: alternation) } end - strings.push(+'') unless fixed_repeat?(exp) + strings.push('') unless fixed_repeat?(exp) end strings end diff --git a/lib/capybara/spec/session/all_spec.rb b/lib/capybara/spec/session/all_spec.rb index f23798a5..6e5ce5eb 100644 --- a/lib/capybara/spec/session/all_spec.rb +++ b/lib/capybara/spec/session/all_spec.rb @@ -56,6 +56,11 @@ Capybara::SpecHelper.spec '#all' do expect(@session.all(:xpath, '//h1').first.text).to eq('This is a test') expect(@session.all(:xpath, "//input[@id='test_field']").first.value).to eq('monkey') end + + it 'should use alternated regex for :id' do + expect(@session.all(:xpath, './/h2', id: /h2/).unfiltered_size).to eq 3 + expect(@session.all(:xpath, './/h2', id: /h2(one|two)/).unfiltered_size).to eq 2 + end end context 'with css as default selector' do diff --git a/lib/capybara/spec/views/with_html.erb b/lib/capybara/spec/views/with_html.erb index cfdc6548..2fcf270d 100644 --- a/lib/capybara/spec/views/with_html.erb +++ b/lib/capybara/spec/views/with_html.erb @@ -10,7 +10,7 @@

Header Class Test One

Header Class Test Two

-

Header Class Test Three

+

Header Class Test Three

Header Class Test Four

Header Class Test Five

diff --git a/spec/regexp_dissassembler_spec.rb b/spec/regexp_dissassembler_spec.rb index 613280bc..cc8a113f 100644 --- a/spec/regexp_dissassembler_spec.rb +++ b/spec/regexp_dissassembler_spec.rb @@ -47,7 +47,7 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do /abc[a-z]/ => %w[abc], /abc[a-z]def[0-9]g/ => %w[abc def g], /[0-9]abc/ => %w[abc], - /[0-9]+/ => %w[], + /[0-9]+/ => [], /abc[0-9&&[^7]]/ => %w[abc] ) end @@ -88,11 +88,47 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do ) end - it 'handles alternation' do - verify_strings( + it 'ignores alternation for #substrings' do + { /abc|def/ => [], /ab(?:c|d)/ => %w[ab], - /ab(c|d)ef/ => %w[ab ef] + /ab(c|d|e)fg/ => %w[ab fg], + /ab?(c|d)fg/ => %w[a fg], + /ab(c|d)ef/ => %w[ab ef], + /ab(cd?|ef)g/ => %w[ab g], + /ab(cd|ef*)g/ => %w[ab g], + /ab|cd*/ => [], + /cd(?:ef|gh)|xyz/ => [], + /(cd(?:ef|gh)|xyz)/ => [], + /cd(ef|gh)+/ => %w[cd], + /cd(ef|gh)?/ => %w[cd], + /cd(ef|gh)?ij/ => %w[cd ij], + /cd(ef|gh)+ij/ => %w[cd ij], + /cd(ef|gh){2}ij/ => %w[cd ij], + /(cd(ef|g*))/ => %w[cd] + }.each do |regexp, expected| + expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected + end + end + + it 'handles alternation for #options' do + verify_alternated_strings( + /abc|def/ => [%w[abc],%w[def]], + /ab(?:c|d)/ => [%w[abc],%w[abd]], + /ab(c|d|e)fg/ => [%w[abcfg],%w[abdfg],%w[abefg]], + /ab?(c|d)fg/ => [%w[a cfg], %w[a dfg]], + /ab(c|d)ef/ => [%w[abcef], %w[abdef]], + /ab(cd?|ef)g/ => [%w[abc g], %w[abefg]], + /ab(cd|ef*)g/ => [%w[abcdg], %w[abe g]], + /ab|cd*/ => [%w[ab], %w[c]], + /cd(?:ef|gh)|xyz/ => [%w[cdef],%w[cdgh],%w[xyz]], + /(cd(?:ef|gh)|xyz)/ => [%w[cdef],%w[cdgh],%w[xyz]], + /cd(ef|gh)+/ => [%w[cdef], %w[cdgh]], + /cd(ef|gh)?/ => [%w[cd]], + /cd(ef|gh)?ij/ => [%w[cd ij]], + /cd(ef|gh)+ij/ => [%w[cdef ij], %w[cdgh ij]], + /cd(ef|gh){2}ij/ => [%w[cdefefij], %w[cdefghij], %w[cdghefij], %w[cdghghij]], + /(cd(ef|g*))/ => [%w[cd]] ) end @@ -152,5 +188,13 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do hsh.each do |regexp, expected| expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected end + verify_alternated_strings(hsh, wrap: true) + end + + def verify_alternated_strings(hsh, wrap: false) + hsh.each do |regexp, expected| + expected = [expected] if wrap + expect(Capybara::Selector::RegexpDisassembler.new(regexp).alternated_substrings).to eq expected + end end end