diff --git a/lib/capybara/selector/regexp_disassembler.rb b/lib/capybara/selector/regexp_disassembler.rb index f749cfb8..9b24c99b 100644 --- a/lib/capybara/selector/regexp_disassembler.rb +++ b/lib/capybara/selector/regexp_disassembler.rb @@ -12,18 +12,48 @@ module Capybara def alternated_substrings @alternated_substrings ||= begin - process(alternation: true) + or_strings = process(alternation: true) + remove_or_covered(or_strings) + or_strings.any?(&:empty?) ? [] : or_strings end end def substrings @substrings ||= begin - process(alternation: false).first + strs = process(alternation: false).first + remove_and_covered(strs) end end private + def remove_and_covered(strings) + # If we have "ab" and "abcd" required - only need to check for "abcd" + strings.delete_if do |sub_string| + strings.any? do |cover_string| + next if sub_string.equal? cover_string + + cover_string.include?(sub_string) + end + end + end + + def remove_or_covered(or_series) + # If we are going to match `("a" and "b") or ("ade" and "bce")` it only makes sense to match ("a" and "b") + + # Ensure minimum sets of strings are being or'd + or_series.each { |strs| remove_and_covered(strs) } + + # Remove any of the alternated string series that fully contain any other string series + or_series.delete_if do |and_strs| + or_series.any? do |and_strs2| + next if and_strs.equal? and_strs2 + + remove_and_covered(and_strs + and_strs2) == and_strs + end + end + end + def process(alternation:) strs = extract_strings(Regexp::Parser.parse(@regexp), alternation: alternation) strs = collapse(combine(strs).map(&:flatten)) @@ -68,8 +98,8 @@ module Capybara end def extract_strings(expression, strings = [], alternation: false) - expression.each do |exp| - if optional?(exp) + expression.each do |exp| # rubocop:disable Metrics/BlockLength + if optional?(exp) && !(alternation && zero_or_one?(exp)) strings.push(nil) next end @@ -87,12 +117,25 @@ module Capybara if exp.terminal? case exp.type when :literal - strings.push(exp.text * min_repeat(exp)) + if zero_or_one?(exp) + strings.push(Set.new([[''], [exp.text]])) + next + else + strings.push(exp.text * min_repeat(exp)) + end when :escape - strings.push(exp.char * min_repeat(exp)) + if zero_or_one?(exp) + strings.push(Set.new([[''], [exp.text]])) + next + else + strings.push(exp.char * min_repeat(exp)) + end else strings.push(nil) end + elsif alternation && zero_or_one?(exp) + strings.push(Set.new([[''], extract_strings(exp, alternation: true)])) + next else min_repeat(exp).times { extract_strings(exp, strings, alternation: alternation) } end @@ -101,6 +144,10 @@ module Capybara strings end + def zero_or_one?(exp) + exp.quantity == [0, 1] + end + def alternative_strings(expression) alternatives = expression.alternatives.map { |sub_exp| extract_strings(sub_exp, alternation: true) } if alternatives.all?(&:any?) diff --git a/spec/regexp_dissassembler_spec.rb b/spec/regexp_dissassembler_spec.rb index 58e8590e..542555f4 100644 --- a/spec/regexp_dissassembler_spec.rb +++ b/spec/regexp_dissassembler_spec.rb @@ -27,18 +27,37 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do /abc./ => %w[abc], /abc.*/ => %w[abc], /abc.def/ => %w[abc def], - /abc.def.ghi/ => %w[abc def ghi] + /abc.def.ghi/ => %w[abc def ghi], + /abc.abcd.abcde/ => %w[abcde], + /.*/ => [] ) end - it 'handles optional characters' do - verify_strings( + it 'ignores optional characters for substrings' do + { /abc*def/ => %w[ab def], /abc*/ => %w[ab], + /c*/ => [], /abc?def/ => %w[ab def], /abc?/ => %w[ab], /abc?def?/ => %w[ab de], - /abc?def?g/ => %w[ab de g] + /abc?def?g/ => %w[ab de g], + /d?/ => [] + }.each do |regexp, expected| + expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected + end + end + + it 'handles optional characters for #alternated_substrings' do + verify_alternated_strings( + /abc*def/ => [%w[ab def]], + /abc*/ => [%w[ab]], + /c*/ => [], + /abc?def/ => [%w[abdef], %w[abcdef]], + /abc?/ => [%w[ab]], + /abc?def?/ => [%w[abde], %w[abcde]], + /abc?def?g/ => [%w[abdeg], %w[abdefg], %w[abcdeg], %w[abcdefg]], + /d?/ => [] ) end @@ -111,24 +130,31 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do end end - it 'handles alternation for #options' do + it 'handles alternation for #alternated_substrings' do verify_alternated_strings( /abc|def/ => [%w[abc], %w[def]], /ab(?:c|d)/ => [%w[abc], %w[abd]], /ab(c|d|e)fg/ => [%w[abcfg], %w[abdfg], %w[abefg]], - /ab?(c|d)fg/ => [%w[a cfg], %w[a dfg]], + /ab?(c|d)fg/ => [%w[acfg], %w[adfg], %w[abcfg], %w[abdfg]], /ab(c|d)ef/ => [%w[abcef], %w[abdef]], - /ab(cd?|ef)g/ => [%w[abc g], %w[abefg]], + /ab(cd?|ef)g/ => [%w[abcg], %w[abcdg], %w[abefg]], /ab(cd|ef*)g/ => [%w[abcdg], %w[abe g]], /ab|cd*/ => [%w[ab], %w[c]], /cd(?:ef|gh)|xyz/ => [%w[cdef], %w[cdgh], %w[xyz]], /(cd(?:ef|gh)|xyz)/ => [%w[cdef], %w[cdgh], %w[xyz]], /cd(ef|gh)+/ => [%w[cdef], %w[cdgh]], /cd(ef|gh)?/ => [%w[cd]], - /cd(ef|gh)?ij/ => [%w[cd ij]], + /cd(ef|gh)?ij/ => [%w[cdij], %w[cdefij], %w[cdghij]], /cd(ef|gh)+ij/ => [%w[cdef ij], %w[cdgh ij]], /cd(ef|gh){2}ij/ => [%w[cdefefij], %w[cdefghij], %w[cdghefij], %w[cdghghij]], - /(cd(ef|g*))/ => [%w[cd]] + /(cd(ef|g*))/ => [%w[cd]], + /a|b*/ => [], + /ab(?:c|d?)/ => [%w[ab]], + /ab(c|d)|a*/ => [], + /(abc)?(d|e)/ => [%w[d], %w[e]], + /(abc*de)?(d|e)/ => [%w[d], %w[e]], + /(abc*de)?(d|e?)/ => [], + /(abc)?(d|e?)/ => [] ) end @@ -193,7 +219,7 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do def verify_alternated_strings(hsh, wrap: false) hsh.each do |regexp, expected| - expected = [expected] if wrap + expected = [expected] if wrap && (expected != []) expect(Capybara::Selector::RegexpDisassembler.new(regexp).alternated_substrings).to eq expected end end