Optionally process alternation in the regexp disassembler

This commit is contained in:
Thomas Walpole 2018-10-30 12:04:40 -07:00
parent 3b0eec9fb7
commit 549a8fbfa8
5 changed files with 115 additions and 19 deletions

View File

@ -44,9 +44,9 @@ module Capybara
# Builds an XPath condition from the literal substrings that
# Selector::RegexpDisassembler extracts from +regexp+. The base condition is
# XPath.current, switched to its uppercase form when the regexp is case-insensitive.
# NOTE(review): this listing looks like a diff rendered without +/- markers, so the
# body holds two consecutive expressions - presumably the pre-change one followed by
# its replacement. Only the last expression is the method's return value.
def regexp_to_xpath_conditions(regexp)
condition = XPath.current
condition = condition.uppercase if regexp.casefold?
# One contains() condition per substring, combined with `&`; as written this value is discarded.
Selector::RegexpDisassembler.new(regexp).substrings.map do |str|
condition.contains(str)
end.reduce(:&)
# One `&`-combined group of contains() conditions per alternative; the groups are combined with `|`.
Selector::RegexpDisassembler.new(regexp).alternated_substrings.map do |strs|
strs.map { |str| condition.contains(str) }.reduce(:&)
end.reduce(:|)
end
end
end

View File

@ -10,16 +10,28 @@ module Capybara
@regexp = regexp
end
# Memoized result of process(alternation: true), cached in @options.
def alternated_substrings
  @options ||= process(alternation: true)
end
# Memoized list of substrings: the first entry of process(alternation: false).
# NOTE(review): the first three lines inside the begin block look like the pre-change
# implementation left over from a diff rendered without +/- markers; their result is
# not used - the block's value is the final process(...) call.
def substrings
@substrings ||= begin
strs = extract_strings(Regexp::Parser.parse(@regexp), [+''])
strs.map!(&:upcase) if @regexp.casefold?
strs.reject(&:empty?).uniq
process(alternation: false).first
end
end
private
# Parses @regexp and reduces it to the substrings returned by
# extract_strings -> combine -> collapse.
#
# alternation: passed through to extract_strings.
# Returns an array of string arrays (one per entry produced by collapse). When the
# regexp is case-insensitive every string is upcased.
def process(alternation:)
  strs = extract_strings(Regexp::Parser.parse(@regexp), [''], alternation: alternation)
  strs = collapse(combine(strs).map(&:flatten))
  # collapse de-duplicates before case folding, so strings that differ only by case
  # would become duplicates after upcasing; de-duplicate again here.
  strs = strs.map { |option| option.map(&:upcase).uniq } if @regexp.casefold?
  strs
end
# Minimum repeat count of +exp+: its quantifier's min, or 1 when the expression has
# no quantifier or the quantifier reports no min.
def min_repeat(exp)
  quantifier = exp.quantifier
  return 1 if quantifier.nil?

  quantifier.min || 1
end
@ -32,31 +44,66 @@ module Capybara
min_repeat(exp).zero?
end
def extract_strings(expression, strings)
# Expands +strs+ (strings, plus Sets whose members are themselves lists accepted by
# this method) into every possible sequence, returned as an array of arrays.
# The list is folded from its end: plain entries are prepended to every sequence
# built so far, while a Set multiplies the sequences by each of its expanded members.
def combine(strs)
  strs.reverse_each.reduce([[]]) do |suffixes, str|
    if str.is_a?(Set)
      prefixes = str.flat_map { |alternative| combine(alternative) }
      prefixes.flat_map { |prefix| suffixes.map { |suffix| prefix + suffix } }
    else
      suffixes.each { |suffix| suffix.unshift(str) }
    end
  end
end
# For each list in +strs+, joins every run of adjacent strings into one string (an
# empty string starts a new run), then drops empty results and duplicates.
def collapse(strs)
  strs.map do |substrings|
    runs = substrings.chunk_while { |_previous, following| !following.empty? }
    runs.map(&:join).reject(&:empty?).uniq
  end
end
# Walks +expression+ and appends to +strings+ the literal text found in it, pushing an
# empty string wherever that text is interrupted (optional expressions, :meta/:set
# expressions, terminals that are neither :literal nor :escape). Returns +strings+.
# With alternation: true, a non-terminal :meta expression is recorded as a Set holding
# one extracted array per entry of exp.alternatives instead of as an interruption.
# NOTE(review): this listing appears to be a diff rendered without +/- markers; the
# adjacent near-duplicate lines (`+''` vs `''`, `strings.last <<` vs `strings.push`,
# the two recursive calls) are presumably removed/added pairs - confirm against the
# repository before editing.
def extract_strings(expression, strings, alternation: false)
expression.each do |exp|
# optional? is defined outside this view; presumably true when the minimum repeat is zero.
if optional?(exp)
strings.push(+'')
strings.push('')
next
end
if %i[meta].include?(exp.type) && !exp.terminal? && alternation
# Each alternative is extracted into its own fresh array.
alternatives = exp.alternatives.map { |sub_exp| extract_strings(sub_exp, [], alternation: true) }
# Keep the alternatives only when every one of them yields at least one non-empty
# entry; otherwise the whole expression is recorded as an interruption.
if alternatives.all? { |alt| alt.any? { |a| !a.empty? } }
strings.push(Set.new(alternatives))
else
strings.push('')
end
next
end
if %i[meta set].include?(exp.type)
strings.push(+'')
strings.push('')
next
end
if exp.terminal?
case exp.type
when :literal
# The text is repeated min_repeat(exp) times.
strings.last << (exp.text * min_repeat(exp))
strings.push (exp.text * min_repeat(exp))
when :escape
strings.last << (exp.char * min_repeat(exp))
strings.push (exp.char * min_repeat(exp))
else
strings.push(+'')
strings.push('')
end
else
# Non-terminal: recurse into the expression once per minimum repetition.
min_repeat(exp).times { extract_strings(exp, strings) }
min_repeat(exp).times { extract_strings(exp, strings, alternation: alternation) }
end
# fixed_repeat? is defined outside this view; presumably false when further
# repetitions beyond the minimum are possible - TODO confirm.
strings.push(+'') unless fixed_repeat?(exp)
strings.push('') unless fixed_repeat?(exp)
end
strings
end

View File

@ -56,6 +56,11 @@ Capybara::SpecHelper.spec '#all' do
expect(@session.all(:xpath, '//h1').first.text).to eq('This is a test')
expect(@session.all(:xpath, "//input[@id='test_field']").first.value).to eq('monkey')
end
# Checks the unfiltered_size of the result when the :id option is a regexp, with and
# without alternation in the pattern.
# NOTE(review): the expected counts line up with the h2 ids visible in the fixture hunk
# of this change (h2one, h2two, h2_) - confirm no other matching ids exist in the full page.
it 'should use alternated regex for :id' do
expect(@session.all(:xpath, './/h2', id: /h2/).unfiltered_size).to eq 3
expect(@session.all(:xpath, './/h2', id: /h2(one|two)/).unfiltered_size).to eq 2
end
end
context 'with css as default selector' do

View File

@ -10,7 +10,7 @@
<h2 class="no text"></h2>
<h2 class="head" id="h2one">Header Class Test One</h2>
<h2 class="head" id="h2two">Header Class Test Two</h2>
<h2 class="head">Header Class Test Three</h2>
<h2 class="head" id="h2_">Header Class Test Three</h2>
<h2 class="head">Header Class Test Four</h2>
<h2 class="head">Header Class Test Five</h2>

View File

@ -47,7 +47,7 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
/abc[a-z]/ => %w[abc],
/abc[a-z]def[0-9]g/ => %w[abc def g],
/[0-9]abc/ => %w[abc],
/[0-9]+/ => %w[],
/[0-9]+/ => [],
/abc[0-9&&[^7]]/ => %w[abc]
)
end
@ -88,11 +88,47 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
)
end
it 'handles alternation' do
verify_strings(
# Table-driven check of #substrings for regexps that contain alternation: each regexp
# key must produce exactly the flat list given as its value.
# NOTE(review): this listing appears to be a diff rendered without +/- markers; the
# `/ab(c|d)ef/ => %w[ab ef]` entry without a trailing comma is presumably a removed
# line whose replacement appears a few entries later - confirm against the repository.
it 'ignores alternation for #substrings' do
{
/abc|def/ => [],
/ab(?:c|d)/ => %w[ab],
/ab(c|d)ef/ => %w[ab ef]
/ab(c|d|e)fg/ => %w[ab fg],
/ab?(c|d)fg/ => %w[a fg],
/ab(c|d)ef/ => %w[ab ef],
/ab(cd?|ef)g/ => %w[ab g],
/ab(cd|ef*)g/ => %w[ab g],
/ab|cd*/ => [],
/cd(?:ef|gh)|xyz/ => [],
/(cd(?:ef|gh)|xyz)/ => [],
/cd(ef|gh)+/ => %w[cd],
/cd(ef|gh)?/ => %w[cd],
/cd(ef|gh)?ij/ => %w[cd ij],
/cd(ef|gh)+ij/ => %w[cd ij],
/cd(ef|gh){2}ij/ => %w[cd ij],
/(cd(ef|g*))/ => %w[cd]
}.each do |regexp, expected|
expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected
end
end
# Table-driven check of #alternated_substrings: each regexp key maps to the expected
# list of options, one array of substrings per possible combination of alternatives.
# The description names the method actually under test, and the table is passed as an
# explicit Hash literal so it is always bound to the positional parameter of
# verify_alternated_strings (which also takes a keyword argument) rather than being
# interpreted as keyword arguments.
it 'handles alternation for #alternated_substrings' do
  verify_alternated_strings(
    {
      /abc|def/ => [%w[abc], %w[def]],
      /ab(?:c|d)/ => [%w[abc], %w[abd]],
      /ab(c|d|e)fg/ => [%w[abcfg], %w[abdfg], %w[abefg]],
      /ab?(c|d)fg/ => [%w[a cfg], %w[a dfg]],
      /ab(c|d)ef/ => [%w[abcef], %w[abdef]],
      /ab(cd?|ef)g/ => [%w[abc g], %w[abefg]],
      /ab(cd|ef*)g/ => [%w[abcdg], %w[abe g]],
      /ab|cd*/ => [%w[ab], %w[c]],
      /cd(?:ef|gh)|xyz/ => [%w[cdef], %w[cdgh], %w[xyz]],
      /(cd(?:ef|gh)|xyz)/ => [%w[cdef], %w[cdgh], %w[xyz]],
      /cd(ef|gh)+/ => [%w[cdef], %w[cdgh]],
      /cd(ef|gh)?/ => [%w[cd]],
      /cd(ef|gh)?ij/ => [%w[cd ij]],
      /cd(ef|gh)+ij/ => [%w[cdef ij], %w[cdgh ij]],
      /cd(ef|gh){2}ij/ => [%w[cdefefij], %w[cdefghij], %w[cdghefij], %w[cdghghij]],
      /(cd(ef|g*))/ => [%w[cd]]
    }
  )
end
@ -152,5 +188,13 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
hsh.each do |regexp, expected|
expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected
end
verify_alternated_strings(hsh, wrap: true)
end
# Expects RegexpDisassembler#alternated_substrings of every regexp key in +hsh+ to
# equal the paired value; with wrap: true each expected value is first wrapped in a
# one-element array.
def verify_alternated_strings(hsh, wrap: false)
  hsh.each do |regexp, expected|
    disassembler = Capybara::Selector::RegexpDisassembler.new(regexp)
    expected_options = wrap ? [expected] : expected
    expect(disassembler.alternated_substrings).to eq(expected_options)
  end
end
end