Optimize substrings generated from Regexp
This commit is contained in:
parent
df1be804dc
commit
9e15cf95f5
|
@ -12,18 +12,48 @@ module Capybara
|
||||||
|
|
||||||
def alternated_substrings
|
def alternated_substrings
|
||||||
@alternated_substrings ||= begin
|
@alternated_substrings ||= begin
|
||||||
process(alternation: true)
|
or_strings = process(alternation: true)
|
||||||
|
remove_or_covered(or_strings)
|
||||||
|
or_strings.any?(&:empty?) ? [] : or_strings
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def substrings
|
def substrings
|
||||||
@substrings ||= begin
|
@substrings ||= begin
|
||||||
process(alternation: false).first
|
strs = process(alternation: false).first
|
||||||
|
remove_and_covered(strs)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
# Drops every required substring that is implied by another entry in the list:
# if both "ab" and "abcd" are required, checking for "abcd" alone is enough.
#
# Mutates +strings+ in place and returns it.
#
# @param strings [Array<String>] substrings that must all be present
# @return [Array<String>] the same array, reduced to entries not contained in any other entry
def remove_and_covered(strings)
  # Collapse equal strings first. Coverage is detected with include?, and two
  # distinct-but-equal strings include each other, so without this step they
  # would both be discarded and the requirement would be lost entirely.
  strings.uniq!

  # Compare against a snapshot so the coverage check never observes the array
  # while delete_if is part-way through rewriting it.
  candidates = strings.dup

  # If we have "ab" and "abcd" required - only need to check for "abcd"
  strings.delete_if do |sub_string|
    candidates.any? do |cover_string|
      # An entry must not be treated as covering itself
      next if sub_string.equal? cover_string

      cover_string.include?(sub_string)
    end
  end
end
|
||||||
|
|
||||||
|
# Reduces a list of alternated ("or'd") series of required substrings to the
# loosest alternatives.
#
# If we are going to match `("a" and "b") or ("ade" and "bce")` it only makes
# sense to match ("a" and "b"): anything satisfying the second series also
# satisfies the first.
#
# Mutates +or_series+ (and each inner array) in place and returns +or_series+.
#
# @param or_series [Array<Array<String>>] each inner array is a set of substrings that must all be present
# @return [Array<Array<String>>] the same outer array with redundant series removed
def remove_or_covered(or_series)
  # Ensure minimum sets of strings are being or'd
  or_series.each { |strs| remove_and_covered(strs) }

  # Remove any of the alternated string series that fully contain any other string series.
  # A series is redundant when every string of some other series occurs inside one of
  # its own strings. Each series is compared only with series already kept or not yet
  # examined, so two equivalent series (duplicates or reorderings) cannot remove each
  # other - exactly one of them survives.
  kept = []
  or_series.each_with_index do |and_strs, index|
    rivals = kept + or_series.drop(index + 1)
    redundant = rivals.any? do |and_strs2|
      and_strs2.all? { |needle| and_strs.any? { |str| str.include?(needle) } }
    end
    kept << and_strs unless redundant
  end

  # Keep the in-place contract: callers read the array they passed in
  or_series.replace(kept)
end
|
||||||
|
|
||||||
def process(alternation:)
|
def process(alternation:)
|
||||||
strs = extract_strings(Regexp::Parser.parse(@regexp), alternation: alternation)
|
strs = extract_strings(Regexp::Parser.parse(@regexp), alternation: alternation)
|
||||||
strs = collapse(combine(strs).map(&:flatten))
|
strs = collapse(combine(strs).map(&:flatten))
|
||||||
|
@ -68,8 +98,8 @@ module Capybara
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_strings(expression, strings = [], alternation: false)
|
def extract_strings(expression, strings = [], alternation: false)
|
||||||
expression.each do |exp|
|
expression.each do |exp| # rubocop:disable Metrics/BlockLength
|
||||||
if optional?(exp)
|
if optional?(exp) && !(alternation && zero_or_one?(exp))
|
||||||
strings.push(nil)
|
strings.push(nil)
|
||||||
next
|
next
|
||||||
end
|
end
|
||||||
|
@ -87,12 +117,25 @@ module Capybara
|
||||||
if exp.terminal?
|
if exp.terminal?
|
||||||
case exp.type
|
case exp.type
|
||||||
when :literal
|
when :literal
|
||||||
strings.push(exp.text * min_repeat(exp))
|
if zero_or_one?(exp)
|
||||||
|
strings.push(Set.new([[''], [exp.text]]))
|
||||||
|
next
|
||||||
|
else
|
||||||
|
strings.push(exp.text * min_repeat(exp))
|
||||||
|
end
|
||||||
when :escape
|
when :escape
|
||||||
strings.push(exp.char * min_repeat(exp))
|
if zero_or_one?(exp)
|
||||||
|
strings.push(Set.new([[''], [exp.text]]))
|
||||||
|
next
|
||||||
|
else
|
||||||
|
strings.push(exp.char * min_repeat(exp))
|
||||||
|
end
|
||||||
else
|
else
|
||||||
strings.push(nil)
|
strings.push(nil)
|
||||||
end
|
end
|
||||||
|
elsif alternation && zero_or_one?(exp)
|
||||||
|
strings.push(Set.new([[''], extract_strings(exp, alternation: true)]))
|
||||||
|
next
|
||||||
else
|
else
|
||||||
min_repeat(exp).times { extract_strings(exp, strings, alternation: alternation) }
|
min_repeat(exp).times { extract_strings(exp, strings, alternation: alternation) }
|
||||||
end
|
end
|
||||||
|
@ -101,6 +144,10 @@ module Capybara
|
||||||
strings
|
strings
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Reports whether the expression's quantity is exactly [0, 1].
# NOTE(review): presumably the [min, max] pair produced by the `?` quantifier - confirm against regexp_parser.
#
# @param exp [#quantity] parsed regexp expression
# @return [Boolean]
def zero_or_one?(exp)
  optional_once = [0, 1]
  exp.quantity == optional_once
end
|
||||||
|
|
||||||
def alternative_strings(expression)
|
def alternative_strings(expression)
|
||||||
alternatives = expression.alternatives.map { |sub_exp| extract_strings(sub_exp, alternation: true) }
|
alternatives = expression.alternatives.map { |sub_exp| extract_strings(sub_exp, alternation: true) }
|
||||||
if alternatives.all?(&:any?)
|
if alternatives.all?(&:any?)
|
||||||
|
|
|
@ -27,18 +27,37 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
||||||
/abc./ => %w[abc],
|
/abc./ => %w[abc],
|
||||||
/abc.*/ => %w[abc],
|
/abc.*/ => %w[abc],
|
||||||
/abc.def/ => %w[abc def],
|
/abc.def/ => %w[abc def],
|
||||||
/abc.def.ghi/ => %w[abc def ghi]
|
/abc.def.ghi/ => %w[abc def ghi],
|
||||||
|
/abc.abcd.abcde/ => %w[abcde],
|
||||||
|
/.*/ => []
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'handles optional characters' do
|
it 'ignores optional characters for substrings' do
|
||||||
verify_strings(
|
{
|
||||||
/abc*def/ => %w[ab def],
|
/abc*def/ => %w[ab def],
|
||||||
/abc*/ => %w[ab],
|
/abc*/ => %w[ab],
|
||||||
|
/c*/ => [],
|
||||||
/abc?def/ => %w[ab def],
|
/abc?def/ => %w[ab def],
|
||||||
/abc?/ => %w[ab],
|
/abc?/ => %w[ab],
|
||||||
/abc?def?/ => %w[ab de],
|
/abc?def?/ => %w[ab de],
|
||||||
/abc?def?g/ => %w[ab de g]
|
/abc?def?g/ => %w[ab de g],
|
||||||
|
/d?/ => []
|
||||||
|
}.each do |regexp, expected|
|
||||||
|
expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'handles optional characters for #alternated_substrings' do
|
||||||
|
verify_alternated_strings(
|
||||||
|
/abc*def/ => [%w[ab def]],
|
||||||
|
/abc*/ => [%w[ab]],
|
||||||
|
/c*/ => [],
|
||||||
|
/abc?def/ => [%w[abdef], %w[abcdef]],
|
||||||
|
/abc?/ => [%w[ab]],
|
||||||
|
/abc?def?/ => [%w[abde], %w[abcde]],
|
||||||
|
/abc?def?g/ => [%w[abdeg], %w[abdefg], %w[abcdeg], %w[abcdefg]],
|
||||||
|
/d?/ => []
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -111,24 +130,31 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'handles alternation for #options' do
|
it 'handles alternation for #alternated_substrings' do
|
||||||
verify_alternated_strings(
|
verify_alternated_strings(
|
||||||
/abc|def/ => [%w[abc], %w[def]],
|
/abc|def/ => [%w[abc], %w[def]],
|
||||||
/ab(?:c|d)/ => [%w[abc], %w[abd]],
|
/ab(?:c|d)/ => [%w[abc], %w[abd]],
|
||||||
/ab(c|d|e)fg/ => [%w[abcfg], %w[abdfg], %w[abefg]],
|
/ab(c|d|e)fg/ => [%w[abcfg], %w[abdfg], %w[abefg]],
|
||||||
/ab?(c|d)fg/ => [%w[a cfg], %w[a dfg]],
|
/ab?(c|d)fg/ => [%w[acfg], %w[adfg], %w[abcfg], %w[abdfg]],
|
||||||
/ab(c|d)ef/ => [%w[abcef], %w[abdef]],
|
/ab(c|d)ef/ => [%w[abcef], %w[abdef]],
|
||||||
/ab(cd?|ef)g/ => [%w[abc g], %w[abefg]],
|
/ab(cd?|ef)g/ => [%w[abcg], %w[abcdg], %w[abefg]],
|
||||||
/ab(cd|ef*)g/ => [%w[abcdg], %w[abe g]],
|
/ab(cd|ef*)g/ => [%w[abcdg], %w[abe g]],
|
||||||
/ab|cd*/ => [%w[ab], %w[c]],
|
/ab|cd*/ => [%w[ab], %w[c]],
|
||||||
/cd(?:ef|gh)|xyz/ => [%w[cdef], %w[cdgh], %w[xyz]],
|
/cd(?:ef|gh)|xyz/ => [%w[cdef], %w[cdgh], %w[xyz]],
|
||||||
/(cd(?:ef|gh)|xyz)/ => [%w[cdef], %w[cdgh], %w[xyz]],
|
/(cd(?:ef|gh)|xyz)/ => [%w[cdef], %w[cdgh], %w[xyz]],
|
||||||
/cd(ef|gh)+/ => [%w[cdef], %w[cdgh]],
|
/cd(ef|gh)+/ => [%w[cdef], %w[cdgh]],
|
||||||
/cd(ef|gh)?/ => [%w[cd]],
|
/cd(ef|gh)?/ => [%w[cd]],
|
||||||
/cd(ef|gh)?ij/ => [%w[cd ij]],
|
/cd(ef|gh)?ij/ => [%w[cdij], %w[cdefij], %w[cdghij]],
|
||||||
/cd(ef|gh)+ij/ => [%w[cdef ij], %w[cdgh ij]],
|
/cd(ef|gh)+ij/ => [%w[cdef ij], %w[cdgh ij]],
|
||||||
/cd(ef|gh){2}ij/ => [%w[cdefefij], %w[cdefghij], %w[cdghefij], %w[cdghghij]],
|
/cd(ef|gh){2}ij/ => [%w[cdefefij], %w[cdefghij], %w[cdghefij], %w[cdghghij]],
|
||||||
/(cd(ef|g*))/ => [%w[cd]]
|
/(cd(ef|g*))/ => [%w[cd]],
|
||||||
|
/a|b*/ => [],
|
||||||
|
/ab(?:c|d?)/ => [%w[ab]],
|
||||||
|
/ab(c|d)|a*/ => [],
|
||||||
|
/(abc)?(d|e)/ => [%w[d], %w[e]],
|
||||||
|
/(abc*de)?(d|e)/ => [%w[d], %w[e]],
|
||||||
|
/(abc*de)?(d|e?)/ => [],
|
||||||
|
/(abc)?(d|e?)/ => []
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -193,7 +219,7 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
||||||
|
|
||||||
def verify_alternated_strings(hsh, wrap: false)
|
def verify_alternated_strings(hsh, wrap: false)
|
||||||
hsh.each do |regexp, expected|
|
hsh.each do |regexp, expected|
|
||||||
expected = [expected] if wrap
|
expected = [expected] if wrap && (expected != [])
|
||||||
expect(Capybara::Selector::RegexpDisassembler.new(regexp).alternated_substrings).to eq expected
|
expect(Capybara::Selector::RegexpDisassembler.new(regexp).alternated_substrings).to eq expected
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue