Optimize substrings generated from Regexp
This commit is contained in:
parent
df1be804dc
commit
9e15cf95f5
|
@ -12,18 +12,48 @@ module Capybara
|
|||
|
||||
# Memoized list of alternative substring series derived from the regexp.
#
# Each element is an array of strings: the strings inside one series are
# required together, while the series are alternatives of each other.
#
# @return [Array<Array<String>>] the reduced series, or [] as soon as any
#   series is empty
def alternated_substrings
  @alternated_substrings ||= begin
    or_strings = process(alternation: true)
    # remove_or_covered prunes or_strings in place, so its return value is
    # not needed here.
    remove_or_covered(or_strings)
    or_strings.any?(&:empty?) ? [] : or_strings
  end
end
|
||||
|
||||
# Memoized list of substrings taken from the first series returned by
# process(alternation: false), with strings that are contained in another
# string of the list removed.
#
# @return [Array<String>] whatever remove_and_covered returns for that series
def substrings
  @substrings ||= begin
    strs = process(alternation: false).first
    remove_and_covered(strs)
  end
end
|
||||
|
||||
private
|
||||
|
||||
# Drops every string that is contained in another string of the list.
# If we have "ab" and "abcd" required - only need to check for "abcd".
#
# Mutates +strings+ in place.
#
# @param strings [Array<String>] strings that are all required
# @return [Array<String>] the same array object, reduced
def remove_and_covered(strings)
  # "Self" is recognised by object identity below, so two identical strings
  # held as distinct objects would each be covered by the other and both be
  # deleted. Collapse duplicates first.
  strings.uniq!

  # Compare against a snapshot: the receiver is in an intermediate state
  # while delete_if is running.
  candidates = strings.dup
  strings.delete_if do |sub_string|
    candidates.any? do |cover_string|
      !sub_string.equal?(cover_string) && cover_string.include?(sub_string)
    end
  end
end

# Reduces a list of alternative string series to the least restrictive ones.
# If we are going to match `("a" and "b") or ("ade" and "bce")` it only makes
# sense to match ("a" and "b").
#
# Mutates +or_series+ (and each series inside it) in place.
#
# @param or_series [Array<Array<String>>] alternatives, each a list of
#   strings that are required together
# @return [Array<Array<String>>] the same array object, reduced
def remove_or_covered(or_series)
  # Ensure minimum sets of strings are being or'd
  or_series.each { |strs| remove_and_covered(strs) }

  # Series requiring the same set of strings (in any order) are
  # interchangeable. Keep only the first, otherwise they would cover - and
  # delete - each other below.
  or_series.uniq!(&:sort)

  # Remove any of the alternated string series that fully contain any other
  # string series. A snapshot is used because the receiver is being rewritten
  # while delete_if runs.
  alternatives = or_series.dup
  or_series.delete_if do |and_strs|
    alternatives.any? do |and_strs2|
      next false if and_strs.equal?(and_strs2)

      # If merging in the other series adds no new requirement, and_strs
      # already implies and_strs2 and is the redundant, stricter alternative.
      remove_and_covered(and_strs + and_strs2) == and_strs
    end
  end
end
|
||||
|
||||
def process(alternation:)
|
||||
strs = extract_strings(Regexp::Parser.parse(@regexp), alternation: alternation)
|
||||
strs = collapse(combine(strs).map(&:flatten))
|
||||
|
@ -68,8 +98,8 @@ module Capybara
|
|||
end
|
||||
|
||||
def extract_strings(expression, strings = [], alternation: false)
|
||||
expression.each do |exp|
|
||||
if optional?(exp)
|
||||
expression.each do |exp| # rubocop:disable Metrics/BlockLength
|
||||
if optional?(exp) && !(alternation && zero_or_one?(exp))
|
||||
strings.push(nil)
|
||||
next
|
||||
end
|
||||
|
@ -87,12 +117,25 @@ module Capybara
|
|||
if exp.terminal?
|
||||
case exp.type
|
||||
when :literal
|
||||
if zero_or_one?(exp)
|
||||
strings.push(Set.new([[''], [exp.text]]))
|
||||
next
|
||||
else
|
||||
strings.push(exp.text * min_repeat(exp))
|
||||
end
|
||||
when :escape
|
||||
if zero_or_one?(exp)
|
||||
strings.push(Set.new([[''], [exp.text]]))
|
||||
next
|
||||
else
|
||||
strings.push(exp.char * min_repeat(exp))
|
||||
end
|
||||
else
|
||||
strings.push(nil)
|
||||
end
|
||||
elsif alternation && zero_or_one?(exp)
|
||||
strings.push(Set.new([[''], extract_strings(exp, alternation: true)]))
|
||||
next
|
||||
else
|
||||
min_repeat(exp).times { extract_strings(exp, strings, alternation: alternation) }
|
||||
end
|
||||
|
@ -101,6 +144,10 @@ module Capybara
|
|||
strings
|
||||
end
|
||||
|
||||
# Reports whether +exp+ is quantified as "zero or one", i.e. whether
# exp.quantity equals the pair [0, 1].
# NOTE(review): presumably +exp+ is a regexp_parser expression node whose
# #quantity returns [min, max] - confirm against the parser in use.
def zero_or_one?(exp)
|
||||
exp.quantity == [0, 1]
|
||||
end
|
||||
|
||||
def alternative_strings(expression)
|
||||
alternatives = expression.alternatives.map { |sub_exp| extract_strings(sub_exp, alternation: true) }
|
||||
if alternatives.all?(&:any?)
|
||||
|
|
|
@ -27,18 +27,37 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
|||
/abc./ => %w[abc],
|
||||
/abc.*/ => %w[abc],
|
||||
/abc.def/ => %w[abc def],
|
||||
/abc.def.ghi/ => %w[abc def ghi]
|
||||
/abc.def.ghi/ => %w[abc def ghi],
|
||||
/abc.abcd.abcde/ => %w[abcde],
|
||||
/.*/ => []
|
||||
)
|
||||
end
|
||||
|
||||
it 'handles optional characters' do
|
||||
verify_strings(
|
||||
it 'ignores optional characters for substrings' do
|
||||
{
|
||||
/abc*def/ => %w[ab def],
|
||||
/abc*/ => %w[ab],
|
||||
/c*/ => [],
|
||||
/abc?def/ => %w[ab def],
|
||||
/abc?/ => %w[ab],
|
||||
/abc?def?/ => %w[ab de],
|
||||
/abc?def?g/ => %w[ab de g]
|
||||
/abc?def?g/ => %w[ab de g],
|
||||
/d?/ => []
|
||||
}.each do |regexp, expected|
|
||||
expect(Capybara::Selector::RegexpDisassembler.new(regexp).substrings).to eq expected
|
||||
end
|
||||
end
|
||||
|
||||
# Expectation table for #alternated_substrings with optional characters.
# As the rows show: a `*`-quantified character splits the text around it
# (/abc*def/ => "ab" and "def" in one series), a `?`-quantified character in
# the middle yields one series without and one with that character
# (/abc?def/ => "abdef" or "abcdef"), and a pattern consisting only of an
# optional character expects [].
it 'handles optional characters for #alternated_substrings' do
|
||||
verify_alternated_strings(
|
||||
/abc*def/ => [%w[ab def]],
|
||||
/abc*/ => [%w[ab]],
|
||||
/c*/ => [],
|
||||
/abc?def/ => [%w[abdef], %w[abcdef]],
|
||||
/abc?/ => [%w[ab]],
|
||||
/abc?def?/ => [%w[abde], %w[abcde]],
|
||||
/abc?def?g/ => [%w[abdeg], %w[abdefg], %w[abcdeg], %w[abcdefg]],
|
||||
/d?/ => []
|
||||
)
|
||||
end
|
||||
|
||||
|
@ -111,24 +130,31 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
|||
end
|
||||
end
|
||||
|
||||
# Expectation table for #alternated_substrings with alternation. Every regexp
# appears exactly once; each value lists the alternative series (arrays of
# strings required together) the disassembler must return, or [] when one of
# the alternatives can match without any fixed text.
it 'handles alternation for #alternated_substrings' do
  verify_alternated_strings(
    /abc|def/ => [%w[abc], %w[def]],
    /ab(?:c|d)/ => [%w[abc], %w[abd]],
    /ab(c|d|e)fg/ => [%w[abcfg], %w[abdfg], %w[abefg]],
    /ab?(c|d)fg/ => [%w[acfg], %w[adfg], %w[abcfg], %w[abdfg]],
    /ab(c|d)ef/ => [%w[abcef], %w[abdef]],
    /ab(cd?|ef)g/ => [%w[abcg], %w[abcdg], %w[abefg]],
    /ab(cd|ef*)g/ => [%w[abcdg], %w[abe g]],
    /ab|cd*/ => [%w[ab], %w[c]],
    /cd(?:ef|gh)|xyz/ => [%w[cdef], %w[cdgh], %w[xyz]],
    /(cd(?:ef|gh)|xyz)/ => [%w[cdef], %w[cdgh], %w[xyz]],
    /cd(ef|gh)+/ => [%w[cdef], %w[cdgh]],
    /cd(ef|gh)?/ => [%w[cd]],
    /cd(ef|gh)?ij/ => [%w[cdij], %w[cdefij], %w[cdghij]],
    /cd(ef|gh)+ij/ => [%w[cdef ij], %w[cdgh ij]],
    /cd(ef|gh){2}ij/ => [%w[cdefefij], %w[cdefghij], %w[cdghefij], %w[cdghghij]],
    /(cd(ef|g*))/ => [%w[cd]],
    /a|b*/ => [],
    /ab(?:c|d?)/ => [%w[ab]],
    /ab(c|d)|a*/ => [],
    /(abc)?(d|e)/ => [%w[d], %w[e]],
    /(abc*de)?(d|e)/ => [%w[d], %w[e]],
    /(abc*de)?(d|e?)/ => [],
    /(abc)?(d|e?)/ => []
  )
end
|
||||
|
||||
|
@ -193,7 +219,7 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
|||
|
||||
# Asserts, for every regexp => expected pair in +hsh+, that
# RegexpDisassembler#alternated_substrings returns exactly +expected+.
#
# @param hsh [Hash{Regexp => Array}] regexps mapped to their expected result
# @param wrap [Boolean] when true, each non-empty expectation is wrapped in
#   one extra array before comparing, so callers can pass a single series
#   instead of a list of series
def verify_alternated_strings(hsh, wrap: false)
  hsh.each do |regexp, expected|
    # Wrap exactly once, and leave an empty expectation as [] (never [[]]).
    expected = [expected] if wrap && (expected != [])
    expect(Capybara::Selector::RegexpDisassembler.new(regexp).alternated_substrings).to eq expected
  end
end
|
||||
|
|
Loading…
Reference in New Issue