1
0
Fork 0
mirror of https://github.com/teamcapybara/capybara.git synced 2022-11-09 12:08:07 -05:00

implement regexp disassembler using regexp_parser gem

This commit is contained in:
Thomas Walpole 2018-10-15 18:48:15 -07:00
parent fb26f50768
commit cfae2bfb0c
3 changed files with 49 additions and 47 deletions

View file

@ -30,6 +30,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency('nokogiri', ['~> 1.8'])
s.add_runtime_dependency('rack', ['>= 1.6.0'])
s.add_runtime_dependency('rack-test', ['>= 0.6.3'])
s.add_runtime_dependency('regexp_parser', ['~>1.2'])
s.add_runtime_dependency('xpath', ['~>3.2'])
s.add_development_dependency('cucumber', ['>= 2.3.0'])

View file

@ -1,66 +1,65 @@
# frozen_string_literal: true
require 'regexp_parser'
module Capybara
class Selector
# @api private
class RegexpDisassembler
# Stores the regexp to be disassembled together with its pattern source.
#
# @param regexp [Regexp] the regular expression to analyse
def initialize(regexp)
  @regexp, @regexp_source = regexp, regexp.source
end
# Returns the unique, non-empty strings collected from the regexp, upcased
# when the regexp is case-insensitive (`@regexp.casefold?`). The result of the
# `begin` block is cached in @substrings; the early `return []` path below
# leaves the method without caching anything.
#
# NOTE(review): this span is rendered from a diff without +/- markers. The
# gsub!-based statements down to the first bare `strs` look like the removed
# implementation and the final three statements like its Regexp::Parser-based
# replacement — confirm against the repository before relying on either half.
def substrings
@substrings ||= begin
source = @regexp_source.dup
source.gsub!(/\\[^pgk]/, '.') # replace escaped characters with wildcard
source.gsub!(/\\[gk](?:<[^>]*>)?/, '.') # replace sub expressions and back references with wildcard
source.gsub!(/\\p\{[[:alpha:]]+\}?/, '.') # replace character properties with wildcard
source.gsub!(/\[\[:[a-z]+:\]\]/, '.') # replace posix classes with wildcard
while source.gsub!(/\[(?:[^\[\]]+)\]/, '.'); end # replace character classes with wildcard
source.gsub!(/\(\?<?[=!][^)]*\)/, '') # remove lookahead/lookbehind assertions
source.gsub!(/\(\?(?:<[^>]+>|>|:)/, '(') # replace named, atomic, and non-matching groups with unnamed matching groups
# Keep collapsing innermost groups until gsub! reports no further change (nil).
while source.gsub!(GROUP_REGEX) { |_m| simplify_group(Regexp.last_match) }; end
source.gsub!(/.[*?]\??/, '.') # replace optional character with wildcard
source.gsub!(/(.)\+\??/, '\1.') # replace one or more with character plus wildcard
source.gsub!(/(?<char>.)#{COUNTED_REP_REGEX.source}/) do |_m| # repeat counted characters
(Regexp.last_match[:char] * Regexp.last_match[:min_rep].to_i).tap { |str| str << '.' if Regexp.last_match[:max_rep] }
end
return [] if source.include?('|') # can't handle alternation here
# Strip optional leading ^ / trailing $, then split the remainder on the wildcard marker.
strs = source.match(/\A\^?(.*?)\$?\Z/).captures[0].split('.').reject(&:empty?).uniq
strs = strs.map(&:upcase) if @regexp.casefold?
strs
# From here on the `strs` value computed above is discarded: it is reassigned
# from the parsed expression tree, starting with a single empty (unfrozen) string.
strs = extract_strings(Regexp::Parser.parse(@regexp), [+''])
strs.map!(&:upcase) if @regexp.casefold?
strs.reject(&:empty?).uniq
end
end
private
def simplify_group(matches)
if matches[:group].include?('|') # no support for alternation in groups
'.'
elsif matches[:one_or_more] # required but may repeat becomes text + wildcard
matches[:group][1..-2] + '.'
elsif matches[:optional] # optional group becomes wildcard
'.'
elsif matches[:min_rep]
(matches[:group] * matches[:min_rep].to_i).tap { |r| r << '.' if matches[:max_rep] }
else
matches[:group][1..-2]
end
# Minimum number of times +exp+ has to occur.
#
# @param exp [#quantifier] expression node; its quantifier may be nil
# @return [Integer] the quantifier's +min+, or 1 when there is no quantifier
#   or the quantifier reports no minimum
def min_repeat(exp)
  quantifier = exp.quantifier
  return 1 if quantifier.nil?

  quantifier.min || 1
end
COUNTED_REP_REGEX = /\{(?<min_rep>\d*)(?:,(?<max_rep>\d*))?\}/
GROUP_REGEX = /
(?<group>\([^()]*\))
(?:
(?:
(?<optional>[*?]) |
(?<one_or_more>\+) |
(?:#{COUNTED_REP_REGEX.source})
)\??
)?
/x
# Whether +exp+ occurs an exact number of times, i.e. its minimum and maximum
# repetition counts are equal. A missing quantifier (or missing max) counts as 1.
#
# @param exp [#quantifier] expression node; its quantifier may be nil
# @return [Boolean]
def fixed_repeat?(exp)
  lower_bound = min_repeat(exp)
  upper_bound = exp.quantifier&.max || 1
  lower_bound == upper_bound
end
# Whether +exp+ may be absent altogether, i.e. its minimum repetition is zero.
#
# @param exp [#quantifier] expression node; its quantifier may be nil
# @return [Boolean]
def optional?(exp)
  minimum = min_repeat(exp)
  minimum.zero?
end
# Walks the children of a parsed regexp node in order and accumulates the
# literal text that appears in sequence.
#
# +strings+ is mutated in place: literal and escaped characters are appended
# to its last element, and a fresh empty string is pushed whenever the current
# run of text is interrupted (optional or variably-repeated expressions, meta
# characters, character sets, and any other terminal that is neither a literal
# nor an escape). Non-terminal nodes are descended into once per required
# repetition.
#
# @param expression [#each] node whose children are visited
# @param strings [Array<String>] accumulator; its last element is the open run
# @return [Array<String>] the same +strings+ array
def extract_strings(expression, strings)
  expression.each do |exp|
    # Lookahead/lookbehind assertions consume no characters, so the text on
    # either side of them stays adjacent in any match. Their contents must not
    # be appended as required text: for /a(?!b)c/ the string "ac" matches but
    # does not contain "abc". regexp_parser tags these groups with type
    # :assertion, so skip the node entirely.
    next if exp.type == :assertion

    # Anything that may be absent, or that matches more than one fixed
    # character, ends the current run of literal text.
    if optional?(exp) || %i[meta set].include?(exp.type)
      strings.push(+'')
      next
    end

    if exp.terminal?
      case exp.type
      when :literal then strings.last << (exp.text * min_repeat(exp))
      when :escape then strings.last << (exp.char * min_repeat(exp))
      else strings.push(+'')
      end
    else
      min_repeat(exp).times { extract_strings(exp, strings) }
    end

    # Beyond the guaranteed repetitions the following text is no longer
    # contiguous with what was just appended.
    strings.push(+'') unless fixed_repeat?(exp)
  end
  strings
end
end
end
end

View file

@ -12,9 +12,11 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
it 'handles escaped characters' do
verify_strings(
/abc\\def/ => %w[abc def],
/\nabc/ => %w[abc],
%r{abc/} => %w[abc/]
/abc\\def/ => %w[abc\def],
/abc\.def/ => %w[abc.def],
/\nabc/ => ["\nabc"],
%r{abc/} => %w[abc/],
/ab\++cd/ => %w[ab+ cd]
)
end