mirror of
https://github.com/teamcapybara/capybara.git
synced 2022-11-09 12:08:07 -05:00
implement regexp disassembler using regexp_parser gem
This commit is contained in:
parent
fb26f50768
commit
cfae2bfb0c
3 changed files with 49 additions and 47 deletions
|
@ -30,6 +30,7 @@ Gem::Specification.new do |s|
|
|||
s.add_runtime_dependency('nokogiri', ['~> 1.8'])
|
||||
s.add_runtime_dependency('rack', ['>= 1.6.0'])
|
||||
s.add_runtime_dependency('rack-test', ['>= 0.6.3'])
|
||||
s.add_runtime_dependency('regexp_parser', ['~>1.2'])
|
||||
s.add_runtime_dependency('xpath', ['~>3.2'])
|
||||
|
||||
s.add_development_dependency('cucumber', ['>= 2.3.0'])
|
||||
|
|
|
@ -1,66 +1,65 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'regexp_parser'
|
||||
|
||||
module Capybara
|
||||
class Selector
|
||||
# @api private
|
||||
class RegexpDisassembler
|
||||
# Stores the regexp to be disassembled along with its pattern source.
#
# @param regexp [Regexp] the regular expression to analyse
def initialize(regexp)
  @regexp = regexp
  @regexp_source = @regexp.source
end
|
||||
|
||||
def substrings
|
||||
@substrings ||= begin
|
||||
source = @regexp_source.dup
|
||||
source.gsub!(/\\[^pgk]/, '.') # replace escaped characters with wildcard
|
||||
source.gsub!(/\\[gk](?:<[^>]*>)?/, '.') # replace sub expressions and back references with wildcard
|
||||
source.gsub!(/\\p\{[[:alpha:]]+\}?/, '.') # replace character properties with wildcard
|
||||
source.gsub!(/\[\[:[a-z]+:\]\]/, '.') # replace posix classes with wildcard
|
||||
while source.gsub!(/\[(?:[^\[\]]+)\]/, '.'); end # replace character classes with wildcard
|
||||
source.gsub!(/\(\?<?[=!][^)]*\)/, '') # remove lookahead/lookbehind assertions
|
||||
source.gsub!(/\(\?(?:<[^>]+>|>|:)/, '(') # replace named, atomic, and non-matching groups with unnamed matching groups
|
||||
|
||||
while source.gsub!(GROUP_REGEX) { |_m| simplify_group(Regexp.last_match) }; end
|
||||
source.gsub!(/.[*?]\??/, '.') # replace optional character with wildcard
|
||||
source.gsub!(/(.)\+\??/, '\1.') # replace one or more with character plus wildcard
|
||||
source.gsub!(/(?<char>.)#{COUNTED_REP_REGEX.source}/) do |_m| # repeat counted characters
|
||||
(Regexp.last_match[:char] * Regexp.last_match[:min_rep].to_i).tap { |str| str << '.' if Regexp.last_match[:max_rep] }
|
||||
end
|
||||
return [] if source.include?('|') # can't handle alternation here
|
||||
|
||||
strs = source.match(/\A\^?(.*?)\$?\Z/).captures[0].split('.').reject(&:empty?).uniq
|
||||
strs = strs.map(&:upcase) if @regexp.casefold?
|
||||
strs
|
||||
strs = extract_strings(Regexp::Parser.parse(@regexp), [+''])
|
||||
strs.map!(&:upcase) if @regexp.casefold?
|
||||
strs.reject(&:empty?).uniq
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def simplify_group(matches)
|
||||
if matches[:group].include?('|') # no support for alternation in groups
|
||||
'.'
|
||||
elsif matches[:one_or_more] # required but may repeat becomes text + wildcard
|
||||
matches[:group][1..-2] + '.'
|
||||
elsif matches[:optional] # optional group becomes wildcard
|
||||
'.'
|
||||
elsif matches[:min_rep]
|
||||
(matches[:group] * matches[:min_rep].to_i).tap { |r| r << '.' if matches[:max_rep] }
|
||||
else
|
||||
matches[:group][1..-2]
|
||||
end
|
||||
# Minimum number of times +exp+ has to occur in a match.
# An expression without a quantifier (or whose quantifier reports no
# minimum) is treated as occurring once.
#
# @param exp [#quantifier] a parsed regexp expression
# @return [Integer]
def min_repeat(exp)
  quantifier = exp.quantifier
  return 1 if quantifier.nil?

  quantifier.min || 1
end
|
||||
|
||||
COUNTED_REP_REGEX = /\{(?<min_rep>\d*)(?:,(?<max_rep>\d*))?\}/
|
||||
GROUP_REGEX = /
|
||||
(?<group>\([^()]*\))
|
||||
(?:
|
||||
(?:
|
||||
(?<optional>[*?]) |
|
||||
(?<one_or_more>\+) |
|
||||
(?:#{COUNTED_REP_REGEX.source})
|
||||
)\??
|
||||
)?
|
||||
/x
|
||||
# True when +exp+ repeats an exact number of times, i.e. its minimum and
# maximum repetition counts are equal. An expression without a quantifier
# counts as repeating exactly once.
#
# @param exp [#quantifier] a parsed regexp expression
# @return [Boolean]
def fixed_repeat?(exp)
  minimum = min_repeat(exp)
  maximum = exp.quantifier&.max || 1
  minimum == maximum
end
|
||||
|
||||
# True when +exp+ may be absent from a match (its minimum repetition is zero).
#
# @param exp [#quantifier] a parsed regexp expression
# @return [Boolean]
def optional?(exp)
  minimum = min_repeat(exp)
  minimum.zero?
end
|
||||
|
||||
# Walks a parsed regexp tree and collects the literal text that every match
# must contain.
#
# +strings+ is an accumulator of mutable strings: text that must appear
# contiguously is appended to the last entry, and a fresh empty entry is
# pushed whenever contiguity is broken (optional parts, wildcards, sets,
# variable repetition, ...). Empty entries are left in the result.
#
# @param expression [#each] a parsed expression yielding its child expressions
# @param strings [Array<String>] accumulator whose last element is mutable
# @return [Array<String>] the same +strings+ array, updated in place
def extract_strings(expression, strings)
  expression.each do |exp|
    # Lookahead/lookbehind assertions are zero-width: their content is not
    # part of the text consumed at this position (and for negative
    # assertions must actually be absent), so it must never be spliced into
    # the required strings. The text on either side stays contiguous.
    next if exp.type == :assertion

    # Optional expressions, wildcards/alternations (:meta) and character
    # sets contribute no fixed text; they only break contiguity.
    if optional?(exp) || %i[meta set].include?(exp.type)
      strings.push(+'')
      next
    end

    if exp.terminal?
      case exp.type
      when :literal
        strings.last << (exp.text * min_repeat(exp))
      when :escape
        strings.last << (exp.char * min_repeat(exp))
      else
        # anchors, character types, backreferences, ... - no known fixed text
        strings.push(+'')
      end
    else
      # A group must occur at least min_repeat times, so its content is
      # extracted that many times in a row.
      min_repeat(exp).times { extract_strings(exp, strings) }
    end

    # Variable repetition means unknown text may follow the required part.
    strings.push(+'') unless fixed_repeat?(exp)
  end
  strings
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -12,9 +12,11 @@ RSpec.describe Capybara::Selector::RegexpDisassembler do
|
|||
|
||||
it 'handles escaped characters' do
|
||||
verify_strings(
|
||||
/abc\\def/ => %w[abc def],
|
||||
/\nabc/ => %w[abc],
|
||||
%r{abc/} => %w[abc/]
|
||||
/abc\\def/ => %w[abc\def],
|
||||
/abc\.def/ => %w[abc.def],
|
||||
/\nabc/ => ["\nabc"],
|
||||
%r{abc/} => %w[abc/],
|
||||
/ab\++cd/ => %w[ab+ cd]
|
||||
)
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue