1
0
Fork 0
mirror of https://github.com/teamcapybara/capybara.git synced 2022-11-09 12:08:07 -05:00

Move regexp disassembler into own class

This commit is contained in:
Thomas Walpole 2018-09-26 14:55:56 -07:00
parent 907b9e269f
commit 15ca8c7d6a
6 changed files with 251 additions and 27 deletions

View file

@ -97,9 +97,7 @@ Capybara.add_selector(:link) do
when true
XPath.attr(:href)
when Regexp
regexp_to_substrings(href).map do |str|
XPath.attr(:href).contains(str)
end.reduce(&:&)
XPath.attr(:href)[regexp_to_conditions(href)]
else
XPath.attr(:href) == href.to_s
end
@ -140,7 +138,7 @@ Capybara.add_selector(:link) do
if (href = options[:href])
if !href.is_a?(Regexp)
desc << " with href #{href.inspect}"
elsif regexp_to_substrings(href).any?
elsif regexp_to_conditions(href)
desc << " with href matching #{href.inspect}"
end
end
@ -149,7 +147,7 @@ Capybara.add_selector(:link) do
end
describe_node_filters do |href: nil, **|
" with href matching #{href.inspect}" if href.is_a?(Regexp) && regexp_to_substrings(href).empty?
" with href matching #{href.inspect}" if href.is_a?(Regexp) && regexp_to_conditions(href).nil?
end
end
@ -485,9 +483,7 @@ Capybara.add_selector(:element) do
expression_filter(:attributes, matcher: /.+/) do |xpath, name, val|
case val
when Regexp
regexp_to_substrings(val).inject(xpath) do |xp, str|
xp[XPath.attr(name).contains(str)]
end
xpath[XPath.attr(name)[regexp_to_conditions(val)]]
when true
xpath[XPath.attr(name)]
when false

View file

@ -0,0 +1,73 @@
# frozen_string_literal: true
require 'xpath'
module Capybara
class Selector
# Breaks a Regexp down into the literal substrings that every string matched
# by it must contain, so they can be used as a cheap pre-filter (e.g. XPath
# `contains()` conditions) before the full regexp is applied.
#
# The disassembly is deliberately conservative: anything that cannot be
# reduced to required literal text is turned into the wildcard '.', and
# constructs that cannot be analysed at all yield no substrings. Returning
# too few substrings only weakens the pre-filter; returning a wrong one
# would exclude valid matches.
class RegexpDisassembler
  # `{n}`, `{n,}`, `{,m}` and `{n,m}` repetition counts.
  COUNTED_REP_REGEX = /\{(?<min_rep>\d*)(?:,(?<max_rep>\d*))?\}/

  # An innermost parenthesised group plus its optional quantifier and the
  # optional lazy (`?`) or possessive (`+`) suffix on that quantifier.
  GROUP_REGEX = /
    (?<group>\([^()]*\))
    (?:
      (?:
        (?<optional>[*?]) |
        (?<one_or_more>\+) |
        (?:#{COUNTED_REP_REGEX.source})
      )(?<suffix>[?+])?
    )?
  /x

  # A single character followed by a counted repetition and optional suffix.
  CHAR_COUNTED_REGEX = /(?<char>.)#{COUNTED_REP_REGEX.source}(?<suffix>[?+])?/m

  # A complete backslash escape. Multi character escapes are consumed whole
  # so that their payload is never mistaken for literal text.
  ESCAPE_REGEX = /
    \\
    (?:
      [pP]\{[^}]*\}                  | # character property, possibly negated
      [gk](?:<[^>]*>|'[^']*')?       | # subexpression call or named backreference
      x\h{1,2}                       | # hex escape
      u(?:\h{4}|\{[^}]*\})           | # unicode escape
      \d+                            | # numbered backreference or octal escape
      (?:c|[CM]-)(?:\\(?:c|[CM]-))*. | # control and meta escapes
      .                                # any other escaped character
    )
  /mx

  # Isolated inline option switch such as `(?i)` or `(?m-x)`; it changes how
  # the remainder of the pattern matches.
  INLINE_OPTIONS_REGEX = /\(\?[a-z]*(?:-[a-z]*)?\)/

  # Openers of named (`(?<n>`, `(?'n'`), atomic (`(?>`) and non-capturing
  # (`(?:`) groups. Lookbehind openers (`(?<=`, `(?<!`) are excluded.
  PLAIN_GROUP_PREFIX_REGEX = /\(\?(?:<(?![=!])[^>]+>|'[^']+'|>|:)/

  # @param regexp [Regexp] the expression to disassemble
  def initialize(regexp)
    @regexp = regexp
    @regexp_source = regexp.source
  end

  # Builds an XPath condition requiring the current node to contain every
  # extracted substring (compared in upper case for case-insensitive regexps).
  #
  # NOTE(review): `str.upcase` uses Ruby's case mapping while `uppercase` is
  # provided by the XPath DSL - presumably the two agree only for the
  # characters that DSL method handles; confirm for non-ASCII patterns.
  #
  # @return [Object, nil] the combined condition, or nil when no substrings
  #   could be extracted (reducing an empty list yields nil)
  def conditions
    condition = XPath.current
    condition = condition.uppercase if @regexp.casefold?
    substrings.map do |str|
      condition.contains(@regexp.casefold? ? str.upcase : str)
    end.reduce(:&)
  end

  # @return [Array<String>] unique literal substrings required by the regexp;
  #   empty when none can be determined. The result is memoized.
  def substrings
    @substrings ||= extract_substrings
  end

  private

  # Performs the actual disassembly; see #substrings.
  def extract_substrings
    # In extended mode whitespace and comments in the source are not part of
    # the pattern, so the source text cannot be treated as literal.
    return [] unless (@regexp.options & Regexp::EXTENDED).zero?

    source = @regexp_source.dup
    source.gsub!(ESCAPE_REGEX, '.') # replace escapes, backreferences and properties with wildcard
    source.gsub!(/\[\[:[a-z]+:\]\]/, '.') # replace posix classes with wildcard
    while source.gsub!(/\[(?:[^\[\]]+)\]/, '.'); end # replace character classes with wildcard
    return [] if source =~ INLINE_OPTIONS_REGEX # can't track option changes mid-pattern

    source.gsub!(PLAIN_GROUP_PREFIX_REGEX, '(') # named, atomic and non-capturing groups become plain groups
    # Resolve groups innermost first until none are left
    while source.gsub!(GROUP_REGEX) { |_m| simplify_group(Regexp.last_match) }; end
    # Counted repetition has to be expanded before the optional pass below,
    # otherwise the `}?` of a lazy `{n,m}?` would be consumed as "optional }".
    source.gsub!(CHAR_COUNTED_REGEX) do |_m|
      match = Regexp.last_match
      repeat(match[:char], match[:min_rep], match[:max_rep], match[:suffix])
    end
    source.gsub!(/.[*?][?+]?/m, '.') # replace optional character with wildcard
    source.gsub!(/(.)\+[?+]?/m, '\1.') # replace one or more with character plus wildcard
    return [] if source.include?('|') # can't handle alternation here

    # Strip the line anchors, then whatever sits between wildcards is required text
    source.match(/\A\^?(.*?)\$?\z/m).captures[0].split('.').reject(&:empty?).uniq
  end

  # Reduces one innermost group (with its quantifier) to plain text/wildcards.
  #
  # @param matches [MatchData] a match of GROUP_REGEX
  # @return [String] replacement text for the matched span
  def simplify_group(matches)
    group = matches[:group]
    if group.start_with?('(?')
      # Lookarounds consume nothing; any other special group still present
      # here (inline options, comments, conditionals, ...) is not understood.
      group.start_with?('(?=', '(?!', '(?<=', '(?<!') ? '' : '.'
    elsif group.include?('|') # no support for alternation in groups
      '.'
    elsif matches[:one_or_more] # required but may repeat becomes text + wildcard
      group[1..-2] + '.'
    elsif matches[:optional] # optional group becomes wildcard
      '.'
    elsif matches[:min_rep]
      # The parentheses are kept so the copies are unwrapped on the next pass
      repeat(group, matches[:min_rep], matches[:max_rep], matches[:suffix])
    else
      group[1..-2]
    end
  end

  # Expands a counted repetition of `text`.
  #
  # @param text [String] the repeated character or group
  # @param min_rep [String] minimum count as captured ('' means 0)
  # @param max_rep [String, nil] nil for the exact form `{n}`, otherwise the
  #   (possibly empty) text after the comma
  # @param suffix [String, nil] a trailing `?` or `+`, if any
  # @return [String] `text` repeated the minimum number of times, followed by
  #   a wildcard when more repetitions may follow
  def repeat(text, min_rep, max_rep, suffix)
    # Per the Onigmo syntax notes, in Ruby `x{n}?` is `(?:x{n})?`, i.e. the
    # whole repetition is optional rather than lazy.
    return '.' if suffix == '?' && max_rep.nil?

    repeated = text * min_rep.to_i
    repeated << '.' if max_rep || suffix
    repeated
  end
end
end
end

View file

@ -4,6 +4,7 @@
require 'capybara/selector/filter_set'
require 'capybara/selector/css'
require 'capybara/selector/regexp_disassembler'
module Capybara
#
@ -445,24 +446,9 @@ module Capybara
Array(classes).map { |klass| XPath.attr(:class).contains_word(klass) }.reduce(:&)
end
def regexp_to_substrings(regexp)
return [] unless regexp.options.zero?
regexp.source.match(CONVERTIBLE_REGEXP) do |match|
match.captures.reject(&:empty?)
end || []
# Converts a regexp into the conditions produced by disassembling it.
# Delegates to RegexpDisassembler#conditions and returns its result unchanged.
def regexp_to_conditions(regexp)
  disassembler = RegexpDisassembler.new(regexp)
  disassembler.conditions
end
CONVERTIBLE_REGEXP = /
\A
\^? # start
([^\[\]\\^$.|?*+()]*) # leading literal characters
[^|]*? # do not try to convert expressions with alternates
(?<!\\) # skip metacharacters - ie has preceding slash
([^\[\]\\^$.|?*+()]*) # trailing literal characters
\$? # end
\z
/x
end
end

View file

@ -1,5 +1,17 @@
# frozen_string_literal: true
module XPath
  module DSL
    # Builds a `translate` call that maps each upper case character of the
    # table below to the lower case character at the same position.
    def lowercase
      upper_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸŽŠŒ'
      lower_chars = 'abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿžšœ'
      method(:translate, upper_chars, lower_chars)
    end

    # Builds a `translate` call that maps each lower case character of the
    # table below to the upper case character at the same position.
    def uppercase
      lower_chars = 'abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿžšœ'
      upper_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸŽŠŒ'
      method(:translate, lower_chars, upper_chars)
    end
  end
end
module Capybara
module XPathPatches
module Renderer

View file

@ -0,0 +1,154 @@
# frozen_string_literal: true
require 'spec_helper'
# Each example maps regexps to the literal substrings the disassembler is
# expected to extract from them; see #verify_strings at the bottom.
RSpec.describe Capybara::Selector::RegexpDisassembler do
  it 'handles strings' do
    verify_strings(
      /abcdef/ => %w[abcdef],
      /abc def/ => ['abc def']
    )
  end

  it 'handles escaped characters' do
    verify_strings(
      /abc\\def/ => %w[abc def],
      /\nabc/ => %w[abc],
      %r{abc/} => %w[abc/]
    )
  end

  it 'handles wildcards' do
    verify_strings(
      /abc.*def/ => %w[abc def],
      /.*def/ => %w[def],
      /abc./ => %w[abc],
      /abc.*/ => %w[abc],
      /abc.def/ => %w[abc def],
      /abc.def.ghi/ => %w[abc def ghi]
    )
  end

  it 'handles optional characters' do
    verify_strings(
      /abc*def/ => %w[ab def],
      /abc*/ => %w[ab],
      /abc?def/ => %w[ab def],
      /abc?/ => %w[ab],
      /abc?def?/ => %w[ab de],
      /abc?def?g/ => %w[ab de g]
    )
  end

  it 'handles character classes' do
    verify_strings(
      /abc[a-z]/ => %w[abc],
      /abc[a-z]def[0-9]g/ => %w[abc def g],
      /[0-9]abc/ => %w[abc],
      /[0-9]+/ => %w[],
      /abc[0-9&&[^7]]/ => %w[abc]
    )
  end

  it 'handles posix bracket expressions' do
    verify_strings(
      /abc[[:alpha:]]/ => %w[abc],
      /[[:digit:]]abc/ => %w[abc],
      /abc[[:print:]]def/ => %w[abc def]
    )
  end

  it 'handles repetition' do
    verify_strings(
      /abc{3}/ => %w[abccc],
      /abc{3}d/ => %w[abcccd],
      /abc{0}/ => %w[ab],
      /abc{,2}/ => %w[ab],
      /abc{2,}/ => %w[abcc],
      /def{1,5}/ => %w[def],
      /abc+def/ => %w[abc def],
      /ab(cde){,4}/ => %w[ab],
      /(ab){,2}cd/ => %w[cd],
      /(abc){2,3}/ => %w[abcabc],
      /(abc){3}/ => %w[abcabcabc],
      /ab{2,3}cd/ => %w[abb cd],
      /(ab){2,3}cd/ => %w[abab cd]
    )
  end

  it 'handles non-greedy repetition' do
    verify_strings(
      /abc.*?/ => %w[abc],
      /abc+?/ => %w[abc],
      /abc*?cde/ => %w[ab cde],
      /(abc)+?def/ => %w[abc def],
      /ab(cde)*?fg/ => %w[ab fg]
    )
  end

  it 'handles alternation' do
    verify_strings(
      /abc|def/ => [],
      /ab(?:c|d)/ => %w[ab],
      /ab(c|d)ef/ => %w[ab ef]
    )
  end

  it 'handles grouping' do
    verify_strings(
      /(abc)/ => %w[abc],
      /(abc)?/ => [],
      /ab(cde)/ => %w[abcde],
      /(abc)de/ => %w[abcde],
      /ab(cde)fg/ => %w[abcdefg],
      /ab(?<name>cd)ef/ => %w[abcdef],
      /gh(?>ij)kl/ => %w[ghijkl],
      /m(n.*p)q/ => %w[mn pq],
      /(?:ab(cd)*){2,3}/ => %w[ab],
      /(ab(cd){3})?/ => [],
      /(ab(cd)+){2}/ => %w[abcd]
    )
  end

  it 'handles meta characters' do
    verify_strings(
      /abc\d/ => %w[abc],
      /abc\wdef/ => %w[abc def],
      /\habc/ => %w[abc]
    )
  end

  it 'handles character properties' do
    verify_strings(
      /ab\p{Alpha}cd/ => %w[ab cd],
      /ab\p{Blank}/ => %w[ab],
      /\p{Digit}cd/ => %w[cd]
    )
  end

  it 'handles backreferences' do
    verify_strings(
      /a(?<group>abc).\k<group>.+/ => %w[aabc]
    )
  end

  it 'handles subexpressions' do
    verify_strings(
      /\A(?<paren>a\g<paren>*b)+\z/ => %w[a b]
    )
  end

  it 'handles anchors' do
    verify_strings(
      /^abc/ => %w[abc],
      /def$/ => %w[def],
      /^abc$/ => %w[abc]
    )
  end

  # Asserts that every regexp key of +hsh+ disassembles into exactly the
  # substrings given as its value. The failure message names the offending
  # regexp, since one example checks many expressions.
  def verify_strings(hsh)
    hsh.each do |regexp, expected|
      actual = described_class.new(regexp).substrings
      expect(actual).to eq(expected),
                        "expected #{regexp.inspect} to disassemble into #{expected.inspect}, got #{actual.inspect}"
    end
  end
end

View file

@ -31,6 +31,7 @@ RSpec.describe Capybara do
<input type="file" id="file" class=".special file"/>
<input type="hidden" id="hidden_field" value="this is hidden"/>
<input type="submit" value="click me" title="submit button"/>
<input type="button" value="don't click me" title="Other button 1"/>
<a href="#">link</a>
<fieldset></fieldset>
<select id="select">
@ -264,7 +265,9 @@ RSpec.describe Capybara do
expect(string.find(:element, 'input', title: /sub.*mit.*/).value).to eq 'click me'
expect(string.find(:element, 'input', title: /^submit button$/).value).to eq 'click me'
expect(string.find(:element, 'input', title: /^(?:submit|other) button$/).value).to eq 'click me'
expect(string.find(:element, 'input', title: /SuBmIt/i).value).to eq 'click me'
expect(string.find(:element, 'input', title: /SuB.*mIt/i).value).to eq 'click me'
expect(string.find(:element, 'input', title: /^Su.*Bm.*It/i).value).to eq 'click me'
expect(string.find(:element, 'input', title: /^Ot.*he.*r b.*\d/i).value).to eq "don't click me"
end
it 'still works with system keys' do
@ -299,7 +302,7 @@ RSpec.describe Capybara do
expect(string.find(:element, 'input', type: XPath.ends_with('ext'))[:type]).to eq 'text'
expect(string.find(:element, 'input', type: XPath.contains('ckb'))[:type]).to eq 'checkbox'
expect(string.find(:element, 'input', title: XPath.contains_word('submit'))[:type]).to eq 'submit'
expect(string.find(:element, 'input', title: XPath.contains_word('button'))[:type]).to eq 'submit'
expect(string.find(:element, 'input', title: XPath.contains_word('button 1'))[:type]).to eq 'button'
end
end
end