1
0
Fork 0
mirror of https://github.com/teamcapybara/capybara.git synced 2022-11-09 12:08:07 -05:00

Merge pull request #2485 from teamcapybara/Nokogiri_HTML5

Support HTML5 parsing using Nokogiri >= 1.12.0
This commit is contained in:
Thomas Walpole 2021-08-08 18:03:14 -07:00 committed by GitHub
commit 090bebf3a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 45 additions and 47 deletions

View file

@ -33,8 +33,9 @@ cache:
matrix: matrix:
include: include:
- gemfile: gemfiles/Gemfile.gumbo - gemfile: Gemfile
rvm: 3.0 rvm: 3.0
env: HTML5_PARSING=true
script: bundle exec rake rack_smoke script: bundle exec rake rack_smoke
- gemfile: Gemfile - gemfile: Gemfile
rvm: 2.7 rvm: 2.7

View file

@ -8,6 +8,7 @@ Release date: unreleased
### Added ### Added
* `allow_label_click` accepts click options to be used when clicking an associated label * `allow_label_click` accepts click options to be used when clicking an associated label
* Deprecated `allow_gumbo=` in favor of `use_html5_parsing=` to enable use of Nokogiri::HTML5 when available
### Fixed ### Fixed

View file

@ -1,7 +0,0 @@
source 'https://rubygems.org'
gem 'bundler', '< 3.0'
gemspec path: '..'
gem 'xpath', github: 'teamcapybara/xpath'
gem 'nokogumbo'

View file

@ -39,7 +39,7 @@ module Capybara
# See {Capybara.configure} # See {Capybara.configure}
# @!method javascript_driver # @!method javascript_driver
# See {Capybara.configure} # See {Capybara.configure}
# @!method allow_gumbo # @!method use_html5_parsing
# See {Capybara.configure} # See {Capybara.configure}
Config::OPTIONS.each do |method| Config::OPTIONS.each do |method|
def_delegators :config, method, "#{method}=" def_delegators :config, method, "#{method}="
@ -69,7 +69,7 @@ module Capybara
# #
# #### Configurable options # #### Configurable options
# #
# - **allow_gumbo** (Boolean = `false`) - When `nokogumbo` is available, whether it will be used to parse HTML strings. # - **use_html5_parsing** (Boolean = `false`) - When Nokogiri >= 1.12.0 or `nokogumbo` is installed, whether HTML5 parsing will be used for HTML strings.
# - **always_include_port** (Boolean = `false`) - Whether the Rack server's port should automatically be inserted into every visited URL # - **always_include_port** (Boolean = `false`) - Whether the Rack server's port should automatically be inserted into every visited URL
# unless another port is explicitly specified. # unless another port is explicitly specified.
# - **app_host** (String, `nil`) - The default host to use when giving a relative URL to visit, must be a valid URL e.g. `http://www.example.com`. # - **app_host** (String, `nil`) - The default host to use when giving a relative URL to visit, must be a valid URL e.g. `http://www.example.com`.
@ -385,26 +385,21 @@ module Capybara
# @return [Nokogiri::HTML::Document] HTML document # @return [Nokogiri::HTML::Document] HTML document
# #
def HTML(html) # rubocop:disable Naming/MethodName def HTML(html) # rubocop:disable Naming/MethodName
if Nokogiri.respond_to?(:HTML5) && Capybara.allow_gumbo # Nokogumbo installed and allowed for use # Nokogiri >= 1.12.0 or Nokogumbo installed and allowed for use
Nokogiri::HTML5(html).tap do |document| html_parser, using_html5 = if defined?(Nokogiri::HTML5) && Capybara.use_html5_parsing
document.xpath('//template').each do |template| [Nokogiri::HTML5, true]
# template elements content is not part of the document
template.inner_html = ''
end
document.xpath('//textarea').each do |textarea|
# The Nokogumbo HTML5 parser already returns spec compliant contents
textarea['_capybara_raw_value'] = textarea.content
end
end
else else
Nokogiri::HTML(html).tap do |document| [defined?(Nokogiri::HTML4) ? Nokogiri::HTML4 : Nokogiri::HTML, false]
document.xpath('//template').each do |template| end
# template elements content is not part of the document
template.inner_html = '' html_parser.parse(html).tap do |document|
end document.xpath('//template').each do |template|
document.xpath('//textarea').each do |textarea| # template elements content is not part of the document
textarea['_capybara_raw_value'] = textarea.content.delete_prefix("\n") template.inner_html = ''
end end
document.xpath('//textarea').each do |textarea|
# The Nokogiri HTML5 parser already returns spec compliant contents
textarea['_capybara_raw_value'] = using_html5 ? textarea.content : textarea.content.delete_prefix("\n")
end end
end end
end end
@ -516,6 +511,6 @@ Capybara.configure do |config|
config.test_id = nil config.test_id = nil
config.predicates_wait = true config.predicates_wait = true
config.default_normalize_ws = false config.default_normalize_ws = false
config.allow_gumbo = false config.use_html5_parsing = false
config.w3c_click_offset = false config.w3c_click_offset = false
end end

View file

@ -7,9 +7,11 @@ module Capybara
class Config class Config
extend Forwardable extend Forwardable
OPTIONS = %i[app reuse_server threadsafe server default_driver javascript_driver allow_gumbo].freeze OPTIONS = %i[
app reuse_server threadsafe server default_driver javascript_driver use_html5_parsing allow_gumbo
].freeze
attr_accessor :app, :allow_gumbo attr_accessor :app, :use_html5_parsing
attr_reader :reuse_server, :threadsafe, :session_options # rubocop:disable Style/BisectedAttrAccessor attr_reader :reuse_server, :threadsafe, :session_options # rubocop:disable Style/BisectedAttrAccessor
attr_writer :default_driver, :javascript_driver attr_writer :default_driver, :javascript_driver
@ -88,5 +90,15 @@ module Capybara
end end
@deprecation_notified[method] = true @deprecation_notified[method] = true
end end
# Deprecated writer retained for backward compatibility.
# Flags the old name through `deprecate` (naming `use_html5_parsing=` as the
# replacement) and then stores the value via the `use_html5_parsing=` writer.
#
# @param val the value forwarded unchanged to `use_html5_parsing=`
# @deprecated Use {#use_html5_parsing=} instead.
def allow_gumbo=(val)
deprecate('allow_gumbo=', 'use_html5_parsing=')
self.use_html5_parsing = val
end
# Deprecated reader retained for backward compatibility.
# Flags the old name through `deprecate` (naming `use_html5_parsing` as the
# replacement) and then returns whatever `use_html5_parsing` returns.
#
# @return the current `use_html5_parsing` value
# @deprecated Use {#use_html5_parsing} instead.
def allow_gumbo
deprecate('allow_gumbo', 'use_html5_parsing')
use_html5_parsing
end
end end
end end

View file

@ -110,7 +110,7 @@ module Capybara
# No need for an xpath if only checking the current element # No need for an xpath if only checking the current element
!(native.key?('hidden') || !(native.key?('hidden') ||
/display:\s?none/.match?(native[:style] || '') || /display:\s?none/.match?(native[:style] || '') ||
%w[script head].include?(tag_name)) %w[script head style].include?(tag_name))
end end
end end

View file

@ -36,7 +36,7 @@ module Capybara
Capybara.test_id = nil Capybara.test_id = nil
Capybara.predicates_wait = true Capybara.predicates_wait = true
Capybara.default_normalize_ws = false Capybara.default_normalize_ws = false
Capybara.allow_gumbo = true Capybara.use_html5_parsing = !ENV['HTML5_PARSING'].nil?
Capybara.w3c_click_offset = false Capybara.w3c_click_offset = false
reset_threadsafe reset_threadsafe
end end

View file

@ -125,12 +125,13 @@ RSpec.describe Capybara do
it 'drops illegal fragments when using gumbo' do it 'drops illegal fragments when using gumbo' do
skip 'libxml is less strict than Gumbo' unless Nokogiri.respond_to?(:HTML5) skip 'libxml is less strict than Gumbo' unless Nokogiri.respond_to?(:HTML5)
described_class.use_html5_parsing = true
expect(described_class.string('<td>1</td>')).not_to have_css('td') expect(described_class.string('<td>1</td>')).not_to have_css('td')
end end
it 'can disable use of gumbo' do it 'can disable use of HTML5 parsing' do
skip "Test doesn't make sense unlesss nokogumbo is loaded" unless Nokogiri.respond_to?(:HTML5) skip "Test doesn't make sense unlesss HTML5 parsing is loaded (Nokogumbo or Nokogiri >= 1.12.0)" unless Nokogiri.respond_to?(:HTML5)
described_class.allow_gumbo = false described_class.use_html5_parsing = false
expect(described_class.string('<td>1</td>')).to have_css('td') expect(described_class.string('<td>1</td>')).to have_css('td')
end end

View file

@ -1,12 +1,6 @@
# frozen_string_literal: true # frozen_string_literal: true
require 'spec_helper' require 'spec_helper'
nokogumbo_required = begin
require 'nokogumbo'
true
rescue LoadError
false
end
module TestSessions module TestSessions
RackTest = Capybara::Session.new(:rack_test, TestApp) RackTest = Capybara::Session.new(:rack_test, TestApp)
@ -256,11 +250,12 @@ RSpec.describe Capybara::RackTest::Driver do
end end
RSpec.describe 'Capybara::String' do RSpec.describe 'Capybara::String' do
it 'should use gumbo' do it 'should use HTML5 parsing' do
skip 'Only valid if gumbo is included' unless nokogumbo_required skip 'Only valid if Nokogiri >= 1.12.0 or gumbo is included' unless defined? Nokogiri::HTML5
allow(Nokogiri).to receive(:HTML5).and_call_original Capybara.use_html5_parsing = true
allow(Nokogiri::HTML5).to receive(:parse).and_call_original
Capybara.string('<div id=test_div></div>') Capybara.string('<div id=test_div></div>')
expect(Nokogiri).to have_received(:HTML5) expect(Nokogiri::HTML5).to have_received(:parse)
end end
end end