1
0
Fork 0
mirror of https://github.com/teamcapybara/capybara.git synced 2022-11-09 12:08:07 -05:00

Support HTML5 parsing using Nokogiri >= 1.12.0

This commit is contained in:
Thomas Walpole 2021-08-07 18:51:57 -07:00
parent f2113294de
commit ca84279566
9 changed files with 45 additions and 47 deletions

View file

@ -33,8 +33,9 @@ cache:
matrix:
include:
- gemfile: gemfiles/Gemfile.gumbo
- gemfile: Gemfile
rvm: 3.0
env: HTML5_PARSING=true
script: bundle exec rake rack_smoke
- gemfile: Gemfile
rvm: 2.7

View file

@ -8,6 +8,7 @@ Release date: unreleased
### Added
* `allow_label_click` accepts click options to be used when clicking an associated label
* Deprecated `allow_gumbo=` in favor of `use_html5_parsing=` to enable use of Nokogiri::HTML5 when available
### Fixed

View file

@ -1,7 +0,0 @@
source 'https://rubygems.org'
gem 'bundler', '< 3.0'
gemspec path: '..'
gem 'xpath', github: 'teamcapybara/xpath'
gem 'nokogumbo'

View file

@ -39,7 +39,7 @@ module Capybara
# See {Capybara.configure}
# @!method javascript_driver
# See {Capybara.configure}
# @!method allow_gumbo
# @!method use_html5_parsing
# See {Capybara.configure}
Config::OPTIONS.each do |method|
def_delegators :config, method, "#{method}="
@ -69,7 +69,7 @@ module Capybara
#
# #### Configurable options
#
# - **allow_gumbo** (Boolean = `false`) - When `nokogumbo` is available, whether it will be used to parse HTML strings.
# - **use_html5_parsing** (Boolean = `false`) - When Nokogiri >= 1.12.0 or `nokogumbo` is installed, whether HTML5 parsing will be used for HTML strings.
# - **always_include_port** (Boolean = `false`) - Whether the Rack server's port should automatically be inserted into every visited URL
# unless another port is explicitly specified.
# - **app_host** (String, `nil`) - The default host to use when giving a relative URL to visit, must be a valid URL e.g. `http://www.example.com`.
@ -385,26 +385,21 @@ module Capybara
# @return [Nokogiri::HTML::Document] HTML document
#
def HTML(html) # rubocop:disable Naming/MethodName
if Nokogiri.respond_to?(:HTML5) && Capybara.allow_gumbo # Nokogumbo installed and allowed for use
Nokogiri::HTML5(html).tap do |document|
document.xpath('//template').each do |template|
# template elements content is not part of the document
template.inner_html = ''
end
document.xpath('//textarea').each do |textarea|
# The Nokogumbo HTML5 parser already returns spec compliant contents
textarea['_capybara_raw_value'] = textarea.content
end
end
# Nokogiri >= 1.12.0 or Nokogumbo installed and allowed for use
html_parser, using_html5 = if defined?(Nokogiri::HTML5) && Capybara.use_html5_parsing
[Nokogiri::HTML5, true]
else
Nokogiri::HTML(html).tap do |document|
document.xpath('//template').each do |template|
# template elements content is not part of the document
template.inner_html = ''
end
document.xpath('//textarea').each do |textarea|
textarea['_capybara_raw_value'] = textarea.content.delete_prefix("\n")
end
[defined?(Nokogiri::HTML4) ? Nokogiri::HTML4 : Nokogiri::HTML, false]
end
html_parser.parse(html).tap do |document|
document.xpath('//template').each do |template|
# template elements content is not part of the document
template.inner_html = ''
end
document.xpath('//textarea').each do |textarea|
# The Nokogiri HTML5 parser already returns spec compliant contents
textarea['_capybara_raw_value'] = using_html5 ? textarea.content : textarea.content.delete_prefix("\n")
end
end
end
@ -516,6 +511,6 @@ Capybara.configure do |config|
config.test_id = nil
config.predicates_wait = true
config.default_normalize_ws = false
config.allow_gumbo = false
config.use_html5_parsing = false
config.w3c_click_offset = false
end

View file

@ -7,9 +7,11 @@ module Capybara
class Config
extend Forwardable
OPTIONS = %i[app reuse_server threadsafe server default_driver javascript_driver allow_gumbo].freeze
OPTIONS = %i[
app reuse_server threadsafe server default_driver javascript_driver use_html5_parsing allow_gumbo
].freeze
attr_accessor :app, :allow_gumbo
attr_accessor :app, :use_html5_parsing
attr_reader :reuse_server, :threadsafe, :session_options # rubocop:disable Style/BisectedAttrAccessor
attr_writer :default_driver, :javascript_driver
@ -88,5 +90,15 @@ module Capybara
end
@deprecation_notified[method] = true
end
# Deprecated writer kept for backward compatibility; prefer `use_html5_parsing=`.
#
# Calls `deprecate` with the old and replacement method names, then forwards
# the value unchanged to the `use_html5_parsing` setter.
# NOTE(review): `deprecate` appears to track notifications per method name
# (see `@deprecation_notified`) — confirm whether the warning is emitted once.
#
# @param val the value to store as `use_html5_parsing`
def allow_gumbo=(val)
deprecate('allow_gumbo=', 'use_html5_parsing=')
self.use_html5_parsing = val
end
# Deprecated reader kept for backward compatibility; prefer `use_html5_parsing`.
#
# Calls `deprecate` with the old and replacement method names, then returns
# the current value of `use_html5_parsing`.
#
# @return the current `use_html5_parsing` setting
def allow_gumbo
deprecate('allow_gumbo', 'use_html5_parsing')
use_html5_parsing
end
end
end

View file

@ -110,7 +110,7 @@ module Capybara
# No need for an xpath if only checking the current element
!(native.key?('hidden') ||
/display:\s?none/.match?(native[:style] || '') ||
%w[script head].include?(tag_name))
%w[script head style].include?(tag_name))
end
end

View file

@ -36,7 +36,7 @@ module Capybara
Capybara.test_id = nil
Capybara.predicates_wait = true
Capybara.default_normalize_ws = false
Capybara.allow_gumbo = true
Capybara.use_html5_parsing = !ENV['HTML5_PARSING'].nil?
Capybara.w3c_click_offset = false
reset_threadsafe
end

View file

@ -125,12 +125,13 @@ RSpec.describe Capybara do
it 'drops illegal fragments when using gumbo' do
skip 'libxml is less strict than Gumbo' unless Nokogiri.respond_to?(:HTML5)
described_class.use_html5_parsing = true
expect(described_class.string('<td>1</td>')).not_to have_css('td')
end
it 'can disable use of gumbo' do
skip "Test doesn't make sense unlesss nokogumbo is loaded" unless Nokogiri.respond_to?(:HTML5)
described_class.allow_gumbo = false
it 'can disable use of HTML5 parsing' do
skip "Test doesn't make sense unlesss HTML5 parsing is loaded (Nokogumbo or Nokogiri >= 1.12.0)" unless Nokogiri.respond_to?(:HTML5)
described_class.use_html5_parsing = false
expect(described_class.string('<td>1</td>')).to have_css('td')
end

View file

@ -1,12 +1,6 @@
# frozen_string_literal: true
require 'spec_helper'
nokogumbo_required = begin
require 'nokogumbo'
true
rescue LoadError
false
end
module TestSessions
RackTest = Capybara::Session.new(:rack_test, TestApp)
@ -256,11 +250,12 @@ RSpec.describe Capybara::RackTest::Driver do
end
RSpec.describe 'Capybara::String' do
it 'should use gumbo' do
skip 'Only valid if gumbo is included' unless nokogumbo_required
allow(Nokogiri).to receive(:HTML5).and_call_original
it 'should use HTML5 parsing' do
skip 'Only valid if Nokogiri >= 1.12.0 or gumbo is included' unless defined? Nokogiri::HTML5
Capybara.use_html5_parsing = true
allow(Nokogiri::HTML5).to receive(:parse).and_call_original
Capybara.string('<div id=test_div></div>')
expect(Nokogiri).to have_received(:HTML5)
expect(Nokogiri::HTML5).to have_received(:parse)
end
end