diff --git a/.travis.yml b/.travis.yml index d72c48ff..cda0cb5e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,8 +33,9 @@ cache: matrix: include: - - gemfile: gemfiles/Gemfile.gumbo + - gemfile: Gemfile rvm: 3.0 + env: HTML5_PARSING=true script: bundle exec rake rack_smoke - gemfile: Gemfile rvm: 2.7 diff --git a/History.md b/History.md index 91d4a52d..b17aea59 100644 --- a/History.md +++ b/History.md @@ -8,6 +8,7 @@ Release date: unreleased ### Added * `allow_label_click` accepts click options to be used when clicking an associated label +* Deprecated `allow_gumbo=` in favor of `use_html5_parsing=` to enable use of Nokogiri::HTML5 when available ### Fixed diff --git a/gemfiles/Gemfile.gumbo b/gemfiles/Gemfile.gumbo deleted file mode 100644 index 93164ad8..00000000 --- a/gemfiles/Gemfile.gumbo +++ /dev/null @@ -1,7 +0,0 @@ -source 'https://rubygems.org' - -gem 'bundler', '< 3.0' -gemspec path: '..' - -gem 'xpath', github: 'teamcapybara/xpath' -gem 'nokogumbo' \ No newline at end of file diff --git a/lib/capybara.rb b/lib/capybara.rb index 95c2ac78..39282c91 100644 --- a/lib/capybara.rb +++ b/lib/capybara.rb @@ -39,7 +39,7 @@ module Capybara # See {Capybara.configure} # @!method javascript_driver # See {Capybara.configure} - # @!method allow_gumbo + # @!method use_html5_parsing # See {Capybara.configure} Config::OPTIONS.each do |method| def_delegators :config, method, "#{method}=" @@ -69,7 +69,7 @@ module Capybara # # #### Configurable options # - # - **allow_gumbo** (Boolean = `false`) - When `nokogumbo` is available, whether it will be used to parse HTML strings. + # - **use_html5_parsing** (Boolean = `false`) - When Nokogiri >= 1.12.0 or `nokogumbo` is installed, whether HTML5 parsing will be used for HTML strings. # - **always_include_port** (Boolean = `false`) - Whether the Rack server's port should automatically be inserted into every visited URL # unless another port is explicitly specified. 
# - **app_host** (String, `nil`) - The default host to use when giving a relative URL to visit, must be a valid URL e.g. `http://www.example.com`. @@ -385,26 +385,21 @@ module Capybara # @return [Nokogiri::HTML::Document] HTML document # def HTML(html) # rubocop:disable Naming/MethodName - if Nokogiri.respond_to?(:HTML5) && Capybara.allow_gumbo # Nokogumbo installed and allowed for use - Nokogiri::HTML5(html).tap do |document| - document.xpath('//template').each do |template| - # template elements content is not part of the document - template.inner_html = '' - end - document.xpath('//textarea').each do |textarea| - # The Nokogumbo HTML5 parser already returns spec compliant contents - textarea['_capybara_raw_value'] = textarea.content - end - end + # Nokogiri >= 1.12.0 or Nokogumbo installed and allowed for use + html_parser, using_html5 = if defined?(Nokogiri::HTML5) && Capybara.use_html5_parsing + [Nokogiri::HTML5, true] else - Nokogiri::HTML(html).tap do |document| - document.xpath('//template').each do |template| - # template elements content is not part of the document - template.inner_html = '' - end - document.xpath('//textarea').each do |textarea| - textarea['_capybara_raw_value'] = textarea.content.delete_prefix("\n") - end + [defined?(Nokogiri::HTML4) ? Nokogiri::HTML4 : Nokogiri::HTML, false] + end + + html_parser.parse(html).tap do |document| + document.xpath('//template').each do |template| + # template elements content is not part of the document + template.inner_html = '' + end + document.xpath('//textarea').each do |textarea| + # The Nokogiri HTML5 parser already returns spec compliant contents + textarea['_capybara_raw_value'] = using_html5 ? 
textarea.content : textarea.content.delete_prefix("\n") end end end @@ -516,6 +511,6 @@ Capybara.configure do |config| config.test_id = nil config.predicates_wait = true config.default_normalize_ws = false - config.allow_gumbo = false + config.use_html5_parsing = false config.w3c_click_offset = false end diff --git a/lib/capybara/config.rb b/lib/capybara/config.rb index 490cf675..79e8291d 100644 --- a/lib/capybara/config.rb +++ b/lib/capybara/config.rb @@ -7,9 +7,11 @@ module Capybara class Config extend Forwardable - OPTIONS = %i[app reuse_server threadsafe server default_driver javascript_driver allow_gumbo].freeze + OPTIONS = %i[ + app reuse_server threadsafe server default_driver javascript_driver use_html5_parsing allow_gumbo + ].freeze - attr_accessor :app, :allow_gumbo + attr_accessor :app, :use_html5_parsing attr_reader :reuse_server, :threadsafe, :session_options # rubocop:disable Style/BisectedAttrAccessor attr_writer :default_driver, :javascript_driver @@ -88,5 +90,15 @@ module Capybara end @deprecation_notified[method] = true end + + def allow_gumbo=(val) + deprecate('allow_gumbo=', 'use_html5_parsing=') + self.use_html5_parsing = val + end + + def allow_gumbo + deprecate('allow_gumbo', 'use_html5_parsing') + use_html5_parsing + end end end diff --git a/lib/capybara/node/simple.rb b/lib/capybara/node/simple.rb index 2f8e0df2..20c230b5 100644 --- a/lib/capybara/node/simple.rb +++ b/lib/capybara/node/simple.rb @@ -110,7 +110,7 @@ module Capybara # No need for an xpath if only checking the current element !(native.key?('hidden') || /display:\s?none/.match?(native[:style] || '') || - %w[script head].include?(tag_name)) + %w[script head style].include?(tag_name)) end end diff --git a/lib/capybara/spec/spec_helper.rb b/lib/capybara/spec/spec_helper.rb index c471afc2..d1b0b900 100644 --- a/lib/capybara/spec/spec_helper.rb +++ b/lib/capybara/spec/spec_helper.rb @@ -36,7 +36,7 @@ module Capybara Capybara.test_id = nil Capybara.predicates_wait = true 
Capybara.default_normalize_ws = false - Capybara.allow_gumbo = true + Capybara.use_html5_parsing = !ENV['HTML5_PARSING'].nil? Capybara.w3c_click_offset = false reset_threadsafe end diff --git a/spec/basic_node_spec.rb b/spec/basic_node_spec.rb index 0a59b779..acb3cabd 100644 --- a/spec/basic_node_spec.rb +++ b/spec/basic_node_spec.rb @@ -125,12 +125,13 @@ RSpec.describe Capybara do it 'drops illegal fragments when using gumbo' do skip 'libxml is less strict than Gumbo' unless Nokogiri.respond_to?(:HTML5) + described_class.use_html5_parsing = true expect(described_class.string('