From 0c43bfbd3c0d8c240fd50207e6aacb4da8142bed Mon Sep 17 00:00:00 2001
From: Thomas Walpole <twalpole@gmail.com>
Date: Mon, 5 Mar 2018 14:57:33 -0800
Subject: [PATCH 1/3] Change #text to be closer to what a browser displays

---
 lib/capybara/helpers.rb                    |  6 +++--
 lib/capybara/queries/ancestor_query.rb     |  8 +++++-
 lib/capybara/queries/text_query.rb         |  9 +++----
 lib/capybara/queries/title_query.rb        |  4 +--
 lib/capybara/rack_test/node.rb             | 29 ++++++++++++++++------
 lib/capybara/rspec/matchers.rb             |  1 -
 lib/capybara/selenium/node.rb              |  8 +++---
 lib/capybara/spec/session/ancestor_spec.rb |  8 +++---
 lib/capybara/spec/session/assert_text.rb   | 11 ++++----
 lib/capybara/spec/session/assert_title.rb  | 15 ++++++++---
 lib/capybara/spec/session/has_text_spec.rb |  9 ++-----
 lib/capybara/spec/session/node_spec.rb     |  2 +-
 lib/capybara/spec/session/text_spec.rb     | 12 ++++++---
 lib/capybara/spec/test_app.rb              |  9 +++++++
 lib/capybara/spec/views/with_html.erb      | 17 +++++++++++++
 15 files changed, 102 insertions(+), 46 deletions(-)

diff --git a/lib/capybara/helpers.rb b/lib/capybara/helpers.rb
index 3dc08e40..91c54ad4 100644
--- a/lib/capybara/helpers.rb
+++ b/lib/capybara/helpers.rb
@@ -10,6 +10,7 @@ module Capybara
     # Normalizes whitespace space by stripping leading and trailing
     # whitespace and replacing sequences of whitespace characters
     # with a single space.
+    # TODO: Deprecate this see: https://w3c.github.io/webdriver/webdriver-spec.html#dfn-bot-dom-getvisibletext
     #
     # @param [String] text     Text to normalize
     # @return [String]         Normalized text
@@ -28,10 +29,11 @@ module Capybara
     # @param [Fixnum, Boolean, nil] options Options passed to Regexp.new when creating the Regexp
     # @return [Regexp] Regexp to match the passed in text and options
     #
-    def to_regexp(text, exact: false, options: nil)
+    def to_regexp(text, exact: false, all_whitespace: false, options: nil)
       return text if text.is_a?(Regexp)
 
-      escaped = Regexp.escape(normalize_whitespace(text))
+      escaped = Regexp.escape(text)
+      escaped = escaped.gsub("\\ ", "[[:blank:]]") if all_whitespace
       escaped = "\\A#{escaped}\\z" if exact
       Regexp.new(escaped, options)
     end
diff --git a/lib/capybara/queries/ancestor_query.rb b/lib/capybara/queries/ancestor_query.rb
index 87e67b7d..94f92cd2 100644
--- a/lib/capybara/queries/ancestor_query.rb
+++ b/lib/capybara/queries/ancestor_query.rb
@@ -2,7 +2,7 @@
 
 module Capybara
   module Queries
-    class AncestorQuery < MatchQuery
+    class AncestorQuery < Capybara::Queries::SelectorQuery
       # @api private
       def resolve_for(node, exact = nil)
         @child_node = node
@@ -18,6 +18,12 @@ module Capybara
         desc += " that is an ancestor of #{child_query.description}" if child_query
         desc
       end
+
+    private
+
+      def valid_keys
+        super - COUNT_KEYS
+      end
     end
   end
 end
diff --git a/lib/capybara/queries/text_query.rb b/lib/capybara/queries/text_query.rb
index 2b29235e..2c886b31 100644
--- a/lib/capybara/queries/text_query.rb
+++ b/lib/capybara/queries/text_query.rb
@@ -10,11 +10,8 @@ module Capybara
         else
           type
         end
-        @expected_text = if expected_text.is_a?(Regexp)
-          expected_text
-        else
-          Capybara::Helpers.normalize_whitespace(expected_text)
-        end
+
+        @expected_text = expected_text.is_a?(Regexp) ? expected_text : expected_text.to_s
         @options = options
         super(@options)
         self.session_options = session_options
@@ -94,7 +91,7 @@ module Capybara
       end
 
       def text(node, query_type)
-        Capybara::Helpers.normalize_whitespace(node.text(query_type))
+        node.text(query_type)
       end
     end
   end
diff --git a/lib/capybara/queries/title_query.rb b/lib/capybara/queries/title_query.rb
index 6013c020..325f8d79 100644
--- a/lib/capybara/queries/title_query.rb
+++ b/lib/capybara/queries/title_query.rb
@@ -5,10 +5,10 @@ module Capybara
   module Queries
     class TitleQuery < BaseQuery
       def initialize(expected_title, **options)
-        @expected_title = expected_title.is_a?(Regexp) ? expected_title : Capybara::Helpers.normalize_whitespace(expected_title)
+        @expected_title = expected_title.is_a?(Regexp) ? expected_title : expected_title.to_s
         @options = options
         super(@options)
-        @search_regexp = Capybara::Helpers.to_regexp(@expected_title, exact: options.fetch(:exact, false))
+        @search_regexp = Capybara::Helpers.to_regexp(@expected_title, all_whitespace: true, exact: options.fetch(:exact, false))
         assert_valid_keys
       end
 
diff --git a/lib/capybara/rack_test/node.rb b/lib/capybara/rack_test/node.rb
index a1d4d835..a9a082a2 100644
--- a/lib/capybara/rack_test/node.rb
+++ b/lib/capybara/rack_test/node.rb
@@ -1,12 +1,22 @@
 # frozen_string_literal: true
 
 class Capybara::RackTest::Node < Capybara::Driver::Node
+  BLOCK_ELEMENTS = %w[p h1 h2 h3 h4 h5 h6 ol ul pre address blockquote dl div fieldset form hr noscript table].freeze
+
   def all_text
-    Capybara::Helpers.normalize_whitespace(native.text)
+    native.text
+          .gsub(/[\u200b\u200e\u200f]/, '')
+          .gsub(/[\ \n\f\t\v\u2028\u2029]+/, ' ')
+          .strip
+          .tr("\u00a0", ' ')
   end
 
   def visible_text
-    Capybara::Helpers.normalize_whitespace(unnormalized_text)
+    displayed_text.gsub(/\ +/, ' ')
+                  .gsub(/\ *\n+\ */, "\n")
+                  .gsub(/\n+/, "\n")
+                  .strip
+                  .tr("\u00a0", ' ')
   end
 
   def [](name)
@@ -103,15 +113,20 @@ class Capybara::RackTest::Node < Capybara::Driver::Node
 
 protected
 
-  def unnormalized_text(check_ancestor_visibility = true)
-    if !string_node.visible?(check_ancestor_visibility)
+  # @api private
+  def displayed_text(check_ancestor: true)
+    if !string_node.visible?(check_ancestor)
       ''
     elsif native.text?
       native.text
+            .gsub(/[\u200b\u200e\u200f]/, '')
+            .gsub(/[\ \n\f\t\v\u2028\u2029]+/, ' ')
     elsif native.element?
-      native.children.map do |child|
-        Capybara::RackTest::Node.new(driver, child).unnormalized_text(false)
-      end.join
+      text = native.children.map do |child|
+        Capybara::RackTest::Node.new(driver, child).displayed_text(check_ancestor: false)
+      end.join || ''
+      text = "\n#{text}\n" if BLOCK_ELEMENTS.include?(tag_name)
+      text
     else
       ''
     end
diff --git a/lib/capybara/rspec/matchers.rb b/lib/capybara/rspec/matchers.rb
index a62d4c8b..f40ad530 100644
--- a/lib/capybara/rspec/matchers.rb
+++ b/lib/capybara/rspec/matchers.rb
@@ -154,7 +154,6 @@ module Capybara
       end
 
       def format(content)
-        content = Capybara::Helpers.normalize_whitespace(content) unless content.is_a? Regexp
         content.inspect
       end
     end
diff --git a/lib/capybara/selenium/node.rb b/lib/capybara/selenium/node.rb
index 18e51ac8..0acbc6e4 100644
--- a/lib/capybara/selenium/node.rb
+++ b/lib/capybara/selenium/node.rb
@@ -15,13 +15,15 @@ class Capybara::Selenium::Node < Capybara::Driver::Node
   )
 
   def visible_text
-    # Selenium doesn't normalize Unicode whitespace.
-    Capybara::Helpers.normalize_whitespace(native.text)
+    native.text
   end
 
   def all_text
     text = driver.execute_script("return arguments[0].textContent", self)
-    Capybara::Helpers.normalize_whitespace(text)
+    text.gsub(/[\u200b\u200e\u200f]/, '')
+        .gsub(/[\ \n\f\t\v\u2028\u2029]+/, ' ')
+        .strip
+        .tr("\u00a0", ' ')
   end
 
   def [](name)
diff --git a/lib/capybara/spec/session/ancestor_spec.rb b/lib/capybara/spec/session/ancestor_spec.rb
index 8f947114..fdcad1b5 100644
--- a/lib/capybara/spec/session/ancestor_spec.rb
+++ b/lib/capybara/spec/session/ancestor_spec.rb
@@ -17,7 +17,7 @@ Capybara::SpecHelper.spec '#ancestor' do
 
   it "should find the ancestor element using the given locator and options" do
     el = @session.find(:css, '#child')
-    expect(el.ancestor('//div', text: 'Ancestor Ancestor Ancestor')[:id]).to eq('ancestor3')
+    expect(el.ancestor('//div', text: "Ancestor\nAncestor\nAncestor")[:id]).to eq('ancestor3')
   end
 
   it "should raise an error if there are multiple matches" do
@@ -53,8 +53,8 @@ Capybara::SpecHelper.spec '#ancestor' do
         xpath { |num| ".//*[@id='ancestor#{num}']" }
       end
       el = @session.find(:css, '#child')
-      expect(el.ancestor(:level, 1).text).to eq('Ancestor Child')
-      expect(el.ancestor(:level, 3).text).to eq('Ancestor Ancestor Ancestor Child')
+      expect(el.ancestor(:level, 1)[:id]).to eq "ancestor1"
+      expect(el.ancestor(:level, 3)[:id]).to eq "ancestor3"
     end
   end
 
@@ -62,7 +62,7 @@ Capybara::SpecHelper.spec '#ancestor' do
     el = @session.find(:css, '#child')
     expect do
       el.ancestor(:xpath, '//div[@id="nosuchthing"]')
-    end.to raise_error(Capybara::ElementNotFound, "Unable to find xpath \"//div[@id=\\\"nosuchthing\\\"]\" that is an ancestor of visible css \"#child\"")
+    end.to raise_error(Capybara::ElementNotFound, "Unable to find visible xpath \"//div[@id=\\\"nosuchthing\\\"]\" that is an ancestor of visible css \"#child\"")
   end
 
   context "within a scope" do
diff --git a/lib/capybara/spec/session/assert_text.rb b/lib/capybara/spec/session/assert_text.rb
index d2614825..1bbab9bd 100644
--- a/lib/capybara/spec/session/assert_text.rb
+++ b/lib/capybara/spec/session/assert_text.rb
@@ -7,8 +7,7 @@ Capybara::SpecHelper.spec '#assert_text' do
     expect(@session.assert_text('Lorem')).to eq(true)
     expect(@session.assert_text('Redirect')).to eq(true)
     expect(@session.assert_text(:Redirect)).to eq(true)
-    expect(@session.assert_text('text with whitespace')).to eq(true)
-    expect(@session.assert_text("text     with \n\n whitespace")).to eq(true)
+    expect(@session.assert_text('text with   whitespace')).to eq(true)
   end
 
   it "should take scopes into account" do
@@ -49,7 +48,7 @@ Capybara::SpecHelper.spec '#assert_text' do
   it "should raise error with a helpful message if the requested text is present but with incorrect case" do
     @session.visit('/with_html')
     expect do
-      @session.assert_text('Text With Whitespace')
+      @session.assert_text('Text With   Whitespace')
     end.to raise_error(Capybara::ExpectationNotMet, /it was found 1 time using a case insensitive search/)
   end
 
@@ -77,7 +76,7 @@ Capybara::SpecHelper.spec '#assert_text' do
     @session.visit('/with_html')
     expect do
       @session.assert_text(/xxxxyzzz/)
-    end.to raise_error(Capybara::ExpectationNotMet, /\Aexpected to find text matching \/xxxxyzzz\/ in "This is a test Header Class(.+)"\Z/)
+    end.to raise_error(Capybara::ExpectationNotMet, /\Aexpected to find text matching \/xxxxyzzz\/ in "This is a test\\nHeader Class(.+)"\Z/)
   end
 
   it "should escape any characters that would have special meaning in a regexp" do
@@ -112,7 +111,7 @@ Capybara::SpecHelper.spec '#assert_text' do
       Capybara.using_wait_time(0) do
         @session.visit('/with_js')
         @session.find(:css, '#reload-list').click
-        @session.find(:css, '#the-list').assert_text('Foo Bar', wait: 0.9)
+        @session.find(:css, '#the-list').assert_text("Foo\nBar", wait: 0.9)
       end
     end
 
@@ -174,7 +173,7 @@ Capybara::SpecHelper.spec '#assert_no_text' do
     @session.visit('/with_html')
     expect do
       @session.assert_no_text('Lorem')
-    end.to raise_error(Capybara::ExpectationNotMet, /\Aexpected not to find text "Lorem" in "This is a test Header Class.+"\Z/)
+    end.to raise_error(Capybara::ExpectationNotMet, /\Aexpected not to find text "Lorem" in "This is a test.*"\z/)
   end
 
   it "should be true if scoped to an element which does not have the text" do
diff --git a/lib/capybara/spec/session/assert_title.rb b/lib/capybara/spec/session/assert_title.rb
index f184c821..4cfab505 100644
--- a/lib/capybara/spec/session/assert_title.rb
+++ b/lib/capybara/spec/session/assert_title.rb
@@ -40,11 +40,20 @@ Capybara::SpecHelper.spec '#assert_title' do
     end.to raise_error(Capybara::ExpectationNotMet, 'expected "with_js" to include "monkey"')
   end
 
-  it "should normalize given title" do
-    @session.assert_title('  with_js  ')
+  it "should not normalize given title" do
+    @session.visit('/with_js')
+    expect { @session.assert_title('  with_js  ') }.to raise_error(Capybara::ExpectationNotMet)
   end
 
-  it "should normalize given title in error message" do
+  it "should match correctly normalized title" do
+    uri = Addressable::URI.parse('/with_title')
+    uri.query_values = { title: ' &nbsp; with space &nbsp;title   ' }
+    @session.visit(uri.to_s)
+    @session.assert_title('  with space  title')
+    expect { @session.assert_title('with space title') }.to raise_error(Capybara::ExpectationNotMet)
+  end
+
+  it "should not normalize given title in error message" do
     expect do
       @session.assert_title(2)
     end.to raise_error(Capybara::ExpectationNotMet, 'expected "with_js" to include "2"')
diff --git a/lib/capybara/spec/session/has_text_spec.rb b/lib/capybara/spec/session/has_text_spec.rb
index 5be2bfb3..04192cde 100644
--- a/lib/capybara/spec/session/has_text_spec.rb
+++ b/lib/capybara/spec/session/has_text_spec.rb
@@ -29,14 +29,9 @@ Capybara::SpecHelper.spec '#has_text?' do
     expect(@session).to have_text('exercitation ullamco laboris')
   end
 
-  it "should ignore extra whitespace and newlines" do
+  it "should search correctly normalized text" do
     @session.visit('/with_html')
-    expect(@session).to have_text('text with whitespace')
-  end
-
-  it "should ignore whitespace and newlines in the search string" do
-    @session.visit('/with_html')
-    expect(@session).to have_text("text     with \n\n whitespace")
+    expect(@session).to have_text('text with   whitespace')
   end
 
   it "should be false if the given text is not on the page" do
diff --git a/lib/capybara/spec/session/node_spec.rb b/lib/capybara/spec/session/node_spec.rb
index 8117123b..77bab014 100644
--- a/lib/capybara/spec/session/node_spec.rb
+++ b/lib/capybara/spec/session/node_spec.rb
@@ -119,7 +119,7 @@ Capybara::SpecHelper.spec "node" do
         @session.visit('/with_js')
         @session.find(:css, '#existing_content_editable_child').set('WYSIWYG')
         expect(@session.find(:css, '#existing_content_editable_child').text).to eq('WYSIWYG')
-        expect(@session.find(:css, '#existing_content_editable_child_parent').text).to eq('Some content WYSIWYG')
+        expect(@session.find(:css, '#existing_content_editable_child_parent').text).to eq("Some content\nWYSIWYG")
       end
     end
   end
diff --git a/lib/capybara/spec/session/text_spec.rb b/lib/capybara/spec/session/text_spec.rb
index be500ec0..ab34004b 100644
--- a/lib/capybara/spec/session/text_spec.rb
+++ b/lib/capybara/spec/session/text_spec.rb
@@ -51,9 +51,15 @@ Capybara::SpecHelper.spec '#text' do
     after { Capybara.default_selector = :xpath }
   end
 
-  it "should strip whitespace" do
+  it "should be correctly normalized when visible" do
     @session.visit('/with_html')
-    @session.find(:css, '#second')
-    expect(@session.find(:css, '#second').text).to match(/\ADuis aute .* text with whitespace .* est laborum\.\z/)
+    el = @session.find(:css, '#normalized')
+    expect(el.text).to eq "Some text\nMore text\nAnd more text\nEven more    text on multiple lines"
+  end
+
+  it "should be a textContent with irrelevant whitespace collapsed when non-visible" do
+    @session.visit('/with_html')
+    el = @session.find(:css, '#non_visible_normalized', visible: false)
+    expect(el.text(:all)).to eq "Some textMore text And more text Even more    text on multiple lines"
   end
 end
diff --git a/lib/capybara/spec/test_app.rb b/lib/capybara/spec/test_app.rb
index 48342154..60202909 100644
--- a/lib/capybara/spec/test_app.rb
+++ b/lib/capybara/spec/test_app.rb
@@ -144,6 +144,15 @@ class TestApp < Sinatra::Base
     erb :with_html, locals: { referrer: request.referrer }
   end
 
+  get '/with_title' do
+    <<-HTML
+      <title>#{params[:title] || 'Test Title'}</title>
+      <body>
+        <svg><title>abcdefg</title></svg>
+      </body>
+    HTML
+  end
+
   get '/:view' do |view|
     erb view.to_sym, locals: { referrer: request.referrer }
   end
diff --git a/lib/capybara/spec/views/with_html.erb b/lib/capybara/spec/views/with_html.erb
index 91c51998..a4e98602 100644
--- a/lib/capybara/spec/views/with_html.erb
+++ b/lib/capybara/spec/views/with_html.erb
@@ -153,3 +153,20 @@ banana</textarea>
 </div>
 
 <div id='1escape.me' class="2escape">needs escaping</div>
+
+<div id="normalized">
+  Some text<div>More   text</div>
+  <div> And more text</div>
+  Even more &nbsp;&nbsp; text
+
+  on multiple lines
+</div>
+
+<div id="non_visible_normalized" style="display: none">
+  Some text<div>More   text</div>
+  <div> And more text</div>
+  Even more &nbsp;&nbsp; text
+
+  on multiple lines
+</div>
+

From 129759bf967bfcd699fa31645b56fd75c63cc99f Mon Sep 17 00:00:00 2001
From: Thomas Walpole <twalpole@gmail.com>
Date: Fri, 9 Mar 2018 10:02:15 -0800
Subject: [PATCH 2/3] fix unicode whitespace stripping

---
 lib/capybara/rack_test/node.rb         | 9 +++++----
 lib/capybara/selenium/node.rb          | 3 ++-
 lib/capybara/spec/session/text_spec.rb | 7 +++++++
 lib/capybara/spec/views/with_html.erb  | 3 +++
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/lib/capybara/rack_test/node.rb b/lib/capybara/rack_test/node.rb
index a9a082a2..42e93512 100644
--- a/lib/capybara/rack_test/node.rb
+++ b/lib/capybara/rack_test/node.rb
@@ -7,15 +7,16 @@ class Capybara::RackTest::Node < Capybara::Driver::Node
     native.text
           .gsub(/[\u200b\u200e\u200f]/, '')
           .gsub(/[\ \n\f\t\v\u2028\u2029]+/, ' ')
-          .strip
+          .gsub(/\A[[:space:]&&[^\u00a0]]+/, "")
+          .gsub(/[[:space:]&&[^\u00a0]]+\z/, "")
           .tr("\u00a0", ' ')
   end
 
   def visible_text
     displayed_text.gsub(/\ +/, ' ')
-                  .gsub(/\ *\n+\ */, "\n")
-                  .gsub(/\n+/, "\n")
-                  .strip
+                  .gsub(/[\ \n]*\n[\ \n]*/, "\n")
+                  .gsub(/\A[[:space:]&&[^\u00a0]]+/, "")
+                  .gsub(/[[:space:]&&[^\u00a0]]+\z/, "")
                   .tr("\u00a0", ' ')
   end
 
diff --git a/lib/capybara/selenium/node.rb b/lib/capybara/selenium/node.rb
index 0acbc6e4..a3b54038 100644
--- a/lib/capybara/selenium/node.rb
+++ b/lib/capybara/selenium/node.rb
@@ -22,7 +22,8 @@ class Capybara::Selenium::Node < Capybara::Driver::Node
     text = driver.execute_script("return arguments[0].textContent", self)
     text.gsub(/[\u200b\u200e\u200f]/, '')
         .gsub(/[\ \n\f\t\v\u2028\u2029]+/, ' ')
-        .strip
+        .gsub(/\A[[:space:]&&[^\u00a0]]+/, "")
+        .gsub(/[[:space:]&&[^\u00a0]]+\z/, "")
         .tr("\u00a0", ' ')
   end
 
diff --git a/lib/capybara/spec/session/text_spec.rb b/lib/capybara/spec/session/text_spec.rb
index ab34004b..16a1c8be 100644
--- a/lib/capybara/spec/session/text_spec.rb
+++ b/lib/capybara/spec/session/text_spec.rb
@@ -62,4 +62,11 @@ Capybara::SpecHelper.spec '#text' do
     el = @session.find(:css, '#non_visible_normalized', visible: false)
     expect(el.text(:all)).to eq "Some textMore text And more text Even more    text on multiple lines"
   end
+
+  it "should strip correctly" do
+    @session.visit('/with_html')
+    el = @session.find(:css, '#ws')
+    expect(el.text).to eq " "
+    expect(el.text(:all)).to eq " "
+  end
 end
diff --git a/lib/capybara/spec/views/with_html.erb b/lib/capybara/spec/views/with_html.erb
index a4e98602..b48a7d38 100644
--- a/lib/capybara/spec/views/with_html.erb
+++ b/lib/capybara/spec/views/with_html.erb
@@ -170,3 +170,6 @@ banana</textarea>
   on multiple lines
 </div>
 
+<div id="ws">
+&#x20;&#x1680;&#x2000;&#x2001;&#x2002; &#x2003;&#x2004;&nbsp;&#x2005; &#x2006;&#x2007;&#x2008;&#x2009;&#x200A;&#x202F;&#x205F;&#x3000;
+</div>

From b0e82b092531b2fb7def85e4e6350a2493097fcd Mon Sep 17 00:00:00 2001
From: Thomas Walpole <twalpole@gmail.com>
Date: Mon, 12 Mar 2018 10:23:32 -0700
Subject: [PATCH 3/3] Deprecate Capybara::Helpers::normalize_whitespace

---
 lib/capybara/helpers.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/capybara/helpers.rb b/lib/capybara/helpers.rb
index 91c54ad4..ae2f5953 100644
--- a/lib/capybara/helpers.rb
+++ b/lib/capybara/helpers.rb
@@ -6,16 +6,16 @@ module Capybara
     extend self
 
     ##
-    #
+    # @deprecated
     # Normalizes whitespace space by stripping leading and trailing
     # whitespace and replacing sequences of whitespace characters
     # with a single space.
-    # TODO: Deprecate this see: https://w3c.github.io/webdriver/webdriver-spec.html#dfn-bot-dom-getvisibletext
     #
     # @param [String] text     Text to normalize
     # @return [String]         Normalized text
     #
     def normalize_whitespace(text)
+      warn "DEPRECATED: Capybara::Helpers::normalize_whitespace is deprecated, please update your driver"
       text.to_s.gsub(/[[:space:]]+/, ' ').strip
     end