diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb index 41fd4be76ac..16b703a3323 100644 --- a/lib/banzai/filter/abstract_reference_filter.rb +++ b/lib/banzai/filter/abstract_reference_filter.rb @@ -110,30 +110,45 @@ module Banzai end def call - if object_class.reference_pattern - # `#123` - replace_text_nodes_matching(object_class.reference_pattern) do |content| - object_link_filter(content, object_class.reference_pattern) - end + return doc if project.nil? - # `[Issue](#123)`, which is turned into - # `Issue` - replace_link_nodes_with_href(object_class.reference_pattern) do |link, text| - object_link_filter(link, object_class.reference_pattern, link_text: text) - end - end + ref_pattern = object_class.reference_pattern + link_pattern = object_class.link_reference_pattern - if object_class.link_reference_pattern - # `http://gitlab.example.com/namespace/project/issues/123`, which is turned into - # `http://gitlab.example.com/namespace/project/issues/123` - replace_link_nodes_with_text(object_class.link_reference_pattern) do |text| - object_link_filter(text, object_class.link_reference_pattern) - end + each_node do |node| + if text_node?(node) && ref_pattern + replace_text_when_pattern_matches(node, ref_pattern) do |content| + object_link_filter(content, ref_pattern) + end - # `[Issue](http://gitlab.example.com/namespace/project/issues/123)`, which is turned into - # `Issue` - replace_link_nodes_with_href(object_class.link_reference_pattern) do |link, text| - object_link_filter(link, object_class.link_reference_pattern, link_text: text) + elsif element_node?(node) + yield_valid_link(node) do |link, text| + if ref_pattern && link =~ /\A#{ref_pattern}/ + replace_link_node_with_href(node, link) do + object_link_filter(link, ref_pattern, link_text: text) + end + + next + end + + next unless link_pattern + + if link == text && text =~ /\A#{link_pattern}/ + replace_link_node_with_text(node, link) do + object_link_filter(text, link_pattern) + end + + next + end + + if link =~ /\A#{link_pattern}\z/ + replace_link_node_with_href(node, link) do + object_link_filter(link, link_pattern, link_text: text) + end + + next + end + end end end diff --git a/lib/banzai/filter/external_issue_reference_filter.rb b/lib/banzai/filter/external_issue_reference_filter.rb index edc26386903..a3b66c4645a 100644 --- a/lib/banzai/filter/external_issue_reference_filter.rb +++ b/lib/banzai/filter/external_issue_reference_filter.rb @@ -37,13 +37,27 @@ module Banzai # Early return if the project isn't using an external tracker return doc if project.nil? || project.default_issues_tracker? - replace_text_nodes_matching(ExternalIssue.reference_pattern) do |content| - issue_link_filter(content) + ref_pattern = ExternalIssue.reference_pattern + ref_start_pattern = /\A#{ref_pattern}\z/ + + each_node do |node| + if text_node?(node) + replace_text_when_pattern_matches(node, ref_pattern) do |content| + issue_link_filter(content) + end + + elsif element_node?(node) + yield_valid_link(node) do |link, text| + if link =~ ref_start_pattern + replace_link_node_with_href(node, link) do + issue_link_filter(link, link_text: text) + end + end + end + end end - replace_link_nodes_with_href(ExternalIssue.reference_pattern) do |link, text| - issue_link_filter(link, link_text: text) - end + doc end # Replace `JIRA-123` issue references in text with links to the referenced diff --git a/lib/banzai/filter/reference_filter.rb b/lib/banzai/filter/reference_filter.rb index a3326ae042c..31386cf851c 100644 --- a/lib/banzai/filter/reference_filter.rb +++ b/lib/banzai/filter/reference_filter.rb @@ -52,18 +52,13 @@ module Banzai html.html_safe? ? html : ERB::Util.html_escape_once(html) end - def ignore_parents - @ignore_parents ||= begin - # Don't look for references in text nodes that are children of these - # elements. + def ignore_ancestor_query + @ignore_ancestor_query ||= begin parents = %w(pre code a style) parents << 'blockquote' if context[:ignore_blockquotes] - parents.to_set - end - end - def ignored_ancestry?(node) - has_ancestor?(node, ignore_parents) + parents.map { |n| "ancestor::#{n}" }.join(' or ') + end end def project @@ -74,120 +69,67 @@ module Banzai "gfm gfm-#{type}" end - # Iterate through the document's text nodes, yielding the current node's - # content if: - # - # * The `project` context value is present AND - # * The node's content matches `pattern` AND - # * The node is not an ancestor of an ignored node type - # - # pattern - Regex pattern against which to match the node's content - # - # Yields the current node's String contents. The result of the block will - # replace the node's existing content and update the current document. - # - # Returns the updated Nokogiri::HTML::DocumentFragment object. - def replace_text_nodes_matching(pattern) - return doc if project.nil? - - search_text_nodes(doc).each do |node| - next if ignored_ancestry?(node) - next unless node.text =~ pattern - - content = node.to_html - - html = yield content - - next if html == content - - node.replace(html) - end - - doc - end - - # Iterate through the document's link nodes, yielding the current node's - # content if: - # - # * The `project` context value is present AND - # * The node's content matches `pattern` - # - # pattern - Regex pattern against which to match the node's content - # - # Yields the current node's String contents. The result of the block will - # replace the node and update the current document. - # - # Returns the updated Nokogiri::HTML::DocumentFragment object. - def replace_link_nodes_with_text(pattern) - return doc if project.nil? - - doc.xpath('descendant-or-self::a').each do |node| - klass = node.attr('class') - next if klass && klass.include?('gfm') - - link = node.attr('href') - text = node.text - - next unless link && text - - link = CGI.unescape(link) - next unless link.force_encoding('UTF-8').valid_encoding? - # Ignore ending punctionation like periods or commas - next unless link == text && text =~ /\A#{pattern}/ - - html = yield text - - next if html == text - - node.replace(html) - end - - doc - end - - # Iterate through the document's link nodes, yielding the current node's - # content if: - # - # * The `project` context value is present AND - # * The node's HREF matches `pattern` - # - # pattern - Regex pattern against which to match the node's HREF - # - # Yields the current node's String HREF and String content. - # The result of the block will replace the node and update the current document. - # - # Returns the updated Nokogiri::HTML::DocumentFragment object. - def replace_link_nodes_with_href(pattern) - return doc if project.nil? - - doc.xpath('descendant-or-self::a').each do |node| - klass = node.attr('class') - next if klass && klass.include?('gfm') - - link = node.attr('href') - text = node.text - - next unless link && text - link = CGI.unescape(link) - next unless link.force_encoding('UTF-8').valid_encoding? - next unless link && link =~ /\A#{pattern}\z/ - - html = yield link, text - - next if html == link - - node.replace(html) - end - - doc - end - # Ensure that a :project key exists in context # # Note that while the key might exist, its value could be nil! def validate needs :project end + + # Iterates over all and text() nodes in a document. + # + # Nodes are skipped whenever their ancestor is one of the nodes returned + # by `ignore_ancestor_query`. Link tags are not processed if they have a + # "gfm" class or the "href" attribute is empty. + def each_node + query = %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})] + | descendant-or-self::a[ + not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "") + ]} + + doc.xpath(query).each do |node| + yield node + end + end + + # Yields the link's URL and text whenever the node is a valid tag. + def yield_valid_link(node) + link = CGI.unescape(node.attr('href').to_s) + text = node.text + + return unless link.force_encoding('UTF-8').valid_encoding? + + yield link, text + end + + def replace_text_when_pattern_matches(node, pattern) + return unless node.text =~ pattern + + content = node.to_html + html = yield content + + node.replace(html) unless content == html + end + + def replace_link_node_with_text(node, link) + html = yield + + node.replace(html) unless html == node.text + end + + def replace_link_node_with_href(node, link) + html = yield + + node.replace(html) unless html == link + end + + def text_node?(node) + node.is_a?(Nokogiri::XML::Text) + end + + def element_node?(node) + node.is_a?(Nokogiri::XML::Element) + end end end end diff --git a/lib/banzai/filter/user_reference_filter.rb b/lib/banzai/filter/user_reference_filter.rb index 989fa64e078..eea3af842b6 100644 --- a/lib/banzai/filter/user_reference_filter.rb +++ b/lib/banzai/filter/user_reference_filter.rb @@ -59,13 +59,28 @@ module Banzai end def call - replace_text_nodes_matching(User.reference_pattern) do |content| - user_link_filter(content) + return doc if project.nil? + + ref_pattern = User.reference_pattern + ref_pattern_start = /\A#{ref_pattern}\z/ + + each_node do |node| + if text_node?(node) + replace_text_when_pattern_matches(node, ref_pattern) do |content| + user_link_filter(content) + end + elsif element_node?(node) + yield_valid_link(node) do |link, text| + if link =~ ref_pattern_start + replace_link_node_with_href(node, link) do + user_link_filter(link, link_text: text) + end + end + end + end end - replace_link_nodes_with_href(User.reference_pattern) do |link, text| - user_link_filter(link, link_text: text) - end + doc end # Replace `@user` user references in text with links to the referenced