Merge branch '41719-mr-title-fix' into 'master'
Render htmlentities correctly for links not supported by Rinku Closes #41719 See merge request gitlab-org/gitlab-ce!17180
This commit is contained in:
commit
089b2fb9fc
6 changed files with 176 additions and 92 deletions
5
changelogs/unreleased/41719-mr-title-fix.yml
Normal file
5
changelogs/unreleased/41719-mr-title-fix.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Render htmlentities correctly for links not supported by Rinku
|
||||
merge_request:
|
||||
author:
|
||||
type: fixed
|
|
@ -25,8 +25,8 @@ module Banzai
|
|||
# period or comma for punctuation without those characters being included
|
||||
# in the generated link.
|
||||
#
|
||||
# Rubular: http://rubular.com/r/cxjPyZc7Sb
|
||||
LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
|
||||
# Rubular: http://rubular.com/r/JzPhi6DCZp
|
||||
LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
|
||||
|
||||
# Text matching LINK_PATTERN inside these elements will not be linked
|
||||
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
|
||||
|
@ -35,53 +35,19 @@ module Banzai
|
|||
TEXT_QUERY = %Q(descendant-or-self::text()[
|
||||
not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
|
||||
and contains(., '://')
|
||||
and not(starts-with(., 'http'))
|
||||
and not(starts-with(., 'ftp'))
|
||||
]).freeze
|
||||
|
||||
PUNCTUATION_PAIRS = {
|
||||
"'" => "'",
|
||||
'"' => '"',
|
||||
')' => '(',
|
||||
']' => '[',
|
||||
'}' => '{'
|
||||
}.freeze
|
||||
|
||||
def call
|
||||
return doc if context[:autolink] == false
|
||||
|
||||
rinku_parse
|
||||
text_parse
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Run the text through Rinku as a first pass
|
||||
#
|
||||
# This will quickly autolink http(s) and ftp links.
|
||||
#
|
||||
# `@doc` will be re-parsed with the HTML String from Rinku.
|
||||
def rinku_parse
|
||||
# Convert the options from a Hash to a String that Rinku expects
|
||||
options = tag_options(link_options)
|
||||
|
||||
# NOTE: We don't parse email links because it will erroneously match
|
||||
# external Commit and CommitRange references.
|
||||
#
|
||||
# The final argument tells Rinku to link short URLs that don't include a
|
||||
# period (e.g., http://localhost:3000/)
|
||||
rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
|
||||
|
||||
return if rinku == html
|
||||
|
||||
# Rinku returns a String, so parse it back to a Nokogiri::XML::Document
|
||||
# for further processing.
|
||||
@doc = parse_html(rinku)
|
||||
end
|
||||
|
||||
# Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
|
||||
def contains_unsafe?(scheme)
|
||||
return false unless scheme
|
||||
|
||||
scheme = scheme.strip.downcase
|
||||
Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
|
||||
end
|
||||
|
||||
# Autolinks any text matching LINK_PATTERN that Rinku didn't already
|
||||
# replace
|
||||
def text_parse
|
||||
doc.xpath(TEXT_QUERY).each do |node|
|
||||
content = node.to_html
|
||||
|
||||
|
@ -97,6 +63,16 @@ module Banzai
|
|||
doc
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
|
||||
def contains_unsafe?(scheme)
|
||||
return false unless scheme
|
||||
|
||||
scheme = scheme.strip.downcase
|
||||
Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
|
||||
end
|
||||
|
||||
def autolink_match(match)
|
||||
# start by stripping out dangerous links
|
||||
begin
|
||||
|
@ -112,12 +88,30 @@ module Banzai
|
|||
match.gsub!(/((?:&[\w#]+;)+)\z/, '')
|
||||
dropped = ($1 || '').html_safe
|
||||
|
||||
# To match the behaviour of Rinku, if the matched link ends with a
|
||||
# closing part of a matched pair of punctuation, we remove that trailing
|
||||
# character unless there are an equal number of closing and opening
|
||||
# characters in the link.
|
||||
if match.end_with?(*PUNCTUATION_PAIRS.keys)
|
||||
close_character = match[-1]
|
||||
close_count = match.count(close_character)
|
||||
open_character = PUNCTUATION_PAIRS[close_character]
|
||||
open_count = match.count(open_character)
|
||||
|
||||
if open_count != close_count || open_character == close_character
|
||||
dropped += close_character
|
||||
match = match[0..-2]
|
||||
end
|
||||
end
|
||||
|
||||
options = link_options.merge(href: match)
|
||||
content_tag(:a, match, options) + dropped
|
||||
content_tag(:a, match.html_safe, options) + dropped
|
||||
end
|
||||
|
||||
def autolink_filter(text)
|
||||
text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
|
||||
Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
|
||||
autolink_match(link)
|
||||
end
|
||||
end
|
||||
|
||||
def link_options
|
||||
|
|
|
@ -14,7 +14,7 @@ module Gitlab
|
|||
end
|
||||
|
||||
def mark(marker_ranges)
|
||||
return rich_line unless marker_ranges
|
||||
return rich_line unless marker_ranges&.any?
|
||||
|
||||
if html_escaped
|
||||
rich_marker_ranges = []
|
||||
|
|
|
@ -1,13 +1,15 @@
|
|||
module Gitlab
|
||||
class StringRegexMarker < StringRangeMarker
|
||||
def mark(regex, group: 0, &block)
|
||||
regex_match = raw_line.match(regex)
|
||||
return rich_line unless regex_match
|
||||
ranges = []
|
||||
|
||||
begin_index, end_index = regex_match.offset(group)
|
||||
name_range = begin_index..(end_index - 1)
|
||||
raw_line.scan(regex) do
|
||||
begin_index, end_index = Regexp.last_match.offset(group)
|
||||
|
||||
super([name_range], &block)
|
||||
ranges << (begin_index..(end_index - 1))
|
||||
end
|
||||
|
||||
super(ranges, &block)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -4,6 +4,7 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
include FilterSpecHelper
|
||||
|
||||
let(:link) { 'http://about.gitlab.com/' }
|
||||
let(:quotes) { ['"', "'"] }
|
||||
|
||||
it 'does nothing when :autolink is false' do
|
||||
exp = act = link
|
||||
|
@ -15,17 +16,7 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
expect(filter(act).to_html).to eq exp
|
||||
end
|
||||
|
||||
context 'when the input contains no links' do
|
||||
it 'does not parse_html back the rinku returned value' do
|
||||
act = HTML::Pipeline.parse('<p>This text contains no links to autolink</p>')
|
||||
|
||||
expect_any_instance_of(described_class).not_to receive(:parse_html)
|
||||
|
||||
filter(act).to_html
|
||||
end
|
||||
end
|
||||
|
||||
context 'Rinku schemes' do
|
||||
context 'Various schemes' do
|
||||
it 'autolinks http' do
|
||||
doc = filter("See #{link}")
|
||||
expect(doc.at_css('a').text).to eq link
|
||||
|
@ -56,33 +47,27 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
expect(doc.at_css('a')['href']).to eq link
|
||||
end
|
||||
|
||||
it 'autolinks multiple URLs' do
|
||||
link1 = 'http://localhost:3000/'
|
||||
link2 = 'http://google.com/'
|
||||
|
||||
doc = filter("See #{link1} and #{link2}")
|
||||
|
||||
found_links = doc.css('a')
|
||||
|
||||
expect(found_links.size).to eq(2)
|
||||
expect(found_links[0].text).to eq(link1)
|
||||
expect(found_links[0]['href']).to eq(link1)
|
||||
expect(found_links[1].text).to eq(link2)
|
||||
expect(found_links[1]['href']).to eq(link2)
|
||||
end
|
||||
|
||||
it 'accepts link_attr options' do
|
||||
doc = filter("See #{link}", link_attr: { class: 'custom' })
|
||||
|
||||
expect(doc.at_css('a')['class']).to eq 'custom'
|
||||
end
|
||||
|
||||
described_class::IGNORE_PARENTS.each do |elem|
|
||||
it "ignores valid links contained inside '#{elem}' element" do
|
||||
exp = act = "<#{elem}>See #{link}</#{elem}>"
|
||||
expect(filter(act).to_html).to eq exp
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the input contains link' do
|
||||
it 'does parse_html back the rinku returned value' do
|
||||
act = HTML::Pipeline.parse("<p>See #{link}</p>")
|
||||
|
||||
expect_any_instance_of(described_class).to receive(:parse_html).at_least(:once).and_call_original
|
||||
|
||||
filter(act).to_html
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'other schemes' do
|
||||
let(:link) { 'foo://bar.baz/' }
|
||||
|
||||
it 'autolinks smb' do
|
||||
link = 'smb:///Volumes/shared/foo.pdf'
|
||||
doc = filter("See #{link}")
|
||||
|
@ -91,6 +76,21 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
expect(doc.at_css('a')['href']).to eq link
|
||||
end
|
||||
|
||||
it 'autolinks multiple occurences of smb' do
|
||||
link1 = 'smb:///Volumes/shared/foo.pdf'
|
||||
link2 = 'smb:///Volumes/shared/bar.pdf'
|
||||
|
||||
doc = filter("See #{link1} and #{link2}")
|
||||
|
||||
found_links = doc.css('a')
|
||||
|
||||
expect(found_links.size).to eq(2)
|
||||
expect(found_links[0].text).to eq(link1)
|
||||
expect(found_links[0]['href']).to eq(link1)
|
||||
expect(found_links[1].text).to eq(link2)
|
||||
expect(found_links[1]['href']).to eq(link2)
|
||||
end
|
||||
|
||||
it 'autolinks irc' do
|
||||
link = 'irc://irc.freenode.net/git'
|
||||
doc = filter("See #{link}")
|
||||
|
@ -132,6 +132,45 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
expect(doc.at_css('a').text).to eq link
|
||||
end
|
||||
|
||||
it 'includes trailing punctuation when part of a balanced pair' do
|
||||
described_class::PUNCTUATION_PAIRS.each do |close, open|
|
||||
next if open.in?(quotes)
|
||||
|
||||
balanced_link = "#{link}#{open}abc#{close}"
|
||||
balanced_actual = filter("See #{balanced_link}...")
|
||||
unbalanced_link = "#{link}#{close}"
|
||||
unbalanced_actual = filter("See #{unbalanced_link}...")
|
||||
|
||||
expect(balanced_actual.at_css('a').text).to eq(balanced_link)
|
||||
expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}..."))
|
||||
expect(unbalanced_actual.at_css('a').text).to eq(link)
|
||||
expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}..."))
|
||||
end
|
||||
end
|
||||
|
||||
it 'removes trailing quotes' do
|
||||
quotes.each do |quote|
|
||||
balanced_link = "#{link}#{quote}abc#{quote}"
|
||||
balanced_actual = filter("See #{balanced_link}...")
|
||||
unbalanced_link = "#{link}#{quote}"
|
||||
unbalanced_actual = filter("See #{unbalanced_link}...")
|
||||
|
||||
expect(balanced_actual.at_css('a').text).to eq(balanced_link[0...-1])
|
||||
expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}..."))
|
||||
expect(unbalanced_actual.at_css('a').text).to eq(link)
|
||||
expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}..."))
|
||||
end
|
||||
end
|
||||
|
||||
it 'removes one closing punctuation mark when the punctuation in the link is unbalanced' do
|
||||
complicated_link = "(#{link}(a'b[c'd]))'"
|
||||
expected_complicated_link = %Q{(<a href="#{link}(a'b[c'd]))">#{link}(a'b[c'd]))</a>'}
|
||||
actual = unescape(filter(complicated_link).to_html)
|
||||
|
||||
expect(actual).to eq(Rinku.auto_link(complicated_link))
|
||||
expect(actual).to eq(expected_complicated_link)
|
||||
end
|
||||
|
||||
it 'does not include trailing HTML entities' do
|
||||
doc = filter("See <<<#{link}>>>")
|
||||
|
||||
|
@ -151,4 +190,29 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the link is inside a tag' do
|
||||
%w[http rdar].each do |protocol|
|
||||
it "renders text after the link correctly for #{protocol}" do
|
||||
doc = filter(ERB::Util.html_escape_once("<#{protocol}://link><another>"))
|
||||
|
||||
expect(doc.children.last.text).to include('<another>')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Rinku does not escape these characters in HTML attributes, but content_tag
|
||||
# does. We don't care about that difference for these specs, though.
|
||||
def unescape(html)
|
||||
%w([ ] { }).each do |cgi_escape|
|
||||
html.sub!(CGI.escape(cgi_escape), cgi_escape)
|
||||
end
|
||||
|
||||
quotes.each do |html_escape|
|
||||
html.sub!(CGI.escape_html(html_escape), html_escape)
|
||||
html.sub!(CGI.escape(html_escape), CGI.escape_html(html_escape))
|
||||
end
|
||||
|
||||
html
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,17 +2,36 @@ require 'spec_helper'
|
|||
|
||||
describe Gitlab::StringRegexMarker do
|
||||
describe '#mark' do
|
||||
let(:raw) { %{"name": "AFNetworking"} }
|
||||
let(:rich) { %{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"AFNetworking"</span>}.html_safe }
|
||||
subject do
|
||||
described_class.new(raw, rich).mark(/"[^"]+":\s*"(?<name>[^"]+)"/, group: :name) do |text, left:, right:|
|
||||
%{<a href="#">#{text}</a>}
|
||||
context 'with a single occurrence' do
|
||||
let(:raw) { %{"name": "AFNetworking"} }
|
||||
let(:rich) { %{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"AFNetworking"</span>}.html_safe }
|
||||
|
||||
subject do
|
||||
described_class.new(raw, rich).mark(/"[^"]+":\s*"(?<name>[^"]+)"/, group: :name) do |text, left:, right:|
|
||||
%{<a href="#">#{text}</a>}
|
||||
end
|
||||
end
|
||||
|
||||
it 'marks the match' do
|
||||
expect(subject).to eq(%{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"<a href="#">AFNetworking</a>"</span>})
|
||||
expect(subject).to be_html_safe
|
||||
end
|
||||
end
|
||||
|
||||
it 'marks the inline diffs' do
|
||||
expect(subject).to eq(%{<span class="key">"name"</span><span class="punctuation">: </span><span class="value">"<a href="#">AFNetworking</a>"</span>})
|
||||
expect(subject).to be_html_safe
|
||||
context 'with multiple occurrences' do
|
||||
let(:raw) { %{a <b> <c> d} }
|
||||
let(:rich) { %{a <b> <c> d}.html_safe }
|
||||
|
||||
subject do
|
||||
described_class.new(raw, rich).mark(/<[a-z]>/) do |text, left:, right:|
|
||||
%{<strong>#{text}</strong>}
|
||||
end
|
||||
end
|
||||
|
||||
it 'marks the matches' do
|
||||
expect(subject).to eq(%{a <strong><b></strong> <strong><c></strong> d})
|
||||
expect(subject).to be_html_safe
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue