Match Rinku's behaviour for closing punctuation in links
Rinku 2.0.0 (the version we use) will remove the last character of a link if it's a closing part of a punctuation pair (different types of parentheses and quotes), unless both of the below are true: 1. The matching pair has different start and end characters. 2. There are equal numbers of both in the matched string (they don't have to be balanced).
This commit is contained in:
parent
1a09d5cda8
commit
cb55bc3c07
2 changed files with 95 additions and 34 deletions
|
@ -25,7 +25,7 @@ module Banzai
|
|||
# period or comma for punctuation without those characters being included
|
||||
# in the generated link.
|
||||
#
|
||||
# Rubular: http://rubular.com/r/cxjPyZc7Sb
|
||||
# Rubular: http://rubular.com/r/JzPhi6DCZp
|
||||
LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
|
||||
|
||||
# Text matching LINK_PATTERN inside these elements will not be linked
|
||||
|
@ -37,23 +37,17 @@ module Banzai
|
|||
and contains(., '://')
|
||||
]).freeze
|
||||
|
||||
PUNCTUATION_PAIRS = {
|
||||
"'" => "'",
|
||||
'"' => '"',
|
||||
')' => '(',
|
||||
']' => '[',
|
||||
'}' => '{'
|
||||
}.freeze
|
||||
|
||||
def call
|
||||
return doc if context[:autolink] == false
|
||||
|
||||
text_parse
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
|
||||
def contains_unsafe?(scheme)
|
||||
return false unless scheme
|
||||
|
||||
scheme = scheme.strip.downcase
|
||||
Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
|
||||
end
|
||||
|
||||
def text_parse
|
||||
doc.xpath(TEXT_QUERY).each do |node|
|
||||
content = node.to_html
|
||||
|
||||
|
@ -69,6 +63,16 @@ module Banzai
|
|||
doc
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
|
||||
def contains_unsafe?(scheme)
|
||||
return false unless scheme
|
||||
|
||||
scheme = scheme.strip.downcase
|
||||
Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
|
||||
end
|
||||
|
||||
def autolink_match(match)
|
||||
# start by stripping out dangerous links
|
||||
begin
|
||||
|
@ -84,6 +88,22 @@ module Banzai
|
|||
match.gsub!(/((?:&[\w#]+;)+)\z/, '')
|
||||
dropped = ($1 || '').html_safe
|
||||
|
||||
# To match the behaviour of Rinku, if the matched link ends with a
|
||||
# closing part of a matched pair of punctuation, we remove that trailing
|
||||
# character unless there are an equal number of closing and opening
|
||||
# characters in the link.
|
||||
if match.end_with?(*PUNCTUATION_PAIRS.keys)
|
||||
close_character = match[-1]
|
||||
close_count = match.count(close_character)
|
||||
open_character = PUNCTUATION_PAIRS[close_character]
|
||||
open_count = match.count(open_character)
|
||||
|
||||
if open_count != close_count || open_character == close_character
|
||||
dropped += close_character
|
||||
match = match[0..-2]
|
||||
end
|
||||
end
|
||||
|
||||
options = link_options.merge(href: match)
|
||||
content_tag(:a, match.html_safe, options) + dropped
|
||||
end
|
||||
|
|
|
@ -4,6 +4,7 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
include FilterSpecHelper
|
||||
|
||||
let(:link) { 'http://about.gitlab.com/' }
|
||||
let(:quotes) { ['"', "'"] }
|
||||
|
||||
it 'does nothing when :autolink is false' do
|
||||
exp = act = link
|
||||
|
@ -15,16 +16,6 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
expect(filter(act).to_html).to eq exp
|
||||
end
|
||||
|
||||
context 'when the input contains no links' do
|
||||
it 'does not parse_html back the rinku returned value' do
|
||||
act = HTML::Pipeline.parse('<p>This text contains no links to autolink</p>')
|
||||
|
||||
expect_any_instance_of(described_class).not_to receive(:parse_html)
|
||||
|
||||
filter(act).to_html
|
||||
end
|
||||
end
|
||||
|
||||
context 'Various schemes' do
|
||||
it 'autolinks http' do
|
||||
doc = filter("See #{link}")
|
||||
|
@ -141,6 +132,45 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
expect(doc.at_css('a').text).to eq link
|
||||
end
|
||||
|
||||
it 'includes trailing punctuation when part of a balanced pair' do
|
||||
described_class::PUNCTUATION_PAIRS.each do |close, open|
|
||||
next if open.in?(quotes)
|
||||
|
||||
balanced_link = "#{link}#{open}abc#{close}"
|
||||
balanced_actual = filter("See #{balanced_link}...")
|
||||
unbalanced_link = "#{link}#{close}"
|
||||
unbalanced_actual = filter("See #{unbalanced_link}...")
|
||||
|
||||
expect(balanced_actual.at_css('a').text).to eq(balanced_link)
|
||||
expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}..."))
|
||||
expect(unbalanced_actual.at_css('a').text).to eq(link)
|
||||
expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}..."))
|
||||
end
|
||||
end
|
||||
|
||||
it 'removes trailing quotes' do
|
||||
quotes.each do |quote|
|
||||
balanced_link = "#{link}#{quote}abc#{quote}"
|
||||
balanced_actual = filter("See #{balanced_link}...")
|
||||
unbalanced_link = "#{link}#{quote}"
|
||||
unbalanced_actual = filter("See #{unbalanced_link}...")
|
||||
|
||||
expect(balanced_actual.at_css('a').text).to eq(balanced_link[0...-1])
|
||||
expect(unescape(balanced_actual.to_html)).to eq(Rinku.auto_link("See #{balanced_link}..."))
|
||||
expect(unbalanced_actual.at_css('a').text).to eq(link)
|
||||
expect(unescape(unbalanced_actual.to_html)).to eq(Rinku.auto_link("See #{unbalanced_link}..."))
|
||||
end
|
||||
end
|
||||
|
||||
it 'removes one closing punctuation mark when the punctuation in the link is unbalanced' do
|
||||
complicated_link = "(#{link}(a'b[c'd]))'"
|
||||
expected_complicated_link = %Q{(<a href="#{link}(a'b[c'd]))">#{link}(a'b[c'd]))</a>'}
|
||||
actual = unescape(filter(complicated_link).to_html)
|
||||
|
||||
expect(actual).to eq(Rinku.auto_link(complicated_link))
|
||||
expect(actual).to eq(expected_complicated_link)
|
||||
end
|
||||
|
||||
it 'does not include trailing HTML entities' do
|
||||
doc = filter("See <<<#{link}>>>")
|
||||
|
||||
|
@ -162,16 +192,27 @@ describe Banzai::Filter::AutolinkFilter do
|
|||
end
|
||||
|
||||
context 'when the link is inside a tag' do
|
||||
it 'renders text after the link correctly for http' do
|
||||
doc = filter(ERB::Util.html_escape_once("<http://link><another>"))
|
||||
%w[http rdar].each do |protocol|
|
||||
it "renders text after the link correctly for #{protocol}" do
|
||||
doc = filter(ERB::Util.html_escape_once("<#{protocol}://link><another>"))
|
||||
|
||||
expect(doc.children.last.text).to include('<another>')
|
||||
end
|
||||
|
||||
it 'renders text after the link correctly for not other protocol' do
|
||||
doc = filter(ERB::Util.html_escape_once("<rdar://link><another>"))
|
||||
|
||||
expect(doc.children.last.text).to include('<another>')
|
||||
end
|
||||
end
|
||||
|
||||
# Rinku does not escape these characters in HTML attributes, but content_tag
|
||||
# does. We don't care about that difference for these specs, though.
|
||||
def unescape(html)
|
||||
%w([ ] { }).each do |cgi_escape|
|
||||
html.sub!(CGI.escape(cgi_escape), cgi_escape)
|
||||
end
|
||||
|
||||
quotes.each do |html_escape|
|
||||
html.sub!(CGI.escape_html(html_escape), html_escape)
|
||||
html.sub!(CGI.escape(html_escape), CGI.escape_html(html_escape))
|
||||
end
|
||||
|
||||
html
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue