Resolve "Wiki: links with spaces in the url render incorrectly with CommonMark"

This commit is contained in:
Brett Walker 2018-08-21 08:20:30 +00:00 committed by Sean McGivern
parent 5dabe6d1f3
commit 198f8a2c3d
4 changed files with 149 additions and 0 deletions

View file

@ -0,0 +1,5 @@
---
title: Allow spaces in wiki markdown links when using CommonMark
merge_request: 20417
author:
type: fixed

View file

@ -0,0 +1,77 @@
# frozen_string_literal: true
require 'uri'
module Banzai
module Filter
# HTML Filter for markdown links with spaces in the URLs
#
# Based on Banzai::Filter::AutolinkFilter
#
# CommonMark does not allow spaces in the url portion of a link.
# For example, `[example](page slug)` is not valid. However,
# in our wikis, we support (via RedCarpet) this type of link, allowing
# wiki pages to be easily linked by their title. This filter adds that functionality.
# The intent is for this to only be used in Wikis - in general, we want
# to adhere to CommonMark's spec.
#
class SpacedLinkFilter < HTML::Pipeline::Filter
include ActionView::Helpers::TagHelper
# Pattern to match a standard markdown link
#
# Rubular: http://rubular.com/r/z9EAHxYmKI
LINK_PATTERN = /\[([^\]]+)\]\(([^)"]+)(?: \"([^\"]+)\")?\)/
# Text matching LINK_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
# The XPath query to use for finding text nodes to parse.
TEXT_QUERY = %Q(descendant-or-self::text()[
not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
and contains(., ']\(')
]).freeze
def call
return doc if context[:markdown_engine] == :redcarpet
doc.xpath(TEXT_QUERY).each do |node|
content = node.to_html
next unless content.match(LINK_PATTERN)
html = spaced_link_filter(content)
next if html == content
node.replace(html)
end
doc
end
private
def spaced_link_match(link)
match = LINK_PATTERN.match(link)
return link unless match && match[1] && match[2]
# escape the spaces in the url so that it's a valid markdown link,
# then run it through the markdown processor again, let it do its magic
text = match[1]
new_link = match[2].gsub(' ', '%20')
title = match[3] ? " \"#{match[3]}\"" : ''
html = Banzai::Filter::MarkdownFilter.call("[#{text}](#{new_link}#{title})", context)
# link is wrapped in a <p>, so strip that off
html.sub('<p>', '').chomp('</p>')
end
def spaced_link_filter(text)
Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
spaced_link_match(link)
end
end
end
end
end

View file

@ -5,6 +5,7 @@ module Banzai
@filters ||= begin
super.insert_after(Filter::TableOfContentsFilter, Filter::GollumTagsFilter)
.insert_before(Filter::TaskListFilter, Filter::WikiLinkFilter)
.insert_before(Filter::WikiLinkFilter, Filter::SpacedLinkFilter)
end
end
end

View file

@ -0,0 +1,66 @@
require 'spec_helper'
describe Banzai::Filter::SpacedLinkFilter do
include FilterSpecHelper
let(:link) { '[example](page slug)' }
it 'converts slug with spaces to a link' do
doc = filter("See #{link}")
expect(doc.at_css('a').text).to eq 'example'
expect(doc.at_css('a')['href']).to eq 'page%20slug'
expect(doc.at_css('p')).to eq nil
end
it 'converts slug with spaces and a title to a link' do
link = '[example](page slug "title")'
doc = filter("See #{link}")
expect(doc.at_css('a').text).to eq 'example'
expect(doc.at_css('a')['href']).to eq 'page%20slug'
expect(doc.at_css('a')['title']).to eq 'title'
expect(doc.at_css('p')).to eq nil
end
it 'does nothing when markdown_engine is redcarpet' do
exp = act = link
expect(filter(act, markdown_engine: :redcarpet).to_html).to eq exp
end
it 'does nothing with empty text' do
link = '[](page slug)'
doc = filter("See #{link}")
expect(doc.at_css('a')).to eq nil
end
it 'does nothing with an empty slug' do
link = '[example]()'
doc = filter("See #{link}")
expect(doc.at_css('a')).to eq nil
end
it 'converts multiple URLs' do
link1 = '[first](slug one)'
link2 = '[second](http://example.com/slug two)'
doc = filter("See #{link1} and #{link2}")
found_links = doc.css('a')
expect(found_links.size).to eq(2)
expect(found_links[0].text).to eq 'first'
expect(found_links[0]['href']).to eq 'slug%20one'
expect(found_links[1].text).to eq 'second'
expect(found_links[1]['href']).to eq 'http://example.com/slug%20two'
end
described_class::IGNORE_PARENTS.each do |elem|
it "ignores valid links contained inside '#{elem}' element" do
exp = act = "<#{elem}>See #{link}</#{elem}>"
expect(filter(act).to_html).to eq exp
end
end
end