diff --git a/changelogs/unreleased/48869-wiki-slugs-with-spaces.yml b/changelogs/unreleased/48869-wiki-slugs-with-spaces.yml
new file mode 100644
index 00000000000..88ba8028e2c
--- /dev/null
+++ b/changelogs/unreleased/48869-wiki-slugs-with-spaces.yml
@@ -0,0 +1,5 @@
+---
+title: Allow spaces in wiki markdown links when using CommonMark
+merge_request: 20417
+author:
+type: fixed
diff --git a/lib/banzai/filter/spaced_link_filter.rb b/lib/banzai/filter/spaced_link_filter.rb
new file mode 100644
index 00000000000..574a8a6c7a5
--- /dev/null
+++ b/lib/banzai/filter/spaced_link_filter.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+require 'uri'
+
+module Banzai
+  module Filter
+    # HTML Filter for markdown links with spaces in the URLs
+    #
+    # Based on Banzai::Filter::AutolinkFilter
+    #
+    # CommonMark does not allow spaces in the url portion of a link.
+    # For example, `[example](page slug)` is not valid. However,
+    # in our wikis, we support (via RedCarpet) this type of link, allowing
+    # wiki pages to be easily linked by their title. This filter adds that functionality.
+    # The intent is for this to only be used in Wikis - in general, we want
+    # to adhere to CommonMark's spec.
+    #
+    class SpacedLinkFilter < HTML::Pipeline::Filter
+      include ActionView::Helpers::TagHelper
+
+      # Pattern to match a standard markdown link
+      #
+      # Rubular: http://rubular.com/r/z9EAHxYmKI
+      LINK_PATTERN = /\[([^\]]+)\]\(([^)"]+)(?: \"([^\"]+)\")?\)/
+
+      # Text matching LINK_PATTERN inside these elements will not be linked
+      IGNORE_PARENTS = %w(a code kbd pre script style).to_set
+
+      # The XPath query to use for finding text nodes to parse.
+      TEXT_QUERY = %Q(descendant-or-self::text()[
+        not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
+        and contains(., ']\(')
+      ]).freeze
+
+      def call
+        return doc if context[:markdown_engine] == :redcarpet
+
+        doc.xpath(TEXT_QUERY).each do |node|
+          content = node.to_html
+
+          next unless content.match(LINK_PATTERN)
+
+          html = spaced_link_filter(content)
+
+          next if html == content
+
+          node.replace(html)
+        end
+
+        doc
+      end
+
+      private
+
+      def spaced_link_match(link)
+        match = LINK_PATTERN.match(link)
+        return link unless match && match[1] && match[2]
+
+        # escape the spaces in the url so that it's a valid markdown link,
+        # then run it through the markdown processor again, let it do its magic
+        text = match[1]
+        new_link = match[2].gsub(' ', '%20')
+        title = match[3] ? " \"#{match[3]}\"" : ''
+        html = Banzai::Filter::MarkdownFilter.call("[#{text}](#{new_link}#{title})", context)
+
+        # link is wrapped in a <p>, so strip that off
+        html.sub('<p>', '').chomp('</p>')
+      end
+
+      def spaced_link_filter(text)
+        Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+          spaced_link_match(link)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/banzai/pipeline/wiki_pipeline.rb b/lib/banzai/pipeline/wiki_pipeline.rb
index c37b8e71cb0..737ff0cc818 100644
--- a/lib/banzai/pipeline/wiki_pipeline.rb
+++ b/lib/banzai/pipeline/wiki_pipeline.rb
@@ -5,6 +5,7 @@ module Banzai
         @filters ||= begin
           super.insert_after(Filter::TableOfContentsFilter, Filter::GollumTagsFilter)
             .insert_before(Filter::TaskListFilter, Filter::WikiLinkFilter)
+            .insert_before(Filter::WikiLinkFilter, Filter::SpacedLinkFilter)
         end
       end
     end
diff --git a/spec/lib/banzai/filter/spaced_link_filter_spec.rb b/spec/lib/banzai/filter/spaced_link_filter_spec.rb
new file mode 100644
index 00000000000..4463c011522
--- /dev/null
+++ b/spec/lib/banzai/filter/spaced_link_filter_spec.rb
@@ -0,0 +1,66 @@
+require 'spec_helper'
+
+describe Banzai::Filter::SpacedLinkFilter do
+  include FilterSpecHelper
+
+  let(:link) { '[example](page slug)' }
+
+  it 'converts slug with spaces to a link' do
+    doc = filter("See #{link}")
+
+    expect(doc.at_css('a').text).to eq 'example'
+    expect(doc.at_css('a')['href']).to eq 'page%20slug'
+    expect(doc.at_css('p')).to eq nil
+  end
+
+  it 'converts slug with spaces and a title to a link' do
+    link = '[example](page slug "title")'
+    doc = filter("See #{link}")
+
+    expect(doc.at_css('a').text).to eq 'example'
+    expect(doc.at_css('a')['href']).to eq 'page%20slug'
+    expect(doc.at_css('a')['title']).to eq 'title'
+    expect(doc.at_css('p')).to eq nil
+  end
+
+  it 'does nothing when markdown_engine is redcarpet' do
+    exp = act = link
+    expect(filter(act, markdown_engine: :redcarpet).to_html).to eq exp
+  end
+
+  it 'does nothing with empty text' do
+    link = '[](page slug)'
+    doc = filter("See #{link}")
+
+    expect(doc.at_css('a')).to eq nil
+  end
+
+  it 'does nothing with an empty slug' do
+    link = '[example]()'
+    doc = filter("See #{link}")
+
+    expect(doc.at_css('a')).to eq nil
+  end
+
+  it 'converts multiple URLs' do
+    link1 = '[first](slug one)'
+    link2 = '[second](http://example.com/slug two)'
+    doc = filter("See #{link1} and #{link2}")
+
+    found_links = doc.css('a')
+
+    expect(found_links.size).to eq(2)
+    expect(found_links[0].text).to eq 'first'
+    expect(found_links[0]['href']).to eq 'slug%20one'
+    expect(found_links[1].text).to eq 'second'
+    expect(found_links[1]['href']).to eq 'http://example.com/slug%20two'
+  end
+
+  described_class::IGNORE_PARENTS.each do |elem|
+    it "ignores valid links contained inside '#{elem}' element" do
+      exp = act = "<#{elem}>See #{link}</#{elem}>"
+
+      expect(filter(act).to_html).to eq exp
+    end
+  end
+end