Properly process footnotes in markdown

All the ids and classes were stripped.  Add them back in
and make ids unique
This commit is contained in:
Brett Walker 2019-01-04 20:54:02 -06:00
parent 1b3affafab
commit 34ab6dfa05
6 changed files with 171 additions and 3 deletions

View file

@ -0,0 +1,5 @@
---
title: Footnotes now work render properly in markdown
merge_request: 24168
author:
type: fixed

View file

@ -0,0 +1,55 @@
# frozen_string_literal: true
module Banzai
module Filter
# HTML Filter for footnotes
#
# Footnotes are supported in CommonMark. However we were stripping
# the ids during sanitization. Those are now allowed.
#
# Footnotes are numbered the same - the first one has `id=fn1`, the
# second is `id=fn2`, etc. In order to allow footnotes when rendering
# multiple markdown blocks on a page, we need to make each footnote
# reference unique.
#
# This filter adds a random number to each footnote (the same number
# can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`.
#
class FootnoteFilter < HTML::Pipeline::Filter
INTEGER_PATTERN = /\A\d+\Z/.freeze
def call
return doc unless first_footnote = doc.at_css('ol > li[id=fn1]')
# Sanitization stripped off the section wrapper - add it back in
first_footnote.parent.wrap('<section class="footnotes">')
doc.css('sup > a[id]').each do |link_node|
ref_num = link_node[:id].delete_prefix('fnref')
footnote_node = doc.at_css("li[id=fn#{ref_num}]")
backref_node = doc.at_css("li[id=fn#{ref_num}] a[href=\"#fnref#{ref_num}\"]")
if ref_num =~ INTEGER_PATTERN && footnote_node && backref_node
rand_ref_num = "#{ref_num}-#{random_number}"
link_node[:href] = "#fn#{rand_ref_num}"
link_node[:id] = "fnref#{rand_ref_num}"
footnote_node[:id] = "fn#{rand_ref_num}"
backref_node[:href] = "#fnref#{rand_ref_num}"
# Sanitization stripped off class - add it back in
link_node.parent.append_class('footnote-ref')
backref_node.append_class('footnote-backref')
end
end
doc
end
private
def random_number
@random_number ||= rand(10000)
end
end
end
end

View file

@ -8,8 +8,10 @@ module Banzai
class SanitizationFilter < HTML::Pipeline::SanitizationFilter
include Gitlab::Utils::StrongMemoize
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze
FOOTNOTE_LINK_REFERENCE_PATTERN = /\Afnref\d\z/.freeze
FOOTNOTE_LI_REFERENCE_PATTERN = /\Afn\d\z/.freeze
def whitelist
strong_memoize(:whitelist) do
@ -57,6 +59,13 @@ module Banzai
# Remove any `style` properties not required for table alignment
whitelist[:transformers].push(self.class.remove_unsafe_table_style)
# Allow `id` in a and li elements for footnotes
whitelist[:attributes]['a'].push('id')
whitelist[:attributes]['li'] = %w(id)
# ...but remove any `id` properties not matching for footnotes
whitelist[:transformers].push(self.class.remove_non_footnote_ids)
whitelist
end
@ -112,6 +121,20 @@ module Banzai
end
end
end
def remove_non_footnote_ids
lambda do |env|
node = env[:node]
return unless node.name == 'a' || node.name == 'li'
return unless node.has_attribute?('id')
return if node.name == 'a' && node['id'] =~ FOOTNOTE_LINK_REFERENCE_PATTERN
return if node.name == 'li' && node['id'] =~ FOOTNOTE_LI_REFERENCE_PATTERN
node.remove_attribute('id')
end
end
end
end
end

View file

@ -30,6 +30,7 @@ module Banzai
Filter::AutolinkFilter,
Filter::ExternalLinkFilter,
Filter::SuggestionFilter,
Filter::FootnoteFilter,
*reference_filters,

View file

@ -0,0 +1,48 @@
# frozen_string_literal: true
require 'spec_helper'
describe Banzai::Filter::FootnoteFilter do
include FilterSpecHelper
# first[^1] and second[^second]
# [^1]: one
# [^second]: two
let(:footnote) do
<<-EOF.strip_heredoc
<p>first<sup><a href="#fn1" id="fnref1">1</a></sup> and second<sup><a href="#fn2" id="fnref2">2</a></sup></p>
<ol>
<li id="fn1">
<p>one <a href="#fnref1"></a></p>
</li>
<li id="fn2">
<p>two <a href="#fnref2"></a></p>
</li>
</ol>
EOF
end
context 'when footnotes exist' do
let(:doc) { filter(footnote) }
let(:link_node) { doc.css('sup > a').first }
let(:identifier) { link_node[:id].delete_prefix('fnref1-') }
it 'adds identifier to footnotes' do
expect(link_node[:id]).to eq "fnref1-#{identifier}"
expect(link_node[:href]).to eq "#fn1-#{identifier}"
expect(doc.css("li[id=fn1-#{identifier}]")).not_to be_empty
expect(doc.css("li[id=fn1-#{identifier}] a[href=\"#fnref1-#{identifier}\"]")).not_to be_empty
end
it 'uses the same identifier for all footnotes' do
expect(doc.css("li[id=fn2-#{identifier}]")).not_to be_empty
expect(doc.css("li[id=fn2-#{identifier}] a[href=\"#fnref2-#{identifier}\"]")).not_to be_empty
end
it 'adds section and classes' do
expect(doc.css("section[class=footnotes]")).not_to be_empty
expect(doc.css("sup[class=footnote-ref]").count).to eq 2
expect(doc.css("a[class=footnote-backref]").count).to eq 2
end
end
end

View file

@ -246,7 +246,7 @@ describe Banzai::Filter::SanitizationFilter do
'protocol-based JS injection: spaces and entities' => {
input: '<a href=" &#14; javascript:alert(\'XSS\');">foo</a>',
output: '<a href>foo</a>'
output: '<a href="">foo</a>'
},
'protocol whitespace' => {
@ -300,5 +300,41 @@ describe Banzai::Filter::SanitizationFilter do
expect(act.to_html).to eq exp
end
describe 'footnotes' do
it 'allows id property on links' do
exp = %q{<a href="#fn1" id="fnref1">foo/bar.md</a>}
act = filter(exp)
expect(act.to_html).to eq exp
end
it 'allows id property on li element' do
exp = %q{<ol><li id="fn1">footnote</li></ol>}
act = filter(exp)
expect(act.to_html).to eq exp
end
it 'only allows valid footnote formats for links' do
exp = %q{<a href="#fn1">link</a>}
%w[fnrefx test xfnref1].each do |id|
act = filter(%Q{<a href="#fn1" id="#{id}">link</a>})
expect(act.to_html).to eq exp
end
end
it 'only allows valid footnote formats for li' do
exp = %q{<ol><li>footnote</li></ol>}
%w[fnx test xfn1].each do |id|
act = filter(%Q{<ol><li id="#{id}">footnote</li></ol>})
expect(act.to_html).to eq exp
end
end
end
end
end