Remove class and id attributes from SanitizationFilter whitelist
This commit is contained in:
parent
d9b6b9201e
commit
70bbf093aa
5 changed files with 59 additions and 36 deletions
|
@ -423,7 +423,7 @@ Quote break.
|
|||
|
||||
You can also use raw HTML in your Markdown, and it'll mostly work pretty well.
|
||||
|
||||
See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements, as well as the `class`, and `id` attributes on all elements.
|
||||
See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements.
|
||||
|
||||
```no-highlight
|
||||
<dl>
|
||||
|
|
|
@ -10,8 +10,9 @@ module Gitlab
|
|||
def whitelist
|
||||
whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST
|
||||
|
||||
# Allow `class` and `id` on all elements
|
||||
whitelist[:attributes][:all].push('class', 'id')
|
||||
# Allow code highlighting
|
||||
whitelist[:attributes]['pre'] = %w(class)
|
||||
whitelist[:attributes]['span'] = %w(class)
|
||||
|
||||
# Allow table alignment
|
||||
whitelist[:attributes]['th'] = %w(style)
|
||||
|
@ -23,6 +24,9 @@ module Gitlab
|
|||
# Remove `rel` attribute from `a` elements
|
||||
whitelist[:transformers].push(remove_rel)
|
||||
|
||||
# Remove `class` attribute from non-highlight spans
|
||||
whitelist[:transformers].push(clean_spans)
|
||||
|
||||
whitelist
|
||||
end
|
||||
|
||||
|
@ -33,6 +37,17 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
def clean_spans
|
||||
lambda do |env|
|
||||
return unless env[:node_name] == 'span'
|
||||
return unless env[:node].has_attribute?('class')
|
||||
|
||||
unless has_ancestor?(env[:node], 'pre')
|
||||
env[:node].remove_attribute('class')
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -60,8 +60,8 @@ describe 'GitLab Markdown' do
|
|||
@feat.teardown
|
||||
end
|
||||
|
||||
# Given a header ID, goes to that element's parent (the header), then to its
|
||||
# second sibling (the body).
|
||||
# Given a header ID, goes to that element's parent (the header itself), then
|
||||
# its next sibling element (the body).
|
||||
def get_section(id)
|
||||
@doc.at_css("##{id}").parent.next_element
|
||||
end
|
||||
|
@ -119,18 +119,18 @@ describe 'GitLab Markdown' do
|
|||
describe 'HTML::Pipeline' do
|
||||
describe 'SanitizationFilter' do
|
||||
it 'uses a permissive whitelist' do
|
||||
expect(@doc).to have_selector('b#manual-b')
|
||||
expect(@doc).to have_selector('em#manual-em')
|
||||
expect(@doc).to have_selector("code#manual-code")
|
||||
expect(@doc).to have_selector('b:contains("b tag")')
|
||||
expect(@doc).to have_selector('em:contains("em tag")')
|
||||
expect(@doc).to have_selector('code:contains("code tag")')
|
||||
expect(@doc).to have_selector('kbd:contains("s")')
|
||||
expect(@doc).to have_selector('strike:contains(Emoji)')
|
||||
expect(@doc).to have_selector('img#manual-img')
|
||||
expect(@doc).to have_selector('br#manual-br')
|
||||
expect(@doc).to have_selector('hr#manual-hr')
|
||||
expect(@doc).to have_selector('img[src*="smile.png"]')
|
||||
expect(@doc).to have_selector('br')
|
||||
expect(@doc).to have_selector('hr')
|
||||
end
|
||||
|
||||
it 'permits span elements' do
|
||||
expect(@doc).to have_selector('span#span-class-light.light')
|
||||
expect(@doc).to have_selector('span:contains("span tag")')
|
||||
end
|
||||
|
||||
it 'permits table alignment' do
|
||||
|
@ -144,13 +144,12 @@ describe 'GitLab Markdown' do
|
|||
end
|
||||
|
||||
it 'removes `rel` attribute from links' do
|
||||
expect(@doc).to have_selector('a#a-rel-nofollow')
|
||||
expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]')
|
||||
body = get_section('sanitizationfilter')
|
||||
expect(body).not_to have_selector('a[rel]')
|
||||
end
|
||||
|
||||
it "removes `href` from `a` elements if it's fishy" do
|
||||
expect(@doc).to have_selector('a#a-href-javascript')
|
||||
expect(@doc).not_to have_selector('a#a-href-javascript[href]')
|
||||
expect(@doc).not_to have_selector('a[href*="javascript"]')
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -228,7 +227,8 @@ describe 'GitLab Markdown' do
|
|||
|
||||
%w(code a kbd).each do |elem|
|
||||
it "ignores links inside '#{elem}' element" do
|
||||
expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text
|
||||
body = get_section('autolinkfilter')
|
||||
expect(body).not_to have_selector("#{elem} a")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
26
spec/fixtures/markdown.md.erb
vendored
26
spec/fixtures/markdown.md.erb
vendored
|
@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through...
|
|||
|
||||
### SanitizationFilter
|
||||
|
||||
GitLab uses <a href="http://git.io/vfW8a" class="sanitize" id="sanitize-link">HTML::Pipeline::SanitizationFilter</a>
|
||||
GitLab uses <a href="http://git.io/vfW8a">HTML::Pipeline::SanitizationFilter</a>
|
||||
to sanitize the generated HTML, stripping dangerous or unwanted tags.
|
||||
|
||||
Its default whitelist is pretty permissive. Check it:
|
||||
|
||||
<b id="manual-b">This text is bold</b> and <em id="manual-em">this text is emphasized</em>.
|
||||
<b>b tag</b> and <em>em tag</em>.
|
||||
|
||||
<code id="manual-code">echo "Hello, world!"</code>
|
||||
<code>code tag</code>
|
||||
|
||||
Press <kbd>s</kbd> to search.
|
||||
|
||||
<strike>Emoji</strike> Plain old images! <img
|
||||
src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20"
|
||||
height="20" id="manual-img" />
|
||||
<strike>Emoji</strike> Plain old images! <img src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20" height="20" />
|
||||
|
||||
Here comes a line break:
|
||||
|
||||
<br id="manual-br" />
|
||||
<br />
|
||||
|
||||
And a horizontal rule:
|
||||
|
||||
<hr id="manual-hr" />
|
||||
<hr />
|
||||
|
||||
As permissive as it is, we've allowed even more stuff:
|
||||
|
||||
<span class="light" id="span-class-light">Span elements</span>
|
||||
<span>span tag</span>
|
||||
|
||||
<a href="#" rel="nofollow" id="a-rel-nofollow">This is a link with a defined rel attribute, which should be removed</a>
|
||||
<a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a>
|
||||
|
||||
<a href="javascript:alert('Hi')" id="a-href-javascript">This is a link trying to be sneaky. It gets its link removed entirely.</a>
|
||||
<a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
|
||||
|
||||
### Escaping
|
||||
|
||||
|
@ -125,9 +123,9 @@ These are all plain text that should get turned into links:
|
|||
|
||||
But it shouldn't autolink text inside certain tags:
|
||||
|
||||
- <code id="autolink-code">http://about.gitlab.com/</code>
|
||||
- <a id="autolink-a">http://about.gitlab.com/</a>
|
||||
- <kbd id="autolink-kbd">http://about.gitlab.com/</kbd>
|
||||
- <code>http://about.gitlab.com/</code>
|
||||
- <a>http://about.gitlab.com/</a>
|
||||
- <kbd>http://about.gitlab.com/</kbd>
|
||||
|
||||
### Reference Filters (e.g., #<%= issue.iid %>)
|
||||
|
||||
|
|
|
@ -29,17 +29,27 @@ module Gitlab::Markdown
|
|||
exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>"
|
||||
expect(filter(act).to_html).to eq exp
|
||||
end
|
||||
|
||||
it 'sanitizes `class` attribute on any element' do
|
||||
act = %q{<strong class="foo">Strong</strong>}
|
||||
expect(filter(act).to_html).to eq %q{<strong>Strong</strong>}
|
||||
end
|
||||
|
||||
it 'sanitizes `id` attribute on any element' do
|
||||
act = %q{<em id="foo">Emphasis</em>}
|
||||
expect(filter(act).to_html).to eq %q{<em>Emphasis</em>}
|
||||
end
|
||||
end
|
||||
|
||||
describe 'custom whitelist' do
|
||||
it 'allows `class` attribute on any element' do
|
||||
exp = act = %q{<strong class="foo">Strong</strong>}
|
||||
it 'allows syntax highlighting' do
|
||||
exp = act = %q{<pre class="code highlight white c"><code><span class="k">def</span></code></pre>}
|
||||
expect(filter(act).to_html).to eq exp
|
||||
end
|
||||
|
||||
it 'allows `id` attribute on any element' do
|
||||
exp = act = %q{<em id="foo">Emphasis</em>}
|
||||
expect(filter(act).to_html).to eq exp
|
||||
it 'sanitizes `class` attribute from non-highlight spans' do
|
||||
act = %q{<span class="k">def</span>}
|
||||
expect(filter(act).to_html).to eq %q{<span>def</span>}
|
||||
end
|
||||
|
||||
it 'allows `style` attribute on table elements' do
|
||||
|
|
Loading…
Reference in a new issue