Remove class and id attributes from SanitizationFilter whitelist

This commit is contained in:
Robert Speicher 2015-05-08 12:17:54 -04:00
parent d9b6b9201e
commit 70bbf093aa
5 changed files with 59 additions and 36 deletions

View file

@ -423,7 +423,7 @@ Quote break.
You can also use raw HTML in your Markdown, and it'll mostly work pretty well. You can also use raw HTML in your Markdown, and it'll mostly work pretty well.
See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements, as well as the `class`, and `id` attributes on all elements. See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements.
```no-highlight ```no-highlight
<dl> <dl>

View file

@ -10,8 +10,9 @@ module Gitlab
def whitelist def whitelist
whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST
# Allow `class` and `id` on all elements # Allow code highlighting
whitelist[:attributes][:all].push('class', 'id') whitelist[:attributes]['pre'] = %w(class)
whitelist[:attributes]['span'] = %w(class)
# Allow table alignment # Allow table alignment
whitelist[:attributes]['th'] = %w(style) whitelist[:attributes]['th'] = %w(style)
@ -23,6 +24,9 @@ module Gitlab
# Remove `rel` attribute from `a` elements # Remove `rel` attribute from `a` elements
whitelist[:transformers].push(remove_rel) whitelist[:transformers].push(remove_rel)
# Remove `class` attribute from non-highlight spans
whitelist[:transformers].push(clean_spans)
whitelist whitelist
end end
@ -33,6 +37,17 @@ module Gitlab
end end
end end
end end
def clean_spans
lambda do |env|
return unless env[:node_name] == 'span'
return unless env[:node].has_attribute?('class')
unless has_ancestor?(env[:node], 'pre')
env[:node].remove_attribute('class')
end
end
end
end end
end end
end end

View file

@ -60,8 +60,8 @@ describe 'GitLab Markdown' do
@feat.teardown @feat.teardown
end end
# Given a header ID, goes to that element's parent (the header), then to its # Given a header ID, goes to that element's parent (the header itself), then
# second sibling (the body). # its next sibling element (the body).
def get_section(id) def get_section(id)
@doc.at_css("##{id}").parent.next_element @doc.at_css("##{id}").parent.next_element
end end
@ -119,18 +119,18 @@ describe 'GitLab Markdown' do
describe 'HTML::Pipeline' do describe 'HTML::Pipeline' do
describe 'SanitizationFilter' do describe 'SanitizationFilter' do
it 'uses a permissive whitelist' do it 'uses a permissive whitelist' do
expect(@doc).to have_selector('b#manual-b') expect(@doc).to have_selector('b:contains("b tag")')
expect(@doc).to have_selector('em#manual-em') expect(@doc).to have_selector('em:contains("em tag")')
expect(@doc).to have_selector("code#manual-code") expect(@doc).to have_selector('code:contains("code tag")')
expect(@doc).to have_selector('kbd:contains("s")') expect(@doc).to have_selector('kbd:contains("s")')
expect(@doc).to have_selector('strike:contains(Emoji)') expect(@doc).to have_selector('strike:contains(Emoji)')
expect(@doc).to have_selector('img#manual-img') expect(@doc).to have_selector('img[src*="smile.png"]')
expect(@doc).to have_selector('br#manual-br') expect(@doc).to have_selector('br')
expect(@doc).to have_selector('hr#manual-hr') expect(@doc).to have_selector('hr')
end end
it 'permits span elements' do it 'permits span elements' do
expect(@doc).to have_selector('span#span-class-light.light') expect(@doc).to have_selector('span:contains("span tag")')
end end
it 'permits table alignment' do it 'permits table alignment' do
@ -144,13 +144,12 @@ describe 'GitLab Markdown' do
end end
it 'removes `rel` attribute from links' do it 'removes `rel` attribute from links' do
expect(@doc).to have_selector('a#a-rel-nofollow') body = get_section('sanitizationfilter')
expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]') expect(body).not_to have_selector('a[rel]')
end end
it "removes `href` from `a` elements if it's fishy" do it "removes `href` from `a` elements if it's fishy" do
expect(@doc).to have_selector('a#a-href-javascript') expect(@doc).not_to have_selector('a[href*="javascript"]')
expect(@doc).not_to have_selector('a#a-href-javascript[href]')
end end
end end
@ -228,7 +227,8 @@ describe 'GitLab Markdown' do
%w(code a kbd).each do |elem| %w(code a kbd).each do |elem|
it "ignores links inside '#{elem}' element" do it "ignores links inside '#{elem}' element" do
expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text body = get_section('autolinkfilter')
expect(body).not_to have_selector("#{elem} a")
end end
end end
end end

View file

@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through...
### SanitizationFilter ### SanitizationFilter
GitLab uses <a href="http://git.io/vfW8a" class="sanitize" id="sanitize-link">HTML::Pipeline::SanitizationFilter</a> GitLab uses <a href="http://git.io/vfW8a">HTML::Pipeline::SanitizationFilter</a>
to sanitize the generated HTML, stripping dangerous or unwanted tags. to sanitize the generated HTML, stripping dangerous or unwanted tags.
Its default whitelist is pretty permissive. Check it: Its default whitelist is pretty permissive. Check it:
<b id="manual-b">This text is bold</b> and <em id="manual-em">this text is emphasized</em>. <b>b tag</b> and <em>em tag</em>.
<code id="manual-code">echo "Hello, world!"</code> <code>code tag</code>
Press <kbd>s</kbd> to search. Press <kbd>s</kbd> to search.
<strike>Emoji</strike> Plain old images! <img <strike>Emoji</strike> Plain old images! <img src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20" height="20" />
src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20"
height="20" id="manual-img" />
Here comes a line break: Here comes a line break:
<br id="manual-br" /> <br />
And a horizontal rule: And a horizontal rule:
<hr id="manual-hr" /> <hr />
As permissive as it is, we've allowed even more stuff: As permissive as it is, we've allowed even more stuff:
<span class="light" id="span-class-light">Span elements</span> <span>span tag</span>
<a href="#" rel="nofollow" id="a-rel-nofollow">This is a link with a defined rel attribute, which should be removed</a> <a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a>
<a href="javascript:alert('Hi')" id="a-href-javascript">This is a link trying to be sneaky. It gets its link removed entirely.</a> <a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
### Escaping ### Escaping
@ -125,9 +123,9 @@ These are all plain text that should get turned into links:
But it shouldn't autolink text inside certain tags: But it shouldn't autolink text inside certain tags:
- <code id="autolink-code">http://about.gitlab.com/</code> - <code>http://about.gitlab.com/</code>
- <a id="autolink-a">http://about.gitlab.com/</a> - <a>http://about.gitlab.com/</a>
- <kbd id="autolink-kbd">http://about.gitlab.com/</kbd> - <kbd>http://about.gitlab.com/</kbd>
### Reference Filters (e.g., #<%= issue.iid %>) ### Reference Filters (e.g., #<%= issue.iid %>)

View file

@ -29,17 +29,27 @@ module Gitlab::Markdown
exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>" exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>"
expect(filter(act).to_html).to eq exp expect(filter(act).to_html).to eq exp
end end
it 'sanitizes `class` attribute on any element' do
act = %q{<strong class="foo">Strong</strong>}
expect(filter(act).to_html).to eq %q{<strong>Strong</strong>}
end
it 'sanitizes `id` attribute on any element' do
act = %q{<em id="foo">Emphasis</em>}
expect(filter(act).to_html).to eq %q{<em>Emphasis</em>}
end
end end
describe 'custom whitelist' do describe 'custom whitelist' do
it 'allows `class` attribute on any element' do it 'allows syntax highlighting' do
exp = act = %q{<strong class="foo">Strong</strong>} exp = act = %q{<pre class="code highlight white c"><code><span class="k">def</span></code></pre>}
expect(filter(act).to_html).to eq exp expect(filter(act).to_html).to eq exp
end end
it 'allows `id` attribute on any element' do it 'sanitizes `class` attribute from non-highlight spans' do
exp = act = %q{<em id="foo">Emphasis</em>} act = %q{<span class="k">def</span>}
expect(filter(act).to_html).to eq exp expect(filter(act).to_html).to eq %q{<span>def</span>}
end end
it 'allows `style` attribute on table elements' do it 'allows `style` attribute on table elements' do