From 16f8ca566b8637dc8092a6b630c23a82a905b437 Mon Sep 17 00:00:00 2001 From: Robert Speicher Date: Thu, 1 Oct 2015 23:40:29 -0400 Subject: [PATCH] Add custom protocol whitelisting to SanitizationFilter Addresses internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2613 --- lib/gitlab/markdown/sanitization_filter.rb | 19 ++++ .../markdown/sanitization_filter_spec.rb | 101 ++++++++++++++++-- 2 files changed, 110 insertions(+), 10 deletions(-) diff --git a/lib/gitlab/markdown/sanitization_filter.rb b/lib/gitlab/markdown/sanitization_filter.rb index e368de7d848..ffb9dc33b64 100644 --- a/lib/gitlab/markdown/sanitization_filter.rb +++ b/lib/gitlab/markdown/sanitization_filter.rb @@ -48,6 +48,12 @@ module Gitlab # Allow span elements whitelist[:elements].push('span') + # Allow any protocol in `a` elements... + whitelist[:protocols].delete('a') + + # ...but then remove links with the `javascript` protocol + whitelist[:transformers].push(remove_javascript_links) + # Remove `rel` attribute from `a` elements whitelist[:transformers].push(remove_rel) @@ -57,6 +63,19 @@ module Gitlab whitelist end + def remove_javascript_links + lambda do |env| + node = env[:node] + + return unless node.name == 'a' + return unless node.has_attribute?('href') + + if node['href'].start_with?('javascript', ':javascript') + node.remove_attribute('href') + end + end + end + def remove_rel lambda do |env| if env[:node_name] == 'a' diff --git a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb index e50c82d0b3c..27cd00e8054 100644 --- a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb +++ b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb @@ -44,7 +44,7 @@ module Gitlab::Markdown instance = described_class.new('Foo') 3.times { instance.whitelist } - expect(instance.whitelist[:transformers].size).to eq 4 + expect(instance.whitelist[:transformers].size).to eq 5 end it 'allows syntax highlighting' do @@ -77,19 +77,100 @@ module Gitlab::Markdown end it 'removes `rel` attribute from `a` elements' do - doc = filter(%q{Link}) + act = %q{Link} + exp = %q{Link} - expect(doc.css('a').size).to eq 1 - expect(doc.at_css('a')['href']).to eq '#' - expect(doc.at_css('a')['rel']).to be_nil + expect(filter(act).to_html).to eq exp end - it 'removes script-like `href` attribute from `a` elements' do - html = %q{Hi} - doc = filter(html) + # Adapted from the Sanitize test suite: http://git.io/vczrM + protocols = { + 'protocol-based JS injection: simple, no spaces' => { + input: 'foo', + output: 'foo' + }, - expect(doc.css('a').size).to eq 1 - expect(doc.at_css('a')['href']).to be_nil + 'protocol-based JS injection: simple, spaces before' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: simple, spaces after' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: simple, spaces before and after' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: preceding colon' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: UTF-8 encoding' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: long UTF-8 encoding' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: long UTF-8 encoding without semicolons' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: hex encoding' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: long hex encoding' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: hex encoding without semicolons' => { + input: 'foo', + output: 'foo' + }, + + 'protocol-based JS injection: null char' => { + input: "foo", + output: '' + }, + + 'protocol-based JS injection: spaces and entities' => { + input: 'foo', + output: 'foo' + }, + } + + protocols.each do |name, data| + it "handles #{name}" do + doc = filter(data[:input]) + + expect(doc.to_html).to eq data[:output] + end + end + + it 'allows non-standard anchor schemes' do + exp = %q{IRC} + act = filter(exp) + + expect(act.to_html).to eq exp + end + + it 'allows relative links' do + exp = %q{foo/bar.md} + act = filter(exp) + + expect(act.to_html).to eq exp end end