From 41de7b345b0abdaba2f0d7614ebdb1cc7310a5fb Mon Sep 17 00:00:00 2001 From: Robert Speicher Date: Mon, 14 Mar 2016 16:07:51 -0400 Subject: [PATCH] Be more intelligent about sanitizing links with unsafe protocols This prevents false matches on relative links like `[database](database.md)`. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/14220 --- lib/banzai/filter/sanitization_filter.rb | 9 +++++++-- .../banzai/filter/sanitization_filter_spec.rb | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index abd79b329ae..e8011519608 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -7,7 +7,7 @@ module Banzai # # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. class SanitizationFilter < HTML::Pipeline::SanitizationFilter - UNSAFE_PROTOCOLS = %w(javascript :javascript data vbscript).freeze + UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze def whitelist whitelist = super @@ -64,7 +64,12 @@ module Banzai return unless node.name == 'a' return unless node.has_attribute?('href') - if node['href'].start_with?(*UNSAFE_PROTOCOLS) + begin + uri = Addressable::URI.parse(node['href']) + uri.scheme.strip! if uri.scheme + + node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(uri.scheme) + rescue Addressable::URI::InvalidURIError node.remove_attribute('href') end end diff --git a/spec/lib/banzai/filter/sanitization_filter_spec.rb b/spec/lib/banzai/filter/sanitization_filter_spec.rb index 4a7b00c7660..27ce312b11c 100644 --- a/spec/lib/banzai/filter/sanitization_filter_spec.rb +++ b/spec/lib/banzai/filter/sanitization_filter_spec.rb @@ -149,10 +149,20 @@ describe Banzai::Filter::SanitizationFilter, lib: true do output: '' }, + 'protocol-based JS injection: invalid URL char' => { + input: '', + output: '' + }, + 'protocol-based JS injection: spaces and entities' => { input: 'foo', output: 'foo' }, + + 'protocol whitespace' => { + input: '', + output: '' + } } protocols.each do |name, data| @@ -177,6 +187,16 @@ describe Banzai::Filter::SanitizationFilter, lib: true do expect(output.to_html).to eq 'XSS' end + it 'disallows invalid URIs' do + expect(Addressable::URI).to receive(:parse).with('foo://example.com'). + and_raise(Addressable::URI::InvalidURIError) + + input = 'Foo' + output = filter(input) + + expect(output.to_html).to eq 'Foo' + end + it 'allows non-standard anchor schemes' do exp = %q{IRC} act = filter(exp)