Merge branch 'issue-63298-asciidoc-sanitization' into 'master'

Prevent excessive sanitization of AsciiDoc ouptut

Closes #63298

See merge request gitlab-org/gitlab-ce!30290
This commit is contained in:
Douwe Maan 2019-07-12 08:34:14 +00:00
commit 8405483031
9 changed files with 326 additions and 113 deletions

View File

@ -9,7 +9,6 @@
@import 'framework/animations';
@import 'framework/vue_transitions';
@import 'framework/asciidoctor';
@import 'framework/banner';
@import 'framework/blocks';
@import 'framework/buttons';

View File

@ -1,27 +0,0 @@
.admonitionblock td.icon {
width: 1%;
[class^='fa icon-'] {
@extend .fa-2x;
}
.icon-note {
@extend .fa-thumb-tack;
}
.icon-tip {
@extend .fa-lightbulb-o;
}
.icon-warning {
@extend .fa-exclamation-triangle;
}
.icon-caution {
@extend .fa-fire;
}
.icon-important {
@extend .fa-exclamation-circle;
}
}

View File

@ -1,5 +1,5 @@
/**
* Apply Markdown typography
* Apply Markup (Markdown/AsciiDoc) typography
*
*/
.md:not(.use-csslab) {
@ -245,6 +245,21 @@
}
}
ul.checklist,
ul.none,
ol.none,
ul.no-bullet,
ol.no-bullet,
ol.unnumbered,
ul.unstyled,
ol.unstyled {
list-style-type: none;
li {
margin-left: 0;
}
}
li {
line-height: 1.6em;
margin-left: 25px;
@ -321,6 +336,54 @@
visibility: visible;
}
}
.big {
font-size: larger;
}
.small {
font-size: smaller;
}
.underline {
text-decoration: underline;
}
.overline {
text-decoration: overline;
}
.line-through {
text-decoration: line-through;
}
.admonitionblock td.icon {
width: 1%;
[class^='fa icon-'] {
@extend .fa-2x;
}
.icon-note {
@extend .fa-thumb-tack;
}
.icon-tip {
@extend .fa-lightbulb-o;
}
.icon-warning {
@extend .fa-exclamation-triangle;
}
.icon-caution {
@extend .fa-fire;
}
.icon-important {
@extend .fa-exclamation-circle;
}
}
}

View File

@ -0,0 +1,5 @@
---
title: "Prevent excessive sanitization of AsciiDoc ouptut"
merge_request: 30290
author: Guillaume Grossetie
type: added

View File

@ -0,0 +1,62 @@
# frozen_string_literal: true
module Banzai
module Filter
# Sanitize HTML produced by AsciiDoc/Asciidoctor.
#
# Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter
# Classes used by Asciidoctor to style components
ADMONITION_CLASSES = %w(fa icon-note icon-tip icon-warning icon-caution icon-important).freeze
CALLOUT_CLASSES = ['conum'].freeze
CHECKLIST_CLASSES = %w(fa fa-check-square-o fa-square-o).freeze
LIST_CLASSES = %w(checklist none no-bullet unnumbered unstyled).freeze
ELEMENT_CLASSES_WHITELIST = {
span: %w(big small underline overline line-through).freeze,
div: ['admonitionblock'].freeze,
td: ['icon'].freeze,
i: ADMONITION_CLASSES + CALLOUT_CLASSES + CHECKLIST_CLASSES,
ul: LIST_CLASSES,
ol: LIST_CLASSES
}.freeze
def customize_whitelist(whitelist)
# Allow marks
whitelist[:elements].push('mark')
# Allow any classes in `span`, `i`, `div`, `td`, `ul` and `ol` elements
# but then remove any unknown classes
whitelist[:attributes]['span'] = %w(class)
whitelist[:attributes]['div'].push('class')
whitelist[:attributes]['td'] = %w(class)
whitelist[:attributes]['i'] = %w(class)
whitelist[:attributes]['ul'] = %w(class)
whitelist[:attributes]['ol'] = %w(class)
whitelist[:transformers].push(self.class.remove_element_classes)
whitelist
end
class << self
def remove_element_classes
lambda do |env|
node = env[:node]
return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym])
return unless node.has_attribute?('class')
classes = node['class'].strip.split(' ')
allowed_classes = (classes & classes_whitelist)
if allowed_classes.empty?
node.remove_attribute('class')
else
node['class'] = allowed_classes.join(' ')
end
end
end
end
end
end
end

View File

@ -0,0 +1,96 @@
# frozen_string_literal: true
module Banzai
module Filter
# Sanitize HTML produced by markup languages (Markdown, AsciiDoc...).
# Specific rules are implemented in dedicated filters:
#
# - Banzai::Filter::SanitizationFilter (Markdown)
# - Banzai::Filter::AsciiDocSanitizationFilter (AsciiDoc/Asciidoctor)
#
# Extends HTML::Pipeline::SanitizationFilter with common rules.
class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter
include Gitlab::Utils::StrongMemoize
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
def whitelist
strong_memoize(:whitelist) do
whitelist = super.deep_dup
# Allow span elements
whitelist[:elements].push('span')
# Allow data-math-style attribute in order to support LaTeX formatting
whitelist[:attributes]['code'] = %w(data-math-style)
whitelist[:attributes]['pre'] = %w(data-math-style)
# Allow html5 details/summary elements
whitelist[:elements].push('details')
whitelist[:elements].push('summary')
# Allow abbr elements with title attribute
whitelist[:elements].push('abbr')
whitelist[:attributes]['abbr'] = %w(title)
# Disallow `name` attribute globally, allow on `a`
whitelist[:attributes][:all].delete('name')
whitelist[:attributes]['a'].push('name')
# Allow any protocol in `a` elements
# and then remove links with unsafe protocols
whitelist[:protocols].delete('a')
whitelist[:transformers].push(self.class.remove_unsafe_links)
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(self.class.remove_rel)
customize_whitelist(whitelist)
end
end
def customize_whitelist(whitelist)
raise NotImplementedError
end
class << self
def remove_unsafe_links
lambda do |env|
node = env[:node]
return unless node.name == 'a'
return unless node.has_attribute?('href')
begin
node['href'] = node['href'].strip
uri = Addressable::URI.parse(node['href'])
return unless uri.scheme
# Remove all invalid scheme characters before checking against the
# list of unsafe protocols.
#
# See https://tools.ietf.org/html/rfc3986#section-3.1
scheme = uri.scheme
.strip
.downcase
.gsub(/[^A-Za-z0-9\+\.\-]+/, '')
node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
rescue Addressable::URI::InvalidURIError
node.remove_attribute('href')
end
end
end
def remove_rel
lambda do |env|
if env[:node_name] == 'a'
env[:node].remove_attribute('rel')
end
end
end
end
end
end
end

View File

@ -2,23 +2,13 @@
module Banzai
module Filter
# Sanitize HTML
# Sanitize HTML produced by Markdown.
#
# Extends HTML::Pipeline::SanitizationFilter with a custom whitelist.
class SanitizationFilter < HTML::Pipeline::SanitizationFilter
include Gitlab::Utils::StrongMemoize
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
# Extends Banzai::Filter::BaseSanitizationFilter with specific rules.
class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter
# Styles used by Markdown for table alignment
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze
def whitelist
strong_memoize(:whitelist) do
customize_whitelist(super.deep_dup)
end
end
private
def customize_whitelist(whitelist)
# Allow table alignment; we whitelist specific text-align values in a
# transformer below
@ -26,36 +16,9 @@ module Banzai
whitelist[:attributes]['td'] = %w(style)
whitelist[:css] = { properties: ['text-align'] }
# Allow span elements
whitelist[:elements].push('span')
# Allow data-math-style attribute in order to support LaTeX formatting
whitelist[:attributes]['code'] = %w(data-math-style)
whitelist[:attributes]['pre'] = %w(data-math-style)
# Allow html5 details/summary elements
whitelist[:elements].push('details')
whitelist[:elements].push('summary')
# Allow abbr elements with title attribute
whitelist[:elements].push('abbr')
whitelist[:attributes]['abbr'] = %w(title)
# Allow the 'data-sourcepos' from CommonMark on all elements
whitelist[:attributes][:all].push('data-sourcepos')
# Disallow `name` attribute globally, allow on `a`
whitelist[:attributes][:all].delete('name')
whitelist[:attributes]['a'].push('name')
# Allow any protocol in `a` elements
# and then remove links with unsafe protocols
whitelist[:protocols].delete('a')
whitelist[:transformers].push(self.class.remove_unsafe_links)
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(self.class.remove_rel)
# Remove any `style` properties not required for table alignment
whitelist[:transformers].push(self.class.remove_unsafe_table_style)
@ -69,43 +32,6 @@ module Banzai
end
class << self
def remove_unsafe_links
lambda do |env|
node = env[:node]
return unless node.name == 'a'
return unless node.has_attribute?('href')
begin
node['href'] = node['href'].strip
uri = Addressable::URI.parse(node['href'])
return unless uri.scheme
# Remove all invalid scheme characters before checking against the
# list of unsafe protocols.
#
# See https://tools.ietf.org/html/rfc3986#section-3.1
scheme = uri.scheme
.strip
.downcase
.gsub(/[^A-Za-z0-9\+\.\-]+/, '')
node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
rescue Addressable::URI::InvalidURIError
node.remove_attribute('href')
end
end
end
def remove_rel
lambda do |env|
if env[:node_name] == 'a'
env[:node].remove_attribute('rel')
end
end
end
def remove_unsafe_table_style
lambda do |env|
node = env[:node]

View File

@ -5,7 +5,7 @@ module Banzai
class AsciiDocPipeline < BasePipeline
def self.filters
FilterArray[
Filter::SanitizationFilter,
Filter::AsciiDocSanitizationFilter,
Filter::SyntaxHighlightFilter,
Filter::ExternalLinkFilter,
Filter::PlantumlFilter,

View File

@ -45,28 +45,117 @@ module Gitlab
end
context "XSS" do
links = {
'links' => {
items = {
'link with extra attribute' => {
input: 'link:mylink"onmouseover="alert(1)[Click Here]',
output: "<div>\n<p><a href=\"mylink\">Click Here</a></p>\n</div>"
},
'images' => {
'link with unsafe scheme' => {
input: 'link:data://danger[Click Here]',
output: "<div>\n<p><a>Click Here</a></p>\n</div>"
},
'image with onerror' => {
input: 'image:https://localhost.com/image.png[Alt text" onerror="alert(7)]',
output: "<div>\n<p><span><img src=\"https://localhost.com/image.png\" alt='Alt text\" onerror=\"alert(7)'></span></p>\n</div>"
},
'pre' => {
'fenced code with inline script' => {
input: '```mypre"><script>alert(3)</script>',
output: "<div>\n<div>\n<pre class=\"code highlight js-syntax-highlight plaintext\" lang=\"plaintext\" v-pre=\"true\"><code><span id=\"LC1\" class=\"line\" lang=\"plaintext\">\"&gt;</span></code></pre>\n</div>\n</div>"
}
}
links.each do |name, data|
items.each do |name, data|
it "does not convert dangerous #{name} into HTML" do
expect(render(data[:input], context)).to include(data[:output])
end
end
end
context 'with admonition' do
it 'preserves classes' do
input = <<~ADOC
NOTE: An admonition paragraph, like this note, grabs the readers attention.
ADOC
output = <<~HTML
<div class="admonitionblock">
<table>
<tr>
<td class="icon">
<i class="fa icon-note" title="Note"></i>
</td>
<td>
An admonition paragraph, like this note, grabs the readers attention.
</td>
</tr>
</table>
</div>
HTML
expect(render(input, context)).to include(output.strip)
end
end
context 'with checklist' do
it 'preserves classes' do
input = <<~ADOC
* [x] checked
* [ ] not checked
ADOC
output = <<~HTML
<div>
<ul class="checklist">
<li>
<p><i class="fa fa-check-square-o"></i> checked</p>
</li>
<li>
<p><i class="fa fa-square-o"></i> not checked</p>
</li>
</ul>
</div>
HTML
expect(render(input, context)).to include(output.strip)
end
end
context 'with marks' do
it 'preserves classes' do
input = <<~ADOC
Werewolves are allergic to #cassia cinnamon#.
Did the werewolves read the [.small]#small print#?
Where did all the [.underline.small]#cores# run off to?
We need [.line-through]#ten# make that twenty VMs.
[.big]##O##nce upon an infinite loop.
ADOC
output = <<~HTML
<div>
<p>Werewolves are allergic to <mark>cassia cinnamon</mark>.</p>
</div>
<div>
<p>Did the werewolves read the <span class="small">small print</span>?</p>
</div>
<div>
<p>Where did all the <span class="underline small">cores</span> run off to?</p>
</div>
<div>
<p>We need <span class="line-through">ten</span> make that twenty VMs.</p>
</div>
<div>
<p><span class="big">O</span>nce upon an infinite loop.</p>
</div>
HTML
expect(render(input, context)).to include(output.strip)
end
end
context 'with fenced block' do
it 'highlights syntax' do
input = <<~ADOC