From a546b9fbc5abdb010c19a2fb24e8df50001374f7 Mon Sep 17 00:00:00 2001 From: Guillaume Grossetie Date: Wed, 3 Jul 2019 10:53:00 +0200 Subject: [PATCH] Prevent excessive sanitization of AsciiDoc ouptut --- app/assets/stylesheets/framework.scss | 1 - .../stylesheets/framework/asciidoctor.scss | 27 ----- .../stylesheets/framework/typography.scss | 65 +++++++++++- ...revent-excessive-sanitization-asciidoc.yml | 5 + .../filter/ascii_doc_sanitization_filter.rb | 62 ++++++++++++ lib/banzai/filter/base_sanitization_filter.rb | 96 ++++++++++++++++++ lib/banzai/filter/sanitization_filter.rb | 82 +-------------- lib/banzai/pipeline/ascii_doc_pipeline.rb | 2 +- spec/lib/gitlab/asciidoc_spec.rb | 99 ++++++++++++++++++- 9 files changed, 326 insertions(+), 113 deletions(-) delete mode 100644 app/assets/stylesheets/framework/asciidoctor.scss create mode 100644 changelogs/unreleased/63298-prevent-excessive-sanitization-asciidoc.yml create mode 100644 lib/banzai/filter/ascii_doc_sanitization_filter.rb create mode 100644 lib/banzai/filter/base_sanitization_filter.rb diff --git a/app/assets/stylesheets/framework.scss b/app/assets/stylesheets/framework.scss index 9b1d9d51f9c..82b4ec750ff 100644 --- a/app/assets/stylesheets/framework.scss +++ b/app/assets/stylesheets/framework.scss @@ -9,7 +9,6 @@ @import 'framework/animations'; @import 'framework/vue_transitions'; -@import 'framework/asciidoctor'; @import 'framework/banner'; @import 'framework/blocks'; @import 'framework/buttons'; diff --git a/app/assets/stylesheets/framework/asciidoctor.scss b/app/assets/stylesheets/framework/asciidoctor.scss deleted file mode 100644 index 1586265d40e..00000000000 --- a/app/assets/stylesheets/framework/asciidoctor.scss +++ /dev/null @@ -1,27 +0,0 @@ -.admonitionblock td.icon { - width: 1%; - - [class^='fa icon-'] { - @extend .fa-2x; - } - - .icon-note { - @extend .fa-thumb-tack; - } - - .icon-tip { - @extend .fa-lightbulb-o; - } - - .icon-warning { - @extend .fa-exclamation-triangle; - } - - .icon-caution { - @extend .fa-fire; - } - - .icon-important { - @extend .fa-exclamation-circle; - } -} diff --git a/app/assets/stylesheets/framework/typography.scss b/app/assets/stylesheets/framework/typography.scss index 7baab478034..c201605e83d 100644 --- a/app/assets/stylesheets/framework/typography.scss +++ b/app/assets/stylesheets/framework/typography.scss @@ -1,5 +1,5 @@ /** - * Apply Markdown typography + * Apply Markup (Markdown/AsciiDoc) typography * */ .md:not(.use-csslab) { @@ -245,6 +245,21 @@ } } + ul.checklist, + ul.none, + ol.none, + ul.no-bullet, + ol.no-bullet, + ol.unnumbered, + ul.unstyled, + ol.unstyled { + list-style-type: none; + + li { + margin-left: 0; + } + } + li { line-height: 1.6em; margin-left: 25px; @@ -321,6 +336,54 @@ visibility: visible; } } + + .big { + font-size: larger; + } + + .small { + font-size: smaller; + } + + .underline { + text-decoration: underline; + } + + .overline { + text-decoration: overline; + } + + .line-through { + text-decoration: line-through; + } + + .admonitionblock td.icon { + width: 1%; + + [class^='fa icon-'] { + @extend .fa-2x; + } + + .icon-note { + @extend .fa-thumb-tack; + } + + .icon-tip { + @extend .fa-lightbulb-o; + } + + .icon-warning { + @extend .fa-exclamation-triangle; + } + + .icon-caution { + @extend .fa-fire; + } + + .icon-important { + @extend .fa-exclamation-circle; + } + } } diff --git a/changelogs/unreleased/63298-prevent-excessive-sanitization-asciidoc.yml b/changelogs/unreleased/63298-prevent-excessive-sanitization-asciidoc.yml new file mode 100644 index 00000000000..cd8206cdb99 --- /dev/null +++ b/changelogs/unreleased/63298-prevent-excessive-sanitization-asciidoc.yml @@ -0,0 +1,5 @@ +--- +title: "Prevent excessive sanitization of AsciiDoc ouptut" +merge_request: 30290 +author: Guillaume Grossetie +type: added \ No newline at end of file diff --git a/lib/banzai/filter/ascii_doc_sanitization_filter.rb b/lib/banzai/filter/ascii_doc_sanitization_filter.rb new file mode 100644 index 00000000000..a78bb60103c --- /dev/null +++ b/lib/banzai/filter/ascii_doc_sanitization_filter.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # Sanitize HTML produced by AsciiDoc/Asciidoctor. + # + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. + class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Classes used by Asciidoctor to style components + ADMONITION_CLASSES = %w(fa icon-note icon-tip icon-warning icon-caution icon-important).freeze + CALLOUT_CLASSES = ['conum'].freeze + CHECKLIST_CLASSES = %w(fa fa-check-square-o fa-square-o).freeze + + LIST_CLASSES = %w(checklist none no-bullet unnumbered unstyled).freeze + + ELEMENT_CLASSES_WHITELIST = { + span: %w(big small underline overline line-through).freeze, + div: ['admonitionblock'].freeze, + td: ['icon'].freeze, + i: ADMONITION_CLASSES + CALLOUT_CLASSES + CHECKLIST_CLASSES, + ul: LIST_CLASSES, + ol: LIST_CLASSES + }.freeze + + def customize_whitelist(whitelist) + # Allow marks + whitelist[:elements].push('mark') + + # Allow any classes in `span`, `i`, `div`, `td`, `ul` and `ol` elements + # but then remove any unknown classes + whitelist[:attributes]['span'] = %w(class) + whitelist[:attributes]['div'].push('class') + whitelist[:attributes]['td'] = %w(class) + whitelist[:attributes]['i'] = %w(class) + whitelist[:attributes]['ul'] = %w(class) + whitelist[:attributes]['ol'] = %w(class) + whitelist[:transformers].push(self.class.remove_element_classes) + + whitelist + end + + class << self + def remove_element_classes + lambda do |env| + node = env[:node] + + return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym]) + return unless node.has_attribute?('class') + + classes = node['class'].strip.split(' ') + allowed_classes = (classes & classes_whitelist) + if allowed_classes.empty? + node.remove_attribute('class') + else + node['class'] = allowed_classes.join(' ') + end + end + end + end + end + end +end diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb new file mode 100644 index 00000000000..420e92cb1e8 --- /dev/null +++ b/lib/banzai/filter/base_sanitization_filter.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # Sanitize HTML produced by markup languages (Markdown, AsciiDoc...). + # Specific rules are implemented in dedicated filters: + # + # - Banzai::Filter::SanitizationFilter (Markdown) + # - Banzai::Filter::AsciiDocSanitizationFilter (AsciiDoc/Asciidoctor) + # + # Extends HTML::Pipeline::SanitizationFilter with common rules. + class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter + include Gitlab::Utils::StrongMemoize + + UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + + def whitelist + strong_memoize(:whitelist) do + whitelist = super.deep_dup + + # Allow span elements + whitelist[:elements].push('span') + + # Allow data-math-style attribute in order to support LaTeX formatting + whitelist[:attributes]['code'] = %w(data-math-style) + whitelist[:attributes]['pre'] = %w(data-math-style) + + # Allow html5 details/summary elements + whitelist[:elements].push('details') + whitelist[:elements].push('summary') + + # Allow abbr elements with title attribute + whitelist[:elements].push('abbr') + whitelist[:attributes]['abbr'] = %w(title) + + # Disallow `name` attribute globally, allow on `a` + whitelist[:attributes][:all].delete('name') + whitelist[:attributes]['a'].push('name') + + # Allow any protocol in `a` elements + # and then remove links with unsafe protocols + whitelist[:protocols].delete('a') + whitelist[:transformers].push(self.class.remove_unsafe_links) + + # Remove `rel` attribute from `a` elements + whitelist[:transformers].push(self.class.remove_rel) + + customize_whitelist(whitelist) + end + end + + def customize_whitelist(whitelist) + raise NotImplementedError + end + + class << self + def remove_unsafe_links + lambda do |env| + node = env[:node] + + return unless node.name == 'a' + return unless node.has_attribute?('href') + + begin + node['href'] = node['href'].strip + uri = Addressable::URI.parse(node['href']) + + return unless uri.scheme + + # Remove all invalid scheme characters before checking against the + # list of unsafe protocols. + # + # See https://tools.ietf.org/html/rfc3986#section-3.1 + scheme = uri.scheme + .strip + .downcase + .gsub(/[^A-Za-z0-9\+\.\-]+/, '') + + node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) + rescue Addressable::URI::InvalidURIError + node.remove_attribute('href') + end + end + end + + def remove_rel + lambda do |env| + if env[:node_name] == 'a' + env[:node].remove_attribute('rel') + end + end + end + end + end + end +end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index a4a06eae7b7..f57e57890f8 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -2,23 +2,13 @@ module Banzai module Filter - # Sanitize HTML + # Sanitize HTML produced by Markdown. # - # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. - class SanitizationFilter < HTML::Pipeline::SanitizationFilter - include Gitlab::Utils::StrongMemoize - - UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. + class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter + # Styles used by Markdown for table alignment TABLE_ALIGNMENT_PATTERN = /text-align: (?center|left|right)/.freeze - def whitelist - strong_memoize(:whitelist) do - customize_whitelist(super.deep_dup) - end - end - - private - def customize_whitelist(whitelist) # Allow table alignment; we whitelist specific text-align values in a # transformer below @@ -26,36 +16,9 @@ module Banzai whitelist[:attributes]['td'] = %w(style) whitelist[:css] = { properties: ['text-align'] } - # Allow span elements - whitelist[:elements].push('span') - - # Allow data-math-style attribute in order to support LaTeX formatting - whitelist[:attributes]['code'] = %w(data-math-style) - whitelist[:attributes]['pre'] = %w(data-math-style) - - # Allow html5 details/summary elements - whitelist[:elements].push('details') - whitelist[:elements].push('summary') - - # Allow abbr elements with title attribute - whitelist[:elements].push('abbr') - whitelist[:attributes]['abbr'] = %w(title) - # Allow the 'data-sourcepos' from CommonMark on all elements whitelist[:attributes][:all].push('data-sourcepos') - # Disallow `name` attribute globally, allow on `a` - whitelist[:attributes][:all].delete('name') - whitelist[:attributes]['a'].push('name') - - # Allow any protocol in `a` elements - # and then remove links with unsafe protocols - whitelist[:protocols].delete('a') - whitelist[:transformers].push(self.class.remove_unsafe_links) - - # Remove `rel` attribute from `a` elements - whitelist[:transformers].push(self.class.remove_rel) - # Remove any `style` properties not required for table alignment whitelist[:transformers].push(self.class.remove_unsafe_table_style) @@ -69,43 +32,6 @@ module Banzai end class << self - def remove_unsafe_links - lambda do |env| - node = env[:node] - - return unless node.name == 'a' - return unless node.has_attribute?('href') - - begin - node['href'] = node['href'].strip - uri = Addressable::URI.parse(node['href']) - - return unless uri.scheme - - # Remove all invalid scheme characters before checking against the - # list of unsafe protocols. - # - # See https://tools.ietf.org/html/rfc3986#section-3.1 - scheme = uri.scheme - .strip - .downcase - .gsub(/[^A-Za-z0-9\+\.\-]+/, '') - - node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme) - rescue Addressable::URI::InvalidURIError - node.remove_attribute('href') - end - end - end - - def remove_rel - lambda do |env| - if env[:node_name] == 'a' - env[:node].remove_attribute('rel') - end - end - end - def remove_unsafe_table_style lambda do |env| node = env[:node] diff --git a/lib/banzai/pipeline/ascii_doc_pipeline.rb b/lib/banzai/pipeline/ascii_doc_pipeline.rb index 6be489c6572..d25b74b23b2 100644 --- a/lib/banzai/pipeline/ascii_doc_pipeline.rb +++ b/lib/banzai/pipeline/ascii_doc_pipeline.rb @@ -5,7 +5,7 @@ module Banzai class AsciiDocPipeline < BasePipeline def self.filters FilterArray[ - Filter::SanitizationFilter, + Filter::AsciiDocSanitizationFilter, Filter::SyntaxHighlightFilter, Filter::ExternalLinkFilter, Filter::PlantumlFilter, diff --git a/spec/lib/gitlab/asciidoc_spec.rb b/spec/lib/gitlab/asciidoc_spec.rb index 8f2434acd26..ff002acbd35 100644 --- a/spec/lib/gitlab/asciidoc_spec.rb +++ b/spec/lib/gitlab/asciidoc_spec.rb @@ -45,28 +45,117 @@ module Gitlab end context "XSS" do - links = { - 'links' => { + items = { + 'link with extra attribute' => { input: 'link:mylink"onmouseover="alert(1)[Click Here]', output: "
\n

Click Here

\n
" }, - 'images' => { + 'link with unsafe scheme' => { + input: 'link:data://danger[Click Here]', + output: "
\n

Click Here

\n
" + }, + 'image with onerror' => { input: 'image:https://localhost.com/image.png[Alt text" onerror="alert(7)]', output: "
\n

Alt text\" onerror=\"alert(7)

\n
" }, - 'pre' => { + 'fenced code with inline script' => { input: '```mypre">', output: "
\n
\n
\">
\n
\n
" } } - links.each do |name, data| + items.each do |name, data| it "does not convert dangerous #{name} into HTML" do expect(render(data[:input], context)).to include(data[:output]) end end end + context 'with admonition' do + it 'preserves classes' do + input = <<~ADOC + NOTE: An admonition paragraph, like this note, grabs the reader’s attention. + ADOC + + output = <<~HTML +
+ + + + + +
+ + + An admonition paragraph, like this note, grabs the reader’s attention. +
+
+ HTML + + expect(render(input, context)).to include(output.strip) + end + end + + context 'with checklist' do + it 'preserves classes' do + input = <<~ADOC + * [x] checked + * [ ] not checked + ADOC + + output = <<~HTML +
+
    +
  • +

    checked

    +
  • +
  • +

    not checked

    +
  • +
+
+ HTML + + expect(render(input, context)).to include(output.strip) + end + end + + context 'with marks' do + it 'preserves classes' do + input = <<~ADOC + Werewolves are allergic to #cassia cinnamon#. + + Did the werewolves read the [.small]#small print#? + + Where did all the [.underline.small]#cores# run off to? + + We need [.line-through]#ten# make that twenty VMs. + + [.big]##O##nce upon an infinite loop. + ADOC + + output = <<~HTML +
+

Werewolves are allergic to cassia cinnamon.

+
+
+

Did the werewolves read the small print?

+
+
+

Where did all the cores run off to?

+
+
+

We need ten make that twenty VMs.

+
+
+

Once upon an infinite loop.

+
+ HTML + + expect(render(input, context)).to include(output.strip) + end + end + context 'with fenced block' do it 'highlights syntax' do input = <<~ADOC