From ca823abacd85f3b78d4b0aeb3e07fdc7a39f090d Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Wed, 21 Sep 2016 15:55:04 +0200 Subject: [PATCH] Fix the leak mentioned in 504a3b5 by another way The previous fix introduced another leak; as it made Banzai::Filter::SanitizationFiler#customized? always return false, so we were always appending two elements to HTML::Pipeline::SanitizationFilter::WHITELIST[:elements]. This growth in the elements array would slow the sanitization process over time. --- CHANGELOG | 1 + lib/banzai/filter/sanitization_filter.rb | 60 ++++++++++++------------ 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c1da384b0f1..3394a6bdf05 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,7 @@ v 8.13.0 (unreleased) - Speed-up group milestones show page v 8.12.1 (unreleased) + - Fix a memory leak in HTML::Pipeline::SanitizationFilter::WHITELIST v 8.12.0 - Update the rouge gem to 2.0.6, which adds highlighting support for JSX, Prometheus, and others. !6251 diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index ca80aac5a08..2470362e019 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -43,55 +43,57 @@ module Banzai whitelist[:protocols].delete('a') # ...but then remove links with unsafe protocols - whitelist[:transformers].push(remove_unsafe_links) + whitelist[:transformers].push(self.class.remove_unsafe_links) # Remove `rel` attribute from `a` elements - whitelist[:transformers].push(remove_rel) + whitelist[:transformers].push(self.class.remove_rel) # Remove `class` attribute from non-highlight spans - whitelist[:transformers].push(clean_spans) + whitelist[:transformers].push(self.class.clean_spans) whitelist end - def remove_unsafe_links - lambda do |env| - node = env[:node] + class << self + def remove_unsafe_links + lambda do |env| + node = env[:node] - return unless node.name == 'a' - return unless node.has_attribute?('href') + return unless node.name == 'a' + return unless node.has_attribute?('href') - begin - uri = Addressable::URI.parse(node['href']) - uri.scheme = uri.scheme.strip.downcase if uri.scheme + begin + uri = Addressable::URI.parse(node['href']) + uri.scheme = uri.scheme.strip.downcase if uri.scheme - node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(uri.scheme) - rescue Addressable::URI::InvalidURIError - node.remove_attribute('href') + node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(uri.scheme) + rescue Addressable::URI::InvalidURIError + node.remove_attribute('href') + end end end - end - def remove_rel - lambda do |env| - if env[:node_name] == 'a' - env[:node].remove_attribute('rel') + def remove_rel + lambda do |env| + if env[:node_name] == 'a' + env[:node].remove_attribute('rel') + end end end - end - def clean_spans - lambda do |env| - node = env[:node] + def clean_spans + lambda do |env| + node = env[:node] - return unless node.name == 'span' - return unless node.has_attribute?('class') + return unless node.name == 'span' + return unless node.has_attribute?('class') - unless has_ancestor?(node, 'pre') - node.remove_attribute('class') + unless node.ancestors.any? { |n| n.name.casecmp('pre').zero? } + node.remove_attribute('class') + end + + { node_whitelist: [node] } end - - { node_whitelist: [node] } end end end