# frozen_string_literal: true

module Banzai
  module Filter
    # HTML Filter to modify the attributes of external links
    #
    # Every `<a>` element with a non-empty `href` is inspected. Links that are
    # not recognised as internal (see #internal_url?) get their link text
    # sanitized, an optional warning tooltip, and `rel`/`target` attributes.
    #
    # NOTE(review): `doc` and `context` are not defined in this file; they are
    # presumably provided by HTML::Pipeline::Filter.
    class ExternalLinkFilter < HTML::Pipeline::Filter
      # URL schemes that receive the `rel`/`target` treatment. `nil` covers
      # URLs without a scheme as well as URLs that could not be parsed.
      SCHEMES = ['http', 'https', nil].freeze

      # U+202E (right-to-left override) and its percent-encoded UTF-8 form.
      RTLO = "\u202E"
      ENCODED_RTLO = '%E2%80%AE'

      # Processes every candidate link in the document.
      #
      # @return the (mutated) `doc`
      def call
        links.each do |node|
          # URI.parse does stricter checking on the url than Addressable,
          # such as on `mailto:` links. Since we've been using it, do an
          # initial parse for validity and then use Addressable
          # for IDN support, etc
          strict_uri = uri_strict(node_src(node))
          parsed_uri = nil

          if strict_uri
            # Write back the URL as serialized by the strict parser, then
            # re-read it so Addressable sees exactly what is stored on the node.
            node.set_attribute(node_src_attribute(node), strict_uri.to_s)
            parsed_uri = addressable_uri(node_src(node))
          end

          next if internal_url?(parsed_uri)

          punycode_autolink_node!(parsed_uri, node)
          sanitize_link_text!(node)
          add_malicious_tooltip!(parsed_uri, node)
          add_nofollow!(parsed_uri, node)
        end

        doc
      end

      private

      # if this is a link to a proxied image, then `src` is already the correct
      # proxied url, so work with the `data-canonical-src`
      #
      # @return [String] name of the attribute that holds the link target
      def node_src_attribute(node)
        node['data-canonical-src'] ? 'data-canonical-src' : 'href'
      end

      # @return [String, nil] value of the attribute chosen by
      #   #node_src_attribute
      def node_src(node)
        node[node_src_attribute(node)]
      end

      # Parses `href` with the standard library parser.
      #
      # @return [URI::Generic, nil] nil when the parser raises a URI::Error
      def uri_strict(href)
        URI.parse(href)
      rescue URI::Error
        nil
      end

      # Parses `href` with Addressable.
      #
      # @return [Addressable::URI, nil] nil when Addressable rejects the URL
      def addressable_uri(href)
        Addressable::URI.parse(href)
      rescue Addressable::URI::InvalidURIError
        nil
      end

      # @return the `a` elements (the document node itself or any descendant)
      #   that carry a non-empty `href` attribute
      def links
        query = 'descendant-or-self::a[@href and not(@href = "")]'
        doc.xpath(query)
      end

      # Decides whether `uri` points at this GitLab instance.
      #
      # An unparsable link (nil) is never internal, so it still receives the
      # external-link treatment.
      #
      # @param uri [Addressable::URI, nil]
      # @return [Boolean]
      def internal_url?(uri)
        return false if uri.nil?

        # Relative URLs miss a hostname
        link_host = uri.hostname
        return true unless link_host

        own_host = internal_url.hostname
        return false unless own_host

        # Hostnames are case-insensitive, and neither parser downcases the
        # host on parse. Fold ASCII letters only, so that Unicode look-alike
        # characters are never mapped onto the internal hostname.
        link_host.downcase(:ascii) == own_host.downcase(:ascii)
      end

      # @return [URI::Generic] the configured GitLab URL, parsed once and
      #   memoized
      def internal_url
        @internal_url ||= URI.parse(Gitlab.config.gitlab.url)
      end

      # Only replace an autolink with an IDN with its punycode
      # version if we need emailable links. Otherwise let it
      # be shown normally and the tooltips will show the
      # punycode version.
      def punycode_autolink_node!(uri, node)
        return unless uri
        return unless context[:emailable_links]

        unencoded_uri_str = Addressable::URI.unencode(node_src(node))

        # Only an autolink, whose text equals its (unencoded) target, is rewritten.
        return unless unencoded_uri_str == node.content && idn?(uri)

        node.content = uri.normalize.to_s
      end

      # escape any right-to-left (RTLO) characters in link text
      def sanitize_link_text!(node)
        node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO)
      end

      # If the domain is an international domain name (IDN),
      # let's expose with a tooltip in case it's intended
      # to be malicious. This is particularly useful for links
      # where the link text is not the same as the actual link.
      # We will continue to show the unicode version of the domain
      # in autolinked link text, which could contain emojis, etc.
      #
      # Also show the tooltip if the url contains the RTLO character,
      # as this is an indicator of a malicious link
      def add_malicious_tooltip!(uri, node)
        return unless idn?(uri) || has_encoded_rtlo?(uri)

        node.add_class('has-tooltip')
        node.set_attribute('title', uri.normalize.to_s)
      end

      # Marks the link as nofollow and makes it open in a new tab when its
      # scheme is listed in SCHEMES (a nil `uri` counts as scheme `nil`).
      def add_nofollow!(uri, node)
        return unless SCHEMES.include?(uri&.scheme)

        node.set_attribute('rel', 'nofollow noreferrer noopener')
        node.set_attribute('target', '_blank')
      end

      # @return [Boolean, nil] truthy when the normalized host starts with the
      #   punycode prefix `xn--`; nil when `uri` or its host is nil
      def idn?(uri)
        uri&.normalized_host&.start_with?('xn--')
      end

      # @return [Boolean, nil] truthy when the serialized URL contains the
      #   percent-encoded RTLO sequence; nil when `uri` is nil
      def has_encoded_rtlo?(uri)
        uri&.to_s&.include?(ENCODED_RTLO)
      end
    end
  end
end