# frozen_string_literal: true

module Banzai
  module Filter
    # Post-processing counterpart to MarkdownPreEscapeFilter.
    #
    # Runs only when `result[:escaped_literals]` is truthy (presumably set by
    # the pre-escape filter -- confirm against that class). It removes the
    # `LITERAL_KEYWORD-...-LITERAL_KEYWORD` marker sequences from the HTML and
    # then cleans up `<span>` wrappers that ended up inside attribute values.
    class MarkdownPostEscapeFilter < HTML::Pipeline::Filter
      LITERAL_KEYWORD   = MarkdownPreEscapeFilter::LITERAL_KEYWORD

      # Any marker sequence; capture 1 is the text between the markers.
      LITERAL_REGEX     = %r{#{LITERAL_KEYWORD}-(.*?)-#{LITERAL_KEYWORD}}.freeze

      # A marker sequence whose content still begins with a backslash (raw or
      # URL-encoded as `%5C`), i.e. it was never escape-processed. Capture 1 is
      # the full content including that leading backslash.
      NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze

      # A `<span>` wrapper as produced by `call`; capture 1 is the inner text.
      # The delimiters must be spelled out here: a bare `(.*?)` only ever
      # matches the empty string and would make every substitution a no-op.
      SPAN_REGEX        = %r{<span>(.*?)</span>}.freeze

      CSS_A          = 'a'
      XPATH_A        = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_A).freeze
      CSS_LANG_TAG   = 'pre'
      XPATH_LANG_TAG = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_LANG_TAG).freeze

      # Attributes of `<a>` elements that may contain a stray `<span>` wrapper.
      LINK_ATTRIBUTES = %w[href title].freeze
      # Attribute of `<pre>` elements that may contain a stray `<span>` wrapper.
      LANG_ATTRIBUTE  = 'lang'

      # Strips literal marker sequences from the HTML and unwraps `<span>`s
      # that leaked into `href`/`title` (on `a`) and `lang` (on `pre`).
      #
      # Returns the document (`doc`), untouched when no literals were escaped.
      def call
        return doc unless result[:escaped_literals]

        # For any literals that actually didn't get escape processed
        # (for example in code blocks), remove the special sequence.
        html.gsub!(NOT_LITERAL_REGEX, '\1')

        # Replace any left over literal sequences with `span` so that our
        # reference processing is short-circuited
        html.gsub!(LITERAL_REGEX, '<span>\1</span>')

        # Since literals are converted in links, we need to remove any
        # surrounding `span`.
        # Note: this could have been done in the renderer,
        # Banzai::Renderer::CommonMark::HTML. However, we eventually want to
        # use the built-in compiled renderer, rather than the ruby version,
        # for speed. So let's do this work here.
        doc.xpath(XPATH_A).each do |node|
          LINK_ATTRIBUTES.each { |name| unwrap_spans(node, name) }
        end

        doc.xpath(XPATH_LANG_TAG).each do |node|
          unwrap_spans(node, LANG_ATTRIBUTE)
        end

        doc
      end

      private

      # Replaces every `<span>...</span>` in the named attribute of +node+
      # with its inner text. Does nothing when the attribute is absent.
      def unwrap_spans(node, name)
        attribute = node.attributes[name]
        return unless attribute

        attribute.value = attribute.value.gsub(SPAN_REGEX, '\1')
      end
    end
  end
end