diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb
deleted file mode 100644
index e1ae9eee56c..00000000000
--- a/lib/gitlab/email/html_cleaner.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-# Taken mostly from Discourse's Email::HtmlCleaner
-module Gitlab
- module Email
- # HtmlCleaner cleans up the extremely dirty HTML that many email clients
- # generate by stripping out any excess divs or spans, removing styling in
- # the process (which also makes the html more suitable to be parsed as
- # Markdown).
- class HtmlCleaner
- # Elements to hoist all children out of
- HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
- # Node types to always delete
- HTML_DELETE_ELEMENT_TYPES = [
- Nokogiri::XML::Node::DTD_NODE,
- Nokogiri::XML::Node::COMMENT_NODE,
- ]
-
- # Private variables:
- # @doc - nokogiri document
- # @out - same as @doc, but only if trimming has occured
- def initialize(html)
- if html.is_a?(String)
- @doc = Nokogiri::HTML(html)
- else
- @doc = html
- end
- end
-
- class << self
- # HtmlCleaner.trim(inp, opts={})
- #
- # Arguments:
- # inp - Either a HTML string or a Nokogiri document.
- # Options:
- # :return => :doc, :string
- # Specify the desired return type.
- # Defaults to the type of the input.
- # A value of :string is equivalent to calling get_document_text()
- # on the returned document.
- def trim(inp, opts={})
- cleaner = HtmlCleaner.new(inp)
-
- opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
-
- if opts[:return] == :string
- cleaner.output_html
- else
- cleaner.output_document
- end
- end
-
- # HtmlCleaner.get_document_text(doc)
- #
- # Get the body portion of the document, including html, as a string.
- def get_document_text(doc)
- body = doc.xpath('//body')
- if body
- body.inner_html
- else
- doc.inner_html
- end
- end
- end
-
- def output_document
- @out ||= begin
- doc = @doc
- trim_process_node doc
- add_newlines doc
- doc
- end
- end
-
- def output_html
- HtmlCleaner.get_document_text(output_document)
- end
-
- private
-
- def add_newlines(doc)
- # Replace <br> tags with a markdown \n
- doc.xpath('//br').each do |br|
- br.replace(new_linebreak_node doc, 2)
- end
- # Surround <p> tags with newlines, to help with line-wise postprocessing
- # and ensure markdown paragraphs
- doc.xpath('//p').each do |p|
- p.before(new_linebreak_node doc)
- p.after(new_linebreak_node doc, 2)
- end
- end
-
- def new_linebreak_node(doc, count=1)
- Nokogiri::XML::Text.new("\n" * count, doc)
- end
-
- def trim_process_node(node)
- if should_hoist?(node)
- hoisted = trim_hoist_element node
- hoisted.each { |child| trim_process_node child }
- elsif should_delete?(node)
- node.remove
- else
- if children = node.children
- children.each { |child| trim_process_node child }
- end
- end
-
- node
- end
-
- def trim_hoist_element(element)
- hoisted = []
- element.children.each do |child|
- element.before(child)
- hoisted << child
- end
- element.remove
- hoisted
- end
-
- def should_hoist?(node)
- return false unless node.element?
- HTML_HOIST_ELEMENTS.include? node.name
- end
-
- def should_delete?(node)
- return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
- return true if node.element? && node.name == 'head'
- return true if node.text? && node.text.strip.blank?
-
- false
- end
- end
- end
-end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 6e768e46a71..6ed36b51f12 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,31 +23,19 @@ module Gitlab
private
def select_body(message)
- html = nil
- text = nil
+ text = message.text_part if message.multipart?
+ text ||= message if message.content_type !~ /text\/html/
- if message.multipart?
- html = fix_charset(message.html_part)
- text = fix_charset(message.text_part)
- elsif message.content_type =~ /text\/html/
- html = fix_charset(message)
- end
+ return "" unless text
- # prefer plain text
- return text if text
-
- if html
- body = HtmlCleaner.new(html).output_html
- else
- body = fix_charset(message)
- end
+ text = fix_charset(text)
# Certain trigger phrases that means we didn't parse correctly
- if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+ if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
- body
+ text
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part
diff --git a/spec/fixtures/emails/dutch.eml b/spec/fixtures/emails/dutch.eml
index 7be08dc4938..3142bf30c3b 100644
--- a/spec/fixtures/emails/dutch.eml
+++ b/spec/fixtures/emails/dutch.eml
@@ -17,4 +17,4 @@ Dit is een antwoord in het Nederlands.
Op 18 juli 2013 10:23 schreef Sander Datema het volgende:
-Dit is de originele post.
\ No newline at end of file
+Dit is de originele post.
diff --git a/spec/fixtures/emails/html_only.eml b/spec/fixtures/emails/html_only.eml
deleted file mode 100644
index 561b8db2c79..00000000000
--- a/spec/fixtures/emails/html_only.eml
+++ /dev/null
@@ -1,93 +0,0 @@
-
-Delivered-To: walter@breakingbad.com
-Received: by 10.64.13.41 with SMTP id m9csp29769iec;
- Thu, 20 Jun 2013 08:53:22 -0700 (PDT)
-X-Received: by 10.252.23.9 with SMTP id p9mr4055675lag.4.1371743601980;
- Thu, 20 Jun 2013 08:53:21 -0700 (PDT)
-Received: from mail-la0-x229.google.com (mail-la0-x229.google.com [2a00:1450:4010:c03::229])
- by mx.google.com with ESMTPS id u4si430203lae.48.2013.06.20.08.53.20
- for Grizzly B just sent you a private message Log in to our EC2 instance -or- log into a new Digital Ocean instanc=
-e? Please visit this link to respond: http://=
-meta.discourse.org/t/regarding-your-post-in-site-customization-not-working/=
-7641/5 To unsubscribe from these emails, visit your user preferences.
not wor=
-king"