diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb deleted file mode 100644 index e1ae9eee56c..00000000000 --- a/lib/gitlab/email/html_cleaner.rb +++ /dev/null @@ -1,135 +0,0 @@ -# Taken mostly from Discourse's Email::HtmlCleaner -module Gitlab - module Email - # HtmlCleaner cleans up the extremely dirty HTML that many email clients - # generate by stripping out any excess divs or spans, removing styling in - # the process (which also makes the html more suitable to be parsed as - # Markdown). - class HtmlCleaner - # Elements to hoist all children out of - HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td) - # Node types to always delete - HTML_DELETE_ELEMENT_TYPES = [ - Nokogiri::XML::Node::DTD_NODE, - Nokogiri::XML::Node::COMMENT_NODE, - ] - - # Private variables: - # @doc - nokogiri document - # @out - same as @doc, but only if trimming has occured - def initialize(html) - if html.is_a?(String) - @doc = Nokogiri::HTML(html) - else - @doc = html - end - end - - class << self - # HtmlCleaner.trim(inp, opts={}) - # - # Arguments: - # inp - Either a HTML string or a Nokogiri document. - # Options: - # :return => :doc, :string - # Specify the desired return type. - # Defaults to the type of the input. - # A value of :string is equivalent to calling get_document_text() - # on the returned document. - def trim(inp, opts={}) - cleaner = HtmlCleaner.new(inp) - - opts[:return] ||= (inp.is_a?(String) ? :string : :doc) - - if opts[:return] == :string - cleaner.output_html - else - cleaner.output_document - end - end - - # HtmlCleaner.get_document_text(doc) - # - # Get the body portion of the document, including html, as a string. - def get_document_text(doc) - body = doc.xpath('//body') - if body - body.inner_html - else - doc.inner_html - end - end - end - - def output_document - @out ||= begin - doc = @doc - trim_process_node doc - add_newlines doc - doc - end - end - - def output_html - HtmlCleaner.get_document_text(output_document) - end - - private - - def add_newlines(doc) - # Replace
tags with a markdown \n - doc.xpath('//br').each do |br| - br.replace(new_linebreak_node doc, 2) - end - # Surround

tags with newlines, to help with line-wise postprocessing - # and ensure markdown paragraphs - doc.xpath('//p').each do |p| - p.before(new_linebreak_node doc) - p.after(new_linebreak_node doc, 2) - end - end - - def new_linebreak_node(doc, count=1) - Nokogiri::XML::Text.new("\n" * count, doc) - end - - def trim_process_node(node) - if should_hoist?(node) - hoisted = trim_hoist_element node - hoisted.each { |child| trim_process_node child } - elsif should_delete?(node) - node.remove - else - if children = node.children - children.each { |child| trim_process_node child } - end - end - - node - end - - def trim_hoist_element(element) - hoisted = [] - element.children.each do |child| - element.before(child) - hoisted << child - end - element.remove - hoisted - end - - def should_hoist?(node) - return false unless node.element? - HTML_HOIST_ELEMENTS.include? node.name - end - - def should_delete?(node) - return true if HTML_DELETE_ELEMENT_TYPES.include? node.type - return true if node.element? && node.name == 'head' - return true if node.text? && node.text.strip.blank? - - false - end - end - end -end diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb index 6e768e46a71..6ed36b51f12 100644 --- a/lib/gitlab/email/reply_parser.rb +++ b/lib/gitlab/email/reply_parser.rb @@ -23,31 +23,19 @@ module Gitlab private def select_body(message) - html = nil - text = nil + text = message.text_part if message.multipart? + text ||= message if message.content_type !~ /text\/html/ - if message.multipart? - html = fix_charset(message.html_part) - text = fix_charset(message.text_part) - elsif message.content_type =~ /text\/html/ - html = fix_charset(message) - end + return "" unless text - # prefer plain text - return text if text - - if html - body = HtmlCleaner.new(html).output_html - else - body = fix_charset(message) - end + text = fix_charset(text) # Certain trigger phrases that means we didn't parse correctly - if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/ + if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/ return "" end - body + text end # Force encoding to UTF-8 on a Mail::Message or Mail::Part diff --git a/spec/fixtures/emails/dutch.eml b/spec/fixtures/emails/dutch.eml index 7be08dc4938..3142bf30c3b 100644 --- a/spec/fixtures/emails/dutch.eml +++ b/spec/fixtures/emails/dutch.eml @@ -17,4 +17,4 @@ Dit is een antwoord in het Nederlands. Op 18 juli 2013 10:23 schreef Sander Datema het volgende: -Dit is de originele post. \ No newline at end of file +Dit is de originele post. diff --git a/spec/fixtures/emails/html_only.eml b/spec/fixtures/emails/html_only.eml deleted file mode 100644 index 561b8db2c79..00000000000 --- a/spec/fixtures/emails/html_only.eml +++ /dev/null @@ -1,93 +0,0 @@ - -Delivered-To: walter@breakingbad.com -Received: by 10.64.13.41 with SMTP id m9csp29769iec; - Thu, 20 Jun 2013 08:53:22 -0700 (PDT) -X-Received: by 10.252.23.9 with SMTP id p9mr4055675lag.4.1371743601980; - Thu, 20 Jun 2013 08:53:21 -0700 (PDT) -Received: from mail-la0-x229.google.com (mail-la0-x229.google.com [2a00:1450:4010:c03::229]) - by mx.google.com with ESMTPS id u4si430203lae.48.2013.06.20.08.53.20 - for - (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); - Thu, 20 Jun 2013 08:53:21 -0700 (PDT) -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20120113; - h=x-forwarded-to:x-forwarded-for:delivered-to:x-return-path - :content-type:mime-version:content-transfer-encoding:x-mailer - :message-id:date:subject:from:in-reply-to:to:resent-date:resent-from - :resent-to:resent-subject:resent-message-id:resent-user-agent - :x-scanned-by:x-gm-message-state; - bh=9O67r74ofh9WkEaKTRB/frQ3MKOtQlbCac2mz0/MiyY=; - b=YVAo2/JDMP53RxDmqDEKNcEMtggtfaVyq2DoseZ6vBAfB7G6NtHC9ZEkRs4oGhk6LU - fnyAPe0wnz5d9WINoMAuuTRIhplLxzcqysduSnAJAQ2qqR7mFBnlj9wJeVEKltNwmUME - nPwxsf8go20VBzrZCtECPedcLi60wbl32NCXVn0qwt2LvKiy6ktSS5Xgb4zY8i4dfXAP - 6Y5gu32boooWIb9DkH1TJkn3C0RrEugNlw/DUnXrnkFefgxWF3pt/zcoW/wYRyikOdx+ - smBClgR9my6QmsS2KsQrMvWJZUva7fddTiZ6FC22e4hW+8Wha0RaZOZu5O7hjg6G4/1g - IEyg== -X-Received: by 10.112.55.9 with SMTP id n9mr5916187lbp.5.1371743600857; - Thu, 20 Jun 2013 08:53:20 -0700 (PDT) -X-Forwarded-To: walter@breakingbad.com -X-Forwarded-For: walter@breakingbad.com -Delivered-To: walter@breakingbad.com -Content-Type: text/html; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: quoted-printable -X-Mailer: BlackBerry Email (10.1.0.1720) -Message-ID: <20130619231548.6307981.74194.2379@breakingbad.com> -Date: Wed, 19 Jun 2013 19:15:48 -0400 -Subject: Re: [Discourse Meta] [PM] re: Regarding your post in "Site - Customization not working" -From: aaron@breakingbad.com -In-Reply-To: <51c238655a394_5f4e3ce6690667bd@tiefighter2.mail> -To: reply+20c1b0a8bd1a63c0163cc7e7641ca06b@appmail.adventuretime.ooo -ReSent-Date: Thu, 20 Jun 2013 11:53:08 -0400 (EDT) -ReSent-From: Aaron -ReSent-Subject: Re: [Discourse Meta] [PM] re: Regarding your post in "Site - Customization not working" -X-Gm-Message-State: ALoCoQl1BtN83rAX7At808XAPv1yCqUK3Du2IvK7eCyY3jsI77u4e5cak28307pYYHAo1JlO/Eu9 - -

The EC2 instance - I've seen that th= -ere tends to be odd and unrecommended settings on the Bitnami installs that= - I've checked out.
= - = -

= - = -
= - = - = -
= -From: Grizzly B via Discourse Meta
Sent: Wednesday, J= -une 19, 2013 19:02
To: aaron@breakingbad.com
= -Reply To: Grizzly B via Discourse Meta
Subject: [Disc= -ourse Meta] [PM] re: Regarding your post in "Site Customization
not wor= -king"

Grizzly B just sent you a private message

- -

Log in to our EC2 instance -or- log into a new Digital Ocean instanc= -e?

- -

Please visit this link to respond: http://= -meta.discourse.org/t/regarding-your-post-in-site-customization-not-working/= -7641/5

- -

To unsubscribe from these emails, visit your user preferences.

-
diff --git a/spec/fixtures/emails/plaintext_only.eml b/spec/fixtures/emails/plaintext_only.eml new file mode 100644 index 00000000000..1bfaec771dc --- /dev/null +++ b/spec/fixtures/emails/plaintext_only.eml @@ -0,0 +1,42 @@ +Delivered-To: reply@discourse.org +Return-Path: +MIME-Version: 1.0 +From: +To: + =?utf-8?Q?Discourse_Meta?= + +Subject: + =?utf-8?Q?Re:_[Discourse_Meta]_[Lounge]_Testing_default_email_replies?= +Importance: Normal +Date: Fri, 28 Nov 2014 21:29:10 +0000 +In-Reply-To: +References: + , +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +IyMjIHJlcGx5IGZyb20gZGVmYXVsdCBtYWlsIGNsaWVudCBpbiBXaW5kb3dzIDguMSBNZXRybw0K +DQoNClRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWlj +ayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gg +anVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0 +aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cu +IFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBi +cm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVt +cHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUg +bGF6eSBkb2cuDQoNCg0KVGhpcyBpcyBhICoqYm9sZCoqIHdvcmQgaW4gTWFya2Rvd24NCg0KDQpU +aGlzIGlzIGEgbGluayBodHRwOi8vZXhhbXBsZS5jb20NCiANCg0KDQoNCg0KDQpGcm9tOiBBcnBp +dCBKYWxhbg0KU2VudDog4oCORnJpZGF54oCOLCDigI5Ob3ZlbWJlcuKAjiDigI4yOOKAjiwg4oCO +MjAxNCDigI4xMuKAjjrigI4zNeKAjiDigI5QTQ0KVG86IGplZmYgYXR3b29kDQoNCg0KDQoNCg0K +DQogdGVjaEFQSg0KTm92ZW1iZXIgMjggDQoNClRlc3QgcmVwbHkuDQoNCkZpcnN0IHBhcmFncmFw +aC4NCg0KU2Vjb25kIHBhcmFncmFwaC4NCg0KDQoNClRvIHJlc3BvbmQsIHJlcGx5IHRvIHRoaXMg +ZW1haWwgb3IgdmlzaXQgaHR0cHM6Ly9tZXRhLmRpc2NvdXJzZS5vcmcvdC90ZXN0aW5nLWRlZmF1 +bHQtZW1haWwtcmVwbGllcy8yMjYzOC8zIGluIHlvdXIgYnJvd3Nlci4NCg0KDQoNClByZXZpb3Vz +IFJlcGxpZXMNCg0KIGNvZGluZ2hvcnJvcg0KTm92ZW1iZXIgMjggDQoNCldlJ3JlIHRlc3Rpbmcg +dGhlIGxhdGVzdCBHaXRIdWIgZW1haWwgcHJvY2Vzc2luZyBsaWJyYXJ5IHdoaWNoIHdlIGFyZSBp +bnRlZ3JhdGluZyBub3cuDQoNCmh0dHBzOi8vZ2l0aHViLmNvbS9naXRodWIvZW1haWxfcmVwbHlf +cGFyc2VyDQoNCkdvIGFoZWFkIGFuZCByZXBseSB0byB0aGlzIHRvcGljIGFuZCBJJ2xsIHJlcGx5 +IGZyb20gdmFyaW91cyBlbWFpbCBjbGllbnRzIGZvciB0ZXN0aW5nLg0KDQoNCg0KDQoNClRvIHJl +c3BvbmQsIHJlcGx5IHRvIHRoaXMgZW1haWwgb3IgdmlzaXQgaHR0cHM6Ly9tZXRhLmRpc2NvdXJz +ZS5vcmcvdC90ZXN0aW5nLWRlZmF1bHQtZW1haWwtcmVwbGllcy8yMjYzOC8zIGluIHlvdXIgYnJv +d3Nlci4NCg0KDQpUbyB1bnN1YnNjcmliZSBmcm9tIHRoZXNlIGVtYWlscywgdmlzaXQgeW91ciB1 +c2VyIHByZWZlcmVuY2VzLg== diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb index a94c92ad53c..7cae1da8050 100644 --- a/spec/lib/gitlab/email/reply_parser_spec.rb +++ b/spec/lib/gitlab/email/reply_parser_spec.rb @@ -19,9 +19,22 @@ describe Gitlab::Email::ReplyParser do expect(test_parse_body(fixture_file("emails/no_content_reply.eml"))).to eq("") end - it "can parse the html section" do - expect(test_parse_body(fixture_file("emails/html_only.eml"))).to eq("The EC2 instance - I've seen that there tends to be odd and " + - "unrecommended settings on the Bitnami installs that I've checked out.") + it "properly renders plaintext-only email" do + expect(test_parse_body(fixture_file("emails/plaintext_only.eml"))). + to eq( + <<-BODY.strip_heredoc.chomp + ### reply from default mail client in Windows 8.1 Metro + + + The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + + + This is a **bold** word in Markdown + + + This is a link http://example.com + BODY + ) end it "supports a Dutch reply" do