No HTML-only email please

This commit is contained in:
Douwe Maan 2015-08-21 16:09:55 -07:00
parent 3abb356dd2
commit 15fc7bd613
6 changed files with 65 additions and 250 deletions

View File

@ -1,135 +0,0 @@
# Taken mostly from Discourse's Email::HtmlCleaner
module Gitlab
  module Email
    # HtmlCleaner cleans up the extremely dirty HTML that many email clients
    # generate by stripping out any excess divs or spans, removing styling in
    # the process (which also makes the html more suitable to be parsed as
    # Markdown).
    #
    # The wrapped document is trimmed in place: when a Nokogiri document
    # (rather than a String) is handed in, that same object is modified.
    class HtmlCleaner
      # Elements to hoist all children out of
      HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td).freeze

      # Node types to always delete
      HTML_DELETE_ELEMENT_TYPES = [
        Nokogiri::XML::Node::DTD_NODE,
        Nokogiri::XML::Node::COMMENT_NODE
      ].freeze

      # Private variables:
      #   @doc - nokogiri document
      #   @out - same as @doc, but only set once trimming has occurred
      #
      # html - Either a HTML string (parsed with Nokogiri::HTML) or an
      #        already-parsed document, which is used as-is.
      def initialize(html)
        @doc = html.is_a?(String) ? Nokogiri::HTML(html) : html
      end

      class << self
        # HtmlCleaner.trim(inp, opts={})
        #
        # Arguments:
        #   inp - Either a HTML string or a Nokogiri document.
        # Options:
        #   :return => :doc, :string
        #     Specify the desired return type.
        #     Defaults to the type of the input.
        #     A value of :string is equivalent to calling get_document_text()
        #     on the returned document.
        #
        # The opts hash passed by the caller is never modified.
        def trim(inp, opts={})
          cleaner = new(inp)

          # Resolve the default into a local instead of writing it back into
          # the caller's hash.
          return_type = opts[:return] || (inp.is_a?(String) ? :string : :doc)

          if return_type == :string
            cleaner.output_html
          else
            cleaner.output_document
          end
        end

        # HtmlCleaner.get_document_text(doc)
        #
        # Get the body portion of the document, including html, as a string.
        # Falls back to the whole document's inner HTML when there is no
        # <body> element.
        def get_document_text(doc)
          bodies = doc.xpath('//body')

          # xpath returns a node set, which is truthy even when it is empty,
          # so emptiness has to be checked explicitly for the fallback to run.
          if bodies.empty?
            doc.inner_html
          else
            bodies.inner_html
          end
        end
      end

      # Trims the wrapped document (once; the result is memoized in @out) and
      # returns it.
      def output_document
        @out ||= begin
          doc = @doc
          trim_process_node(doc)
          add_newlines(doc)
          doc
        end
      end

      # Trims the wrapped document and returns its body HTML as a string.
      def output_html
        self.class.get_document_text(output_document)
      end

      private

      # Inserts newline text nodes around <br> and <p> so the resulting text
      # can be processed line by line.
      def add_newlines(doc)
        # Replace each <br> tag with two newline characters
        doc.xpath('//br').each do |br|
          br.replace(new_linebreak_node(doc, 2))
        end

        # Surround <p> tags with newlines, to help with line-wise postprocessing
        # and ensure markdown paragraphs
        doc.xpath('//p').each do |paragraph|
          paragraph.before(new_linebreak_node(doc))
          paragraph.after(new_linebreak_node(doc, 2))
        end
      end

      # Builds a text node holding `count` newline characters, owned by doc.
      def new_linebreak_node(doc, count=1)
        Nokogiri::XML::Text.new("\n" * count, doc)
      end

      # Recursively cleans `node`: hoistable elements are replaced by their
      # (recursively cleaned) children, deletable nodes are removed, and
      # everything else just has its children cleaned. Returns `node`.
      def trim_process_node(node)
        if should_hoist?(node)
          trim_hoist_element(node).each { |child| trim_process_node(child) }
        elsif should_delete?(node)
          node.remove
        else
          children = node.children
          children.each { |child| trim_process_node(child) } if children
        end

        node
      end

      # Moves every child of `element` to just before it, removes the now
      # empty element, and returns the moved children in document order.
      def trim_hoist_element(element)
        hoisted = element.children.map do |child|
          element.before(child)
          child
        end

        element.remove
        hoisted
      end

      # True for element nodes whose tag name is in HTML_HOIST_ELEMENTS.
      def should_hoist?(node)
        node.element? && HTML_HOIST_ELEMENTS.include?(node.name)
      end

      # True for DTD/comment nodes, the <head> element, and text nodes that
      # contain nothing but whitespace.
      def should_delete?(node)
        return true if HTML_DELETE_ELEMENT_TYPES.include?(node.type)
        return true if node.element? && node.name == 'head'

        # blank? (ActiveSupport) already ignores surrounding whitespace, so no
        # separate strip is needed.
        node.text? && node.text.blank?
      end
    end
  end
end

View File

@ -23,31 +23,19 @@ module Gitlab
private
def select_body(message)
html = nil
text = nil
text = message.text_part if message.multipart?
text ||= message if message.content_type !~ /text\/html/
if message.multipart?
html = fix_charset(message.html_part)
text = fix_charset(message.text_part)
elsif message.content_type =~ /text\/html/
html = fix_charset(message)
end
return "" unless text
# prefer plain text
return text if text
if html
body = HtmlCleaner.new(html).output_html
else
body = fix_charset(message)
end
text = fix_charset(text)
# Certain trigger phrases that means we didn't parse correctly
if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
body
text
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part

View File

@ -17,4 +17,4 @@ Dit is een antwoord in het Nederlands.
Op 18 juli 2013 10:23 schreef Sander Datema het volgende:
Dit is de originele post.
Dit is de originele post.

View File

@ -1,93 +0,0 @@
Delivered-To: walter@breakingbad.com
Received: by 10.64.13.41 with SMTP id m9csp29769iec;
Thu, 20 Jun 2013 08:53:22 -0700 (PDT)
X-Received: by 10.252.23.9 with SMTP id p9mr4055675lag.4.1371743601980;
Thu, 20 Jun 2013 08:53:21 -0700 (PDT)
Received: from mail-la0-x229.google.com (mail-la0-x229.google.com [2a00:1450:4010:c03::229])
by mx.google.com with ESMTPS id u4si430203lae.48.2013.06.20.08.53.20
for <walter@breakingbad.com>
(version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128);
Thu, 20 Jun 2013 08:53:21 -0700 (PDT)
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20120113;
h=x-forwarded-to:x-forwarded-for:delivered-to:x-return-path
:content-type:mime-version:content-transfer-encoding:x-mailer
:message-id:date:subject:from:in-reply-to:to:resent-date:resent-from
:resent-to:resent-subject:resent-message-id:resent-user-agent
:x-scanned-by:x-gm-message-state;
bh=9O67r74ofh9WkEaKTRB/frQ3MKOtQlbCac2mz0/MiyY=;
b=YVAo2/JDMP53RxDmqDEKNcEMtggtfaVyq2DoseZ6vBAfB7G6NtHC9ZEkRs4oGhk6LU
fnyAPe0wnz5d9WINoMAuuTRIhplLxzcqysduSnAJAQ2qqR7mFBnlj9wJeVEKltNwmUME
nPwxsf8go20VBzrZCtECPedcLi60wbl32NCXVn0qwt2LvKiy6ktSS5Xgb4zY8i4dfXAP
6Y5gu32boooWIb9DkH1TJkn3C0RrEugNlw/DUnXrnkFefgxWF3pt/zcoW/wYRyikOdx+
smBClgR9my6QmsS2KsQrMvWJZUva7fddTiZ6FC22e4hW+8Wha0RaZOZu5O7hjg6G4/1g
IEyg==
X-Received: by 10.112.55.9 with SMTP id n9mr5916187lbp.5.1371743600857;
Thu, 20 Jun 2013 08:53:20 -0700 (PDT)
X-Forwarded-To: walter@breakingbad.com
X-Forwarded-For: walter@breakingbad.com
Delivered-To: walter@breakingbad.com
Content-Type: text/html; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
X-Mailer: BlackBerry Email (10.1.0.1720)
Message-ID: <20130619231548.6307981.74194.2379@breakingbad.com>
Date: Wed, 19 Jun 2013 19:15:48 -0400
Subject: Re: [Discourse Meta] [PM] re: Regarding your post in "Site
Customization not working"
From: aaron@breakingbad.com
In-Reply-To: <51c238655a394_5f4e3ce6690667bd@tiefighter2.mail>
To: reply+20c1b0a8bd1a63c0163cc7e7641ca06b@appmail.adventuretime.ooo
ReSent-Date: Thu, 20 Jun 2013 11:53:08 -0400 (EDT)
ReSent-From: Aaron <aaron@breakingbad.com>
ReSent-Subject: Re: [Discourse Meta] [PM] re: Regarding your post in "Site
Customization not working"
X-Gm-Message-State: ALoCoQl1BtN83rAX7At808XAPv1yCqUK3Du2IvK7eCyY3jsI77u4e5cak28307pYYHAo1JlO/Eu9
<html><head></head><body data-blackberry-caret-color=3D"#00a8df" style=3D"b=
ackground-color: rgb(255, 255, 255); line-height: initial;"><div id=3D"BB10=
_response_div" style=3D"width: 100%; font-size: initial; font-family: Calib=
ri, 'Slate Pro', sans-serif; color: rgb(31, 73, 125); text-align: initial; =
background-color: rgb(255, 255, 255);">The EC2 instance - I've seen that th=
ere tends to be odd and unrecommended settings on the Bitnami installs that=
I've checked out.</div> =
=
<div id=3D"response_div_spacer" style=3D"width: 100%; font-size: ini=
tial; font-family: Calibri, 'Slate Pro', sans-serif; color: rgb(31, 73, 125=
); text-align: initial; background-color: rgb(255, 255, 255);"><br style=3D=
"display:initial"></div> =
=
<div id=3D"_signaturePlaceholder" style=3D"font-size: initial; font-=
family: Calibri, 'Slate Pro', sans-serif; color: rgb(31, 73, 125); text-ali=
gn: initial; background-color: rgb(255, 255, 255);"></div> =
=
=
<table width=3D"100%" style=3D"background-color:white;bord=
er-spacing:0px;"> <tbody><tr><td id=3D"_persistentHeaderContainer" colspan=
=3D"2" style=3D"font-size: initial; text-align: initial; background-color: =
rgb(255, 255, 255);"> <div id=
=3D"_persistentHeader" style=3D"border-style: solid none none; border-top-c=
olor: rgb(181, 196, 223); border-top-width: 1pt; padding: 3pt 0in 0in; font=
-family: Tahoma, 'BB Alpha Sans', 'Slate Pro'; font-size: 10pt;"> <div><b>=
From: </b>Grizzly B via Discourse Meta</div><div><b>Sent: </b>Wednesday, J=
une 19, 2013 19:02</div><div><b>To: </b>aaron@breakingbad.com</div><div><b>=
Reply To: </b>Grizzly B via Discourse Meta</div><div><b>Subject: </b>[Disc=
ourse Meta] [PM] re: Regarding your post in "Site Customization<br> not wor=
king"</div></div></td></tr></tbody></table><div id=3D"_persistentHeaderEnd"=
style=3D"border-style: solid none none; border-top-color: rgb(186, 188, 20=
9); border-top-width: 1pt; font-size: initial; text-align: initial; backgro=
und-color: rgb(255, 255, 255);"></div><br><div id=3D"_originalContent" styl=
e=3D""><p>Grizzly B just sent you a private message</p>
<hr><p>Log in to our EC2 instance -or- log into a new Digital Ocean instanc=
e?</p>
<hr><p>Please visit this link to respond: <a href=3D"http://meta.discourse.=
org/t/regarding-your-post-in-site-customization-not-working/7641/5">http://=
meta.discourse.org/t/regarding-your-post-in-site-customization-not-working/=
7641/5</a></p>
<p>To unsubscribe from these emails, visit your <a href=3D"http://meta.disc=
ourse.org/user_preferences">user preferences</a>.</p>
<br><!--end of _originalContent --></div></body></html>

42
spec/fixtures/emails/plaintext_only.eml vendored Normal file
View File

@ -0,0 +1,42 @@
Delivered-To: reply@discourse.org
Return-Path: <walter.white@googlemail.com>
MIME-Version: 1.0
From: <walter.white@googlemail.com>
To:
=?utf-8?Q?Discourse_Meta?=
<reply@discourse.org>
Subject:
=?utf-8?Q?Re:_[Discourse_Meta]_[Lounge]_Testing_default_email_replies?=
Importance: Normal
Date: Fri, 28 Nov 2014 21:29:10 +0000
In-Reply-To: <topic/22638/86406@meta.discourse.org>
References:
<topic/22638@meta.discourse.org>,<topic/22638/86406@meta.discourse.org>
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
IyMjIHJlcGx5IGZyb20gZGVmYXVsdCBtYWlsIGNsaWVudCBpbiBXaW5kb3dzIDguMSBNZXRybw0K
DQoNClRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWlj
ayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gg
anVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0
aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cu
IFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBi
cm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVt
cHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUg
bGF6eSBkb2cuDQoNCg0KVGhpcyBpcyBhICoqYm9sZCoqIHdvcmQgaW4gTWFya2Rvd24NCg0KDQpU
aGlzIGlzIGEgbGluayBodHRwOi8vZXhhbXBsZS5jb20NCiANCg0KDQoNCg0KDQpGcm9tOiBBcnBp
dCBKYWxhbg0KU2VudDog4oCORnJpZGF54oCOLCDigI5Ob3ZlbWJlcuKAjiDigI4yOOKAjiwg4oCO
MjAxNCDigI4xMuKAjjrigI4zNeKAjiDigI5QTQ0KVG86IGplZmYgYXR3b29kDQoNCg0KDQoNCg0K
DQogdGVjaEFQSg0KTm92ZW1iZXIgMjggDQoNClRlc3QgcmVwbHkuDQoNCkZpcnN0IHBhcmFncmFw
aC4NCg0KU2Vjb25kIHBhcmFncmFwaC4NCg0KDQoNClRvIHJlc3BvbmQsIHJlcGx5IHRvIHRoaXMg
ZW1haWwgb3IgdmlzaXQgaHR0cHM6Ly9tZXRhLmRpc2NvdXJzZS5vcmcvdC90ZXN0aW5nLWRlZmF1
bHQtZW1haWwtcmVwbGllcy8yMjYzOC8zIGluIHlvdXIgYnJvd3Nlci4NCg0KDQoNClByZXZpb3Vz
IFJlcGxpZXMNCg0KIGNvZGluZ2hvcnJvcg0KTm92ZW1iZXIgMjggDQoNCldlJ3JlIHRlc3Rpbmcg
dGhlIGxhdGVzdCBHaXRIdWIgZW1haWwgcHJvY2Vzc2luZyBsaWJyYXJ5IHdoaWNoIHdlIGFyZSBp
bnRlZ3JhdGluZyBub3cuDQoNCmh0dHBzOi8vZ2l0aHViLmNvbS9naXRodWIvZW1haWxfcmVwbHlf
cGFyc2VyDQoNCkdvIGFoZWFkIGFuZCByZXBseSB0byB0aGlzIHRvcGljIGFuZCBJJ2xsIHJlcGx5
IGZyb20gdmFyaW91cyBlbWFpbCBjbGllbnRzIGZvciB0ZXN0aW5nLg0KDQoNCg0KDQoNClRvIHJl
c3BvbmQsIHJlcGx5IHRvIHRoaXMgZW1haWwgb3IgdmlzaXQgaHR0cHM6Ly9tZXRhLmRpc2NvdXJz
ZS5vcmcvdC90ZXN0aW5nLWRlZmF1bHQtZW1haWwtcmVwbGllcy8yMjYzOC8zIGluIHlvdXIgYnJv
d3Nlci4NCg0KDQpUbyB1bnN1YnNjcmliZSBmcm9tIHRoZXNlIGVtYWlscywgdmlzaXQgeW91ciB1
c2VyIHByZWZlcmVuY2VzLg==

View File

@ -19,9 +19,22 @@ describe Gitlab::Email::ReplyParser do
expect(test_parse_body(fixture_file("emails/no_content_reply.eml"))).to eq("")
end
it "can parse the html section" do
expect(test_parse_body(fixture_file("emails/html_only.eml"))).to eq("The EC2 instance - I've seen that there tends to be odd and " +
"unrecommended settings on the Bitnami installs that I've checked out.")
it "properly renders plaintext-only email" do
expect(test_parse_body(fixture_file("emails/plaintext_only.eml"))).
to eq(
<<-BODY.strip_heredoc.chomp
### reply from default mail client in Windows 8.1 Metro
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
This is a **bold** word in Markdown
This is a link http://example.com
BODY
)
end
it "supports a Dutch reply" do