Merge branch 'bugfix/html-email-brackets' into 'master'

unwrap links without an href

Closes #27645

See merge request !9045
This commit is contained in:
Douwe Maan 2017-06-20 17:14:37 +00:00
commit af5c7c76ff
4 changed files with 41 additions and 0 deletions

View File

@ -0,0 +1,4 @@
---
title: Fix an email parsing bug where brackets would be inserted in emails from some Outlook clients
merge_request: 9045
author: jneen

View File

@ -17,6 +17,13 @@ module Gitlab
# Removes every <blockquote> and <table> element from the document, then
# unwraps anchors that carry no href so only their children remain.
def filter_replies!
  %w[//blockquote //table].each do |selector|
    document.xpath(selector).each(&:remove)
  end

  # Outlook sometimes emits anchors without an href (e.g. named anchors).
  # Html2Text can surround those with stray square brackets, so swap each
  # such anchor for its own child nodes before conversion.
  document.xpath('//a[not(@href)]').each { |anchor| anchor.replace(anchor.children) }
end
def filtered_html

View File

@ -0,0 +1,26 @@
MIME-Version: 1.0
Received: by 10.25.161.144 with HTTP; Tue, 7 Oct 2014 22:17:17 -0700 (PDT)
X-Originating-IP: [117.207.85.84]
In-Reply-To: <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail>
References: <topic/35@discourse.techapj.com>
<5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail>
Date: Wed, 8 Oct 2014 10:47:17 +0530
Delivered-To: arpit@techapj.com
Message-ID: <CAOJeqne=SJ_LwN4sb-0Y95ejc2OpreVhdmcPn0TnmwSvTCYzzQ@mail.gmail.com>
Subject: Re: [Discourse] [Meta] Welcome to techAPJ's Discourse!
From: Arpit Jalan <arpit@techapj.com>
To: Discourse <mail+e1c7f2a380e33840aeb654f075490bad@arpitjalan.com>Accept-Language: en-US
Content-Language: en-US
X-MS-Has-Attach:
X-MS-TNEF-Correlator:
x-originating-ip: [134.68.31.227]
Content-Type: multipart/alternative;
boundary="_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_"
MIME-Version: 1.0
--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_
Content-Type: text/html; charset="utf-8"
<a name="_MailEndCompose">no brackets!</a>
--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_--

View File

@ -208,5 +208,9 @@ describe Gitlab::Email::ReplyParser, lib: true do
# An HTML-only message produced by MS Outlook should parse to its plain text.
it "properly renders html-only email from MS Outlook" do
  parsed_body = test_parse_body(fixture_file("emails/outlook_html.eml"))

  expect(parsed_body).to eq("Microsoft Outlook 2010")
end
# The fixture body is a single anchor with a name but no href; the parsed
# text must be the bare anchor text with no added square brackets.
it "does not wrap links with no href in unnecessary brackets" do
  parsed_body = test_parse_body(fixture_file("emails/html_empty_link.eml"))

  expect(parsed_body).to eq("no brackets!")
end
end
end