diff --git a/changelogs/unreleased/27645-html-email-brackets-bug.yml b/changelogs/unreleased/27645-html-email-brackets-bug.yml new file mode 100644 index 00000000000..e8004d03884 --- /dev/null +++ b/changelogs/unreleased/27645-html-email-brackets-bug.yml @@ -0,0 +1,4 @@ +--- +title: Fix an email parsing bug where brackets would be inserted in emails from some Outlook clients +merge_request: 9045 +author: jneen diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb index a4ca62bfc41..50559a48973 100644 --- a/lib/gitlab/email/html_parser.rb +++ b/lib/gitlab/email/html_parser.rb @@ -17,6 +17,13 @@ module Gitlab def filter_replies! document.xpath('//blockquote').each(&:remove) document.xpath('//table').each(&:remove) + + # bogus links with no href are sometimes added by Outlook, + # and can result in Html2Text adding extra square brackets + # to the text, so we unwrap them here. + document.xpath('//a[not(@href)]').each do |link| + link.replace(link.children) + end end def filtered_html diff --git a/spec/fixtures/emails/html_empty_link.eml b/spec/fixtures/emails/html_empty_link.eml new file mode 100644 index 00000000000..1672b98b925 --- /dev/null +++ b/spec/fixtures/emails/html_empty_link.eml @@ -0,0 +1,26 @@ + +MIME-Version: 1.0 +Received: by 10.25.161.144 with HTTP; Tue, 7 Oct 2014 22:17:17 -0700 (PDT) +X-Originating-IP: [117.207.85.84] +In-Reply-To: <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail> +References: + <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail> +Date: Wed, 8 Oct 2014 10:47:17 +0530 +Delivered-To: arpit@techapj.com +Message-ID: +Subject: Re: [Discourse] [Meta] Welcome to techAPJ's Discourse! 
+From: Arpit Jalan +To: Discourse Accept-Language: en-US +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: +x-originating-ip: [134.68.31.227] +Content-Type: multipart/alternative; + boundary="_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_" +MIME-Version: 1.0 + +--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_ +Content-Type: text/html; charset="utf-8" + +no brackets! +--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_-- diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb index 28698e89c33..71659d5e8b0 100644 --- a/spec/lib/gitlab/email/reply_parser_spec.rb +++ b/spec/lib/gitlab/email/reply_parser_spec.rb @@ -208,5 +208,9 @@ describe Gitlab::Email::ReplyParser, lib: true do it "properly renders html-only email from MS Outlook" do expect(test_parse_body(fixture_file("emails/outlook_html.eml"))).to eq("Microsoft Outlook 2010") end + + it "does not wrap links with no href in unnecessary brackets" do + expect(test_parse_body(fixture_file("emails/html_empty_link.eml"))).to eq("no brackets!") + end end end