From d866c7763cdaa801a0b6dacf125250c5117843d9 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Mon, 19 Jun 2017 12:06:33 -0700 Subject: [PATCH 1/3] add a spec for no-href link parsing --- spec/fixtures/emails/html_empty_link.eml | 26 ++++++++++++++++++++++ spec/lib/gitlab/email/reply_parser_spec.rb | 4 ++++ 2 files changed, 30 insertions(+) create mode 100644 spec/fixtures/emails/html_empty_link.eml diff --git a/spec/fixtures/emails/html_empty_link.eml b/spec/fixtures/emails/html_empty_link.eml new file mode 100644 index 00000000000..1672b98b925 --- /dev/null +++ b/spec/fixtures/emails/html_empty_link.eml @@ -0,0 +1,26 @@ + +MIME-Version: 1.0 +Received: by 10.25.161.144 with HTTP; Tue, 7 Oct 2014 22:17:17 -0700 (PDT) +X-Originating-IP: [117.207.85.84] +In-Reply-To: <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail> +References: + <5434c8b52bb3a_623ff09fec70f049749@discourse-app.mail> +Date: Wed, 8 Oct 2014 10:47:17 +0530 +Delivered-To: arpit@techapj.com +Message-ID: +Subject: Re: [Discourse] [Meta] Welcome to techAPJ's Discourse! +From: Arpit Jalan +To: Discourse Accept-Language: en-US +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: +x-originating-ip: [134.68.31.227] +Content-Type: multipart/alternative; + boundary="_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_" +MIME-Version: 1.0 + +--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_ +Content-Type: text/html; charset="utf-8" + +no brackets! 
+--_000_B0DFE1BEB3739743BC9B639D0E6BC8FF217A6341IUMSSGMBX104ads_-- diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb index 28698e89c33..71659d5e8b0 100644 --- a/spec/lib/gitlab/email/reply_parser_spec.rb +++ b/spec/lib/gitlab/email/reply_parser_spec.rb @@ -208,5 +208,9 @@ describe Gitlab::Email::ReplyParser, lib: true do it "properly renders html-only email from MS Outlook" do expect(test_parse_body(fixture_file("emails/outlook_html.eml"))).to eq("Microsoft Outlook 2010") end + + it "does not wrap links with no href in unnecessary brackets" do + expect(test_parse_body(fixture_file("emails/html_empty_link.eml"))).to eq("no brackets!") + end end end From ef39fb0b54d5303c53fe0751f730fe3824bbaaa0 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Mon, 6 Feb 2017 10:22:31 -0800 Subject: [PATCH 2/3] unwrap all links with no href --- lib/gitlab/email/html_parser.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/gitlab/email/html_parser.rb b/lib/gitlab/email/html_parser.rb index a4ca62bfc41..50559a48973 100644 --- a/lib/gitlab/email/html_parser.rb +++ b/lib/gitlab/email/html_parser.rb @@ -17,6 +17,13 @@ module Gitlab def filter_replies! document.xpath('//blockquote').each(&:remove) document.xpath('//table').each(&:remove) + + # bogus links with no href are sometimes added by Outlook, + # and can result in Html2Text adding extra square brackets + # to the text, so we unwrap them here. 
+ document.xpath('//a[not(@href)]').each do |link| + link.replace(link.children) + end end def filtered_html From b869a99a743f02873038aeeb07d7b5ffbf4f6d89 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Mon, 19 Jun 2017 16:57:47 -0700 Subject: [PATCH 3/3] add a changelog for the email parsing bug --- changelogs/unreleased/27645-html-email-brackets-bug.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelogs/unreleased/27645-html-email-brackets-bug.yml diff --git a/changelogs/unreleased/27645-html-email-brackets-bug.yml b/changelogs/unreleased/27645-html-email-brackets-bug.yml new file mode 100644 index 00000000000..e8004d03884 --- /dev/null +++ b/changelogs/unreleased/27645-html-email-brackets-bug.yml @@ -0,0 +1,4 @@ +--- +title: Fix an email parsing bug where brackets would be inserted in emails from some Outlook clients +merge_request: 9045 +author: jneen