diff --git a/Gemfile b/Gemfile index 9dfaf7a48a2..fff808e9805 100644 --- a/Gemfile +++ b/Gemfile @@ -332,7 +332,7 @@ gem 'octokit', '~> 4.3.0' gem 'mail_room', '~> 0.9.0' -gem 'email_reply_parser', '~> 0.5.8' +gem 'email_reply_trimmer', '~> 0.1' gem 'html2text' gem 'ruby-prof', '~> 0.16.2' diff --git a/Gemfile.lock b/Gemfile.lock index 9f8367b420a..765d57c6238 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -167,7 +167,7 @@ GEM railties (>= 4.2) dropzonejs-rails (0.7.2) rails (> 3.1) - email_reply_parser (0.5.8) + email_reply_trimmer (0.1.6) email_spec (1.6.0) launchy (~> 2.1) mail (~> 2.2) @@ -839,7 +839,7 @@ DEPENDENCIES diffy (~> 3.1.0) doorkeeper (~> 4.2.0) dropzonejs-rails (~> 0.7.1) - email_reply_parser (~> 0.5.8) + email_reply_trimmer (~> 0.1) email_spec (~> 1.6.0) factory_girl_rails (~> 4.7.0) ffaker (~> 2.0.0) diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb index f586c5ab062..e53a99cd8e0 100644 --- a/lib/gitlab/email/reply_parser.rb +++ b/lib/gitlab/email/reply_parser.rb @@ -13,9 +13,11 @@ module Gitlab encoding = body.encoding - body = discourse_email_trimmer(body) + body = EmailReplyTrimmer.trim(body) - body = EmailReplyParser.parse_reply(body) + # TODO [jneen]: do we want to allow empty-quoting? (replies only containing a blockquote) + # EmailReplyTrimmer allows this as a special case, so we detect it manually here. + return "" if body.lines.all? { |l| l.strip.empty? || l.start_with?('>') } body.force_encoding(encoding).encode("UTF-8") end @@ -57,30 +59,6 @@ module Gitlab rescue nil end - - REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date) - REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" }) - - def discourse_email_trimmer(body) - lines = body.scrub.lines.to_a - range_end = 0 - - lines.each_with_index do |l, idx| - # This one might be controversial but so many reply lines have years, times and end with a colon. - # Let's try it and see how well it works. - break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) || - (l =~ /On \w+ \d+,? \d+,?.*wrote:/) - - # Headers on subsequent lines - break if (0..2).all? { |off| lines[idx + off] =~ REPLYING_HEADER_REGEX } - # Headers on the same line - break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3 - - range_end = idx - end - - lines[0..range_end].join.strip - end end end end diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb index c7a0139d32a..106f2b0f10a 100644 --- a/spec/lib/gitlab/email/reply_parser_spec.rb +++ b/spec/lib/gitlab/email/reply_parser_spec.rb @@ -88,8 +88,6 @@ describe Gitlab::Email::ReplyParser, lib: true do expect(test_parse_body(fixture_file("emails/inline_reply.eml"))). to eq( <<-BODY.strip_heredoc.chomp - On Wed, Oct 8, 2014 at 11:12 AM, techAPJ wrote: - > techAPJ > November 28 > @@ -158,7 +156,7 @@ describe Gitlab::Email::ReplyParser, lib: true do <<-BODY.strip_heredoc.chomp ### this is a reply from iOS default mail - The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. Here's some **bold** markdown text.