switch to email_reply_trimmer from discourse
This commit is contained in:
parent
174dd4cee0
commit
a3bb2463c4
2
Gemfile
2
Gemfile
|
@ -332,7 +332,7 @@ gem 'octokit', '~> 4.3.0'
|
||||||
|
|
||||||
gem 'mail_room', '~> 0.9.0'
|
gem 'mail_room', '~> 0.9.0'
|
||||||
|
|
||||||
gem 'email_reply_parser', '~> 0.5.8'
|
gem 'email_reply_trimmer', '~> 0.1'
|
||||||
gem 'html2text'
|
gem 'html2text'
|
||||||
|
|
||||||
gem 'ruby-prof', '~> 0.16.2'
|
gem 'ruby-prof', '~> 0.16.2'
|
||||||
|
|
|
@ -167,7 +167,7 @@ GEM
|
||||||
railties (>= 4.2)
|
railties (>= 4.2)
|
||||||
dropzonejs-rails (0.7.2)
|
dropzonejs-rails (0.7.2)
|
||||||
rails (> 3.1)
|
rails (> 3.1)
|
||||||
email_reply_parser (0.5.8)
|
email_reply_trimmer (0.1.6)
|
||||||
email_spec (1.6.0)
|
email_spec (1.6.0)
|
||||||
launchy (~> 2.1)
|
launchy (~> 2.1)
|
||||||
mail (~> 2.2)
|
mail (~> 2.2)
|
||||||
|
@ -839,7 +839,7 @@ DEPENDENCIES
|
||||||
diffy (~> 3.1.0)
|
diffy (~> 3.1.0)
|
||||||
doorkeeper (~> 4.2.0)
|
doorkeeper (~> 4.2.0)
|
||||||
dropzonejs-rails (~> 0.7.1)
|
dropzonejs-rails (~> 0.7.1)
|
||||||
email_reply_parser (~> 0.5.8)
|
email_reply_trimmer (~> 0.1)
|
||||||
email_spec (~> 1.6.0)
|
email_spec (~> 1.6.0)
|
||||||
factory_girl_rails (~> 4.7.0)
|
factory_girl_rails (~> 4.7.0)
|
||||||
ffaker (~> 2.0.0)
|
ffaker (~> 2.0.0)
|
||||||
|
|
|
@ -13,9 +13,11 @@ module Gitlab
|
||||||
|
|
||||||
encoding = body.encoding
|
encoding = body.encoding
|
||||||
|
|
||||||
body = discourse_email_trimmer(body)
|
body = EmailReplyTrimmer.trim(body)
|
||||||
|
|
||||||
body = EmailReplyParser.parse_reply(body)
|
# TODO [jneen]: do we want to allow empty-quoting? (replies only containing a blockquote)
|
||||||
|
# EmailReplyTrimmer allows this as a special case, so we detect it manually here.
|
||||||
|
return "" if body.lines.all? { |l| l.strip.empty? || l.start_with?('>') }
|
||||||
|
|
||||||
body.force_encoding(encoding).encode("UTF-8")
|
body.force_encoding(encoding).encode("UTF-8")
|
||||||
end
|
end
|
||||||
|
@ -57,30 +59,6 @@ module Gitlab
|
||||||
rescue
|
rescue
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
|
|
||||||
REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
|
|
||||||
|
|
||||||
def discourse_email_trimmer(body)
|
|
||||||
lines = body.scrub.lines.to_a
|
|
||||||
range_end = 0
|
|
||||||
|
|
||||||
lines.each_with_index do |l, idx|
|
|
||||||
# This one might be controversial but so many reply lines have years, times and end with a colon.
|
|
||||||
# Let's try it and see how well it works.
|
|
||||||
break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
|
|
||||||
(l =~ /On \w+ \d+,? \d+,?.*wrote:/)
|
|
||||||
|
|
||||||
# Headers on subsequent lines
|
|
||||||
break if (0..2).all? { |off| lines[idx + off] =~ REPLYING_HEADER_REGEX }
|
|
||||||
# Headers on the same line
|
|
||||||
break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
|
|
||||||
|
|
||||||
range_end = idx
|
|
||||||
end
|
|
||||||
|
|
||||||
lines[0..range_end].join.strip
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -88,8 +88,6 @@ describe Gitlab::Email::ReplyParser, lib: true do
|
||||||
expect(test_parse_body(fixture_file("emails/inline_reply.eml"))).
|
expect(test_parse_body(fixture_file("emails/inline_reply.eml"))).
|
||||||
to eq(
|
to eq(
|
||||||
<<-BODY.strip_heredoc.chomp
|
<<-BODY.strip_heredoc.chomp
|
||||||
On Wed, Oct 8, 2014 at 11:12 AM, techAPJ <info@unconfigured.discourse.org> wrote:
|
|
||||||
|
|
||||||
> techAPJ <https://meta.discourse.org/users/techapj>
|
> techAPJ <https://meta.discourse.org/users/techapj>
|
||||||
> November 28
|
> November 28
|
||||||
>
|
>
|
||||||
|
|
Loading…
Reference in New Issue