2015-08-20 14:05:06 -04:00
|
|
|
# Inspired in great part by Discourse's Email::Receiver
|
|
|
|
module Gitlab
  module Email
    # Extracts the text a user wrote in an email reply, dropping quoted
    # history and forwarded-header blocks.
    #
    # The pipeline in #execute is: pick a plain-text body, trim it with the
    # line-based heuristics in #discourse_email_trimmer, hand the remainder to
    # EmailReplyParser, and return the result encoded as UTF-8.
    class ReplyParser
      # Header labels that mail clients print above quoted messages.
      # "Reply To" is deliberately ONE label: writing this list with %w would
      # split it into "Reply" and a second "To", and the duplicate "To" would
      # be counted twice by the "headers on the same line" check.
      REPLYING_HEADER_LABELS = [
        "From", "Sent", "To", "Subject", "Reply To", "Cc", "Bcc", "Date"
      ].freeze

      # Matches any of the labels above immediately followed by a colon.
      REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })

      attr_accessor :message

      # @param message the mail object to parse; it must respond to
      #   multipart?, text_part, content_type, charset and body
      #   (presumably a Mail::Message - confirm against callers).
      def initialize(message)
        @message = message
      end

      # Runs the whole extraction pipeline.
      #
      # @return [String] the reply text encoded as UTF-8; empty when no usable
      #   plain-text body could be found or decoded.
      def execute
        body = select_body(message)

        # Remember the encoding so it can be re-applied after the parsing
        # steps, which may hand back a string tagged differently.
        encoding = body.encoding

        body = discourse_email_trimmer(body)
        body = EmailReplyParser.parse_reply(body)

        body.force_encoding(encoding).encode("UTF-8")
      end

      private

      # Picks the plain-text body of the message.
      #
      # @return [String] the decoded text, or "" when the message has no
      #   plain-text part, could not be decoded, or still contains raw MIME
      #   markers.
      def select_body(message)
        text = message.text_part if message.multipart?
        text ||= message if message.content_type !~ %r{text/html}

        return "" unless text

        text = fix_charset(text)

        # fix_charset returns nil when decoding fails; returning nil from here
        # would make #execute crash on `body.encoding`.
        return "" unless text

        # Certain trigger phrases that mean we didn't parse correctly
        return "" if text =~ %r{(Content\-Type\:|multipart/alternative|text/plain)}

        text
      end

      # Force encoding to UTF-8 on a Mail::Message or Mail::Part.
      #
      # @return [String, nil] the body as a UTF-8 string, or nil when the
      #   object is nil or the body cannot be decoded/transcoded.
      def fix_charset(object)
        return nil if object.nil?

        if object.charset
          # Normalise the "utf8" spelling to the canonical "UTF-8" name.
          charset = object.charset.gsub(/utf8/i, "UTF-8")
          object.body.decoded.force_encoding(charset).encode("UTF-8").to_s
        else
          object.body.to_s
        end
      rescue StandardError
        # Deliberate best-effort: an unknown charset or invalid byte sequence
        # means "no usable text", which callers handle as nil.
        nil
      end

      # Keeps only the lines that come before the first line that looks like
      # the start of quoted content.
      #
      # @param body [String] the decoded message text.
      # @return [String] the leading lines, joined and stripped; "" when the
      #   very first line is already a reply boundary.
      def discourse_email_trimmer(body)
        lines = body.scrub.lines

        cutoff = lines.each_index.find { |idx| reply_boundary?(lines, idx) }
        kept = cutoff ? lines.first(cutoff) : lines

        kept.join.strip
      end

      # Tells whether the line at +idx+ starts quoted content.
      def reply_boundary?(lines, idx)
        line = lines[idx]

        # This one might be controversial but so many reply lines have years,
        # times and end with a colon.
        return true if line =~ /\d{4}/ && line =~ /\d:\d\d/ && line =~ /\:$/

        return true if line =~ /On \w+ \d+,? \d+,?.*wrote:/

        # Headers on subsequent lines: this line and the next two all carry a
        # header label (lines past the end are nil and never match).
        return true if (0..2).all? { |off| lines[idx + off] =~ REPLYING_HEADER_REGEX }

        # Headers on the same line
        REPLYING_HEADER_LABELS.count { |label| line.include?(label) } >= 3
      end
    end
  end
end
|