Merge the `RubyVer` module into `Utilities`
This commit is contained in:
parent
a58bf11afe
commit
100a4f2169
|
@ -9,8 +9,6 @@ module Mail # :doc:
|
|||
require 'net/smtp'
|
||||
require 'mini_mime'
|
||||
|
||||
require 'mail/version_specific'
|
||||
|
||||
require 'mail/version'
|
||||
|
||||
require 'mail/indifferent_hash'
|
||||
|
|
|
@ -293,5 +293,270 @@ module Mail
|
|||
def generate_message_id
|
||||
"<#{Mail.random_tag}@#{::Socket.gethostname}.mail>"
|
||||
end
|
||||
|
||||
class StrictCharsetEncoder
|
||||
def encode(string, charset)
|
||||
case charset
|
||||
when /utf-?7/i
|
||||
Mail::Utilities.decode_utf7(string)
|
||||
else
|
||||
string.force_encoding(Mail::Utilities.pick_encoding(charset))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class BestEffortCharsetEncoder
|
||||
def encode(string, charset)
|
||||
case charset
|
||||
when /utf-?7/i
|
||||
Mail::Utilities.decode_utf7(string)
|
||||
else
|
||||
string.force_encoding(pick_encoding(charset))
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def pick_encoding(charset)
|
||||
charset = case charset
|
||||
when /ansi_x3.110-1983/
|
||||
'ISO-8859-1'
|
||||
when /Windows-?1258/i # Windows-1258 is similar to 1252
|
||||
"Windows-1252"
|
||||
else
|
||||
charset
|
||||
end
|
||||
Mail::Utilities.pick_encoding(charset)
|
||||
end
|
||||
end
|
||||
|
||||
class << self
|
||||
attr_accessor :charset_encoder
|
||||
end
|
||||
self.charset_encoder = BestEffortCharsetEncoder.new
|
||||
|
||||
# Escapes any unescaped parentheses in a string. This uses
|
||||
# a Ruby 1.9.1 regexp feature, negative lookbehind.
|
||||
def Utilities.escape_paren( str )
|
||||
re = /(?<!\\)([\(\)])/ # Only match unescaped parens
|
||||
str.gsub(re) { |s| '\\' + s }
|
||||
end
|
||||
|
||||
def Utilities.paren( str )
|
||||
str = ::Mail::Utilities.unparen( str )
|
||||
str = escape_paren( str )
|
||||
'(' + str + ')'
|
||||
end
|
||||
|
||||
def Utilities.escape_bracket( str )
|
||||
re = /(?<!\\)([\<\>])/ # Only match unescaped brackets
|
||||
str.gsub(re) { |s| '\\' + s }
|
||||
end
|
||||
|
||||
def Utilities.bracket( str )
|
||||
str = ::Mail::Utilities.unbracket( str )
|
||||
str = escape_bracket( str )
|
||||
'<' + str + '>'
|
||||
end
|
||||
|
||||
def Utilities.decode_base64(str)
|
||||
if !str.end_with?("=") && str.length % 4 != 0
|
||||
str = str.ljust((str.length + 3) & ~3, "=")
|
||||
end
|
||||
str.unpack( 'm' ).first
|
||||
end
|
||||
|
||||
def Utilities.encode_base64(str)
|
||||
[str].pack( 'm' )
|
||||
end
|
||||
|
||||
def Utilities.has_constant?(klass, string)
|
||||
klass.const_defined?( string, false )
|
||||
end
|
||||
|
||||
def Utilities.get_constant(klass, string)
|
||||
klass.const_get( string )
|
||||
end
|
||||
|
||||
def Utilities.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
|
||||
to_encoding = Encoding.find(to_encoding)
|
||||
replacement_char = to_encoding == Encoding::UTF_8 ? '<27>' : '?'
|
||||
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char)
|
||||
end
|
||||
|
||||
# From Ruby stdlib Net::IMAP
|
||||
def Utilities.encode_utf7(string)
|
||||
string.gsub(/(&)|[^\x20-\x7e]+/) do
|
||||
if $1
|
||||
"&-"
|
||||
else
|
||||
base64 = [$&.encode(Encoding::UTF_16BE)].pack("m0")
|
||||
"&" + base64.delete("=").tr("/", ",") + "-"
|
||||
end
|
||||
end.force_encoding(Encoding::ASCII_8BIT)
|
||||
end
|
||||
|
||||
def Utilities.decode_utf7(utf7)
|
||||
utf7.gsub(/&([^-]+)?-/n) do
|
||||
if $1
|
||||
($1.tr(",", "/") + "===").unpack("m")[0].encode(Encoding::UTF_8, Encoding::UTF_16BE)
|
||||
else
|
||||
"&"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def Utilities.b_value_encode(str, encoding = nil)
|
||||
encoding = str.encoding.to_s
|
||||
[Utilities.encode_base64(str), encoding]
|
||||
end
|
||||
|
||||
def Utilities.b_value_decode(str)
|
||||
match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
|
||||
if match
|
||||
charset = match[1]
|
||||
str = Utilities.decode_base64(match[2])
|
||||
str = charset_encoder.encode(str, charset)
|
||||
end
|
||||
transcode_to_scrubbed_utf8(str)
|
||||
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError
|
||||
warn "Encoding conversion failed #{$!}"
|
||||
str.dup.force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
def Utilities.q_value_encode(str, encoding = nil)
|
||||
encoding = str.encoding.to_s
|
||||
[Encodings::QuotedPrintable.encode(str), encoding]
|
||||
end
|
||||
|
||||
def Utilities.q_value_decode(str)
|
||||
match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
|
||||
if match
|
||||
charset = match[1]
|
||||
string = match[2].gsub(/_/, '=20')
|
||||
# Remove trailing = if it exists in a Q encoding
|
||||
string = string.sub(/\=$/, '')
|
||||
str = Encodings::QuotedPrintable.decode(string)
|
||||
str = charset_encoder.encode(str, charset)
|
||||
# We assume that binary strings hold utf-8 directly to work around
|
||||
# jruby/jruby#829 which subtly changes String#encode semantics.
|
||||
str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
|
||||
end
|
||||
transcode_to_scrubbed_utf8(str)
|
||||
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
||||
warn "Encoding conversion failed #{$!}"
|
||||
str.dup.force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
def Utilities.param_decode(str, encoding)
|
||||
str = uri_parser.unescape(str)
|
||||
str = charset_encoder.encode(str, encoding) if encoding
|
||||
transcode_to_scrubbed_utf8(str)
|
||||
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
||||
warn "Encoding conversion failed #{$!}"
|
||||
str.dup.force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
def Utilities.param_encode(str)
|
||||
encoding = str.encoding.to_s.downcase
|
||||
language = Configuration.instance.param_encode_language
|
||||
"#{encoding}'#{language}'#{uri_parser.escape(str)}"
|
||||
end
|
||||
|
||||
def Utilities.uri_parser
|
||||
URI::DEFAULT_PARSER
|
||||
end
|
||||
|
||||
# Pick a Ruby encoding corresponding to the message charset. Most
|
||||
# charsets have a Ruby encoding, but some need manual aliasing here.
|
||||
#
|
||||
# TODO: add this as a test somewhere:
|
||||
# Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
|
||||
# Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
|
||||
def Utilities.pick_encoding(charset)
|
||||
charset = charset.to_s
|
||||
encoding = case charset.downcase
|
||||
|
||||
# ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
|
||||
when /^iso[-_]?8859-(\d+)(-i)?$/
|
||||
"ISO-8859-#{$1}"
|
||||
|
||||
# ISO-8859-15, ISO-2022-JP and alike
|
||||
when /^iso[-_]?(\d{4})-?(\w{1,2})$/
|
||||
"ISO-#{$1}-#{$2}"
|
||||
|
||||
# "ISO-2022-JP-KDDI" and alike
|
||||
when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/
|
||||
"ISO-#{$1}-#{$2}-#{$3}"
|
||||
|
||||
# UTF-8, UTF-32BE and alike
|
||||
when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/
|
||||
"UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')
|
||||
|
||||
# Windows-1252 and alike
|
||||
when /^windows-?(.*)$/
|
||||
"Windows-#{$1}"
|
||||
|
||||
when '8bit'
|
||||
Encoding::ASCII_8BIT
|
||||
|
||||
# alternatives/misspellings of us-ascii seen in the wild
|
||||
when /^iso[-_]?646(-us)?$/, 'us=ascii'
|
||||
Encoding::ASCII
|
||||
|
||||
# Microsoft-specific alias for MACROMAN
|
||||
when 'macintosh'
|
||||
Encoding::MACROMAN
|
||||
|
||||
# Microsoft-specific alias for CP949 (Korean)
|
||||
when 'ks_c_5601-1987'
|
||||
Encoding::CP949
|
||||
|
||||
# Wrongly written Shift_JIS (Japanese)
|
||||
when 'shift-jis'
|
||||
Encoding::Shift_JIS
|
||||
|
||||
# GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
|
||||
when 'gb2312'
|
||||
Encoding::GB18030
|
||||
|
||||
when 'cp-850'
|
||||
Encoding::CP850
|
||||
|
||||
when 'latin2'
|
||||
Encoding::ISO_8859_2
|
||||
|
||||
else
|
||||
charset
|
||||
end
|
||||
|
||||
convert_to_encoding(encoding)
|
||||
end
|
||||
|
||||
def Utilities.string_byteslice(str, *args)
|
||||
str.byteslice(*args)
|
||||
end
|
||||
|
||||
class << self
|
||||
private
|
||||
|
||||
def convert_to_encoding(encoding)
|
||||
if encoding.is_a?(Encoding)
|
||||
encoding
|
||||
else
|
||||
# Fall back to binary (ASCII-8BIT) for charsets that Ruby doesn't recognize
|
||||
begin
|
||||
Encoding.find(encoding)
|
||||
rescue ArgumentError
|
||||
Encoding::BINARY
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def transcode_to_scrubbed_utf8(str)
|
||||
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "<EFBFBD>")
|
||||
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "<EFBFBD>").encode(Encoding::UTF_8)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,271 +0,0 @@
|
|||
# encoding: utf-8
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Mail
|
||||
module Utilities
|
||||
class StrictCharsetEncoder
|
||||
def encode(string, charset)
|
||||
case charset
|
||||
when /utf-?7/i
|
||||
Mail::Utilities.decode_utf7(string)
|
||||
else
|
||||
string.force_encoding(Mail::Utilities.pick_encoding(charset))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class BestEffortCharsetEncoder
|
||||
def encode(string, charset)
|
||||
case charset
|
||||
when /utf-?7/i
|
||||
Mail::Utilities.decode_utf7(string)
|
||||
else
|
||||
string.force_encoding(pick_encoding(charset))
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def pick_encoding(charset)
|
||||
charset = case charset
|
||||
when /ansi_x3.110-1983/
|
||||
'ISO-8859-1'
|
||||
when /Windows-?1258/i # Windows-1258 is similar to 1252
|
||||
"Windows-1252"
|
||||
else
|
||||
charset
|
||||
end
|
||||
Mail::Utilities.pick_encoding(charset)
|
||||
end
|
||||
end
|
||||
|
||||
class << self
|
||||
attr_accessor :charset_encoder
|
||||
end
|
||||
self.charset_encoder = BestEffortCharsetEncoder.new
|
||||
|
||||
# Escapes any unescaped parentheses in a string. This uses
|
||||
# a Ruby 1.9.1 regexp feature, negative lookbehind.
|
||||
def Utilities.escape_paren( str )
|
||||
re = /(?<!\\)([\(\)])/ # Only match unescaped parens
|
||||
str.gsub(re) { |s| '\\' + s }
|
||||
end
|
||||
|
||||
def Utilities.paren( str )
|
||||
str = ::Mail::Utilities.unparen( str )
|
||||
str = escape_paren( str )
|
||||
'(' + str + ')'
|
||||
end
|
||||
|
||||
def Utilities.escape_bracket( str )
|
||||
re = /(?<!\\)([\<\>])/ # Only match unescaped brackets
|
||||
str.gsub(re) { |s| '\\' + s }
|
||||
end
|
||||
|
||||
def Utilities.bracket( str )
|
||||
str = ::Mail::Utilities.unbracket( str )
|
||||
str = escape_bracket( str )
|
||||
'<' + str + '>'
|
||||
end
|
||||
|
||||
def Utilities.decode_base64(str)
|
||||
if !str.end_with?("=") && str.length % 4 != 0
|
||||
str = str.ljust((str.length + 3) & ~3, "=")
|
||||
end
|
||||
str.unpack( 'm' ).first
|
||||
end
|
||||
|
||||
def Utilities.encode_base64(str)
|
||||
[str].pack( 'm' )
|
||||
end
|
||||
|
||||
def Utilities.has_constant?(klass, string)
|
||||
klass.const_defined?( string, false )
|
||||
end
|
||||
|
||||
def Utilities.get_constant(klass, string)
|
||||
klass.const_get( string )
|
||||
end
|
||||
|
||||
def Utilities.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
|
||||
to_encoding = Encoding.find(to_encoding)
|
||||
replacement_char = to_encoding == Encoding::UTF_8 ? '<27>' : '?'
|
||||
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char)
|
||||
end
|
||||
|
||||
# From Ruby stdlib Net::IMAP
|
||||
def Utilities.encode_utf7(string)
|
||||
string.gsub(/(&)|[^\x20-\x7e]+/) do
|
||||
if $1
|
||||
"&-"
|
||||
else
|
||||
base64 = [$&.encode(Encoding::UTF_16BE)].pack("m0")
|
||||
"&" + base64.delete("=").tr("/", ",") + "-"
|
||||
end
|
||||
end.force_encoding(Encoding::ASCII_8BIT)
|
||||
end
|
||||
|
||||
def Utilities.decode_utf7(utf7)
|
||||
utf7.gsub(/&([^-]+)?-/n) do
|
||||
if $1
|
||||
($1.tr(",", "/") + "===").unpack("m")[0].encode(Encoding::UTF_8, Encoding::UTF_16BE)
|
||||
else
|
||||
"&"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def Utilities.b_value_encode(str, encoding = nil)
|
||||
encoding = str.encoding.to_s
|
||||
[Utilities.encode_base64(str), encoding]
|
||||
end
|
||||
|
||||
def Utilities.b_value_decode(str)
|
||||
match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
|
||||
if match
|
||||
charset = match[1]
|
||||
str = Utilities.decode_base64(match[2])
|
||||
str = charset_encoder.encode(str, charset)
|
||||
end
|
||||
transcode_to_scrubbed_utf8(str)
|
||||
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError
|
||||
warn "Encoding conversion failed #{$!}"
|
||||
str.dup.force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
def Utilities.q_value_encode(str, encoding = nil)
|
||||
encoding = str.encoding.to_s
|
||||
[Encodings::QuotedPrintable.encode(str), encoding]
|
||||
end
|
||||
|
||||
def Utilities.q_value_decode(str)
|
||||
match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
|
||||
if match
|
||||
charset = match[1]
|
||||
string = match[2].gsub(/_/, '=20')
|
||||
# Remove trailing = if it exists in a Q encoding
|
||||
string = string.sub(/\=$/, '')
|
||||
str = Encodings::QuotedPrintable.decode(string)
|
||||
str = charset_encoder.encode(str, charset)
|
||||
# We assume that binary strings hold utf-8 directly to work around
|
||||
# jruby/jruby#829 which subtly changes String#encode semantics.
|
||||
str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
|
||||
end
|
||||
transcode_to_scrubbed_utf8(str)
|
||||
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
||||
warn "Encoding conversion failed #{$!}"
|
||||
str.dup.force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
def Utilities.param_decode(str, encoding)
|
||||
str = uri_parser.unescape(str)
|
||||
str = charset_encoder.encode(str, encoding) if encoding
|
||||
transcode_to_scrubbed_utf8(str)
|
||||
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
||||
warn "Encoding conversion failed #{$!}"
|
||||
str.dup.force_encoding(Encoding::UTF_8)
|
||||
end
|
||||
|
||||
def Utilities.param_encode(str)
|
||||
encoding = str.encoding.to_s.downcase
|
||||
language = Configuration.instance.param_encode_language
|
||||
"#{encoding}'#{language}'#{uri_parser.escape(str)}"
|
||||
end
|
||||
|
||||
def Utilities.uri_parser
|
||||
URI::DEFAULT_PARSER
|
||||
end
|
||||
|
||||
# Pick a Ruby encoding corresponding to the message charset. Most
|
||||
# charsets have a Ruby encoding, but some need manual aliasing here.
|
||||
#
|
||||
# TODO: add this as a test somewhere:
|
||||
# Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
|
||||
# Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
|
||||
def Utilities.pick_encoding(charset)
|
||||
charset = charset.to_s
|
||||
encoding = case charset.downcase
|
||||
|
||||
# ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
|
||||
when /^iso[-_]?8859-(\d+)(-i)?$/
|
||||
"ISO-8859-#{$1}"
|
||||
|
||||
# ISO-8859-15, ISO-2022-JP and alike
|
||||
when /^iso[-_]?(\d{4})-?(\w{1,2})$/
|
||||
"ISO-#{$1}-#{$2}"
|
||||
|
||||
# "ISO-2022-JP-KDDI" and alike
|
||||
when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/
|
||||
"ISO-#{$1}-#{$2}-#{$3}"
|
||||
|
||||
# UTF-8, UTF-32BE and alike
|
||||
when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/
|
||||
"UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')
|
||||
|
||||
# Windows-1252 and alike
|
||||
when /^windows-?(.*)$/
|
||||
"Windows-#{$1}"
|
||||
|
||||
when '8bit'
|
||||
Encoding::ASCII_8BIT
|
||||
|
||||
# alternatives/misspellings of us-ascii seen in the wild
|
||||
when /^iso[-_]?646(-us)?$/, 'us=ascii'
|
||||
Encoding::ASCII
|
||||
|
||||
# Microsoft-specific alias for MACROMAN
|
||||
when 'macintosh'
|
||||
Encoding::MACROMAN
|
||||
|
||||
# Microsoft-specific alias for CP949 (Korean)
|
||||
when 'ks_c_5601-1987'
|
||||
Encoding::CP949
|
||||
|
||||
# Wrongly written Shift_JIS (Japanese)
|
||||
when 'shift-jis'
|
||||
Encoding::Shift_JIS
|
||||
|
||||
# GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
|
||||
when 'gb2312'
|
||||
Encoding::GB18030
|
||||
|
||||
when 'cp-850'
|
||||
Encoding::CP850
|
||||
|
||||
when 'latin2'
|
||||
Encoding::ISO_8859_2
|
||||
|
||||
else
|
||||
charset
|
||||
end
|
||||
|
||||
convert_to_encoding(encoding)
|
||||
end
|
||||
|
||||
def Utilities.string_byteslice(str, *args)
|
||||
str.byteslice(*args)
|
||||
end
|
||||
|
||||
class << self
|
||||
private
|
||||
|
||||
def convert_to_encoding(encoding)
|
||||
if encoding.is_a?(Encoding)
|
||||
encoding
|
||||
else
|
||||
# Fall back to binary (ASCII-8BIT) for charsets that Ruby doesn't recognize
|
||||
begin
|
||||
Encoding.find(encoding)
|
||||
rescue ArgumentError
|
||||
Encoding::BINARY
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def transcode_to_scrubbed_utf8(str)
|
||||
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "<EFBFBD>")
|
||||
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "<EFBFBD>").encode(Encoding::UTF_8)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -525,4 +525,11 @@ describe "Utilities Module" do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '.decode_base64' do
|
||||
it "handles unpadded base64 correctly" do
|
||||
decoded = Mail::Utilities.decode_base64("YQ")
|
||||
expect(decoded).to eq "a"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
# encoding: utf-8
|
||||
# frozen_string_literal: true
|
||||
require 'spec_helper'
|
||||
|
||||
describe '.decode_base64' do
|
||||
it "handles unpadded base64 correctly" do
|
||||
decoded = Mail::Utilities.decode_base64("YQ")
|
||||
expect(decoded).to eq "a"
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue