Merge the `RubyVer` module into `Utilities`

This commit is contained in:
David Rodríguez 2022-04-20 09:29:49 +02:00
parent a58bf11afe
commit 100a4f2169
No known key found for this signature in database
GPG Key ID: 1008A258BB37309C
5 changed files with 272 additions and 283 deletions

View File

@ -9,8 +9,6 @@ module Mail # :doc:
require 'net/smtp'
require 'mini_mime'
require 'mail/version_specific'
require 'mail/version'
require 'mail/indifferent_hash'

View File

@ -293,5 +293,270 @@ module Mail
def generate_message_id
"<#{Mail.random_tag}@#{::Socket.gethostname}.mail>"
end
class StrictCharsetEncoder
def encode(string, charset)
case charset
when /utf-?7/i
Mail::Utilities.decode_utf7(string)
else
string.force_encoding(Mail::Utilities.pick_encoding(charset))
end
end
end
class BestEffortCharsetEncoder
def encode(string, charset)
case charset
when /utf-?7/i
Mail::Utilities.decode_utf7(string)
else
string.force_encoding(pick_encoding(charset))
end
end
private
def pick_encoding(charset)
charset = case charset
when /ansi_x3.110-1983/
'ISO-8859-1'
when /Windows-?1258/i # Windows-1258 is similar to 1252
"Windows-1252"
else
charset
end
Mail::Utilities.pick_encoding(charset)
end
end
class << self
attr_accessor :charset_encoder
end
self.charset_encoder = BestEffortCharsetEncoder.new
# Escapes any parenthesis in a string that are unescaped this uses
# a Ruby 1.9.1 regexp feature of negative look behind
def Utilities.escape_paren( str )
re = /(?<!\\)([\(\)])/ # Only match unescaped parens
str.gsub(re) { |s| '\\' + s }
end
def Utilities.paren( str )
str = ::Mail::Utilities.unparen( str )
str = escape_paren( str )
'(' + str + ')'
end
def Utilities.escape_bracket( str )
re = /(?<!\\)([\<\>])/ # Only match unescaped brackets
str.gsub(re) { |s| '\\' + s }
end
def Utilities.bracket( str )
str = ::Mail::Utilities.unbracket( str )
str = escape_bracket( str )
'<' + str + '>'
end
def Utilities.decode_base64(str)
if !str.end_with?("=") && str.length % 4 != 0
str = str.ljust((str.length + 3) & ~3, "=")
end
str.unpack( 'm' ).first
end
def Utilities.encode_base64(str)
[str].pack( 'm' )
end
def Utilities.has_constant?(klass, string)
klass.const_defined?( string, false )
end
def Utilities.get_constant(klass, string)
klass.const_get( string )
end
def Utilities.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
to_encoding = Encoding.find(to_encoding)
replacement_char = to_encoding == Encoding::UTF_8 ? '<27>' : '?'
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char)
end
# From Ruby stdlib Net::IMAP
def Utilities.encode_utf7(string)
string.gsub(/(&)|[^\x20-\x7e]+/) do
if $1
"&-"
else
base64 = [$&.encode(Encoding::UTF_16BE)].pack("m0")
"&" + base64.delete("=").tr("/", ",") + "-"
end
end.force_encoding(Encoding::ASCII_8BIT)
end
def Utilities.decode_utf7(utf7)
utf7.gsub(/&([^-]+)?-/n) do
if $1
($1.tr(",", "/") + "===").unpack("m")[0].encode(Encoding::UTF_8, Encoding::UTF_16BE)
else
"&"
end
end
end
def Utilities.b_value_encode(str, encoding = nil)
encoding = str.encoding.to_s
[Utilities.encode_base64(str), encoding]
end
def Utilities.b_value_decode(str)
match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
if match
charset = match[1]
str = Utilities.decode_base64(match[2])
str = charset_encoder.encode(str, charset)
end
transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
end
def Utilities.q_value_encode(str, encoding = nil)
encoding = str.encoding.to_s
[Encodings::QuotedPrintable.encode(str), encoding]
end
def Utilities.q_value_decode(str)
match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
if match
charset = match[1]
string = match[2].gsub(/_/, '=20')
# Remove trailing = if it exists in a Q encoding
string = string.sub(/\=$/, '')
str = Encodings::QuotedPrintable.decode(string)
str = charset_encoder.encode(str, charset)
# We assume that binary strings hold utf-8 directly to work around
# jruby/jruby#829 which subtly changes String#encode semantics.
str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
end
transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
end
def Utilities.param_decode(str, encoding)
str = uri_parser.unescape(str)
str = charset_encoder.encode(str, encoding) if encoding
transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
end
def Utilities.param_encode(str)
encoding = str.encoding.to_s.downcase
language = Configuration.instance.param_encode_language
"#{encoding}'#{language}'#{uri_parser.escape(str)}"
end
def Utilities.uri_parser
URI::DEFAULT_PARSER
end
# Pick a Ruby encoding corresponding to the message charset. Most
# charsets have a Ruby encoding, but some need manual aliasing here.
#
# TODO: add this as a test somewhere:
# Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
# Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
def Utilities.pick_encoding(charset)
charset = charset.to_s
encoding = case charset.downcase
# ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
when /^iso[-_]?8859-(\d+)(-i)?$/
"ISO-8859-#{$1}"
# ISO-8859-15, ISO-2022-JP and alike
when /^iso[-_]?(\d{4})-?(\w{1,2})$/
"ISO-#{$1}-#{$2}"
# "ISO-2022-JP-KDDI" and alike
when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/
"ISO-#{$1}-#{$2}-#{$3}"
# UTF-8, UTF-32BE and alike
when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/
"UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')
# Windows-1252 and alike
when /^windows-?(.*)$/
"Windows-#{$1}"
when '8bit'
Encoding::ASCII_8BIT
# alternatives/misspellings of us-ascii seen in the wild
when /^iso[-_]?646(-us)?$/, 'us=ascii'
Encoding::ASCII
# Microsoft-specific alias for MACROMAN
when 'macintosh'
Encoding::MACROMAN
# Microsoft-specific alias for CP949 (Korean)
when 'ks_c_5601-1987'
Encoding::CP949
# Wrongly written Shift_JIS (Japanese)
when 'shift-jis'
Encoding::Shift_JIS
# GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
when 'gb2312'
Encoding::GB18030
when 'cp-850'
Encoding::CP850
when 'latin2'
Encoding::ISO_8859_2
else
charset
end
convert_to_encoding(encoding)
end
def Utilities.string_byteslice(str, *args)
str.byteslice(*args)
end
class << self
private
def convert_to_encoding(encoding)
if encoding.is_a?(Encoding)
encoding
else
# Fall back to ASCII for charsets that Ruby doesn't recognize
begin
Encoding.find(encoding)
rescue ArgumentError
Encoding::BINARY
end
end
end
def transcode_to_scrubbed_utf8(str)
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "<EFBFBD>")
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "<EFBFBD>").encode(Encoding::UTF_8)
end
end
end
end

View File

@ -1,271 +0,0 @@
# encoding: utf-8
# frozen_string_literal: true
module Mail
module Utilities
class StrictCharsetEncoder
def encode(string, charset)
case charset
when /utf-?7/i
Mail::Utilities.decode_utf7(string)
else
string.force_encoding(Mail::Utilities.pick_encoding(charset))
end
end
end
class BestEffortCharsetEncoder
def encode(string, charset)
case charset
when /utf-?7/i
Mail::Utilities.decode_utf7(string)
else
string.force_encoding(pick_encoding(charset))
end
end
private
def pick_encoding(charset)
charset = case charset
when /ansi_x3.110-1983/
'ISO-8859-1'
when /Windows-?1258/i # Windows-1258 is similar to 1252
"Windows-1252"
else
charset
end
Mail::Utilities.pick_encoding(charset)
end
end
class << self
attr_accessor :charset_encoder
end
self.charset_encoder = BestEffortCharsetEncoder.new
# Escapes any parenthesis in a string that are unescaped this uses
# a Ruby 1.9.1 regexp feature of negative look behind
def Utilities.escape_paren( str )
re = /(?<!\\)([\(\)])/ # Only match unescaped parens
str.gsub(re) { |s| '\\' + s }
end
def Utilities.paren( str )
str = ::Mail::Utilities.unparen( str )
str = escape_paren( str )
'(' + str + ')'
end
def Utilities.escape_bracket( str )
re = /(?<!\\)([\<\>])/ # Only match unescaped brackets
str.gsub(re) { |s| '\\' + s }
end
def Utilities.bracket( str )
str = ::Mail::Utilities.unbracket( str )
str = escape_bracket( str )
'<' + str + '>'
end
def Utilities.decode_base64(str)
if !str.end_with?("=") && str.length % 4 != 0
str = str.ljust((str.length + 3) & ~3, "=")
end
str.unpack( 'm' ).first
end
def Utilities.encode_base64(str)
[str].pack( 'm' )
end
def Utilities.has_constant?(klass, string)
klass.const_defined?( string, false )
end
def Utilities.get_constant(klass, string)
klass.const_get( string )
end
def Utilities.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
to_encoding = Encoding.find(to_encoding)
replacement_char = to_encoding == Encoding::UTF_8 ? '<27>' : '?'
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char)
end
# From Ruby stdlib Net::IMAP
def Utilities.encode_utf7(string)
string.gsub(/(&)|[^\x20-\x7e]+/) do
if $1
"&-"
else
base64 = [$&.encode(Encoding::UTF_16BE)].pack("m0")
"&" + base64.delete("=").tr("/", ",") + "-"
end
end.force_encoding(Encoding::ASCII_8BIT)
end
def Utilities.decode_utf7(utf7)
utf7.gsub(/&([^-]+)?-/n) do
if $1
($1.tr(",", "/") + "===").unpack("m")[0].encode(Encoding::UTF_8, Encoding::UTF_16BE)
else
"&"
end
end
end
def Utilities.b_value_encode(str, encoding = nil)
encoding = str.encoding.to_s
[Utilities.encode_base64(str), encoding]
end
def Utilities.b_value_decode(str)
match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
if match
charset = match[1]
str = Utilities.decode_base64(match[2])
str = charset_encoder.encode(str, charset)
end
transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
end
def Utilities.q_value_encode(str, encoding = nil)
encoding = str.encoding.to_s
[Encodings::QuotedPrintable.encode(str), encoding]
end
def Utilities.q_value_decode(str)
match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
if match
charset = match[1]
string = match[2].gsub(/_/, '=20')
# Remove trailing = if it exists in a Q encoding
string = string.sub(/\=$/, '')
str = Encodings::QuotedPrintable.decode(string)
str = charset_encoder.encode(str, charset)
# We assume that binary strings hold utf-8 directly to work around
# jruby/jruby#829 which subtly changes String#encode semantics.
str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
end
transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
end
def Utilities.param_decode(str, encoding)
str = uri_parser.unescape(str)
str = charset_encoder.encode(str, encoding) if encoding
transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
end
def Utilities.param_encode(str)
encoding = str.encoding.to_s.downcase
language = Configuration.instance.param_encode_language
"#{encoding}'#{language}'#{uri_parser.escape(str)}"
end
def Utilities.uri_parser
URI::DEFAULT_PARSER
end
# Pick a Ruby encoding corresponding to the message charset. Most
# charsets have a Ruby encoding, but some need manual aliasing here.
#
# TODO: add this as a test somewhere:
# Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
# Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
def Utilities.pick_encoding(charset)
charset = charset.to_s
encoding = case charset.downcase
# ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
when /^iso[-_]?8859-(\d+)(-i)?$/
"ISO-8859-#{$1}"
# ISO-8859-15, ISO-2022-JP and alike
when /^iso[-_]?(\d{4})-?(\w{1,2})$/
"ISO-#{$1}-#{$2}"
# "ISO-2022-JP-KDDI" and alike
when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/
"ISO-#{$1}-#{$2}-#{$3}"
# UTF-8, UTF-32BE and alike
when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/
"UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')
# Windows-1252 and alike
when /^windows-?(.*)$/
"Windows-#{$1}"
when '8bit'
Encoding::ASCII_8BIT
# alternatives/misspellings of us-ascii seen in the wild
when /^iso[-_]?646(-us)?$/, 'us=ascii'
Encoding::ASCII
# Microsoft-specific alias for MACROMAN
when 'macintosh'
Encoding::MACROMAN
# Microsoft-specific alias for CP949 (Korean)
when 'ks_c_5601-1987'
Encoding::CP949
# Wrongly written Shift_JIS (Japanese)
when 'shift-jis'
Encoding::Shift_JIS
# GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
when 'gb2312'
Encoding::GB18030
when 'cp-850'
Encoding::CP850
when 'latin2'
Encoding::ISO_8859_2
else
charset
end
convert_to_encoding(encoding)
end
def Utilities.string_byteslice(str, *args)
str.byteslice(*args)
end
class << self
private
def convert_to_encoding(encoding)
if encoding.is_a?(Encoding)
encoding
else
# Fall back to ASCII for charsets that Ruby doesn't recognize
begin
Encoding.find(encoding)
rescue ArgumentError
Encoding::BINARY
end
end
end
def transcode_to_scrubbed_utf8(str)
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "<EFBFBD>")
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "<EFBFBD>").encode(Encoding::UTF_8)
end
end
end
end

View File

@ -525,4 +525,11 @@ describe "Utilities Module" do
end
end
end
describe '.decode_base64' do
it "handles unpadded base64 correctly" do
decoded = Mail::Utilities.decode_base64("YQ")
expect(decoded).to eq "a"
end
end
end

View File

@ -1,10 +0,0 @@
# encoding: utf-8
# frozen_string_literal: true
require 'spec_helper'
describe '.decode_base64' do
it "handles unpadded base64 correctly" do
decoded = Mail::Utilities.decode_base64("YQ")
expect(decoded).to eq "a"
end
end