Remove ActiveSupport definition dependency; Update to Unicode 7.0.0
This commit is contained in:
parent
64ef1a12ef
commit
01263a8ad0
|
@ -1,6 +1,26 @@
|
|||
module Mail
|
||||
module Multibyte
|
||||
module Unicode
|
||||
# Adapted from https://github.com/rails/rails/blob/master/activesupport/lib/active_support/multibyte/unicode.rb
|
||||
# under the MIT license
|
||||
# The Unicode version that is supported by the implementation
|
||||
UNICODE_VERSION = '7.0.0'
|
||||
|
||||
# Holds data about a codepoint in the Unicode database.
|
||||
class Codepoint
|
||||
attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
|
||||
|
||||
# Initializing Codepoint object with default values
|
||||
def initialize
|
||||
@combining_class = 0
|
||||
@uppercase_mapping = 0
|
||||
@lowercase_mapping = 0
|
||||
end
|
||||
|
||||
def swapcase_mapping
|
||||
uppercase_mapping > 0 ? uppercase_mapping : lowercase_mapping
|
||||
end
|
||||
end
|
||||
|
||||
extend self
|
||||
|
||||
|
@ -8,9 +28,6 @@ module Mail
|
|||
# information about normalization.
|
||||
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
||||
|
||||
# The Unicode version that is supported by the implementation
|
||||
UNICODE_VERSION = '5.2.0'
|
||||
|
||||
# The default normalization used for operations that require normalization. It can be set to any of the
|
||||
# normalizations in NORMALIZATION_FORMS.
|
||||
#
|
||||
|
@ -308,11 +325,6 @@ module Mail
|
|||
end.pack('U*')
|
||||
end
|
||||
|
||||
# Holds data about a codepoint in the Unicode database
|
||||
class Codepoint
|
||||
attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
|
||||
end
|
||||
|
||||
# Holds static data from the Unicode database
|
||||
class UnicodeDatabase
|
||||
ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
|
||||
|
@ -390,11 +402,3 @@ module Mail
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
unless defined?(ActiveSupport)
|
||||
module ActiveSupport
|
||||
unless const_defined?(:Multibyte)
|
||||
Multibyte = Mail::Multibyte
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,144 @@
|
|||
#!/usr/bin/env ruby
|
||||
# from https://github.com/rails/rails/blob/master/activesupport/bin/generate_tables
|
||||
# under MIT license
|
||||
# Usage:
|
||||
# ruby tasks/generate_tables
|
||||
# The Unicode version downloaded is determined by the UNICODE_VERSION lib/mail/multibye/unicode.rb
|
||||
require './lib/mail/multibyte/unicode'
|
||||
|
||||
require 'open-uri'
|
||||
require 'tmpdir'
|
||||
|
||||
module Mail
|
||||
module Multibyte
|
||||
module Unicode
|
||||
|
||||
class UnicodeDatabase
|
||||
def load; end
|
||||
end
|
||||
|
||||
class DatabaseGenerator
|
||||
BASE_URI = "http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd/"
|
||||
SOURCES = {
|
||||
:codepoints => BASE_URI + 'UnicodeData.txt',
|
||||
:composition_exclusion => BASE_URI + 'CompositionExclusions.txt',
|
||||
:grapheme_break_property => BASE_URI + 'auxiliary/GraphemeBreakProperty.txt',
|
||||
:cp1252 => 'http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT'
|
||||
}
|
||||
|
||||
def initialize
|
||||
@ucd = Unicode::UnicodeDatabase.new
|
||||
end
|
||||
|
||||
def parse_codepoints(line)
|
||||
codepoint = Codepoint.new
|
||||
raise "Could not parse input." unless line =~ /^
|
||||
([0-9A-F]+); # code
|
||||
([^;]+); # name
|
||||
([A-Z]+); # general category
|
||||
([0-9]+); # canonical combining class
|
||||
([A-Z]+); # bidi class
|
||||
(<([A-Z]*)>)? # decomposition type
|
||||
((\ ?[0-9A-F]+)*); # decomposition mapping
|
||||
([0-9]*); # decimal digit
|
||||
([0-9]*); # digit
|
||||
([^;]*); # numeric
|
||||
([YN]*); # bidi mirrored
|
||||
([^;]*); # unicode 1.0 name
|
||||
([^;]*); # iso comment
|
||||
([0-9A-F]*); # simple uppercase mapping
|
||||
([0-9A-F]*); # simple lowercase mapping
|
||||
([0-9A-F]*)$/ix # simple titlecase mapping
|
||||
codepoint.code = $1.hex
|
||||
#codepoint.name = $2
|
||||
#codepoint.category = $3
|
||||
codepoint.combining_class = Integer($4)
|
||||
#codepoint.bidi_class = $5
|
||||
codepoint.decomp_type = $7
|
||||
codepoint.decomp_mapping = ($8=='') ? nil : $8.split.collect { |element| element.hex }
|
||||
#codepoint.bidi_mirrored = ($13=='Y') ? true : false
|
||||
codepoint.uppercase_mapping = ($16=='') ? 0 : $16.hex
|
||||
codepoint.lowercase_mapping = ($17=='') ? 0 : $17.hex
|
||||
#codepoint.titlecase_mapping = ($18=='') ? nil : $18.hex
|
||||
@ucd.codepoints[codepoint.code] = codepoint
|
||||
end
|
||||
|
||||
def parse_grapheme_break_property(line)
|
||||
if line =~ /^([0-9A-F.]+)\s*;\s*([\w]+)\s*#/
|
||||
type = $2.downcase.intern
|
||||
@ucd.boundary[type] ||= []
|
||||
if $1.include? '..'
|
||||
parts = $1.split '..'
|
||||
@ucd.boundary[type] << (parts[0].hex..parts[1].hex)
|
||||
else
|
||||
@ucd.boundary[type] << $1.hex
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def parse_composition_exclusion(line)
|
||||
if line =~ /^([0-9A-F]+)/i
|
||||
@ucd.composition_exclusion << $1.hex
|
||||
end
|
||||
end
|
||||
|
||||
def parse_cp1252(line)
|
||||
if line =~ /^([0-9A-Fx]+)\s([0-9A-Fx]+)/i
|
||||
@ucd.cp1252[$1.hex] = $2.hex
|
||||
end
|
||||
end
|
||||
|
||||
def create_composition_map
|
||||
@ucd.codepoints.each do |_, cp|
|
||||
if !cp.nil? and cp.combining_class == 0 and cp.decomp_type.nil? and !cp.decomp_mapping.nil? and cp.decomp_mapping.length == 2 and @ucd.codepoints[cp.decomp_mapping[0]].combining_class == 0 and !@ucd.composition_exclusion.include?(cp.code)
|
||||
@ucd.composition_map[cp.decomp_mapping[0]] ||= {}
|
||||
@ucd.composition_map[cp.decomp_mapping[0]][cp.decomp_mapping[1]] = cp.code
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_boundary_map
|
||||
@ucd.boundary.each do |k,v|
|
||||
if [:lf, :cr].include? k
|
||||
@ucd.boundary[k] = v[0]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def parse
|
||||
SOURCES.each do |type, url|
|
||||
filename = File.join(Dir.tmpdir, "#{url.split('/').last}")
|
||||
unless File.exist?(filename)
|
||||
$stderr.puts "Downloading #{url.split('/').last}"
|
||||
File.open(filename, 'wb') do |target|
|
||||
open(url) do |source|
|
||||
source.each_line { |line| target.write line }
|
||||
end
|
||||
end
|
||||
end
|
||||
File.open(filename) do |file|
|
||||
file.each_line { |line| send "parse_#{type}".intern, line }
|
||||
end
|
||||
end
|
||||
create_composition_map
|
||||
normalize_boundary_map
|
||||
end
|
||||
|
||||
def dump_to(filename)
|
||||
File.open(filename, 'wb') do |f|
|
||||
f.write Marshal.dump([@ucd.codepoints, @ucd.composition_exclusion, @ucd.composition_map, @ucd.boundary, @ucd.cp1252])
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if __FILE__ == $0
|
||||
filename = Mail::Multibyte::Unicode::UnicodeDatabase.filename
|
||||
generator = Mail::Multibyte::Unicode::DatabaseGenerator.new
|
||||
generator.parse
|
||||
print "Writing to: #{filename}"
|
||||
generator.dump_to filename
|
||||
puts " (#{File.size(filename)} bytes)"
|
||||
end
|
Loading…
Reference in New Issue