1
0
Fork 0
mirror of https://github.com/rails/rails.git synced 2022-11-09 12:12:34 -05:00
rails--rails/activesupport/bin/generate_tables
Xavier Noria e6ab70c439 applies new string literal convention to the rest of the project
The current code base is not uniform. After some discussion,
we have chosen to go with double quotes by default.
2016-08-06 19:28:46 +02:00

139 lines
4.7 KiB
Ruby
Executable file

#!/usr/bin/env ruby
begin
$:.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
require "active_support"
rescue IOError
end
require "open-uri"
require "tmpdir"
module ActiveSupport
module Multibyte
module Unicode
class UnicodeDatabase
def load; end
end
class DatabaseGenerator
BASE_URI = "http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd/"
SOURCES = {
:codepoints => BASE_URI + "UnicodeData.txt",
:composition_exclusion => BASE_URI + "CompositionExclusions.txt",
:grapheme_break_property => BASE_URI + "auxiliary/GraphemeBreakProperty.txt",
:cp1252 => "http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT"
}
def initialize
@ucd = Unicode::UnicodeDatabase.new
end
def parse_codepoints(line)
codepoint = Codepoint.new
raise "Could not parse input." unless line =~ /^
([0-9A-F]+); # code
([^;]+); # name
([A-Z]+); # general category
([0-9]+); # canonical combining class
([A-Z]+); # bidi class
(<([A-Z]*)>)? # decomposition type
((\ ?[0-9A-F]+)*); # decomposition mapping
([0-9]*); # decimal digit
([0-9]*); # digit
([^;]*); # numeric
([YN]*); # bidi mirrored
([^;]*); # unicode 1.0 name
([^;]*); # iso comment
([0-9A-F]*); # simple uppercase mapping
([0-9A-F]*); # simple lowercase mapping
([0-9A-F]*)$/ix # simple titlecase mapping
codepoint.code = $1.hex
codepoint.combining_class = Integer($4)
codepoint.decomp_type = $7
codepoint.decomp_mapping = ($8=="") ? nil : $8.split.collect(&:hex)
codepoint.uppercase_mapping = ($16=="") ? 0 : $16.hex
codepoint.lowercase_mapping = ($17=="") ? 0 : $17.hex
@ucd.codepoints[codepoint.code] = codepoint
end
def parse_grapheme_break_property(line)
if line =~ /^([0-9A-F.]+)\s*;\s*([\w]+)\s*#/
type = $2.downcase.intern
@ucd.boundary[type] ||= []
if $1.include? ".."
parts = $1.split ".."
@ucd.boundary[type] << (parts[0].hex..parts[1].hex)
else
@ucd.boundary[type] << $1.hex
end
end
end
def parse_composition_exclusion(line)
if line =~ /^([0-9A-F]+)/i
@ucd.composition_exclusion << $1.hex
end
end
def parse_cp1252(line)
if line =~ /^([0-9A-Fx]+)\s([0-9A-Fx]+)/i
@ucd.cp1252[$1.hex] = $2.hex
end
end
def create_composition_map
@ucd.codepoints.each do |_, cp|
if !cp.nil? and cp.combining_class == 0 and cp.decomp_type.nil? and !cp.decomp_mapping.nil? and cp.decomp_mapping.length == 2 and @ucd.codepoints[cp.decomp_mapping[0]].combining_class == 0 and !@ucd.composition_exclusion.include?(cp.code)
@ucd.composition_map[cp.decomp_mapping[0]] ||= {}
@ucd.composition_map[cp.decomp_mapping[0]][cp.decomp_mapping[1]] = cp.code
end
end
end
def normalize_boundary_map
@ucd.boundary.each do |k,v|
if [:lf, :cr].include? k
@ucd.boundary[k] = v[0]
end
end
end
def parse
SOURCES.each do |type, url|
filename = File.join(Dir.tmpdir, "#{url.split('/').last}")
unless File.exist?(filename)
$stderr.puts "Downloading #{url.split('/').last}"
File.open(filename, "wb") do |target|
open(url) do |source|
source.each_line { |line| target.write line }
end
end
end
File.open(filename) do |file|
file.each_line { |line| send "parse_#{type}".intern, line }
end
end
create_composition_map
normalize_boundary_map
end
def dump_to(filename)
File.open(filename, "wb") do |f|
f.write Marshal.dump([@ucd.codepoints, @ucd.composition_exclusion, @ucd.composition_map, @ucd.boundary, @ucd.cp1252])
end
end
end
end
end
end
if __FILE__ == $0
filename = ActiveSupport::Multibyte::Unicode::UnicodeDatabase.filename
generator = ActiveSupport::Multibyte::Unicode::DatabaseGenerator.new
generator.parse
print "Writing to: #{filename}"
generator.dump_to filename
puts " (#{File.size(filename)} bytes)"
end