mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	 7aaf5b2878
			
		
	
	
		7aaf5b2878
		
	
	
	
	
		
			
			git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66023 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
		
			
				
	
	
		
			577 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Ruby
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			577 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Ruby
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env ruby
 | |
| 
 | |
| # Creates the data structures needed by Oniguruma to map Unicode codepoints to
 | |
| # property names and POSIX character classes
 | |
| #
 | |
# To use this, get UnicodeData.txt, Scripts.txt, PropList.txt,
# PropertyAliases.txt, PropertyValueAliases.txt, DerivedCoreProperties.txt,
# DerivedAge.txt, Blocks.txt and auxiliary/GraphemeBreakProperty.txt from
# unicode.org (http://unicode.org/Public/UNIDATA/), together with
# emoji-data.txt in a second directory, and run the following command:
#   ruby tool/enc-unicode.rb data_dir emoji_data_dir > enc/unicode/name2ctype.kwd
# This gives you the source file for gperf.  After this, simply make ruby.
 | |
| 
 | |
# Command line: [--header] data_directory emoji_data_directory
# The optional leading switch is consumed here so that exactly the two
# directory arguments remain in ARGV.
header = ARGV[0] == "--header"
ARGV.shift if header
abort "Usage: #{$0} data_directory emoji_data_directory" unless ARGV.size == 2

# When a directory path ends in a component made only of digits and dots,
# that component is taken as the expected version; otherwise the entry is
# nil and data_foreach fills it in from the first file it reads.
pat = /(?:\A|\/)([.\d]+)\z/
$versions = {
  :Unicode => ARGV[0][pat, 1],
  :Emoji => ARGV[1][pat, 1],
}

# Names of the POSIX bracket classes, in the order their tables are emitted.
POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print XPosixPunct Space Upper XDigit Word Alnum ASCII Punct]
 | |
| 
 | |
# Collapses a list of codepoints into inclusive [first, last] ranges.
#
# codepoints:: Array of Integer codepoints. It may be unsorted and may
#              contain duplicates. NOTE: the array is sorted in place, so
#              the caller's array is ordered afterwards.
#
# Returns an Array of two-element Arrays. A run consisting of a single
# codepoint yields a pair whose ends are equal, e.g. [0x0020, 0x0020].
# An empty input yields an empty Array.
def pair_codepoints(codepoints)
  codepoints.sort!

  # Nothing to pair. Without this guard the slice below would be nil for
  # an empty array and iterating it would raise NoMethodError.
  return [] if codepoints.empty?

  last_cp = codepoints.first
  pairs = [[last_cp, nil]]
  codepoints[1..-1].each do |codepoint|
    # Duplicates never open or close a range.
    next if last_cp == codepoint

    # If the current codepoint does not follow directly on from the last
    # codepoint, the last codepoint represents the end of the current range,
    # and the current codepoint represents the start of the next range.
    if last_cp.next != codepoint
      pairs[-1][-1] = last_cp
      pairs << [codepoint, nil]
    end
    last_cp = codepoint
  end

  # The final pair has as its endpoint the last codepoint for this property
  pairs[-1][-1] = codepoints.last
  pairs
end
 | |
| 
 | |
# Reads a UnicodeData.txt style file (semicolon separated fields, hexadecimal
# codepoint first) and groups codepoints by General Category.
#
# file:: path of the file to read.
#
# Returns [names, data]. data maps a property name to an Array of
# codepoints; names is the sorted list of data's keys minus POSIX_NAMES.
def parse_unicode_data(file)
  data = {
    'Any' => (0x0000..0x10ffff).to_a,
    'Assigned' => [],
    'ASCII' => (0..0x007F).to_a,
    'NEWLINE' => [0x0a],
    'Cn' => []
  }
  prev_cp = 0
  range_start = nil

  IO.foreach(file) do |line|
    fields = line.split(';')
    cp = fields[0].to_i(16)
    label = fields[1]

    # "<Name, First>" / "<Name, Last>" line pairs describe a whole range;
    # every other line describes a single codepoint.
    if /\A<(.*),\s*First>\z/ =~ label
      range_start = cp
      next
    elsif /\A<(.*),\s*Last>\z/ =~ label
      cps = (range_start..cp).to_a
    else
      range_start = cp
      cps = [cp]
    end

    # Unassigned characters (Cn) are not listed in the file, so they are
    # derived from the gap between the previously processed codepoint and
    # the start of the current entry.
    data['Cn'].concat((prev_cp.next...range_start).to_a)

    # Assigned: every codepoint that the file does list.
    data['Assigned'].concat(cps)

    # Third field is the General Category (e.g. "Lu"); its first letter is
    # the Major Category (e.g. "L").
    general = fields[2]
    (data[general] ||= []).concat(cps)
    (data[general[0, 1]] ||= []).concat(cps)

    prev_cp = cp
  end

  # Everything after the last listed codepoint up to 0x10ffff is unassigned
  # too; Cn as a whole is also part of the major category C.
  data['Cn'] += (prev_cp.next..0x10ffff).to_a
  data['C'] += data['Cn']

  # Special case for LC (Cased_Letter). LC = Ll + Lt + Lu
  data['LC'] = data.values_at('Ll', 'Lt', 'Lu').inject(:+)

  [data.keys.sort - POSIX_NAMES, data]
end
 | |
| 
 | |
# Adds the POSIX bracket classes (e.g. [[:alpha:]]) to +data+, deriving each
# one from properties that are already present in the hash.
#
# data:: Hash of property name => Array of codepoints; modified in place.
#        Classes that are plain renames share the very same Array object
#        as the property they come from.
#
# Returns the Array stored under 'Print'.
def define_posix_props(data)
  alpha = data['Alphabetic']
  punct = data['Punctuation']
  digit = data['Decimal_Number']
  space = data['White_Space']
  separators = data['Space_Separator']
  cntrl = data['Cc']

  data['Alpha'] = alpha
  data['Upper'] = data['Uppercase']
  data['Lower'] = data['Lowercase']
  data['Punct'] = punct
  # The bracket form of punct additionally matches these ASCII symbols.
  data['XPosixPunct'] = punct + [0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e]
  data['Digit'] = digit
  # 0-9, A-F, a-f
  data['XDigit'] = [0x0030..0x0039, 0x0041..0x0046, 0x0061..0x0066].inject([]) do |acc, range|
    acc + range.to_a
  end
  data['Alnum'] = alpha + digit
  data['Space'] = space
  data['Blank'] = separators + [0x0009]
  data['Cntrl'] = cntrl
  data['Word'] = alpha + data['Mark'] + digit + data['Connector_Punctuation']

  graph = data['Any'] - space - cntrl - data['Surrogate'] - data['Unassigned']
  data['Graph'] = graph
  data['Print'] = graph + separators
end
 | |
| 
 | |
# Reads the per-property listing files and records, for every property
# found, its codepoints in +data+ and a human-readable group title in
# +categories+.
#
# A property's codepoints are accumulated from "XXXX[..YYYY] ; Name" lines
# and stored when the following "# Total code points:" (or
# "# Total elements:") line is reached.
#
# data::       Hash of property name => Array of codepoints (modified).
# categories:: Hash of property name => title (modified).
#
# Returns the names of all properties read, in reading order, followed by
# 'Unknown'.
def parse_scripts(data, categories)
  sources = [
    ['DerivedCoreProperties.txt', 'Derived Property'],
    ['Scripts.txt', 'Script'],
    ['PropList.txt', 'Binary Property'],
    ['emoji-data.txt', 'Emoji']
  ]
  prop = nil
  pending = []
  by_title = {}

  sources.each do |fn, title|
    data_foreach(fn) do |line|
      case line
      when /^# Total (?:code points|elements): /
        data[prop] = pending
        categories[prop] = title
        (by_title[title] ||= []) << prop
        pending = []
      when /^([0-9a-fA-F]+)(?:\.\.([0-9a-fA-F]+))?\s*;\s*(\w+)/
        first, last, prop = $1, $2, $3
        if last
          pending.concat((first.to_i(16)..last.to_i(16)).to_a)
        else
          pending.push(first.to_i(16))
        end
      end
    end
  end

  #  All code points not explicitly listed for Script
  #  have the value Unknown (Zzzz).
  data['Unknown'] = (0..0x10ffff).to_a - data.values_at(*by_title['Script']).flatten
  categories['Unknown'] = 'Script'
  by_title.values.flatten << 'Unknown'
end
 | |
| 
 | |
# Reads PropertyAliases.txt and the gc/sc entries of
# PropertyValueAliases.txt, making every alias usable as a key of +data+.
#
# data:: Hash of property name => Array of codepoints. For each alias a new
#        key is added that refers to the same value as the name it stands
#        for (modified in place).
#
# Returns a Hash mapping each normalized alias to the normalized name it
# stands for (see normalize_propname).
def parse_aliases(data)
  kv = {}
  data_foreach('PropertyAliases.txt') do |line|
    next unless /^(\w+)\s*; (\w+)/ =~ line
    short, long = $1, $2
    data[short] = data[long]
    kv[normalize_propname(short)] = normalize_propname(long)
  end
  data_foreach('PropertyValueAliases.txt') do |line|
    next unless /^(sc|gc)\s*; (\w+)\s*; (\w+)(?:\s*; (\w+))?/ =~ line
    kind, second, third, extra = $1, $2, $3, $4

    # For gc lines the second field is the name data is keyed by and the
    # third is its alias; for sc lines it is the other way round.
    canonical, synonym = kind == 'gc' ? [second, third] : [third, second]

    # The fourth field is optional. Skipping it when absent keeps a stray
    # nil key out of data.
    [synonym, extra].compact.each do |name|
      data[name] = data[canonical]
      kv[normalize_propname(name)] = normalize_propname(canonical)
    end
  end
  kv
end
 | |
| 
 | |
# Reads DerivedAge.txt and emits one table per version via make_const.
#
# Only "major.minor" versions are recognised: according to
# Unicode6.0.0/ch03.pdf, Section 3.1, "An update version never involves any
# additions to the character repertoire", so versions in DerivedAge.txt
# should always be /\d+\.\d+/.
#
# data:: Hash of property name => Array of codepoints; each version's
#        codepoints are stored under its constant name (see
#        constantize_agename).
#
# Returns the version strings in the order they were read.
def parse_age(data)
  version = nil
  previous = nil
  pending = []
  versions = []
  data_foreach('DerivedAge.txt') do |line|
    case line
    when /^# Total code points: /
      constname = constantize_agename(version)
      # each version matches all previous versions
      pending.concat(data[previous]) if previous
      data[constname] = pending
      make_const(constname, pending, "Derived Age #{version}")
      versions << version
      previous = constname
      pending = []
    when /^([0-9a-fA-F]+)(?:\.\.([0-9a-fA-F]+))?\s*;\s*(\d+\.\d+)/
      first, last, version = $1, $2, $3
      if last
        pending.concat((first.to_i(16)..last.to_i(16)).to_a)
      else
        pending.push(first.to_i(16))
      end
    end
  end
  versions
end
 | |
| 
 | |
# Reads auxiliary/GraphemeBreakProperty.txt and emits one table per
# Grapheme_Cluster_Break value via make_const.
#
# data:: Hash of property name => Array of codepoints; each value's
#        codepoints are stored under its constant name (see
#        constantize_Grapheme_Cluster_Break).
#
# Returns the property values in the order they were read.
def parse_GraphemeBreakProperty(data)
  value = nil
  pending = []
  values = []
  data_foreach('auxiliary/GraphemeBreakProperty.txt') do |line|
    case line
    when /^# Total code points: /
      constname = constantize_Grapheme_Cluster_Break(value)
      data[constname] = pending
      make_const(constname, pending, "Grapheme_Cluster_Break=#{value}")
      values << value
      pending = []
    when /^([0-9a-fA-F]+)(?:\.\.([0-9a-fA-F]+))?\s*;\s*(\w+)/
      first, last, value = $1, $2, $3
      if last
        pending.concat((first.to_i(16)..last.to_i(16)).to_a)
      else
        pending.push(first.to_i(16))
      end
    end
  end
  values
end
 | |
| 
 | |
# Reads Blocks.txt ("XXXX..YYYY; Block Name" lines) and emits one table per
# block via make_const, plus a final table for every codepoint that belongs
# to none of the listed blocks.
#
# data:: Hash of property name => Array of codepoints; each listed block's
#        codepoints are stored under its constant name (see
#        constantize_blockname). The No_Block table is emitted but not
#        stored in data.
#
# Returns the constant names of all blocks, ending with the No_Block one.
def parse_block(data)
  names = []
  data_foreach('Blocks.txt') do |line|
    next unless /^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+);\s*(.*)/ =~ line

    members = ($1.to_i(16)..$2.to_i(16)).to_a
    constname = constantize_blockname($3)
    data[constname] = members
    make_const(constname, members, "Block")
    names << constname
  end

  # All code points not belonging to any of the named blocks
  # have the value No_Block.
  leftover = (0..0x10ffff).to_a - data.values_at(*names).flatten
  leftover_name = constantize_blockname("No_Block")
  make_const(leftover_name, leftover, "Block")
  names << leftover_name
end
 | |
| 
 | |
# Ruby 1.8 compatibility: where Hash has no #key (reverse lookup of a key by
# its value), provide it as another name for Hash#index. Does nothing when
# Hash#key already exists.
Hash.class_eval { alias_method :key, :index } unless {}.respond_to?(:key)
 | |
| 
 | |
# Codepoint lists already emitted as full tables, keyed by property name.
$const_cache = {}

# make_const(prop, data, name): prints the C definition for one property.
#
# prop:: property name; the emitted identifier is CR_<prop>.
# data:: Array of codepoints belonging to the property (not yet paired).
# name:: human-friendly description placed in the leading comment; may be
#        empty.
#
# When an earlier property was emitted with an equal codepoint list, only a
# #define aliasing that table is printed. Otherwise the list is remembered
# in $const_cache and printed as a 'static const' array of ranges produced
# by pair_codepoints.
def make_const(prop, data, name)
  banner = name.empty? ? "'#{prop}'" : "'#{prop}': #{name}"
  puts "\n/* #{banner} */"

  if (origprop = $const_cache.key(data))
    puts "#define CR_#{prop} CR_#{origprop}"
    return
  end

  $const_cache[prop] = data
  pairs = pair_codepoints(data)
  puts "static const OnigCodePoint CR_#{prop}[] = {"
  # The first element of the constant is the number of pairs of codepoints
  puts "\t#{pairs.size},"
  pairs.each do |pair|
    # Zero is spelled out by hand; every other value goes through sprintf.
    low, high = pair.map { |c| c == 0 ? '0x0000' : sprintf("%0#6x", c) }
    puts "\t#{low}, #{high},"
  end
  puts "}; /* CR_#{prop} */"
end
 | |
| 
 | |
# Returns +name+ in the form used for lookup keys: lower-cased, with every
# hyphen, space and underscore removed. The argument itself is not modified.
def normalize_propname(name)
  name.downcase.delete('- _')
end
 | |
| 
 | |
# Builds the table name for an age version string: the first dot becomes an
# underscore and the result is prefixed with "Age_" (e.g. "6.1" gives
# "Age_6_1").
def constantize_agename(name)
  ['Age', name.sub(/\./, '_')].join('_')
end
 | |
| 
 | |
# Builds the table name for a Grapheme_Cluster_Break value by prefixing it
# with "Grapheme_Cluster_Break_".
def constantize_Grapheme_Cluster_Break(name)
  format('Grapheme_Cluster_Break_%s', name)
end
 | |
| 
 | |
# Builds the table name for a block: every non-word character of the block
# name becomes an underscore and the result is prefixed with "In_".
def constantize_blockname(name)
  'In_' + name.gsub(/\W/, '_')
end
 | |
| 
 | |
# Returns the path of data file +name+: files whose name starts with
# "emoji-" live in the second command-line directory, all others in the
# first.
def get_file(name)
  dir_index = name.start_with?("emoji-") ? 1 : 0
  File.join(ARGV[dir_index], name)
end
 | |
| 
 | |
# Opens data file +name+ (located via get_file), checks the version stated
# in its header, and yields each remaining line to the block.
#
# The header is the first line for ordinary files and the first paragraph
# for "emoji-" files; it is consumed here and not yielded. The version
# found is compared with $versions[:Unicode] / $versions[:Emoji], or
# recorded there when no version is known yet.
#
# name:: file name relative to its data directory.
#
# Raises ArgumentError when the header carries no version (including when
# the file is empty) or when the version differs from the recorded one.
def data_foreach(name, &block)
  fn = get_file(name)
  warn "Reading #{name}"
  if /^emoji-/ =~ name
    sep = ""
    pat = /^# #{Regexp.quote(File.basename(name))}.*^# Version: ([\d.]+)/m
    type = :Emoji
  else
    sep = "\n"
    pat = /^# #{File.basename(name).sub(/\./, '-([\\d.]+)\\.')}/
    type = :Unicode
  end
  File.open(fn, 'rb') do |f|
    # gets returns nil for an empty file; treat that as an empty header so
    # the "no version" error below is raised instead of a NoMethodError.
    line = f.gets(sep) || ''
    unless version = line[pat, 1]
      raise ArgumentError, <<-ERROR
#{name}: no #{type} version
#{line.gsub(/^/, '> ')}
      ERROR
    end
    if !(v = $versions[type])
      $versions[type] = version
    elsif v != version
      raise ArgumentError, <<-ERROR
#{name}: #{type} version mismatch: #{version} to #{v}
#{line.gsub(/^/, '> ')}
      ERROR
    end
    f.each(&block)
  end
end
 | |
| 
 | |
| # Write Data
 | |
# Output collector that stands in for $stdout.
#
# In "kwdonly" mode everything is passed straight to the real output and
# ifdef/endif print C preprocessor lines around their content. Otherwise
# written strings are gathered into a nested Array: each ifdef opens a
# sub-Array whose first element is the symbol, so that #show can later
# replay the text with only selected symbols enabled.
class Unifdef
  attr_accessor :output, :top, :stack, :stdout, :kwdonly

  # out:: the real output stream. Creating the object redirects $stdout to it.
  def initialize(out)
    @output = []
    @top = @output
    @stack = []
    @stdout = out
    $stdout = self
  end

  # Points $stdout back at the real output stream.
  def restore
    $stdout = @stdout
  end

  # Opens a section conditional on +sym+. With a block, the section is
  # closed again when the block finishes (even by exception) and the
  # block's value is returned.
  def ifdef(sym)
    if @kwdonly
      @stdout.puts "#ifdef #{sym}"
    else
      section = [sym]
      @stack << @top
      @top << section
      @top = section
    end
    return unless block_given?

    begin
      yield
    ensure
      endif(sym)
    end
  end

  # Closes the section opened for +sym+. Raises ArgumentError (after
  # restoring $stdout) when +sym+ is not the innermost open section.
  def endif(sym)
    return @stdout.puts("#endif /* #{sym} */") if @kwdonly

    if sym != @top[0]
      restore
      raise ArgumentError, "#{sym} unmatch to #{@top[0]}"
    end
    @top = @stack.pop
  end

  # Prints the collected text to +dest+, including only those conditional
  # sections whose symbol is listed in +syms+.
  def show(dest, *syms)
    _show(dest, @output, syms)
  end

  # Recursive worker for #show: +ary+ is one (sub)section.
  def _show(dest, ary, syms)
    head = ary[0]
    return if Symbol === head && !syms.include?(head)

    ary.each do |item|
      case item
      when Array then _show(dest, item, syms)
      when String then dest.print item
      end
    end
  end

  # Receives text printed to $stdout. Returns self so calls can be chained.
  def write(str)
    @kwdonly ? @stdout.write(str) : @top << str
    self
  end
  alias << write
end
 | |
| 
 | |
# From here on everything printed goes through Unifdef. Without --header
# the text is passed straight through, with #ifdef lines written inline.
output = Unifdef.new($stdout)
output.kwdonly = !header

puts '%{'
props, data = parse_unicode_data(get_file('UnicodeData.txt'))
categories = {}
props.concat(parse_scripts(data, categories))
aliases = parse_aliases(data)
# Assigned inside the ifdef blocks below, used again by later sections.
ages = blocks = graphemeBreaks = nil
define_posix_props(data)

# Tables for the POSIX bracket classes.
POSIX_NAMES.each do |name|
  label =
    case name
    when 'XPosixPunct' then "[[:Punct:]]"
    when 'Punct'       then ""
    else                    "[[:#{name}:]]"
    end
  make_const(name, data[name], label)
end

# Tables for the Unicode properties, ages, grapheme breaks and blocks.
output.ifdef :USE_UNICODE_PROPERTIES do
  props.each do |name|
    category = categories[name]
    # Names without a recorded category are labelled by their length.
    category ||= { 1 => 'Major Category', 2 => 'General Category' }.fetch(name.size, '-')
    make_const(name, data[name], category)
  end
  output.ifdef :USE_UNICODE_AGE_PROPERTIES do
    ages = parse_age(data)
  end
  graphemeBreaks = parse_GraphemeBreakProperty(data)
  blocks = parse_block(data)
end
 | |
# The CodeRanges array lists every CR_ table in the same order in which the
# keyword section assigns indices.
puts(<<'__HEREDOC')

static const OnigCodePoint* const CodeRanges[] = {
__HEREDOC
POSIX_NAMES.each do |name|
  puts "  CR_#{name},"
end
output.ifdef :USE_UNICODE_PROPERTIES do
  props.each do |name|
    puts "  CR_#{name},"
  end
  output.ifdef :USE_UNICODE_AGE_PROPERTIES do
    ages.each do |name|
      puts "  CR_#{constantize_agename(name)},"
    end
  end
  graphemeBreaks.each do |name|
    puts "  CR_#{constantize_Grapheme_Cluster_Break(name)},"
  end
  blocks.each do |name|
    puts "  CR_#{name},"
  end
end

# Close the array and the gperf declaration section.
puts(<<'__HEREDOC')
};
struct uniname2ctype_struct {
  short name;
  unsigned short ctype;
};
#define uniname2ctype_offset(str) offsetof(struct uniname2ctype_pool_t, uniname2ctype_pool_##str)

static const struct uniname2ctype_struct *uniname2ctype_p(
#if !(/*ANSI*/+0) /* if ANSI, old style not to conflict with generated prototype */
    const char *, unsigned int
#endif
);
%}
struct uniname2ctype_struct;
%%
__HEREDOC
 | |
| 
 | |
# gperf keyword section: one "<name>, <index>" line per lookup name.
i = -1
name_to_index = {}

# Prints one keyword line in the fixed-width layout.
emit_keyword = lambda { |key, index| puts "%-40s %3d" % [key + ',', index] }

# Gives +key+ the next index, remembers it and prints its keyword line.
register = lambda do |key|
  i += 1
  name_to_index[key] = i
  emit_keyword.call(key, i)
end

POSIX_NAMES.each do |name|
  if name == 'NEWLINE'
    # NEWLINE takes up an index but gets no keyword.
    i += 1
    next
  end
  register.call(normalize_propname(name))
end

output.ifdef :USE_UNICODE_PROPERTIES do
  props.each do |name|
    register.call(normalize_propname(name))
  end

  # Aliases reuse the index of the name they stand for; they are skipped
  # when the alias is already a keyword or its target is unknown.
  aliases.each_pair do |k, v|
    next if name_to_index[k]
    next unless v = name_to_index[v]
    emit_keyword.call(k, v)
  end

  output.ifdef :USE_UNICODE_AGE_PROPERTIES do
    ages.each do |name|
      register.call("age=#{name}")
    end
  end

  graphemeBreaks.each do |name|
    register.call("graphemeclusterbreak=#{name.delete('_').downcase}")
  end

  blocks.each do |name|
    register.call(normalize_propname(name))
  end
end

# C wrapper around the generated lookup function.
puts(<<'__HEREDOC')
%%
static int
uniname2ctype(const UChar *name, unsigned int len)
{
  const struct uniname2ctype_struct *p = uniname2ctype_p((const char *)name, len);
  if (p) return p->ctype;
  return -1;
}
__HEREDOC
 | |
# For each data set print a guard that fails compilation when previously
# defined version macros disagree, followed by the version macros themselves.
$versions.each do |type, ver|
  macro = type == :Unicode ? "ONIG_UNICODE_VERSION" : "ONIG_UNICODE_EMOJI_VERSION"
  # Pair each numeric component of the version with its macro suffix.
  components = ver.scan(/\d+/).zip(%w[MAJOR MINOR TEENY])

  print("#if defined #{macro}_STRING && !( \\\n")
  components.each do |number, part|
    print("      #{macro}_#{part} == #{number} && \\\n")
  end
  print("      1)\n")
  print("# error #{macro}_STRING mismatch\n")
  print("#endif\n")

  print("#define #{macro}_STRING #{ver.dump}\n")
  components.each do |number, part|
    print("#define #{macro}_#{part} #{number}\n")
  end
end

# Generation is finished; hand $stdout back to the real stream.
output.restore
 | |
| 
 | |
# --header mode: run the collected text through gperf three times (with
# both, one, and none of the conditional symbols enabled), merge the three
# results into a single #ifdef'ed file with diff -D, and patch up the
# generated C on the way out.
if header
  require 'tempfile'

  NAME2CTYPE = %w[gperf -7 -c -j1 -i1 -t -C -P -T -H uniname2ctype_hash -Q uniname2ctype_pool -N uniname2ctype_p]

  fds = []
  syms = %i[USE_UNICODE_PROPERTIES USE_UNICODE_AGE_PROPERTIES]
  # fds[0] is generated with both symbols, fds[1] with only the first,
  # fds[2] with none; the loop ends once there was nothing left to pop.
  loop do
    tmp = Tempfile.new(%w"name2ctype .h")
    fds << tmp
    IO.popen([*NAME2CTYPE, out: tmp], "w") { |f| output.show(f, *syms) }
    break unless syms.pop
  end
  fds.each(&:close)

  # State for the line filter below: truthy from the line that starts with
  # "uniname2ctype_hash " up to (not including) the next line starting with
  # "}".
  ff = nil
  age_diff = %W[diff -DUSE_UNICODE_AGE_PROPERTIES #{fds[1].path} #{fds[0].path}]
  IO.popen(age_diff, "r") do |age|
    prop_diff = %W[diff -DUSE_UNICODE_PROPERTIES #{fds[2].path} -]
    IO.popen(prop_diff, "r", in: age) do |f|
      ansi = false
      f.each do |line|
        ansi = true if /ANSI-C code produced by gperf/ =~ line
        # Once gperf has announced ANSI-C output, turn the /*ANSI*/ marker
        # from the prologue into 1.
        line.sub!(/\/\*ANSI\*\//, '1') if ansi
        line.gsub!(/\(int\)\((?:long|size_t)\)&\(\(struct uniname2ctype_pool_t \*\)0\)->uniname2ctype_pool_(str\d+),\s+/,
                   'uniname2ctype_offset(\1), ')
        # no line can match both, exclusive flip-flop
        ff = ff ? /^\}/ !~ line : /^(uniname2ctype_hash) / =~ line
        if ff
          # Inside that function, cast the initial "hval = len" to hval's type.
          line.sub!(/^( *(?:register\s+)?(.*\S)\s+hval\s*=\s*)(?=len;)/, '\1(\2)')
        end
        puts line
      end
    end
  end
end
 |