mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	 62f8df2d3c
			
		
	
	
		62f8df2d3c
		
	
	
	
	
		
			
			* tool/transcode-tblgen.rb: modified for enc-emoji4unicode.rb. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26856 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
		
			
				
	
	
		
			133 lines
		
	
	
	
		
			4.4 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			133 lines
		
	
	
	
		
			4.4 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| #!/usr/bin/env ruby
 | |
| 
 | |
| # example:
 | |
| # ./enc-emoji4unicode.rb emoji4unicode.xml > ../enc/trans/emoji-exchange-tbl.rb
 | |
| 
 | |
| require 'rexml/document'
 | |
| require File.expand_path("../transcode-tblgen", __FILE__)
 | |
| 
 | |
| class EmojiTable
 | |
|   VERBOSE_MODE = false
 | |
| 
 | |
|   def initialize(xml_path)
 | |
|     @doc = REXML::Document.new File.open(xml_path)
 | |
|     @kddi_undoc = make_kddi_undoc_map()
 | |
|   end
 | |
| 
 | |
|   def conversion(from_carrier, to_carrier, &block)
 | |
|     REXML::XPath.each(@doc.root, '//e') do |e|
 | |
|       from = e.attribute(from_carrier.downcase).to_s
 | |
|       to = e.attribute(to_carrier.downcase).to_s
 | |
|       text_fallback = e.attribute('text_fallback').to_s
 | |
|       name = e.attribute('name').to_s
 | |
|       if from =~ /^(?:\*|\+)(.+)$/ # proposed or unified
 | |
|         from = $1
 | |
|       end
 | |
|       if from.empty? || from !~ /^[0-9A-F]+$/
 | |
|         # do nothing
 | |
|       else
 | |
|         from_utf8 = [from.hex].pack("U").unpack("H*").first
 | |
|         if to =~ /^(?:>|\*)?([0-9A-F\+]+)$/
 | |
|           str_to = $1
 | |
|           if str_to =~ /^\+/ # unicode "proposed" begins at "+"
 | |
|             proposal = true
 | |
|             str_to.sub!(/^\+/, '')
 | |
|           else
 | |
|             proposal = false
 | |
|           end
 | |
|           tos = str_to.split('+')
 | |
|           to_utf8 = tos.map(&:hex).pack("U*").unpack("H*").first
 | |
|           comment = "[%s] U+%X -> %s" % [name, from.hex, tos.map{|c| "U+%X"%c.hex}.join(' ')]
 | |
|           block.call(:from => from_utf8,
 | |
|                      :to => to_utf8,
 | |
|                      :comment => comment,
 | |
|                      :fallback => false,
 | |
|                      :proposal => proposal)
 | |
|         elsif to.empty?
 | |
|           if text_fallback.empty?
 | |
|             comment = "[%s] U+%X -> U+3013 (GETA)" % [name, from.hex]
 | |
|             block.call(:from => from_utf8,
 | |
|                        :to => "\u{3013}".unpack("H*").first,
 | |
|                        :comment => comment, # geta
 | |
|                        :fallback => true,
 | |
|                        :proposal => false)
 | |
|           else
 | |
|             to_utf8 = text_fallback.unpack("H*").first
 | |
|             comment = %([%s] U+%X -> "%s") % [name, from.hex, text_fallback]
 | |
|             block.call(:from => from_utf8,
 | |
|                        :to => to_utf8,
 | |
|                        :comment => comment,
 | |
|                        :fallback => true,
 | |
|                        :proposal => false)
 | |
|           end
 | |
|         else
 | |
|           raise "something wrong: %s -> %s" % [from, to]
 | |
|         end
 | |
|       end
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   def generate(io, from_carrier, to_carrier)
 | |
|     from_encoding = (from_carrier == "Unicode") ? "UTF-8" : "UTF8-"+from_carrier
 | |
|     to_encoding   = (to_carrier == "Unicode" )  ? "UTF-8" : "UTF8-"+to_carrier
 | |
|       io.puts "EMOJI_EXCHANGE_TBL['#{from_encoding}']['#{to_encoding}'] = ["
 | |
|       io.puts "  # for documented codepoints" if from_carrier == "KDDI"
 | |
|       self.conversion(from_carrier, to_carrier) do |params|
 | |
|         from, to = params[:from], %Q{"#{params[:to]}"}
 | |
|         to = ":undef" if params[:fallback] || params[:proposal]
 | |
|         io.puts %{  ["#{from}", #{to}], # #{params[:comment]}}
 | |
|       end
 | |
|       if from_carrier == "KDDI"
 | |
|         io.puts "  # for undocumented codepoints"
 | |
|         self.conversion(from_carrier, to_carrier) do |params|
 | |
|           from, to = params[:from], %Q{"#{params[:to]}"}
 | |
|           to = ":undef" if params[:fallback] || params[:proposal]
 | |
|           unicode = utf8_to_ucs(from)
 | |
|           undoc = ucs_to_utf8(@kddi_undoc[unicode])
 | |
|           io.puts %{  ["#{undoc}", #{to}], # #{params[:comment]}}
 | |
|         end
 | |
|       end
 | |
|       io.puts "]"
 | |
|       io.puts
 | |
|   end
 | |
| 
 | |
|   private
 | |
| 
 | |
|   def utf8_to_ucs(cp)
 | |
|     return [cp].pack("H*").unpack("U*").first
 | |
|   end
 | |
| 
 | |
|   def ucs_to_utf8(cp)
 | |
|     return [cp].pack("U*").unpack("H*").first
 | |
|   end
 | |
| 
 | |
|   def make_kddi_undoc_map()
 | |
|     pub_to_sjis = citrus_decode_mapsrc(
 | |
|       "mskanji", 2, "UCS/EMOJI_SHIFT_JIS-KDDI").sort_by{|u, s| s}
 | |
|     sjis_to_undoc = citrus_decode_mapsrc(
 | |
|       "mskanji", 2, "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS").sort_by{|s, u| s}
 | |
|     return pub_to_sjis.zip(sjis_to_undoc).inject({}) {|h, rec|
 | |
|       raise "no match sjis codepoint" if rec[0][1] != rec[1][0]
 | |
|       h[rec[0][0]] = rec[1][1]
 | |
|       next h
 | |
|     }
 | |
|   end
 | |
| end
 | |
| 
 | |
| if ARGV.empty?
 | |
|   puts "usage: #$0 [emoji4unicode.xml]"
 | |
|   exit 1
 | |
| end
 | |
| $srcdir = File.expand_path("../../enc/trans", __FILE__)
 | |
| emoji_table = EmojiTable.new(ARGV[0])
 | |
| 
 | |
| companies = %w(DoCoMo KDDI SoftBank Unicode)
 | |
| 
 | |
| io = STDOUT
 | |
| io.puts "EMOJI_EXCHANGE_TBL = Hash.new{|h,k| h[k] = {}}"
 | |
| companies.each do |from_company|
 | |
|   companies.each do |to_company|
 | |
|     next if from_company == to_company
 | |
|     emoji_table.generate(io, from_company, to_company)
 | |
|   end
 | |
| end
 |