mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	add tests for grapheme clusters using Unicode Emoji test data
Add file test/ruby/enc/test_emoji_breaks.rb to test String#each_grapheme_cluster with the test data provided by Unicode (at https://www.unicode.org/Public/emoji/#{EMOJI_VERSION}/). Lines containing emoji for genies, zombies, and wrestling are ignored because there seems to be a bug (#15343) in the implementation. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65990 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									ebff9dc10e
								
							
						
					
					
						commit
						0409290ec0
					
				
					 1 changed files with 117 additions and 0 deletions
				
			
		
							
								
								
									
										117
									
								
								test/ruby/enc/test_emoji_breaks.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								test/ruby/enc/test_emoji_breaks.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,117 @@ | ||||||
|  | # frozen_string_literal: true | ||||||
|  | # Copyright © 2018 Martin J. Dürst (duerst@it.aoyama.ac.jp) | ||||||
|  | 
 | ||||||
|  | require "test/unit" | ||||||
|  | 
 | ||||||
# One entry parsed from a Unicode emoji data file: the emoji sequence as a
# UTF-8 string, plus the metadata needed to report where the entry came from.
class BreakTest
  # Entries whose data or comment mention one of these words are rejected;
  # they are left out of the tests because of a suspected bug (#15343) in the
  # grapheme cluster implementation.
  EXCLUDED = /genie|zombie|wrestling/.freeze

  attr_reader :string, :comment, :filename, :line_number, :type, :shortname

  # filename::    base name of the data file (without ".txt"); 'emoji-test'
  #               entries have no short name field, so +shortname+ is ''.
  # line_number:: line of the entry within the data file.
  # data::        the part of the line before the comment: space-separated
  #               hexadecimal code points, then ';'-separated fields.
  # comment::     the part of the line after the comment marker, if any.
  #
  # Raises ArgumentError for excluded entries, for entries without any code
  # point, and for code points that are not valid hexadecimal numbers.
  # Raises RangeError for code points that cannot be encoded in UTF-8
  # (e.g. surrogates).
  def initialize(filename, line_number, data, comment='')
    if EXCLUDED.match?(data) || EXCLUDED.match?(comment)
      raise ArgumentError, "excluded entry: #{data.inspect}"
    end

    @filename = filename
    @line_number = line_number
    @comment = comment
    codes, @type, @shortname = data.split(/\s*;\s*/)
    @shortname = '' if filename == 'emoji-test'

    # String#split without arguments ignores leading whitespace, so a stray
    # indent cannot produce an empty token (which would become U+0000).
    code_points = codes.to_s.split
    raise ArgumentError, "no code points in #{data.inspect}" if code_points.empty?

    # Integer() is strict: malformed hex raises ArgumentError instead of
    # silently becoming 0 or a truncated value as with String#to_i.
    @string = code_points.map { |hex| Integer(hex, 16).chr('UTF-8') }.join
  end
end
|  | 
 | ||||||
# Base definition of the emoji break tests: knows where the Unicode emoji
# data files live and reports, through a skipped test, when they are missing.
class TestEmojiBreaks < Test::Unit::TestCase
  EMOJI_DATA_FILES = %w[emoji-sequences emoji-test emoji-variation-sequences emoji-zwj-sequences]
  EMOJI_VERSION = '5.0' # hard-coded, should be replaced by
                        # RbConfig::CONFIG['UNICODE_EMOJI_VERSION'] or so, see feature #15341
  EMOJI_DATA_PATH = File.expand_path("../../../enc/unicode/data/emoji/#{EMOJI_VERSION}", __dir__)

  class << self
    # Full path of the data file with the given base name (no ".txt").
    def expand_filename(basename)
      data_file = "#{EMOJI_DATA_PATH}/#{basename}.txt"
      File.expand_path(data_file, __dir__)
    end

    # True only when every file listed in EMOJI_DATA_FILES exists.
    def data_files_available?
      EMOJI_DATA_FILES.all? { |name| File.exist?(expand_filename(name)) }
    end
  end

  # Makes missing data files visible as a skip instead of passing silently.
  def test_data_files_available
    return if TestEmojiBreaks.data_files_available?

    skip "Emoji data files not available in #{EMOJI_DATA_PATH}."
  end
end
|  | 
 | ||||||
# The actual grapheme cluster tests. They are only defined when all emoji data
# files are present.
if TestEmojiBreaks.data_files_available?
  class TestEmojiBreaks
    # Parses every file in EMOJI_DATA_FILES and returns all usable entries as
    # a flat array of BreakTest objects.
    #
    # Raises RuntimeError when a file's first line does not name the file or
    # when no "# Version:" line matches EMOJI_VERSION.
    def read_data
      EMOJI_DATA_FILES.flat_map { |filename| read_data_file(filename) }
    end

    # Entries from all data files, read once and shared by every test
    # instance. A data file that disappears after the availability check
    # results in an empty list rather than an error.
    def all_tests
      @@tests ||= read_data
    rescue Errno::ENOENT
      @@tests ||= []
    end

    # Each emoji sequence on its own must be exactly one grapheme cluster.
    def test_single_emoji
      all_tests.each do |test|
        expected = [test.string]
        actual = test.string.each_grapheme_cluster.to_a
        assert_equal expected, actual, emoji_failure_message(test, expected, actual)
      end
    end

    # Each emoji sequence must stay one cluster between ordinary letters.
    def test_embedded_emoji
      all_tests.each do |test|
        expected = ["A", test.string, "Z"]
        actual = "A#{test.string}Z".each_grapheme_cluster.to_a
        assert_equal expected, actual, emoji_failure_message(test, expected, actual)
      end
    end

    # test some pseudorandom combinations of emoji
    def test_mixed_emoji
      # A private generator with a fixed seed keeps the selection reproducible
      # without reseeding the process-wide generator used by other code.
      rng = Random.new(0)
      tests = all_tests
      last_index = tests.length - 1
      step = 503 # use a prime number
      tests.each do |test1|
        rng.rand(step).step(by: step, to: last_index) do |index2|
          test2 = tests[index2]
          expected = [test1.string, test2.string]
          actual = (test1.string + test2.string).each_grapheme_cluster.to_a
          message = emoji_failure_message(test1, expected, actual) +
                    ", followed by file: #{test2.filename}, line #{test2.line_number}"
          assert_equal expected, actual, message
        end
      end
    end

    private

    # Parses one data file into BreakTest objects, checking the file name and
    # version headers along the way.
    def read_data_file(filename)
      version_seen = false
      file_tests = []
      path = TestEmojiBreaks.expand_filename(filename)
      File.foreach(path, encoding: Encoding::UTF_8).with_index(1) do |line, line_number|
        line.chomp!
        raise "File Name Mismatch" if line_number == 1 && line != "# #{filename}.txt"
        version_seen = true if line == "# Version: #{EMOJI_VERSION}"
        next if line.empty? || line.start_with?('#')

        begin
          # Split on the first '#' only: the comment itself may contain '#'
          # (the keycap '#' emoji), and such lines must not be lost.
          file_tests << BreakTest.new(filename, line_number, *line.split('#', 2))
        rescue StandardError
          # BreakTest rejects entries that cannot or should not be tested;
          # those lines are deliberately skipped.
        end
      end
      raise "File Version Mismatch" unless version_seen
      file_tests
    end

    # Describes a failing entry: where it came from and what was compared.
    def emoji_failure_message(test, expected, actual)
      "file: #{test.filename}, line #{test.line_number}, expected '#{expected}', " \
        "but got '#{actual}', type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
    end
  end
end
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 duerst
						duerst