mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
151208a19b
This reverts commit 48f1e8c5d8.
157 lines
5.3 KiB
Ruby
157 lines
5.3 KiB
Ruby
# frozen_string_literal: true
|
|
# Copyright © 2018 Martin J. Dürst (duerst@it.aoyama.ac.jp)
|
|
|
|
require "test/unit"
|
|
|
|
# Declared up front (with an empty body) so that the helper classes that
# follow can be defined inside the TestEmojiBreaks namespace.
class TestEmojiBreaks < Test::Unit::TestCase; end
|
|
|
|
# A single entry of an emoji data file: the semicolon-separated fields of one
# line, plus the UTF-8 string spelled out by its hexadecimal code points.
class TestEmojiBreaks::BreakTest
  attr_reader :string, :comment, :filename, :line_number, :type, :shortname

  # filename::    base name of the data file the entry was read from
  # line_number:: number of the line the entry was read from
  # data::        semicolon-separated fields, hexadecimal code points first
  # comment::     free text attached to the entry (defaults to empty)
  def initialize(filename, line_number, data, comment='')
    @filename = filename
    @line_number = line_number
    @comment = squish(comment)

    fields = data.split(/\s*;\s*/)
    codes, kind = fields
    # Only two fields are read from these two files; shortname is left empty.
    label = %w[emoji-test emoji-variation-sequences].include?(filename) ? '' : fields[2]
    @type = squish(kind)
    @shortname = squish(label)

    characters = codes.split(/\s+/).map do |hex|
      code_point = hex.to_i(16)
      # eliminate cases with surrogates
      # raise ArgumentError if 0xD800 <= c and c <= 0xDFFF
      code_point.chr('UTF-8')
    end
    @string = characters.join
  end

  private

  # Collapses every run of whitespace into one space and trims both ends.
  def squish(text)
    text.gsub(/\s+/, ' ').strip
  end
end
|
|
|
|
# Describes one data file (base name, full path, expected version).
# Every instance registers itself in FILES when it is created.
class TestEmojiBreaks::BreakFile
  # Registry of all instances, in creation order.
  FILES = []

  attr_reader :basename, :fullname, :version

  class << self
    # Returns the registry of all BreakFile instances created so far.
    def files
      FILES
    end
  end

  # basename:: file name without the ".txt" extension
  # path::     directory that contains the file
  # version::  version string the file is expected to carry
  def initialize(basename, path, version)
    @basename, @version = basename, version
    @fullname = "#{path}/#{basename}.txt" # File.expand_path(path + version, __dir__)
    FILES.push(self)
  end
end
|
|
|
|
# Locates the emoji data files for the Unicode and Emoji versions recorded in
# RbConfig, and provides a check for whether all of them exist on disk.
class TestEmojiBreaks < Test::Unit::TestCase
  UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
  EMOJI_VERSION = RbConfig::CONFIG['UNICODE_EMOJI_VERSION']
  UNICODE_DATA_PATH = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}/ucd/emoji", __dir__)
  EMOJI_DATA_PATH = File.expand_path("../../../enc/unicode/data/emoji/#{EMOJI_VERSION}", __dir__)

  # Three files live in the emoji directory ...
  EMOJI_DATA_FILES = %w[emoji-sequences emoji-test emoji-zwj-sequences].map { |name|
    BreakFile.new(name, EMOJI_DATA_PATH, EMOJI_VERSION)
  }
  # ... and a fourth one in the UCD directory; it is appended last.
  UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION)
  EMOJI_DATA_FILES.push(UNICODE_DATA_FILE)

  # True only if every file in EMOJI_DATA_FILES exists.
  def self.data_files_available?
    EMOJI_DATA_FILES.map(&:fullname).all? { |path| File.exist?(path) }
  end

  # Checks the number of configured files and omits (rather than fails)
  # when the data files are missing.
  def test_data_files_available
    assert_equal 4, EMOJI_DATA_FILES.size # debugging test
    return if TestEmojiBreaks.data_files_available?

    omit "Emoji data files not available in #{EMOJI_DATA_PATH}."
  end
end
|
|
|
|
# The data-driven tests are only defined when every data file is present.
TestEmojiBreaks.data_files_available? and class TestEmojiBreaks
  # Parses every file in EMOJI_DATA_FILES and returns an array with one
  # BreakTest per entry (a code point range line yields one BreakTest per
  # code point in the range).
  #
  # Raises RuntimeError if the first line of a file does not name that file,
  # or if nothing in the file confirms the expected version.
  def read_data
    tests = []
    EMOJI_DATA_FILES.each do |file|
      # Stays true until the file name line or a "# Version:" line confirms the version.
      version_mismatch = true
      file_tests = []
      # Built once per file; the version is escaped so that its "." only matches a literal dot.
      version_line = /^# Version: #{Regexp.escape(file.version)}/
      File.foreach(file.fullname, encoding: Encoding::UTF_8).with_index(1) do |line, line_number|
        line.chomp!
        if line_number == 1
          if line == "# #{file.basename}-#{file.version}.txt"
            version_mismatch = false
          elsif line != "# #{file.basename}.txt"
            raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt"
          end
        end
        version_mismatch = false if line.match?(version_line)
        next if line.match?(/\A(#|\z)/) # skip comment lines and empty lines

        # deal with Unicode ranges in emoji-sequences.txt (Bug #18028)
        if (range = line.match(/^(\h{4,6})\.\.(\h{4,6}) *(;.+)/))
          rest = range[3]
          (range[1].to_i(16)..range[2].to_i(16)).each do |code_point|
            file_tests << BreakTest.new(file.basename, line_number, *(code_point.to_s(16)+rest).split('#', 2))
          end
        else
          file_tests << BreakTest.new(file.basename, line_number, *line.split('#', 2))
        end
      end
      raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" if version_mismatch
      tests.concat(file_tests)
    end
    tests
  end

  # Returns the parsed tests, reading the data files only on first use.
  # If a file cannot be found while reading, an empty list is used instead.
  def all_tests
    @@tests ||= read_data
  rescue Errno::ENOENT
    @@tests ||= []
  end

  # Each entry on its own must form exactly one grapheme cluster.
  def test_single_emoji
    all_tests.each do |test|
      expected = [test.string]
      actual = test.string.each_grapheme_cluster.to_a
      assert_equal expected, actual, failure_details(test)
    end
  end

  # Each entry must stay one grapheme cluster when surrounded by tabs.
  def test_embedded_emoji
    all_tests.each do |test|
      expected = ["\t", test.string, "\t"]
      actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
      assert_equal expected, actual, failure_details(test)
    end
  end

  # test some pseudorandom combinations of emoji
  def test_mixed_emoji
    # Fixed seed: the same pairs are tested on every run. A local generator
    # is used so that the process-wide random seed is left untouched.
    rng = Random.new(0)
    length = all_tests.length
    step = 503 # use a prime number
    all_tests.each do |test1|
      start = rng.rand(step)
      start.step(by: step, to: length-1) do |t2|
        test2 = all_tests[t2]
        # exclude skin tones, because they glue to previous grapheme clusters
        next if (0x1F3FB..0x1F3FF).include? test2.string.ord
        expected = [test1.string, test2.string]
        actual = (test1.string+test2.string).each_grapheme_cluster.to_a
        assert_equal expected, actual,
          "file1: #{test1.filename}, line1 #{test1.line_number}, " +
          "file2: #{test2.filename}, line2 #{test2.line_number},\n" +
          "type1: #{test1.type}, shortname1: #{test1.shortname}, comment1: #{test1.comment},\n" +
          "type2: #{test2.type}, shortname2: #{test2.shortname}, comment2: #{test2.comment}"
      end
    end
  end

  private

  # Failure message that identifies the data line a BreakTest was read from.
  def failure_details(test)
    "file: #{test.filename}, line #{test.line_number}, " +
      "type: #{test.type}, shortname: #{test.shortname}, comment: #{test.comment}"
  end
end
|