# 2018-11-26 04:03:11 -05:00
# frozen_string_literal: true
# Copyright © 2018 Martin J. Dürst (duerst@it.aoyama.ac.jp)
require "test/unit"
# 2019-06-28 03:02:03 -04:00
# Forward declaration: creates the TestEmojiBreaks namespace so that the helper
# classes can be defined inside it before the test case body itself.
class TestEmojiBreaks < Test::Unit::TestCase
end

# One test item read from an emoji data file: a string built from the code
# points on a data line, together with the metadata used in failure messages.
class TestEmojiBreaks::BreakTest
  attr_reader :string, :comment, :filename, :line_number, :type, :shortname

  # Data files whose lines are read as "codes ; type" only; for these the
  # short name is always the empty string.
  FILES_WITHOUT_SHORTNAME = %w[emoji-test emoji-variation-sequences].freeze

  # filename::    base name (without ".txt") of the data file the line is from
  # line_number:: line number of the entry within that file
  # data::        the part of the line before '#': semicolon-separated fields,
  #               the first being whitespace-separated hexadecimal code points
  # comment::     the part of the line after '#' (defaults to '')
  #
  # Runs of whitespace in type, shortname and comment are collapsed to a single
  # space and the result is stripped. A missing type or shortname field becomes
  # '' instead of raising NoMethodError on nil.
  def initialize(filename, line_number, data, comment = '')
    @filename = filename
    @line_number = line_number
    @comment = squeeze_whitespace(comment)

    codes, type, shortname = data.split(/\s*;\s*/)
    shortname = nil if FILES_WITHOUT_SHORTNAME.include?(filename)
    @type = squeeze_whitespace(type)
    @shortname = squeeze_whitespace(shortname)

    # String#split without arguments also ignores leading whitespace, so an
    # indented code point list does not produce a bogus U+0000.
    # Surrogate code points are not filtered out here (the explicit
    # ArgumentError check for 0xD800..0xDFFF was left disabled).
    @string = codes.split.map { |hex| hex.to_i(16).chr('UTF-8') }.join
  end

  private

  # Collapses whitespace runs to one space and strips both ends; nil gives ''.
  def squeeze_whitespace(text)
    text.to_s.gsub(/\s+/, ' ').strip
  end
end
# 2021-07-08 01:40:47 -04:00
# Describes one emoji data file: its base name, the full path of the ".txt"
# file, and the version string expected in the file's header.
# Every instance registers itself in FILES when it is created.
class TestEmojiBreaks::BreakFile
  attr_reader :basename, :fullname, :version

  # Registry of all BreakFile instances, in creation order. It is appended to
  # by #initialize, so it is intentionally left unfrozen.
  FILES = []

  # basename:: file name without the ".txt" extension
  # path::     directory containing the file
  # version::  version string associated with the file
  def initialize(basename, path, version)
    @basename = basename
    @fullname = "#{path}/#{basename}.txt"
    @version = version
    FILES << self
  end

  # Returns the registry of all BreakFile instances created so far.
  def self.files
    FILES
  end
end
# 2018-11-26 04:03:11 -05:00
# Locates the emoji data files and provides a test that is skipped when they
# are not available. Versions come from RbConfig::CONFIG; paths are resolved
# relative to this file's directory.
class TestEmojiBreaks < Test::Unit::TestCase
  UNICODE_VERSION = RbConfig::CONFIG['UNICODE_VERSION']
  UNICODE_DATA_PATH = File.expand_path("../../../enc/unicode/data/#{UNICODE_VERSION}/ucd/emoji", __dir__)
  EMOJI_VERSION = RbConfig::CONFIG['UNICODE_EMOJI_VERSION']
  EMOJI_DATA_PATH = File.expand_path("../../../enc/unicode/data/emoji/#{EMOJI_VERSION}", __dir__)

  EMOJI_DATA_FILES = %w[emoji-sequences emoji-test emoji-zwj-sequences].map do |basename|
    BreakFile.new(basename, EMOJI_DATA_PATH, EMOJI_VERSION)
  end

  # [0..-3] drops the last two characters of the version string; this deals
  # with a versioning mismatch problem in Unicode.
  UNICODE_DATA_FILE = BreakFile.new('emoji-variation-sequences', UNICODE_DATA_PATH, UNICODE_VERSION[0..-3])
  EMOJI_DATA_FILES << UNICODE_DATA_FILE

  # Returns true only when every file in EMOJI_DATA_FILES exists on disk.
  def self.data_files_available?
    EMOJI_DATA_FILES.all? { |file| File.exist?(file.fullname) }
  end

  # Checks the number of configured data files and skips when any of them is
  # missing.
  def test_data_files_available
    assert_equal 4, EMOJI_DATA_FILES.size # debugging test
    return if TestEmojiBreaks.data_files_available?

    skip "Emoji data files not available in #{EMOJI_DATA_PATH}."
  end
end
# The data-driven tests are defined only when every emoji data file exists.
# Otherwise the class is not reopened, and none of the methods below are
# defined.
if TestEmojiBreaks.data_files_available?
  class TestEmojiBreaks
    # A data line starting with a code point range such as "231A..231B ; ...",
    # as found in emoji-sequences.txt (Bug #18028). `rest` is everything from
    # the first ';' to the end of the line, including any '#' comment.
    CODE_POINT_RANGE_LINE = /\A(?<first>\h{4,6})\.\.(?<last>\h{4,6}) *(?<rest>;.+)/

    # Code points excluded as the second item in test_mixed_emoji (skin tones).
    SKIN_TONE_MODIFIERS = (0x1F3FB..0x1F3FF).freeze

    # Reads every file in EMOJI_DATA_FILES and returns all BreakTest items.
    # Raises RuntimeError when a file's first line does not name the file or
    # when no line announces the expected version.
    def read_data
      EMOJI_DATA_FILES.flat_map { |file| read_data_file(file) }
    end

    # Returns the test items, reading the data files on first use. The result
    # is cached in a class variable so it is shared by all test instances.
    # A missing file (Errno::ENOENT) yields an empty list instead of an error.
    def all_tests
      @@tests ||= read_data
    rescue Errno::ENOENT
      @@tests ||= []
    end

    # Each test string on its own must be exactly one grapheme cluster.
    def test_single_emoji
      all_tests.each do |test|
        assert_equal [test.string], test.string.each_grapheme_cluster.to_a,
                     failure_message(test)
      end
    end

    # Each test string must stay one grapheme cluster between two tabs.
    def test_embedded_emoji
      all_tests.each do |test|
        expected = ["\t", test.string, "\t"]
        actual = "\t#{test.string}\t".each_grapheme_cluster.to_a
        assert_equal expected, actual, failure_message(test)
      end
    end

    # test some pseudorandom combinations of emoji
    def test_mixed_emoji
      # A private generator with a fixed seed keeps the selection reproducible
      # without reseeding the process-wide generator used by other code.
      rng = Random.new(0)
      tests = all_tests
      stride = 503 # use a prime number
      last_index = tests.length - 1
      tests.each do |test1|
        rng.rand(stride).step(by: stride, to: last_index) do |index|
          test2 = tests[index]
          # exclude skin tones, because they glue to previous grapheme clusters
          next if SKIN_TONE_MODIFIERS.cover?(test2.string.ord)

          expected = [test1.string, test2.string]
          actual = (test1.string + test2.string).each_grapheme_cluster.to_a
          assert_equal expected, actual,
                       "#{location_of(test1, 1)}, #{location_of(test2, 2)},\n" \
                       "#{details_of(test1, 1)},\n" \
                       "#{details_of(test2, 2)}"
        end
      end
    end

    private

    # Reads one data file and returns its BreakTest items.
    def read_data_file(file)
      # Compared literally, so the '.' in the version is not a regex wildcard.
      version_header = "# Version: #{file.version}"
      version_found = false
      file_tests = []
      File.foreach(file.fullname, encoding: Encoding::UTF_8).with_index(1) do |raw_line, line_number|
        line = raw_line.chomp
        if line_number == 1 && line != "# #{file.basename}.txt"
          raise "File Name Mismatch: line: #{line}, expected filename: #{file.basename}.txt"
        end
        version_found = true if line.start_with?(version_header)
        next if line.empty? || line.start_with?('#') # blank line or comment

        expand_code_point_range(line).each do |entry|
          file_tests << BreakTest.new(file.basename, line_number, *entry.split('#', 2))
        end
      end
      raise "File Version Mismatch: file: #{file.fullname}, version: #{file.version}" unless version_found

      file_tests
    end

    # Turns a "XXXX..YYYY ; rest" line into one line per code point in the
    # range; any other line is returned unchanged as a one-element array.
    def expand_code_point_range(line)
      match = CODE_POINT_RANGE_LINE.match(line)
      return [line] unless match

      (match[:first].to_i(16)..match[:last].to_i(16)).map do |code_point|
        code_point.to_s(16) + match[:rest]
      end
    end

    # Message passed to assert_equal for a test involving a single item.
    def failure_message(test)
      "#{location_of(test)}, #{details_of(test)}"
    end

    # "file: ..., line ..." part of a message; +number+ is appended to labels.
    def location_of(test, number = nil)
      "file#{number}: #{test.filename}, line#{number} #{test.line_number}"
    end

    # "type: ..., shortname: ..., comment: ..." part of a message.
    def details_of(test, number = nil)
      "type#{number}: #{test.type}, shortname#{number}: #{test.shortname}, comment#{number}: #{test.comment}"
    end
  end
end