mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* test/ruby/enc/test_case_comprehensive.rb: Add set of comprehensive
(across most Unicode characters; later across most character encodings) tests for case mapping. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55168 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
61ddd3e0ad
commit
c1fedc8de7
2 changed files with 123 additions and 0 deletions
|
@ -1,3 +1,9 @@
|
|||
Thu May 26 09:45:41 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* test/ruby/enc/test_case_comprehensive.rb: Add set of comprehensive
|
||||
(across most Unicode characters; later across most character encodings)
|
||||
tests for case mapping.
|
||||
|
||||
Thu May 26 05:00:13 2016 Benoit Daloze <eregontp@gmail.com>
|
||||
|
||||
* class.c (rb_define_class): Fix documentation.
|
||||
|
|
117
test/ruby/enc/test_case_comprehensive.rb
Normal file
117
test/ruby/enc/test_case_comprehensive.rb
Normal file
|
@ -0,0 +1,117 @@
|
|||
# Copyright © 2016 Martin J. Dürst (duerst@it.aoyama.ac.jp)
|
||||
|
||||
require "test/unit"
|
||||
require 'unicode_normalize/normalize' # only for UNICODE_VERSION
|
||||
|
||||
class CaseTest
|
||||
attr_reader :method_name, :attributes, :first_data, :follow_data
|
||||
def initialize(method_name, attributes, first_data, follow_data=first_data)
|
||||
@method_name = method_name
|
||||
@attributes = attributes
|
||||
@first_data = first_data
|
||||
@follow_data = follow_data
|
||||
end
|
||||
end
|
||||
|
||||
class TestComprehensiveCaseFold < Test::Unit::TestCase
|
||||
UNICODE_VERSION = UnicodeNormalize::UNICODE_VERSION
|
||||
UNICODE_DATA_PATH = "../../../enc/unicode/data/#{UNICODE_VERSION}"
|
||||
|
||||
def self.hex2utf8(s)
|
||||
s.split(' ').map { |c| c.to_i(16) }.pack('U*')
|
||||
end
|
||||
|
||||
def self.read_data_file (filename)
|
||||
IO.readlines(File.expand_path("#{UNICODE_DATA_PATH}/#{filename}.txt", __dir__), encoding: Encoding::ASCII_8BIT)
|
||||
.tap do |lines|
|
||||
raise "File Version Mismatch" unless filename=='UnicodeData' or /#{filename}-#{UNICODE_VERSION}\.txt/ =~ lines[0]
|
||||
end
|
||||
.reject { |line| line =~ /^[\#@]/ or line =~ /^\s*$/ or line =~ /Surrogate/ }
|
||||
.each do |line|
|
||||
data = line.chomp.split('#')[0].split /;\s*/, 15
|
||||
code = data[0].to_i(16).chr('UTF-8')
|
||||
yield code, data
|
||||
end
|
||||
end
|
||||
|
||||
def self.read_data
|
||||
@@codepoints = []
|
||||
|
||||
downcase = Hash.new { |h, c| c }
|
||||
upcase = Hash.new { |h, c| c }
|
||||
titlecase = Hash.new { |h, c| c }
|
||||
casefold = Hash.new { |h, c| c }
|
||||
turkic_upcase = Hash.new { |h, c| upcase[c] }
|
||||
turkic_downcase = Hash.new { |h, c| downcase[c] }
|
||||
turkic_titlecase = Hash.new { |h, c| titlecase[c] }
|
||||
ascii_upcase = Hash.new { |h, c| c =~ /^[a-zA-Z]$/ ? upcase[c] : c }
|
||||
ascii_downcase = Hash.new { |h, c| c =~ /^[a-zA-Z]$/ ? downcase[c] : c }
|
||||
ascii_titlecase = Hash.new { |h, c| c =~ /^[a-zA-Z]$/ ? titlecase[c] : c }
|
||||
|
||||
read_data_file('UnicodeData') do |code, data|
|
||||
@@codepoints << code
|
||||
upcase[code] = hex2utf8 data[12] unless data[12].empty?
|
||||
downcase[code] = hex2utf8 data[13] unless data[13].empty?
|
||||
titlecase[code] = hex2utf8 data[14] unless data[14].empty?
|
||||
end
|
||||
read_data_file('CaseFolding') do |code, data|
|
||||
casefold[code] = hex2utf8(data[2]) if data[1] =~ /^[CF]$/
|
||||
end
|
||||
|
||||
read_data_file('SpecialCasing') do |code, data|
|
||||
case data[4]
|
||||
when ''
|
||||
upcase[code] = hex2utf8 data[3]
|
||||
downcase[code] = hex2utf8 data[1]
|
||||
titlecase[code] = hex2utf8 data[2]
|
||||
when /^tr\s*/
|
||||
if data[4]!='tr After_I'
|
||||
turkic_upcase[code] = hex2utf8 data[3]
|
||||
turkic_downcase[code] = hex2utf8 data[1]
|
||||
turkic_titlecase[code] = hex2utf8 data[2]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
tests = [
|
||||
CaseTest.new(:downcase, [:lithuanian], downcase),
|
||||
CaseTest.new(:upcase, [:lithuanian], upcase),
|
||||
CaseTest.new(:capitalize, [:lithuanian], titlecase, downcase),
|
||||
# swapcase?????!!!!!
|
||||
CaseTest.new(:downcase, [:fold], casefold),
|
||||
CaseTest.new(:upcase, [:turkic], turkic_upcase),
|
||||
CaseTest.new(:downcase, [:turkic], turkic_downcase),
|
||||
CaseTest.new(:capitalize, [:turkic], turkic_titlecase, turkic_downcase),
|
||||
CaseTest.new(:upcase, [:ascii], ascii_upcase),
|
||||
CaseTest.new(:downcase, [:ascii], ascii_downcase),
|
||||
CaseTest.new(:capitalize, [:ascii], ascii_titlecase, ascii_downcase),
|
||||
]
|
||||
end
|
||||
|
||||
def self.all_tests
|
||||
@@tests ||= read_data
|
||||
end
|
||||
|
||||
def self.generate_casefold_tests (encoding)
|
||||
all_tests.each do |test|
|
||||
attributes = test.attributes.map(&:to_s).join '-'
|
||||
attributes.prepend '_' unless attributes.empty?
|
||||
define_method "test_#{encoding}_#{test.method_name}#{attributes}" do
|
||||
@@codepoints.each do |code|
|
||||
begin
|
||||
source = code.encode(encoding) * 5
|
||||
target = test.first_data[code].encode(encoding) + test.follow_data[code].encode(encoding) * 4
|
||||
result = source.send(test.method_name, *test.attributes)
|
||||
assert_equal target, result,
|
||||
"from #{source} (#{source.dump}) expected #{target.dump} but was #{result.dump}"
|
||||
rescue Encoding::UndefinedConversionError
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
generate_casefold_tests 'US-ASCII'
|
||||
generate_casefold_tests 'ASCII-8BIT'
|
||||
generate_casefold_tests 'UTF-8'
|
||||
end
|
Loading…
Add table
Reference in a new issue