Remove deprecated methods in ActiveSupport::Multibyte::Unicode

This commit is contained in:
Rafael Mendonça França 2020-10-29 15:43:33 +00:00
parent 8f14d5ad4b
commit 2c6f5c0b8a
No known key found for this signature in database
GPG Key ID: FC23B6D0F1EEE948
6 changed files with 15 additions and 191 deletions

View File

@ -1,3 +1,11 @@
* Remove deprecated `ActiveSupport::Multibyte::Unicode.pack_graphemes`,
`ActiveSupport::Multibyte::Unicode.unpack_graphemes`,
`ActiveSupport::Multibyte::Unicode.normalize`,
`ActiveSupport::Multibyte::Unicode.downcase`,
`ActiveSupport::Multibyte::Unicode.upcase` and `ActiveSupport::Multibyte::Unicode.swapcase`.
*Rafael Mendonça França*
* Remove deprecated `ActiveSupport::Multibyte::Chars#consumes?` and `ActiveSupport::Multibyte::Chars#normalize`.
*Rafael Mendonça França*

View File

@ -10,13 +10,6 @@ module ActiveSupport
# information about normalization.
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
NORMALIZATION_FORM_ALIASES = { # :nodoc:
c: :nfc,
d: :nfd,
kc: :nfkc,
kd: :nfkd
}
# The Unicode version that is supported by the implementation
UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]
@ -25,34 +18,7 @@ module ActiveSupport
# in NORMALIZATION_FORMS.
#
# ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
attr_accessor :default_normalization_form
@default_normalization_form = :kc
# Splits +string+ at grapheme cluster boundaries and returns one list of
# codepoints per cluster.
#
# Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
# Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
#
# Deprecated: every call emits a deprecation warning first.
def unpack_graphemes(string)
  ActiveSupport::Deprecation.warn(<<-MSG.squish)
    ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be
    removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead.
  MSG

  # \X matches one extended grapheme cluster.
  grapheme_clusters = string.scan(/\X/)
  grapheme_clusters.map { |cluster| cluster.codepoints }
end
# Inverse of unpack_graphemes: flattens the nested codepoint lists and packs
# the codepoints back into a single string.
#
# Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
#
# Deprecated: every call emits a deprecation warning first.
def pack_graphemes(unpacked)
  ActiveSupport::Deprecation.warn(<<-MSG.squish)
    ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be
    removed from Rails 6.1. Use array.flatten.pack("U*") instead.
  MSG

  codepoints = unpacked.flatten
  codepoints.pack("U*")
end
attr_accessor :default_normalization_form # TODO: Deprecate
# Decompose composed characters to the decomposed form.
def decompose(type, codepoints)
@ -107,46 +73,6 @@ module ActiveSupport
end
end
# Normalizes +string+, using the KC form unless told otherwise. NFKC is
# considered the best normalization form for passing strings to databases
# and validations.
#
# * <tt>string</tt> - The string to perform normalization on.
# * <tt>form</tt> - The short name of the form to normalize in: <tt>:c</tt>,
#   <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. When omitted,
#   ActiveSupport::Multibyte::Unicode.default_normalization_form is used.
#
# Raises ArgumentError when +form+ is not one of the known short names.
# A deprecation warning is emitted in either case.
def normalize(string, form = nil)
  form ||= @default_normalization_form

  # See https://www.unicode.org/reports/tr15, Table 1
  target_form = NORMALIZATION_FORM_ALIASES[form]

  unless target_form
    ActiveSupport::Deprecation.warn(<<-MSG.squish)
      ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
      removed from Rails 6.1. Use String#unicode_normalize instead.
    MSG
    raise ArgumentError, "#{form} is not a valid normalization variant", caller
  end

  # Name the exact String#unicode_normalize form in the warning, since the
  # default form here may differ from String#unicode_normalize's own default.
  ActiveSupport::Deprecation.warn(<<-MSG.squish)
    ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
    removed from Rails 6.1. Use String#unicode_normalize(:#{target_form}) instead.
  MSG
  string.unicode_normalize(target_form)
end
# Defines the deprecated +downcase+, +upcase+ and +swapcase+ wrappers. Each
# one emits a deprecation warning and then forwards to the String method of
# the same name.
%w(downcase upcase swapcase).each do |case_method|
  define_method(case_method) do |string|
    message = <<-MSG.squish
      ActiveSupport::Multibyte::Unicode##{case_method} is deprecated and
      will be removed from Rails 6.1. Use String methods directly.
    MSG
    ActiveSupport::Deprecation.warn(message)

    string.public_send(case_method)
  end
end
private
def recode_windows1252_chars(string)
string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)

View File

@ -678,36 +678,6 @@ class MultibyteCharsExtrasTest < ActiveSupport::TestCase
assert_equal BYTE_STRING.dup.mb_chars.class, ActiveSupport::Multibyte::Chars
end
def test_unicode_normalize_deprecation
  # Multibyte::Unicode defaults to `:nfkc`, whereas String#unicode_normalize
  # defaults to `:nfc`. When no form is given, the deprecation message must
  # therefore spell out the form that was actually used.
  assert_deprecated(/unicode_normalize\(:nfkc\)/) { ActiveSupport::Multibyte::Unicode.normalize("") }

  # An explicit short form is reported under its String#unicode_normalize name.
  assert_deprecated(/unicode_normalize\(:nfd\)/) { ActiveSupport::Multibyte::Unicode.normalize("", :d) }
end
# Each case-conversion wrapper must emit a deprecation warning when called.
def test_unicode_deprecations
  %i[downcase upcase swapcase].each do |case_method|
    assert_deprecated { ActiveSupport::Multibyte::Unicode.public_send(case_method, "") }
  end
end
# Normalizing a string that is not in a Unicode encoding must raise
# Encoding::CompatibilityError.
def test_normalize_non_unicode_string
  # Fullwidth Latin Capital Letter A, transcoded to Windows 31J
  non_unicode = "\u{ff21}".encode(Encoding::Windows_31J)

  assert_raise(Encoding::CompatibilityError) do
    # The deprecation warning itself is not under test here.
    ActiveSupport::Deprecation.silence { ActiveSupport::Multibyte::Unicode.normalize(non_unicode) }
  end
end
private
def string_from_classes(classes)
# Characters from the character classes as described in UAX #29

View File

@ -1,60 +0,0 @@
# frozen_string_literal: true
require_relative "abstract_unit"
require_relative "multibyte_test_helpers"
# Conformance test that round-trips every sample of the Unicode
# GraphemeBreakTest.txt data file through pack_graphemes / unpack_graphemes.
class MultibyteGraphemeBreakConformanceTest < ActiveSupport::TestCase
  include MultibyteTestHelpers

  UNIDATA_FILE = "/auxiliary/GraphemeBreakTest.txt"

  # Truthy when Downloader.download succeeded. A failure is deliberately
  # swallowed (leaving RUN_P nil) so that +setup+ skips the tests instead of
  # erroring out.
  RUN_P = begin
    Downloader.download(UNIDATA_URL + UNIDATA_FILE, CACHE_DIR + UNIDATA_FILE)
  rescue
  end

  def setup
    skip "Unable to download test data" unless RUN_P
  end

  def test_breaks
    ActiveSupport::Deprecation.silence do
      each_line_of_break_tests do |*cols|
        *clusters, comment = *cols
        packed = ActiveSupport::Multibyte::Unicode.pack_graphemes(clusters)
        assert_equal clusters, ActiveSupport::Multibyte::Unicode.unpack_graphemes(packed), comment
      end
    end
  end

  private
    # Reads the cached data file and yields, for each test line, its grapheme
    # clusters (each a list of integer codepoints) followed by the line's
    # trailing comment.
    def each_line_of_break_tests(&block)
      lines = 0
      max_test_lines = 0 # Don't limit below 21, because that's the header of the testfile
      File.open(File.join(CACHE_DIR, UNIDATA_FILE), "r") do |f|
        until f.eof? || (max_test_lines > 21 && lines > max_test_lines)
          lines += 1

          # Non-bang chomp: chomp! returns nil when there is nothing to strip
          # (e.g. a final line without a trailing newline), which would make
          # the calls below fail on nil.
          line = f.gets.chomp
          next if line.empty? || line.start_with?("#")

          cols, comment = line.split("#")
          # Cluster breaks are represented by ÷
          clusters = cols.split("÷").map { |e| e.strip }.reject { |e| e.empty? }
          clusters = clusters.map do |cluster|
            # Codepoints within each cluster are separated by ×
            codepoints = cluster.split("×").map { |e| e.strip }.reject { |e| e.empty? }
            # codepoints are in hex in the test suite, pack wants them as integers
            codepoints.map { |codepoint| codepoint.to_i(16) }
          end

          # The tests contain a solitary U+D800 <Non Private Use High
          # Surrogate, First> character, which Ruby does not allow to stand
          # alone in a UTF-8 string. So we'll just skip it.
          next if clusters.flatten.include?(0xd800)

          clusters << comment.strip

          yield(*clusters)
        end
      end
    end
end

View File

@ -1,32 +1,6 @@
# frozen_string_literal: true
require "fileutils"
require "open-uri"
require "tmpdir"
module MultibyteTestHelpers
# Fetches remote test data into a local file, skipping the fetch when the
# destination file already exists.
class Downloader
  # Copies the content opened from +from+ into the local path +to+, unless
  # +to+ already exists. The destination directory is created when missing.
  #
  # Returns +true+. Errors raised while opening +from+ or writing +to+ are
  # not rescued here and propagate to the caller.
  def self.download(from, to)
    unless File.exist?(to)
      # FileUtils.mkdir_p rather than shelling out to `mkdir -p`: the path
      # never goes through a shell, so directory names containing spaces or
      # shell metacharacters are created correctly. It is a no-op when the
      # directory already exists.
      FileUtils.mkdir_p(File.dirname(to))

      URI.open(from) do |source|
        File.open(to, "w") do |target|
          source.each_line do |l|
            target.write l
          end
        end
      end
    end
    true
  end
end
UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}/ucd"
CACHE_DIR = "#{Dir.tmpdir}/cache/unicode_conformance/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}"
FileUtils.mkdir_p(CACHE_DIR)
UNICODE_STRING = "こにちわ"
ASCII_STRING = "ohayo"
BYTE_STRING = (+"\270\236\010\210\245").force_encoding("ASCII-8BIT").freeze

View File

@ -306,6 +306,12 @@ Please refer to the [Changelog][active-support] for detailed changes.
* Remove deprecated `ActiveSupport::Multibyte::Chars#consumes?` and `ActiveSupport::Multibyte::Chars#normalize`.
* Remove deprecated `ActiveSupport::Multibyte::Unicode.pack_graphemes`,
`ActiveSupport::Multibyte::Unicode.unpack_graphemes`,
`ActiveSupport::Multibyte::Unicode.normalize`,
`ActiveSupport::Multibyte::Unicode.downcase`,
`ActiveSupport::Multibyte::Unicode.upcase` and `ActiveSupport::Multibyte::Unicode.swapcase`.
### Deprecations
### Notable changes