diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md index fddac3d429..ac389c2b5f 100644 --- a/activesupport/CHANGELOG.md +++ b/activesupport/CHANGELOG.md @@ -1,3 +1,11 @@ +* Remove deprecated `ActiveSupport::Multibyte::Unicode.pack_graphemes`, + `ActiveSupport::Multibyte::Unicode.unpack_graphemes`, + `ActiveSupport::Multibyte::Unicode.normalize`, + `ActiveSupport::Multibyte::Unicode.downcase`, + `ActiveSupport::Multibyte::Unicode.upcase` and `ActiveSupport::Multibyte::Unicode.swapcase`. + + *Rafael Mendonça França* + * Remove deprecated `ActiveSupport::Multibyte::Chars#consumes?` and `ActiveSupport::Multibyte::Chars#normalize`. *Rafael Mendonça França* diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index e992bf29ca..072824339f 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -10,13 +10,6 @@ module ActiveSupport # information about normalization. NORMALIZATION_FORMS = [:c, :kc, :d, :kd] - NORMALIZATION_FORM_ALIASES = { # :nodoc: - c: :nfc, - d: :nfd, - kc: :nfkc, - kd: :nfkd - } - # The Unicode version that is supported by the implementation UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"] @@ -25,34 +18,7 @@ module ActiveSupport # in NORMALIZATION_FORMS. # # ActiveSupport::Multibyte::Unicode.default_normalization_form = :c - attr_accessor :default_normalization_form - @default_normalization_form = :kc - - # Unpack the string at grapheme boundaries. Returns a list of character - # lists. - # - # Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]] - # Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]] - def unpack_graphemes(string) - ActiveSupport::Deprecation.warn(<<-MSG.squish) - ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be - removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead. - MSG - - string.scan(/\X/).map(&:codepoints) - end - - # Reverse operation of unpack_graphemes. - # - # Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि' - def pack_graphemes(unpacked) - ActiveSupport::Deprecation.warn(<<-MSG.squish) - ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be - removed from Rails 6.1. Use array.flatten.pack("U*") instead. - MSG - - unpacked.flatten.pack("U*") - end + attr_accessor :default_normalization_form # TODO: Deprecate # Decompose composed characters to the decomposed form. def decompose(type, codepoints) @@ -107,46 +73,6 @@ module ActiveSupport end end - # Returns the KC normalization of the string by default. NFKC is - # considered the best normalization form for passing strings to databases - # and validations. - # - # * string - The string to perform normalization on. - # * form - The form you want to normalize in. Should be one of - # the following: :c, :kc, :d, or :kd. - # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form. - def normalize(string, form = nil) - form ||= @default_normalization_form - - # See https://www.unicode.org/reports/tr15, Table 1 - if alias_form = NORMALIZATION_FORM_ALIASES[form] - ActiveSupport::Deprecation.warn(<<-MSG.squish) - ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be - removed from Rails 6.1. Use String#unicode_normalize(:#{alias_form}) instead. - MSG - - string.unicode_normalize(alias_form) - else - ActiveSupport::Deprecation.warn(<<-MSG.squish) - ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be - removed from Rails 6.1. Use String#unicode_normalize instead. - MSG - - raise ArgumentError, "#{form} is not a valid normalization variant", caller - end - end - - %w(downcase upcase swapcase).each do |method| - define_method(method) do |string| - ActiveSupport::Deprecation.warn(<<-MSG.squish) - ActiveSupport::Multibyte::Unicode##{method} is deprecated and - will be removed from Rails 6.1. Use String methods directly. - MSG - - string.public_send(method) - end - end - private def recode_windows1252_chars(string) string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace) diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb index 35560ede23..2e7dcbbf6f 100644 --- a/activesupport/test/multibyte_chars_test.rb +++ b/activesupport/test/multibyte_chars_test.rb @@ -678,36 +678,6 @@ class MultibyteCharsExtrasTest < ActiveSupport::TestCase assert_equal BYTE_STRING.dup.mb_chars.class, ActiveSupport::Multibyte::Chars end - def test_unicode_normalize_deprecation - # String#unicode_normalize default form is `:nfc`, and - # different than Multibyte::Unicode default, `:nkfc`. - # Deprecation should suggest the right form if no params - # are given and default is used. - assert_deprecated(/unicode_normalize\(:nfkc\)/) do - ActiveSupport::Multibyte::Unicode.normalize("") - end - - assert_deprecated(/unicode_normalize\(:nfd\)/) do - ActiveSupport::Multibyte::Unicode.normalize("", :d) - end - end - - def test_unicode_deprecations - assert_deprecated { ActiveSupport::Multibyte::Unicode.downcase("") } - assert_deprecated { ActiveSupport::Multibyte::Unicode.upcase("") } - assert_deprecated { ActiveSupport::Multibyte::Unicode.swapcase("") } - end - - def test_normalize_non_unicode_string - # Fullwidth Latin Capital Letter A in Windows 31J - str = "\u{ff21}".encode(Encoding::Windows_31J) - assert_raise Encoding::CompatibilityError do - ActiveSupport::Deprecation.silence do - ActiveSupport::Multibyte::Unicode.normalize(str) - end - end - end - private def string_from_classes(classes) # Characters from the character classes as described in UAX #29 diff --git a/activesupport/test/multibyte_grapheme_break_conformance_test.rb b/activesupport/test/multibyte_grapheme_break_conformance_test.rb deleted file mode 100644 index 30b1d7c42c..0000000000 --- a/activesupport/test/multibyte_grapheme_break_conformance_test.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -require_relative "abstract_unit" -require_relative "multibyte_test_helpers" - -class MultibyteGraphemeBreakConformanceTest < ActiveSupport::TestCase - include MultibyteTestHelpers - - UNIDATA_FILE = "/auxiliary/GraphemeBreakTest.txt" - RUN_P = begin - Downloader.download(UNIDATA_URL + UNIDATA_FILE, CACHE_DIR + UNIDATA_FILE) - rescue - end - - def setup - skip "Unable to download test data" unless RUN_P - end - - def test_breaks - ActiveSupport::Deprecation.silence do - each_line_of_break_tests do |*cols| - *clusters, comment = *cols - packed = ActiveSupport::Multibyte::Unicode.pack_graphemes(clusters) - assert_equal clusters, ActiveSupport::Multibyte::Unicode.unpack_graphemes(packed), comment - end - end - end - - private - def each_line_of_break_tests(&block) - lines = 0 - max_test_lines = 0 # Don't limit below 21, because that's the header of the testfile - File.open(File.join(CACHE_DIR, UNIDATA_FILE), "r") do | f | - until f.eof? || (max_test_lines > 21 && lines > max_test_lines) - lines += 1 - line = f.gets.chomp! - next if line.empty? || line.start_with?("#") - - cols, comment = line.split("#") - # Cluster breaks are represented by ÷ - clusters = cols.split("÷").map { |e| e.strip }.reject { |e| e.empty? } - clusters = clusters.map do |cluster| - # Codepoints within each cluster are separated by × - codepoints = cluster.split("×").map { |e| e.strip }.reject { |e| e.empty? } - # codepoints are in hex in the test suite, pack wants them as integers - codepoints.map { |codepoint| codepoint.to_i(16) } - end - - # The tests contain a solitary U+D800 character, which Ruby does not allow to stand - # alone in a UTF-8 string. So we'll just skip it. - next if clusters.flatten.include?(0xd800) - - clusters << comment.strip - - yield(*clusters) - end - end - end -end diff --git a/activesupport/test/multibyte_test_helpers.rb b/activesupport/test/multibyte_test_helpers.rb index af7038c55b..28ed286021 100644 --- a/activesupport/test/multibyte_test_helpers.rb +++ b/activesupport/test/multibyte_test_helpers.rb @@ -1,32 +1,6 @@ # frozen_string_literal: true -require "fileutils" -require "open-uri" -require "tmpdir" - module MultibyteTestHelpers - class Downloader - def self.download(from, to) - unless File.exist?(to) - unless File.exist?(File.dirname(to)) - system "mkdir -p #{File.dirname(to)}" - end - URI.open(from) do |source| - File.open(to, "w") do |target| - source.each_line do |l| - target.write l - end - end - end - end - true - end - end - - UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}/ucd" - CACHE_DIR = "#{Dir.tmpdir}/cache/unicode_conformance/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}" - FileUtils.mkdir_p(CACHE_DIR) - UNICODE_STRING = "こにちわ" ASCII_STRING = "ohayo" BYTE_STRING = (+"\270\236\010\210\245").force_encoding("ASCII-8BIT").freeze diff --git a/guides/source/6_1_release_notes.md b/guides/source/6_1_release_notes.md index 6f6e073f39..eb74f1bb2c 100644 --- a/guides/source/6_1_release_notes.md +++ b/guides/source/6_1_release_notes.md @@ -306,6 +306,12 @@ Please refer to the [Changelog][active-support] for detailed changes. * Remove deprecated `ActiveSupport::Multibyte::Chars#consumes?` and `ActiveSupport::Multibyte::Chars#normalize`. +* Remove deprecated `ActiveSupport::Multibyte::Unicode.pack_graphemes`, + `ActiveSupport::Multibyte::Unicode.unpack_graphemes`, + `ActiveSupport::Multibyte::Unicode.normalize`, + `ActiveSupport::Multibyte::Unicode.downcase`, + `ActiveSupport::Multibyte::Unicode.upcase` and `ActiveSupport::Multibyte::Unicode.swapcase`. + ### Deprecations ### Notable changes