merge revision(s) 67439,67441,67453,67476: [Backport #15740]

change lib/unicode_normalize/tables.rb to single item per line to make diffs shorter

* template/unicode_norm_gen.tmpl: Change formatting of output to produce only a single item (or range) for each line to make future diffs shorter and easier to understand and check.
* lib/unicode_normalize/tables.rb: output of the above

update to Unicode Version 12.1.0 (beta)

Unicode Version 12.1.0 adds one single character, U+32FF SQUARE ERA NAME REIWA, for the new Japanese era starting on May 1st. 12.1.0 will be finalized only on May 7th, so we go with the beta version because further changes in the data we need are highly unlikely, and we want to make sure Ruby is ready for the new era.

* common.mk: change UNICODE_VERSION to 12.1.0, UNICODE_BETA to YES
* enc/unicode/12.1.0, enc/unicode/12.1.0/casefold.h, enc/unicode/12.1.0/name2ctype.h: add directory and generated data files for new version
* lib/unicode_normalize/tables.rb: update for new character
* test/ruby/test_regexp.rb: add test for character property age=12.1
* test/test_unicode_normalize.rb: add test for NFKC decomposition of new character

This (mostly) completes issue #15195.

remove Unicode 12.0.0 related directory and generated files

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67525 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2019-04-13 15:01:39 +00:00 · 2019-04-13 15:01:39 +00:00 · 69ec39363e
commit 69ec39363e
parent c5f1c9e222
9 changed files with 10111 additions and 1545 deletions
--- a/7
+++ b/7
@@ -272,8 +272,11 @@ sufficient information, see the ChangeLog file or Redmine

 [Regexp/String]

-    * Update Unicode version and Emoji version from 11.0.0 to 12.0.0.
-      [Feature #15321]
+    * Update Unicode version to 12.1.0, adding support for
+      U+32FF SQUARE ERA NAME REIWA [Feature #15195]
+
+    * Update Unicode version and Emoji version from 11.0.0 to
+      12.0.0. [Feature #15321]

    * Update Unicode version from 10.0.0 to 11.0.0. [Feature #14802]

--- a/common.mk
+++ b/common.mk
@@ -15,9 +15,9 @@ mflags = $(MFLAGS)
 gnumake_recursive =
 enable_shared = $(ENABLE_SHARED:no=)

-UNICODE_VERSION = 12.0.0
+UNICODE_VERSION = 12.1.0
 UNICODE_EMOJI_VERSION = 12.0
-UNICODE_BETA = NO
+UNICODE_BETA = YES

 ### set the following environment variable or uncomment the line if
 ### the Unicode data files should be updated completely on every update ('make up',...).
--- a/enc/unicode/12.1.0/casefold.h
+++ b/enc/unicode/12.1.0/casefold.h
@@ -3,14 +3,14 @@

 #if defined ONIG_UNICODE_VERSION_STRING && !( \
      ONIG_UNICODE_VERSION_MAJOR == 12 && \
-      ONIG_UNICODE_VERSION_MINOR == 0 && \
+      ONIG_UNICODE_VERSION_MINOR == 1 && \
      ONIG_UNICODE_VERSION_TEENY == 0 && \
      1)
 # error ONIG_UNICODE_VERSION_STRING mismatch
 #endif
-#define ONIG_UNICODE_VERSION_STRING "12.0.0"
+#define ONIG_UNICODE_VERSION_STRING "12.1.0"
 #define ONIG_UNICODE_VERSION_MAJOR 12
-#define ONIG_UNICODE_VERSION_MINOR 0
+#define ONIG_UNICODE_VERSION_MINOR 1
 #define ONIG_UNICODE_VERSION_TEENY 0

 static const CaseFold_11_Type CaseFold_11_Table[] = {
--- a/enc/unicode/12.1.0/name2ctype.h
+++ b/enc/unicode/12.1.0/name2ctype.h
--- a/lib/unicode_normalize/tables.rb
+++ b/lib/unicode_normalize/tables.rb
--- a/template/unicode_norm_gen.tmpl
+++ b/template/unicode_norm_gen.tmpl
@@ -42,7 +42,7 @@ end
 class Array
  def to_UTF8() collect {|c| c.to_UTF8}.join('') end

-  def each_regexp_chars(n = 8) # converts an array of Integers to character ranges
+  def each_regexp_chars(n = 1) # converts an array of Integers to character ranges
    sort.inject([]) do |ranges, value|
      if ranges.last and ranges.last[1]+1>=value
        ranges.last[1] = value
@@ -193,28 +193,28 @@ module UnicodeNormalize  # :nodoc:
    "<%end%>]"

  class_table = {
-% combining_class.each_slice(8) do |slice|
-   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=><%=value%><%=%>,<% end%>
+% combining_class.each do |key, value|
+    "<%=key.to_UTF8%>"=><%=value%><%=%>,
 % end
  }
  class_table.default = 0
  CLASS_TABLE = class_table.freeze

  DECOMPOSITION_TABLE = {
-% decomposition_table.each_slice(8) do |slice|
-   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,<% end%>
+% decomposition_table.each do |key, value|
+    "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,
 % end
  }.freeze

  KOMPATIBLE_TABLE = {
-% kompatible_table.each_slice(8) do |slice|
-   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,<% end%>
+% kompatible_table.each do |key, value|
+    "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,
 % end
  }.freeze

  COMPOSITION_TABLE = {
-% composition_table.each_slice(8) do |slice|
-   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,<% end%>
+% composition_table.each do |key, value|
+    "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,
 % end
  }.freeze
 end
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -1075,6 +1075,9 @@ class TestRegexp < Test::Unit::TestCase
    assert_no_match(/^\p{age=3.0}$/u, "\u2754")
    assert_no_match(/^\p{age=2.0}$/u, "\u2754")
    assert_no_match(/^\p{age=1.1}$/u, "\u2754")
+
+    assert_no_match(/^\p{age=12.0}$/u, "\u32FF")
+    assert_match(/^\p{age=12.1}$/u, "\u32FF")
  end

  MatchData_A = eval("class MatchData_\u{3042} < MatchData; self; end")
--- a/test/test_unicode_normalize.rb
+++ b/test/test_unicode_normalize.rb
@@ -187,6 +187,10 @@ class TestUnicodeNormalize
    assert_raise(Encoding::CompatibilityError) { "abc".force_encoding('ISO-8859-1').unicode_normalized? }
  end

+  def test_reiwa
+    assert_equal "\u4EE4\u548C", "\u32FF".unicode_normalize(:nfkc)
+  end
+
  def test_us_ascii
    ascii_string = 'abc'.encode('US-ASCII')

--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
 #define RUBY_VERSION "2.6.3"
 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 55
+#define RUBY_PATCHLEVEL 56

 #define RUBY_RELEASE_YEAR 2019
 #define RUBY_RELEASE_MONTH 4