1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

merge revision(s) 67439,67441,67453,67476: [Backport #15740]

change lib/unicode_normalize/tables.rb to a single item per line to make diffs shorter

        * template/unicode_norm_gen.tmpl: Change formatting of output to produce only a
          single item (or range) for each line to make future diffs shorter and easier
          to understand and check.

        * lib/unicode_normalize/tables.rb: output of the above

        update to Unicode Version 12.1.0 (beta)

        Unicode Version 12.1.0 adds a single character, U+32FF SQUARE ERA NAME REIWA,
        for the new Japanese era starting on May 1st. 12.1.0 will be finalized only on
        May 7th, so we go with the beta version: further changes in the data we
        need are highly unlikely, and we want to make sure Ruby is ready for the new era.

        * common.mk: change UNICODE_VERSION to 12.1.0, UNICODE_BETA to YES

        * enc/unicode/12.1.0, enc/unicode/12.1.0/casefold.h, enc/unicode/12.1.0/name2ctype.h:
          add directory and generated data files for new version

        * lib/unicode_normalize/tables.rb: update for new character

        * test/ruby/test_regexp.rb: add test for character property age=12.1

        * test/test_unicode_normalize.rb: add test for NFKC decomposition of new character

        This (mostly) completes issue #15195.

        remove Unicode 12.0.0 related directory and generated files


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67525 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2019-04-13 15:01:39 +00:00
parent c5f1c9e222
commit 69ec39363e
9 changed files with 10111 additions and 1545 deletions

7
NEWS
View file

@@ -272,8 +272,11 @@ sufficient information, see the ChangeLog file or Redmine
[Regexp/String]
* Update Unicode version and Emoji version from 11.0.0 to 12.0.0.
[Feature #15321]
* Update Unicode version to 12.1.0, adding support for
U+32FF SQUARE ERA NAME REIWA [Feature #15195]
* Update Unicode version and Emoji version from 11.0.0 to
12.0.0. [Feature #15321]
* Update Unicode version from 10.0.0 to 11.0.0. [Feature #14802]

View file

@@ -15,9 +15,9 @@ mflags = $(MFLAGS)
gnumake_recursive =
enable_shared = $(ENABLE_SHARED:no=)
UNICODE_VERSION = 12.0.0
UNICODE_VERSION = 12.1.0
UNICODE_EMOJI_VERSION = 12.0
UNICODE_BETA = NO
UNICODE_BETA = YES
### set the following environment variable or uncomment the line if
### the Unicode data files should be updated completely on every update ('make up',...).

View file

@@ -3,14 +3,14 @@
#if defined ONIG_UNICODE_VERSION_STRING && !( \
ONIG_UNICODE_VERSION_MAJOR == 12 && \
ONIG_UNICODE_VERSION_MINOR == 0 && \
ONIG_UNICODE_VERSION_MINOR == 1 && \
ONIG_UNICODE_VERSION_TEENY == 0 && \
1)
# error ONIG_UNICODE_VERSION_STRING mismatch
#endif
#define ONIG_UNICODE_VERSION_STRING "12.0.0"
#define ONIG_UNICODE_VERSION_STRING "12.1.0"
#define ONIG_UNICODE_VERSION_MAJOR 12
#define ONIG_UNICODE_VERSION_MINOR 0
#define ONIG_UNICODE_VERSION_MINOR 1
#define ONIG_UNICODE_VERSION_TEENY 0
static const CaseFold_11_Type CaseFold_11_Table[] = {

File diff suppressed because it is too large Load diff

View file

@@ -42,7 +42,7 @@ end
class Array
def to_UTF8() collect {|c| c.to_UTF8}.join('') end
def each_regexp_chars(n = 8) # converts an array of Integers to character ranges
def each_regexp_chars(n = 1) # converts an array of Integers to character ranges
sort.inject([]) do |ranges, value|
if ranges.last and ranges.last[1]+1>=value
ranges.last[1] = value
@@ -193,28 +193,28 @@ module UnicodeNormalize # :nodoc:
"<%end%>]"
class_table = {
% combining_class.each_slice(8) do |slice|
<% slice.each do |key, value|%> "<%=key.to_UTF8%>"=><%=value%><%=%>,<% end%>
% combining_class.each do |key, value|
"<%=key.to_UTF8%>"=><%=value%><%=%>,
% end
}
class_table.default = 0
CLASS_TABLE = class_table.freeze
DECOMPOSITION_TABLE = {
% decomposition_table.each_slice(8) do |slice|
<% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,<% end%>
% decomposition_table.each do |key, value|
"<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,
% end
}.freeze
KOMPATIBLE_TABLE = {
% kompatible_table.each_slice(8) do |slice|
<% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,<% end%>
% kompatible_table.each do |key, value|
"<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,
% end
}.freeze
COMPOSITION_TABLE = {
% composition_table.each_slice(8) do |slice|
<% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,<% end%>
% composition_table.each do |key, value|
"<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>"<%=%>,
% end
}.freeze
end

View file

@@ -1075,6 +1075,9 @@ class TestRegexp < Test::Unit::TestCase
assert_no_match(/^\p{age=3.0}$/u, "\u2754")
assert_no_match(/^\p{age=2.0}$/u, "\u2754")
assert_no_match(/^\p{age=1.1}$/u, "\u2754")
assert_no_match(/^\p{age=12.0}$/u, "\u32FF")
assert_match(/^\p{age=12.1}$/u, "\u32FF")
end
MatchData_A = eval("class MatchData_\u{3042} < MatchData; self; end")

View file

@@ -187,6 +187,10 @@ class TestUnicodeNormalize
assert_raise(Encoding::CompatibilityError) { "abc".force_encoding('ISO-8859-1').unicode_normalized? }
end
def test_reiwa
assert_equal "\u4EE4\u548C", "\u32FF".unicode_normalize(:nfkc)
end
def test_us_ascii
ascii_string = 'abc'.encode('US-ASCII')

View file

@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.6.3"
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
#define RUBY_PATCHLEVEL 55
#define RUBY_PATCHLEVEL 56
#define RUBY_RELEASE_YEAR 2019
#define RUBY_RELEASE_MONTH 4