1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

fix for emoji-data.txt

* common.mk: download emoji-data.txt.  As emoji data files are
  located in a separate directory on the Unicode.org site, rearranged
  the Unicode data file directories to match the site's layout.

* tool/enc-unicode.rb (get_file): search emoji data files in the
  second argument path.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60977 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2017-12-02 03:12:51 +00:00
parent 8b180dd74e
commit 01830719f6
5 changed files with 677 additions and 392 deletions

View file

@ -16,12 +16,14 @@ gnumake_recursive =
enable_shared = $(ENABLE_SHARED:no=) enable_shared = $(ENABLE_SHARED:no=)
UNICODE_VERSION = 10.0.0 UNICODE_VERSION = 10.0.0
UNICODE_EMOJI_VERSION = 5.0
### set the following environment variable or uncomment the line if ### set the following environment variable or uncomment the line if
### the Unicode data files should be updated completely on every update ('make up',...). ### the Unicode data files should be updated completely on every update ('make up',...).
# ALWAYS_UPDATE_UNICODE = yes # ALWAYS_UPDATE_UNICODE = yes
UNICODE_DATA_DIR = enc/unicode/data/$(UNICODE_VERSION) UNICODE_DATA_DIR = enc/unicode/data/$(UNICODE_VERSION)/ucd
UNICODE_SRC_DATA_DIR = $(srcdir)/$(UNICODE_DATA_DIR) UNICODE_SRC_DATA_DIR = $(srcdir)/$(UNICODE_DATA_DIR)
UNICODE_SRC_EMOJI_DATA_DIR = $(srcdir)/enc/unicode/data/emoji/$(UNICODE_EMOJI_VERSION)
UNICODE_HDR_DIR = $(srcdir)/enc/unicode/$(UNICODE_VERSION) UNICODE_HDR_DIR = $(srcdir)/enc/unicode/$(UNICODE_VERSION)
UNICODE_DATA_HEADERS = \ UNICODE_DATA_HEADERS = \
$(UNICODE_HDR_DIR)/casefold.h \ $(UNICODE_HDR_DIR)/casefold.h \
@ -1207,21 +1209,34 @@ UNICODE_PROPERTY_FILES = \
$(UNICODE_SRC_DATA_DIR)/auxiliary/GraphemeBreakProperty.txt \ $(UNICODE_SRC_DATA_DIR)/auxiliary/GraphemeBreakProperty.txt \
$(empty) $(empty)
UNICODE_EMOJI_FILES = \
$(UNICODE_SRC_EMOJI_DATA_DIR)/emoji-data.txt \
$(empty)
update-unicode: $(UNICODE_FILES) update-unicode: $(UNICODE_FILES)
CACHE_DIR = $(srcdir)/.downloaded-cache CACHE_DIR = $(srcdir)/.downloaded-cache
UNICODE_DOWNLOAD = \ UNICODE_DOWNLOAD = \
$(BASERUBY) $(srcdir)/tool/downloader.rb \ $(BASERUBY) $(srcdir)/tool/downloader.rb \
--cache-dir=$(CACHE_DIR) \ --cache-dir=$(CACHE_DIR) \
-d $(srcdir)/$(UNICODE_DATA_DIR) \ -d $(UNICODE_SRC_DATA_DIR) \
-p $(UNICODE_VERSION)/ucd \ -p $(UNICODE_VERSION)/ucd \
-e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode -e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode
UNICODE_EMOJI_DOWNLOAD = \
$(BASERUBY) $(srcdir)/tool/downloader.rb \
--cache-dir=$(CACHE_DIR) \
-d $(UNICODE_SRC_EMOJI_DATA_DIR) \
-p emoji/$(UNICODE_EMOJI_VERSION) \
-e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode
$(UNICODE_PROPERTY_FILES): update-unicode-property-files $(UNICODE_PROPERTY_FILES): update-unicode-property-files
update-unicode-property-files: update-unicode-property-files:
$(ECHO) Downloading Unicode $(UNICODE_VERSION) property files... $(ECHO) Downloading Unicode $(UNICODE_VERSION) property files...
$(Q) $(MAKEDIRS) "$(UNICODE_SRC_DATA_DIR)/auxiliary" $(Q) $(MAKEDIRS) "$(UNICODE_SRC_DATA_DIR)/auxiliary"
$(Q) $(UNICODE_DOWNLOAD) $(UNICODE_PROPERTY_FILES) $(Q) $(UNICODE_DOWNLOAD) $(UNICODE_PROPERTY_FILES)
# Emoji data is versioned separately from the UCD; report the emoji version being fetched.
$(ECHO) Downloading Unicode emoji $(UNICODE_EMOJI_VERSION) files...
$(Q) $(MAKEDIRS) "$(UNICODE_SRC_EMOJI_DATA_DIR)"
$(Q) $(UNICODE_EMOJI_DOWNLOAD) $(UNICODE_EMOJI_FILES)
$(UNICODE_FILES): update-unicode-files $(UNICODE_FILES): update-unicode-files
update-unicode-files: update-unicode-files:
@ -1259,7 +1274,9 @@ $(UNICODE_HDR_DIR)/$(ALWAYS_UPDATE_UNICODE:yes=name2ctype.h): \
$(UNICODE_HDR_DIR)/name2ctype.h: $(UNICODE_HDR_DIR)/name2ctype.h:
$(MAKEDIRS) $(@D) $(MAKEDIRS) $(@D)
$(BOOTSTRAPRUBY) $(srcdir)/tool/enc-unicode.rb --header $(UNICODE_SRC_DATA_DIR) > $@ $(BOOTSTRAPRUBY) $(srcdir)/tool/enc-unicode.rb --header \
$(UNICODE_SRC_DATA_DIR) $(UNICODE_SRC_EMOJI_DATA_DIR) > $@.new
$(MV) $@.new $@
# the next non-comment line was: # the next non-comment line was:
# $(UNICODE_HDR_DIR)/casefold.h: $(srcdir)/enc/unicode/case-folding.rb \ # $(UNICODE_HDR_DIR)/casefold.h: $(srcdir)/enc/unicode/case-folding.rb \

View file

@ -1419,7 +1419,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0130, {2|F|D, {0x0069, 0x0307}}}, {0x0130, {2|F|D, {0x0069, 0x0307}}},
}; };
/* C code produced by gperf version 3.0.4 */ /* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseFold_11_hash -N onigenc_unicode_CaseFold_11_lookup -n */ /* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseFold_11_hash -N onigenc_unicode_CaseFold_11_lookup -n */
/* maximum key range = 3623, duplicates = 0 */ /* maximum key range = 3623, duplicates = 0 */
@ -1462,12 +1462,6 @@ onigenc_unicode_CaseFold_11_hash(const OnigCodePoint code)
return asso_values[bits_of(code, 2)+81] + asso_values[bits_of(code, 1)+2] + asso_values[bits_of(code, 0)]; return asso_values[bits_of(code, 2)+81] + asso_values[bits_of(code, 1)+2] + asso_values[bits_of(code, 0)];
} }
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList3 * static const CodePointList3 *
onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code) onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code)
{ {
@ -3583,9 +3577,9 @@ onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code)
if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE) if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE)
{ {
register int key = onigenc_unicode_CaseFold_11_hash(code); register unsigned int key = onigenc_unicode_CaseFold_11_hash(code);
if (key <= MAX_HASH_VALUE && key >= 0) if (key <= MAX_HASH_VALUE)
{ {
register short s = wordlist[key]; register short s = wordlist[key];
@ -4868,7 +4862,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x0069, {1|U, {0x0049}}}, {0x0069, {1|U, {0x0049}}},
}; };
/* C code produced by gperf version 3.0.4 */ /* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_11_hash -N onigenc_unicode_CaseUnfold_11_lookup -n */ /* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_11_hash -N onigenc_unicode_CaseUnfold_11_lookup -n */
/* maximum key range = 2216, duplicates = 0 */ /* maximum key range = 2216, duplicates = 0 */
@ -4910,12 +4904,6 @@ onigenc_unicode_CaseUnfold_11_hash(const OnigCodePoint code)
return asso_values[bits_of(code, 2)+66] + asso_values[bits_of(code, 1)+4] + asso_values[bits_of(code, 0)]; return asso_values[bits_of(code, 2)+66] + asso_values[bits_of(code, 1)+4] + asso_values[bits_of(code, 0)];
} }
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList3 * static const CodePointList3 *
onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code) onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code)
{ {
@ -6602,9 +6590,9 @@ onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code)
if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE) if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE)
{ {
register int key = onigenc_unicode_CaseUnfold_11_hash(code); register unsigned int key = onigenc_unicode_CaseUnfold_11_hash(code);
if (key <= MAX_HASH_VALUE && key >= 0) if (key <= MAX_HASH_VALUE)
{ {
register short s = wordlist[key]; register short s = wordlist[key];
@ -6679,7 +6667,7 @@ static const CaseUnfold_12_Type CaseUnfold_12_Table[] = {
{{0x0069, 0x0307}, {1, {0x0130}}}, {{0x0069, 0x0307}, {1, {0x0130}}},
}; };
/* C code produced by gperf version 3.0.4 */ /* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3,4,5,6 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_12_hash -N onigenc_unicode_CaseUnfold_12_lookup -n */ /* Command-line: gperf -7 -k1,2,3,4,5,6 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_12_hash -N onigenc_unicode_CaseUnfold_12_lookup -n */
/* maximum key range = 71, duplicates = 0 */ /* maximum key range = 71, duplicates = 0 */
@ -6714,12 +6702,6 @@ onigenc_unicode_CaseUnfold_12_hash(const OnigCodePoint *codes)
return asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)]; return asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
} }
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList2 * static const CodePointList2 *
onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes) onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes)
{ {
@ -6804,9 +6786,9 @@ onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes)
if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE && if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE &&
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE) codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE)
{ {
register int key = onigenc_unicode_CaseUnfold_12_hash(codes); register unsigned int key = onigenc_unicode_CaseUnfold_12_hash(codes);
if (key <= MAX_HASH_VALUE && key >= 0) if (key <= MAX_HASH_VALUE)
{ {
register short s = wordlist[key]; register short s = wordlist[key];
@ -6835,7 +6817,7 @@ static const CaseUnfold_13_Type CaseUnfold_13_Table[] = {
{{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}}, {{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}},
}; };
/* C code produced by gperf version 3.0.4 */ /* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup -n */ /* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup -n */
/* maximum key range = 20, duplicates = 0 */ /* maximum key range = 20, duplicates = 0 */
@ -6870,12 +6852,6 @@ onigenc_unicode_CaseUnfold_13_hash(const OnigCodePoint *codes)
return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)]; return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
} }
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList2 * static const CodePointList2 *
onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes) onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
{ {
@ -6918,9 +6894,9 @@ onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE && codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE &&
codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE) codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE)
{ {
register int key = onigenc_unicode_CaseUnfold_13_hash(codes); register unsigned int key = onigenc_unicode_CaseUnfold_13_hash(codes);
if (key <= MAX_HASH_VALUE && key >= 0) if (key <= MAX_HASH_VALUE)
{ {
register short s = wordlist[key]; register short s = wordlist[key];

File diff suppressed because it is too large Load diff

View file

@ -7,7 +7,7 @@
# Constants for input and output directory # Constants for input and output directory
InputDataDir = ARGV[0] || 'enc/unicode/data' InputDataDir = ARGV[0] || 'enc/unicode/data'
unicode_version = InputDataDir[/[\d.]+\z/] unicode_version = InputDataDir[/.*\/(\d+\.\d+\.\d+)(?=\/|\z)/, 1]
# convenience methods # convenience methods
class Integer class Integer

View file

@ -14,8 +14,8 @@ if ARGV[0] == "--header"
header = true header = true
ARGV.shift ARGV.shift
end end
unless ARGV.size == 1 unless ARGV.size == 2
abort "Usage: #{$0} data_directory" abort "Usage: #{$0} data_directory emoji_data_directory"
end end
$unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/] $unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/]
@ -302,7 +302,7 @@ def constantize_blockname(name)
end end
def get_file(name) def get_file(name)
File.join(ARGV[0], name) File.join(ARGV[name.start_with?("emoji-") ? 1 : 0], name)
end end
def data_foreach(name, &block) def data_foreach(name, &block)