mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
fix for emoji-data.txt
* common.mk: download emoji-data.txt. As the emoji data files are located in a separate directory on the Unicode.org site, rearranged the Unicode data file directories to match the site's layout. * tool/enc-unicode.rb (get_file): search for emoji data files in the path given as the second argument. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60977 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
8b180dd74e
commit
01830719f6
5 changed files with 677 additions and 392 deletions
23
common.mk
23
common.mk
|
@ -16,12 +16,14 @@ gnumake_recursive =
|
|||
enable_shared = $(ENABLE_SHARED:no=)
|
||||
|
||||
UNICODE_VERSION = 10.0.0
|
||||
UNICODE_EMOJI_VERSION = 5.0
|
||||
|
||||
### set the following environment variable or uncomment the line if
|
||||
### the Unicode data files should be updated completely on every update ('make up',...).
|
||||
# ALWAYS_UPDATE_UNICODE = yes
|
||||
UNICODE_DATA_DIR = enc/unicode/data/$(UNICODE_VERSION)
|
||||
UNICODE_DATA_DIR = enc/unicode/data/$(UNICODE_VERSION)/ucd
|
||||
UNICODE_SRC_DATA_DIR = $(srcdir)/$(UNICODE_DATA_DIR)
|
||||
UNICODE_SRC_EMOJI_DATA_DIR = $(srcdir)/enc/unicode/data/emoji/$(UNICODE_EMOJI_VERSION)
|
||||
UNICODE_HDR_DIR = $(srcdir)/enc/unicode/$(UNICODE_VERSION)
|
||||
UNICODE_DATA_HEADERS = \
|
||||
$(UNICODE_HDR_DIR)/casefold.h \
|
||||
|
@ -1207,21 +1209,34 @@ UNICODE_PROPERTY_FILES = \
|
|||
$(UNICODE_SRC_DATA_DIR)/auxiliary/GraphemeBreakProperty.txt \
|
||||
$(empty)
|
||||
|
||||
UNICODE_EMOJI_FILES = \
|
||||
$(UNICODE_SRC_EMOJI_DATA_DIR)/emoji-data.txt \
|
||||
$(empty)
|
||||
|
||||
update-unicode: $(UNICODE_FILES)
|
||||
|
||||
CACHE_DIR = $(srcdir)/.downloaded-cache
|
||||
UNICODE_DOWNLOAD = \
|
||||
$(BASERUBY) $(srcdir)/tool/downloader.rb \
|
||||
--cache-dir=$(CACHE_DIR) \
|
||||
-d $(srcdir)/$(UNICODE_DATA_DIR) \
|
||||
-d $(UNICODE_SRC_DATA_DIR) \
|
||||
-p $(UNICODE_VERSION)/ucd \
|
||||
-e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode
|
||||
UNICODE_EMOJI_DOWNLOAD = \
|
||||
$(BASERUBY) $(srcdir)/tool/downloader.rb \
|
||||
--cache-dir=$(CACHE_DIR) \
|
||||
-d $(UNICODE_SRC_EMOJI_DATA_DIR) \
|
||||
-p emoji/$(UNICODE_EMOJI_VERSION) \
|
||||
-e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode
|
||||
|
||||
$(UNICODE_PROPERTY_FILES): update-unicode-property-files
|
||||
update-unicode-property-files:
|
||||
$(ECHO) Downloading Unicode $(UNICODE_VERSION) property files...
|
||||
$(Q) $(MAKEDIRS) "$(UNICODE_SRC_DATA_DIR)/auxiliary"
|
||||
$(Q) $(UNICODE_DOWNLOAD) $(UNICODE_PROPERTY_FILES)
|
||||
$(ECHO) Downloading Unicode emoji $(UNICODE_VERSION) files...
|
||||
$(Q) $(MAKEDIRS) "$(UNICODE_SRC_EMOJI_DATA_DIR)"
|
||||
$(Q) $(UNICODE_EMOJI_DOWNLOAD) $(UNICODE_EMOJI_FILES)
|
||||
|
||||
$(UNICODE_FILES): update-unicode-files
|
||||
update-unicode-files:
|
||||
|
@ -1259,7 +1274,9 @@ $(UNICODE_HDR_DIR)/$(ALWAYS_UPDATE_UNICODE:yes=name2ctype.h): \
|
|||
|
||||
$(UNICODE_HDR_DIR)/name2ctype.h:
|
||||
$(MAKEDIRS) $(@D)
|
||||
$(BOOTSTRAPRUBY) $(srcdir)/tool/enc-unicode.rb --header $(UNICODE_SRC_DATA_DIR) > $@
|
||||
$(BOOTSTRAPRUBY) $(srcdir)/tool/enc-unicode.rb --header \
|
||||
$(UNICODE_SRC_DATA_DIR) $(UNICODE_SRC_EMOJI_DATA_DIR) > $@.new
|
||||
$(MV) $@.new $@
|
||||
|
||||
# the next non-comment line was:
|
||||
# $(UNICODE_HDR_DIR)/casefold.h: $(srcdir)/enc/unicode/case-folding.rb \
|
||||
|
|
|
@ -1419,7 +1419,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x0130, {2|F|D, {0x0069, 0x0307}}},
|
||||
};
|
||||
|
||||
/* C code produced by gperf version 3.0.4 */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseFold_11_hash -N onigenc_unicode_CaseFold_11_lookup -n */
|
||||
|
||||
/* maximum key range = 3623, duplicates = 0 */
|
||||
|
@ -1462,12 +1462,6 @@ onigenc_unicode_CaseFold_11_hash(const OnigCodePoint code)
|
|||
return asso_values[bits_of(code, 2)+81] + asso_values[bits_of(code, 1)+2] + asso_values[bits_of(code, 0)];
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
static const CodePointList3 *
|
||||
onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code)
|
||||
{
|
||||
|
@ -3583,9 +3577,9 @@ onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code)
|
|||
|
||||
if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE)
|
||||
{
|
||||
register int key = onigenc_unicode_CaseFold_11_hash(code);
|
||||
register unsigned int key = onigenc_unicode_CaseFold_11_hash(code);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
if (key <= MAX_HASH_VALUE)
|
||||
{
|
||||
register short s = wordlist[key];
|
||||
|
||||
|
@ -4868,7 +4862,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
|
|||
{0x0069, {1|U, {0x0049}}},
|
||||
};
|
||||
|
||||
/* C code produced by gperf version 3.0.4 */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_11_hash -N onigenc_unicode_CaseUnfold_11_lookup -n */
|
||||
|
||||
/* maximum key range = 2216, duplicates = 0 */
|
||||
|
@ -4910,12 +4904,6 @@ onigenc_unicode_CaseUnfold_11_hash(const OnigCodePoint code)
|
|||
return asso_values[bits_of(code, 2)+66] + asso_values[bits_of(code, 1)+4] + asso_values[bits_of(code, 0)];
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
static const CodePointList3 *
|
||||
onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code)
|
||||
{
|
||||
|
@ -6602,9 +6590,9 @@ onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code)
|
|||
|
||||
if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE)
|
||||
{
|
||||
register int key = onigenc_unicode_CaseUnfold_11_hash(code);
|
||||
register unsigned int key = onigenc_unicode_CaseUnfold_11_hash(code);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
if (key <= MAX_HASH_VALUE)
|
||||
{
|
||||
register short s = wordlist[key];
|
||||
|
||||
|
@ -6679,7 +6667,7 @@ static const CaseUnfold_12_Type CaseUnfold_12_Table[] = {
|
|||
{{0x0069, 0x0307}, {1, {0x0130}}},
|
||||
};
|
||||
|
||||
/* C code produced by gperf version 3.0.4 */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: gperf -7 -k1,2,3,4,5,6 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_12_hash -N onigenc_unicode_CaseUnfold_12_lookup -n */
|
||||
|
||||
/* maximum key range = 71, duplicates = 0 */
|
||||
|
@ -6714,12 +6702,6 @@ onigenc_unicode_CaseUnfold_12_hash(const OnigCodePoint *codes)
|
|||
return asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
static const CodePointList2 *
|
||||
onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes)
|
||||
{
|
||||
|
@ -6804,9 +6786,9 @@ onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes)
|
|||
if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE &&
|
||||
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE)
|
||||
{
|
||||
register int key = onigenc_unicode_CaseUnfold_12_hash(codes);
|
||||
register unsigned int key = onigenc_unicode_CaseUnfold_12_hash(codes);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
if (key <= MAX_HASH_VALUE)
|
||||
{
|
||||
register short s = wordlist[key];
|
||||
|
||||
|
@ -6835,7 +6817,7 @@ static const CaseUnfold_13_Type CaseUnfold_13_Table[] = {
|
|||
{{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}},
|
||||
};
|
||||
|
||||
/* C code produced by gperf version 3.0.4 */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup -n */
|
||||
|
||||
/* maximum key range = 20, duplicates = 0 */
|
||||
|
@ -6870,12 +6852,6 @@ onigenc_unicode_CaseUnfold_13_hash(const OnigCodePoint *codes)
|
|||
return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
static const CodePointList2 *
|
||||
onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
|
||||
{
|
||||
|
@ -6918,9 +6894,9 @@ onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
|
|||
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE &&
|
||||
codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE)
|
||||
{
|
||||
register int key = onigenc_unicode_CaseUnfold_13_hash(codes);
|
||||
register unsigned int key = onigenc_unicode_CaseUnfold_13_hash(codes);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
if (key <= MAX_HASH_VALUE)
|
||||
{
|
||||
register short s = wordlist[key];
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -7,7 +7,7 @@
|
|||
|
||||
# Constants for input and output directory
|
||||
InputDataDir = ARGV[0] || 'enc/unicode/data'
|
||||
unicode_version = InputDataDir[/[\d.]+\z/]
|
||||
unicode_version = InputDataDir[/.*\/(\d+\.\d+\.\d+)(?=\/|\z)/, 1]
|
||||
|
||||
# convenience methods
|
||||
class Integer
|
||||
|
|
|
@ -14,8 +14,8 @@ if ARGV[0] == "--header"
|
|||
header = true
|
||||
ARGV.shift
|
||||
end
|
||||
unless ARGV.size == 1
|
||||
abort "Usage: #{$0} data_directory"
|
||||
unless ARGV.size == 2
|
||||
abort "Usage: #{$0} data_directory emoji_data_directory"
|
||||
end
|
||||
|
||||
$unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/]
|
||||
|
@ -302,7 +302,7 @@ def constantize_blockname(name)
|
|||
end
|
||||
|
||||
def get_file(name)
|
||||
File.join(ARGV[0], name)
|
||||
File.join(ARGV[name.start_with?("emoji-") ? 1 : 0], name)
|
||||
end
|
||||
|
||||
def data_foreach(name, &block)
|
||||
|
|
Loading…
Reference in a new issue