mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* file.c (rb_str_normalize_ospath):
HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through U+2FAFF are not decomposed (this avoids problems with round trip conversions from old Mac text encodings). http://developer.apple.com/library/mac/qa/qa1173/_index.html Therefore fix r42457 to exclude the range. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42498 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
06ed1c781a
commit
9962aad7b0
8 changed files with 183 additions and 4 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
||||||
|
Sun Aug 11 04:48:14 2013 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* file.c (rb_str_normalize_ospath):
|
||||||
|
HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
|
||||||
|
U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
|
||||||
|
U+2FAFF are not decomposed (this avoids problems with round trip
|
||||||
|
conversions from old Mac text encodings).
|
||||||
|
http://developer.apple.com/library/mac/qa/qa1173/_index.html
|
||||||
|
Therefore fix r42457 to exclude the range.
|
||||||
|
|
||||||
Sun Aug 11 03:26:07 2013 Tanaka Akira <akr@fsij.org>
|
Sun Aug 11 03:26:07 2013 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
* bignum.c (bitsize): Fix a conditional expression.
|
* bignum.c (bitsize): Fix a conditional expression.
|
||||||
|
|
4
dir.c
4
dir.c
|
@ -84,8 +84,6 @@ char *strchr(char*,char);
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
|
|
||||||
VALUE rb_str_normalize_ospath(const char *ptr, long len);
|
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
is_hfs(DIR *dirp)
|
is_hfs(DIR *dirp)
|
||||||
{
|
{
|
||||||
|
@ -1420,7 +1418,7 @@ glob_helper(
|
||||||
name = dp->d_name;
|
name = dp->d_name;
|
||||||
namlen = NAMLEN(dp);
|
namlen = NAMLEN(dp);
|
||||||
# if HAVE_HFS
|
# if HAVE_HFS
|
||||||
if (hfs_p && has_nonascii(name, namlen)) {
|
if (0&&hfs_p && has_nonascii(name, namlen)) {
|
||||||
if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) {
|
if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) {
|
||||||
RSTRING_GETMEM(utf8str, name, namlen);
|
RSTRING_GETMEM(utf8str, name, namlen);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \
|
||||||
$(hdrdir)/ruby/encoding.h \
|
$(hdrdir)/ruby/encoding.h \
|
||||||
$(hdrdir)/ruby/oniguruma.h
|
$(hdrdir)/ruby/oniguruma.h
|
||||||
qsort.o: $(hdrdir)/ruby/util.h
|
qsort.o: $(hdrdir)/ruby/util.h
|
||||||
|
normalize.o: $(top_srcdir)/internal.h
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
$INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
|
||||||
$srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
|
$srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
|
||||||
inits = $srcs.map {|s| File.basename(s, ".*")}
|
inits = $srcs.map {|s| File.basename(s, ".*")}
|
||||||
inits.delete("init")
|
inits.delete("init")
|
||||||
|
|
18
ext/-test-/string/normalize.c
Normal file
18
ext/-test-/string/normalize.c
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
#include "ruby.h"
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
#ifndef __APPLE__
/*
 * rb_str_normalize_ospath() is only declared under __APPLE__ (see
 * internal.h), so on every other platform the method is bound to
 * rb_f_notimplement instead.  The Ruby-side tests rescue
 * NotImplementedError for that case.
 */
# define normalize_ospath rb_f_notimplement
#else
/*
 * Test-only binding: hands the receiver's bytes to
 * rb_str_normalize_ospath() and returns whatever it produces.
 *
 * str - the receiver (registered with arity 0, so it is the only argument).
 */
static VALUE
normalize_ospath(VALUE str)
{
    const char *bytes = RSTRING_PTR(str);
    long nbytes = RSTRING_LEN(str);

    return rb_str_normalize_ospath(bytes, nbytes);
}
#endif
|
||||||
|
|
||||||
|
void
|
||||||
|
Init_normalize(VALUE klass)
|
||||||
|
{
|
||||||
|
rb_define_method(klass, "normalize_ospath", normalize_ospath, 0);
|
||||||
|
}
|
43
file.c
43
file.c
|
@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path)
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
VALUE
|
VALUE
|
||||||
rb_str_normalize_ospath(const char *ptr, long len)
|
rb_str_normalize_ospath0(const char *ptr, long len)
|
||||||
{
|
{
|
||||||
VALUE str;
|
VALUE str;
|
||||||
CFIndex buflen = 0;
|
CFIndex buflen = 0;
|
||||||
|
@ -267,6 +267,47 @@ rb_str_normalize_ospath(const char *ptr, long len)
|
||||||
CFRelease(s);
|
CFRelease(s);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VALUE
|
||||||
|
rb_str_normalize_ospath(const char *ptr, long len)
|
||||||
|
{
|
||||||
|
const char *p = ptr;
|
||||||
|
const char *e = ptr + len;
|
||||||
|
const char *p1 = p;
|
||||||
|
VALUE str = rb_str_buf_new(len);
|
||||||
|
rb_encoding *enc = rb_utf8_encoding();
|
||||||
|
rb_enc_associate(str, enc);
|
||||||
|
|
||||||
|
while (p < e) {
|
||||||
|
int l;
|
||||||
|
int r = rb_enc_precise_mbclen(p, e, enc);
|
||||||
|
if (!MBCLEN_CHARFOUND_P(r)) {
|
||||||
|
/* invalid byte shall not happen but */
|
||||||
|
rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
|
||||||
|
rb_str_cat2(str, "\xEF\xBF\xBD");
|
||||||
|
p += 1;
|
||||||
|
}
|
||||||
|
l = MBCLEN_CHARFOUND_LEN(r);
|
||||||
|
int c = rb_enc_mbc_to_codepoint(p, e, enc);
|
||||||
|
if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
|
||||||
|
(0x2F800 <= c && c <= 0x2FAFF)) {
|
||||||
|
if (p - p1 > 0) {
|
||||||
|
rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
|
||||||
|
}
|
||||||
|
rb_str_cat(str, p, l);
|
||||||
|
p += l;
|
||||||
|
p1 = p;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
p += l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (p - p1 > 0) {
|
||||||
|
rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
|
||||||
|
}
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static long
|
static long
|
||||||
|
|
|
@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALUE y);
|
||||||
VALUE rb_big_mul_toom3(VALUE x, VALUE y);
|
VALUE rb_big_mul_toom3(VALUE x, VALUE y);
|
||||||
VALUE rb_big_sq_fast(VALUE x);
|
VALUE rb_big_sq_fast(VALUE x);
|
||||||
|
|
||||||
|
/* file.c */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
VALUE rb_str_normalize_ospath(const char *ptr, long len);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* io.c */
|
/* io.c */
|
||||||
void rb_maygvl_fd_fix_cloexec(int fd);
|
void rb_maygvl_fd_fix_cloexec(int fd);
|
||||||
|
|
||||||
|
|
105
test/-ext-/string/test_normalize.rb
Normal file
105
test/-ext-/string/test_normalize.rb
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
require 'test/unit'
|
||||||
|
require "-test-/string/string"
|
||||||
|
require "tempfile"
|
||||||
|
|
||||||
|
# Exercises Bug::String#normalize_ospath, the test-only binding of
# rb_str_normalize_ospath.  Each active test rescues NotImplementedError,
# so it passes silently where the method is not implemented.
class Test_StringNormalize < Test::Unit::TestCase
  # The block below is disabled.  It creates a temp file per code point in
  # U+0080..U+FFFD (surrogates skipped), globs for it, and prints the code
  # points whose globbed or normalized name differs from the created path.
=begin
  def test_normalize_all
    exclude = [
      #0x340, 0x341, 0x343, 0x344
    ]
    (0x0080..0xFFFD).each do |n|
      next if 0xD800 <= n && n <= 0xDFFF
      next if exclude.include? n
      code = n.to_s(16)
      Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile|
        ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path))
        assert_equal 1, ary.size
        result = ary[0]
        rn = result[/\/\h+-(.+?)-/, 1]
        #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}"
        r2 = Bug::String.new(result ).normalize_ospath
        rn2 = r2[/\/\h+-(.+?)-/, 1]
        if tempfile.path == result
          if tempfile.path == r2
          else
            puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}"
          end
        else
          if tempfile.path == r2
            # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}"
          elsif result == r2
            puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}"
          else
            puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}"
          end
        end
      end
    end
  end
=end

  # Each row is "<expected> <source>": normalizing <source> must yield
  # <expected>.
  def test_normalize
    table = %[
      \u304C \u304B\u3099
      \u3077 \u3075\u309A
      \u308F\u3099 \u308F\u3099
      \u30F4 \u30A6\u3099
      \u30DD \u30DB\u309A
      \u30AB\u303A \u30AB\u303A
      \u00C1 A\u0301
      B\u030A B\u030A
      \u0386 \u0391\u0301
      \u03D3 \u03D2\u0301
      \u0401 \u0415\u0308
      \u2260 =\u0338
    ]
    table.scan(/(\S+)\s+(\S+)/) do |expected, src|
      result = Bug::String.new(src).normalize_ospath
      assert_equal expected, result,
        "#{expected.dump} is expected but #{src.dump}"
    end
  rescue NotImplementedError
  end

  # Every whitespace-separated token must come back unchanged.
  def test_not_normalize_kc
    samples = %[
      \u2460
      \u2162
      \u3349
      \u33A1
      \u337B
      \u2116
      \u33CD
      \u2121
      \u32A4
      \u3231
    ].split
    samples.each do |src|
      result = Bug::String.new(src).normalize_ospath
      assert_equal src, result,
        "#{src.dump} is expected not to be normalized, but #{result.dump}"
    end
  rescue NotImplementedError
  end

  # Every whitespace-separated token must come back unchanged; most fall in
  # the U+2000..U+2FFF and U+F900..U+FAFF ranges that HFS Plus does not
  # decompose.
  def test_dont_normalize_hfsplus
    samples = %[
      \u2190\u0338
      \u219A
      \u212B
      \uF90A
      \uF9F4
      \uF961 \uF9DB
      \uF96F \uF3AA
      \uF915 \uF95C \uF9BF
      \uFA0C
      \uFA10
      \uFA19
      \uFA26
    ].split
    samples.each do |src|
      result = Bug::String.new(src).normalize_ospath
      assert_equal src, result,
        "#{src.dump} is expected not to be normalized, but #{result.dump}"
    end
  rescue NotImplementedError
  end
end
|
Loading…
Add table
Add a link
Reference in a new issue