* file.c (rb_str_normalize_ospath):

HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through U+2FAFF are not decomposed (this avoids problems with round trip conversions from old Mac text encodings). http://developer.apple.com/library/mac/qa/qa1173/_index.html Therefore fix r42457 to exclude the range. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42498 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2013-08-10 20:44:10 +00:00 · 2013-08-10 20:44:10 +00:00 · 9962aad7b0
commit 9962aad7b0
parent 06ed1c781a
8 changed files with 183 additions and 4 deletions
--- a/10
+++ b/10
@ -1,3 +1,13 @@
 Sun Aug 11 04:48:14 2013  NARUSE, Yui  <naruse@ruby-lang.org>
 	* file.c (rb_str_normalize_ospath):
 	  HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
 	  U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
 	  U+2FAFF are not decomposed (this avoids problems with round trip
 	  conversions from old Mac text encodings).
 	  http://developer.apple.com/library/mac/qa/qa1173/_index.html
 	  Therefore fix r42457 to exclude the range.
 Sun Aug 11 03:26:07 2013  Tanaka Akira  <akr@fsij.org>
 	* bignum.c (bitsize): Fix a conditional expression.
--- a/dir.c
+++ b/dir.c
@ -84,8 +84,6 @@ char *strchr(char*,char);
 #include <sys/param.h>
 #include <sys/mount.h>
 VALUE rb_str_normalize_ospath(const char *ptr, long len);
 static inline int
 is_hfs(DIR *dirp)
 {
@ -1420,7 +1418,7 @@ glob_helper(
 	    name = dp->d_name;
 	    namlen = NAMLEN(dp);
 # if HAVE_HFS
-	    if (hfs_p && has_nonascii(name, namlen)) {
+	    if (0&&hfs_p && has_nonascii(name, namlen)) {
 		if (!NIL_P(utf8str = rb_str_normalize_ospath(name, namlen))) {
 		    RSTRING_GETMEM(utf8str, name, namlen);
 		}
--- a/ext/-test-/string/depend
+++ b/ext/-test-/string/depend
@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \
  $(hdrdir)/ruby/encoding.h \
  $(hdrdir)/ruby/oniguruma.h
 qsort.o: $(hdrdir)/ruby/util.h
 normalize.o: $(top_srcdir)/internal.h
--- a/ext/-test-/string/extconf.rb
+++ b/ext/-test-/string/extconf.rb
@ -1,3 +1,4 @@
 $INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
 $srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
 inits = $srcs.map {|s| File.basename(s, ".*")}
 inits.delete("init")
--- a/ext/-test-/string/normalize.c
+++ b/ext/-test-/string/normalize.c
@ -0,0 +1,18 @@
 #include "ruby.h"
 #include "internal.h"
 #ifdef __APPLE__
 /*
  * Test-only method body: runs rb_str_normalize_ospath() over the bytes of
  * the receiver and returns whatever that function returns.
  */
 static VALUE
 normalize_ospath(VALUE str)
 {
    const char *bytes = RSTRING_PTR(str);
    long nbytes = RSTRING_LEN(str);

    return rb_str_normalize_ospath(bytes, nbytes);
 }
 #else
 #define normalize_ospath rb_f_notimplement
 #endif
 /*
  * Registers the zero-argument instance method "normalize_ospath" on klass.
  * Where normalize_ospath is defined as rb_f_notimplement (non-Apple
  * builds), that not-implemented stub is what gets registered instead.
  */
 void
 Init_normalize(VALUE klass)
 {
    static const char method_name[] = "normalize_ospath";

    rb_define_method(klass, method_name, normalize_ospath, 0);
 }
--- a/file.c
+++ b/file.c
@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path)
 #ifdef __APPLE__
 VALUE
-rb_str_normalize_ospath(const char *ptr, long len)
+rb_str_normalize_ospath0(const char *ptr, long len)
 {
    VALUE str;
    CFIndex buflen = 0;
@ -267,6 +267,47 @@ rb_str_normalize_ospath(const char *ptr, long len)
    CFRelease(s);
    return str;
 }
 /*
  * Builds a new UTF-8 String from the path name bytes ptr[0, len).
  *
  * HFS Plus uses a variant of Normal Form D in which U+2000..U+2FFF,
  * U+F900..U+FAFF and U+2F800..U+2FAFF are not decomposed, so characters
  * in those ranges are copied to the result verbatim.  Every run of other
  * characters between them is handed to rb_str_normalize_ospath0() and its
  * result is appended.  A byte that does not begin a valid UTF-8 character
  * is replaced with U+FFFD (REPLACEMENT CHARACTER).
  *
  * ptr: start of the byte sequence (expected to be UTF-8).
  * len: number of bytes at ptr.
  */
 VALUE
 rb_str_normalize_ospath(const char *ptr, long len)
 {
    const char *p = ptr;
    const char *e = ptr + len;
    const char *p1 = p;		/* start of the not-yet-flushed segment */
    VALUE str = rb_str_buf_new(len);
    rb_encoding *enc = rb_utf8_encoding();
    rb_enc_associate(str, enc);

    while (p < e) {
 	int l, c;
 	int r = rb_enc_precise_mbclen(p, e, enc);
 	if (!MBCLEN_CHARFOUND_P(r)) {
 	    /* invalid byte shall not happen but */
 	    if (p - p1 > 0) {
 		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
 	    }
 	    rb_str_cat2(str, "\xEF\xBF\xBD");
 	    p += 1;
 	    /*
 	     * Restart the pending segment after the bad byte and skip the
 	     * character handling below: r holds no valid length here, and
 	     * leaving p1 behind would flush the same bytes a second time.
 	     */
 	    p1 = p;
 	    continue;
 	}
 	l = MBCLEN_CHARFOUND_LEN(r);
 	c = rb_enc_mbc_to_codepoint(p, e, enc);
 	if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
 		(0x2F800 <= c && c <= 0x2FAFF)) {
 	    /* excluded range: flush what precedes it, then copy as is */
 	    if (p - p1 > 0) {
 		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
 	    }
 	    rb_str_cat(str, p, l);
 	    p += l;
 	    p1 = p;
 	}
 	else {
 	    /* ordinary character: keep extending the pending segment */
 	    p += l;
 	}
    }
    if (p - p1 > 0) {
 	rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
    }

    return str;
 }
 #endif
 static long
--- a/internal.h
+++ b/internal.h
@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALUE y);
 VALUE rb_big_mul_toom3(VALUE x, VALUE y);
 VALUE rb_big_sq_fast(VALUE x);
 /* file.c */
 #ifdef __APPLE__
 VALUE rb_str_normalize_ospath(const char *ptr, long len);
 #endif
 /* io.c */
 void rb_maygvl_fd_fix_cloexec(int fd);
--- a/test/-ext-/string/test_normalize.rb
+++ b/test/-ext-/string/test_normalize.rb
@ -0,0 +1,105 @@
 require 'test/unit'
 require "-test-/string/string"
 require "tempfile"
 # Exercises Bug::String#normalize_ospath, the test hook registered by the
 # -test-/string extension.
 class Test_StringNormalize < Test::Unit::TestCase
 =begin
   def test_normalize_all
     exclude = [
       #0x340, 0x341, 0x343, 0x344
     ]
     (0x0080..0xFFFD).each do |n|
       next if 0xD800 <= n && n <= 0xDFFF
       next if exclude.include? n
       code = n.to_s(16)
       Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile|
         ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path))
         assert_equal 1, ary.size
         result = ary[0]
         rn = result[/\/\h+-(.+?)-/, 1]
         #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}"
         r2 = Bug::String.new(result ).normalize_ospath
         rn2 = r2[/\/\h+-(.+?)-/, 1]
         if tempfile.path == result
           if tempfile.path == r2
           else
             puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}"
           end
         else
           if tempfile.path == r2
             # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}"
           elsif result == r2
             puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}"
           else
             puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}"
           end
         end
       end
     end
   end
 =end

   # Each row of the table is "expected source": normalizing the right-hand
   # column must yield the left-hand column.
   def test_normalize
     table = %[
       \u304C \u304B\u3099
       \u3077 \u3075\u309A
       \u308F\u3099 \u308F\u3099
       \u30F4 \u30A6\u3099
       \u30DD \u30DB\u309A
       \u30AB\u303A \u30AB\u303A
       \u00C1 A\u0301
       B\u030A B\u030A
       \u0386 \u0391\u0301
       \u03D3 \u03D2\u0301
       \u0401 \u0415\u0308
       \u2260 =\u0338
     ]
     table.split.each_slice(2) do |expected, src|
       normalized = Bug::String.new(src).normalize_ospath
       assert_equal expected, normalized,
         "#{expected.dump} is expected but #{src.dump}"
     end
   rescue NotImplementedError
     # normalize_ospath is not implemented on this platform; nothing to check.
   end

   # Compatibility characters must come back unchanged.
   def test_not_normalize_kc
     assert_kept_as_is %[
       \u2460
       \u2162
       \u3349
       \u33A1
       \u337B
       \u2116
       \u33CD
       \u2121
       \u32A4
       \u3231
     ]
   rescue NotImplementedError
     # normalize_ospath is not implemented on this platform; nothing to check.
   end

   # Sequences listed here must come back unchanged.
   def test_dont_normalize_hfsplus
     assert_kept_as_is %[
       \u2190\u0338
       \u219A
       \u212B
       \uF90A
       \uF9F4
       \uF961 \uF9DB
       \uF96F \uF3AA
       \uF915 \uF95C \uF9BF
       \uFA0C
       \uFA10
       \uFA19
       \uFA26
     ]
   rescue NotImplementedError
     # normalize_ospath is not implemented on this platform; nothing to check.
   end

   private

   # Asserts that every whitespace-separated token of +sources+ is returned
   # unchanged by normalize_ospath.
   def assert_kept_as_is(sources)
     sources.split.each do |src|
       normalized = Bug::String.new(src).normalize_ospath
       assert_equal src, normalized,
         "#{src.dump} is expected not to be normalized, but #{normalized.dump}"
     end
   end
 end