From 9962aad7b0184e385b40c26c5a109bff7abbe43c Mon Sep 17 00:00:00 2001
From: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Sat, 10 Aug 2013 20:44:10 +0000
Subject: [PATCH] * file.c (rb_str_normalize_ospath):   HFS Plus (Mac OS
 Extended) uses a variant of Normal Form D in which   U+2000 through U+2FFF,
 U+F900 through U+FAFF, and U+2F800 through   U+2FAFF are not decomposed (this
 avoids problems with round trip   conversions from old Mac text encodings).  
 http://developer.apple.com/library/mac/qa/qa1173/_index.html   Therefore fix
 r42457 to exclude those ranges.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42498 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog                           |  10 +++
 dir.c                               |   4 +-
 ext/-test-/string/depend            |   1 +
 ext/-test-/string/extconf.rb        |   1 +
 ext/-test-/string/normalize.c       |  18 +++++
 file.c                              |  43 +++++++++++-
 internal.h                          |   5 ++
 test/-ext-/string/test_normalize.rb | 105 ++++++++++++++++++++++++++++
 8 files changed, 183 insertions(+), 4 deletions(-)
 create mode 100644 ext/-test-/string/normalize.c
 create mode 100644 test/-ext-/string/test_normalize.rb

diff --git a/ChangeLog b/ChangeLog
index 3e74589bb1..131ae62709 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Aug 11 04:48:14 2013  NARUSE, Yui  <naruse@ruby-lang.org>
+
+	* file.c (rb_str_normalize_ospath):
+	  HFS Plus (Mac OS Extended) uses a variant of Normal Form D in which
+	  U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
+	  U+2FAFF are not decomposed (this avoids problems with round trip
+	  conversions from old Mac text encodings).
+	  http://developer.apple.com/library/mac/qa/qa1173/_index.html
+	  Therefore fix r42457 to exclude those ranges.
+
 Sun Aug 11 03:26:07 2013  Tanaka Akira  <akr@fsij.org>
 
 	* bignum.c (bitsize): Fix a conditional expression.
diff --git a/dir.c b/dir.c
index 7e9c659f9e..cf9aeafee4 100644
--- a/dir.c
+++ b/dir.c
@@ -84,8 +84,6 @@ char *strchr(char*,char);
 #include <sys/param.h>
 #include <sys/mount.h>
 
-VALUE rb_str_normalize_ospath(const char *ptr, long len);
-
 static inline int
 is_hfs(DIR *dirp)
 {
diff --git a/ext/-test-/string/depend b/ext/-test-/string/depend
index aad1fab427..86617ff289 100644
--- a/ext/-test-/string/depend
+++ b/ext/-test-/string/depend
@@ -2,3 +2,4 @@ $(OBJS): $(HDRS) $(ruby_headers) \
   $(hdrdir)/ruby/encoding.h \
   $(hdrdir)/ruby/oniguruma.h
 qsort.o: $(hdrdir)/ruby/util.h
+normalize.o: $(top_srcdir)/internal.h
diff --git a/ext/-test-/string/extconf.rb b/ext/-test-/string/extconf.rb
index 42c10b994b..10d33cbab9 100644
--- a/ext/-test-/string/extconf.rb
+++ b/ext/-test-/string/extconf.rb
@@ -1,3 +1,4 @@
+$INCFLAGS << " -I$(topdir) -I$(top_srcdir)"
 $srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")]
 inits = $srcs.map {|s| File.basename(s, ".*")}
 inits.delete("init")
diff --git a/ext/-test-/string/normalize.c b/ext/-test-/string/normalize.c
new file mode 100644
index 0000000000..22bb6d7887
--- /dev/null
+++ b/ext/-test-/string/normalize.c
@@ -0,0 +1,18 @@
+#include "ruby.h"
+#include "internal.h"
+
+#ifdef __APPLE__
+static VALUE
+normalize_ospath(VALUE str)
+{
+    return rb_str_normalize_ospath(RSTRING_PTR(str), RSTRING_LEN(str));
+}
+#else
+#define normalize_ospath rb_f_notimplement
+#endif
+
+void
+Init_normalize(VALUE klass)
+{
+    rb_define_method(klass, "normalize_ospath", normalize_ospath, 0);
+}
diff --git a/file.c b/file.c
index 3923ddeec7..347b7e6d0e 100644
--- a/file.c
+++ b/file.c
@@ -245,7 +245,7 @@ rb_str_encode_ospath(VALUE path)
 
 #ifdef __APPLE__
-VALUE
-rb_str_normalize_ospath(const char *ptr, long len)
+static VALUE
+rb_str_normalize_ospath0(const char *ptr, long len)
 {
     VALUE str;
     CFIndex buflen = 0;
@@ -267,6 +267,60 @@ rb_str_normalize_ospath(const char *ptr, long len)
     CFRelease(s);
     return str;
 }
+
+/*
+ * Builds a new UTF-8 String from the len bytes at ptr, passing the text
+ * through rb_str_normalize_ospath0() except for characters that HFS Plus
+ * does not decompose: U+2000..U+2FFF, U+F900..U+FAFF and
+ * U+2F800..U+2FAFF are copied through untouched.  Where no valid
+ * character is found, one byte is replaced with U+FFFD.
+ */
+VALUE
+rb_str_normalize_ospath(const char *ptr, long len)
+{
+    const char *p = ptr;
+    const char *e = ptr + len;
+    const char *p1 = p;	/* start of the run not yet flushed to str */
+    VALUE str = rb_str_buf_new(len);
+    rb_encoding *enc = rb_utf8_encoding();
+    rb_enc_associate(str, enc);
+
+    while (p < e) {
+	int l, c;
+	int r = rb_enc_precise_mbclen(p, e, enc);
+	if (!MBCLEN_CHARFOUND_P(r)) {
+	    /* invalid byte shall not happen but */
+	    if (p - p1 > 0) {
+		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+	    }
+	    rb_str_cat2(str, "\xEF\xBF\xBD");
+	    p += 1;
+	    /* restart the run after the bad byte; r carries no length */
+	    p1 = p;
+	    continue;
+	}
+	l = MBCLEN_CHARFOUND_LEN(r);
+	c = rb_enc_mbc_to_codepoint(p, e, enc);
+	if ((0x2000 <= c && c <= 0x2FFF) || (0xF900 <= c && c <= 0xFAFF) ||
+		(0x2F800 <= c && c <= 0x2FAFF)) {
+	    /* flush what precedes, then copy this character verbatim */
+	    if (p - p1 > 0) {
+		rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+	    }
+	    rb_str_cat(str, p, l);
+	    p += l;
+	    p1 = p;
+	}
+	else {
+	    p += l;
+	}
+    }
+    if (p - p1 > 0) {
+	rb_str_append(str, rb_str_normalize_ospath0(p1, p-p1));
+    }
+
+    return str;
+}
 #endif
 
 static long
diff --git a/internal.h b/internal.h
index a4cf43b939..d52b2e9442 100644
--- a/internal.h
+++ b/internal.h
@@ -513,6 +513,11 @@ VALUE rb_big_mul_karatsuba(VALUE x, VALUE y);
 VALUE rb_big_mul_toom3(VALUE x, VALUE y);
 VALUE rb_big_sq_fast(VALUE x);
 
+/* file.c */
+#ifdef __APPLE__
+VALUE rb_str_normalize_ospath(const char *ptr, long len);
+#endif
+
 /* io.c */
 void rb_maygvl_fd_fix_cloexec(int fd);
 
diff --git a/test/-ext-/string/test_normalize.rb b/test/-ext-/string/test_normalize.rb
new file mode 100644
index 0000000000..da24391110
--- /dev/null
+++ b/test/-ext-/string/test_normalize.rb
@@ -0,0 +1,105 @@
+require 'test/unit'
+require "-test-/string/string"
+require "tempfile"
+
+class Test_StringNormalize < Test::Unit::TestCase
+=begin
+  def test_normalize_all
+    exclude = [
+      #0x340, 0x341, 0x343, 0x344
+    ]
+    (0x0080..0xFFFD).each do |n|
+      next if 0xD800 <= n && n <= 0xDFFF
+      next if exclude.include? n
+      code = n.to_s(16)
+      Tempfile.create("#{code}-#{n.chr(Encoding::UTF_8)}-") do |tempfile|
+        ary = Dir.glob(File.expand_path("../#{code}-*", tempfile.path))
+        assert_equal 1, ary.size
+        result = ary[0]
+        rn = result[/\/\h+-(.+?)-/, 1]
+        #assert_equal tempfile.path, result, "#{rn.dump} is not U+#{n.to_s(16)}"
+        r2 = Bug::String.new(result ).normalize_ospath
+        rn2 = r2[/\/\h+-(.+?)-/, 1]
+        if tempfile.path == result
+          if tempfile.path == r2
+          else
+            puts "U+#{n.to_s(16)} shouldn't be r2#{rn2.dump}"
+          end
+        else
+          if tempfile.path == r2
+            # puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump}"
+          elsif result == r2
+            puts "U+#{n.to_s(16)} shouldn't be #{rn.dump}"
+          else
+            puts "U+#{n.to_s(16)} shouldn't be r#{rn.dump} r2#{rn2.dump}"
+          end
+        end
+      end
+    end
+  end
+=end
+
+  def test_normalize
+    %[
+      \u304C \u304B\u3099
+      \u3077 \u3075\u309A
+      \u308F\u3099 \u308F\u3099
+      \u30F4 \u30A6\u3099
+      \u30DD \u30DB\u309A
+      \u30AB\u303A \u30AB\u303A
+      \u00C1 A\u0301
+      B\u030A B\u030A
+      \u0386 \u0391\u0301
+      \u03D3 \u03D2\u0301
+      \u0401 \u0415\u0308
+      \u2260 =\u0338
+    ].scan(/(\S+)\s+(\S+)/) do |expected, src|
+      result = Bug::String.new(src).normalize_ospath
+      assert_equal expected, result,
+        "#{expected.dump} is expected but #{src.dump}"
+    end
+  rescue NotImplementedError
+  end
+
+  def test_not_normalize_kc
+    %[
+      \u2460
+      \u2162
+      \u3349
+      \u33A1
+      \u337B
+      \u2116
+      \u33CD
+      \u2121
+      \u32A4
+      \u3231
+    ].split.each do |src|
+      result = Bug::String.new(src).normalize_ospath
+      assert_equal src, result,
+        "#{src.dump} is expected not to be normalized, but #{result.dump}"
+    end
+  rescue NotImplementedError
+  end
+
+  def test_dont_normalize_hfsplus
+    %[
+      \u2190\u0338
+      \u219A
+      \u212B
+      \uF90A
+      \uF9F4
+      \uF961 \uF9DB
+      \uF96F \uF3AA
+      \uF915 \uF95C \uF9BF
+      \uFA0C
+      \uFA10
+      \uFA19
+      \uFA26
+    ].split.each do |src|
+      result = Bug::String.new(src).normalize_ospath
+      assert_equal src, result,
+        "#{src.dump} is expected not to be normalized, but #{result.dump}"
+    end
+  rescue NotImplementedError
+  end
+end