* string.c (rb_str_scrub_bang): add String#scrub!. [Feature #8414]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40810 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2013-05-18 11:16:56 +00:00 · 2013-05-18 11:16:56 +00:00 · 05c0a22854
commit 05c0a22854
parent bfec5ad41f
4 changed files with 45 additions and 1 deletions
--- a/4
+++ b/4
@ -1,3 +1,7 @@
+Sat May 18 20:15:28 2013  NARUSE, Yui  <naruse@ruby-lang.org>
+
+	* string.c (rb_str_scrub_bang): add String#scrub!. [Feature #8414]
+
 Sat May 18 16:59:52 2013  Tanaka Akira  <akr@fsij.org>

 	* ext/socket/mkconstants.rb (INTEGER2NUM): Renamed from INTEGER2VALUE.
--- a/2
+++ b/2
@ -32,7 +32,7 @@ with all sufficient information, see the ChangeLog file.

 * String
  * New methods:
-    * added String#scrub to verify and fix invalid byte sequence.
+    * String#scrub and String#scrub! verify and fix invalid byte sequences.
  * extended methods:
    * If invalid: :replace is specified for String#encode, replace
      invalid byte sequence even if the destination encoding equals to
--- a/string.c
+++ b/string.c
@ -8014,6 +8014,28 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str)
    }
 }

+/*
+ *  call-seq:
+ *    str.scrub! -> str
+ *    str.scrub!(repl) -> str
+ *    str.scrub!{|bytes|} -> str
+ *
+ *  Scrubs the string in place: each invalid byte sequence is replaced with the given
+ *  replacement (U+FFFD in the first example below, where none is given). Always returns self.
+ *  If a block is given, each invalid byte sequence is replaced with the block's return value.
+ *
+ *     "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
+ *     "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
+ *     "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
+ */
+VALUE
+rb_str_scrub_bang(int argc, VALUE *argv, VALUE str)
+{
+    VALUE scrubbed = rb_str_scrub(argc, argv, str); /* same arguments as String#scrub */
+    rb_str_replace(str, scrubbed); /* overwrite the receiver with the scrubbed result */
+    return str;
+}
+
 /**********************************************************************
 * Document-class: Symbol
 *
@ -8500,6 +8522,7 @@ Init_String(void)
    rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
    rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
    rb_define_method(rb_cString, "scrub", rb_str_scrub, -1);
+    rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1);

    rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
    rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@ -1491,6 +1491,11 @@ class TestM17N < Test::Unit::TestCase
  end

  def test_scrub
+    str = "\u3042\u3044"
+    assert_not_same(str, str.scrub)
+    str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
+    assert_not_same(str, str.scrub)
+
    assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
    assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)

@ -1529,4 +1534,16 @@ class TestM17N < Test::Unit::TestCase
                 "\xff".force_encoding(Encoding::UTF_32LE).
                 scrub)
  end
+
+  def test_scrub_bang
+    str = "\u3042\u3044"
+    assert_same(str, str.scrub!)
+    str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
+    assert_same(str, str.scrub!)
+
+    str = u("\x80\x80\x80")
+    str.scrub!
+    assert_same(str, str.scrub!)
+    assert_equal("\uFFFD\uFFFD\uFFFD", str)
+  end
 end