1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* string.c (str_scrub_bang): add String#scrub!. [Feature #8414]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40810 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2013-05-18 11:16:56 +00:00
parent bfec5ad41f
commit 05c0a22854
4 changed files with 45 additions and 1 deletions

View file

@ -1,3 +1,7 @@
Sat May 18 20:15:28 2013 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (str_scrub_bang): add String#scrub!. [Feature #8414]
Sat May 18 16:59:52 2013 Tanaka Akira <akr@fsij.org> Sat May 18 16:59:52 2013 Tanaka Akira <akr@fsij.org>
* ext/socket/mkconstants.rb (INTEGER2NUM): Renamed from INTEGER2VALUE. * ext/socket/mkconstants.rb (INTEGER2NUM): Renamed from INTEGER2VALUE.

2
NEWS
View file

@ -32,7 +32,7 @@ with all sufficient information, see the ChangeLog file.
* String * String
* New methods: * New methods:
* added String#scrub to verify and fix invalid byte sequence. * String#scrub and String#scrub! verify and fix invalid byte sequence.
* extended methods: * extended methods:
* If invalid: :replace is specified for String#encode, replace * If invalid: :replace is specified for String#encode, replace
invalid byte sequence even if the destination encoding equals to invalid byte sequence even if the destination encoding equals to

View file

@ -8014,6 +8014,28 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str)
} }
} }
/*
 *  call-seq:
 *    str.scrub! -> str
 *    str.scrub!(repl) -> str
 *    str.scrub!{|bytes|} -> str
 *
 *  In-place counterpart of String#scrub: the receiver's contents are
 *  replaced with the scrubbed result and the receiver itself is returned.
 *
 *  Invalid byte sequences are replaced with the replacement character,
 *  or with +repl+ when it is given.  When a block is given, each invalid
 *  byte sequence is passed to the block and replaced with the value the
 *  block returns.
 *
 *     "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
 *     "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
 *     "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
 */
VALUE
rb_str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    /* Delegate the actual repair to the non-destructive implementation,
     * forwarding the optional replacement argument untouched. */
    VALUE scrubbed = rb_str_scrub(argc, argv, str);

    /* Overwrite the receiver with the repaired contents so that callers
     * keep holding the same object. */
    rb_str_replace(str, scrubbed);

    return str;
}
/********************************************************************** /**********************************************************************
* Document-class: Symbol * Document-class: Symbol
* *
@ -8500,6 +8522,7 @@ Init_String(void)
rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2); rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1); rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
rb_define_method(rb_cString, "scrub", rb_str_scrub, -1); rb_define_method(rb_cString, "scrub", rb_str_scrub, -1);
rb_define_method(rb_cString, "scrub!", rb_str_scrub_bang, -1);
rb_define_method(rb_cString, "to_i", rb_str_to_i, -1); rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);

View file

@ -1491,6 +1491,11 @@ class TestM17N < Test::Unit::TestCase
end end
def test_scrub def test_scrub
str = "\u3042\u3044"
assert_not_same(str, str.scrub)
str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
assert_not_same(str, str.scrub)
assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub) assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
@ -1529,4 +1534,16 @@ class TestM17N < Test::Unit::TestCase
"\xff".force_encoding(Encoding::UTF_32LE). "\xff".force_encoding(Encoding::UTF_32LE).
scrub) scrub)
end end
def test_scrub_bang
  # A string that is already valid must be returned as the very same object.
  str = "\u3042\u3044"
  assert_same(str, str.scrub!)

  # Same expectation after relabelling the bytes with a dummy encoding.
  str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
  assert_same(str, str.scrub!)

  # When invalid bytes are actually repaired, scrub! must still hand back
  # the receiver itself (previously this return value was discarded and
  # only the second, no-op call was checked).
  str = u("\x80\x80\x80")
  assert_same(str, str.scrub!)

  # Scrubbing an already-repaired string is idempotent and keeps identity.
  assert_same(str, str.scrub!)

  # The receiver was modified in place: each stray byte became U+FFFD.
  assert_equal("\uFFFD\uFFFD\uFFFD", str)
end
end end