1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

String#{lines,chars,codepoints,bytes} now return an array.

* string.c (rb_str_each_line, rb_str_lines): String#lines now
  returns an array instead of an enumerator.  Passing a block is
  deprecated but still supported for backwards compatibility.
  Based on the patch by yhara. [Feature #6670]

* string.c (rb_str_each_char, rb_str_chars): Ditto for
  String#chars.

* string.c (rb_str_each_codepoint, rb_str_codepoints): Ditto for
  String#codepoints.

* string.c (rb_str_each_byte, rb_str_bytes): Ditto for
  String#bytes.

* NEWS: Add notes for the above changes.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37838 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
knu 2012-11-24 18:46:15 +00:00
parent 7f0dd3a5a2
commit 3f9b0936aa
4 changed files with 456 additions and 115 deletions

View file

@ -1,3 +1,21 @@
Sun Nov 25 03:44:50 2012 Akinori MUSHA <knu@iDaemons.org>
* string.c (rb_str_each_line, rb_str_lines): String#lines now
returns an array instead of an enumerator. Passing a block is
deprecated but still supported for backwards compatibility.
Based on the patch by yhara. [Feature #6670]
* string.c (rb_str_each_char, rb_str_chars): Ditto for
String#chars.
* string.c (rb_str_each_codepoint, rb_str_codepoints): Ditto for
String#codepoints.
* string.c (rb_str_each_byte, rb_str_bytes): Ditto for
String#bytes.
* NEWS: Add notes for the above changes.
Sun Nov 25 02:07:37 2012 Akinori MUSHA <knu@iDaemons.org>
* test/ruby/envutil.rb (Test::Unit::Assertions#assert_warning)

17
NEWS
View file

@ -129,6 +129,11 @@ with all sufficient information, see the ChangeLog file.
* String
* added method:
* added String#b returning a copied string whose encoding is ASCII-8BIT.
* change return value:
* String#lines now returns an array instead of an enumerator.
* String#chars now returns an array instead of an enumerator.
* String#codepoints now returns an array instead of an enumerator.
* String#bytes now returns an array instead of an enumerator.
* Struct
* added method:
@ -302,6 +307,18 @@ with all sufficient information, see the ChangeLog file.
See above.
* String#lines
* String#chars
* String#codepoints
* String#bytes
These methods no longer return an Enumerator, although passing a
block is still supported for backwards compatibility.
Code like str.lines.with_index(1) { |line, lineno| ... } no longer
works because str.lines returns an array. Replace lines with
each_line in such cases.
* Signal.trap
See above.

417
string.c
View file

@ -6098,45 +6098,8 @@ rb_str_split(VALUE str, const char *sep0)
}
/*
* call-seq:
* str.each_line(separator=$/) {|substr| block } -> str
* str.each_line(separator=$/) -> an_enumerator
*
* str.lines(separator=$/) {|substr| block } -> str
* str.lines(separator=$/) -> an_enumerator
*
* Splits <i>str</i> using the supplied parameter as the record separator
* (<code>$/</code> by default), passing each substring in turn to the supplied
* block. If a zero-length record separator is supplied, the string is split
* into paragraphs delimited by multiple successive newlines.
*
* If no block is given, an enumerator is returned instead.
*
* print "Example one\n"
* "hello\nworld".each_line {|s| p s}
* print "Example two\n"
* "hello\nworld".each_line('l') {|s| p s}
* print "Example three\n"
* "hello\n\n\nworld".each_line('') {|s| p s}
*
* <em>produces:</em>
*
* Example one
* "hello\n"
* "world"
* Example two
* "hel"
* "l"
* "o\nworl"
* "d"
* Example three
* "hello\n\n\n"
* "world"
*/
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
{
rb_encoding *enc;
VALUE rs;
@ -6146,6 +6109,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
VALUE line;
int n;
VALUE orig = str;
VALUE ary;
if (argc == 0) {
rs = rb_rs;
@ -6153,10 +6117,34 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
else {
rb_scan_args(argc, argv, "01", &rs);
}
RETURN_ENUMERATOR(str, argc, argv);
if (rb_block_given_p()) {
if (wantarray) {
#if 0 /* next major */
rb_warn("given block not used");
ary = rb_ary_new();
#else
rb_warning("passing a block to String#lines is deprecated");
wantarray = 0;
#endif
}
}
else {
if (wantarray)
ary = rb_ary_new();
else
RETURN_ENUMERATOR(str, argc, argv);
}
if (NIL_P(rs)) {
rb_yield(str);
return orig;
if (wantarray) {
rb_ary_push(ary, str);
return ary;
}
else {
rb_yield(str);
return orig;
}
}
str = rb_str_new4(str);
ptr = p = s = RSTRING_PTR(str);
@ -6179,7 +6167,10 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
line = rb_str_new5(str, s, p - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
if (wantarray)
rb_ary_push(ary, line);
else
rb_yield(line);
str_mod_check(str, ptr, len);
s = p;
}
@ -6215,7 +6206,10 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
if (wantarray)
rb_ary_push(ary, line);
else
rb_yield(line);
str_mod_check(str, ptr, len);
s = p + (rslen ? rslen : n);
}
@ -6227,11 +6221,76 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
if (wantarray)
rb_ary_push(ary, line);
else
rb_yield(line);
RB_GC_GUARD(str);
}
return orig;
if (wantarray)
return ary;
else
return orig;
}
/*
* call-seq:
* str.each_line(separator=$/) {|substr| block } -> str
* str.each_line(separator=$/) -> an_enumerator
*
* Splits <i>str</i> using the supplied parameter as the record
* separator (<code>$/</code> by default), passing each substring in
* turn to the supplied block. If a zero-length record separator is
* supplied, the string is split into paragraphs delimited by
* multiple successive newlines.
*
* If no block is given, an enumerator is returned instead.
*
* print "Example one\n"
* "hello\nworld".each_line {|s| p s}
* print "Example two\n"
* "hello\nworld".each_line('l') {|s| p s}
* print "Example three\n"
* "hello\n\n\nworld".each_line('') {|s| p s}
*
* <em>produces:</em>
*
* Example one
* "hello\n"
* "world"
* Example two
* "hel"
* "l"
* "o\nworl"
* "d"
* Example three
* "hello\n\n\n"
* "world"
*/
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
/* wantarray = 0: yield each line to the block, or return an enumerator when no block is given */
return rb_str_enumerate_lines(argc, argv, str, 0);
}
/*
* call-seq:
* str.lines(separator=$/) -> an_array
*
* Returns an array of lines in <i>str</i> split using the supplied
* record separator (<code>$/</code> by default). This is a
* shorthand for <code>str.each_line(separator).to_a</code>.
*
* If a block is given, which is a deprecated form, works the same as
* <code>each_line</code>.
*/
static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
/* wantarray = 1: collect the lines into a new array; if a block is given the
 * helper emits a deprecation warning and behaves like each_line instead */
return rb_str_enumerate_lines(argc, argv, str, 1);
}
static VALUE
@ -6240,16 +6299,49 @@ rb_str_each_byte_size(VALUE str, VALUE args)
return LONG2FIX(RSTRING_LEN(str));
}
/*
 * Shared implementation of String#each_byte (wantarray == 0) and
 * String#bytes (wantarray != 0).
 *
 * Without a block: returns a sized enumerator (each_byte) or a new array
 * holding every byte as a Fixnum (bytes).
 * With a block: yields every byte and returns str.  When called as
 * String#bytes a deprecation warning is issued first and the call then
 * behaves exactly like each_byte.
 */
static VALUE
rb_str_enumerate_bytes(VALUE str, int wantarray)
{
    long idx;
    VALUE ary;

    if (!rb_block_given_p()) {
        if (!wantarray)
            RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
        ary = rb_ary_new2(RSTRING_LEN(str));
    }
    else if (wantarray) {
#if 0 /* next major */
        rb_warn("given block not used");
        ary = rb_ary_new();
#else
        rb_warning("passing a block to String#bytes is deprecated");
        wantarray = 0;
#endif
    }

    if (wantarray) {
        for (idx = 0; idx < RSTRING_LEN(str); idx++)
            rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[idx] & 0xff));
        return ary;
    }

    /* The length and pointer are re-read on every iteration because the
     * block is free to modify the receiver while we are walking it. */
    for (idx = 0; idx < RSTRING_LEN(str); idx++)
        rb_yield(INT2FIX(RSTRING_PTR(str)[idx] & 0xff));
    return str;
}
/*
* call-seq:
* str.bytes {|fixnum| block } -> str
* str.bytes -> an_enumerator
*
* str.each_byte {|fixnum| block } -> str
* str.each_byte -> an_enumerator
*
* Passes each byte in <i>str</i> to the given block, or returns
* an enumerator if no block is given.
* Passes each byte in <i>str</i> to the given block, or returns an
* enumerator if no block is given.
*
* "hello".each_byte {|c| print c, ' ' }
*
@ -6261,13 +6353,24 @@ rb_str_each_byte_size(VALUE str, VALUE args)
static VALUE
rb_str_each_byte(VALUE str)
{
long i;
return rb_str_enumerate_bytes(str, 0);
}
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
for (i=0; i<RSTRING_LEN(str); i++) {
rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
}
return str;
/*
* call-seq:
* str.bytes -> an_array
*
* Returns an array of bytes in <i>str</i>. This is a shorthand for
* <code>str.each_byte.to_a</code>.
*
* If a block is given, which is a deprecated form, works the same as
* <code>each_byte</code>.
*/
static VALUE
rb_str_bytes(VALUE str)
{
/* wantarray = 1: collect the bytes into a new array; if a block is given the
 * helper emits a deprecation warning and behaves like each_byte instead */
return rb_str_enumerate_bytes(str, 1);
}
static VALUE
@ -6285,11 +6388,65 @@ rb_str_each_char_size(VALUE str)
return LONG2FIX(len);
}
/*
 * Shared implementation of String#each_char (wantarray == 0) and
 * String#chars (wantarray != 0).
 *
 * Without a block: returns a sized enumerator (each_char) or a new array
 * of one-character substrings (chars).
 * With a block: yields every character and returns the receiver.  When
 * called as String#chars a deprecation warning is issued first and the
 * call then behaves exactly like each_char.
 */
static VALUE
rb_str_enumerate_chars(VALUE str, int wantarray)
{
    VALUE receiver = str;
    long pos, total, width;
    const char *base;
    rb_encoding *enc;
    VALUE ary;
    VALUE ch;

    if (!rb_block_given_p()) {
        if (!wantarray)
            RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
        ary = rb_ary_new();
    }
    else if (wantarray) {
#if 0 /* next major */
        rb_warn("given block not used");
        ary = rb_ary_new();
#else
        rb_warning("passing a block to String#chars is deprecated");
        wantarray = 0;
#endif
    }

    /* Iterate over the string returned by rb_str_new4() rather than the
     * receiver itself; the receiver is what gets returned in block mode. */
    str = rb_str_new4(str);
    base = RSTRING_PTR(str);
    total = RSTRING_LEN(str);
    enc = rb_enc_get(str);

    switch (ENC_CODERANGE(str)) {
      case ENC_CODERANGE_VALID:
      case ENC_CODERANGE_7BIT:
        /* Coderange is 7bit/valid: use the fast character-length routine. */
        pos = 0;
        while (pos < total) {
            width = rb_enc_fast_mbclen(base + pos, base + total, enc);
            ch = rb_str_subseq(str, pos, width);
            if (wantarray)
                rb_ary_push(ary, ch);
            else
                rb_yield(ch);
            pos += width;
        }
        break;
      default:
        /* Any other coderange: use the general character-length routine. */
        pos = 0;
        while (pos < total) {
            width = rb_enc_mbclen(base + pos, base + total, enc);
            ch = rb_str_subseq(str, pos, width);
            if (wantarray)
                rb_ary_push(ary, ch);
            else
                rb_yield(ch);
            pos += width;
        }
    }

    return wantarray ? ary : receiver;
}
/*
* call-seq:
* str.chars {|cstr| block } -> str
* str.chars -> an_enumerator
*
* str.each_char {|cstr| block } -> str
* str.each_char -> an_enumerator
*
@ -6306,38 +6463,79 @@ rb_str_each_char_size(VALUE str)
static VALUE
rb_str_each_char(VALUE str)
{
VALUE orig = str;
long i, len, n;
const char *ptr;
rb_encoding *enc;
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
len = RSTRING_LEN(str);
enc = rb_enc_get(str);
switch (ENC_CODERANGE(str)) {
case ENC_CODERANGE_VALID:
case ENC_CODERANGE_7BIT:
for (i = 0; i < len; i += n) {
n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
rb_yield(rb_str_subseq(str, i, n));
}
break;
default:
for (i = 0; i < len; i += n) {
n = rb_enc_mbclen(ptr + i, ptr + len, enc);
rb_yield(rb_str_subseq(str, i, n));
}
}
return orig;
return rb_str_enumerate_chars(str, 0);
}
/*
* call-seq:
* str.codepoints {|integer| block } -> str
* str.codepoints -> an_enumerator
* str.chars -> an_array
*
* Returns an array of characters in <i>str</i>. This is a shorthand
* for <code>str.each_char.to_a</code>.
*
* If a block is given, which is a deprecated form, works the same as
* <code>each_char</code>.
*/
static VALUE
rb_str_chars(VALUE str)
{
/* wantarray = 1: collect the characters into a new array; if a block is given
 * the helper emits a deprecation warning and behaves like each_char instead */
return rb_str_enumerate_chars(str, 1);
}
/*
 * Shared implementation of String#each_codepoint (wantarray == 0) and
 * String#codepoints (wantarray != 0).
 *
 * Without a block: returns a sized enumerator (each_codepoint) or a new
 * array of Integer codepoints (codepoints).
 * With a block: yields every codepoint and returns the receiver.  When
 * called as String#codepoints a deprecation warning is issued first and
 * the call then behaves exactly like each_codepoint.
 *
 * Strings that are single-byte optimizable are handed to
 * rb_str_enumerate_bytes(), since each byte is then its own codepoint.
 */
static VALUE
rb_str_enumerate_codepoints(VALUE str, int wantarray)
{
    VALUE orig = str;
    int n;
    unsigned int c;
    const char *ptr, *end;
    rb_encoding *enc;
    VALUE ary = Qnil; /* only assigned a real array when wantarray stays set */

    if (single_byte_optimizable(str)) {
#if 0 /* next major */
        /* rb_str_enumerate_bytes() issues the method-agnostic
         * "given block not used" warning itself. */
#else
        /* Emit the deprecation warning here so that it names
         * String#codepoints; delegating with wantarray still set would make
         * rb_str_enumerate_bytes() report String#bytes instead. */
        if (wantarray && rb_block_given_p()) {
            rb_warning("passing a block to String#codepoints is deprecated");
            wantarray = 0;
        }
#endif
        return rb_str_enumerate_bytes(str, wantarray);
    }

    if (rb_block_given_p()) {
        if (wantarray) {
#if 0 /* next major */
            rb_warn("given block not used");
            ary = rb_ary_new();
#else
            rb_warning("passing a block to String#codepoints is deprecated");
            wantarray = 0;
#endif
        }
    }
    else {
        if (wantarray)
            ary = rb_ary_new();
        else
            RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    }

    /* Walk the string returned by rb_str_new4() rather than the receiver;
     * the receiver (orig) is what gets returned in block mode. */
    str = rb_str_new4(str);
    ptr = RSTRING_PTR(str);
    end = RSTRING_END(str);
    enc = STR_ENC_GET(str);
    while (ptr < end) {
        c = rb_enc_codepoint_len(ptr, end, &n, enc);
        if (wantarray)
            rb_ary_push(ary, UINT2NUM(c));
        else
            rb_yield(UINT2NUM(c));
        ptr += n;
    }
    /* ptr/end point into str's buffer, so keep str alive until here */
    RB_GC_GUARD(str);
    if (wantarray)
        return ary;
    else
        return orig;
}
/*
* call-seq:
* str.each_codepoint {|integer| block } -> str
* str.each_codepoint -> an_enumerator
*
@ -6357,27 +6555,28 @@ rb_str_each_char(VALUE str)
static VALUE
rb_str_each_codepoint(VALUE str)
{
VALUE orig = str;
int n;
unsigned int c;
const char *ptr, *end;
rb_encoding *enc;
if (single_byte_optimizable(str)) return rb_str_each_byte(str);
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
str = rb_str_new4(str);
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
enc = STR_ENC_GET(str);
while (ptr < end) {
c = rb_enc_codepoint_len(ptr, end, &n, enc);
rb_yield(UINT2NUM(c));
ptr += n;
}
RB_GC_GUARD(str);
return orig;
return rb_str_enumerate_codepoints(str, 0);
}
/*
* call-seq:
* str.codepoints -> an_array
*
* Returns an array of the <code>Integer</code> ordinals of the
* characters in <i>str</i>. This is a shorthand for
* <code>str.each_codepoint.to_a</code>.
*
* If a block is given, which is a deprecated form, works the same as
* <code>each_codepoint</code>.
*/
static VALUE
rb_str_codepoints(VALUE str)
{
/* wantarray = 1: collect the codepoints into a new array; if a block is given
 * the helper emits a deprecation warning and iterates instead */
return rb_str_enumerate_codepoints(str, 1);
}
static long
chopped_length(VALUE str)
{
@ -7994,10 +8193,10 @@ Init_String(void)
rb_define_method(rb_cString, "hex", rb_str_hex, 0);
rb_define_method(rb_cString, "oct", rb_str_oct, 0);
rb_define_method(rb_cString, "split", rb_str_split_m, -1);
rb_define_method(rb_cString, "lines", rb_str_each_line, -1);
rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0);
rb_define_method(rb_cString, "chars", rb_str_each_char, 0);
rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0);
rb_define_method(rb_cString, "lines", rb_str_lines, -1);
rb_define_method(rb_cString, "bytes", rb_str_bytes, 0);
rb_define_method(rb_cString, "chars", rb_str_chars, 0);
rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
rb_define_method(rb_cString, "concat", rb_str_concat, 1);

View file

@ -626,36 +626,121 @@ class TestString < Test::Unit::TestCase
end
def test_each_byte
s = S("ABC")
res = []
S("ABC").each_byte {|x| res << x }
assert_equal s.object_id, s.each_byte {|x| res << x }.object_id
assert_equal(65, res[0])
assert_equal(66, res[1])
assert_equal(67, res[2])
assert_equal 65, s.each_byte.next
end
# String#bytes returns an array of byte values.  Passing a block is the
# deprecated form: before 2.1.0 it warns and behaves like each_byte; from
# 2.1.0 on the block is expected to be ignored with a warning.
def test_bytes
  s = S("ABC")
  assert_equal [65, 66, 67], s.bytes

  # Compare the version components numerically: a plain String comparison
  # is lexicographic and would rank "10.0.0" below "2.1.0".
  if (RUBY_VERSION.split(".").map(&:to_i) <=> [2, 1, 0]) >= 0
    assert_warn(/block not used/) {
      assert_equal [65, 66, 67], s.bytes {}
    }
  else
    assert_warning(/deprecated/) {
      res = []
      assert_equal s.object_id, s.bytes {|x| res << x }.object_id
      assert_equal(65, res[0])
      assert_equal(66, res[1])
      assert_equal(67, res[2])
    }
  end
end
def test_each_codepoint
# Single byte optimization
assert_equal 65, S("ABC").each_codepoint.next
s = S("\u3042\u3044\u3046")
res = []
S("ABC").codepoints.each {|x| res << x}
assert_equal([65, 66, 67], res)
assert_equal s.object_id, s.each_codepoint {|x| res << x }.object_id
assert_equal(0x3042, res[0])
assert_equal(0x3044, res[1])
assert_equal(0x3046, res[2])
assert_equal 0x3042, s.each_codepoint.next
end
# String#codepoints returns an array of Integer codepoints, both for
# single-byte strings and for multibyte ones.  Passing a block is the
# deprecated form: before 2.1.0 it warns and behaves like each_codepoint;
# from 2.1.0 on the block is expected to be ignored with a warning.
def test_codepoints
  # Single byte optimization
  assert_equal [65, 66, 67], S("ABC").codepoints

  s = S("\u3042\u3044\u3046")
  assert_equal [0x3042, 0x3044, 0x3046], s.codepoints

  # Compare the version components numerically: a plain String comparison
  # is lexicographic and would rank "10.0.0" below "2.1.0".
  if (RUBY_VERSION.split(".").map(&:to_i) <=> [2, 1, 0]) >= 0
    assert_warn(/block not used/) {
      assert_equal [0x3042, 0x3044, 0x3046], s.codepoints {}
    }
  else
    assert_warning(/deprecated/) {
      res = []
      assert_equal s.object_id, s.codepoints {|x| res << x }.object_id
      assert_equal(0x3042, res[0])
      assert_equal(0x3044, res[1])
      assert_equal(0x3046, res[2])
    }
  end
end
# String#each_char yields every character in order and returns its
# receiver when given a block; without a block it returns an enumerator.
def test_each_char
  str = S("ABC")
  collected = []

  returned = str.each_char { |ch| collected << ch }
  assert_equal str.object_id, returned.object_id

  %w[A B C].each_with_index do |expected, idx|
    assert_equal(expected, collected[idx])
  end

  first_char = S("ABC").each_char.next
  assert_equal "A", first_char
end
# String#chars returns an array of one-character strings.  Passing a block
# is the deprecated form: before 2.1.0 it warns and behaves like each_char;
# from 2.1.0 on the block is expected to be ignored with a warning.
def test_chars
  s = S("ABC")
  assert_equal ["A", "B", "C"], s.chars

  # Compare the version components numerically: a plain String comparison
  # is lexicographic and would rank "10.0.0" below "2.1.0".
  if (RUBY_VERSION.split(".").map(&:to_i) <=> [2, 1, 0]) >= 0
    assert_warn(/block not used/) {
      assert_equal ["A", "B", "C"], s.chars {}
    }
  else
    assert_warning(/deprecated/) {
      res = []
      assert_equal s.object_id, s.chars {|x| res << x }.object_id
      assert_equal("A", res[0])
      assert_equal("B", res[1])
      assert_equal("C", res[2])
    }
  end
end
def test_each_line
save = $/
$/ = "\n"
res=[]
S("hello\nworld").lines.each {|x| res << x}
S("hello\nworld").each_line {|x| res << x}
assert_equal(S("hello\n"), res[0])
assert_equal(S("world"), res[1])
res=[]
S("hello\n\n\nworld").lines(S('')).each {|x| res << x}
S("hello\n\n\nworld").each_line(S('')) {|x| res << x}
assert_equal(S("hello\n\n\n"), res[0])
assert_equal(S("world"), res[1])
$/ = "!"
res=[]
S("hello!world").lines.each {|x| res << x}
S("hello!world").each_line {|x| res << x}
assert_equal(S("hello!"), res[0])
assert_equal(S("world"), res[1])
@ -671,6 +756,28 @@ class TestString < Test::Unit::TestCase
s = nil
"foo\nbar".each_line(nil) {|s2| s = s2 }
assert_equal("foo\nbar", s)
assert_equal "hello\n", S("hello\nworld").each_line.next
assert_equal "hello\nworld", S("hello\nworld").each_line(nil).next
end
# String#lines returns an array of lines, or a one-element array when the
# separator is nil.  Passing a block is the deprecated form: before 2.1.0
# it warns and behaves like each_line; from 2.1.0 on the block is expected
# to be ignored with a warning.
def test_lines
  s = S("hello\nworld")
  assert_equal ["hello\n", "world"], s.lines
  assert_equal ["hello\nworld"], s.lines(nil)

  # Compare the version components numerically: a plain String comparison
  # is lexicographic and would rank "10.0.0" below "2.1.0".
  if (RUBY_VERSION.split(".").map(&:to_i) <=> [2, 1, 0]) >= 0
    assert_warn(/block not used/) {
      assert_equal ["hello\n", "world"], s.lines {}
    }
  else
    assert_warning(/deprecated/) {
      res = []
      assert_equal s.object_id, s.lines {|x| res << x }.object_id
      assert_equal(S("hello\n"), res[0])
      assert_equal(S("world"), res[1])
    }
  end
end
def test_empty?