mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enum.c (enum_chunk): new method Enumerable#chunk.
* enum.c (enum_slice_before): new method Enumerable#slice_before. [ruby-dev:38392] [ruby-dev:39240] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25032 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
089beb67bd
commit
475074d5da
3 changed files with 401 additions and 0 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
Tue Sep 22 10:29:06 2009 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* enum.c (enum_chunk): new method Enumerable#chunk.
|
||||||
|
* enum.c (enum_slice_before): new method Enumerable#slice_before.
|
||||||
|
[ruby-dev:38392] [ruby-dev:39240]
|
||||||
|
|
||||||
Tue Sep 22 05:58:25 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Tue Sep 22 05:58:25 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* compile.c, cont.c, gc.c, insns.def, iseq.c, iseq.h, process.c,
|
* compile.c, cont.c, gc.c, insns.def, iseq.c, iseq.h, process.c,
|
||||||
|
|
325
enum.c
325
enum.c
|
@ -1802,6 +1802,329 @@ enum_cycle(int argc, VALUE *argv, VALUE obj)
|
||||||
return Qnil; /* not reached */
|
return Qnil; /* not reached */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct chunk_arg {
|
||||||
|
VALUE categorize;
|
||||||
|
VALUE state;
|
||||||
|
VALUE prev_value;
|
||||||
|
VALUE prev_elts;
|
||||||
|
VALUE yielder;
|
||||||
|
};
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
chunk_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
|
||||||
|
{
|
||||||
|
struct chunk_arg *argp = (struct chunk_arg *)_argp;
|
||||||
|
VALUE v;
|
||||||
|
VALUE alone = ID2SYM(rb_intern("_alone"));
|
||||||
|
VALUE separator = ID2SYM(rb_intern("_separator"));
|
||||||
|
|
||||||
|
ENUM_WANT_SVALUE();
|
||||||
|
|
||||||
|
if (NIL_P(argp->state))
|
||||||
|
v = rb_funcall(argp->categorize, rb_intern("call"), 1, i);
|
||||||
|
else
|
||||||
|
v = rb_funcall(argp->categorize, rb_intern("call"), 2, i, argp->state);
|
||||||
|
|
||||||
|
if (v == alone) {
|
||||||
|
if (!NIL_P(argp->prev_value)) {
|
||||||
|
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
|
||||||
|
argp->prev_value = argp->prev_elts = Qnil;
|
||||||
|
}
|
||||||
|
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(v, rb_ary_new3(1, i)));
|
||||||
|
}
|
||||||
|
else if (NIL_P(v) || v == separator) {
|
||||||
|
if (!NIL_P(argp->prev_value)) {
|
||||||
|
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
|
||||||
|
argp->prev_value = argp->prev_elts = Qnil;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (SYMBOL_P(v) && rb_id2name(SYM2ID(v))[0] == '_') {
|
||||||
|
rb_raise(rb_eRuntimeError, "symbol begins with an underscore is reserved");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (NIL_P(argp->prev_value)) {
|
||||||
|
argp->prev_value = v;
|
||||||
|
argp->prev_elts = rb_ary_new3(1, i);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (rb_equal(argp->prev_value, v)) {
|
||||||
|
rb_ary_push(argp->prev_elts, i);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
|
||||||
|
argp->prev_value = v;
|
||||||
|
argp->prev_elts = rb_ary_new3(1, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
chunk_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
|
||||||
|
{
|
||||||
|
VALUE enumerable;
|
||||||
|
struct chunk_arg arg;
|
||||||
|
|
||||||
|
enumerable = rb_ivar_get(enumerator, rb_intern("chunk_enumerable"));
|
||||||
|
arg.categorize = rb_ivar_get(enumerator, rb_intern("chunk_categorize"));
|
||||||
|
arg.state = rb_ivar_get(enumerator, rb_intern("chunk_initial_state"));
|
||||||
|
arg.prev_value = Qnil;
|
||||||
|
arg.prev_elts = Qnil;
|
||||||
|
arg.yielder = yielder;
|
||||||
|
|
||||||
|
if (!NIL_P(arg.state))
|
||||||
|
arg.state = rb_obj_dup(arg.state);
|
||||||
|
|
||||||
|
rb_block_call(enumerable, id_each, 0, 0, chunk_ii, (VALUE)&arg);
|
||||||
|
if (!NIL_P(arg.prev_elts))
|
||||||
|
rb_funcall(arg.yielder, rb_intern("<<"), 1, rb_assoc_new(arg.prev_value, arg.prev_elts));
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* enum.chunk {|elt| ... } => enumerator
|
||||||
|
* enum.chunk(initial_state) {|elt, state| ... } => enumerator
|
||||||
|
*
|
||||||
|
* Creates an enumerator that yields each chunk of elements.
|
||||||
|
* Consecutive elements which have the same block value are chunked together.
|
||||||
|
*
|
||||||
|
* The result enumerator yields the block value and an array of chunked elements.
|
||||||
|
* So "each" method can be called as follows.
|
||||||
|
*
|
||||||
|
* enum.chunk {|elt| key }.each {|key, ary| ... }
|
||||||
|
*
|
||||||
|
* For example, consecutive even numbers and odd numbers can be
|
||||||
|
* split as follows.
|
||||||
|
*
|
||||||
|
* [5, 3, 3, 5, 2, 8, 0, 6, 0, 3].chunk {|n|
|
||||||
|
* n.even?
|
||||||
|
* }.each {|even, ary|
|
||||||
|
* p [even, ary]
|
||||||
|
* }
|
||||||
|
* #=> [false, [5, 3, 3, 5]]
|
||||||
|
* # [true, [2, 8, 0, 6, 0]]
|
||||||
|
* # [false, [3]]
|
||||||
|
*
|
||||||
|
* This method is useful for sorted series of elements.
|
||||||
|
* The following example counts words for each initial letter.
|
||||||
|
*
|
||||||
|
* open("/usr/share/dict/words", "r:iso-8859-1") {|f|
|
||||||
|
* f.chunk {|line| line.ord }.each {|ch, lines| p [ch.chr, lines.length] }
|
||||||
|
* }
|
||||||
|
* #=> ["\n", 1]
|
||||||
|
* # ["A", 1327]
|
||||||
|
* # ["B", 1372]
|
||||||
|
* # ["C", 1507]
|
||||||
|
* # ["D", 791]
|
||||||
|
* # ...
|
||||||
|
*
|
||||||
|
* The following key values have special meanings:
|
||||||
|
* - nil and :_separator specify that the elements are dropped.
|
||||||
|
* - :_alone specifies that the element should be chunked as a singleton.
|
||||||
|
* Other symbols which begin with an underscore are reserved.
|
||||||
|
*
|
||||||
|
* nil and :_separator can be used to ignore some elements.
|
||||||
|
* For example, the sequence of hyphens in svn log can be eliminated as follows.
|
||||||
|
*
|
||||||
|
* sep = "-"*72 + "\n"
|
||||||
|
* IO.popen("svn log README") {|f|
|
||||||
|
* f.chunk {|line|
|
||||||
|
* line != sep || nil
|
||||||
|
* }.each {|_, lines|
|
||||||
|
* pp lines
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* #=> ["r20018 | knu | 2008-10-29 13:20:42 +0900 (Wed, 29 Oct 2008) | 2 lines\n",
|
||||||
|
* # "\n",
|
||||||
|
* # "* README, README.ja: Update the portability section.\n",
|
||||||
|
* # "\n"]
|
||||||
|
* # ["r16725 | knu | 2008-05-31 23:34:23 +0900 (Sat, 31 May 2008) | 2 lines\n",
|
||||||
|
* # "\n",
|
||||||
|
* # "* README, README.ja: Add a note about default C flags.\n",
|
||||||
|
* # "\n"]
|
||||||
|
* # ...
|
||||||
|
*
|
||||||
|
* :_alone can be used to pass through a bunch of elements.
|
||||||
|
* For example, to sort consecutive lines formed as Foo#bar and
|
||||||
|
* pass other lines through, chunk can be used as follows.
|
||||||
|
*
|
||||||
|
* pat = /\A[A-Z][A-Za-z0-9_]+\#/
|
||||||
|
* open(filename) {|f|
|
||||||
|
* f.chunk {|line| pat =~ line ? $& : :_alone }.each {|key, lines|
|
||||||
|
* if key != :_alone
|
||||||
|
* print lines.sort.join('')
|
||||||
|
* else
|
||||||
|
* print lines.join('')
|
||||||
|
* end
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* If the block needs to maintain state over multiple elements,
|
||||||
|
* _initial_state_ argument can be used.
|
||||||
|
* If a non-nil value is given,
|
||||||
|
* it is duplicated for each "each" method invocation of the enumerator.
|
||||||
|
* The duplicated object is passed as the 2nd argument of the block for the "chunk" method.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
enum_chunk(int argc, VALUE *argv, VALUE enumerable)
|
||||||
|
{
|
||||||
|
VALUE initial_state;
|
||||||
|
VALUE enumerator;
|
||||||
|
|
||||||
|
rb_scan_args(argc, argv, "01", &initial_state);
|
||||||
|
|
||||||
|
enumerator = rb_obj_alloc(rb_cEnumerator);
|
||||||
|
rb_ivar_set(enumerator, rb_intern("chunk_enumerable"), enumerable);
|
||||||
|
rb_ivar_set(enumerator, rb_intern("chunk_categorize"), rb_block_proc());
|
||||||
|
rb_ivar_set(enumerator, rb_intern("chunk_initial_state"), initial_state);
|
||||||
|
rb_block_call(enumerator, rb_intern("initialize"), 0, 0, chunk_i, enumerator);
|
||||||
|
return enumerator;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct slicebefore_arg {
|
||||||
|
VALUE separator_p;
|
||||||
|
VALUE state;
|
||||||
|
VALUE prev_elts;
|
||||||
|
VALUE yielder;
|
||||||
|
};
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
slicebefore_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
|
||||||
|
{
|
||||||
|
struct slicebefore_arg *argp = (struct slicebefore_arg *)_argp;
|
||||||
|
VALUE header_p;
|
||||||
|
|
||||||
|
ENUM_WANT_SVALUE();
|
||||||
|
|
||||||
|
if (NIL_P(argp->state))
|
||||||
|
header_p = rb_funcall(argp->separator_p, rb_intern("call"), 1, i);
|
||||||
|
else
|
||||||
|
header_p = rb_funcall(argp->separator_p, rb_intern("call"), 2, i, argp->state);
|
||||||
|
if (RTEST(header_p)) {
|
||||||
|
if (!NIL_P(argp->prev_elts))
|
||||||
|
rb_funcall(argp->yielder, rb_intern("<<"), 1, argp->prev_elts);
|
||||||
|
argp->prev_elts = rb_ary_new3(1, i);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (NIL_P(argp->prev_elts))
|
||||||
|
argp->prev_elts = rb_ary_new3(1, i);
|
||||||
|
else
|
||||||
|
rb_ary_push(argp->prev_elts, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
slicebefore_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
|
||||||
|
{
|
||||||
|
VALUE enumerable;
|
||||||
|
struct slicebefore_arg arg;
|
||||||
|
|
||||||
|
enumerable = rb_ivar_get(enumerator, rb_intern("slicebefore_enumerable"));
|
||||||
|
arg.separator_p = rb_ivar_get(enumerator, rb_intern("slicebefore_separator_p"));
|
||||||
|
arg.state = rb_ivar_get(enumerator, rb_intern("slicebefore_initial_state"));
|
||||||
|
arg.prev_elts = Qnil;
|
||||||
|
arg.yielder = yielder;
|
||||||
|
|
||||||
|
if (!NIL_P(arg.state))
|
||||||
|
arg.state = rb_obj_dup(arg.state);
|
||||||
|
|
||||||
|
rb_block_call(enumerable, id_each, 0, 0, slicebefore_ii, (VALUE)&arg);
|
||||||
|
if (!NIL_P(arg.prev_elts))
|
||||||
|
rb_funcall(arg.yielder, rb_intern("<<"), 1, arg.prev_elts);
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* enum.slice_before {|elt| ... } => enumerator
|
||||||
|
* enum.slice_before(initial_state) {|elt, state| ... } => enumerator
|
||||||
|
*
|
||||||
|
* Creates an enumerator that yields each chunk of elements.
|
||||||
|
* Each chunk begins with an element for which the block returns a true value.
|
||||||
|
*
|
||||||
|
* The result enumerator yields the chunked elements as an array.
|
||||||
|
* So "each" method can be called as follows.
|
||||||
|
*
|
||||||
|
* enum.slice_before {|elt| bool }.each {|ary| ... }
|
||||||
|
*
|
||||||
|
* For example, iteration over ChangeLog entries can be implemented as follows.
|
||||||
|
*
|
||||||
|
* # iterate over ChangeLog entries.
|
||||||
|
* open("ChangeLog") {|f|
|
||||||
|
* f.slice_before {|line| /\A\S/ =~ line }.each {|e| pp e}
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* If the block needs to maintain state over multiple elements,
|
||||||
|
* _initial_state_ argument can be used.
|
||||||
|
* If a non-nil value is given,
|
||||||
|
* it is duplicated for each "each" method invocation of the enumerator.
|
||||||
|
* The duplicated object is passed as the 2nd argument of the block for the "slice_before" method.
|
||||||
|
*
|
||||||
|
* For example, monotonically increasing elements can be chunked as follows.
|
||||||
|
*
|
||||||
|
* a = [2, 5, 2, 1, 4, 3, 1, 2, 8, 0]
|
||||||
|
* enum = a.slice_before(n: 0) {|elt, h|
|
||||||
|
* prev = h[:n]
|
||||||
|
* h[:n] = elt
|
||||||
|
* prev > elt
|
||||||
|
* }
|
||||||
|
* enum.each {|ary| p ary }
|
||||||
|
* #=> [2, 5]
|
||||||
|
* # [2]
|
||||||
|
* # [1, 4]
|
||||||
|
* # [3]
|
||||||
|
* # [1, 2, 8]
|
||||||
|
* # [0]
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* # parse mbox
|
||||||
|
* open("mbox") {|f|
|
||||||
|
* f.slice_before {|line|
|
||||||
|
* line.start_with? "From "
|
||||||
|
* }.each {|mail|
|
||||||
|
* unix_from = mail.shift
|
||||||
|
* i = mail.index("\n")
|
||||||
|
* header = mail[0...i]
|
||||||
|
* body = mail[(i+1)..-1]
|
||||||
|
* fields = header.slice_before {|line| !" \t".include?(line[0]) }.to_a
|
||||||
|
* p unix_from
|
||||||
|
* pp fields
|
||||||
|
* pp body
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* # split mails in mbox (slice before Unix From line after an empty line)
|
||||||
|
* open("mbox") {|f|
|
||||||
|
* f.slice_before(emp: true) {|line,h|
|
||||||
|
* prevemp = h[:emp]
|
||||||
|
* h[:emp] = line == "\n"
|
||||||
|
* prevemp && line.start_with?("From ")
|
||||||
|
* }.each {|mail|
|
||||||
|
* pp mail
|
||||||
|
* }
|
||||||
|
* }
*
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
|
||||||
|
{
|
||||||
|
VALUE initial_state, enumerator;
|
||||||
|
|
||||||
|
rb_scan_args(argc, argv, "01", &initial_state);
|
||||||
|
|
||||||
|
enumerator = rb_obj_alloc(rb_cEnumerator);
|
||||||
|
rb_ivar_set(enumerator, rb_intern("slicebefore_enumerable"), enumerable);
|
||||||
|
rb_ivar_set(enumerator, rb_intern("slicebefore_separator_p"), rb_block_proc());
|
||||||
|
rb_ivar_set(enumerator, rb_intern("slicebefore_initial_state"), initial_state);
|
||||||
|
rb_block_call(enumerator, rb_intern("initialize"), 0, 0, slicebefore_i, enumerator);
|
||||||
|
return enumerator;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* enum.join(sep=$,) -> str
|
* enum.join(sep=$,) -> str
|
||||||
|
@ -1881,6 +2204,8 @@ Init_Enumerable(void)
|
||||||
rb_define_method(rb_mEnumerable, "drop_while", enum_drop_while, 0);
|
rb_define_method(rb_mEnumerable, "drop_while", enum_drop_while, 0);
|
||||||
rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
|
rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
|
||||||
rb_define_method(rb_mEnumerable, "join", enum_join, -1);
|
rb_define_method(rb_mEnumerable, "join", enum_join, -1);
|
||||||
|
rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
|
||||||
|
rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
|
||||||
|
|
||||||
id_eqq = rb_intern("===");
|
id_eqq = rb_intern("===");
|
||||||
id_each = rb_intern("each");
|
id_each = rb_intern("each");
|
||||||
|
|
|
@ -315,4 +315,74 @@ class TestEnumerable < Test::Unit::TestCase
|
||||||
ensure
|
ensure
|
||||||
$, = ofs
|
$, = ofs
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_chunk
  # An empty receiver yields no chunks.
  chunks = [].chunk {|elt| true }
  assert_equal([], chunks.to_a)

  # Plain true/false keys group consecutive elements.
  chunks = @obj.chunk {|elt| (elt & 2) != 0 }
  expected = [[false, [1]], [true, [2, 3]], [false, [1]], [true, [2]]]
  assert_equal(expected, chunks.to_a)

  # The initial state is handed to the block as its 2nd argument.
  chunks = @obj.chunk(acc: 0) {|elt, h| h[:acc] += elt; h[:acc].even? }
  expected = [[false, [1,2]], [true, [3]], [false, [1,2]]]
  assert_equal(expected, chunks.to_a)
  assert_equal(expected, chunks.to_a) # this tests h is duplicated.

  # Every enumeration receives its own copy of the initial state.
  hs = [{}]
  chunks = [:foo].chunk(hs[0]) {|elt, h|
    hs << h
    true
  }
  assert_equal([[true, [:foo]]], chunks.to_a)
  assert_equal([[true, [:foo]]], chunks.to_a)
  assert_equal([{}, {}, {}], hs)
  assert_not_same(hs[0], hs[1])
  assert_not_same(hs[0], hs[2])
  assert_not_same(hs[1], hs[2])

  # :_alone puts each such element into a chunk of its own.
  chunks = @obj.chunk {|elt| elt < 3 ? :_alone : true }
  assert_equal([[:_alone, [1]],
                [:_alone, [2]],
                [true, [3]],
                [:_alone, [1]],
                [:_alone, [2]]], chunks.to_a)

  # :_separator and nil both drop the element and end the current chunk.
  chunks = @obj.chunk {|elt| elt == 3 ? :_separator : true }
  assert_equal([[true, [1, 2]],
                [true, [1, 2]]], chunks.to_a)

  chunks = @obj.chunk {|elt| elt == 3 ? nil : true }
  assert_equal([[true, [1, 2]],
                [true, [1, 2]]], chunks.to_a)

  # Any other symbol starting with an underscore is rejected.
  chunks = @obj.chunk {|elt| :_foo }
  assert_raise(RuntimeError) { chunks.to_a }
end
|
||||||
|
|
||||||
|
def test_slice_before
  # An empty receiver yields no slices.
  slices = [].slice_before {|elt| true }
  assert_equal([], slices.to_a)

  # A new slice starts at every element for which the block is true.
  slices = @obj.slice_before {|elt| elt.even? }
  assert_equal([[1], [2,3,1], [2]], slices.to_a)

  slices = @obj.slice_before {|elt| elt.odd? }
  assert_equal([[1,2], [3], [1,2]], slices.to_a)

  # The initial state is handed to the block as its 2nd argument.
  slices = @obj.slice_before(acc: 0) {|elt, h| h[:acc] += elt; h[:acc].even? }
  expected = [[1,2], [3,1,2]]
  assert_equal(expected, slices.to_a)
  assert_equal(expected, slices.to_a) # this tests h is duplicated.

  # Every enumeration receives its own copy of the initial state.
  hs = [{}]
  slices = [:foo].slice_before(hs[0]) {|elt, h|
    hs << h
    true
  }
  assert_equal([[:foo]], slices.to_a)
  assert_equal([[:foo]], slices.to_a)
  assert_equal([{}, {}, {}], hs)
  assert_not_same(hs[0], hs[1])
  assert_not_same(hs[0], hs[2])
  assert_not_same(hs[1], hs[2])
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue