mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* enum.c (enum_chunk): new method Enumerable#chunk.
* enum.c (enum_slice_before): new method Enumerable#slice_before. [ruby-dev:38392] [ruby-dev:39240] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25032 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
089beb67bd
commit
475074d5da
3 changed files with 401 additions and 0 deletions
|
@ -1,3 +1,9 @@
|
|||
Tue Sep 22 10:29:06 2009 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* enum.c (enum_chunk): new method Enumerable#chunk.
|
||||
* enum.c (enum_slice_before): new method Enumerable#slice_before.
|
||||
[ruby-dev:38392] [ruby-dev:39240]
|
||||
|
||||
Tue Sep 22 05:58:25 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* compile.c, cont.c, gc.c, insns.def, iseq.c, iseq.h, process.c,
|
||||
|
|
325
enum.c
325
enum.c
|
@ -1802,6 +1802,329 @@ enum_cycle(int argc, VALUE *argv, VALUE obj)
|
|||
return Qnil; /* not reached */
|
||||
}
|
||||
|
||||
struct chunk_arg {
|
||||
VALUE categorize;
|
||||
VALUE state;
|
||||
VALUE prev_value;
|
||||
VALUE prev_elts;
|
||||
VALUE yielder;
|
||||
};
|
||||
|
||||
static VALUE
|
||||
chunk_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
|
||||
{
|
||||
struct chunk_arg *argp = (struct chunk_arg *)_argp;
|
||||
VALUE v;
|
||||
VALUE alone = ID2SYM(rb_intern("_alone"));
|
||||
VALUE separator = ID2SYM(rb_intern("_separator"));
|
||||
|
||||
ENUM_WANT_SVALUE();
|
||||
|
||||
if (NIL_P(argp->state))
|
||||
v = rb_funcall(argp->categorize, rb_intern("call"), 1, i);
|
||||
else
|
||||
v = rb_funcall(argp->categorize, rb_intern("call"), 2, i, argp->state);
|
||||
|
||||
if (v == alone) {
|
||||
if (!NIL_P(argp->prev_value)) {
|
||||
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
|
||||
argp->prev_value = argp->prev_elts = Qnil;
|
||||
}
|
||||
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(v, rb_ary_new3(1, i)));
|
||||
}
|
||||
else if (NIL_P(v) || v == separator) {
|
||||
if (!NIL_P(argp->prev_value)) {
|
||||
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
|
||||
argp->prev_value = argp->prev_elts = Qnil;
|
||||
}
|
||||
}
|
||||
else if (SYMBOL_P(v) && rb_id2name(SYM2ID(v))[0] == '_') {
|
||||
rb_raise(rb_eRuntimeError, "symbol begins with an underscore is reserved");
|
||||
}
|
||||
else {
|
||||
if (NIL_P(argp->prev_value)) {
|
||||
argp->prev_value = v;
|
||||
argp->prev_elts = rb_ary_new3(1, i);
|
||||
}
|
||||
else {
|
||||
if (rb_equal(argp->prev_value, v)) {
|
||||
rb_ary_push(argp->prev_elts, i);
|
||||
}
|
||||
else {
|
||||
rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
|
||||
argp->prev_value = v;
|
||||
argp->prev_elts = rb_ary_new3(1, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
chunk_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
|
||||
{
|
||||
VALUE enumerable;
|
||||
struct chunk_arg arg;
|
||||
|
||||
enumerable = rb_ivar_get(enumerator, rb_intern("chunk_enumerable"));
|
||||
arg.categorize = rb_ivar_get(enumerator, rb_intern("chunk_categorize"));
|
||||
arg.state = rb_ivar_get(enumerator, rb_intern("chunk_initial_state"));
|
||||
arg.prev_value = Qnil;
|
||||
arg.prev_elts = Qnil;
|
||||
arg.yielder = yielder;
|
||||
|
||||
if (!NIL_P(arg.state))
|
||||
arg.state = rb_obj_dup(arg.state);
|
||||
|
||||
rb_block_call(enumerable, id_each, 0, 0, chunk_ii, (VALUE)&arg);
|
||||
if (!NIL_P(arg.prev_elts))
|
||||
rb_funcall(arg.yielder, rb_intern("<<"), 1, rb_assoc_new(arg.prev_value, arg.prev_elts));
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* enum.chunk {|elt| ... } => enumerator
|
||||
* enum.chunk(initial_state) {|elt, state| ... } => enumerator
|
||||
*
|
||||
* Creates an enumerator for each chunk of elements.
|
||||
* Consecutive elements which have the same block value are chunked together.
|
||||
*
|
||||
* The result enumerator yields the block value and an array of chunked elements.
|
||||
* So "each" method can be called as follows.
|
||||
*
|
||||
* enum.chunk {|elt| key }.each {|key, ary| ... }
|
||||
*
|
||||
* For example, consecutive even numbers and odd numbers can be
|
||||
* split as follows.
|
||||
*
|
||||
* [5, 3, 3, 5, 2, 8, 0, 6, 0, 3].chunk {|n|
|
||||
* n.even?
|
||||
* }.each {|even, ary|
|
||||
* p [even, ary]
|
||||
* }
|
||||
* #=> [false, [5, 3, 3, 5]]
|
||||
* # [true, [2, 8, 0, 6, 0]]
|
||||
* # [false, [3]]
|
||||
*
|
||||
* This method is useful for sorted series of elements.
|
||||
* The following example counts words for each initial letter.
|
||||
*
|
||||
* open("/usr/share/dict/words", "r:iso-8859-1") {|f|
|
||||
* f.chunk {|line| line.ord }.each {|ch, lines| p [ch.chr, lines.length] }
|
||||
* }
|
||||
* #=> ["\n", 1]
|
||||
* # ["A", 1327]
|
||||
* # ["B", 1372]
|
||||
* # ["C", 1507]
|
||||
* # ["D", 791]
|
||||
* # ...
|
||||
*
|
||||
* The following key values have special meanings:
|
||||
* - nil and :_separator specify that the element is dropped.
|
||||
* - :_alone specifies that the element should be chunked as a singleton.
|
||||
* Other symbols which begin with an underscore are reserved.
|
||||
*
|
||||
* nil and :_separator can be used to ignore some elements.
|
||||
* For example, the sequence of hyphens in svn log can be eliminated as follows.
|
||||
*
|
||||
* sep = "-"*72 + "\n"
|
||||
* IO.popen("svn log README") {|f|
|
||||
* f.chunk {|line|
|
||||
* line != sep || nil
|
||||
* }.each {|_, lines|
|
||||
* pp lines
|
||||
* }
|
||||
* }
|
||||
* #=> ["r20018 | knu | 2008-10-29 13:20:42 +0900 (Wed, 29 Oct 2008) | 2 lines\n",
|
||||
* # "\n",
|
||||
* # "* README, README.ja: Update the portability section.\n",
|
||||
* # "\n"]
|
||||
* # ["r16725 | knu | 2008-05-31 23:34:23 +0900 (Sat, 31 May 2008) | 2 lines\n",
|
||||
* # "\n",
|
||||
* # "* README, README.ja: Add a note about default C flags.\n",
|
||||
* # "\n"]
|
||||
* # ...
|
||||
*
|
||||
* :_alone can be used to pass through a bunch of elements.
|
||||
* For example, to sort consecutive lines formed as Foo#bar and
|
||||
* pass other lines, chunk can be used as follows.
|
||||
*
|
||||
* pat = /\A[A-Z][A-Za-z0-9_]+\#/
|
||||
* open(filename) {|f|
|
||||
* f.chunk {|line| pat =~ line ? $& : :_alone }.each {|key, lines|
|
||||
* if key != :_alone
|
||||
* print lines.sort.join('')
|
||||
* else
|
||||
* print lines.join('')
|
||||
* end
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* If the block needs to maintain state over multiple elements,
|
||||
* _initial_state_ argument can be used.
|
||||
* If non-nil value is given,
|
||||
* it is duplicated for each "each" method invocation of the enumerator.
|
||||
* The duplicated object is passed as the 2nd argument of the block for the "chunk" method.
|
||||
*
|
||||
*/
|
||||
static VALUE
|
||||
enum_chunk(int argc, VALUE *argv, VALUE enumerable)
|
||||
{
|
||||
VALUE initial_state;
|
||||
VALUE enumerator;
|
||||
|
||||
rb_scan_args(argc, argv, "01", &initial_state);
|
||||
|
||||
enumerator = rb_obj_alloc(rb_cEnumerator);
|
||||
rb_ivar_set(enumerator, rb_intern("chunk_enumerable"), enumerable);
|
||||
rb_ivar_set(enumerator, rb_intern("chunk_categorize"), rb_block_proc());
|
||||
rb_ivar_set(enumerator, rb_intern("chunk_initial_state"), initial_state);
|
||||
rb_block_call(enumerator, rb_intern("initialize"), 0, 0, chunk_i, enumerator);
|
||||
return enumerator;
|
||||
}
|
||||
|
||||
|
||||
struct slicebefore_arg {
|
||||
VALUE separator_p;
|
||||
VALUE state;
|
||||
VALUE prev_elts;
|
||||
VALUE yielder;
|
||||
};
|
||||
|
||||
static VALUE
|
||||
slicebefore_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
|
||||
{
|
||||
struct slicebefore_arg *argp = (struct slicebefore_arg *)_argp;
|
||||
VALUE header_p;
|
||||
|
||||
ENUM_WANT_SVALUE();
|
||||
|
||||
if (NIL_P(argp->state))
|
||||
header_p = rb_funcall(argp->separator_p, rb_intern("call"), 1, i);
|
||||
else
|
||||
header_p = rb_funcall(argp->separator_p, rb_intern("call"), 2, i, argp->state);
|
||||
if (RTEST(header_p)) {
|
||||
if (!NIL_P(argp->prev_elts))
|
||||
rb_funcall(argp->yielder, rb_intern("<<"), 1, argp->prev_elts);
|
||||
argp->prev_elts = rb_ary_new3(1, i);
|
||||
}
|
||||
else {
|
||||
if (NIL_P(argp->prev_elts))
|
||||
argp->prev_elts = rb_ary_new3(1, i);
|
||||
else
|
||||
rb_ary_push(argp->prev_elts, i);
|
||||
}
|
||||
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
slicebefore_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
|
||||
{
|
||||
VALUE enumerable;
|
||||
struct slicebefore_arg arg;
|
||||
|
||||
enumerable = rb_ivar_get(enumerator, rb_intern("slicebefore_enumerable"));
|
||||
arg.separator_p = rb_ivar_get(enumerator, rb_intern("slicebefore_separator_p"));
|
||||
arg.state = rb_ivar_get(enumerator, rb_intern("slicebefore_initial_state"));
|
||||
arg.prev_elts = Qnil;
|
||||
arg.yielder = yielder;
|
||||
|
||||
if (!NIL_P(arg.state))
|
||||
arg.state = rb_obj_dup(arg.state);
|
||||
|
||||
rb_block_call(enumerable, id_each, 0, 0, slicebefore_ii, (VALUE)&arg);
|
||||
if (!NIL_P(arg.prev_elts))
|
||||
rb_funcall(arg.yielder, rb_intern("<<"), 1, arg.prev_elts);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* enum.slice_before {|elt| ... } => enumerator
|
||||
* enum.slice_before(initial_state) {|elt, state| ... } => enumerator
|
||||
*
|
||||
* Creates an enumerator for each chunk of elements.
|
||||
* Each chunk begins with an element for which the block returns a true value.
|
||||
*
|
||||
* The result enumerator yields the chunked elements as an array.
|
||||
* So "each" method can be called as follows.
|
||||
*
|
||||
* enum.slice_before {|elt| bool }.each {|ary| ... }
|
||||
*
|
||||
* For example, iteration over ChangeLog entries can be implemented as follows.
|
||||
*
|
||||
* # iterate over ChangeLog entries.
|
||||
* open("ChangeLog") {|f|
|
||||
* f.slice_before {|line| /\A\S/ =~ line }.each {|e| pp e}
|
||||
* }
|
||||
*
|
||||
* If the block needs to maintain state over multiple elements,
|
||||
* _initial_state_ argument can be used.
|
||||
* If non-nil value is given,
|
||||
* it is duplicated for each "each" method invocation of the enumerator.
|
||||
* The duplicated object is passed as the 2nd argument of the block for the "slice_before" method.
|
||||
*
|
||||
* For example, monotonically increasing elements can be chunked as follows.
|
||||
*
|
||||
* a = [2, 5, 2, 1, 4, 3, 1, 2, 8, 0]
|
||||
* enum = a.slice_before(n: 0) {|elt, h|
|
||||
* prev = h[:n]
|
||||
* h[:n] = elt
|
||||
* prev > elt
|
||||
* }
|
||||
* enum.each {|ary| p ary }
|
||||
* #=> [2, 5]
|
||||
* # [2]
|
||||
* # [1, 4]
|
||||
* # [3]
|
||||
* # [1, 2, 8]
|
||||
* # [0]
|
||||
*
|
||||
*
|
||||
* # parse mbox
|
||||
* open("mbox") {|f|
|
||||
* f.slice_before {|line|
|
||||
* line.start_with? "From "
|
||||
* }.each {|mail|
|
||||
* unix_from = mail.shift
|
||||
* i = mail.index("\n")
|
||||
* header = mail[0...i]
|
||||
* body = mail[(i+1)..-1]
|
||||
* fields = header.slice_before {|line| !" \t".include?(line[0]) }.to_a
|
||||
* p unix_from
|
||||
* pp fields
|
||||
* pp body
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* # split mails in mbox (slice before Unix From line after an empty line)
|
||||
* open("mbox") {|f|
|
||||
* f.slice_before(emp: true) {|line,h|
|
||||
* prevemp = h[:emp]
|
||||
* h[:emp] = line == "\n"
|
||||
* prevemp && line.start_with?("From ")
|
||||
* }.each {|mail|
|
||||
* pp mail
|
||||
* }
|
||||
* }
|
||||
*
|
||||
*/
|
||||
static VALUE
|
||||
enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
|
||||
{
|
||||
VALUE initial_state, enumerator;
|
||||
|
||||
rb_scan_args(argc, argv, "01", &initial_state);
|
||||
|
||||
enumerator = rb_obj_alloc(rb_cEnumerator);
|
||||
rb_ivar_set(enumerator, rb_intern("slicebefore_enumerable"), enumerable);
|
||||
rb_ivar_set(enumerator, rb_intern("slicebefore_separator_p"), rb_block_proc());
|
||||
rb_ivar_set(enumerator, rb_intern("slicebefore_initial_state"), initial_state);
|
||||
rb_block_call(enumerator, rb_intern("initialize"), 0, 0, slicebefore_i, enumerator);
|
||||
return enumerator;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* enum.join(sep=$,) -> str
|
||||
|
@ -1881,6 +2204,8 @@ Init_Enumerable(void)
|
|||
rb_define_method(rb_mEnumerable, "drop_while", enum_drop_while, 0);
|
||||
rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
|
||||
rb_define_method(rb_mEnumerable, "join", enum_join, -1);
|
||||
rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
|
||||
rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
|
||||
|
||||
id_eqq = rb_intern("===");
|
||||
id_each = rb_intern("each");
|
||||
|
|
|
@ -315,4 +315,74 @@ class TestEnumerable < Test::Unit::TestCase
|
|||
ensure
|
||||
$, = ofs
|
||||
end
|
||||
|
||||
# Exercises Enumerable#chunk: empty receiver, boolean keys, per-run
# duplication of the initial state, and the special keys :_alone,
# :_separator, nil and reserved underscore symbols.
# NOTE(review): @obj is set up outside this view; the expectations below
# imply that it enumerates 1, 2, 3, 1, 2 -- confirm against setup.
def test_chunk
  chunked = [].chunk {|elt| true }
  assert_equal([], chunked.to_a)

  chunked = @obj.chunk {|elt| (elt & 2) != 0 }
  expected = [[false, [1]], [true, [2, 3]], [false, [1]], [true, [2]]]
  assert_equal(expected, chunked.to_a)

  chunked = @obj.chunk(acc: 0) {|elt, h|
    h[:acc] += elt
    h[:acc].even?
  }
  expected = [[false, [1,2]], [true, [3]], [false, [1,2]]]
  assert_equal(expected, chunked.to_a)
  assert_equal(expected, chunked.to_a) # this tests h is duplicated.

  # Every run must see its own copy of the initial state object.
  hs = [{}]
  chunked = [:foo].chunk(hs[0]) {|elt, h|
    hs << h
    true
  }
  2.times { assert_equal([[true, [:foo]]], chunked.to_a) }
  assert_equal([{}, {}, {}], hs)
  assert_not_same(hs[0], hs[1])
  assert_not_same(hs[0], hs[2])
  assert_not_same(hs[1], hs[2])

  chunked = @obj.chunk {|elt| elt < 3 ? :_alone : true }
  expected = [
    [:_alone, [1]],
    [:_alone, [2]],
    [true, [3]],
    [:_alone, [1]],
    [:_alone, [2]],
  ]
  assert_equal(expected, chunked.to_a)

  # :_separator and nil both drop the element and end the current chunk.
  expected = [[true, [1, 2]], [true, [1, 2]]]

  chunked = @obj.chunk {|elt| elt == 3 ? :_separator : true }
  assert_equal(expected, chunked.to_a)

  chunked = @obj.chunk {|elt| elt == 3 ? nil : true }
  assert_equal(expected, chunked.to_a)

  # Any other symbol starting with an underscore is reserved.
  chunked = @obj.chunk {|elt| :_foo }
  assert_raise(RuntimeError) { chunked.to_a }
end
|
||||
|
||||
# Exercises Enumerable#slice_before: empty receiver, simple predicates,
# and per-run duplication of the initial state.
# NOTE(review): @obj is set up outside this view; the expectations below
# imply that it enumerates 1, 2, 3, 1, 2 -- confirm against setup.
def test_slice_before
  sliced = [].slice_before {|elt| true }
  assert_equal([], sliced.to_a)

  sliced = @obj.slice_before {|elt| elt.even? }
  assert_equal([[1], [2,3,1], [2]], sliced.to_a)

  sliced = @obj.slice_before {|elt| elt.odd? }
  assert_equal([[1,2], [3], [1,2]], sliced.to_a)

  sliced = @obj.slice_before(acc: 0) {|elt, h|
    h[:acc] += elt
    h[:acc].even?
  }
  expected = [[1,2], [3,1,2]]
  assert_equal(expected, sliced.to_a)
  assert_equal(expected, sliced.to_a) # this tests h is duplicated.

  # Every run must see its own copy of the initial state object.
  hs = [{}]
  sliced = [:foo].slice_before(hs[0]) {|elt, h|
    hs << h
    true
  }
  2.times { assert_equal([[:foo]], sliced.to_a) }
  assert_equal([{}, {}, {}], hs)
  assert_not_same(hs[0], hs[1])
  assert_not_same(hs[0], hs[2])
  assert_not_same(hs[1], hs[2])
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue