1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

array.c: improve Array#sample

* array.c (rb_ary_sample): improve performance when many samples
  from a large array.  based on the patch by tomoya ishida
  <tomoyapenguin AT gmail.com> in [ruby-dev:49956].  [Bug #13136]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57380 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2017-01-20 02:39:27 +00:00
parent 8f76ad3795
commit 2de1dbdffc

38
array.c
View file

@ -4797,6 +4797,7 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
VALUE opts, randgen = rb_cRandom; VALUE opts, randgen = rb_cRandom;
long n, len, i, j, k, idx[10]; long n, len, i, j, k, idx[10];
long rnds[numberof(idx)]; long rnds[numberof(idx)];
long memo_threshold;
if (OPTHASH_GIVEN_P(opts)) { if (OPTHASH_GIVEN_P(opts)) {
VALUE rnd; VALUE rnd;
@ -4856,6 +4857,11 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
} }
return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), RARRAY_AREF(ary, k)); return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), RARRAY_AREF(ary, k));
} }
memo_threshold =
len < 2560 ? len / 128 :
len < 5120 ? len / 64 :
len < 10240 ? len / 32 :
len / 16;
if (n <= numberof(idx)) { if (n <= numberof(idx)) {
long sorted[numberof(idx)]; long sorted[numberof(idx)];
sorted[0] = idx[0] = rnds[0]; sorted[0] = idx[0] = rnds[0];
@ -4875,6 +4881,38 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
} }
}); });
} }
else if (n <= memo_threshold / 2) {
long max_idx = 0;
#undef RUBY_UNTYPED_DATA_WARNING
#define RUBY_UNTYPED_DATA_WARNING 0
VALUE vmemo = Data_Wrap_Struct(0, 0, 0, st_free_table);
st_table *memo = st_init_numtable_with_size(n);
DATA_PTR(vmemo) = memo;
result = rb_ary_new_capa(n);
RARRAY_PTR_USE(result, ptr_result, {
for (i=0; i<n; i++) {
long r = RAND_UPTO(len-i) + i;
ptr_result[i] = r;
if (r > max_idx) max_idx = r;
}
len = RARRAY_LEN(ary);
if (len <= max_idx) n = 0;
else if (n > len) n = len;
RARRAY_PTR_USE(ary, ptr_ary, {
for (i=0; i<n; i++) {
long j2 = j = ptr_result[i];
long i2 = i;
st_data_t value;
if (st_lookup(memo, (st_data_t)i, &value)) i2 = (long)value;
if (st_lookup(memo, (st_data_t)j, &value)) j2 = (long)value;
st_insert(memo, (st_data_t)j, (st_data_t)i2);
ptr_result[i] = ptr_ary[j2];
}
});
});
DATA_PTR(vmemo) = 0;
st_free_table(memo);
}
else { else {
result = rb_ary_dup(ary); result = rb_ary_dup(ary);
RBASIC_CLEAR_CLASS(result); RBASIC_CLEAR_CLASS(result);