From d0015e4ac6b812ea1681b1f5fa86fbab52a58960 Mon Sep 17 00:00:00 2001 From: watson1978 Date: Wed, 31 May 2017 12:30:57 +0000 Subject: [PATCH] Improve performance of implicit type conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To convert the object implicitly, it has had two parts in convert_type() which are 1. lookink up the method's id 2. calling the method Seems that strncmp() and strcmp() in convert_type() are slightly heavy to look up the method's id for type conversion. This patch will add and use internal APIs (rb_convert_type_with_id, rb_check_convert_type_with_id) to call the method without looking up the method's id when convert the object. Array#flatten -> 19 % up Array#+ -> 3 % up [ruby-dev:50024] [Bug #13341] [Fix GH-1537] ### Before Array#flatten 104.119k (± 1.1%) i/s - 525.690k in 5.049517s Array#+ 1.993M (± 1.8%) i/s - 10.010M in 5.024258s ### After Array#flatten 124.005k (± 1.0%) i/s - 624.240k in 5.034477s Array#+ 2.058M (± 4.8%) i/s - 10.302M in 5.019328s ### Test Code require 'benchmark/ips' class Foo def to_ary [1,2,3] end end Benchmark.ips do |x| ary = [] 100.times { |i| ary << i } array = [ary] x.report "Array#flatten" do |i| i.times { array.flatten } end x.report "Array#+" do |i| obj = Foo.new i.times { array + obj } end end git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58978 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- array.c | 6 ++-- compile.c | 18 ++++++------ file.c | 5 ++-- hash.c | 6 ++-- internal.h | 2 ++ io.c | 6 ++-- iseq.c | 8 +++--- object.c | 75 +++++++++++++++++++++++++++++++++++-------------- process.c | 4 +-- rational.c | 3 +- string.c | 4 +-- thread.c | 2 +- vm.c | 2 +- vm_args.c | 2 +- vm_insnhelper.c | 6 ++-- vm_trace.c | 2 +- 16 files changed, 94 insertions(+), 57 deletions(-) diff --git a/array.c b/array.c index 9ea4c05c52..2e57572284 100644 --- a/array.c +++ b/array.c @@ -642,13 +642,13 @@ rb_assoc_new(VALUE car, VALUE cdr) static VALUE to_ary(VALUE ary) { - return rb_convert_type(ary, T_ARRAY, "Array", "to_ary"); + return rb_convert_type_with_id(ary, T_ARRAY, "Array", idTo_ary); } VALUE rb_check_array_type(VALUE ary) { - return rb_check_convert_type(ary, T_ARRAY, "Array", "to_ary"); + return rb_check_convert_type_with_id(ary, T_ARRAY, "Array", idTo_ary); } /* @@ -2012,7 +2012,7 @@ ary_join_1(VALUE obj, VALUE ary, VALUE sep, long i, VALUE result, int *first) val = tmp; goto str_join; } - tmp = rb_check_convert_type(val, T_ARRAY, "Array", "to_ary"); + tmp = rb_check_convert_type_with_id(val, T_ARRAY, "Array", idTo_ary); if (!NIL_P(tmp)) { obj = val; val = tmp; diff --git a/compile.c b/compile.c index a2a80c919a..a448b2120b 100644 --- a/compile.c +++ b/compile.c @@ -6598,7 +6598,7 @@ register_label(rb_iseq_t *iseq, struct st_table *labels_table, VALUE obj) { LABEL *label = 0; st_data_t tmp; - obj = rb_convert_type(obj, T_SYMBOL, "Symbol", "to_sym"); + obj = rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym); if (st_lookup(labels_table, obj, &tmp) == 0) { label = NEW_LABEL(0); @@ -6651,8 +6651,8 @@ iseq_build_from_ary_exception(rb_iseq_t *iseq, struct st_table *labels_table, LABEL *lstart, *lend, *lcont; unsigned int sp; - v = rb_convert_type(RARRAY_AREF(exception, i), T_ARRAY, - "Array", "to_ary"); + v = rb_convert_type_with_id(RARRAY_AREF(exception, i), T_ARRAY, + "Array", idTo_ary); if (RARRAY_LEN(v) != 6) { rb_raise(rb_eSyntaxError, "wrong exception entry"); } @@ -6840,7 +6840,7 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor, } break; case TS_GENTRY: - op = rb_convert_type(op, T_SYMBOL, "Symbol", "to_sym"); + op = rb_convert_type_with_id(op, T_SYMBOL, "Symbol", idTo_sym); argv[j] = (VALUE)rb_global_entry(SYM2ID(op)); break; case TS_IC: @@ -6856,8 +6856,8 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor, argv[j] = Qfalse; break; case TS_ID: - argv[j] = rb_convert_type(op, T_SYMBOL, - "Symbol", "to_sym"); + argv[j] = rb_convert_type_with_id(op, T_SYMBOL, + "Symbol", idTo_sym); break; case TS_CDHASH: { @@ -6865,7 +6865,7 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor, VALUE map = rb_hash_new(); rb_hash_tbl_raw(map)->type = &cdhash_type; - op = rb_convert_type(op, T_ARRAY, "Array", "to_ary"); + op = rb_convert_type_with_id(op, T_ARRAY, "Array", idTo_ary); for (i=0; i #endif +#include "id.h" #include "internal.h" #include "ruby/io.h" #include "ruby/util.h" @@ -1014,7 +1015,7 @@ rb_stat(VALUE file, struct stat *st) { VALUE tmp; - tmp = rb_check_convert_type(file, T_FILE, "IO", "to_io"); + tmp = rb_check_convert_type_with_id(file, T_FILE, "IO", idTo_io); if (!NIL_P(tmp)) { rb_io_t *fptr; @@ -1033,7 +1034,7 @@ w32_io_info(VALUE *file, BY_HANDLE_FILE_INFORMATION *st) VALUE tmp; HANDLE f, ret = 0; - tmp = rb_check_convert_type(*file, T_FILE, "IO", "to_io"); + tmp = rb_check_convert_type_with_id(*file, T_FILE, "IO", idTo_io); if (!NIL_P(tmp)) { rb_io_t *fptr; diff --git a/hash.c b/hash.c index 61ca381ac7..3176bc7a24 100644 --- a/hash.c +++ b/hash.c @@ -711,13 +711,13 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) static VALUE to_hash(VALUE hash) { - return rb_convert_type(hash, T_HASH, "Hash", "to_hash"); + return rb_convert_type_with_id(hash, T_HASH, "Hash", idTo_hash); } VALUE rb_check_hash_type(VALUE hash) { - return rb_check_convert_type(hash, T_HASH, "Hash", "to_hash"); + return rb_check_convert_type_with_id(hash, T_HASH, "Hash", idTo_hash); } /* @@ -1027,7 +1027,7 @@ rb_hash_set_default_proc(VALUE hash, VALUE proc) SET_DEFAULT(hash, proc); return proc; } - b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc"); + b = rb_check_convert_type_with_id(proc, T_DATA, "Proc", idTo_proc); if (NIL_P(b) || !rb_obj_is_proc(b)) { rb_raise(rb_eTypeError, "wrong default_proc type %s (expected Proc)", diff --git a/internal.h b/internal.h index ad977cbbcc..62b2664ebb 100644 --- a/internal.h +++ b/internal.h @@ -1428,6 +1428,8 @@ double rb_num_to_dbl(VALUE val); VALUE rb_obj_dig(int argc, VALUE *argv, VALUE self, VALUE notfound); VALUE rb_immutable_obj_clone(int, VALUE *, VALUE); VALUE rb_obj_not_equal(VALUE obj1, VALUE obj2); +VALUE rb_convert_type_with_id(VALUE,int,const char*,ID); +VALUE rb_check_convert_type_with_id(VALUE,int,const char*,ID); struct RBasicRaw { VALUE flags; diff --git a/io.c b/io.c index 19c588497a..3482602326 100644 --- a/io.c +++ b/io.c @@ -655,13 +655,13 @@ rb_io_get_fptr(VALUE io) VALUE rb_io_get_io(VALUE io) { - return rb_convert_type(io, T_FILE, "IO", "to_io"); + return rb_convert_type_with_id(io, T_FILE, "IO", idTo_io); } VALUE rb_io_check_io(VALUE io) { - return rb_check_convert_type(io, T_FILE, "IO", "to_io"); + return rb_check_convert_type_with_id(io, T_FILE, "IO", idTo_io); } VALUE @@ -9938,7 +9938,7 @@ open_key_args(int argc, VALUE *argv, VALUE opt, struct foreach_arg *arg) VALUE args; long n; - v = rb_convert_type(v, T_ARRAY, "Array", "to_ary"); + v = rb_convert_type_with_id(v, T_ARRAY, "Array", idTo_ary); n = RARRAY_LEN(v) + 1; #if SIZEOF_LONG > SIZEOF_INT if (n > INT_MAX) { diff --git a/iseq.c b/iseq.c index 81edd37c47..bfdbeadfa9 100644 --- a/iseq.c +++ b/iseq.c @@ -503,10 +503,10 @@ rb_iseq_load_iseq(VALUE fname) return NULL; } -#define CHECK_ARRAY(v) rb_convert_type((v), T_ARRAY, "Array", "to_ary") -#define CHECK_HASH(v) rb_convert_type((v), T_HASH, "Hash", "to_hash") -#define CHECK_STRING(v) rb_convert_type((v), T_STRING, "String", "to_str") -#define CHECK_SYMBOL(v) rb_convert_type((v), T_SYMBOL, "Symbol", "to_sym") +#define CHECK_ARRAY(v) rb_convert_type_with_id((v), T_ARRAY, "Array", idTo_ary) +#define CHECK_HASH(v) rb_convert_type_with_id((v), T_HASH, "Hash", idTo_hash) +#define CHECK_STRING(v) rb_convert_type_with_id((v), T_STRING, "String", idTo_str) +#define CHECK_SYMBOL(v) rb_convert_type_with_id((v), T_SYMBOL, "Symbol", idTo_sym) static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;} static enum iseq_type diff --git a/object.c b/object.c index 5e74359145..afeb1dc3e1 100644 --- a/object.c +++ b/object.c @@ -2660,28 +2660,12 @@ static const struct conv_method_tbl { #define IMPLICIT_CONVERSIONS 7 static VALUE -convert_type(VALUE val, const char *tname, const char *method, int raise) +convert_type_with_id(VALUE val, const char *tname, ID method, int raise, int index) { - ID m = 0; - int i = numberof(conv_method_names); - VALUE r; - static const char prefix[] = "to_"; - - if (strncmp(prefix, method, sizeof(prefix)-1) == 0) { - const char *const meth = &method[sizeof(prefix)-1]; - for (i=0; i < numberof(conv_method_names); i++) { - if (conv_method_names[i].method[0] == meth[0] && - strcmp(conv_method_names[i].method, meth) == 0) { - m = conv_method_names[i].id; - break; - } - } - } - if (!m) m = rb_intern(method); - r = rb_check_funcall(val, m, 0, 0); + VALUE r = rb_check_funcall(val, method, 0, 0); if (r == Qundef) { if (raise) { - const char *msg = i < IMPLICIT_CONVERSIONS ? + const char *msg = index < IMPLICIT_CONVERSIONS ? "no implicit conversion of" : "can't convert"; const char *cname = NIL_P(val) ? "nil" : val == Qtrue ? "true" : @@ -2698,6 +2682,27 @@ convert_type(VALUE val, const char *tname, const char *method, int raise) return r; } +static VALUE +convert_type(VALUE val, const char *tname, const char *method, int raise) +{ + ID m = 0; + int i = numberof(conv_method_names); + static const char prefix[] = "to_"; + + if (strncmp(prefix, method, sizeof(prefix)-1) == 0) { + const char *const meth = &method[sizeof(prefix)-1]; + for (i=0; i < numberof(conv_method_names); i++) { + if (conv_method_names[i].method[0] == meth[0] && + strcmp(conv_method_names[i].method, meth) == 0) { + m = conv_method_names[i].id; + break; + } + } + } + if (!m) m = rb_intern(method); + return convert_type_with_id(val, tname, m, raise, i); +} + NORETURN(static void conversion_mismatch(VALUE, const char *, const char *, VALUE)); static void conversion_mismatch(VALUE val, const char *tname, const char *method, VALUE result) @@ -2721,6 +2726,19 @@ rb_convert_type(VALUE val, int type, const char *tname, const char *method) return v; } +VALUE +rb_convert_type_with_id(VALUE val, int type, const char *tname, ID method) +{ + VALUE v; + + if (TYPE(val) == type) return val; + v = convert_type_with_id(val, tname, method, TRUE, 0); + if (TYPE(v) != type) { + conversion_mismatch(val, tname, RSTRING_PTR(rb_id2str(method)), v); + } + return v; +} + VALUE rb_check_convert_type(VALUE val, int type, const char *tname, const char *method) { @@ -2736,6 +2754,21 @@ rb_check_convert_type(VALUE val, int type, const char *tname, const char *method return v; } +VALUE +rb_check_convert_type_with_id(VALUE val, int type, const char *tname, ID method) +{ + VALUE v; + + /* always convert T_DATA */ + if (TYPE(val) == type && type != T_DATA) return val; + v = convert_type_with_id(val, tname, method, FALSE, 0); + if (NIL_P(v)) return Qnil; + if (TYPE(v) != type) { + conversion_mismatch(val, tname, RSTRING_PTR(rb_id2str(method)), v); + } + return v; +} + static VALUE rb_to_integer(VALUE val, const char *method) @@ -3175,7 +3208,7 @@ rb_String(VALUE val) { VALUE tmp = rb_check_string_type(val); if (NIL_P(tmp)) - tmp = rb_convert_type(val, T_STRING, "String", "to_s"); + tmp = rb_convert_type_with_id(val, T_STRING, "String", idTo_s); return tmp; } @@ -3205,7 +3238,7 @@ rb_Array(VALUE val) VALUE tmp = rb_check_array_type(val); if (NIL_P(tmp)) { - tmp = rb_check_convert_type(val, T_ARRAY, "Array", "to_a"); + tmp = rb_check_convert_type_with_id(val, T_ARRAY, "Array", idTo_a); if (NIL_P(tmp)) { return rb_ary_new3(1, val); } diff --git a/process.c b/process.c index 67f51db847..324d112e0b 100644 --- a/process.c +++ b/process.c @@ -1486,7 +1486,7 @@ check_exec_redirect_fd(VALUE v, int iskey) else goto wrong; } - else if (!NIL_P(tmp = rb_check_convert_type(v, T_FILE, "IO", "to_io"))) { + else if (!NIL_P(tmp = rb_check_convert_type_with_id(v, T_FILE, "IO", idTo_io))) { rb_io_t *fptr; GetOpenFile(tmp, fptr); if (fptr->tied_io_for_writing) @@ -2397,7 +2397,7 @@ rb_execarg_parent_start1(VALUE execarg_obj) } else { envtbl = rb_const_get(rb_cObject, id_ENV); - envtbl = rb_convert_type(envtbl, T_HASH, "Hash", "to_hash"); + envtbl = rb_convert_type_with_id(envtbl, T_HASH, "Hash", idTo_hash); } hide_obj(envtbl); if (envopts != Qfalse) { diff --git a/rational.c b/rational.c index bd32399e79..ce95facda0 100644 --- a/rational.c +++ b/rational.c @@ -6,6 +6,7 @@ */ #include "internal.h" +#include "id.h" #include #include @@ -2610,7 +2611,7 @@ nurat_s_convert(int argc, VALUE *argv, VALUE klass) if (argc == 1) { if (!(k_numeric_p(a1) && k_integer_p(a1))) - return rb_convert_type(a1, T_RATIONAL, "Rational", "to_r"); + return rb_convert_type_with_id(a1, T_RATIONAL, "Rational", idTo_r); } else { if ((k_numeric_p(a1) && k_numeric_p(a2)) && diff --git a/string.c b/string.c index 89e08d2cc2..df3f8fd3cd 100644 --- a/string.c +++ b/string.c @@ -1341,7 +1341,7 @@ rb_str_memsize(VALUE str) VALUE rb_str_to_str(VALUE str) { - return rb_convert_type(str, T_STRING, "String", "to_str"); + return rb_convert_type_with_id(str, T_STRING, "String", idTo_str); } static inline void str_discard(VALUE str); @@ -2225,7 +2225,7 @@ rb_str_fill_terminator(VALUE str, const int newminlen) VALUE rb_check_string_type(VALUE str) { - str = rb_check_convert_type(str, T_STRING, "String", "to_str"); + str = rb_check_convert_type_with_id(str, T_STRING, "String", idTo_str); return str; } diff --git a/thread.c b/thread.c index 4d954c76de..da834974bd 100644 --- a/thread.c +++ b/thread.c @@ -1875,7 +1875,7 @@ rb_thread_s_handle_interrupt(VALUE self, VALUE mask_arg) } mask = 0; - mask_arg = rb_convert_type(mask_arg, T_HASH, "Hash", "to_hash"); + mask_arg = rb_convert_type_with_id(mask_arg, T_HASH, "Hash", idTo_hash); rb_hash_foreach(mask_arg, handle_interrupt_arg_check_i, (VALUE)&mask); if (!mask) { return rb_yield(Qnil); diff --git a/vm.c b/vm.c index 4810321d6f..d90cb2f472 100644 --- a/vm.c +++ b/vm.c @@ -2736,7 +2736,7 @@ core_hash_merge_kwd(int argc, VALUE *argv) rb_check_arity(argc, 1, 2); hash = argv[0]; kw = argv[argc-1]; - kw = rb_convert_type(kw, T_HASH, "Hash", "to_hash"); + kw = rb_convert_type_with_id(kw, T_HASH, "Hash", idTo_hash); if (argc < 2) hash = kw; rb_hash_foreach(kw, argc < 2 ? kwcheck_i : kwmerge_i, hash); return hash; diff --git a/vm_args.c b/vm_args.c index 3bf56788b1..ae27d27d8c 100644 --- a/vm_args.c +++ b/vm_args.c @@ -777,7 +777,7 @@ vm_to_proc(VALUE proc) { if (UNLIKELY(!rb_obj_is_proc(proc))) { VALUE b; - b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc"); + b = rb_check_convert_type_with_id(proc, T_DATA, "Proc", idTo_proc); if (NIL_P(b) || !rb_obj_is_proc(b)) { rb_raise(rb_eTypeError, diff --git a/vm_insnhelper.c b/vm_insnhelper.c index a6f2943ecd..bb483d2d2c 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -2991,8 +2991,8 @@ static VALUE vm_concat_array(VALUE ary1, VALUE ary2st) { const VALUE ary2 = ary2st; - VALUE tmp1 = rb_check_convert_type(ary1, T_ARRAY, "Array", "to_a"); - VALUE tmp2 = rb_check_convert_type(ary2, T_ARRAY, "Array", "to_a"); + VALUE tmp1 = rb_check_convert_type_with_id(ary1, T_ARRAY, "Array", idTo_a); + VALUE tmp2 = rb_check_convert_type_with_id(ary2, T_ARRAY, "Array", idTo_a); if (NIL_P(tmp1)) { tmp1 = rb_ary_new3(1, ary1); @@ -3011,7 +3011,7 @@ vm_concat_array(VALUE ary1, VALUE ary2st) static VALUE vm_splat_array(VALUE flag, VALUE ary) { - VALUE tmp = rb_check_convert_type(ary, T_ARRAY, "Array", "to_a"); + VALUE tmp = rb_check_convert_type_with_id(ary, T_ARRAY, "Array", idTo_a); if (NIL_P(tmp)) { return rb_ary_new3(1, ary); } diff --git a/vm_trace.c b/vm_trace.c index decb2c32e4..2f78284805 100644 --- a/vm_trace.c +++ b/vm_trace.c @@ -683,7 +683,7 @@ static rb_event_flag_t symbol2event_flag(VALUE v) { ID id; - VALUE sym = rb_convert_type(v, T_SYMBOL, "Symbol", "to_sym"); + VALUE sym = rb_convert_type_with_id(v, T_SYMBOL, "Symbol", idTo_sym); const rb_event_flag_t RUBY_EVENT_A_CALL = RUBY_EVENT_CALL | RUBY_EVENT_B_CALL | RUBY_EVENT_C_CALL; const rb_event_flag_t RUBY_EVENT_A_RETURN =