From d46ab953765e2114b6f2f58792be9a7a974e5f27 Mon Sep 17 00:00:00 2001
From: shyouhei
Date: Wed, 26 Dec 2018 00:59:37 +0000
Subject: [PATCH] insns.def: refactor to avoid CALL_METHOD macro

The send instruction and its variants are the most frequently called
paths in the entire process.  Reducing macro expansions by turning them
into a dedicated function, vm_sendish(), is the main goal of this
changeset.  It reduces the size of vm_exec_core from 25,552 bytes to
23,728 bytes on my machine.  I see no significant slowdown.

Fix: [GH-2056]

vanilla: ruby 2.6.0dev (2018-12-19 trunk 66449) [x86_64-darwin15]
ours: ruby 2.6.0dev (2018-12-19 refactor-send 66449) [x86_64-darwin15]
last_commit=insns.def: refactor to avoid CALL_METHOD macro

Calculating -------------------------------------
                             vanilla        ours
   vm2_defined_method         2.645M      2.823M i/s -      6.000M times in 5.109888s 4.783254s
           vm2_method         8.553M      8.873M i/s -      6.000M times in 1.579892s 1.524026s
   vm2_method_missing         3.772M      3.858M i/s -      6.000M times in 3.579482s 3.499220s
vm2_method_with_block         8.494M      8.944M i/s -      6.000M times in 1.589774s 1.509463s
      vm2_poly_method          0.571       0.607 i/s -       1.000 times in 3.947570s 3.733528s
   vm2_poly_method_ov          5.514       5.168 i/s -       1.000 times in 0.408156s 0.436169s
 vm3_clearmethodcache          2.875       2.837 i/s -       1.000 times in 0.783018s 0.793493s

Comparison:
             vm2_defined_method
                   ours:   2822555.4 i/s
                vanilla:   2644878.1 i/s - 1.07x  slower

                     vm2_method
                   ours:   8872947.8 i/s
                vanilla:   8553433.1 i/s - 1.04x  slower

             vm2_method_missing
                   ours:   3858192.3 i/s
                vanilla:   3772296.3 i/s - 1.02x  slower

          vm2_method_with_block
                   ours:   8943825.1 i/s
                vanilla:   8493955.0 i/s - 1.05x  slower

                vm2_poly_method
                   ours:         0.6 i/s
                vanilla:         0.6 i/s - 1.06x  slower

             vm2_poly_method_ov
                vanilla:         5.5 i/s
                   ours:         5.2 i/s - 1.07x  slower

           vm3_clearmethodcache
                vanilla:         2.9 i/s
                   ours:         2.8 i/s - 1.01x  slower

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66565 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
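A condensed sketch of the new call path, pieced together from the insns.def
and vm_insnhelper.c hunks below; argument/block setup, error handling, and
the MJIT_HEADER branch are omitted, so this is an approximation of the real
code, not a literal excerpt (all names used here appear in the diff):

    /* Each send-ish instruction body now reduces to this shape: */
    VALUE bh = ...;  /* block handler: vm_caller_setup_arg_block() or VM_BLOCK_HANDLER_NONE */
    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
                     /* or vm_search_super_method / vm_search_invokeblock */
    if (val == Qundef) {        /* callee frame must be run by the VM loop */
        RESTORE_REGS();
        NEXT_INSN();
    }

    /* vm_sendish(), non-MJIT branch, simplified: */
    VALUE recv = TOPN(ci->orig_argc);
    method_explorer(GET_CFP(), ci, cc, recv);        /* e.g. vm_search_method_wrap() fills cc */
    val = cc->call(ec, GET_CFP(), &calling, ci, cc);
    if (val != Qundef) return val;                   /* C function returned a value directly */
    RESTORE_REGS();                                  /* cc->call() pushed a new control frame */
    return mjit_exec(ec);                            /* Qundef here makes the caller NEXT_INSN() */
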
 insns.def                                     |  52 ++++-----
 tool/ruby_vm/views/_mjit_compile_insn.erb     |   2 +-
 .../ruby_vm/views/_mjit_compile_insn_body.erb |   6 +
 vm.c                                          |  13 ++-
 vm_args.c                                     |   2 +-
 vm_insnhelper.c                               | 104 +++++++++++++++++-
 vm_insnhelper.h                               |  36 ------
 7 files changed, 146 insertions(+), 69 deletions(-)

diff --git a/insns.def b/insns.def
index 0bbe0c8ec5..80d1e81423 100644
--- a/insns.def
+++ b/insns.def
@@ -740,12 +740,13 @@ send
 (VALUE val)
 // attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
 {
-    struct rb_calling_info calling;
+    VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), ci, blockiseq, false);
+    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
 
-    calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, ci, blockiseq, FALSE);
-    calling.recv = TOPN(calling.argc = ci->orig_argc);
-    vm_search_method(ci, cc, calling.recv);
-    CALL_METHOD(&calling, ci, cc);
+    if (val == Qundef) {
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
 }
 
 /* Invoke method without block */
@@ -757,10 +758,13 @@ opt_send_without_block
 (VALUE val)
 // attr bool handles_sp = true;
 // attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
 {
-    struct rb_calling_info calling;
-    calling.block_handler = VM_BLOCK_HANDLER_NONE;
-    vm_search_method(ci, cc, calling.recv = TOPN(calling.argc = ci->orig_argc));
-    CALL_METHOD(&calling, ci, cc);
+    VALUE bh = VM_BLOCK_HANDLER_NONE;
+    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
+
+    if (val == Qundef) {
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
 }
 
 DEFINE_INSN
@@ -826,12 +830,13 @@ invokesuper
 (VALUE val)
 // attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
 {
-    struct rb_calling_info calling;
+    VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), ci, blockiseq, true);
+    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_super_method);
 
-    calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, ci, blockiseq, TRUE);
-    calling.recv = TOPN(calling.argc = ci->orig_argc);
-    vm_search_super_method(ec, GET_CFP(), &calling, ci, cc);
-    CALL_METHOD(&calling, ci, cc);
+    if (val == Qundef) {
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
 }
 
 /* yield(args) */
@@ -843,21 +848,16 @@ invokeblock
 // attr bool handles_sp = true;
 // attr rb_snum_t sp_inc = sp_inc_of_invokeblock(ci);
 {
-    struct rb_calling_info calling;
-    VALUE block_handler;
+    static struct rb_call_cache cc = {
+        0, 0, NULL, vm_invokeblock_i,
+    };
 
-    calling.argc = ci->orig_argc;
-    calling.block_handler = VM_BLOCK_HANDLER_NONE;
-    calling.recv = Qundef; /* should not be used */
+    VALUE bh = VM_BLOCK_HANDLER_NONE;
+    val = vm_sendish(ec, GET_CFP(), ci, &cc, bh, vm_search_invokeblock);
 
-    block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
-    if (block_handler == VM_BLOCK_HANDLER_NONE) {
-        rb_vm_localjump_error("no block given (yield)", Qnil, 0);
-    }
-
-    val = vm_invoke_block(ec, GET_CFP(), &calling, ci, block_handler);
     if (val == Qundef) {
-        EXEC_EC_CFP(val);
+        RESTORE_REGS();
+        NEXT_INSN();
     }
 }
 
diff --git a/tool/ruby_vm/views/_mjit_compile_insn.erb b/tool/ruby_vm/views/_mjit_compile_insn.erb
index a52c928abf..9ba0ffc1c3 100644
--- a/tool/ruby_vm/views/_mjit_compile_insn.erb
+++ b/tool/ruby_vm/views/_mjit_compile_insn.erb
@@ -81,6 +81,6 @@
 % end
 %
 % # compiler: If insn returns (leave) or does longjmp (throw), the branch should no longer be compiled. TODO: create attr for it?
-% if insn.expr.expr =~ /\sTHROW_EXCEPTION\([^)]+\);/ || insn.expr.expr =~ /\sRESTORE_REGS\(\);/
+% if insn.expr.expr =~ /\sTHROW_EXCEPTION\([^)]+\);/ || insn.expr.expr =~ /\bvm_pop_frame\(/
   b->finish_p = TRUE;
 % end
diff --git a/tool/ruby_vm/views/_mjit_compile_insn_body.erb b/tool/ruby_vm/views/_mjit_compile_insn_body.erb
index 3b14c272b1..675f06f80f 100644
--- a/tool/ruby_vm/views/_mjit_compile_insn_body.erb
+++ b/tool/ruby_vm/views/_mjit_compile_insn_body.erb
@@ -29,6 +29,12 @@
 %       #endif
 %       RESTORE_REGS
 %     end
+%     expr.gsub!(/^(?<indent>\s*)NEXT_INSN\(\);\n/) do
+%       indent = Regexp.last_match[:indent]
+%       <<-end.gsub(/^ +/, '')
+%         #{indent}UNREACHABLE_RETURN(Qundef);
+%       end
+%     end
 %   end
 % end
 %
diff --git a/vm.c b/vm.c
index 56f815ef48..b54d67bf5a 100644
--- a/vm.c
+++ b/vm.c
@@ -28,6 +28,17 @@
 
 VALUE rb_str_concat_literals(size_t, const VALUE*);
 
+/* :FIXME: This #ifdef is because we build pch in case of mswin and
+ * not in case of other situations.  That distinction might change in
+ * a future.  We would better make it detectable in something better
+ * than just _MSC_VER. */
+#ifdef _MSC_VER
+RUBY_FUNC_EXPORTED
+#else
+MJIT_FUNC_EXPORTED
+#endif
+VALUE vm_exec(rb_execution_context_t *, int);
+
 PUREFUNC(static inline const VALUE *VM_EP_LEP(const VALUE *));
 static inline const VALUE *
 VM_EP_LEP(const VALUE *ep)
@@ -1870,7 +1881,7 @@ static inline VALUE
 vm_exec_handle_exception(rb_execution_context_t *ec, enum ruby_tag_type state,
                          VALUE errinfo, VALUE *initial);
 
-MJIT_FUNC_EXPORTED VALUE
+VALUE
 vm_exec(rb_execution_context_t *ec, int mjit_enable_p)
 {
     enum ruby_tag_type state;
diff --git a/vm_args.c b/vm_args.c
index bea38078ec..e1269bbc5f 100644
--- a/vm_args.c
+++ b/vm_args.c
@@ -875,7 +875,7 @@ refine_sym_proc_call(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, callback_arg))
 
 static VALUE
 vm_caller_setup_arg_block(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
-                          const struct rb_call_info *ci, rb_iseq_t *blockiseq, const int is_super)
+                          const struct rb_call_info *ci, const rb_iseq_t *blockiseq, const int is_super)
 {
     if (ci->flag & VM_CALL_ARGS_BLOCKARG) {
         VALUE block_code = *(--reg_cfp->sp);
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 4f39a0f5a3..5b90aea019 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -2444,8 +2444,7 @@ vm_super_outside(void)
 }
 
 static void
-vm_search_super_method(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
-                       struct rb_calling_info *calling, struct rb_call_info *ci, struct rb_call_cache *cc)
+vm_search_super_method(const rb_control_frame_t *reg_cfp, struct rb_call_info *ci, struct rb_call_cache *cc, VALUE recv)
 {
     VALUE current_defined_class, klass;
     const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(reg_cfp);
@@ -2463,14 +2462,14 @@ vm_search_super_method(const rb_execution_context_t *ec, rb_control_frame_t *reg
     if (BUILTIN_TYPE(current_defined_class) != T_MODULE &&
         BUILTIN_TYPE(current_defined_class) != T_ICLASS && /* bound UnboundMethod */
         !FL_TEST(current_defined_class, RMODULE_INCLUDED_INTO_REFINEMENT) &&
-        !rb_obj_is_kind_of(calling->recv, current_defined_class)) {
+        !rb_obj_is_kind_of(recv, current_defined_class)) {
         VALUE m = RB_TYPE_P(current_defined_class, T_ICLASS) ?
             RBASIC(current_defined_class)->klass : current_defined_class;
 
         rb_raise(rb_eTypeError,
                  "self has wrong type to call super in this context: "
                  "%"PRIsVALUE" (expected %"PRIsVALUE")",
-                 rb_obj_class(calling->recv), m);
+                 rb_obj_class(recv), m);
     }
 
     if (me->def->type == VM_METHOD_TYPE_BMETHOD && (ci->flag & VM_CALL_ZSUPER)) {
@@ -3187,6 +3186,103 @@ vm_find_or_create_class_by_id(ID id,
     }
 }
 
+static void
+vm_search_method_wrap(
+    const struct rb_control_frame_struct *reg_cfp,
+    struct rb_call_info *ci,
+    struct rb_call_cache *cc,
+    VALUE recv)
+{
+    vm_search_method(ci, cc, recv);
+}
+
+static void
+vm_search_invokeblock(
+    const struct rb_control_frame_struct *reg_cfp,
+    struct rb_call_info *ci,
+    struct rb_call_cache *cc,
+    VALUE recv)
+{
+    /* Does nothing. */
+}
+
+static VALUE
+vm_invokeblock_i(
+    struct rb_execution_context_struct *ec,
+    struct rb_control_frame_struct *reg_cfp,
+    struct rb_calling_info *calling,
+    const struct rb_call_info *ci,
+    struct rb_call_cache *cc)
+{
+    VALUE block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
+
+    if (block_handler == VM_BLOCK_HANDLER_NONE) {
+        rb_vm_localjump_error("no block given (yield)", Qnil, 0);
+    }
+    else {
+        return vm_invoke_block(ec, GET_CFP(), calling, ci, block_handler);
+    }
+}
+
+static VALUE
+vm_sendish(
+    struct rb_execution_context_struct *ec,
+    struct rb_control_frame_struct *reg_cfp,
+    struct rb_call_info *ci,
+    struct rb_call_cache *cc,
+    VALUE block_handler,
+    void (*method_explorer)(
+        const struct rb_control_frame_struct *reg_cfp,
+        struct rb_call_info *ci,
+        struct rb_call_cache *cc,
+        VALUE recv))
+{
+    VALUE val;
+    int argc = ci->orig_argc;
+    VALUE recv = TOPN(argc);
+    struct rb_calling_info calling;
+
+    calling.block_handler = block_handler;
+    calling.recv = recv;
+    calling.argc = argc;
+
+    method_explorer(GET_CFP(), ci, cc, recv);
+
+    val = cc->call(ec, GET_CFP(), &calling, ci, cc);
+
+    if (val != Qundef) {
+        return val;             /* CFUNC normal return */
+    }
+    else {
+        RESTORE_REGS();         /* CFP pushed in cc->call() */
+    }
+
+#ifdef MJIT_HEADER
+    /* When calling ISeq which may catch an exception from JIT-ed
+       code, we should not call mjit_exec directly to prevent the
+       caller frame from being canceled. That's because the caller
+       frame may have stack values in the local variables and the
+       cancelling the caller frame will purge them. But directly
+       calling mjit_exec is faster... */
+    if (GET_ISEQ()->body->catch_except_p) {
+        VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH);
+        return vm_exec(ec, true);
+    }
+    else if ((val = mjit_exec(ec)) == Qundef) {
+        VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH);
+        return vm_exec(ec, false);
+    }
+    else {
+        return val;
+    }
+#else
+    /* When calling from VM, longjmp in the callee won't purge any
+       JIT-ed caller frames. So it's safe to directly call
+       mjit_exec. */
+    return mjit_exec(ec);
+#endif
+}
+
 static VALUE
 vm_opt_str_freeze(VALUE str, int bop, ID id)
 {
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index e44d91ec34..02745de01a 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -137,42 +137,6 @@ enum vm_regan_acttype {
 /* deal with control flow 2: method/iterator */
 /**********************************************************/
 
-#ifdef MJIT_HEADER
-/* When calling ISeq which may catch an exception from JIT-ed code, we should not call
-   mjit_exec directly to prevent the caller frame from being canceled. That's because
-   the caller frame may have stack values in the local variables and the cancelling
-   the caller frame will purge them. But directly calling mjit_exec is faster... */
-#define EXEC_EC_CFP(val) do { \
-    if (ec->cfp->iseq->body->catch_except_p) { \
-        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); \
-        val = vm_exec(ec, TRUE); \
-    } \
-    else if ((val = mjit_exec(ec)) == Qundef) { \
-        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); \
-        val = vm_exec(ec, FALSE); \
-    } \
-} while (0)
-#else
-/* When calling from VM, longjmp in the callee won't purge any JIT-ed caller frames.
-   So it's safe to directly call mjit_exec. */
-#define EXEC_EC_CFP(val) do { \
-    if ((val = mjit_exec(ec)) == Qundef) { \
-        RESTORE_REGS(); \
-        NEXT_INSN(); \
-    } \
-} while (0)
-#endif
-
-#define CALL_METHOD(calling, ci, cc) do { \
-    VALUE v = (*(cc)->call)(ec, GET_CFP(), (calling), (ci), (cc)); \
-    if (v == Qundef) { \
-        EXEC_EC_CFP(val); \
-    } \
-    else { \
-        val = v; \
-    } \
-} while (0)
-
 /* set fastpath when cached method is *NOT* protected
  * because inline method cache does not care about receiver. */