insns.def: refactor to avoid CALL_METHOD macro
The send instruction and its variants are the most frequently called paths in the
entire process. The main goal of this changeset is to reduce macro expansions by
moving their shared logic into a dedicated function, vm_sendish(). It reduces the
size of vm_exec_core from 25,552 bytes to 23,728 bytes on my machine. I see no
significant slowdown.

Fix: [GH-2056]

vanilla: ruby 2.6.0dev (2018-12-19 trunk 66449) [x86_64-darwin15]
ours: ruby 2.6.0dev (2018-12-19 refactor-send 66449) [x86_64-darwin15]
last_commit=insns.def: refactor to avoid CALL_METHOD macro

Calculating -------------------------------------
                            vanilla        ours
    vm2_defined_method       2.645M      2.823M i/s -   6.000M times in 5.109888s 4.783254s
            vm2_method       8.553M      8.873M i/s -   6.000M times in 1.579892s 1.524026s
    vm2_method_missing       3.772M      3.858M i/s -   6.000M times in 3.579482s 3.499220s
 vm2_method_with_block       8.494M      8.944M i/s -   6.000M times in 1.589774s 1.509463s
       vm2_poly_method        0.571       0.607 i/s -    1.000 times in 3.947570s 3.733528s
    vm2_poly_method_ov        5.514       5.168 i/s -    1.000 times in 0.408156s 0.436169s
  vm3_clearmethodcache        2.875       2.837 i/s -    1.000 times in 0.783018s 0.793493s

Comparison:
    vm2_defined_method
            ours:  2822555.4 i/s
         vanilla:  2644878.1 i/s - 1.07x slower

            vm2_method
            ours:  8872947.8 i/s
         vanilla:  8553433.1 i/s - 1.04x slower

    vm2_method_missing
            ours:  3858192.3 i/s
         vanilla:  3772296.3 i/s - 1.02x slower

 vm2_method_with_block
            ours:  8943825.1 i/s
         vanilla:  8493955.0 i/s - 1.05x slower

       vm2_poly_method
            ours:        0.6 i/s
         vanilla:        0.6 i/s - 1.06x slower

    vm2_poly_method_ov
         vanilla:        5.5 i/s
            ours:        5.2 i/s - 1.07x slower

  vm3_clearmethodcache
         vanilla:        2.9 i/s
            ours:        2.8 i/s - 1.01x slower

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66565 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
commit d46ab95376 (parent 686881d383)
7 changed files with 146 additions and 69 deletions
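For orientation before reading the diff, here is an editor's toy sketch of the pattern the commit applies. It is not code from the patch, and every name in it (call_info, method_explorer_t, do_sendish, explore_normal, explore_super) is invented for illustration. Previously each send-ish instruction expanded the CALL_METHOD / EXEC_EC_CFP macros inline; after this change they all call one shared out-of-line function (vm_sendish() in the real patch) and pass a small callback that performs only the instruction-specific method lookup, which is what shrinks vm_exec_core.

/* Toy, self-contained illustration only: one out-of-line "sendish" body,
 * parameterized by a callback, replaces per-instruction macro expansions.
 * None of these names exist in Ruby; they are invented for this sketch. */
#include <stdio.h>

struct call_info { int argc; };

/* Each instruction supplies only what differs: how to look the method up. */
typedef void (*method_explorer_t)(struct call_info *ci);

static void explore_normal(struct call_info *ci) { printf("normal lookup, argc=%d\n", ci->argc); }
static void explore_super(struct call_info *ci)  { printf("super lookup, argc=%d\n", ci->argc); }

/* Shared body: called by every "send-ish" instruction instead of expanding
 * a large macro at each call site. */
static int
do_sendish(struct call_info *ci, method_explorer_t method_explorer)
{
    method_explorer(ci);   /* instruction-specific step */
    return ci->argc;       /* stand-in for the shared dispatch result */
}

int
main(void)
{
    struct call_info ci = { 3 };
    printf("send-like: %d\n", do_sendish(&ci, explore_normal));
    printf("invokesuper-like: %d\n", do_sendish(&ci, explore_super));
    return 0;
}

The real vm_sendish() additionally threads through the block handler and call cache and decides between mjit_exec and vm_exec, as the vm_insnhelper.c hunk below shows.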
insns.def (52 changed lines)

@@ -740,12 +740,13 @@ send
 (VALUE val)
 // attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
 {
-    struct rb_calling_info calling;
+    VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), ci, blockiseq, false);
+    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
 
-    calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, ci, blockiseq, FALSE);
-    calling.recv = TOPN(calling.argc = ci->orig_argc);
-    vm_search_method(ci, cc, calling.recv);
-    CALL_METHOD(&calling, ci, cc);
+    if (val == Qundef) {
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
 }
 
 /* Invoke method without block */
@@ -757,10 +758,13 @@ opt_send_without_block
 // attr bool handles_sp = true;
 // attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
 {
-    struct rb_calling_info calling;
-    calling.block_handler = VM_BLOCK_HANDLER_NONE;
-    vm_search_method(ci, cc, calling.recv = TOPN(calling.argc = ci->orig_argc));
-    CALL_METHOD(&calling, ci, cc);
+    VALUE bh = VM_BLOCK_HANDLER_NONE;
+    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
+
+    if (val == Qundef) {
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
 }
 
 DEFINE_INSN
@@ -826,12 +830,13 @@ invokesuper
 (VALUE val)
 // attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
 {
-    struct rb_calling_info calling;
+    VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), ci, blockiseq, true);
+    val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_super_method);
 
-    calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, ci, blockiseq, TRUE);
-    calling.recv = TOPN(calling.argc = ci->orig_argc);
-    vm_search_super_method(ec, GET_CFP(), &calling, ci, cc);
-    CALL_METHOD(&calling, ci, cc);
+    if (val == Qundef) {
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
 }
 
 /* yield(args) */
@@ -843,21 +848,16 @@ invokeblock
 // attr bool handles_sp = true;
 // attr rb_snum_t sp_inc = sp_inc_of_invokeblock(ci);
 {
-    struct rb_calling_info calling;
-    VALUE block_handler;
+    static struct rb_call_cache cc = {
+        0, 0, NULL, vm_invokeblock_i,
+    };
 
-    calling.argc = ci->orig_argc;
-    calling.block_handler = VM_BLOCK_HANDLER_NONE;
-    calling.recv = Qundef; /* should not be used */
-
-    block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
-    if (block_handler == VM_BLOCK_HANDLER_NONE) {
-        rb_vm_localjump_error("no block given (yield)", Qnil, 0);
-    }
-
-    val = vm_invoke_block(ec, GET_CFP(), &calling, ci, block_handler);
+    VALUE bh = VM_BLOCK_HANDLER_NONE;
+    val = vm_sendish(ec, GET_CFP(), ci, &cc, bh, vm_search_invokeblock);
+
     if (val == Qundef) {
-        EXEC_EC_CFP(val);
+        RESTORE_REGS();
+        NEXT_INSN();
     }
 }
 
@@ -81,6 +81,6 @@
 % end
 %
 % # compiler: If insn returns (leave) or does longjmp (throw), the branch should no longer be compiled. TODO: create attr for it?
-% if insn.expr.expr =~ /\sTHROW_EXCEPTION\([^)]+\);/ || insn.expr.expr =~ /\sRESTORE_REGS\(\);/
+% if insn.expr.expr =~ /\sTHROW_EXCEPTION\([^)]+\);/ || insn.expr.expr =~ /\bvm_pop_frame\(/
 b->finish_p = TRUE;
 % end
@@ -29,6 +29,12 @@
 % #endif
 % RESTORE_REGS
 % end
+% expr.gsub!(/^(?<indent>\s*)NEXT_INSN\(\);\n/) do
+%   indent = Regexp.last_match[:indent]
+%   <<-end.gsub(/^ +/, '')
+%     #{indent}UNREACHABLE_RETURN(Qundef);
+%   end
+% end
 % end
 % end
 %
vm.c (13 changed lines)

@@ -28,6 +28,17 @@
 
 VALUE rb_str_concat_literals(size_t, const VALUE*);
 
+/* :FIXME: This #ifdef is because we build pch in case of mswin and
+ * not in case of other situations. That distinction might change in
+ * a future. We would better make it detectable in something better
+ * than just _MSC_VER. */
+#ifdef _MSC_VER
+RUBY_FUNC_EXPORTED
+#else
+MJIT_FUNC_EXPORTED
+#endif
+VALUE vm_exec(rb_execution_context_t *, int);
+
 PUREFUNC(static inline const VALUE *VM_EP_LEP(const VALUE *));
 static inline const VALUE *
 VM_EP_LEP(const VALUE *ep)
@@ -1870,7 +1881,7 @@ static inline VALUE
 vm_exec_handle_exception(rb_execution_context_t *ec, enum ruby_tag_type state,
                          VALUE errinfo, VALUE *initial);
 
-MJIT_FUNC_EXPORTED VALUE
+VALUE
 vm_exec(rb_execution_context_t *ec, int mjit_enable_p)
 {
     enum ruby_tag_type state;
@@ -875,7 +875,7 @@ refine_sym_proc_call(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, callback_arg))
 
 static VALUE
 vm_caller_setup_arg_block(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
-                          const struct rb_call_info *ci, rb_iseq_t *blockiseq, const int is_super)
+                          const struct rb_call_info *ci, const rb_iseq_t *blockiseq, const int is_super)
 {
     if (ci->flag & VM_CALL_ARGS_BLOCKARG) {
         VALUE block_code = *(--reg_cfp->sp);
vm_insnhelper.c (104 changed lines)

@@ -2444,8 +2444,7 @@ vm_super_outside(void)
 }
 
 static void
-vm_search_super_method(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
-                       struct rb_calling_info *calling, struct rb_call_info *ci, struct rb_call_cache *cc)
+vm_search_super_method(const rb_control_frame_t *reg_cfp, struct rb_call_info *ci, struct rb_call_cache *cc, VALUE recv)
 {
     VALUE current_defined_class, klass;
     const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(reg_cfp);
@@ -2463,14 +2462,14 @@ vm_search_super_method(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
     if (BUILTIN_TYPE(current_defined_class) != T_MODULE &&
         BUILTIN_TYPE(current_defined_class) != T_ICLASS && /* bound UnboundMethod */
         !FL_TEST(current_defined_class, RMODULE_INCLUDED_INTO_REFINEMENT) &&
-        !rb_obj_is_kind_of(calling->recv, current_defined_class)) {
+        !rb_obj_is_kind_of(recv, current_defined_class)) {
         VALUE m = RB_TYPE_P(current_defined_class, T_ICLASS) ?
             RBASIC(current_defined_class)->klass : current_defined_class;
 
         rb_raise(rb_eTypeError,
                  "self has wrong type to call super in this context: "
                  "%"PRIsVALUE" (expected %"PRIsVALUE")",
-                 rb_obj_class(calling->recv), m);
+                 rb_obj_class(recv), m);
     }
 
     if (me->def->type == VM_METHOD_TYPE_BMETHOD && (ci->flag & VM_CALL_ZSUPER)) {
@@ -3187,6 +3186,103 @@ vm_find_or_create_class_by_id(ID id,
     }
 }
 
+static void
+vm_search_method_wrap(
+    const struct rb_control_frame_struct *reg_cfp,
+    struct rb_call_info *ci,
+    struct rb_call_cache *cc,
+    VALUE recv)
+{
+    vm_search_method(ci, cc, recv);
+}
+
+static void
+vm_search_invokeblock(
+    const struct rb_control_frame_struct *reg_cfp,
+    struct rb_call_info *ci,
+    struct rb_call_cache *cc,
+    VALUE recv)
+{
+    /* Does nothing. */
+}
+
+static VALUE
+vm_invokeblock_i(
+    struct rb_execution_context_struct *ec,
+    struct rb_control_frame_struct *reg_cfp,
+    struct rb_calling_info *calling,
+    const struct rb_call_info *ci,
+    struct rb_call_cache *cc)
+{
+    VALUE block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
+
+    if (block_handler == VM_BLOCK_HANDLER_NONE) {
+        rb_vm_localjump_error("no block given (yield)", Qnil, 0);
+    }
+    else {
+        return vm_invoke_block(ec, GET_CFP(), calling, ci, block_handler);
+    }
+}
+
+static VALUE
+vm_sendish(
+    struct rb_execution_context_struct *ec,
+    struct rb_control_frame_struct *reg_cfp,
+    struct rb_call_info *ci,
+    struct rb_call_cache *cc,
+    VALUE block_handler,
+    void (*method_explorer)(
+        const struct rb_control_frame_struct *reg_cfp,
+        struct rb_call_info *ci,
+        struct rb_call_cache *cc,
+        VALUE recv))
+{
+    VALUE val;
+    int argc = ci->orig_argc;
+    VALUE recv = TOPN(argc);
+    struct rb_calling_info calling;
+
+    calling.block_handler = block_handler;
+    calling.recv = recv;
+    calling.argc = argc;
+
+    method_explorer(GET_CFP(), ci, cc, recv);
+
+    val = cc->call(ec, GET_CFP(), &calling, ci, cc);
+
+    if (val != Qundef) {
+        return val;             /* CFUNC normal return */
+    }
+    else {
+        RESTORE_REGS();         /* CFP pushed in cc->call() */
+    }
+
+#ifdef MJIT_HEADER
+    /* When calling ISeq which may catch an exception from JIT-ed
+       code, we should not call mjit_exec directly to prevent the
+       caller frame from being canceled. That's because the caller
+       frame may have stack values in the local variables and the
+       cancelling the caller frame will purge them. But directly
+       calling mjit_exec is faster... */
+    if (GET_ISEQ()->body->catch_except_p) {
+        VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH);
+        return vm_exec(ec, true);
+    }
+    else if ((val = mjit_exec(ec)) == Qundef) {
+        VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH);
+        return vm_exec(ec, false);
+    }
+    else {
+        return val;
+    }
+#else
+    /* When calling from VM, longjmp in the callee won't purge any
+       JIT-ed caller frames. So it's safe to directly call
+       mjit_exec. */
+    return mjit_exec(ec);
+#endif
+}
+
 static VALUE
 vm_opt_str_freeze(VALUE str, int bop, ID id)
 {
@@ -137,42 +137,6 @@ enum vm_regan_acttype
 /* deal with control flow 2: method/iterator */
 /**********************************************************/
 
-#ifdef MJIT_HEADER
-/* When calling ISeq which may catch an exception from JIT-ed code, we should not call
-   mjit_exec directly to prevent the caller frame from being canceled. That's because
-   the caller frame may have stack values in the local variables and the cancelling
-   the caller frame will purge them. But directly calling mjit_exec is faster... */
-#define EXEC_EC_CFP(val) do { \
-    if (ec->cfp->iseq->body->catch_except_p) { \
-        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); \
-        val = vm_exec(ec, TRUE); \
-    } \
-    else if ((val = mjit_exec(ec)) == Qundef) { \
-        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); \
-        val = vm_exec(ec, FALSE); \
-    } \
-} while (0)
-#else
-/* When calling from VM, longjmp in the callee won't purge any JIT-ed caller frames.
-   So it's safe to directly call mjit_exec. */
-#define EXEC_EC_CFP(val) do { \
-    if ((val = mjit_exec(ec)) == Qundef) { \
-        RESTORE_REGS(); \
-        NEXT_INSN(); \
-    } \
-} while (0)
-#endif
-
-#define CALL_METHOD(calling, ci, cc) do { \
-    VALUE v = (*(cc)->call)(ec, GET_CFP(), (calling), (ci), (cc)); \
-    if (v == Qundef) { \
-        EXEC_EC_CFP(val); \
-    } \
-    else { \
-        val = v; \
-    } \
-} while (0)
-
 /* set fastpath when cached method is *NOT* protected
  * because inline method cache does not care about receiver.
  */