1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/tool/ruby_vm/views/_mjit_compile_send.erb
Takashi Kokubun b16a2aa938
Reduce code size for rb_class_of
by inlining only hot path.

=== mame/optcarrot ===

$ benchmark-driver -v --rbenv 'before --jit;after --jit' benchmark.yml --repeat-count=24 --output=all
before --jit: ruby 2.8.0dev (2020-05-18T05:21:31Z master 0e5a58b6bf) +JIT [x86_64-linux]
after --jit: ruby 2.8.0dev (2020-05-18T06:12:04Z master 0e3d71a8d1) +JIT [x86_64-linux]
last_commit=Reduce code size for rb_class_of
Calculating -------------------------------------
                                 before --jit           after --jit
Optcarrot Lan_Master.nes    71.62880463568773     70.95730063273503 fps
                            71.73973684273152     71.98447841929851
                            75.03923801841310     75.54262519509039
                            75.16300287174957     77.64029272984344
                            75.16834828625935     78.67861469580785
                            75.17670723726911     78.81879353707393
                            75.67637908020630     79.18188850392886
                            76.19843953215396     79.66484891814478
                            77.28166716118808     79.80278072861037
                            77.38509903325165     80.05859292679696
                            78.12693418455953     80.34624804808006
                            78.73654441746730     80.66326571254345
                            79.25387513454415     80.69760605740196
                            79.44137881689524     81.32053489212245
                            79.50497657368358     81.50250852553751
                            79.62401328582868     82.27544931834611
                            79.79178811723664     82.67455264522741
                            81.20275352937418     82.93857260493297
                            81.57027048640776     83.15019118788184
                            81.63373188649095     83.20728816044721
                            81.93420437766426     83.25027576772972
                            82.05716136357167     83.27072145898173
                            82.21070805525066     83.36008265822194
                            82.56924063784872     83.36112268888493

=== benchmark-driver/sinatra ===

[rps]
before: 13143.49 rps
after: 13505.70 rps

[inlined rb_class_of size]
before: 11.5K
after: 3.8K

(calculated by `dwarftree --die inlined_subroutine --flat --merge --show-size`)
2020-05-17 23:38:19 -07:00

112 lines
6.5 KiB
Text

% # -*- C -*-
% # Copyright (c) 2018 Takashi Kokubun. All rights reserved.
% #
% # This file is a part of the programming language Ruby. Permission is hereby
% # granted, to either redistribute and/or modify this file, provided that the
% # conditions mentioned in the file COPYING are met. Consult the file for
% # details.
%
% # Optimized case of send / opt_send_without_block instructions.
{
% # compiler: Prepare operands which may be used by `insn.call_attribute`
% insn.opes.each_with_index do |ope, i|
MAYBE_UNUSED(<%= ope.fetch(:decl) %>) = (<%= ope.fetch(:type) %>)operands[<%= i %>];
% end
% # compiler: Use captured cc to avoid race condition
const struct rb_callcache *captured_cc = captured_cc_entries(status)[call_data_index(cd, body)];
%
% # compiler: Inline send insn where some supported fastpath is used.
const rb_iseq_t *iseq = NULL;
const CALL_INFO ci = cd->ci;
int kw_splat = IS_ARGS_KW_SPLAT(ci) > 0;
extern bool rb_splat_or_kwargs_p(const struct rb_callinfo *restrict ci);
if (!status->compile_info->disable_send_cache && has_valid_method_type(captured_cc) && (
% # `CC_SET_FASTPATH(cd->cc, vm_call_cfunc_with_frame, ...)` in `vm_call_cfunc`
(vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_CFUNC
&& !rb_splat_or_kwargs_p(ci) && !kw_splat)
% # `CC_SET_FASTPATH(cc, vm_call_iseq_setup_func(...), vm_call_iseq_optimizable_p(...))` in `vm_callee_setup_arg`,
% # and support only non-VM_CALL_TAILCALL path inside it
|| (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_ISEQ
&& fastpath_applied_iseq_p(ci, captured_cc, iseq = def_iseq_ptr(vm_cc_cme(captured_cc)->def))
&& !(vm_ci_flag(ci) & VM_CALL_TAILCALL))
)) {
int sp_inc = (int)sp_inc_of_sendish(ci);
fprintf(f, "{\n");
% # JIT: Invalidate call cache if it requires vm_search_method. This allows to inline some of following things.
bool opt_class_of = !maybe_special_const_class_p(captured_cc->klass); // If true, use RBASIC_CLASS instead of CLASS_OF to reduce code size
fprintf(f, " const struct rb_callcache *cc = (const struct rb_callcache *)0x%"PRIxVALUE";\n", (VALUE)captured_cc);
fprintf(f, " const rb_callable_method_entry_t *cc_cme = (const rb_callable_method_entry_t *)0x%"PRIxVALUE";\n", (VALUE)vm_cc_cme(captured_cc));
fprintf(f, " const VALUE recv = stack[%d];\n", b->stack_size + sp_inc - 1);
fprintf(f, " if (UNLIKELY(%s || !vm_cc_valid_p(cc, cc_cme, %s(recv)))) {\n", opt_class_of ? "RB_SPECIAL_CONST_P(recv)" : "false", opt_class_of ? "RBASIC_CLASS" : "CLASS_OF");
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos);
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
fprintf(f, " goto send_cancel;\n");
fprintf(f, " }\n");
% # JIT: move sp and pc if necessary
<%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>
% # JIT: If ISeq is inlinable, call the inlined method without pushing a frame.
if (iseq && status->inlined_iseqs != NULL && iseq->body == status->inlined_iseqs[pos]) {
fprintf(f, " {\n");
fprintf(f, " VALUE orig_self = reg_cfp->self;\n");
fprintf(f, " reg_cfp->self = stack[%d];\n", b->stack_size + sp_inc - 1);
fprintf(f, " stack[%d] = _mjit%d_inlined_%d(ec, reg_cfp, orig_self, original_iseq);\n", b->stack_size + sp_inc - 1, status->compiled_id, pos);
fprintf(f, " reg_cfp->self = orig_self;\n");
fprintf(f, " }\n");
}
else {
% # JIT: Forked `vm_sendish` (except method_explorer = vm_search_method_wrap) to inline various things
fprintf(f, " {\n");
fprintf(f, " VALUE val;\n");
fprintf(f, " struct rb_calling_info calling;\n");
% if insn.name == 'send'
fprintf(f, " calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, (const struct rb_callinfo *)0x%"PRIxVALUE", (rb_iseq_t *)0x%"PRIxVALUE", FALSE);\n", (VALUE)ci, (VALUE)blockiseq);
% else
fprintf(f, " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n");
% end
fprintf(f, " calling.kw_splat = %d;\n", kw_splat);
fprintf(f, " calling.recv = stack[%d];\n", b->stack_size + sp_inc - 1);
fprintf(f, " calling.argc = %d;\n", vm_ci_argc(ci));
if (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_CFUNC) {
% # TODO: optimize this more
fprintf(f, " CALL_DATA cd = (CALL_DATA)0x%"PRIxVALUE";\n", operands[0]);
fprintf(f, " val = vm_call_cfunc_with_frame(ec, reg_cfp, &calling, cd);\n");
}
else { // VM_METHOD_TYPE_ISEQ
% # fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, cc_cme, 0, %d, %d);\n", iseq->body->param.size, iseq->body->local_table_size);
if (iseq->body->catch_except_p) {
fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n");
fprintf(f, " val = vm_exec(ec, TRUE);\n");
}
else {
fprintf(f, " if ((val = mjit_exec(ec)) == Qundef) {\n");
fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup
fprintf(f, " val = vm_exec(ec, FALSE);\n");
fprintf(f, " }\n");
}
}
fprintf(f, " stack[%d] = val;\n", b->stack_size + sp_inc - 1);
fprintf(f, " }\n");
% # JIT: We should evaluate ISeq modified for TracePoint if it's enabled. Note: This is slow.
fprintf(f, " if (UNLIKELY(!mjit_call_p)) {\n");
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size + (int)<%= insn.call_attribute('sp_inc') %>);
if (!pc_moved_p) {
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", next_pos);
}
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_invalidate_all);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, " }\n");
}
% # compiler: Move JIT compiler's internal stack pointer
b->stack_size += <%= insn.call_attribute('sp_inc') %>;
fprintf(f, "}\n");
break;
}
}