Introduce frame-omitted method inlining

for ISeqs that include only leaf and non-handles_sp insns, except `leave`.
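
Such a callee is compiled into an ALWAYS_INLINE static C function and called
directly, without pushing an rb_control_frame_t; the frame is pushed lazily
only when the inlined code cancels JIT execution. A sketch of the generated
call site (the `_10` suffix and the stack index are per-call-site values,
hypothetical here):

    VALUE orig_self = reg_cfp->self;
    reg_cfp->self = stack[0]; /* receiver */
    stack[0] = _mjit_inlined_10(ec, reg_cfp, orig_self);
    reg_cfp->self = orig_self;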

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67574 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
k0kubun 2019-04-16 17:02:16 +00:00
parent b914166fcd
commit d71b78575b
6 changed files with 188 additions and 57 deletions

common.mk

@ -964,7 +964,8 @@ $(srcs_vpath)insns.inc: $(srcdir)/tool/ruby_vm/views/insns.inc.erb $(inc_common_
$(srcs_vpath)insns_info.inc: $(srcdir)/tool/ruby_vm/views/insns_info.inc.erb $(inc_common_headers) \
$(srcdir)/tool/ruby_vm/views/_insn_type_chars.erb $(srcdir)/tool/ruby_vm/views/_insn_name_info.erb \
$(srcdir)/tool/ruby_vm/views/_insn_len_info.erb $(srcdir)/tool/ruby_vm/views/_insn_operand_info.erb \
$(srcdir)/tool/ruby_vm/views/_attributes.erb $(srcdir)/tool/ruby_vm/views/_insn_stack_increase.erb
$(srcdir)/tool/ruby_vm/views/_attributes.erb $(srcdir)/tool/ruby_vm/views/_insn_stack_increase.erb \
$(srcdir)/tool/ruby_vm/views/_insn_sp_pc_dependency.erb
$(srcs_vpath)vmtc.inc: $(srcdir)/tool/ruby_vm/views/vmtc.inc.erb $(inc_common_headers)
$(srcs_vpath)vm.inc: $(srcdir)/tool/ruby_vm/views/vm.inc.erb $(inc_common_headers) \
$(srcdir)/tool/ruby_vm/views/_insn_entry.erb $(srcdir)/tool/ruby_vm/views/_trace_instruction.erb

mjit_compile.c

@ -25,6 +25,14 @@
#define NOT_COMPILED_STACK_SIZE -1
#define ALREADY_COMPILED_P(status, pos) (status->stack_size_for_pos[pos] != NOT_COMPILED_STACK_SIZE)
// For propagating information needed for lazily pushing a frame.
struct inlined_call_context {
int orig_argc; // ci->orig_argc
VALUE me; // cc->me
int param_size; // def_iseq_ptr(cc->me->def)->body->param.size
int local_size; // def_iseq_ptr(cc->me->def)->body->local_table_size
};
// Storage to keep the compiler's status. This should hold information
// which is global during one `mjit_compile` call. Information that is
// conditional in each branch should be stored in `compile_branch`.
@ -39,8 +47,9 @@ struct compile_status {
struct rb_call_cache *cc_entries;
// Mutated optimization levels
struct rb_mjit_compile_info *compile_info;
// If `iseq_for_pos[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there.
const struct rb_iseq_constant_body **iseq_for_pos;
// If `inlined_iseqs[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there.
const struct rb_iseq_constant_body **inlined_iseqs;
struct inlined_call_context inline_context;
};
// Storage to keep data which is consistent in each conditional branch.
@ -66,10 +75,10 @@ has_valid_method_type(CALL_CACHE cc)
&& mjit_valid_class_serial_p(cc->class_serial) && cc->me;
}
// Returns true if iseq is inlinable, otherwise NULL. This becomes true in the same condition
// Returns true if iseq can use fastpath for setup, otherwise false. This becomes true in the same condition
// as CC_SET_FASTPATH (in vm_callee_setup_arg) is called from vm_call_iseq_setup.
static bool
inlinable_iseq_p(const CALL_INFO ci, const CALL_CACHE cc, const rb_iseq_t *iseq)
fastpath_applied_iseq_p(const CALL_INFO ci, const CALL_CACHE cc, const rb_iseq_t *iseq)
{
extern bool rb_simple_iseq_p(const rb_iseq_t *iseq);
return iseq != NULL
@ -187,11 +196,48 @@ compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int st
}
}
// Print the block to cancel an inlined method call. It supports only `opt_send_without_block` for now.
static void
compile_inlined_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct inlined_call_context *inline_context)
{
fprintf(f, "\ncancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n");
// Swap pc/sp set on cancel with original pc/sp.
fprintf(f, " const VALUE current_pc = reg_cfp->pc;\n");
fprintf(f, " const VALUE current_sp = reg_cfp->sp;\n");
fprintf(f, " reg_cfp->pc = orig_pc;\n");
fprintf(f, " reg_cfp->sp = orig_sp;\n\n");
// Lazily push the current call frame.
fprintf(f, " struct rb_calling_info calling;\n");
fprintf(f, " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n"); // assumes `opt_send_without_block`
fprintf(f, " calling.argc = %d;\n", inline_context->orig_argc);
fprintf(f, " calling.recv = reg_cfp->self;\n");
fprintf(f, " reg_cfp->self = orig_self;\n");
fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x%"PRIxVALUE", 0, %d, %d);\n\n",
inline_context->me, inline_context->param_size, inline_context->local_size); // fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
// Start usual cancel from here.
fprintf(f, " reg_cfp = ec->cfp;\n"); // work on the new frame
fprintf(f, " reg_cfp->pc = current_pc;\n");
fprintf(f, " reg_cfp->sp = current_sp;\n");
for (unsigned int i = 0; i < body->stack_max; i++) { // `status->local_stack_p` should always be true here
fprintf(f, " *(vm_base_ptr(reg_cfp) + %d) = stack[%d];\n", i, i);
}
// We're not just returning Qundef here, so that the caller's normal cancel handler
// can push `stack` back to `cfp->sp`.
fprintf(f, " return vm_exec(ec, ec->cfp);\n");
}
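
Concretely, the emitted cancel block looks roughly like the following (a
sketch: the method entry pointer, argc, param/local sizes, and a stack_max of
1 are values baked in from `inline_context` at compile time, hypothetical
here):

    cancel:
        RB_DEBUG_COUNTER_INC(mjit_cancel);
        const VALUE *current_pc = reg_cfp->pc;
        VALUE *current_sp = reg_cfp->sp;
        reg_cfp->pc = orig_pc;
        reg_cfp->sp = orig_sp;

        struct rb_calling_info calling;
        calling.block_handler = VM_BLOCK_HANDLER_NONE;
        calling.argc = 0;
        calling.recv = reg_cfp->self;
        reg_cfp->self = orig_self;
        vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x562f2f8a4b90, 0, 0, 0);

        reg_cfp = ec->cfp; /* work on the lazily pushed frame */
        reg_cfp->pc = current_pc;
        reg_cfp->sp = current_sp;
        *(vm_base_ptr(reg_cfp) + 0) = stack[0];
        return vm_exec(ec, ec->cfp);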
// Print the block to cancel JIT execution.
static void
compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status)
{
unsigned int i;
if (status->inlined_iseqs == NULL) { // the current ISeq is being inlined
compile_inlined_cancel_handler(f, body, &status->inline_context);
return;
}
fprintf(f, "\nsend_cancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n");
@ -208,7 +254,7 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct
fprintf(f, "\ncancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n");
if (status->local_stack_p) {
for (i = 0; i < body->stack_max; i++) {
for (unsigned int i = 0; i < body->stack_max; i++) {
fprintf(f, " *(vm_base_ptr(reg_cfp) + %d) = stack[%d];\n", i, i);
}
}
@ -253,11 +299,45 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
return status->success;
}
// Return true if the ISeq can be inlined without pushing a new control frame.
static bool
inlinable_iseq_p(const struct rb_iseq_constant_body *body)
{
// 1) If catch_except_p, caller frame should be preserved when callee catches an exception.
// Then we need to wrap `vm_exec()` but then we can't inline the call inside it.
//
// 2) If `body->catch_except_p` is false and `handles_sp?` of an insn is false,
// sp is not moved as we assume `status->local_stack_p = !body->catch_except_p`.
//
// 3) If `body->catch_except_p` is false and `always_leaf?` of an insn is true,
// pc is not moved.
if (body->catch_except_p)
return false;
unsigned int pos = 0;
while (pos < body->iseq_size) {
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
int insn = rb_vm_insn_addr2insn((void *)body->iseq_encoded[pos]);
#else
int insn = (int)body->iseq_encoded[pos];
#endif
// All insns in the ISeq except `leave` (to be overridden in the inlined code)
// should meet the following strong assumptions:
// * Do not require `cfp->sp` motion
// * Do not move `cfp->pc`
// * Do not read any `cfp->pc`
if (insn != BIN(leave) && insn_may_depend_on_sp_or_pc(insn, body->iseq_encoded + (pos + 1)))
return false;
pos += insn_len(insn);
}
return true;
}
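
For example (illustrative; the exact insn encoding depends on compile
options), a trivial constant-returning method passes this check:

    /* def zero; 0; end  compiles to roughly:
     *   putobject_INT2FIX_0_   -- leaf, !handles_sp: allowed
     *   leave                  -- the one permitted exception
     * so inlinable_iseq_p() returns true for its body, while a body
     * containing e.g. `send` (non-leaf, handles_sp) makes it return false. */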
// This needs to be a macro instead of a function because it uses `alloca`.
#define INIT_COMPILE_STATUS(status, body, compile_root_p) do { \
status = (struct compile_status){ \
.stack_size_for_pos = (int *)alloca(sizeof(int) * body->iseq_size), \
.iseq_for_pos = compile_root_p ? \
.inlined_iseqs = compile_root_p ? \
alloca(sizeof(const struct rb_iseq_constant_body *) * body->iseq_size) : NULL, \
.cc_entries = (body->ci_size + body->ci_kw_size) > 0 ? \
alloca(sizeof(struct rb_call_cache) * (body->ci_size + body->ci_kw_size)) : NULL, \
@ -268,15 +348,16 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
}; \
memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size); \
if (compile_root_p) \
memset(status.iseq_for_pos, 0, sizeof(const struct rb_iseq_constant_body *) * body->iseq_size); \
memset(status.inlined_iseqs, 0, sizeof(const struct rb_iseq_constant_body *) * body->iseq_size); \
else \
memset(status.compile_info, 0, sizeof(struct rb_mjit_compile_info)); \
} while (0)
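
Memory from `alloca` is released when the function that called `alloca`
returns, so a helper function would free these buffers as soon as it returned;
expanding as a macro keeps them alive for the caller's whole frame. Usage is
as in `precompile_inlinable_iseqs` below:

    struct compile_status child_status;
    INIT_COMPILE_STATUS(child_status, child_iseq->body, false); /* buffers live in the caller's frame */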
// Compile inlinable ISeqs to C code in `f`. Returns true if all of them are compiled successfully.
static bool
precompile_inlinable_iseqs(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status)
precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
{
const struct rb_iseq_constant_body *body = iseq->body;
unsigned int pos = 0;
while (pos < body->iseq_size) {
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
@ -285,30 +366,40 @@ precompile_inlinable_iseqs(FILE *f, const struct rb_iseq_constant_body *body, st
int insn = (int)body->iseq_encoded[pos];
#endif
if (insn == BIN(opt_send_without_block) || insn == BIN(send)) {
if (insn == BIN(opt_send_without_block)) { // `compile_inlined_cancel_handler` supports only `opt_send_without_block`
CALL_INFO ci = (CALL_INFO)body->iseq_encoded[pos + 1];
CALL_CACHE cc_copy = status->cc_entries + ((CALL_CACHE)body->iseq_encoded[pos + 2] - body->cc_entries); // use copy to avoid race condition
const rb_iseq_t *child_iseq;
if (has_valid_method_type(cc_copy) &&
!(ci->flag & VM_CALL_TAILCALL) && // inlining only non-tailcall path
cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && inlinable_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg
!child_iseq->body->catch_except_p && // if catch_except_p, caller frame should be preserved when callee catches an exception.
mjit_target_iseq_p(child_iseq->body)) {
status->iseq_for_pos[pos] = child_iseq->body;
cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && fastpath_applied_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg
inlinable_iseq_p(child_iseq->body)) {
status->inlined_iseqs[pos] = child_iseq->body;
if (mjit_opts.verbose >= 1) // print beforehand because ISeq may be GCed during copy job.
fprintf(stderr, "JIT inline: %s@%s:%d\n", RSTRING_PTR(child_iseq->body->location.label),
fprintf(stderr, "JIT inline: %s@%s:%d => %s@%s:%d\n",
RSTRING_PTR(iseq->body->location.label),
RSTRING_PTR(rb_iseq_path(iseq)), FIX2INT(iseq->body->location.first_lineno),
RSTRING_PTR(child_iseq->body->location.label),
RSTRING_PTR(rb_iseq_path(child_iseq)), FIX2INT(child_iseq->body->location.first_lineno));
struct compile_status child_status;
INIT_COMPILE_STATUS(child_status, child_iseq->body, false);
child_status.inline_context = (struct inlined_call_context){
.orig_argc = ci->orig_argc,
.me = (VALUE)cc_copy->me,
.param_size = child_iseq->body->param.size,
.local_size = child_iseq->body->local_table_size
};
if ((child_status.cc_entries != NULL || child_status.is_entries != NULL)
&& !mjit_copy_cache_from_main_thread(child_iseq, child_status.cc_entries, child_status.is_entries))
return false;
fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp));\n", pos);
fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", pos);
fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self));\n", pos);
fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self)\n{\n", pos);
fprintf(f, " const VALUE *orig_pc = reg_cfp->pc;\n");
fprintf(f, " const VALUE *orig_sp = reg_cfp->sp;\n");
bool success = mjit_compile_body(f, child_iseq, &child_status);
fprintf(f, "\n} /* end of _mjit_inlined_%d */\n\n", pos);
@ -337,7 +428,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname)
&& !mjit_copy_cache_from_main_thread(iseq, status.cc_entries, status.is_entries))
return false;
bool success = precompile_inlinable_iseqs(f, iseq->body, &status);
bool success = precompile_inlinable_iseqs(f, iseq, &status);
if (!success)
return false;

tool/ruby_vm/views/_insn_sp_pc_dependency.erb

@ -0,0 +1,27 @@
%# -*- C -*-
%# Copyright (c) 2019 Takashi Kokubun. All rights reserved.
%#
%# This file is a part of the programming language Ruby. Permission is hereby
%# granted, to either redistribute and/or modify this file, provided that the
%# conditions mentioned in the file COPYING are met. Consult the file for
%# details.
%#
PUREFUNC(MAYBE_UNUSED(static bool insn_may_depend_on_sp_or_pc(int insn, const VALUE *opes)));
static bool
insn_may_depend_on_sp_or_pc(int insn, const VALUE *opes)
{
switch (insn) {
% RubyVM::Instructions.each do |insn|
% # handles_sp?: If true, the JIT needs to move sp.
% # always_leaf?: If false, it may call an arbitrary method, so pc should be moved
% # before the call; the method may also read the caller's pc (lineno).
% unless !insn.is_a?(RubyVM::TraceInstructions) && !insn.handles_sp? && insn.always_leaf?
case <%= insn.bin %>:
% end
% end
return true;
default:
return false;
}
}
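
Expanded, the generated predicate is a plain switch; for instance (the actual
case list is derived from the insns.def attributes above, and these cases are
only examples):

    PUREFUNC(MAYBE_UNUSED(static bool insn_may_depend_on_sp_or_pc(int insn, const VALUE *opes)));
    static bool
    insn_may_depend_on_sp_or_pc(int insn, const VALUE *opes)
    {
        switch (insn) {
          case BIN(send):                   /* handles_sp?, not always_leaf? */
          case BIN(opt_send_without_block): /* handles_sp?, not always_leaf? */
          case BIN(trace_nop):              /* a RubyVM::TraceInstructions variant */
            return true;
          default:
            return false;
        }
    }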

tool/ruby_vm/views/_mjit_compile_send.erb

@ -24,7 +24,7 @@
% end
if (!(ci->flag & VM_CALL_TAILCALL) // inlining non-tailcall path
&& cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && inlinable_iseq_p(ci, cc_copy, iseq = def_iseq_ptr(cc_copy->me->def))) { // CC_SET_FASTPATH in vm_callee_setup_arg
&& cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && fastpath_applied_iseq_p(ci, cc_copy, iseq = def_iseq_ptr(cc_copy->me->def))) { // CC_SET_FASTPATH in vm_callee_setup_arg
int param_size = iseq->body->param.size;
fprintf(f, "{\n");
@ -44,52 +44,57 @@
% # JIT: move sp and pc if necessary
<%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>
% # JIT: Print insn body in insns.def
fprintf(f, " {\n");
fprintf(f, " struct rb_calling_info calling;\n");
% if insn.name == 'send'
fprintf(f, " calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, (CALL_INFO)0x%"PRIxVALUE", (rb_iseq_t *)0x%"PRIxVALUE", FALSE);\n", operands[0], operands[2]);
% else
fprintf(f, " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n");
% end
fprintf(f, " calling.argc = %d;\n", ci->orig_argc);
fprintf(f, " calling.recv = stack[%d];\n", b->stack_size - 1 - argc);
% # JIT: Special CALL_METHOD. Bypass cc_copy->call and inline vm_call_iseq_setup_normal for vm_call_iseq_setup_func FASTPATH.
fprintf(f, " {\n");
fprintf(f, " VALUE v;\n");
fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x%"PRIxVALUE", 0, %d, %d);\n",
(VALUE)cc_copy->me, param_size, iseq->body->local_table_size); /* inlinable_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE */
if (iseq->body->catch_except_p) {
fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n");
fprintf(f, " v = vm_exec(ec, TRUE);\n");
% # JIT: If ISeq is inlinable, call the inlined method without pushing a frame.
if (status->inlined_iseqs != NULL && status->inlined_iseqs[pos] == iseq->body) {
fprintf(f, " {\n");
fprintf(f, " VALUE orig_self = reg_cfp->self;\n");
fprintf(f, " reg_cfp->self = stack[%d];\n", b->stack_size - argc - 1);
fprintf(f, " stack[%d] = _mjit_inlined_%d(ec, reg_cfp, orig_self);\n", b->stack_size - argc - 1, pos);
fprintf(f, " reg_cfp->self = orig_self;\n");
fprintf(f, " }\n");
}
else {
if (status->iseq_for_pos != NULL && status->iseq_for_pos[pos] == iseq->body) {
fprintf(f, " v = _mjit_inlined_%d(ec, ec->cfp);\n", pos);
% # JIT: Print insn body in insns.def
fprintf(f, " {\n");
fprintf(f, " struct rb_calling_info calling;\n");
% if insn.name == 'send'
fprintf(f, " calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, (CALL_INFO)0x%"PRIxVALUE", (rb_iseq_t *)0x%"PRIxVALUE", FALSE);\n", operands[0], operands[2]);
% else
fprintf(f, " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n");
% end
fprintf(f, " calling.argc = %d;\n", ci->orig_argc);
fprintf(f, " calling.recv = stack[%d];\n", b->stack_size - 1 - argc);
% # JIT: Special CALL_METHOD. Bypass cc_copy->call and inline vm_call_iseq_setup_normal for vm_call_iseq_setup_func FASTPATH.
fprintf(f, " {\n");
fprintf(f, " VALUE v;\n");
fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x%"PRIxVALUE", 0, %d, %d);\n",
(VALUE)cc_copy->me, param_size, iseq->body->local_table_size); // fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
if (iseq->body->catch_except_p) {
fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n");
fprintf(f, " v = vm_exec(ec, TRUE);\n");
}
else {
fprintf(f, " v = mjit_exec(ec);\n");
fprintf(f, " if ((v = mjit_exec(ec)) == Qundef) {\n");
fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup
fprintf(f, " v = vm_exec(ec, FALSE);\n");
fprintf(f, " }\n");
}
fprintf(f, " if (v == Qundef) {\n");
fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); /* This is vm_call0_body's code after vm_call_iseq_setup */
fprintf(f, " v = vm_exec(ec, FALSE);\n");
fprintf(f, " }\n");
}
fprintf(f, " stack[%d] = v;\n", b->stack_size - argc - 1);
fprintf(f, " }\n");
fprintf(f, " stack[%d] = v;\n", b->stack_size - argc - 1);
fprintf(f, " }\n");
fprintf(f, " }\n");
fprintf(f, " }\n");
% # JIT: We should evaluate ISeq modified for TracePoint if it's enabled. Note: This is slow.
fprintf(f, " if (UNLIKELY(ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS)) {\n");
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size + (int)<%= insn.call_attribute('sp_inc') %>);
if (!pc_moved_p) {
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", next_pos);
fprintf(f, " if (UNLIKELY(ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS)) {\n");
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size + (int)<%= insn.call_attribute('sp_inc') %>);
if (!pc_moved_p) {
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", next_pos);
}
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_trace);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, " }\n");
}
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_trace);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, " }\n");
% # compiler: Move JIT compiler's internal stack pointer
b->stack_size += <%= insn.call_attribute('sp_inc') %>;
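
When the call site is not inlined, the C this template emits looks roughly
like the following (for a 1-argument `opt_send_without_block` at stack_size 2;
the method entry pointer and the param/local sizes are hypothetical values
baked in at compile time):

    {
        struct rb_calling_info calling;
        calling.block_handler = VM_BLOCK_HANDLER_NONE;
        calling.argc = 1;
        calling.recv = stack[0];
        {
            VALUE v;
            vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x562f2f8a4b90, 0, 1, 2);
            if ((v = mjit_exec(ec)) == Qundef) {
                VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);
                v = vm_exec(ec, FALSE);
            }
            stack[0] = v;
        }
    }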

tool/ruby_vm/views/insns_info.inc.erb

@ -19,3 +19,4 @@
<%= render 'sp_inc_helpers' %>
<%= render 'attributes' %>
<%= render 'insn_stack_increase' %>
<%= render 'insn_sp_pc_dependency' %>

tool/ruby_vm/views/mjit_compile.inc.erb

@ -71,6 +71,12 @@ switch (insn) {
fprintf(stderr, "MJIT warning: Unexpected JIT stack_size on leave: %d\n", b->stack_size);
status->success = false;
}
% # Special leave for an inlined call.
if (status->inlined_iseqs == NULL) { // the current ISeq is being inlined
fprintf(f, " return stack[0];\n");
b->stack_size += <%= insn.call_attribute('sp_inc') %>;
break;
}
% end
%
% # Main insn implementation generated by insns.def