ruby/ruby: lib/mjit/compiler.rb
Jemma Issroff d594a5a8bd
This commit implements the Object Shapes technique in CRuby.
Object Shapes is used for accessing instance variables and representing the
"frozenness" of objects.  Object instances have a "shape" and the shape
represents some attributes of the object (currently which instance variables are
set and the "frozenness").  Shapes form a tree data structure, and when a new
instance variable is set on an object, that object "transitions" to a new shape
in the shape tree.  Each shape has an ID that is used for caching. The shape
structure is independent of class, so objects of different types can have the
same shape.

For example:

```ruby
class Foo
  def initialize
    # Starts with shape id 0
    @a = 1 # transitions to shape id 1
    @b = 1 # transitions to shape id 2
  end
end

class Bar
  def initialize
    # Starts with shape id 0
    @a = 1 # transitions to shape id 1
    @b = 1 # transitions to shape id 2
  end
end

foo = Foo.new # `foo` has shape id 2
bar = Bar.new # `bar` has shape id 2
```

Both `foo` and `bar` instances have the same shape because they both set
instance variables of the same name in the same order.

This technique can help improve inline cache hit rates, as well as letting JIT
compilers generate more efficient machine code.
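
Because shapes depend on insertion order, setting the same instance variables
in a different order yields a different shape. A small illustration (the class
name `Baz` is hypothetical, and the concrete shape ids are only illustrative):

```ruby
class Baz
  def initialize
    # Starts with shape id 0
    @b = 1 # transitions to a new shape, not shape id 1, because @b is set first
    @a = 1 # transitions to yet another shape
  end
end

baz = Baz.new # `baz` does not share shape id 2 with `foo` and `bar`
```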

This commit also adds some methods for debugging shapes on objects.  See
`RubyVM::Shape` for more details.
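
For instance, inspecting an object's shape looks roughly like this (a sketch
only: the exact helper names are defined under `RubyVM::Shape` and may differ):

```ruby
# Hypothetical debugging session; consult RubyVM::Shape for the real API.
foo = Foo.new
shape = RubyVM::Shape.of(foo) # fetch the shape currently assigned to `foo`
shape.id                      # => 2, the shape id used by inline caches
```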

For more context on Object Shapes, see [Feature #18776]

Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org>
Co-Authored-By: Eileen M. Uchitelle <eileencodes@gmail.com>
Co-Authored-By: John Hawthorn <john@hawthorn.email>
2022-09-28 08:26:21 -07:00

module RubyVM::MJIT
  USE_RVARGC = C.USE_RVARGC
  ROBJECT_EMBED_LEN_MAX = C.ROBJECT_EMBED_LEN_MAX

  UNSUPPORTED_INSNS = [
    :defineclass, # low priority
  ]

  # Available variables and macros in JIT-ed function:
  #   ec: the first argument of _mjitXXX
  #   reg_cfp: the second argument of _mjitXXX
  #   GET_CFP(): refers to `reg_cfp`
  #   GET_EP(): refers to `reg_cfp->ep`
  #   GET_SP(): refers to `reg_cfp->sp`, or `(stack + stack_size)` if local_stack_p
  #   GET_SELF(): refers to `cfp_self`
  #   GET_LEP(): refers to `VM_EP_LEP(reg_cfp->ep)`
  #   EXEC_EC_CFP(): refers to `val = vm_exec(ec, true)` with frame setup
  #   CALL_METHOD(): using `GET_CFP()` and `EXEC_EC_CFP()`
  #   TOPN(): refers to `reg_cfp->sp`, or `*(stack + (stack_size - num - 1))` if local_stack_p
  #   STACK_ADDR_FROM_TOP(): refers to `reg_cfp->sp`, or `stack + (stack_size - num)` if local_stack_p
  #   DISPATCH_ORIGINAL_INSN(): expanded in _mjit_compile_insn.erb
  #   THROW_EXCEPTION(): specially defined for JIT
  #   RESTORE_REGS(): specially defined for `leave`
  class << Compiler = Module.new
    # mjit_compile
    # @param funcname [String]
    def compile(f, iseq, funcname, id)
      status = C.compile_status.new # not freed for now
      status.compiled_iseq = iseq.body
      status.compiled_id = id
      init_compile_status(status, iseq.body, true) # not freed for now
      if iseq.body.ci_size > 0 && status.cc_entries_index == -1
        return false
      end

      init_ivar_compile_status(iseq.body, status)
      if !status.compile_info.disable_send_cache && !status.compile_info.disable_inlining
        unless precompile_inlinable_iseqs(f, iseq, status)
          return false
        end
      end

      C.fprintf(f, "VALUE\n#{funcname}(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n")
      success = compile_body(f, iseq, status)
      C.fprintf(f, "\n} // end of #{funcname}\n")
      return success
    rescue => e # Should rb_rescue be called in C?
      if C.mjit_opts.warnings || C.mjit_opts.verbose > 0
        $stderr.puts e.full_message
      end
      return false
    end

    # mjit_compile_body
    def compile_body(f, iseq, status)
      status.success = true
      status.local_stack_p = !iseq.body.catch_except_p
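
      # With no catch table, stack values can live in C local variables (the
      # "local stack") instead of the VM stack; a catch table requires the
      # values to be on the VM stack when an exception is raised.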
      if status.local_stack_p
        src = +" VALUE stack[#{iseq.body.stack_max}];\n"
      else
        src = +" VALUE *stack = reg_cfp->sp;\n"
      end

      unless status.inlined_iseqs.nil? # i.e. compile root
        src << " static const rb_iseq_t *original_iseq = (const rb_iseq_t *)#{iseq};\n"
      end
      src << " static const VALUE *const original_body_iseq = (VALUE *)#{iseq.body.iseq_encoded};\n"
      src << " VALUE cfp_self = reg_cfp->self;\n" # cache self across the method
      src << "#undef GET_SELF\n"
      src << "#define GET_SELF() cfp_self\n"

      # Simulate `opt_pc` in setup_parameters_complex. Other PCs which may be passed by catch tables
      # are not considered since vm_exec doesn't call jit_exec for catch tables.
      if iseq.body.param.flags.has_opt
        src << "\n"
        src << " switch (reg_cfp->pc - ISEQ_BODY(reg_cfp->iseq)->iseq_encoded) {\n"
        (0..iseq.body.param.opt_num).each do |i|
          pc_offset = iseq.body.param.opt_table[i]
          src << " case #{pc_offset}:\n"
          src << " goto label_#{pc_offset};\n"
        end
        src << " }\n"
      end

      # Generate merged ivar guards first if needed
      if !status.compile_info.disable_ivar_cache && status.merge_ivar_guards_p
        src << " if (UNLIKELY(!(RB_TYPE_P(GET_SELF(), T_OBJECT)))) {\n"
        src << " goto ivar_cancel;\n"
        src << " }\n"
      end

      C.fprintf(f, src)
      compile_insns(0, 0, status, iseq.body, f)
      compile_cancel_handler(f, iseq.body, status)
      C.fprintf(f, "#undef GET_SELF\n")
      return status.success
    end

    # Compile one conditional branch. If it has branchXXX insn, this should be
    # called multiple times for each branch.
    def compile_insns(stack_size, pos, status, body, f)
      branch = C.compile_branch.new # not freed for now
      branch.stack_size = stack_size
      branch.finish_p = false

      while pos < body.iseq_size && !already_compiled?(status, pos) && !branch.finish_p
        insn = INSNS.fetch(C.rb_vm_insn_decode(body.iseq_encoded[pos]))
        status.stack_size_for_pos[pos] = branch.stack_size

        C.fprintf(f, "\nlabel_#{pos}: /* #{insn.name} */\n")
        pos = compile_insn(insn, pos, status, body.iseq_encoded + (pos+1), body, branch, f)
        if status.success && branch.stack_size > body.stack_max
          if C.mjit_opts.warnings || C.mjit_opts.verbose > 0
            $stderr.puts "MJIT warning: JIT stack size (#{branch.stack_size}) exceeded its max size (#{body.stack_max})"
          end
          status.success = false
        end
        break unless status.success
      end
    end

    # Main function of JIT compilation, vm_exec_core counterpart for JIT. Compiles one insn to `f`,
    # may modify b->stack_size, and returns the next position.
    #
    # When you add a new instruction to insns.def, it would be nice to have JIT compilation support here but
    # it's optional. This JIT compiler just ignores an ISeq that includes an unknown instruction, and an ISeq
    # that does not include one can be compiled as usual.
    def compile_insn(insn, pos, status, operands, body, b, f)
      sp_inc = C.mjit_call_attribute_sp_inc(insn.bin, operands)
      next_pos = pos + insn.len

      result = compile_insn_entry(f, insn, b.stack_size, sp_inc, status.local_stack_p, pos, next_pos, insn.len,
                                  status.inlined_iseqs.nil?, status, operands, body)
      if result.nil?
        if C.mjit_opts.warnings || C.mjit_opts.verbose > 0
          $stderr.puts "MJIT warning: Skipped to compile unsupported instruction: #{insn.name}"
        end
        status.success = false
      else
        src, next_pos, finish_p, compile_insns_p = result

        C.fprintf(f, src)
        b.stack_size += sp_inc

        if finish_p
          b.finish_p = true
        end
        if compile_insns_p
          if already_compiled?(status, pos + insn.len)
            C.fprintf(f, "goto label_#{pos + insn.len};\n")
          else
            compile_insns(b.stack_size, pos + insn.len, status, body, f)
          end
        end
      end

      # If next_pos is already compiled and this branch is not finished yet,
      # the next instruction won't be compiled next in the C code, so it needs a `goto`.
      if !b.finish_p && next_pos < body.iseq_size && already_compiled?(status, next_pos)
        C.fprintf(f, "goto label_#{next_pos};\n")

        # Verify that the stack size assumption is the same among multiple branches
        if status.stack_size_for_pos[next_pos] != b.stack_size
          if C.mjit_opts.warnings || C.mjit_opts.verbose > 0
            $stderr.puts "MJIT warning: JIT stack assumption is not the same between branches (#{status.stack_size_for_pos[next_pos]} != #{b.stack_size})\n"
          end
          status.success = false
        end
      end
      return next_pos
    end

    # mjit_compile.inc.erb
    def compile_insn_entry(f, insn, stack_size, sp_inc, local_stack_p, pos, next_pos, insn_len, inlined_iseq_p, status, operands, body)
      finish_p = false
      compile_insns = false

      # TODO: define this outside this method, or at least cache it
      opt_send_without_block = INSNS.values.find { |i| i.name == :opt_send_without_block }
      if opt_send_without_block.nil?
        raise 'opt_send_without_block not found'
      end
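
      # Find the opt_* insns that share opt_send_without_block's operand layout
      # and fall back to CALL_SIMPLE_METHOD(), so their calls can be compiled
      # through the same send fast path when the call cache supports it.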
      send_compatible_opt_insns = INSNS.values.select do |insn|
        insn.name.start_with?('opt_') && opt_send_without_block.opes == insn.opes &&
          insn.expr.lines.any? { |l| l.match(/\A\s+CALL_SIMPLE_METHOD\(\);\s+\z/) }
      end.map(&:name)

      case insn.name
      when *UNSUPPORTED_INSNS
        return nil
      when :opt_send_without_block, :send
        if src = compile_send(insn, stack_size, sp_inc, local_stack_p, pos, next_pos, status, operands, body)
          return src, next_pos, finish_p, compile_insns
        end
      when *send_compatible_opt_insns
        if C.has_cache_for_send(captured_cc_entries(status)[call_data_index(C.CALL_DATA.new(operands[0]), body)], insn.bin) &&
            src = compile_send(opt_send_without_block, stack_size, sp_inc, local_stack_p, pos, next_pos, status, operands, body)
          return src, next_pos, finish_p, compile_insns
        end
      when :getinstancevariable, :setinstancevariable
        if src = compile_ivar(insn.name, stack_size, pos, status, operands, body)
          return src, next_pos, finish_p, compile_insns
        end
      when :invokebuiltin, :opt_invokebuiltin_delegate
        if compile_invokebuiltin(f, insn, stack_size, sp_inc, body, operands)
          return '', next_pos, finish_p, compile_insns
        end
      when :opt_getconstant_path
        if src = compile_getconstant_path(stack_size, pos, insn_len, operands, status)
          return src, next_pos, finish_p, compile_insns
        end
      when :leave, :opt_invokebuiltin_delegate_leave
        src = +''

        # opt_invokebuiltin_delegate_leave also implements leave insn. We need to handle it here for inlining.
        if insn.name == :opt_invokebuiltin_delegate_leave
          compile_invokebuiltin(f, insn, stack_size, sp_inc, body, operands)
        else
          if stack_size != 1
            $stderr.puts "MJIT warning: Unexpected JIT stack_size on leave: #{stack_size}" # TODO: check mjit_opts?
            return nil
          end
        end

        # Skip vm_pop_frame for inlined call
        unless inlined_iseq_p
          # Cancel on interrupts to make leave insn leaf
          src << " if (UNLIKELY(RUBY_VM_INTERRUPTED_ANY(ec))) {\n"
          src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
          src << " reg_cfp->pc = original_body_iseq + #{pos};\n"
          src << " rb_threadptr_execute_interrupts(rb_ec_thread_ptr(ec), 0);\n"
          src << " }\n"
          src << " ec->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(reg_cfp);\n" # vm_pop_frame
        end
        src << " return stack[0];\n"
        finish_p = true
        return src, next_pos, finish_p, compile_insns
      end

      return compile_insn_default(insn, stack_size, sp_inc, local_stack_p, pos, next_pos, insn_len, inlined_iseq_p, operands)
    rescue => e
      puts e.full_message
      nil
    end

    private

    # Optimized case of send / opt_send_without_block instructions.
    # _mjit_compile_send.erb
    def compile_send(insn, stack_size, sp_inc, local_stack_p, pos, next_pos, status, operands, body)
      # compiler: Use captured cc to avoid race condition
      cd = C.CALL_DATA.new(operands[0])
      cd_index = call_data_index(cd, body)
      captured_cc = captured_cc_entries(status)[cd_index]

      # compiler: Inline send insn where some supported fastpath is used.
      ci = cd.ci
      kw_splat = (C.vm_ci_flag(ci) & C.VM_CALL_KW_SPLAT) > 0
      if !status.compile_info.disable_send_cache && has_valid_method_type?(captured_cc) && (
           # `CC_SET_FASTPATH(cd->cc, vm_call_cfunc_with_frame, ...)` in `vm_call_cfunc`
           (vm_cc_cme(captured_cc).def.type == C.VM_METHOD_TYPE_CFUNC && !C.rb_splat_or_kwargs_p(ci) && !kw_splat) ||
           # `CC_SET_FASTPATH(cc, vm_call_iseq_setup_func(...), vm_call_iseq_optimizable_p(...))` in `vm_callee_setup_arg`,
           # and support only non-VM_CALL_TAILCALL path inside it
           (vm_cc_cme(captured_cc).def.type == C.VM_METHOD_TYPE_ISEQ &&
            C.fastpath_applied_iseq_p(ci, captured_cc, iseq = def_iseq_ptr(vm_cc_cme(captured_cc).def)) &&
            (C.vm_ci_flag(ci) & C.VM_CALL_TAILCALL) == 0)
         )
        src = +"{\n"

        # JIT: Invalidate call cache if it requires vm_search_method. This allows us to inline some of the following things.
        src << " const struct rb_callcache *cc = (struct rb_callcache *)#{captured_cc};\n"
        src << " const rb_callable_method_entry_t *cc_cme = (rb_callable_method_entry_t *)#{vm_cc_cme(captured_cc)};\n"
src << " const VALUE recv = stack[#{stack_size + sp_inc - 1}];\n"
# If opt_class_of is true, use RBASIC_CLASS instead of CLASS_OF to reduce code size
opt_class_of = !maybe_special_const?(captured_cc.klass)
src << " if (UNLIKELY(#{opt_class_of ? 'RB_SPECIAL_CONST_P(recv)' : 'false'} || !vm_cc_valid_p(cc, cc_cme, #{opt_class_of ? 'RBASIC_CLASS' : 'CLASS_OF'}(recv)))) {\n"
src << " reg_cfp->pc = original_body_iseq + #{pos};\n"
src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
src << " goto send_cancel;\n"
src << " }\n"
# JIT: move sp and pc if necessary
pc_moved_p = compile_pc_and_sp(src, insn, stack_size, sp_inc, local_stack_p, next_pos)
# JIT: If ISeq is inlinable, call the inlined method without pushing a frame.
if iseq && status.inlined_iseqs && iseq.body.to_i == status.inlined_iseqs[pos]&.to_i
src << " {\n"
src << " VALUE orig_self = reg_cfp->self;\n"
src << " reg_cfp->self = stack[#{stack_size + sp_inc - 1}];\n"
src << " stack[#{stack_size + sp_inc - 1}] = _mjit#{status.compiled_id}_inlined_#{pos}(ec, reg_cfp, orig_self, original_iseq);\n"
src << " reg_cfp->self = orig_self;\n"
src << " }\n"
else
# JIT: Forked `vm_sendish` (except method_explorer = vm_search_method_wrap) to inline various things
src << " {\n"
src << " VALUE val;\n"
src << " struct rb_calling_info calling;\n"
if insn.name == :send
src << " calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, (const struct rb_callinfo *)#{ci}, (rb_iseq_t *)0x#{operands[1].to_s(16)}, FALSE);\n"
else
src << " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n"
end
src << " calling.kw_splat = #{kw_splat ? 1 : 0};\n"
src << " calling.recv = stack[#{stack_size + sp_inc - 1}];\n"
src << " calling.argc = #{C.vm_ci_argc(ci)};\n"
if vm_cc_cme(captured_cc).def.type == C.VM_METHOD_TYPE_CFUNC
# TODO: optimize this more
src << " calling.ci = (const struct rb_callinfo *)#{ci};\n" # creating local cd here because operand's cd->cc may not be the same as inlined cc.
src << " calling.cc = cc;"
src << " val = vm_call_cfunc_with_frame(ec, reg_cfp, &calling);\n"
else # :iseq
# fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
src << " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, cc_cme, 0, #{iseq.body.param.size}, #{iseq.body.local_table_size});\n"
if iseq.body.catch_except_p
src << " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"
src << " val = vm_exec(ec, true);\n"
else
src << " if ((val = jit_exec(ec)) == Qundef) {\n"
src << " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n" # This is vm_call0_body's code after vm_call_iseq_setup
src << " val = vm_exec(ec, false);\n"
src << " }\n"
end
end
src << " stack[#{stack_size + sp_inc - 1}] = val;\n"
src << " }\n"
# JIT: We should evaluate ISeq modified for TracePoint if it's enabled. Note: This is slow.
src << " if (UNLIKELY(!mjit_call_p)) {\n"
src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size + sp_inc};\n"
if !pc_moved_p
src << " reg_cfp->pc = original_body_iseq + #{next_pos};\n"
end
src << " RB_DEBUG_COUNTER_INC(mjit_cancel_invalidate_all);\n"
src << " goto cancel;\n"
src << " }\n"
end
src << "}\n"
return src
else
return nil
end
end
    # _mjit_compile_ivar.erb
    def compile_ivar(insn_name, stack_size, pos, status, operands, body)
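      # Look up the JIT-captured copy of this insn's inline cache: operands[1]
      # points into body.is_entries, and the same offset into status.is_entries
      # yields the snapshot taken at compile time, safe from races with the
      # running VM.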
      ic_copy = (status.is_entries + (C.iseq_inline_storage_entry.new(operands[1]) - body.is_entries)).iv_cache
      src = +''
      if !status.compile_info.disable_ivar_cache && ic_copy.source_shape_id != C.INVALID_SHAPE_ID
        # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`.
        # compile_pc_and_sp(src, insn, stack_size, sp_inc, local_stack_p, next_pos)

        # JIT: prepare vm_getivar/vm_setivar arguments and variables
        src << "{\n"
        src << " VALUE obj = GET_SELF();\n"
        src << " const shape_id_t source_shape_id = (shape_id_t)#{ic_copy.source_shape_id};\n"

        # JIT: cache hit path of vm_getivar/vm_setivar, or cancel JIT (recompile it with exivar)
        if insn_name == :setinstancevariable
          src << " const uint32_t index = #{ic_copy.attr_index - 1};\n"
          src << " const shape_id_t dest_shape_id = (shape_id_t)#{ic_copy.dest_shape_id};\n"
          src << " if (source_shape_id == ROBJECT_SHAPE_ID(obj) && \n"
          src << " dest_shape_id != ROBJECT_SHAPE_ID(obj)) {\n"
          src << " if (UNLIKELY(index >= ROBJECT_NUMIV(obj))) {\n"
          src << " rb_init_iv_list(obj);\n"
          src << " }\n"
          src << " ROBJECT_SET_SHAPE_ID(obj, dest_shape_id);\n"
          src << " VALUE *ptr = ROBJECT_IVPTR(obj);\n"
          src << " RB_OBJ_WRITE(obj, &ptr[index], stack[#{stack_size - 1}]);\n"
          src << " }\n"
        else
          if ic_copy.attr_index == 0 # cache hit, but uninitialized iv
            src << " /* Uninitialized instance variable */\n"
            src << " if (source_shape_id == ROBJECT_SHAPE_ID(obj)) {\n"
            src << " stack[#{stack_size}] = Qnil;\n"
            src << " }\n"
          else
            src << " const uint32_t index = #{ic_copy.attr_index - 1};\n"
            src << " if (source_shape_id == ROBJECT_SHAPE_ID(obj)) {\n"
            src << " stack[#{stack_size}] = ROBJECT_IVPTR(obj)[index];\n"
            src << " }\n"
          end
        end
        src << " else {\n"
        src << " reg_cfp->pc = original_body_iseq + #{pos};\n"
        src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
        src << " goto ivar_cancel;\n"
        src << " }\n"
        src << "}\n"
        return src
      elsif insn_name == :getinstancevariable && !status.compile_info.disable_exivar_cache && ic_copy.source_shape_id != C.INVALID_SHAPE_ID
        # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`.
        # compile_pc_and_sp(src, insn, stack_size, sp_inc, local_stack_p, next_pos)

        # JIT: prepare vm_getivar's arguments and variables
        src << "{\n"
        src << " VALUE obj = GET_SELF();\n"
        src << " const shape_id_t source_shape_id = (shape_id_t)#{ic_copy.source_shape_id};\n"
        src << " const uint32_t index = #{ic_copy.attr_index - 1};\n"

        # JIT: cache hit path of vm_getivar, or cancel JIT (recompile it without any ivar optimization)
        src << " struct gen_ivtbl *ivtbl;\n"
        src << " if (LIKELY(FL_TEST_RAW(obj, FL_EXIVAR) && source_shape_id == rb_shape_get_shape_id(obj) && rb_ivar_generic_ivtbl_lookup(obj, &ivtbl))) {\n"
        src << " stack[#{stack_size}] = ivtbl->ivptr[index];\n"
        src << " }\n"
        src << " else {\n"
        src << " reg_cfp->pc = original_body_iseq + #{pos};\n"
        src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
        src << " goto exivar_cancel;\n"
        src << " }\n"
        src << "}\n"
        return src
      else
        return nil
      end
    end

    # _mjit_compile_invokebuiltin.erb
    def compile_invokebuiltin(f, insn, stack_size, sp_inc, body, operands)
      bf = C.RB_BUILTIN.new(operands[0])
      if bf.compiler > 0
        C.fprintf(f, "{\n")
        C.fprintf(f, " VALUE val;\n")
        C.builtin_compiler(f, bf, operands[1], stack_size, body.builtin_inline_p)
        C.fprintf(f, " stack[#{stack_size + sp_inc - 1}] = val;\n")
        C.fprintf(f, "}\n")
        return true
      else
        return false
      end
    end

    # _mjit_compile_getconstant_path.erb
    def compile_getconstant_path(stack_size, pos, insn_len, operands, status)
      ice = C.IC.new(operands[0]).entry
      if !status.compile_info.disable_const_cache && ice
        # JIT: Inline everything in IC, and cancel the slow path
        src = +" if (vm_inlined_ic_hit_p(#{ice.flags}, #{ice.value}, (const rb_cref_t *)#{to_addr(ice.ic_cref)}, reg_cfp->ep)) {\n"
        src << " stack[#{stack_size}] = #{ice.value};\n"
        src << " }\n"
        src << " else {\n"
        src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
        src << " reg_cfp->pc = original_body_iseq + #{pos};\n"
        src << " goto const_cancel;\n"
        src << " }\n"
        return src
      else
        return nil
      end
    end

    # _mjit_compile_insn.erb
    def compile_insn_default(insn, stack_size, sp_inc, local_stack_p, pos, next_pos, insn_len, inlined_iseq_p, operands)
      src = +''
      finish_p = false
      compile_insns = false

      # JIT: Declare stack_size to be used in some macro of _mjit_compile_insn_body.erb
      src << "{\n"
      if local_stack_p
        src << " MAYBE_UNUSED(unsigned int) stack_size = #{stack_size};\n"
      end

      # JIT: Declare variables for operands, popped values and return values
      insn.declarations.each do |decl|
        src << " #{decl};\n"
      end

      # JIT: Set const expressions for `RubyVM::OperandsUnifications` insn
      insn.preamble.each do |amble|
        src << "#{amble.sub(/const \S+\s+/, '')}\n"
      end

      # JIT: Initialize operands
      insn.opes.each_with_index do |ope, i|
        src << " #{ope.fetch(:name)} = (#{ope.fetch(:type)})#{operands[i]};\n"
        # TODO: resurrect comment_id
      end

      # JIT: Initialize popped values
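      # `pops` is declared bottom-to-top, so iterate with reversed indexes:
      # index 0 is the top of the stack, and the first declared pop reads the
      # deepest slot, stack[stack_size - pops.size].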
      insn.pops.reverse_each.with_index.reverse_each do |pop, i|
        src << " #{pop.fetch(:name)} = stack[#{stack_size - (i + 1)}];\n"
      end

      # JIT: move sp and pc if necessary
      pc_moved_p = compile_pc_and_sp(src, insn, stack_size, sp_inc, local_stack_p, next_pos)

      # JIT: Print insn body in insns.def
      next_pos = compile_insn_body(src, insn, pos, next_pos, insn_len, local_stack_p, stack_size, sp_inc, operands)

      # JIT: Set return values
      insn.rets.reverse_each.with_index do |ret, i|
        # TOPN(n) = ...
        src << " stack[#{stack_size + sp_inc - (i + 1)}] = #{ret.fetch(:name)};\n"
      end

      # JIT: We should evaluate ISeq modified for TracePoint if it's enabled. Note: This is slow.
      # leaf insn may not cancel JIT. leaf_without_check_ints is covered in RUBY_VM_CHECK_INTS of _mjit_compile_insn_body.erb.
      unless insn.always_leaf? || insn.leaf_without_check_ints?
        src << " if (UNLIKELY(!mjit_call_p)) {\n"
        src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size + sp_inc};\n"
        if !pc_moved_p
          src << " reg_cfp->pc = original_body_iseq + #{next_pos};\n"
        end
        src << " RB_DEBUG_COUNTER_INC(mjit_cancel_invalidate_all);\n"
        src << " goto cancel;\n"
        src << " }\n"
      end

      src << "}\n"

      # compiler: If insn has conditional JUMP, the code should go to the branch not targeted by JUMP next.
      if insn.expr.match?(/if\s+\([^{}]+\)\s+\{[^{}]+JUMP\([^)]+\);[^{}]+\}/)
        compile_insns = true
      end

      # compiler: If insn returns (leave) or does longjmp (throw), the branch should no longer be compiled. TODO: create attr for it?
      if insn.expr.match?(/\sTHROW_EXCEPTION\([^)]+\);/) || insn.expr.match?(/\bvm_pop_frame\(/)
        finish_p = true
      end

      return src, next_pos, finish_p, compile_insns
    end

    # _mjit_compile_insn_body.erb
    def compile_insn_body(src, insn, pos, next_pos, insn_len, local_stack_p, stack_size, sp_inc, operands)
      # Print a body of insn, but with macro expansion.
      expand_simple_macros(insn.expr).each_line do |line|
        # Expand dynamic macro here
        # TODO: support combination of following macros in the same line
        case line
        when /\A\s+RUBY_VM_CHECK_INTS\(ec\);\s+\z/
          if insn.leaf_without_check_ints? # lazily move PC and optionalize mjit_call_p here
            src << " if (UNLIKELY(RUBY_VM_INTERRUPTED_ANY(ec))) {\n"
            src << " reg_cfp->pc = original_body_iseq + #{next_pos};\n" # ADD_PC(INSN_ATTR(width));
            src << " rb_threadptr_execute_interrupts(rb_ec_thread_ptr(ec), 0);\n"
            src << " if (UNLIKELY(!mjit_call_p)) {\n"
            src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
            src << " RB_DEBUG_COUNTER_INC(mjit_cancel_invalidate_all);\n"
            src << " goto cancel;\n"
            src << " }\n"
            src << " }\n"
          else
            src << to_cstr(line)
          end
        when /\A\s+JUMP\((?<dest>[^)]+)\);\s+\z/
          dest = Regexp.last_match[:dest]

          if insn.name == :opt_case_dispatch # special case... TODO: use another macro to avoid checking name
            else_offset = cast_offset(operands[1])
            cdhash = C.cdhash_to_hash(operands[0])
            base_pos = pos + insn_len

            src << " switch (#{dest}) {\n"
            cdhash.each do |_key, offset|
              src << " case #{offset}:\n"
              src << " goto label_#{base_pos + offset};\n"
            end
            src << " case #{else_offset}:\n"
            src << " goto label_#{base_pos + else_offset};\n"
            src << " }\n"
          else
            # Before we `goto` next insn, we need to set return values, especially for getinlinecache
            insn.rets.reverse_each.with_index do |ret, i|
              # TOPN(n) = ...
              src << " stack[#{stack_size + sp_inc - (i + 1)}] = #{ret.fetch(:name)};\n"
            end

            next_pos = pos + insn_len + cast_offset(operands[0]) # workaround: assuming dest == operands[0]. TODO: avoid relying on it
            src << " goto label_#{next_pos};\n"
          end
        when /\A\s+CALL_SIMPLE_METHOD\(\);\s+\z/
          # For `opt_xxx`'s fallbacks.
          if local_stack_p
            src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
          end
          src << " reg_cfp->pc = original_body_iseq + #{pos};\n"
          src << " RB_DEBUG_COUNTER_INC(mjit_cancel_opt_insn);\n"
          src << " goto cancel;\n"
        when /\A(?<prefix>.+\b)INSN_LABEL\((?<name>[^)]+)\)(?<suffix>.+)\z/m
          prefix, name, suffix = Regexp.last_match[:prefix], Regexp.last_match[:name], Regexp.last_match[:suffix]
          src << "#{prefix}INSN_LABEL(#{name}_#{pos})#{suffix}"
        else
          if insn.handles_sp?
            # If insn.handles_sp? is true, cfp->sp might be changed inside insns (like vm_caller_setup_arg_block)
            # and thus we need to use cfp->sp, even when local_stack_p is TRUE. When insn.handles_sp? is true,
            # cfp->sp should be available too because _mjit_compile_pc_and_sp.erb sets it.
            src << to_cstr(line)
          else
            # If local_stack_p is TRUE and insn.handles_sp? is false, stack values are only available in local variables
            # for stack. So we need to replace those macros if local_stack_p is TRUE here.
            case line
            when /\bGET_SP\(\)/
              # reg_cfp->sp
              src << to_cstr(line.sub(/\bGET_SP\(\)/, local_stack_p ? '(stack + stack_size)' : 'GET_SP()'))
            when /\bSTACK_ADDR_FROM_TOP\((?<num>[^)]+)\)/
              # #define STACK_ADDR_FROM_TOP(n) (GET_SP()-(n))
              num = Regexp.last_match[:num]
              src << to_cstr(line.sub(/\bSTACK_ADDR_FROM_TOP\(([^)]+)\)/, local_stack_p ? "(stack + (stack_size - (#{num})))" : "STACK_ADDR_FROM_TOP(#{num})"))
            when /\bTOPN\((?<num>[^)]+)\)/
              # #define TOPN(n) (*(GET_SP()-(n)-1))
              num = Regexp.last_match[:num]
              src << to_cstr(line.sub(/\bTOPN\(([^)]+)\)/, local_stack_p ? "*(stack + (stack_size - (#{num}) - 1))" : "TOPN(#{num})"))
            else
              src << to_cstr(line)
            end
          end
        end
      end
      return next_pos
    end

    # _mjit_compile_pc_and_sp.erb
    def compile_pc_and_sp(src, insn, stack_size, sp_inc, local_stack_p, next_pos)
      # JIT: When an insn is leaf, we don't need to move pc for a catch table on catch_except_p, #caller_locations,
      # and rb_profile_frames. For check_ints, we lazily move PC when we have interruptions.
      pc_moved_p = false
      unless insn.always_leaf? || insn.leaf_without_check_ints?
        src << " reg_cfp->pc = original_body_iseq + #{next_pos};\n" # ADD_PC(INSN_ATTR(width));
        pc_moved_p = true
      end

      # JIT: move sp to use or preserve stack variables
      if local_stack_p
        # sp motion is optimized away for `handles_sp? #=> false` case.
        # Thus sp should be set properly before `goto cancel`.
        if insn.handles_sp?
          # JIT-only behavior (pushing JIT's local variables to VM's stack):
          push_size = -sp_inc + insn.rets.size - insn.pops.size
          src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{push_size};\n"
          push_size.times do |i|
            src << " *(reg_cfp->sp + #{i - push_size}) = stack[#{stack_size - push_size + i}];\n"
          end
        end
      else
        if insn.handles_sp?
          src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size - insn.pops.size};\n" # POPN(INSN_ATTR(popn));
        else
          src << " reg_cfp->sp = vm_base_ptr(reg_cfp) + #{stack_size};\n"
        end
      end
      return pc_moved_p
    end

    # Print the block to cancel inlined method call. It supports only `opt_send_without_block` for now.
    def compile_inlined_cancel_handler(f, body, inline_context)
      src = +"\ncancel:\n"
      src << " RB_DEBUG_COUNTER_INC(mjit_cancel);\n"
      src << " rb_mjit_recompile_inlining(original_iseq);\n"

      # Swap pc/sp set on cancel with original pc/sp.
      src << " const VALUE *current_pc = reg_cfp->pc;\n"
      src << " VALUE *current_sp = reg_cfp->sp;\n"
      src << " reg_cfp->pc = orig_pc;\n"
      src << " reg_cfp->sp = orig_sp;\n\n"

      # Lazily push the current call frame.
      src << " struct rb_calling_info calling;\n"
      src << " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n" # assumes `opt_send_without_block`
      src << " calling.argc = #{inline_context.orig_argc};\n"
      src << " calling.recv = reg_cfp->self;\n"
      src << " reg_cfp->self = orig_self;\n"
      # fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
      src << " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)#{inline_context.me}, 0, #{inline_context.param_size}, #{inline_context.local_size});\n\n"

      # Start usual cancel from here.
      src << " reg_cfp = ec->cfp;\n" # work on the new frame
      src << " reg_cfp->pc = current_pc;\n"
      src << " reg_cfp->sp = current_sp;\n"
      (0...body.stack_max).each do |i| # should be always `status->local_stack_p`
        src << " *(vm_base_ptr(reg_cfp) + #{i}) = stack[#{i}];\n"
      end

      # We're not just returning Qundef here so that caller's normal cancel handler can
      # push back `stack` to `cfp->sp`.
      src << " return vm_exec(ec, false);\n"
      C.fprintf(f, src)
    end

    # Print the block to cancel JIT execution.
    def compile_cancel_handler(f, body, status)
      if status.inlined_iseqs.nil? # the current ISeq is being inlined
        compile_inlined_cancel_handler(f, body, status.inline_context)
        return
      end

      src = +"\nsend_cancel:\n"
      src << " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n"
      src << " rb_mjit_recompile_send(original_iseq);\n"
      src << " goto cancel;\n"

      src << "\nivar_cancel:\n"
      src << " RB_DEBUG_COUNTER_INC(mjit_cancel_ivar_inline);\n"
      src << " rb_mjit_recompile_ivar(original_iseq);\n"
      src << " goto cancel;\n"

      src << "\nexivar_cancel:\n"
      src << " RB_DEBUG_COUNTER_INC(mjit_cancel_exivar_inline);\n"
      src << " rb_mjit_recompile_exivar(original_iseq);\n"
      src << " goto cancel;\n"

      src << "\nconst_cancel:\n"
      src << " rb_mjit_recompile_const(original_iseq);\n"
      src << " goto cancel;\n"

      src << "\ncancel:\n"
      src << " RB_DEBUG_COUNTER_INC(mjit_cancel);\n"
      if status.local_stack_p
        (0...body.stack_max).each do |i|
          src << " *(vm_base_ptr(reg_cfp) + #{i}) = stack[#{i}];\n"
        end
      end
      src << " return Qundef;\n"
      C.fprintf(f, src)
    end

    def precompile_inlinable_child_iseq(f, child_iseq, status, ci, cc, pos)
      child_status = C.compile_status.new # not freed for now
      child_status.compiled_iseq = status.compiled_iseq
      child_status.compiled_id = status.compiled_id
      init_compile_status(child_status, child_iseq.body, false) # not freed for now
      child_status.inline_context.orig_argc = C.vm_ci_argc(ci)
      child_status.inline_context.me = vm_cc_cme(cc).to_i
      child_status.inline_context.param_size = child_iseq.body.param.size
      child_status.inline_context.local_size = child_iseq.body.local_table_size
      if child_iseq.body.ci_size > 0 && child_status.cc_entries_index == -1
        return false
      end
      init_ivar_compile_status(child_iseq.body, child_status)

      src = +"ALWAYS_INLINE(static VALUE _mjit#{status.compiled_id}_inlined_#{pos}(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self, const rb_iseq_t *original_iseq));\n"
      src << "static inline VALUE\n_mjit#{status.compiled_id}_inlined_#{pos}(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self, const rb_iseq_t *original_iseq)\n{\n"
      src << " const VALUE *orig_pc = reg_cfp->pc;\n"
      src << " VALUE *orig_sp = reg_cfp->sp;\n"
      C.fprintf(f, src)

      success = compile_body(f, child_iseq, child_status)
      C.fprintf(f, "\n} /* end of _mjit#{status.compiled_id}_inlined_#{pos} */\n\n")
      return success
    end

    def precompile_inlinable_iseqs(f, iseq, status)
      body = iseq.body
      pos = 0
      while pos < body.iseq_size
        insn = INSNS.fetch(C.rb_vm_insn_decode(body.iseq_encoded[pos]))
        if insn.name == :opt_send_without_block || insn.name == :opt_size # `compile_inlined_cancel_handler` supports only `opt_send_without_block`
          cd = C.CALL_DATA.new(body.iseq_encoded[pos + 1])
          ci = cd.ci
          cc = captured_cc_entries(status)[call_data_index(cd, body)] # use copy to avoid race condition
          if (child_iseq = rb_mjit_inlinable_iseq(ci, cc)) != nil
            status.inlined_iseqs[pos] = child_iseq.body

            if C.mjit_opts.verbose >= 1 # print beforehand because ISeq may be GCed during copy job.
              child_location = child_iseq.body.location
              $stderr.puts "JIT inline: #{child_location.label}@#{C.rb_iseq_path(child_iseq)}:#{C.rb_iseq_first_lineno(child_iseq)} " \
                           "=> #{iseq.body.location.label}@#{C.rb_iseq_path(iseq)}:#{C.rb_iseq_first_lineno(iseq)}"
            end
            if !precompile_inlinable_child_iseq(f, child_iseq, status, ci, cc, pos)
              return false
            end
          end
        end
        pos += insn.len
      end
      return true
    end

    def init_compile_status(status, body, compile_root_p)
      status.stack_size_for_pos = Fiddle.malloc(Fiddle::SIZEOF_INT * body.iseq_size)
      body.iseq_size.times do |i|
        status.stack_size_for_pos[i] = C.NOT_COMPILED_STACK_SIZE
      end
      if compile_root_p
        status.inlined_iseqs = Fiddle.malloc(Fiddle::SIZEOF_VOIDP * body.iseq_size)
        body.iseq_size.times do |i|
          status.inlined_iseqs[i] = nil
        end
      end
      if ISEQ_IS_SIZE(body) > 0
        status.is_entries = Fiddle.malloc(C.iseq_inline_storage_entry.sizeof * ISEQ_IS_SIZE(body))
      end
      if body.ci_size > 0
        status.cc_entries_index = C.mjit_capture_cc_entries(status.compiled_iseq, body)
      else
        status.cc_entries_index = -1
      end
      if compile_root_p
        status.compile_info = rb_mjit_iseq_compile_info(body)
      else
        status.compile_info = Fiddle.malloc(C.rb_mjit_compile_info.sizeof)
        status.compile_info.disable_ivar_cache = false
        status.compile_info.disable_exivar_cache = false
        status.compile_info.disable_send_cache = false
        status.compile_info.disable_inlining = false
        status.compile_info.disable_const_cache = false
      end
    end

    def init_ivar_compile_status(body, status)
      C.mjit_capture_is_entries(body, status.is_entries)

      pos = 0
      while pos < body.iseq_size
        insn = INSNS.fetch(C.rb_vm_insn_decode(body.iseq_encoded[pos]))
        if insn.name == :getinstancevariable || insn.name == :setinstancevariable
          status.merge_ivar_guards_p = true
          return
        end
        pos += insn.len
      end
    end

    # Expand simple macro that doesn't require dynamic C code.
    def expand_simple_macros(arg_expr)
      arg_expr.dup.tap do |expr|
        # For `leave`. We can't proceed next ISeq in the same JIT function.
        expr.gsub!(/^(?<indent>\s*)RESTORE_REGS\(\);\n/) do
          indent = Regexp.last_match[:indent]
          <<-end.gsub(/^ {12}/, '')
            #if OPT_CALL_THREADED_CODE
            #{indent}rb_ec_thread_ptr(ec)->retval = val;
            #{indent}return 0;
            #else
            #{indent}return val;
            #endif
          end
        end

        expr.gsub!(/^(?<indent>\s*)NEXT_INSN\(\);\n/) do
          indent = Regexp.last_match[:indent]
          <<-end.gsub(/^ {12}/, '')
            #{indent}UNREACHABLE_RETURN(Qundef);
          end
        end
      end
    end

    def to_cstr(expr)
      expr.gsub(/^(?!#)/, ' ') # indent everything but preprocessor lines
    end

    # Interpret unsigned long as signed long (VALUE -> OFFSET)
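    # The operand was fetched as an unsigned word, so a negative offset arrives
    # with its sign bit set; flipping the bits, negating, and subtracting one
    # applies two's complement to recover the signed value.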
    def cast_offset(offset)
      bits = "%0#{8 * Fiddle::SIZEOF_VOIDP}d" % offset.to_s(2)
      if bits[0] == '1' # negative
        offset = -bits.chars.map { |i| i == '0' ? '1' : '0' }.join.to_i(2) - 1
      end
      offset
    end

    def captured_cc_entries(status)
      status.compiled_iseq.jit_unit.cc_entries + status.cc_entries_index
    end

    def call_data_index(cd, body)
      cd - body.call_data
    end

    def vm_cc_cme(cc)
      # TODO: add VM_ASSERT like actual vm_cc_cme
      cc.cme_
    end

    def def_iseq_ptr(method_def)
      C.rb_iseq_check(method_def.body.iseq.iseqptr)
    end

    def rb_mjit_iseq_compile_info(body)
      body.jit_unit.compile_info
    end

    def ISEQ_IS_SIZE(body)
      body.ic_size + body.ivc_size + body.ise_size + body.icvarc_size
    end

    # Return true if an object of the class may be a special const (immediate).
    # It's "maybe" because Integer and Float are not guaranteed to be an immediate.
    # If this returns false, rb_class_of could be optimized to RBASIC_CLASS.
    def maybe_special_const?(klass)
      [
        C.rb_cFalseClass,
        C.rb_cNilClass,
        C.rb_cTrueClass,
        C.rb_cInteger,
        C.rb_cSymbol,
        C.rb_cFloat,
      ].include?(klass)
    end

    def has_valid_method_type?(cc)
      vm_cc_cme(cc) != nil
    end

    def already_compiled?(status, pos)
      status.stack_size_for_pos[pos] != C.NOT_COMPILED_STACK_SIZE
    end

    # Return an iseq pointer if cc has inlinable iseq.
    def rb_mjit_inlinable_iseq(ci, cc)
      if has_valid_method_type?(cc) &&
          C.vm_ci_flag(ci) & C.VM_CALL_TAILCALL == 0 && # inlining only non-tailcall path
          vm_cc_cme(cc).def.type == C.VM_METHOD_TYPE_ISEQ &&
          C.fastpath_applied_iseq_p(ci, cc, iseq = def_iseq_ptr(vm_cc_cme(cc).def)) &&
          inlinable_iseq_p(iseq.body) # CC_SET_FASTPATH in vm_callee_setup_arg
        return iseq
      end
      return nil
    end

    # Return true if the ISeq can be inlined without pushing a new control frame.
    def inlinable_iseq_p(body)
      # 1) If catch_except_p, caller frame should be preserved when callee catches an exception.
      #    Then we need to wrap `vm_exec()` but then we can't inline the call inside it.
      #
      # 2) If `body->catch_except_p` is false and `handles_sp?` of an insn is false,
      #    sp is not moved as we assume `status->local_stack_p = !body->catch_except_p`.
      #
      # 3) If `body->catch_except_p` is false and `always_leaf?` of an insn is true,
      #    pc is not moved.
      if body.catch_except_p
        return false
      end

      pos = 0
      while pos < body.iseq_size
        insn = INSNS.fetch(C.rb_vm_insn_decode(body.iseq_encoded[pos]))
        # All insns in the ISeq except `leave` (to be overridden in the inlined code)
        # should meet following strong assumptions:
        #   * Do not require `cfp->sp` motion
        #   * Do not move `cfp->pc`
        #   * Do not read any `cfp->pc`
        if insn.name == :invokebuiltin || insn.name == :opt_invokebuiltin_delegate || insn.name == :opt_invokebuiltin_delegate_leave
          # builtin insn's inlinability is handled by `Primitive.attr! 'inline'` per iseq
          if !body.builtin_inline_p
            return false
          end
        elsif insn.name != :leave && C.insn_may_depend_on_sp_or_pc(insn.bin, body.iseq_encoded + (pos + 1))
          return false
        end

        # At this moment, `cfp->ep` in an inlined method is not working.
        case insn.name
        when :getlocal,
             :getlocal_WC_0,
             :getlocal_WC_1,
             :setlocal,
             :setlocal_WC_0,
             :setlocal_WC_1,
             :getblockparam,
             :getblockparamproxy,
             :setblockparam
          return false
        end
        pos += insn.len
      end
      return true
    end

    # CPointer::Struct could be nil on field reference, and this is a helper to
    # handle that case while using CPointer::Struct#to_s in most cases.
    # @param struct [RubyVM::MJIT::CPointer::Struct]
    def to_addr(struct)
      struct&.to_s || 'NULL'
    end
  end

  private_constant(*constants)
end