Implement optimize send in yjit (#6488)

* Implement optimize send in yjit

This successfully makes all our benchmarks side-exit far less often for optimize-send reasons.
It makes some benchmarks faster, but not by as much as I'd like. I think this implementation
works, but there are definitely more optimal arrangements. For example, what if we compiled
send to a jump table? That seems like perhaps the most optimal thing we could do, but it's
not obvious (to me) how to implement it given our current setup.

Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>

* Attempt at fixing the issues raised by @XrXr

* fix allowlist

* returns 0 instead of nil when not found

* remove comment about encoding exception

* Fix up c changes

* Update assert

Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>

* get rid of unneeded code and fix the flags

* Apply suggestions from code review

Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>

* rename and fix typo

Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Jimmy Miller 2022-10-11 16:37:05 -04:00 committed by GitHub
parent 913979bede
commit 467992ee35
Notes: git 2022-10-12 05:37:32 +09:00
Merged-By: maximecb <maximecb@ruby-lang.org>
8 changed files with 262 additions and 17 deletions

symbol.c

@@ -1112,6 +1112,28 @@ rb_check_id(volatile VALUE *namep)
return lookup_str_id(name);
}
// Used by yjit for handling .send without throwing exceptions
ID
rb_get_symbol_id(VALUE name)
{
if (STATIC_SYM_P(name)) {
return STATIC_SYM2ID(name);
}
else if (DYNAMIC_SYM_P(name)) {
if (SYMBOL_PINNED_P(name)) {
return RSYMBOL(name)->id;
}
else {
return 0;
}
}
else {
RUBY_ASSERT_ALWAYS(RB_TYPE_P(name, T_STRING));
return lookup_str_id(name);
}
}
VALUE
rb_check_symbol(volatile VALUE *namep)
{
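Returning 0 here instead of raising is what makes this function safe to call from YJIT at compile time: the Rust side treats 0 as "no usable ID" and simply declines to compile. A minimal sketch of the consuming code, mirroring the gen_send_general hunk in codegen.rs later in this diff:

// Sketch of the Rust caller (see the codegen.rs hunk below)
let mid = unsafe { rb_get_symbol_id(compile_time_name) };
if mid == 0 {
    // Dynamic symbol that isn't pinned, or a string with no interned ID:
    // bail out of compilation rather than raise like rb_check_id would.
    gen_counter_incr!(asm, send_send_null_mid);
    return CantCompile;
}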

yjit.c

@@ -509,6 +509,8 @@ rb_get_cme_def_body_attr_id(const rb_callable_method_entry_t *cme)
return cme->def->body.attr.id;
}
ID rb_get_symbol_id(VALUE namep);
enum method_optimized_type
rb_get_cme_def_body_optimized_type(const rb_callable_method_entry_t *cme)
{

yjit/bindgen/src/main.rs

@@ -332,6 +332,7 @@ fn main() {
.allowlist_function("rb_get_cfp_ep_level")
.allowlist_function("rb_get_cme_def_type")
.allowlist_function("rb_get_cme_def_body_attr_id")
.allowlist_function("rb_get_symbol_id")
.allowlist_function("rb_get_cme_def_body_optimized_type")
.allowlist_function("rb_get_cme_def_body_optimized_index")
.allowlist_function("rb_get_cme_def_body_cfunc")

yjit/src/codegen.rs

@@ -1918,6 +1918,9 @@ pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2;
// up to 5 different classes
pub const SEND_MAX_DEPTH: i32 = 5;
// up to 20 different methods for send
pub const SEND_MAX_CHAIN_DEPTH: i32 = 20;
// Codegen for setting an instance variable.
// Preconditions:
// - receiver is in REG0
@@ -1929,7 +1932,15 @@ fn gen_set_ivar(
asm: &mut Assembler,
recv: VALUE,
ivar_name: ID,
flags: u32,
argc: i32,
) -> CodegenStatus {
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc as i32, ctx);
}
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_prepare_routine_call(jit, ctx, asm);
@@ -4148,8 +4159,9 @@ fn gen_send_cfunc(
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
block: Option<IseqPtr>,
- argc: i32,
recv_known_klass: *const VALUE,
flags: u32,
argc: i32,
) -> CodegenStatus {
let cfunc = unsafe { get_cme_def_body_cfunc(cme) };
let cfunc_argc = unsafe { get_mct_argc(cfunc) };
@@ -4158,8 +4170,6 @@ fn gen_send_cfunc(
// Create a side-exit to fall back to the interpreter
let side_exit = get_side_exit(jit, ocb, ctx);
- let flags = unsafe { vm_ci_flag(ci) };
// If the function expects a Ruby array of arguments
if cfunc_argc < 0 && cfunc_argc != -1 {
gen_counter_incr!(asm, send_cfunc_ruby_array_varg);
@@ -4228,6 +4238,11 @@ fn gen_send_cfunc(
return CantCompile;
}
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc as i32, ctx);
}
// Points to the receiver operand on the stack
let recv = ctx.stack_opnd(argc);
@@ -4440,6 +4455,7 @@ fn gen_send_bmethod(
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
block: Option<IseqPtr>,
flags: u32,
argc: i32,
) -> CodegenStatus {
let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) };
@@ -4469,7 +4485,7 @@ fn gen_send_bmethod(
}
let frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA;
- gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, argc)
gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc)
}
fn gen_send_iseq(
@@ -4483,11 +4499,11 @@ fn gen_send_iseq(
prev_ep: Option<*const VALUE>,
cme: *const rb_callable_method_entry_t,
block: Option<IseqPtr>,
flags: u32,
argc: i32,
) -> CodegenStatus {
let mut argc = argc;
- let flags = unsafe { vm_ci_flag(ci) };
// Create a side-exit to fall back to the interpreter
let side_exit = get_side_exit(jit, ocb, ctx);
@@ -4711,6 +4727,13 @@ fn gen_send_iseq(
Some(leaf_builtin_raw)
};
if let (None, Some(builtin_info)) = (block, leaf_builtin) {
// This is a .send call, which is not currently supported for builtins
if flags & VM_CALL_OPT_SEND != 0 {
gen_counter_incr!(asm, send_send_builtin);
return CantCompile;
}
let builtin_argc = unsafe { (*builtin_info).argc };
if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) {
asm.comment("inlined leaf builtin");
@@ -4758,6 +4781,11 @@ fn gen_send_iseq(
push_splat_args(required_args, ctx, asm, ocb, side_exit)
}
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc as i32, ctx);
}
if doing_kw_call {
// Here we're calling a method with keyword arguments and specifying
// keyword arguments at this call site.
@@ -5012,7 +5040,10 @@ fn gen_struct_aref(
cme: *const rb_callable_method_entry_t,
comptime_recv: VALUE,
_comptime_recv_klass: VALUE,
flags: u32,
argc: i32,
) -> CodegenStatus {
if unsafe { vm_ci_argc(ci) } != 0 {
return CantCompile;
}
@@ -5034,6 +5065,11 @@ fn gen_struct_aref(
}
}
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc as i32, ctx);
}
// All structs from the same Struct class should have the same
// length. So if our comptime_recv is embedded all runtime
// structs of the same class should be as well, and the same is
@@ -5067,11 +5103,18 @@ fn gen_struct_aset(
cme: *const rb_callable_method_entry_t,
comptime_recv: VALUE,
_comptime_recv_klass: VALUE,
flags: u32,
argc: i32,
) -> CodegenStatus {
if unsafe { vm_ci_argc(ci) } != 1 {
return CantCompile;
}
// This is a .send call and we need to adjust the stack
if flags & VM_CALL_OPT_SEND != 0 {
handle_opt_send_shift_stack(asm, argc, ctx);
}
let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
.try_into()
.unwrap();
@@ -5113,9 +5156,9 @@ fn gen_send_general(
// see vm_call_method().
let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
- let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
- let mid = unsafe { vm_ci_mid(ci) };
- let flags = unsafe { vm_ci_flag(ci) };
let mut argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
let mut mid = unsafe { vm_ci_mid(ci) };
let mut flags = unsafe { vm_ci_flag(ci) };
// Don't JIT calls with keyword splat
if flags & VM_CALL_KW_SPLAT != 0 {
@@ -5205,7 +5248,7 @@ fn gen_send_general(
VM_METHOD_TYPE_ISEQ => {
let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
- return gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, None, cme, block, argc);
return gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, None, cme, block, flags, argc);
}
VM_METHOD_TYPE_CFUNC => {
return gen_send_cfunc(
@@ -5216,8 +5259,9 @@ fn gen_send_general(
ci,
cme,
block,
argc,
&comptime_recv_klass,
flags,
argc,
);
}
VM_METHOD_TYPE_IVAR => {
@@ -5227,6 +5271,12 @@ fn gen_send_general(
return CantCompile;
}
// This is a .send call, not supported right now for getters
if flags & VM_CALL_OPT_SEND != 0 {
gen_counter_incr!(asm, send_send_getter);
return CantCompile;
}
if c_method_tracing_currently_enabled(jit) {
// Can't generate code for firing c_call and c_return events
// :attr-tracing:
@@ -5270,12 +5320,12 @@ fn gen_send_general(
return CantCompile;
} else {
let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
- return gen_set_ivar(jit, ctx, asm, comptime_recv, ivar_name);
return gen_set_ivar(jit, ctx, asm, comptime_recv, ivar_name, flags, argc);
}
}
// Block method, e.g. define_method(:foo) { :my_block }
VM_METHOD_TYPE_BMETHOD => {
- return gen_send_bmethod(jit, ctx, asm, ocb, ci, cme, block, argc);
return gen_send_bmethod(jit, ctx, asm, ocb, ci, cme, block, flags, argc);
}
VM_METHOD_TYPE_ZSUPER => {
gen_counter_incr!(asm, send_zsuper_method);
@@ -5296,11 +5346,116 @@ fn gen_send_general(
}
// Send family of methods, e.g. call/apply
VM_METHOD_TYPE_OPTIMIZED => {
let opt_type = unsafe { get_cme_def_body_optimized_type(cme) };
match opt_type {
OPTIMIZED_METHOD_TYPE_SEND => {
- gen_counter_incr!(asm, send_optimized_method_send);
- return CantCompile;
// This is for method calls like `foo.send(:bar)`.
// The `send` method does not get its own stack frame;
// instead we look up the target method and call it,
// doing some stack shifting based on the VM_CALL_OPT_SEND flag
let starting_context = *ctx;
if argc == 0 {
gen_counter_incr!(asm, send_send_wrong_args);
return CantCompile;
}
argc -= 1;
let compile_time_name = jit_peek_at_stack(jit, ctx, argc as isize);
if !compile_time_name.string_p() && !compile_time_name.static_sym_p() {
gen_counter_incr!(asm, send_send_chain_not_string_or_sym);
return CantCompile;
}
mid = unsafe { rb_get_symbol_id(compile_time_name) };
if mid == 0 {
gen_counter_incr!(asm, send_send_null_mid);
return CantCompile;
}
cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
if cme.is_null() {
gen_counter_incr!(asm, send_send_null_cme);
return CantCompile;
}
// We aren't going to handle `send(send(:foo))`. We would need to
// do the stack manipulation here twice, or keep track of how many
// levels deep we need to shift the stack.
// Because of how exits currently work, we can't do any stack
// manipulation until we know we will no longer side-exit.
let def_type = unsafe { get_cme_def_type(cme) };
if let VM_METHOD_TYPE_OPTIMIZED = def_type {
let opt_type = unsafe { get_cme_def_body_optimized_type(cme) };
if let OPTIMIZED_METHOD_TYPE_SEND = opt_type {
gen_counter_incr!(asm, send_send_nested);
return CantCompile;
}
}
flags |= VM_CALL_FCALL | VM_CALL_OPT_SEND;
assume_method_lookup_stable(jit, ocb, comptime_recv_klass, cme);
let (known_class, type_mismatch_exit) = {
if compile_time_name.string_p() {
(
unsafe { rb_cString },
counted_exit!(ocb, side_exit, send_send_chain_not_string),
)
} else {
(
unsafe { rb_cSymbol },
counted_exit!(ocb, side_exit, send_send_chain_not_sym),
)
}
};
jit_guard_known_klass(
jit,
ctx,
asm,
ocb,
known_class,
ctx.stack_opnd(argc),
StackOpnd(argc as u16),
compile_time_name,
2, // We have string or symbol, so max depth is 2
type_mismatch_exit
);
// Need to do this here so we don't have too many live
// values for the register allocator.
let name_opnd = asm.load(ctx.stack_opnd(argc));
let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]);
asm.comment("chain_guard_send");
let chain_exit = counted_exit!(ocb, side_exit, send_send_chain);
asm.cmp(symbol_id_opnd, 0.into());
asm.jbe(chain_exit.into());
asm.cmp(symbol_id_opnd, mid.into());
jit_chain_guard(
JCC_JNE,
jit,
&starting_context,
asm,
ocb,
SEND_MAX_CHAIN_DEPTH as i32,
chain_exit,
);
// We have changed the argc, flags, mid, and cme, so we need to re-enter the match
// and compile whatever method we found from send.
continue;
}
OPTIMIZED_METHOD_TYPE_CALL => {
gen_counter_incr!(asm, send_optimized_method_call);
@@ -5320,6 +5475,8 @@ fn gen_send_general(
cme,
comptime_recv,
comptime_recv_klass,
flags,
argc,
);
}
OPTIMIZED_METHOD_TYPE_STRUCT_ASET => {
@@ -5332,6 +5489,8 @@ fn gen_send_general(
cme,
comptime_recv,
comptime_recv_klass,
flags,
argc,
);
}
_ => {
@@ -5354,6 +5513,32 @@ fn gen_send_general(
}
}
/// Shifts the stack for send in order to remove the name of the method
/// The comment below is borrowed from vm_call_opt_send in vm_insnhelper.c
/// E.g. when argc == 2
/// | | | | TOPN
/// +------+ | |
/// | arg1 | ---+ | | 0
/// +------+ | +------+
/// | arg0 | -+ +-> | arg1 | 1
/// +------+ | +------+
/// | sym | +---> | arg0 | 2
/// +------+ +------+
/// | recv | | recv | 3
///--+------+--------+------+------
///
/// We do this for our compile-time context and the actual stack
fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32, ctx: &mut Context) {
asm.comment("shift_stack");
for j in (0..argc).rev() {
let opnd = ctx.stack_opnd(j);
let opnd2 = ctx.stack_opnd(j + 1);
asm.mov(opnd2, opnd);
}
ctx.shift_stack(argc as usize);
}
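A worked trace of the loop for argc == 2, matching the diagram above (a sketch, not part of the commit):

// j = 1: mov(stack_opnd(2), stack_opnd(1))  -- arg0 overwrites the sym slot
// j = 0: mov(stack_opnd(1), stack_opnd(0))  -- arg1 slides down one slot
// ctx.shift_stack(2) then mirrors the shift in the compile-time Context and
// pops one slot, leaving [recv, arg0, arg1] with arg1 on top.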
fn gen_opt_send_without_block(
jit: &mut JITState,
ctx: &mut Context,
@@ -5515,10 +5700,10 @@ fn gen_invokesuper(
VM_METHOD_TYPE_ISEQ => {
let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
- gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, None, cme, block, argc)
gen_send_iseq(jit, ctx, asm, ocb, iseq, ci, frame_type, None, cme, block, ci_flags, argc)
}
VM_METHOD_TYPE_CFUNC => {
- gen_send_cfunc(jit, ctx, asm, ocb, ci, cme, block, argc, ptr::null())
gen_send_cfunc(jit, ctx, asm, ocb, ci, cme, block, ptr::null(), ci_flags, argc)
}
_ => unreachable!(),
}
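Taken together, the new send path works like this (a compile-time summary of the hunks above, not new code):

// For a call site like `obj.send(:foo, x)`, gen_send_general now:
// 1. does argc -= 1 and peeks at the method-name operand; only String or
//    Symbol names are compiled
// 2. resolves mid = rb_get_symbol_id(name) and
//    cme = rb_callable_method_entry(klass, mid), bailing out on 0/NULL
// 3. guards the name's class, then chain-guards that the runtime symbol ID
//    still equals mid (up to SEND_MAX_CHAIN_DEPTH = 20 names per site)
// 4. sets VM_CALL_FCALL | VM_CALL_OPT_SEND and `continue`s the match to
//    compile the target method as if it had been called directly
// 5. the chosen gen_* path then calls handle_opt_send_shift_stack to drop
//    the name from the stack before setting up the frame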

yjit/src/core.rs

@@ -1061,6 +1061,21 @@ impl Context {
return top;
}
pub fn shift_stack(&mut self, argc: usize) {
assert!(argc < self.stack_size.into());
let method_name_index = (self.stack_size - argc as u16 - 1) as usize;
for i in method_name_index..(self.stack_size - 1) as usize {
if i + 1 < MAX_TEMP_TYPES {
self.temp_types[i] = self.temp_types[i + 1];
self.temp_mapping[i] = self.temp_mapping[i + 1];
}
}
self.stack_pop(1);
}
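A worked example of the bookkeeping, again for argc == 2 (a sketch; values assumed for illustration):

// stack (bottom -> top): [recv, :name, arg0, arg1], stack_size == 4
// method_name_index = 4 - 2 - 1 = 1               // the slot holding :name
// loop: temp_types[1..3] <- temp_types[2..4]      // learned types follow the
//       temp_mapping[1..3] <- temp_mapping[2..4]  // args to their new slots
// stack_pop(1) then shrinks the stack to [recv, arg0, arg1]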
/// Get an operand pointing to a slot on the temp stack
pub fn stack_opnd(&self, idx: i32) -> Opnd {
// SP points just above the topmost value

yjit/src/cruby.rs

@@ -352,6 +352,10 @@ impl VALUE {
self == Qnil
}
pub fn string_p(self) -> bool {
self.class_of() == unsafe { rb_cString }
}
/// Read the flags bits from the RBasic object, then return a Ruby type enum (e.g. RUBY_T_ARRAY)
pub fn builtin_type(self) -> ruby_value_type {
(self.builtin_flags() & (RUBY_T_MASK as usize)) as ruby_value_type
@@ -629,6 +633,7 @@ mod manual_defs {
pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit;
pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit;
pub const VM_CALL_ZSUPER : u32 = 1 << VM_CALL_ZSUPER_bit;
pub const VM_CALL_OPT_SEND : u32 = 1 << VM_CALL_OPT_SEND_bit;
// From internal/struct.h - in anonymous enum, so we can't easily import it
pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER2 | RUBY_FL_USER1) as usize;

yjit/src/cruby_bindings.inc.rs

@@ -1362,6 +1362,9 @@ extern "C" {
extern "C" {
pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID;
}
extern "C" {
pub fn rb_get_symbol_id(namep: VALUE) -> ID;
}
extern "C" {
pub fn rb_get_cme_def_body_optimized_type(
cme: *const rb_callable_method_entry_t,

yjit/src/stats.rs

@@ -171,7 +171,6 @@ make_counters! {
send_zsuper_method,
send_undef_method,
send_optimized_method,
- send_optimized_method_send,
send_optimized_method_call,
send_optimized_method_block_call,
send_missing_method,
@@ -197,6 +196,19 @@ make_counters! {
send_args_splat_non_iseq,
send_args_splat_cfunc,
send_iseq_ruby2_keywords,
send_send_not_imm,
send_send_wrong_args,
send_send_null_mid,
send_send_null_cme,
send_send_nested,
send_send_chain,
send_send_chain_string,
send_send_chain_not_string,
send_send_chain_not_sym,
send_send_chain_not_string_or_sym,
send_send_getter,
send_send_builtin,
send_bmethod_ractor,
send_bmethod_block_arg,