From b5b6ab4194f16e96ee5004288cc469ac1bca41a3 Mon Sep 17 00:00:00 2001
From: Alan Wu
Date: Fri, 26 Nov 2021 18:00:42 -0500
Subject: [PATCH] YJIT: Add ability to exit to interpreter from stubs

Previously, YJIT assumed that it was always possible to generate a new
basic block when servicing a stub in branch_stub_hit(). When YJIT is
out of executable memory, for example, this assumption doesn't hold.

Add handling to branch_stub_hit() for servicing stubs without
consuming more executable memory: a new code path exits to the
interpreter at the location the branch stub represents. The new code
path reconstructs interpreter state in branch_stub_hit() and then
exits with a new snippet called `code_for_exit_from_stub` that returns
`Qundef` from the YJIT native stack frame.
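
For context, here is a rough sketch of the native frame shape this
relies on. The entry prologue saves the CFP, EC, and SP registers on
the C stack (see yjit_entry_prologue()), so the exit snippet only has
to pop them in reverse order and return `Qundef` to make the
interpreter run the frame. This is an illustration under those
assumptions, not the code in this patch; `sketch_exit_from_stub` is a
hypothetical name:

```c
// The entry prologue saves the registers roughly like this:
//   push(cb, REG_CFP);
//   push(cb, REG_EC);
//   push(cb, REG_SP);
// so unwinding the YJIT native frame is three pops and a ret.
static void
sketch_exit_from_stub(codeblock_t *cb)
{
    pop(cb, REG_SP);                // undo push of REG_SP
    pop(cb, REG_EC);                // undo push of REG_EC
    pop(cb, REG_CFP);               // undo push of REG_CFP
    mov(cb, RAX, imm_opnd(Qundef)); // Qundef: run the frame in the interpreter
    ret(cb);                        // return to the caller of the JIT code
}
```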

As this change adds another place where we regenerate code from a
`branch_t`, extract the logic for it into a new function and call it
regenerate_branch(). While we are at it, make the branch shrinking
code path in branch_stub_hit() more explicit.
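
For background on why regeneration can change the size of a branch:
each branch is written by a shape-aware generation function, and for
the SHAPE_NEXT0/SHAPE_NEXT1 shapes a jump to a fallthrough target can
be omitted entirely. The sketch below is modeled on YJIT's
gen_jump_branch(); `sketch_jump_branch` is a made-up name for
illustration:

```c
// Sketch of a branchgen_fn callback. With SHAPE_NEXT0, target 0 is
// the very next code in the codeblock, so nothing is emitted and the
// regenerated branch occupies zero bytes. That is why branch_stub_hit()
// truncates the codeblock after regenerating with a new shape.
static void
sketch_jump_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
    (void)target1; // unconditional jump; only one target
    switch (shape) {
      case SHAPE_NEXT0:
        break; // fall through to target 0; emit nothing
      case SHAPE_DEFAULT:
        jmp_ptr(cb, target0); // explicit jump when target 0 is elsewhere
        break;
    }
}
```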

This new functionality is hard to test without full support for
out-of-memory conditions. To verify this change, I ran
`RUBY_YJIT_ENABLE=1 make check -j12` with the following patch to
stress-test the new code path:

```diff
diff --git a/yjit_core.c b/yjit_core.c
index 4ab63d9806..5788b8c5ed 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -878,8 +878,12 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
                 cb_set_write_ptr(cb, branch->end_addr);
             }
 
+if (rand() < RAND_MAX/2) {
             // Compile the new block version
             p_block = gen_block_version(target, target_ctx, ec);
+}else{
+            p_block = NULL;
+}
 
             if (!p_block && branch_modified) {
                 // We couldn't generate a new block for the branch, but we modified the branch.
```

We can enable the new test along with other OOM tests once full
support lands.

Other small changes:

* yjit_utils.c (print_str): Update to work with the new native frame
  shape. Follow-up to 8fa0ee4d404.

* yjit_iface.c (rb_yjit_init): Run yjit_init_core() after
  yjit_init_codegen() so `cb` and `ocb` are available.
---
 bootstraptest/test_yjit.rb |  18 +++++
 yjit.c                     |   2 +
 yjit_codegen.c             |  20 +++++
 yjit_codegen.h             |   2 +
 yjit_core.c                | 158 ++++++++++++++++++++++++++-----------
 yjit_core.h                |   6 +-
 yjit_iface.c               |   5 +-
 yjit_utils.c               |   4 -
 8 files changed, 157 insertions(+), 58 deletions(-)

diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
index 28fe9446ec..ee8991833c 100644
--- a/bootstraptest/test_yjit.rb
+++ b/bootstraptest/test_yjit.rb
@@ -2434,6 +2434,24 @@ assert_equal 'ok', %q{
   A.new.use 1
 }
 
+assert_equal 'ok', %q{
+  # test hitting a branch stub when out of memory
+  def nimai(jita)
+    if jita
+      :ng
+    else
+      :ok
+    end
+  end
+
+  nimai(true)
+  nimai(true)
+
+  RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
+
+  nimai(false)
+} if false # disabled for now since OOM crashes in the test harness
+
 # block invalidation while out of memory
 assert_equal 'new', %q{
   def foo
diff --git a/yjit.c b/yjit.c
index 33517ca36d..56173a1360 100644
--- a/yjit.c
+++ b/yjit.c
@@ -123,6 +123,8 @@ YJIT_DECLARE_COUNTERS(
     compiled_iseq_count,
     compiled_block_count,
 
+    exit_from_branch_stub,
+
     invalidation_count,
     invalidate_method_lookup,
     invalidate_bop_redefined,
diff --git a/yjit_codegen.c b/yjit_codegen.c
index 26362a7064..2cd4fd2bda 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -382,6 +382,26 @@ yjit_gen_leave_exit(codeblock_t *cb)
     return code_ptr;
 }
 
+// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
+// to the interpreter when it cannot service a stub by generating new code.
+// Before coming here, branch_stub_hit() takes care of fully reconstructing
+// interpreter state.
+static void
+gen_code_for_exit_from_stub(void)
+{
+    codeblock_t *cb = ocb;
+    code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
+
+    GEN_COUNTER_INC(cb, exit_from_branch_stub);
+
+    pop(cb, REG_SP);
+    pop(cb, REG_EC);
+    pop(cb, REG_CFP);
+
+    mov(cb, RAX, imm_opnd(Qundef));
+    ret(cb);
+}
+
 // :side-exit:
 // Get an exit for the current instruction in the outlined block. The code
 // for each instruction often begins with several guards before proceeding
diff --git a/yjit_codegen.h b/yjit_codegen.h
index 4ae2536423..bbd29e671b 100644
--- a/yjit_codegen.h
+++ b/yjit_codegen.h
@@ -16,6 +16,8 @@ static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
 
 static void yjit_gen_block(block_t *block, rb_execution_context_t *ec);
 
+static void gen_code_for_exit_from_stub(void);
+
 static void yjit_init_codegen(void);
 
 #endif // #ifndef YJIT_CODEGEN_H
diff --git a/yjit_core.c b/yjit_core.c
index 32e0575d75..4460d325fc 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -9,6 +9,10 @@
 #include "yjit_core.h"
 #include "yjit_codegen.h"
 
+// For exiting from the YJIT native frame in branch_stub_hit().
+// Filled by gen_code_for_exit_from_stub().
+static uint8_t *code_for_exit_from_stub = NULL;
+
 /*
 Get an operand for the adjusted stack pointer address
 */
@@ -597,6 +601,52 @@ add_block_version(blockid_t blockid, block_t *block)
 #endif
 }
 
+static ptrdiff_t
+branch_code_size(const branch_t *branch)
+{
+    return branch->end_addr - branch->start_addr;
+}
+
+// Generate code for a branch, possibly rewriting and changing the size of it
+static void
+regenerate_branch(codeblock_t *cb, branch_t *branch)
+{
+    if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+        // Generating this branch would modify frozen bytes. Do nothing.
+        return;
+    }
+
+    const uint32_t old_write_pos = cb->write_pos;
+    const bool branch_terminates_block = branch->end_addr == branch->block->end_addr;
+
+    RUBY_ASSERT(branch->dst_addrs[0] != NULL);
+
+    cb_set_write_ptr(cb, branch->start_addr);
+    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
+    branch->end_addr = cb_get_write_ptr(cb);
+
+    if (branch_terminates_block) {
+        // Adjust block size
+        branch->block->end_addr = branch->end_addr;
+    }
+
+    // cb->write_pos is both a write cursor and a marker for the end of
+    // everything written out so far. Leave cb->write_pos at the end of the
+    // block before returning. This function only ever bumps or retains the
+    // end-of-block marker since that's what the majority of callers want.
+    // When the branch sits at the very end of the codeblock and it shrinks
+    // after regeneration, it's up to the caller to drop bytes off the end
+    // to not leave a gap and to implement branch->shape.
+    if (old_write_pos > cb->write_pos) {
+        // We rewound cb->write_pos to generate the branch, now restore it.
+        cb_set_pos(cb, old_write_pos);
+    }
+    else {
+        // The branch sits at the end of cb and consumed some memory.
+        // Keep cb->write_pos.
+    }
+}
+
 // Create a new outgoing branch entry for a block
 static branch_t*
 make_branch_entry(block_t *block, const ctx_t *src_ctx, branchgen_fn gen_fn)
@@ -777,13 +827,15 @@ gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t
 static uint8_t *
 branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
 {
-    uint8_t *dst_addr;
+    uint8_t *dst_addr = NULL;
 
     // Stop other ractors since we are going to patch machine code.
     // This is how the GC does it.
     RB_VM_LOCK_ENTER();
     rb_vm_barrier();
 
+    const ptrdiff_t branch_size_on_entry = branch_code_size(branch);
+
     RUBY_ASSERT(branch != NULL);
     RUBY_ASSERT(target_idx < 2);
     blockid_t target = branch->targets[target_idx];
@@ -794,18 +846,13 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
     if (branch->blocks[target_idx]) {
         dst_addr = branch->dst_addrs[target_idx];
     }
-    else
-    {
-        //fprintf(stderr, "\nstub hit, branch: %p, target idx: %d\n", branch, target_idx);
-        //fprintf(stderr, "blockid.iseq=%p, blockid.idx=%d\n", target.iseq, target.idx);
-        //fprintf(stderr, "chain_depth=%d\n", target_ctx->chain_depth);
-
+    else {
         // :stub-sp-flush:
         // Generated code do stack operations without modifying cfp->sp, while the
        // cfp->sp tells the GC what values on the stack to root. Generated code
         // generally takes care of updating cfp->sp when it calls runtime routines that
-        // could trigger GC, but for the case of branch stubs, it's inconvenient. So
-        // we do it here.
+        // could trigger GC, but it's inconvenient to do it before calling this function.
+        // So we do it here instead.
         VALUE *const original_interp_sp = ec->cfp->sp;
         ec->cfp->sp += target_ctx->sp_offset;
 
@@ -818,8 +865,11 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
 
         // If this block hasn't yet been compiled
         if (!p_block) {
+            const uint8_t branch_old_shape = branch->shape;
+            bool branch_modified = false;
+
             // If the new block can be generated right after the branch (at cb->write_pos)
-            if (cb_get_write_ptr(cb) == branch->end_addr && branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+            if (cb_get_write_ptr(cb) == branch->end_addr) {
                 // This branch should be terminating its block
                 RUBY_ASSERT(branch->end_addr == branch->block->end_addr);
 
@@ -827,43 +877,62 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
                 branch->shape = (uint8_t)target_idx;
 
                 // Rewrite the branch with the new, potentially more compact shape
-                cb_set_write_ptr(cb, branch->start_addr);
-                branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-                RUBY_ASSERT(cb_get_write_ptr(cb) <= branch->end_addr && "can't enlarge branches");
-                branch->end_addr = cb_get_write_ptr(cb);
-                branch->block->end_addr = cb_get_write_ptr(cb);
+                regenerate_branch(cb, branch);
+                branch_modified = true;
+
+                // Ensure that the branch terminates the codeblock just like
+                // before entering this if block. This drops bytes off the end
+                // in case we shrank the branch when regenerating.
+                cb_set_write_ptr(cb, branch->end_addr);
             }
 
             // Compile the new block version
             p_block = gen_block_version(target, target_ctx, ec);
-            RUBY_ASSERT(p_block);
+
+            if (!p_block && branch_modified) {
+                // We couldn't generate a new block for the branch, but we modified the branch.
+                // Restore the branch by regenerating it.
+                branch->shape = branch_old_shape;
+                regenerate_branch(cb, branch);
+            }
+        }
+
+        if (p_block) {
+            // Branch shape should reflect layout
             RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));
+
+            // Add this branch to the list of incoming branches for the target
+            rb_darray_append(&p_block->incoming, branch);
+
+            // Update the branch target address
+            dst_addr = p_block->start_addr;
+            branch->dst_addrs[target_idx] = dst_addr;
+
+            // Mark this branch target as patched (no longer a stub)
+            branch->blocks[target_idx] = p_block;
+
+            // Rewrite the branch with the new jump target address
+            regenerate_branch(cb, branch);
+
+            // Restore interpreter sp, since the code hitting the stub expects the original.
+            ec->cfp->sp = original_interp_sp;
         }
-
-        // Add this branch to the list of incoming branches for the target
-        rb_darray_append(&p_block->incoming, branch);
-
-        // Update the branch target address
-        dst_addr = p_block->start_addr;
-        branch->dst_addrs[target_idx] = dst_addr;
-
-        // Rewrite the branch with the new jump target address
-        if (branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
-            RUBY_ASSERT(branch->dst_addrs[0] != NULL);
-            uint32_t cur_pos = cb->write_pos;
-            cb_set_write_ptr(cb, branch->start_addr);
-            branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-            RUBY_ASSERT(cb_get_write_ptr(cb) == branch->end_addr && "branch can't change size");
-            cb_set_pos(cb, cur_pos);
+        else {
+            // Failed to service the stub by generating a new block, so now we
+            // need to exit to the interpreter at the stubbed location. We are
+            // intentionally *not* restoring original_interp_sp. At the time of
+            // writing, reconstructing interpreter state only involves setting
+            // cfp->sp and cfp->pc. We set both before trying to generate the
+            // block. All there is left to do to exit is to pop the native
+            // frame. We do that in code_for_exit_from_stub.
+            dst_addr = code_for_exit_from_stub;
         }
-
-        // Mark this branch target as patched (no longer a stub)
-        branch->blocks[target_idx] = p_block;
-
-        // Restore interpreter sp, since the code hitting the stub expects the original.
-        ec->cfp->sp = original_interp_sp;
     }
 
+    const ptrdiff_t new_branch_size = branch_code_size(branch);
+    RUBY_ASSERT_ALWAYS(new_branch_size >= 0);
+    RUBY_ASSERT_ALWAYS(new_branch_size <= branch_size_on_entry && "branch stubs should not enlarge branches");
+
     RB_VM_LOCK_LEAVE();
 
     // Return a pointer to the compiled block version
@@ -942,8 +1011,7 @@ gen_branch(
 
     // Call the branch generation function
     branch->start_addr = cb_get_write_ptr(cb);
-    gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], SHAPE_DEFAULT);
-    branch->end_addr = cb_get_write_ptr(cb);
+    regenerate_branch(cb, branch);
 }
 
 static void
@@ -1191,13 +1259,7 @@ invalidate_block_version(block_t *block)
         }
 
         // Rewrite the branch with the new jump target address
-        RUBY_ASSERT(branch->dst_addrs[0] != NULL);
-        uint32_t cur_pos = cb->write_pos;
-        cb_set_write_ptr(cb, branch->start_addr);
-        branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-        branch->end_addr = cb_get_write_ptr(cb);
-        branch->block->end_addr = cb_get_write_ptr(cb);
-        cb_set_pos(cb, cur_pos);
+        regenerate_branch(cb, branch);
 
         if (target_next && branch->end_addr > block->end_addr) {
             fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%ld block_size=%ld\n",
@@ -1243,5 +1305,5 @@ invalidate_block_version(block_t *block)
 static void
 yjit_init_core(void)
 {
-    // Nothing yet
+    gen_code_for_exit_from_stub();
 }
diff --git a/yjit_core.h b/yjit_core.h
index f31fd58230..299dae81f5 100644
--- a/yjit_core.h
+++ b/yjit_core.h
@@ -192,8 +192,8 @@ typedef struct yjit_branch_entry
     struct yjit_block_version *block;
 
     // Positions where the generated code starts and ends
-    uint8_t* start_addr;
-    uint8_t* end_addr;
+    uint8_t *start_addr;
+    uint8_t *end_addr;
 
     // Context right after the branch instruction
     ctx_t src_ctx;
@@ -204,7 +204,7 @@ typedef struct yjit_branch_entry
     struct yjit_block_version *blocks[2];
 
     // Jump target addresses
-    uint8_t* dst_addrs[2];
+    uint8_t *dst_addrs[2];
 
     // Branch code generation function
     branchgen_fn gen_fn;
diff --git a/yjit_iface.c b/yjit_iface.c
index a569128dce..917a32cbfa 100644
--- a/yjit_iface.c
+++ b/yjit_iface.c
@@ -1232,8 +1232,7 @@ rb_yjit_init(struct rb_yjit_options *options)
     }
 
     // If type propagation is disabled, max 1 version per block
-    if (rb_yjit_opts.no_type_prop)
-    {
+    if (rb_yjit_opts.no_type_prop) {
         rb_yjit_opts.max_versions = 1;
     }
 
@@ -1241,8 +1240,8 @@ rb_yjit_init(struct rb_yjit_options *options)
     blocks_assuming_single_ractor_mode = st_init_numtable();
     blocks_assuming_bops = st_init_numtable();
 
-    yjit_init_core();
     yjit_init_codegen();
+    yjit_init_core();
 
     // YJIT Ruby module
     mYjit = rb_define_module_under(rb_cRubyVM, "YJIT");
diff --git a/yjit_utils.c b/yjit_utils.c
index ce89c5dd96..cbc50a4932 100644
--- a/yjit_utils.c
+++ b/yjit_utils.c
@@ -101,13 +101,9 @@ print_str(codeblock_t *cb, const char *str)
         cb_write_byte(cb, (uint8_t)str[i]);
     cb_write_byte(cb, 0);
 
-    push(cb, RSP); // Alignment
-
     // Call the print function
     mov(cb, RAX, const_ptr_opnd((void*)&print_str_cfun));
     call(cb, RAX);
 
-    pop(cb, RSP); // Alignment
-
     pop_regs(cb);
 }