mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
YJIT: Fail gracefully while OOM for new entry points
YJIT: Fail gracefully while OOM for new entry points. Previously, YJIT crashed with rb_bug() when asked to compile new methods while out of executable memory. To handle this situation gracefully, this change keeps track of all the blocks compiled each invocation in case YJIT runs out of memory in the middle of a compilation sequence. The list is used to free all blocks in case compilation fails. yjit_gen_block() is renamed to gen_single_block() to make it distinct from gen_block_version(). Call to limit_block_version() and block_t allocation is moved into the function to help tidy error checking in the outer loop. limit_block_version() now returns by value. I feel that an out parameter with conditional mutation is unnecessarily hard to read in code that does not need to go for last-drop performance. There is a good chance that the optimizer is able to output identical code anyway.
This commit is contained in:
parent
a84dc9d80d
commit
d0772632bf
Notes:
git
2021-12-02 02:26:03 +09:00
7 changed files with 131 additions and 66 deletions
|
@ -2474,6 +2474,18 @@ assert_equal 'new', %q{
|
||||||
test
|
test
|
||||||
} if false # disabled for now since OOM crashes in the test harness
|
} if false # disabled for now since OOM crashes in the test harness
|
||||||
|
|
||||||
|
assert_equal 'ok', %q{
|
||||||
|
# Try to compile new method while OOM
|
||||||
|
def foo
|
||||||
|
:ok
|
||||||
|
end
|
||||||
|
|
||||||
|
RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
|
||||||
|
|
||||||
|
foo
|
||||||
|
foo
|
||||||
|
}
|
||||||
|
|
||||||
# struct aref embedded
|
# struct aref embedded
|
||||||
assert_equal '2', %q{
|
assert_equal '2', %q{
|
||||||
def foo(s)
|
def foo(s)
|
||||||
|
|
1
yjit.c
1
yjit.c
|
@ -122,6 +122,7 @@ YJIT_DECLARE_COUNTERS(
|
||||||
vm_insns_count,
|
vm_insns_count,
|
||||||
compiled_iseq_count,
|
compiled_iseq_count,
|
||||||
compiled_block_count,
|
compiled_block_count,
|
||||||
|
compilation_failure,
|
||||||
|
|
||||||
exit_from_branch_stub,
|
exit_from_branch_stub,
|
||||||
|
|
||||||
|
|
4
yjit.rb
4
yjit.rb
|
@ -193,8 +193,12 @@ module RubyVM::YJIT
|
||||||
total_insns_count = retired_in_yjit + stats[:vm_insns_count]
|
total_insns_count = retired_in_yjit + stats[:vm_insns_count]
|
||||||
yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count
|
yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count
|
||||||
|
|
||||||
|
# Number of failed compiler invocations
|
||||||
|
compilation_failure = stats[:compilation_failure]
|
||||||
|
|
||||||
$stderr.puts "bindings_allocations: " + ("%10d" % stats[:binding_allocations])
|
$stderr.puts "bindings_allocations: " + ("%10d" % stats[:binding_allocations])
|
||||||
$stderr.puts "bindings_set: " + ("%10d" % stats[:binding_set])
|
$stderr.puts "bindings_set: " + ("%10d" % stats[:binding_set])
|
||||||
|
$stderr.puts "compilation_failure: " + ("%10d" % compilation_failure) if compilation_failure != 0
|
||||||
$stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
|
$stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
|
||||||
$stderr.puts "compiled_block_count: " + ("%10d" % stats[:compiled_block_count])
|
$stderr.puts "compiled_block_count: " + ("%10d" % stats[:compiled_block_count])
|
||||||
$stderr.puts "invalidation_count: " + ("%10d" % stats[:invalidation_count])
|
$stderr.puts "invalidation_count: " + ("%10d" % stats[:invalidation_count])
|
||||||
|
|
|
@ -545,10 +545,15 @@ yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
|
||||||
{
|
{
|
||||||
RUBY_ASSERT(cb != NULL);
|
RUBY_ASSERT(cb != NULL);
|
||||||
|
|
||||||
if (cb->write_pos + 1024 >= cb->mem_size) {
|
enum { MAX_PROLOGUE_SIZE = 1024 };
|
||||||
rb_bug("out of executable memory");
|
|
||||||
|
// Check if we have enough executable memory
|
||||||
|
if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const uint32_t old_write_pos = cb->write_pos;
|
||||||
|
|
||||||
// Align the current write positon to cache line boundaries
|
// Align the current write positon to cache line boundaries
|
||||||
cb_align_pos(cb, 64);
|
cb_align_pos(cb, 64);
|
||||||
|
|
||||||
|
@ -581,6 +586,9 @@ yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
|
||||||
yjit_pc_guard(cb, iseq);
|
yjit_pc_guard(cb, iseq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify MAX_PROLOGUE_SIZE
|
||||||
|
RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
|
||||||
|
|
||||||
return code_ptr;
|
return code_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -625,32 +633,46 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compile a sequence of bytecode instructions for a given basic block version
|
// Compile a sequence of bytecode instructions for a given basic block version.
|
||||||
static void
|
// Part of gen_block_version().
|
||||||
yjit_gen_block(block_t *block, rb_execution_context_t *ec)
|
static block_t *
|
||||||
|
gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
|
||||||
{
|
{
|
||||||
RUBY_ASSERT(cb != NULL);
|
RUBY_ASSERT(cb != NULL);
|
||||||
RUBY_ASSERT(block != NULL);
|
|
||||||
RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
|
|
||||||
|
|
||||||
// Copy the block's context to avoid mutating it
|
// Check if there is enough executable memory.
|
||||||
ctx_t ctx_copy = block->ctx;
|
// FIXME: This bound isn't enforced and long blocks can potentially use more.
|
||||||
|
enum { MAX_CODE_PER_BLOCK = 1024 };
|
||||||
|
if (cb->write_pos + MAX_CODE_PER_BLOCK >= cb->mem_size) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (ocb->write_pos + MAX_CODE_PER_BLOCK >= ocb->mem_size) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate the new block
|
||||||
|
block_t *block = calloc(1, sizeof(block_t));
|
||||||
|
if (!block) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the starting context to avoid mutating it
|
||||||
|
ctx_t ctx_copy = *start_ctx;
|
||||||
ctx_t *ctx = &ctx_copy;
|
ctx_t *ctx = &ctx_copy;
|
||||||
|
|
||||||
|
// Limit the number of specialized versions for this block
|
||||||
|
*ctx = limit_block_versions(blockid, ctx);
|
||||||
|
|
||||||
|
// Save the starting context on the block.
|
||||||
|
block->blockid = blockid;
|
||||||
|
block->ctx = *ctx;
|
||||||
|
|
||||||
|
RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
|
||||||
|
|
||||||
const rb_iseq_t *iseq = block->blockid.iseq;
|
const rb_iseq_t *iseq = block->blockid.iseq;
|
||||||
uint32_t insn_idx = block->blockid.idx;
|
uint32_t insn_idx = block->blockid.idx;
|
||||||
const uint32_t starting_insn_idx = insn_idx;
|
const uint32_t starting_insn_idx = insn_idx;
|
||||||
|
|
||||||
// NOTE: if we are ever deployed in production, we
|
|
||||||
// should probably just log an error and return NULL here,
|
|
||||||
// so we can fail more gracefully
|
|
||||||
if (cb->write_pos + 1024 >= cb->mem_size) {
|
|
||||||
rb_bug("out of executable memory");
|
|
||||||
}
|
|
||||||
if (ocb->write_pos + 1024 >= ocb->mem_size) {
|
|
||||||
rb_bug("out of executable memory (outlined block)");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize a JIT state object
|
// Initialize a JIT state object
|
||||||
jitstate_t jit = {
|
jitstate_t jit = {
|
||||||
.cb = cb,
|
.cb = cb,
|
||||||
|
@ -765,6 +787,8 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
|
||||||
idx += insn_len(opcode);
|
idx += insn_len(opcode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return block;
|
||||||
}
|
}
|
||||||
|
|
||||||
static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
|
static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
|
||||||
|
|
|
@ -14,7 +14,7 @@ static void jit_ensure_block_entry_exit(jitstate_t *jit);
|
||||||
|
|
||||||
static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
|
static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
|
||||||
|
|
||||||
static void yjit_gen_block(block_t *block, rb_execution_context_t *ec);
|
static block_t *gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec);
|
||||||
|
|
||||||
static void gen_code_for_exit_from_stub(void);
|
static void gen_code_for_exit_from_stub(void);
|
||||||
|
|
||||||
|
|
113
yjit_core.c
113
yjit_core.c
|
@ -542,9 +542,10 @@ static size_t get_num_versions(blockid_t blockid)
|
||||||
|
|
||||||
// Keep track of a block version. Block should be fully constructed.
|
// Keep track of a block version. Block should be fully constructed.
|
||||||
static void
|
static void
|
||||||
add_block_version(blockid_t blockid, block_t *block)
|
add_block_version(block_t *block)
|
||||||
{
|
{
|
||||||
const rb_iseq_t *iseq = block->blockid.iseq;
|
const blockid_t blockid = block->blockid;
|
||||||
|
const rb_iseq_t *iseq = blockid.iseq;
|
||||||
struct rb_iseq_constant_body *body = iseq->body;
|
struct rb_iseq_constant_body *body = iseq->body;
|
||||||
|
|
||||||
// Function entry blocks must have stack size 0
|
// Function entry blocks must have stack size 0
|
||||||
|
@ -704,57 +705,66 @@ find_block_version(blockid_t blockid, const ctx_t *ctx)
|
||||||
|
|
||||||
// Produce a generic context when the block version limit is hit for a blockid
|
// Produce a generic context when the block version limit is hit for a blockid
|
||||||
// Note that this will mutate the ctx argument
|
// Note that this will mutate the ctx argument
|
||||||
static void
|
static ctx_t
|
||||||
limit_block_versions(blockid_t blockid, ctx_t *ctx)
|
limit_block_versions(blockid_t blockid, const ctx_t *ctx)
|
||||||
{
|
{
|
||||||
// Guard chains implement limits separately, do nothing
|
// Guard chains implement limits separately, do nothing
|
||||||
if (ctx->chain_depth > 0)
|
if (ctx->chain_depth > 0)
|
||||||
return;
|
return *ctx;
|
||||||
|
|
||||||
// If this block version we're about to add will hit the version limit
|
// If this block version we're about to add will hit the version limit
|
||||||
if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions)
|
if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions) {
|
||||||
{
|
|
||||||
// Produce a generic context that stores no type information,
|
// Produce a generic context that stores no type information,
|
||||||
// but still respects the stack_size and sp_offset constraints
|
// but still respects the stack_size and sp_offset constraints.
|
||||||
// This new context will then match all future requests.
|
// This new context will then match all future requests.
|
||||||
ctx_t generic_ctx = DEFAULT_CTX;
|
ctx_t generic_ctx = DEFAULT_CTX;
|
||||||
generic_ctx.stack_size = ctx->stack_size;
|
generic_ctx.stack_size = ctx->stack_size;
|
||||||
generic_ctx.sp_offset = ctx->sp_offset;
|
generic_ctx.sp_offset = ctx->sp_offset;
|
||||||
|
|
||||||
// Mutate the incoming context
|
// Mutate the incoming context
|
||||||
*ctx = generic_ctx;
|
return generic_ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return *ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compile a new block version immediately
|
static void yjit_free_block(block_t *block);
|
||||||
|
|
||||||
|
// Immediately compile a series of block versions at a starting point and
|
||||||
|
// return the starting block.
|
||||||
static block_t *
|
static block_t *
|
||||||
gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
|
gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
|
||||||
{
|
{
|
||||||
// Allocate a new block version object
|
// Small array to keep track of all the blocks compiled per invocation. We
|
||||||
block_t *block = calloc(1, sizeof(block_t));
|
// tend to have small batches since we often break up compilation with lazy
|
||||||
block->blockid = blockid;
|
// stubs. Compilation is successful only if the whole batch is successful.
|
||||||
memcpy(&block->ctx, start_ctx, sizeof(ctx_t));
|
enum { MAX_PER_BATCH = 64 };
|
||||||
|
block_t *batch[MAX_PER_BATCH];
|
||||||
// Store a pointer to the first block (returned by this function)
|
int compiled_count = 0;
|
||||||
block_t *first_block = block;
|
bool batch_success = true;
|
||||||
|
block_t *block;
|
||||||
// Limit the number of specialized versions for this block
|
|
||||||
limit_block_versions(block->blockid, &block->ctx);
|
|
||||||
|
|
||||||
// Generate code for the first block
|
// Generate code for the first block
|
||||||
yjit_gen_block(block, ec);
|
block = gen_single_block(blockid, start_ctx, ec);
|
||||||
|
batch_success = block && compiled_count < MAX_PER_BATCH;
|
||||||
|
|
||||||
// Keep track of the new block version
|
if (batch_success) {
|
||||||
add_block_version(block->blockid, block);
|
// Track the block
|
||||||
|
add_block_version(block);
|
||||||
|
|
||||||
|
batch[compiled_count] = block;
|
||||||
|
compiled_count++;
|
||||||
|
}
|
||||||
|
|
||||||
// For each successor block to compile
|
// For each successor block to compile
|
||||||
for (;;) {
|
while (batch_success) {
|
||||||
// If the previous block compiled doesn't have outgoing branches, stop
|
// If the previous block compiled doesn't have outgoing branches, stop
|
||||||
if (rb_darray_size(block->outgoing) == 0) {
|
if (rb_darray_size(block->outgoing) == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the last outgoing branch from the previous block
|
// Get the last outgoing branch from the previous block. Blocks can use
|
||||||
|
// gen_direct_jump() to request a block to be placed immediately after.
|
||||||
branch_t *last_branch = rb_darray_back(block->outgoing);
|
branch_t *last_branch = rb_darray_back(block->outgoing);
|
||||||
|
|
||||||
// If there is no next block to compile, stop
|
// If there is no next block to compile, stop
|
||||||
|
@ -766,32 +776,48 @@ gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_contex
|
||||||
rb_bug("invalid target for last branch");
|
rb_bug("invalid target for last branch");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a new block version object
|
// Generate code for the current block using context from the last branch.
|
||||||
// Use the context from the branch
|
blockid_t requested_id = last_branch->targets[0];
|
||||||
block = calloc(1, sizeof(block_t));
|
const ctx_t *requested_ctx = &last_branch->target_ctxs[0];
|
||||||
block->blockid = last_branch->targets[0];
|
block = gen_single_block(requested_id, requested_ctx, ec);
|
||||||
block->ctx = last_branch->target_ctxs[0];
|
batch_success = block && compiled_count < MAX_PER_BATCH;
|
||||||
//memcpy(&block->ctx, ctx, sizeof(ctx_t));
|
|
||||||
|
|
||||||
// Limit the number of specialized versions for this block
|
// If the batch failed, stop
|
||||||
limit_block_versions(block->blockid, &block->ctx);
|
if (!batch_success) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Generate code for the current block
|
// Connect the last branch and the new block
|
||||||
yjit_gen_block(block, ec);
|
|
||||||
|
|
||||||
// Keep track of the new block version
|
|
||||||
add_block_version(block->blockid, block);
|
|
||||||
|
|
||||||
// Patch the last branch address
|
|
||||||
last_branch->dst_addrs[0] = block->start_addr;
|
last_branch->dst_addrs[0] = block->start_addr;
|
||||||
rb_darray_append(&block->incoming, last_branch);
|
rb_darray_append(&block->incoming, last_branch);
|
||||||
last_branch->blocks[0] = block;
|
last_branch->blocks[0] = block;
|
||||||
|
|
||||||
// This block should immediately follow the last branch
|
// This block should immediately follow the last branch
|
||||||
RUBY_ASSERT(block->start_addr == last_branch->end_addr);
|
RUBY_ASSERT(block->start_addr == last_branch->end_addr);
|
||||||
|
|
||||||
|
// Track the block
|
||||||
|
add_block_version(block);
|
||||||
|
|
||||||
|
batch[compiled_count] = block;
|
||||||
|
compiled_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return first_block;
|
if (batch_success) {
|
||||||
|
// Success. Return first block in the batch.
|
||||||
|
RUBY_ASSERT(compiled_count > 0);
|
||||||
|
return batch[0];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// The batch failed. Free everything in the batch
|
||||||
|
for (int block_idx = 0; block_idx < compiled_count; block_idx++) {
|
||||||
|
yjit_free_block(batch[block_idx]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if YJIT_STATS
|
||||||
|
yjit_runtime_counters.compilation_failure++;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate a block version that is an entry point inserted into an iseq
|
// Generate a block version that is an entry point inserted into an iseq
|
||||||
|
@ -807,15 +833,14 @@ gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t
|
||||||
// The entry context makes no assumptions about types
|
// The entry context makes no assumptions about types
|
||||||
blockid_t blockid = { iseq, insn_idx };
|
blockid_t blockid = { iseq, insn_idx };
|
||||||
|
|
||||||
// Write the interpreter entry prologue
|
// Write the interpreter entry prologue. Might be NULL when out of memory.
|
||||||
uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);
|
uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);
|
||||||
|
|
||||||
// Try to generate code for the entry block
|
// Try to generate code for the entry block
|
||||||
block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);
|
block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);
|
||||||
|
|
||||||
// If we couldn't generate any code
|
// If we couldn't generate any code
|
||||||
if (block->end_idx == insn_idx)
|
if (!block || block->end_idx == insn_idx) {
|
||||||
{
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -483,8 +483,7 @@ rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec)
|
||||||
// Compile a block version starting at the first instruction
|
// Compile a block version starting at the first instruction
|
||||||
uint8_t *code_ptr = gen_entry_point(iseq, 0, ec);
|
uint8_t *code_ptr = gen_entry_point(iseq, 0, ec);
|
||||||
|
|
||||||
if (code_ptr)
|
if (code_ptr) {
|
||||||
{
|
|
||||||
iseq->body->jit_func = (yjit_func_t)code_ptr;
|
iseq->body->jit_func = (yjit_func_t)code_ptr;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
Loading…
Reference in a new issue