Implement greedy versioning. Refactor versioning logic. (#10)

* Implement eager versioning. Refactor versioning logic.

* Add --version-limit and --greedy-versioning command-line args
This commit is contained in:
Maxime Chevalier-Boisvert 2021-04-24 00:16:48 -04:00 committed by Alan Wu
parent 4c7afa64b4
commit 96f4f918b0
7 changed files with 90 additions and 45 deletions

View File

@ -96,4 +96,15 @@ The core of CRuby's interpreter logic is found in:
## Contributing
We welcome open source contributors. If you are interested in contributing to this project, please contact Maxime Chevalier [(@Love2Code) via twitter](https://twitter.com/Love2Code) or by email (maxime.chevalierboisvert@shopify.com). You can also feel free to open new issues to report bugs or just to ask questions. Suggestions on how to make this readme file more helpful for new contributors are most welcome.
We welcome open source contributors. You should feel free to open new issues to report bugs or just to ask questions.
Suggestions on how to make this readme file more helpful for new contributors are most welcome.
Bug fixes and bug reports are very valuable to us. If you find bugs in YJIT, it's very possible be that nobody has reported this bug before,
or that we don't have a good reproduction for it, so please open an issue and provide some information about your configuration and a description of how you
encountered the problem. If you are able to produce a small reproduction to help us track down the bug, that is very much appreciated as well.
If you would like to contribute a large patch to YJIT, we suggest opening an issue or a discussion on this repository so that
we can have an active discussion. A common problem is that sometimes people submit large pull requests to open source projects
without prior communication, and we have to reject them because the work they implemented does not fit within the design of the
project. We want to save you time and frustration, so please reach out and we can have a productive discussion as to how
you can contribute things we will want to merge into YJIT.

6
ruby.c
View File

@ -1039,6 +1039,12 @@ setup_yjit_options(const char *s, struct rb_yjit_options *yjit_opt)
if (opt_match_arg(s, l, "call-threshold")) {
yjit_opt->call_threshold = atoi(s + 1);
}
else if (opt_match_arg(s, l, "version-limit")) {
yjit_opt->version_limit = atoi(s + 1);
}
else if (opt_match_noarg(s, l, "greedy-versioning")) {
yjit_opt->greedy_versioning = true;
}
else if (opt_match_noarg(s, l, "stats")) {
yjit_opt->gen_stats = true;
}

8
yjit.h
View File

@ -30,12 +30,20 @@ typedef struct rb_iseq_struct rb_iseq_t;
#endif
struct rb_yjit_options {
// Enable compilation with YJIT
bool yjit_enabled;
// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
unsigned call_threshold;
// Generate versions greedily until the limit is hit
bool greedy_versioning;
// Maximum number of versions per block
// 1 means always create generic versions
unsigned version_limit;
// Capture and print out stats
bool gen_stats;
};

View File

@ -336,10 +336,15 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
// Compile a sequence of bytecode instructions for a given basic block version
void
yjit_gen_block(ctx_t *ctx, block_t *block, rb_execution_context_t *ec)
yjit_gen_block(block_t *block, rb_execution_context_t *ec)
{
RUBY_ASSERT(cb != NULL);
RUBY_ASSERT(block != NULL);
RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
// Copy the block's context to avoid mutating it
ctx_t ctx_copy = block->ctx;
ctx_t* ctx = &ctx_copy;
const rb_iseq_t *iseq = block->blockid.iseq;
uint32_t insn_idx = block->blockid.idx;

View File

@ -40,7 +40,7 @@ typedef codegen_status_t (*codegen_fn)(jitstate_t* jit, ctx_t* ctx);
uint8_t* yjit_entry_prologue();
void yjit_gen_block(ctx_t* ctx, block_t* block, rb_execution_context_t* ec);
void yjit_gen_block(block_t* block, rb_execution_context_t* ec);
void yjit_init_codegen(void);

View File

@ -3,14 +3,16 @@
#include "vm_sync.h"
#include "builtin.h"
#include "yjit.h"
#include "yjit_asm.h"
#include "yjit_utils.h"
#include "yjit_iface.h"
#include "yjit_core.h"
#include "yjit_codegen.h"
// Maximum number of versions per block
#define MAX_VERSIONS 4
// Maximum number of specialized block versions per block
// Zero means generic versions only
#define MAX_VERSIONS 3
/*
Get an operand for the adjusted stack pointer address
@ -419,26 +421,57 @@ block_t* find_block_version(blockid_t blockid, const ctx_t* ctx)
}
}
// If greedy versioning is enabled
if (rb_yjit_opts.greedy_versioning)
{
// If we're below the version limit, don't settle for an imperfect match
if ((uint32_t)rb_darray_size(versions) + 1 < rb_yjit_opts.version_limit && best_diff > 0) {
return NULL;
}
}
return best_version;
}
// Produce a generic context when the block version limit is hit for a blockid
// Note that this will mutate the ctx argument
void limit_block_versions(blockid_t blockid, ctx_t* ctx)
{
// Guard chains implement limits separately, do nothing
if (ctx->chain_depth > 0)
return;
// If this block version we're about to add will hit the version limit
if (get_num_versions(blockid) + 1 >= rb_yjit_opts.version_limit)
{
// Produce a generic context that stores no type information,
// but still respects the stack_size and sp_offset constraints
// This new context will then match all future requests.
ctx_t generic_ctx = DEFAULT_CTX;
generic_ctx.stack_size = ctx->stack_size;
generic_ctx.sp_offset = ctx->sp_offset;
// Mutate the incoming context
*ctx = generic_ctx;
}
}
// Compile a new block version immediately
block_t* gen_block_version(blockid_t blockid, const ctx_t* start_ctx, rb_execution_context_t* ec)
{
// Copy the context to avoid mutating it
ctx_t ctx_copy = *start_ctx;
ctx_t* ctx = &ctx_copy;
// Allocate a new block version object
block_t* first_block = calloc(1, sizeof(block_t));
first_block->blockid = blockid;
memcpy(&first_block->ctx, ctx, sizeof(ctx_t));
block_t* block = calloc(1, sizeof(block_t));
block->blockid = blockid;
memcpy(&block->ctx, start_ctx, sizeof(ctx_t));
// Block that is currently being compiled
block_t* block = first_block;
// Store a pointer to the first block (returned by this function)
block_t* first_block = block;
// Limit the number of specialized versions for this block
limit_block_versions(block->blockid, &block->ctx);
// Generate code for the first block
yjit_gen_block(ctx, block, ec);
yjit_gen_block(block, ec);
// Keep track of the new block version
add_block_version(block->blockid, block);
@ -462,16 +495,18 @@ block_t* gen_block_version(blockid_t blockid, const ctx_t* start_ctx, rb_executi
rb_bug("invalid target for last branch");
}
// Use the context from the branch
*ctx = last_branch->target_ctxs[0];
// Allocate a new block version object
// Use the context from the branch
block = calloc(1, sizeof(block_t));
block->blockid = last_branch->targets[0];
memcpy(&block->ctx, ctx, sizeof(ctx_t));
block->ctx = last_branch->target_ctxs[0];
//memcpy(&block->ctx, ctx, sizeof(ctx_t));
// Limit the number of specialized versions for this block
limit_block_versions(block->blockid, &block->ctx);
// Generate code for the current block
yjit_gen_block(ctx, block, ec);
yjit_gen_block(block, ec);
// Keep track of the new block version
add_block_version(block->blockid, block);
@ -514,7 +549,6 @@ static uint8_t *
branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_context_t* ec)
{
uint8_t* dst_addr;
ctx_t generic_ctx;
// Stop other ractors since we are going to patch machine code.
// This is how the GC does it.
@ -555,17 +589,6 @@ branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_contex
// If this block hasn't yet been compiled
if (!p_block) {
// Limit the number of block versions
if (target_ctx->chain_depth == 0) { // guard chains implement limits individually
if (get_num_versions(target) >= MAX_VERSIONS - 1) {
//fprintf(stderr, "version limit hit in branch_stub_hit\n");
generic_ctx = DEFAULT_CTX;
generic_ctx.stack_size = target_ctx->stack_size;
generic_ctx.sp_offset = target_ctx->sp_offset;
target_ctx = &generic_ctx;
}
}
// If the new block can be generated right after the branch (at cb->write_pos)
if (cb->write_pos == branch->end_pos) {
// This branch should be terminating its block
@ -720,7 +743,6 @@ void gen_direct_jump(
)
{
RUBY_ASSERT(target0.iseq != NULL);
ctx_t generic_ctx;
branch_t* branch = make_branch_entry(block, ctx, gen_jump_branch);
branch->targets[0] = target0;
@ -744,18 +766,8 @@ void gen_direct_jump(
}
else
{
// Limit the number of block versions
if (get_num_versions(target0) >= MAX_VERSIONS - 1)
{
//fprintf(stderr, "version limit hit in gen_direct_jump\n");
generic_ctx = DEFAULT_CTX;
generic_ctx.stack_size = ctx->stack_size;
generic_ctx.sp_offset = ctx->sp_offset;
ctx = &generic_ctx;
}
// The target block will be compiled next
// It will be compiled in gen_block_version()
// The target block will be compiled right after this one (fallthrough)
// See the loop in gen_block_version()
branch->dst_addrs[0] = NULL;
branch->shape = SHAPE_NEXT0;
branch->start_pos = cb->write_pos;

View File

@ -1044,6 +1044,9 @@ rb_yjit_init(struct rb_yjit_options *options)
if (rb_yjit_opts.call_threshold < 1) {
rb_yjit_opts.call_threshold = 2;
}
if (rb_yjit_opts.version_limit < 1) {
rb_yjit_opts.version_limit = 4;
}
blocks_assuming_stable_global_constant_state = st_init_numtable();
blocks_assuming_single_ractor_mode = st_init_numtable();