Implement greedy versioning. Refactor versioning logic. (#10)

* Implement greedy versioning. Refactor versioning logic. * Add --version-limit and --greedy-versioning command-line args
2022-11-09 12:17:21 -05:00 · 2021-04-24 00:16:48 -04:00 · 2021-04-24 00:16:48 -04:00 · 96f4f918b0
commit 96f4f918b0
parent 4c7afa64b4
7 changed files with 90 additions and 45 deletions
--- a/README.md
+++ b/README.md
@ -96,4 +96,15 @@ The core of CRuby's interpreter logic is found in:

 ## Contributing

-We welcome open source contributors. If you are interested in contributing to this project, please contact Maxime Chevalier [(@Love2Code) via twitter](https://twitter.com/Love2Code) or by email (maxime.chevalierboisvert@shopify.com). You can also feel free to open new issues to report bugs or just to ask questions. Suggestions on how to make this readme file more helpful for new contributors are most welcome.
+We welcome open source contributors. You should feel free to open new issues to report bugs or just to ask questions.
+Suggestions on how to make this readme file more helpful for new contributors are most welcome.
+
+Bug fixes and bug reports are very valuable to us. If you find a bug in YJIT, it's very possible that nobody has reported it before,
+or that we don't have a good reproduction for it, so please open an issue and provide some information about your configuration and a description of how you
+encountered the problem. If you are able to produce a small reproduction to help us track down the bug, that is very much appreciated as well.
+
+If you would like to contribute a large patch to YJIT, we suggest opening an issue or a discussion on this repository so that
+we can have an active discussion. A common problem is that sometimes people submit large pull requests to open source projects
+without prior communication, and we have to reject them because the work they implemented does not fit within the design of the
+project. We want to save you time and frustration, so please reach out and we can have a productive discussion as to how
+you can contribute things we will want to merge into YJIT.
--- a/ruby.c
+++ b/ruby.c
@ -1039,6 +1039,12 @@ setup_yjit_options(const char *s, struct rb_yjit_options *yjit_opt)
    if (opt_match_arg(s, l, "call-threshold")) {
        yjit_opt->call_threshold = atoi(s + 1);
    }
+    else if (opt_match_arg(s, l, "version-limit")) {
+        yjit_opt->version_limit = atoi(s + 1);
+    }
+    else if (opt_match_noarg(s, l, "greedy-versioning")) {
+        yjit_opt->greedy_versioning = true;
+    }
    else if (opt_match_noarg(s, l, "stats")) {
        yjit_opt->gen_stats = true;
    }
--- a/yjit.h
+++ b/yjit.h
@ -30,12 +30,20 @@ typedef struct rb_iseq_struct rb_iseq_t;
 #endif

 struct rb_yjit_options {
+    // Enable compilation with YJIT
    bool yjit_enabled;

    // Number of method calls after which to start generating code
    // Threshold==1 means compile on first execution
    unsigned call_threshold;

+    // Generate versions greedily until the limit is hit
+    bool greedy_versioning;
+
+    // Maximum number of versions per block
+    // 1 means always create generic versions
+    unsigned version_limit;
+
    // Capture and print out stats
    bool gen_stats;
 };
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@ -336,10 +336,15 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)

 // Compile a sequence of bytecode instructions for a given basic block version
 void
-yjit_gen_block(ctx_t *ctx, block_t *block, rb_execution_context_t *ec)
+yjit_gen_block(block_t *block, rb_execution_context_t *ec)
 {
    RUBY_ASSERT(cb != NULL);
    RUBY_ASSERT(block != NULL);
+    RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
+
+    // Copy the block's context to avoid mutating it
+    ctx_t ctx_copy = block->ctx;
+    ctx_t* ctx = &ctx_copy;

    const rb_iseq_t *iseq = block->blockid.iseq;
    uint32_t insn_idx = block->blockid.idx;
--- a/yjit_codegen.h
+++ b/yjit_codegen.h
@ -40,7 +40,7 @@ typedef codegen_status_t (*codegen_fn)(jitstate_t* jit, ctx_t* ctx);

 uint8_t* yjit_entry_prologue();

-void yjit_gen_block(ctx_t* ctx, block_t* block, rb_execution_context_t* ec);
+void yjit_gen_block(block_t* block, rb_execution_context_t* ec);

 void yjit_init_codegen(void);

--- a/yjit_core.c
+++ b/yjit_core.c
@ -3,14 +3,16 @@
 #include "vm_sync.h"
 #include "builtin.h"

+#include "yjit.h"
 #include "yjit_asm.h"
 #include "yjit_utils.h"
 #include "yjit_iface.h"
 #include "yjit_core.h"
 #include "yjit_codegen.h"

-// Maximum number of versions per block
-#define MAX_VERSIONS 4
+// Maximum number of specialized block versions per block
+// Zero means generic versions only
+#define MAX_VERSIONS 3

 /*
 Get an operand for the adjusted stack pointer address
@ -419,26 +421,57 @@ block_t* find_block_version(blockid_t blockid, const ctx_t* ctx)
        }
    }

+    // If greedy versioning is enabled
+    if (rb_yjit_opts.greedy_versioning)
+    {
+        // If we're below the version limit, don't settle for an imperfect match
+        if ((uint32_t)rb_darray_size(versions) + 1 < rb_yjit_opts.version_limit && best_diff > 0) {
+            return NULL;
+        }
+    }
+
    return best_version;
 }

+// Produce a generic context when the block version limit is hit for a blockid
+// Note that this will mutate the ctx argument
+void limit_block_versions(blockid_t blockid, ctx_t* ctx)
+{
+    // Guard chains implement limits separately, do nothing
+    if (ctx->chain_depth > 0)
+        return;
+
+    // If this block version we're about to add will hit the version limit
+    if (get_num_versions(blockid) + 1 >= rb_yjit_opts.version_limit)
+    {
+        // Produce a generic context that stores no type information,
+        // but still respects the stack_size and sp_offset constraints
+        // This new context will then match all future requests.
+        ctx_t generic_ctx = DEFAULT_CTX;
+        generic_ctx.stack_size = ctx->stack_size;
+        generic_ctx.sp_offset = ctx->sp_offset;
+
+        // Mutate the incoming context
+        *ctx = generic_ctx;
+    }
+}
+
 // Compile a new block version immediately
 block_t* gen_block_version(blockid_t blockid, const ctx_t* start_ctx, rb_execution_context_t* ec)
 {
-    // Copy the context to avoid mutating it
-    ctx_t ctx_copy = *start_ctx;
-    ctx_t* ctx = &ctx_copy;
-
    // Allocate a new block version object
-    block_t* first_block = calloc(1, sizeof(block_t));
-    first_block->blockid = blockid;
-    memcpy(&first_block->ctx, ctx, sizeof(ctx_t));
+    block_t* block = calloc(1, sizeof(block_t));
+    block->blockid = blockid;
+    memcpy(&block->ctx, start_ctx, sizeof(ctx_t));

-    // Block that is currently being compiled
-    block_t* block = first_block;
+    // Store a pointer to the first block (returned by this function)
+    block_t* first_block = block;
+
+    // Limit the number of specialized versions for this block
+    limit_block_versions(block->blockid, &block->ctx);

    // Generate code for the first block
-    yjit_gen_block(ctx, block, ec);
+    yjit_gen_block(block, ec);

    // Keep track of the new block version
    add_block_version(block->blockid, block);
@ -462,16 +495,18 @@ block_t* gen_block_version(blockid_t blockid, const ctx_t* start_ctx, rb_executi
            rb_bug("invalid target for last branch");
        }

-        // Use the context from the branch
-        *ctx = last_branch->target_ctxs[0];
-
        // Allocate a new block version object
+        // Use the context from the branch
        block = calloc(1, sizeof(block_t));
        block->blockid = last_branch->targets[0];
-        memcpy(&block->ctx, ctx, sizeof(ctx_t));
+        block->ctx = last_branch->target_ctxs[0];
+        //memcpy(&block->ctx, ctx, sizeof(ctx_t));
+
+        // Limit the number of specialized versions for this block
+        limit_block_versions(block->blockid, &block->ctx);

        // Generate code for the current block
-        yjit_gen_block(ctx, block, ec);
+        yjit_gen_block(block, ec);

        // Keep track of the new block version
        add_block_version(block->blockid, block);
@ -514,7 +549,6 @@ static uint8_t *
 branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_context_t* ec)
 {
    uint8_t* dst_addr;
-    ctx_t generic_ctx;

    // Stop other ractors since we are going to patch machine code.
    // This is how the GC does it.
@ -555,17 +589,6 @@ branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_contex

        // If this block hasn't yet been compiled
        if (!p_block) {
-            // Limit the number of block versions
-            if (target_ctx->chain_depth == 0) { // guard chains implement limits individually
-                if (get_num_versions(target) >= MAX_VERSIONS - 1) {
-                    //fprintf(stderr, "version limit hit in branch_stub_hit\n");
-                    generic_ctx = DEFAULT_CTX;
-                    generic_ctx.stack_size = target_ctx->stack_size;
-                    generic_ctx.sp_offset = target_ctx->sp_offset;
-                    target_ctx = &generic_ctx;
-                }
-            }
-
            // If the new block can be generated right after the branch (at cb->write_pos)
            if (cb->write_pos == branch->end_pos) {
                // This branch should be terminating its block
@ -720,7 +743,6 @@ void gen_direct_jump(
 )
 {
    RUBY_ASSERT(target0.iseq != NULL);
-    ctx_t generic_ctx;

    branch_t* branch = make_branch_entry(block, ctx, gen_jump_branch);
    branch->targets[0] = target0;
@ -744,18 +766,8 @@ void gen_direct_jump(
    }
    else
    {
-        // Limit the number of block versions
-        if (get_num_versions(target0) >= MAX_VERSIONS - 1)
-        {
-            //fprintf(stderr, "version limit hit in gen_direct_jump\n");
-            generic_ctx = DEFAULT_CTX;
-            generic_ctx.stack_size = ctx->stack_size;
-            generic_ctx.sp_offset = ctx->sp_offset;
-            ctx = &generic_ctx;
-        }
-
-        // The target block will be compiled next
-        // It will be compiled in gen_block_version()
+        // The target block will be compiled right after this one (fallthrough)
+        // See the loop in gen_block_version()
        branch->dst_addrs[0] = NULL;
        branch->shape = SHAPE_NEXT0;
        branch->start_pos = cb->write_pos;
--- a/yjit_iface.c
+++ b/yjit_iface.c
@ -1044,6 +1044,9 @@ rb_yjit_init(struct rb_yjit_options *options)
    if (rb_yjit_opts.call_threshold < 1) {
        rb_yjit_opts.call_threshold = 2;
    }
+    if (rb_yjit_opts.version_limit < 1) {
+        rb_yjit_opts.version_limit = 4;
+    }

    blocks_assuming_stable_global_constant_state = st_init_numtable();
    blocks_assuming_single_ractor_mode = st_init_numtable();