From f7717b1d463548414e607498e84815ac641ce3dc Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Thu, 10 Dec 2020 00:06:10 -0500 Subject: [PATCH] Start refactoring JIT engine --- ujit_codegen.c | 14 +++++--------- ujit_codegen.h | 2 +- ujit_core.c | 42 ++++++++++++++++++++++++++++++++++++------ ujit_core.h | 20 ++++++++++++++------ ujit_iface.c | 22 +++++----------------- 5 files changed, 61 insertions(+), 39 deletions(-) diff --git a/ujit_codegen.c b/ujit_codegen.c index 03b72ec2c3..c51f7b1944 100644 --- a/ujit_codegen.c +++ b/ujit_codegen.c @@ -43,7 +43,7 @@ static void ujit_gen_exit(codeblock_t* cb, ctx_t* ctx, VALUE* exit_pc) { // Write the adjusted SP back into the CFP - if (ctx->stack_diff != 0) + if (ctx->stack_size != 0) { x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0); lea(cb, REG_SP, stack_pointer); @@ -95,11 +95,10 @@ ujit_side_exit(codeblock_t* cb, ctx_t* ctx, VALUE* exit_pc) /* Compile a sequence of bytecode instructions starting at `insn_idx`. -Return the index to the first instruction not compiled in the sequence -through `next_ujit_idx`. Return `NULL` in case compilation fails. +Returns `NULL` if compilation fails. */ uint8_t * -ujit_compile_insn(const rb_iseq_t *iseq, unsigned int insn_idx, unsigned int *next_ujit_idx) +ujit_compile_block(const rb_iseq_t *iseq, unsigned int insn_idx) { assert (cb != NULL); unsigned first_insn_idx = insn_idx; @@ -174,16 +173,13 @@ ujit_compile_insn(const rb_iseq_t *iseq, unsigned int insn_idx, unsigned int *ne } } - // Let the caller know how many instructions ujit compiled - *next_ujit_idx = insn_idx; - // If no instructions were compiled if (num_instrs == 0) { return NULL; } // Generate code to exit to the interpreter - ujit_gen_exit(cb, &ctx, &encoded[*next_ujit_idx]); + ujit_gen_exit(cb, &ctx, &encoded[insn_idx]); map_addr2insn(code_ptr, first_opcode); @@ -191,7 +187,7 @@ ujit_compile_insn(const rb_iseq_t *iseq, unsigned int insn_idx, unsigned int *ne // Dump list of compiled instrutions fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq); VALUE *pc = &encoded[first_insn_idx]; - VALUE *end_pc = &encoded[*next_ujit_idx]; + VALUE *end_pc = &encoded[insn_idx]; while (pc < end_pc) { int opcode = opcode_at_pc(iseq, pc); fprintf(stderr, " %04td %s\n", pc - encoded, insn_name(opcode)); diff --git a/ujit_codegen.h b/ujit_codegen.h index a6667b01df..7c03420097 100644 --- a/ujit_codegen.h +++ b/ujit_codegen.h @@ -3,7 +3,7 @@ #include "stddef.h" -uint8_t * ujit_compile_insn(const rb_iseq_t *iseq, unsigned int insn_idx, unsigned int *next_ujit_idx); +uint8_t * ujit_compile_block(const rb_iseq_t *iseq, unsigned int insn_idx); void ujit_init_codegen(void); diff --git a/ujit_core.c b/ujit_core.c index 5ac6407219..261a33315f 100644 --- a/ujit_core.c +++ b/ujit_core.c @@ -1,7 +1,37 @@ +#include "internal.h" #include "ujit_asm.h" #include "ujit_iface.h" #include "ujit_core.h" + + + +// Table of block versions indexed by (iseq, index) tuples +st_table * version_tbl; + +/* +struct st_hash_type { + int (*compare)(st_data_t, st_data_t); // st_compare_func* + st_index_t (*hash)(st_data_t); // st_hash_func* +}; + +static const struct st_hash_type st_hashtype_num = { + st_numcmp, + st_numhash, +}; + +strcasehash(st_data_t arg) +{ + register const char *string = (const char *)arg; + ... +} + +*/ + + + + + // Get the current instruction opcode from the context object int ctx_get_opcode(ctx_t *ctx) @@ -23,7 +53,7 @@ Get an operand for the adjusted stack pointer address x86opnd_t ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes) { - int32_t offset = (ctx->stack_diff) * 8 + offset_bytes; + int32_t offset = (ctx->stack_size) * 8 + offset_bytes; return mem_opnd(64, REG_SP, offset); } @@ -34,10 +64,10 @@ Return a pointer to the new stack top x86opnd_t ctx_stack_push(ctx_t* ctx, size_t n) { - ctx->stack_diff += n; + ctx->stack_size += n; // SP points just above the topmost value - int32_t offset = (ctx->stack_diff - 1) * 8; + int32_t offset = (ctx->stack_size - 1) * 8; return mem_opnd(64, REG_SP, offset); } @@ -49,10 +79,10 @@ x86opnd_t ctx_stack_pop(ctx_t* ctx, size_t n) { // SP points just above the topmost value - int32_t offset = (ctx->stack_diff - 1) * 8; + int32_t offset = (ctx->stack_size - 1) * 8; x86opnd_t top = mem_opnd(64, REG_SP, offset); - ctx->stack_diff -= n; + ctx->stack_size -= n; return top; } @@ -61,7 +91,7 @@ x86opnd_t ctx_stack_opnd(ctx_t* ctx, int32_t idx) { // SP points just above the topmost value - int32_t offset = (ctx->stack_diff - 1 - idx) * 8; + int32_t offset = (ctx->stack_size - 1 - idx) * 8; x86opnd_t opnd = mem_opnd(64, REG_SP, offset); return opnd; diff --git a/ujit_core.h b/ujit_core.h index ede28e7834..320be8fa01 100644 --- a/ujit_core.h +++ b/ujit_core.h @@ -17,16 +17,18 @@ #define REG0_32 EAX #define REG1_32 ECX +// Maximum number of versions per block +#define MAX_VERSIONS 5 + // Code generation context typedef struct ctx_struct { - // Current PC - VALUE *pc; + // TODO: we may want to remove information that is not + // strictly necessary for versioning from this struct + // Some of the information here is only needed during + // code generation, eg: current pc - // Difference between the current stack pointer and actual stack top - int32_t stack_diff; - - // The iseq that owns the region that is compiling + // Instruction sequence this is associated with const rb_iseq_t *iseq; // Index in the iseq of the opcode we are replacing @@ -35,6 +37,12 @@ typedef struct ctx_struct // The start of the generated code uint8_t *code_ptr; + // Current PC + VALUE *pc; + + // Number of values pushed on the temporary stack + int32_t stack_size; + // Whether we know self is a heap object bool self_is_object; diff --git a/ujit_iface.c b/ujit_iface.c index 404c75dbee..ac541b7cdc 100644 --- a/ujit_iface.c +++ b/ujit_iface.c @@ -232,25 +232,13 @@ rb_ujit_compile_iseq(const rb_iseq_t *iseq) RB_VM_LOCK_ENTER(); VALUE *encoded = (VALUE *)iseq->body->iseq_encoded; - unsigned int insn_idx; - unsigned int next_ujit_idx = 0; + // Compile a block version starting at the first instruction + uint8_t* native_code_ptr = ujit_compile_block(iseq, 0); - for (insn_idx = 0; insn_idx < iseq->body->iseq_size; /* */) { - int insn = opcode_at_pc(iseq, &encoded[insn_idx]); - int len = insn_len(insn); - - uint8_t *native_code_ptr = NULL; - - // If ujit hasn't already compiled this instruction - if (insn_idx >= next_ujit_idx) { - native_code_ptr = ujit_compile_insn(iseq, insn_idx, &next_ujit_idx); - } - - if (native_code_ptr) { - encoded[insn_idx] = (VALUE)native_code_ptr; - } - insn_idx += len; + if (native_code_ptr) { + encoded[0] = (VALUE)native_code_ptr; } + RB_VM_LOCK_LEAVE(); #endif }