From e0c5d4ecd9e864fa531ecceb29ed0e195bf08644 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert Date: Mon, 28 Sep 2020 15:50:41 -0400 Subject: [PATCH] Implemented side-exits to interpreter. setlocal_WC_0 --- ujit_asm.c | 115 +++++++++++++++++++++++----- ujit_asm.h | 36 ++++++++- ujit_asm_tests.c | 29 +++---- ujit_compile.c | 194 ++++++++++++++++++++++++----------------------- 4 files changed, 240 insertions(+), 134 deletions(-) diff --git a/ujit_asm.c b/ujit_asm.c index 5d70c7b299..6a79e85854 100644 --- a/ujit_asm.c +++ b/ujit_asm.c @@ -127,6 +127,14 @@ x86opnd_t mem_opnd(size_t num_bits, x86opnd_t base_reg, int32_t disp) return opnd; } +x86opnd_t resize_opnd(x86opnd_t opnd, size_t num_bits) +{ + assert (num_bits % 8 == 0); + x86opnd_t sub = opnd; + sub.num_bits = num_bits; + return sub; +} + x86opnd_t imm_opnd(int64_t imm) { x86opnd_t opnd = { @@ -149,11 +157,12 @@ x86opnd_t const_ptr_opnd(void* ptr) return opnd; } -void cb_init(codeblock_t* cb, size_t mem_size) +// Allocate a block of executable memory +uint8_t* alloc_exec_mem(size_t mem_size) { // Map the memory as executable - cb->mem_block = (uint8_t*)mmap( - &cb_init, + uint8_t* mem_block = (uint8_t*)mmap( + &alloc_exec_mem, mem_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, @@ -162,12 +171,19 @@ void cb_init(codeblock_t* cb, size_t mem_size) ); // Check that the memory mapping was successful - if (cb->mem_block == MAP_FAILED) + if (mem_block == MAP_FAILED) { fprintf(stderr, "mmap call failed\n"); exit(-1); } + return mem_block; +} + +// Initialize a code block object +void cb_init(codeblock_t* cb, uint8_t* mem_block, size_t mem_size) +{ + cb->mem_block = mem_block; cb->mem_size = mem_size; cb->write_pos = 0; cb->num_labels = 0; @@ -801,6 +817,26 @@ void cb_write_jcc(codeblock_t* cb, const char* mnem, uint8_t op0, uint8_t op1, s cb_write_int(cb, 0, 32); } +// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional) +void cb_write_jcc_ptr(codeblock_t* cb, const 
char* mnem, uint8_t op0, uint8_t op1, uint8_t* dst_ptr) +{ + //cb.writeASM(mnem, label); + + // Write the opcode + cb_write_byte(cb, op0); + cb_write_byte(cb, op1); + + // Pointer to the end of this jump + uint8_t* end_ptr = &cb->mem_block[cb->write_pos] + 4; + + // Compute the jump offset + int64_t rel64 = (int64_t)(dst_ptr - end_ptr); + assert (rel64 >= -2147483648 && rel64 <= 2147483647); + + // Write the relative 32-bit jump offset + cb_write_int(cb, (int32_t)rel64, 32); +} + // Encode a conditional move instruction void cb_write_cmov(codeblock_t* cb, const char* mnem, uint8_t opcode1, x86opnd_t dst, x86opnd_t src) { @@ -1097,6 +1133,38 @@ void jpo (codeblock_t* cb, size_t label_idx) { cb_write_jcc(cb, "jpo" , 0x0F, 0x void js (codeblock_t* cb, size_t label_idx) { cb_write_jcc(cb, "js" , 0x0F, 0x88, label_idx); } void jz (codeblock_t* cb, size_t label_idx) { cb_write_jcc(cb, "jz" , 0x0F, 0x84, label_idx); } +/// jcc - Conditional relative jump to a pointer (32-bit offset) +void ja_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "ja" , 0x0F, 0x87, ptr); } +void jae_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jae" , 0x0F, 0x83, ptr); } +void jb_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jb" , 0x0F, 0x82, ptr); } +void jbe_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jbe" , 0x0F, 0x86, ptr); } +void jc_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jc" , 0x0F, 0x82, ptr); } +void je_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "je" , 0x0F, 0x84, ptr); } +void jg_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jg" , 0x0F, 0x8F, ptr); } +void jge_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jge" , 0x0F, 0x8D, ptr); } +void jl_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jl" , 0x0F, 0x8C, ptr); } +void jle_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jle" , 0x0F, 0x8E, ptr); } +void jna_ptr (codeblock_t* cb, uint8_t* ptr) { 
cb_write_jcc_ptr(cb, "jna" , 0x0F, 0x86, ptr); } +void jnae_ptr(codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnae", 0x0F, 0x82, ptr); } +void jnb_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnb" , 0x0F, 0x83, ptr); } +void jnbe_ptr(codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnbe", 0x0F, 0x87, ptr); } +void jnc_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnc" , 0x0F, 0x83, ptr); } +void jne_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jne" , 0x0F, 0x85, ptr); } +void jng_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jng" , 0x0F, 0x8E, ptr); } +void jnge_ptr(codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnge", 0x0F, 0x8C, ptr); } +void jnl_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnl" , 0x0F, 0x8D, ptr); } +void jnle_ptr(codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnle", 0x0F, 0x8F, ptr); } +void jno_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jno" , 0x0F, 0x81, ptr); } +void jnp_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnp" , 0x0F, 0x8b, ptr); } +void jns_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jns" , 0x0F, 0x89, ptr); } +void jnz_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jnz" , 0x0F, 0x85, ptr); } +void jo_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jo" , 0x0F, 0x80, ptr); } +void jp_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jp" , 0x0F, 0x8A, ptr); } +void jpe_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jpe" , 0x0F, 0x8A, ptr); } +void jpo_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jpo" , 0x0F, 0x8B, ptr); } +void js_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "js" , 0x0F, 0x88, ptr); } +void jz_ptr (codeblock_t* cb, uint8_t* ptr) { cb_write_jcc_ptr(cb, "jz" , 0x0F, 0x84, ptr); } + /// jmp - Direct relative jump to label void jmp(codeblock_t* cb, size_t label_idx) { @@ -1119,19 +1187,6 @@ void 
jmp_rm(codeblock_t* cb, x86opnd_t opnd) cb_write_rm(cb, false, false, NO_OPND, opnd, 4, 1, 0xFF); } -/* -/// jmp - Jump with relative 8-bit offset -void jmp8(CodeBlock cb, int8_t offset) -{ - /// Opcode for direct jump with relative 8-bit offset - const ubyte JMP_REL8_OPCODE = 0xEB; - - cb.writeASM("jmp", ((offset > 0)? "+":"-") ~ to!string(offset)); - cb.writeByte(JMP_REL8_OPCODE); - cb.writeByte(offset); -} -*/ - // jmp - Jump with relative 32-bit offset void jmp32(codeblock_t* cb, int32_t offset) { @@ -1204,7 +1259,7 @@ void mov(codeblock_t* cb, x86opnd_t dst, x86opnd_t src) 0xC6, // opMemImm8 0xFF, // opMemImmSml (not available) 0xFF, // opMemImmLrg - 0xFF, // opExtImm + 0xFF, // opExtImm dst, src ); @@ -1517,6 +1572,30 @@ void sub(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1) ); } +/// test - Logical Compare +void test(codeblock_t* cb, x86opnd_t rm_opnd, x86opnd_t imm_opnd) +{ + assert (rm_opnd.type == OPND_REG || rm_opnd.type == OPND_MEM); + assert (imm_opnd.type == OPND_IMM); + assert (imm_opnd.imm >= 0); + assert (unsig_imm_size(imm_opnd.unsig_imm) <= 32); + assert (unsig_imm_size(imm_opnd.unsig_imm) <= rm_opnd.num_bits); + + // Use the smallest operand size possible + rm_opnd = resize_opnd(rm_opnd, unsig_imm_size(imm_opnd.unsig_imm)); + + if (rm_opnd.num_bits == 8) + { + cb_write_rm(cb, false, false, NO_OPND, rm_opnd, 0x00, 1, 0xF6); + cb_write_int(cb, imm_opnd.imm, rm_opnd.num_bits); + } + else + { + cb_write_rm(cb, rm_opnd.num_bits == 16, false, NO_OPND, rm_opnd, 0x00, 1, 0xF7); + cb_write_int(cb, imm_opnd.imm, rm_opnd.num_bits); + } +} + /// Undefined opcode void ud2(codeblock_t* cb) { diff --git a/ujit_asm.h b/ujit_asm.h index d594897fa0..707c56cb2a 100644 --- a/ujit_asm.h +++ b/ujit_asm.h @@ -219,7 +219,8 @@ x86opnd_t imm_opnd(int64_t val); x86opnd_t const_ptr_opnd(void* ptr); // Code block methods -void cb_init(codeblock_t* cb, size_t mem_size); +uint8_t* alloc_exec_mem(size_t mem_size); +void cb_init(codeblock_t* cb, uint8_t* mem_block, size_t 
mem_size); void cb_align_pos(codeblock_t* cb, size_t multiple); void cb_set_pos(codeblock_t* cb, size_t pos); uint8_t* cb_get_ptr(codeblock_t* cb, size_t index); @@ -289,7 +290,7 @@ void jnc(codeblock_t* cb, size_t label_idx); void jne(codeblock_t* cb, size_t label_idx); void jng(codeblock_t* cb, size_t label_idx); void jnge(codeblock_t* cb, size_t label_idx); -//void jnl(codeblock_t* cb, size_t label_idx); +void jnl(codeblock_t* cb, size_t label_idx); void jnle(codeblock_t* cb, size_t label_idx); void jno(codeblock_t* cb, size_t label_idx); void jnp(codeblock_t* cb, size_t label_idx); @@ -301,6 +302,36 @@ void jpe(codeblock_t* cb, size_t label_idx); void jpo(codeblock_t* cb, size_t label_idx); void js(codeblock_t* cb, size_t label_idx); void jz(codeblock_t* cb, size_t label_idx); +void ja_ptr(codeblock_t* cb, uint8_t* ptr); +void jae_ptr(codeblock_t* cb, uint8_t* ptr); +void jb_ptr(codeblock_t* cb, uint8_t* ptr); +void jbe_ptr(codeblock_t* cb, uint8_t* ptr); +void jc_ptr(codeblock_t* cb, uint8_t* ptr); +void je_ptr(codeblock_t* cb, uint8_t* ptr); +void jg_ptr(codeblock_t* cb, uint8_t* ptr); +void jge_ptr(codeblock_t* cb, uint8_t* ptr); +void jl_ptr(codeblock_t* cb, uint8_t* ptr); +void jle_ptr(codeblock_t* cb, uint8_t* ptr); +void jna_ptr(codeblock_t* cb, uint8_t* ptr); +void jnae_ptr(codeblock_t* cb, uint8_t* ptr); +void jnb_ptr(codeblock_t* cb, uint8_t* ptr); +void jnbe_ptr(codeblock_t* cb, uint8_t* ptr); +void jnc_ptr(codeblock_t* cb, uint8_t* ptr); +void jne_ptr(codeblock_t* cb, uint8_t* ptr); +void jng_ptr(codeblock_t* cb, uint8_t* ptr); +void jnge_ptr(codeblock_t* cb, uint8_t* ptr); +void jnl_ptr(codeblock_t* cb, uint8_t* ptr); +void jnle_ptr(codeblock_t* cb, uint8_t* ptr); +void jno_ptr(codeblock_t* cb, uint8_t* ptr); +void jnp_ptr(codeblock_t* cb, uint8_t* ptr); +void jns_ptr(codeblock_t* cb, uint8_t* ptr); +void jnz_ptr(codeblock_t* cb, uint8_t* ptr); +void jo_ptr(codeblock_t* cb, uint8_t* ptr); +void jp_ptr(codeblock_t* cb, uint8_t* ptr); +void 
jpe_ptr(codeblock_t* cb, uint8_t* ptr); +void jpo_ptr(codeblock_t* cb, uint8_t* ptr); +void js_ptr(codeblock_t* cb, uint8_t* ptr); +void jz_ptr(codeblock_t* cb, uint8_t* ptr); void jmp(codeblock_t* cb, size_t label_idx); void jmp_rm(codeblock_t* cb, x86opnd_t opnd); void jmp32(codeblock_t* cb, int32_t offset); @@ -321,6 +352,7 @@ void sar(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1); void shl(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1); void shr(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1); void sub(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1); +void test(codeblock_t* cb, x86opnd_t rm_opnd, x86opnd_t imm_opnd); void ud2(codeblock_t* cb); void xor(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1); diff --git a/ujit_asm_tests.c b/ujit_asm_tests.c index fd816f50b4..afc724f634 100644 --- a/ujit_asm_tests.c +++ b/ujit_asm_tests.c @@ -64,7 +64,8 @@ void run_tests() codeblock_t cb_obj; codeblock_t* cb = &cb_obj; - cb_init(cb, 4096); + uint8_t* mem_block = alloc_exec_mem(4096); + cb_init(cb, mem_block, 4096); // add cb_set_pos(cb, 0); add(cb, CL, imm_opnd(3)); check_bytes(cb, "80C103"); @@ -313,25 +314,13 @@ void run_tests() cb_set_pos(cb, 0); sub(cb, RAX, imm_opnd(2)); check_bytes(cb, "4883E802"); // test - /* - test( - delegate void (CodeBlock cb) { cb.instr(TEST, AL, 4); }, - "A804" - ); - test( - delegate void (CodeBlock cb) { cb.instr(TEST, CL, 255); }, - "F6C1FF" - ); - test( - delegate void (CodeBlock cb) { cb.instr(TEST, DL, 7); }, - "F6C207" - ); - test( - delegate void (CodeBlock cb) { cb.instr(TEST, DIL, 9); }, - "", - "40F6C709" - ); - */ + cb_set_pos(cb, 0); test(cb, CL, imm_opnd(8)); check_bytes(cb, "F6C108"); + cb_set_pos(cb, 0); test(cb, DL, imm_opnd(7)); check_bytes(cb, "F6C207"); + cb_set_pos(cb, 0); test(cb, RCX, imm_opnd(8)); check_bytes(cb, "F6C108"); + cb_set_pos(cb, 0); test(cb, mem_opnd(8, RDX, 8), imm_opnd(8)); check_bytes(cb, "F6420808"); + cb_set_pos(cb, 0); test(cb, mem_opnd(8, RDX, 8), imm_opnd(255)); check_bytes(cb, 
"F64208FF"); + cb_set_pos(cb, 0); test(cb, DX, imm_opnd(0xFFFF)); check_bytes(cb, "66F7C2FFFF"); + cb_set_pos(cb, 0); test(cb, mem_opnd(16, RDX, 8), imm_opnd(0xFFFF)); check_bytes(cb, "66F74208FFFF"); // xor cb_set_pos(cb, 0); xor(cb, EAX, EAX); check_bytes(cb, "31C0"); diff --git a/ujit_compile.c b/ujit_compile.c index cb717c8dd5..be74f5f0e0 100644 --- a/ujit_compile.c +++ b/ujit_compile.c @@ -28,7 +28,7 @@ typedef struct ctx_struct } ctx_t; // MicroJIT code generation function signature -typedef void (*codegen_fn)(codeblock_t* cb, ctx_t* ctx); +typedef void (*codegen_fn)(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx); // Map from YARV opcodes to code generation functions static st_table *gen_fns; @@ -37,24 +37,9 @@ static st_table *gen_fns; static codeblock_t block; static codeblock_t* cb = NULL; -// Initialize MicroJIT. Defined later in this file. -static void ujit_init(); - -// Ruby instruction entry -static void -ujit_instr_entry(codeblock_t* cb) -{ - for (size_t i = 0; i < sizeof(ujit_pre_call_bytes); ++i) - cb_write_byte(cb, ujit_pre_call_bytes[i]); -} - -// Ruby instruction exit -static void -ujit_instr_exit(codeblock_t* cb) -{ - for (size_t i = 0; i < sizeof(ujit_post_call_bytes); ++i) - cb_write_byte(cb, ujit_post_call_bytes[i]); -} +// Code block into which we write out-of-line machine code +static codeblock_t outline_block; +static codeblock_t* ocb = NULL; // Keep track of mapping from instructions to generated code // See comment for rb_encoded_insn_data in iseq.c @@ -85,6 +70,15 @@ VALUE ctx_get_arg(ctx_t* ctx, size_t arg_idx) return *(ctx->pc + arg_idx + 1); } +/* +Get an operand for the adjusted stack pointer address +*/ +x86opnd_t ctx_sp_opnd(ctx_t* ctx, size_t n) +{ + int32_t offset = (ctx->stack_diff) * 8; + return mem_opnd(64, RSI, offset); +} + /* Make space on the stack for N values Return a pointer to the new stack top @@ -113,6 +107,66 @@ x86opnd_t ctx_stack_pop(ctx_t* ctx, size_t n) return top; } +// Initialize MicroJIT. 
Defined later in this file. +static void ujit_init(); + +// Ruby instruction entry +static void +ujit_gen_entry(codeblock_t* cb) +{ + for (size_t i = 0; i < sizeof(ujit_pre_call_bytes); ++i) + cb_write_byte(cb, ujit_pre_call_bytes[i]); +} + +/** +Generate an inline exit to return to the interpreter +*/ +static void +ujit_gen_exit(codeblock_t* cb, ctx_t* ctx, VALUE* exit_pc) +{ + // Write the adjusted SP back into the CFP + if (ctx->stack_diff != 0) + { + x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 1); + lea(cb, RSI, stack_pointer); + mov(cb, mem_opnd(64, RDI, 8), RSI); + } + + // Directly return the next PC, which is a constant + mov(cb, RAX, const_ptr_opnd(exit_pc)); + + // Write PC back into the CFP + mov(cb, mem_opnd(64, RDI, 0), RAX); + + // Write the post call bytes + for (size_t i = 0; i < sizeof(ujit_post_call_bytes); ++i) + cb_write_byte(cb, ujit_post_call_bytes[i]); +} + +/** +Generate an out-of-line exit to return to the interpreter +*/ +uint8_t* +ujit_side_exit(codeblock_t* cb, ctx_t* ctx, VALUE* exit_pc) +{ + uint8_t* code_ptr = cb_get_ptr(cb, cb->write_pos); + + // Write back the old instruction at the exit PC + // Otherwise the interpreter may jump right back to the + // JITted code we're trying to exit + const void * const *table = rb_vm_get_insns_address_table(); + int opcode = (int)(*exit_pc); + void* old_instr = (void*)table[opcode]; + mov(cb, RAX, const_ptr_opnd(exit_pc)); + mov(cb, RCX, const_ptr_opnd(old_instr)); + mov(cb, mem_opnd(64, RAX, 0), RCX); + + // Generate the code to exit to the interpreter + ujit_gen_exit(cb, ctx, exit_pc); + + return code_ptr; +} + /* Generate a chunk of machine code for one individual bytecode instruction Eventually, this will handle multiple instructions in a sequence @@ -179,7 +233,7 @@ ujit_compile_insn(rb_iseq_t *iseq, unsigned int insn_idx, unsigned int* next_uji // Write the pre call bytes before the first instruction if (num_instrs == 0) { - ujit_instr_entry(cb); + ujit_gen_entry(cb); // Load the current SP
from the CFP into RSI mov(cb, RSI, mem_opnd(64, RDI, 8)); @@ -187,7 +241,7 @@ ujit_compile_insn(rb_iseq_t *iseq, unsigned int insn_idx, unsigned int* next_uji // Call the code generation function codegen_fn gen_fn = (codegen_fn)st_gen_fn; - gen_fn(cb, &ctx); + gen_fn(cb, ocb, &ctx); // Move to the next instruction insn_idx += insn_len(opcode); @@ -202,56 +256,15 @@ ujit_compile_insn(rb_iseq_t *iseq, unsigned int insn_idx, unsigned int* next_uji return NULL; } - // Write the adjusted SP back into the CFP - if (ctx.stack_diff != 0) - { - // The stack pointer points one above the actual stack top - x86opnd_t stack_pointer = ctx_stack_push(&ctx, 1); - lea(cb, RSI, stack_pointer); - mov(cb, mem_opnd(64, RDI, 8), RSI); - } - - // Directly return the next PC, which is a constant - mov(cb, RAX, const_ptr_opnd(ctx.pc)); - // Write PC back into the CFP - mov(cb, mem_opnd(64, RDI, 0), RAX); - - // Write the post call bytes - ujit_instr_exit(cb); - - /* - // Hack to patch a relative 32-bit jump to the instruction handler - int next_opcode = (int)*ctx.pc; - const void * const *table = rb_vm_get_insns_address_table(); - VALUE encoded = (VALUE)table[next_opcode]; - uint8_t* p_handler = (uint8_t*)encoded; - - uint8_t* p_code = &cb->mem_block[cb->write_pos]; - int64_t rel64 = ((int64_t)p_handler) - ((int64_t)p_code - 2 + 5); - - //printf("p_handler: %lld\n", (int64_t)p_handler); - //printf("rel64: %lld\n", rel64); - - uint8_t byte0 = cb->mem_block[cb->write_pos - 2]; - uint8_t byte1 = cb->mem_block[cb->write_pos - 1]; - - //printf("cb_init: %lld\n", (int64_t)&cb_init); - //printf("%lld\n", rel64); - - if (byte0 == 0xFF && byte1 == 0x20 && rel64 >= -2147483648 && rel64 <= 2147483647) - { - //printf("%02X %02X\n", (int)byte0, (int)byte1); - cb->write_pos -= 2; - jmp32(cb, (int32_t)rel64); - } - */ + // Generate code to exit to the interpreter + ujit_gen_exit(cb, &ctx, ctx.pc); addr2insn_bookkeeping(code_ptr, first_opcode); return code_ptr; } -void gen_dup(codeblock_t* cb, ctx_t* ctx) 
+void gen_dup(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { x86opnd_t dup_val = ctx_stack_pop(ctx, 1); x86opnd_t loc0 = ctx_stack_push(ctx, 1); @@ -261,25 +274,25 @@ void gen_dup(codeblock_t* cb, ctx_t* ctx) mov(cb, loc1, RAX); } -void gen_nop(codeblock_t* cb, ctx_t* ctx) +void gen_nop(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { // Do nothing } -void gen_pop(codeblock_t* cb, ctx_t* ctx) +void gen_pop(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { // Decrement SP ctx_stack_pop(ctx, 1); } -void gen_putnil(codeblock_t* cb, ctx_t* ctx) +void gen_putnil(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { // Write constant at SP x86opnd_t stack_top = ctx_stack_push(ctx, 1); mov(cb, stack_top, imm_opnd(Qnil)); } -void gen_putobject(codeblock_t* cb, ctx_t* ctx) +void gen_putobject(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { // Get the argument VALUE object = ctx_get_arg(ctx, 0); @@ -291,7 +304,7 @@ void gen_putobject(codeblock_t* cb, ctx_t* ctx) mov(cb, stack_top, RAX); } -void gen_putobject_int2fix(codeblock_t* cb, ctx_t* ctx) +void gen_putobject_int2fix(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { int opcode = ctx_get_opcode(ctx); int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 
0:1; @@ -301,7 +314,7 @@ void gen_putobject_int2fix(codeblock_t* cb, ctx_t* ctx) mov(cb, stack_top, imm_opnd(INT2FIX(cst_val))); } -void gen_putself(codeblock_t* cb, ctx_t* ctx) +void gen_putself(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { // Load self from CFP mov(cb, RAX, mem_opnd(64, RDI, 24)); @@ -311,7 +324,7 @@ void gen_putself(codeblock_t* cb, ctx_t* ctx) mov(cb, stack_top, RAX); } -void gen_getlocal_wc0(codeblock_t* cb, ctx_t* ctx) +void gen_getlocal_wc0(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { // Load environment pointer EP from CFP mov(cb, RDX, mem_opnd(64, RDI, 32)); @@ -328,10 +341,8 @@ void gen_getlocal_wc0(codeblock_t* cb, ctx_t* ctx) mov(cb, stack_top, RCX); } -void gen_setlocal_wc0(codeblock_t* cb, ctx_t* ctx) +void gen_setlocal_wc0(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx) { - //vm_env_write(vm_get_ep(GET_EP(), level), -(int)idx, val); - /* vm_env_write(const VALUE *ep, int index, VALUE v) { @@ -348,40 +359,35 @@ void gen_setlocal_wc0(codeblock_t* cb, ctx_t* ctx) // Load environment pointer EP from CFP mov(cb, RDX, mem_opnd(64, RDI, 32)); - // We could and the flags directly from the mem operand? 
- x86opnd_t flags_opnd = mem_opnd(64, RAX, 8 * VM_ENV_DATA_INDEX_FLAGS); - // flags & VM_ENV_FLAG_WB_REQUIRED - and(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED)); + x86opnd_t flags_opnd = mem_opnd(64, RDX, 8 * VM_ENV_DATA_INDEX_FLAGS); + test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED)); + // Create a side-exit to fall back to the interpreter + uint8_t* side_exit = ujit_side_exit(ocb, ctx, ctx->pc); - // TODO: you need a label_idx to jump to here // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 - //jnz(cb) + jnz_ptr(cb, side_exit); - - - - - - - // Get value to write from the stack + // Pop the value to write from the stack x86opnd_t stack_top = ctx_stack_pop(ctx, 1); mov(cb, RCX, stack_top); - // Compute the offset from BP to the local + // Write the value at the environment pointer int32_t local_idx = (int32_t)ctx_get_arg(ctx, 0); const int32_t offs = -8 * local_idx; - - // Store the local to the block mov(cb, mem_opnd(64, RDX, offs), RCX); } static void ujit_init() { - // 64MB ought to be enough for anybody + // Initialize the code blocks + size_t mem_size = 64 * 1024 * 1024; + uint8_t* mem_block = alloc_exec_mem(mem_size); cb = &block; - cb_init(cb, 64 * 1024 * 1024); + cb_init(cb, mem_block, mem_size/2); + ocb = &outline_block; + cb_init(ocb, mem_block + mem_size/2, mem_size/2); // Initialize the codegen function table gen_fns = rb_st_init_numtable(); @@ -396,5 +402,5 @@ static void ujit_init() st_insert(gen_fns, (st_data_t)BIN(putobject_INT2FIX_1_), (st_data_t)&gen_putobject_int2fix); st_insert(gen_fns, (st_data_t)BIN(putself), (st_data_t)&gen_putself); st_insert(gen_fns, (st_data_t)BIN(getlocal_WC_0), (st_data_t)&gen_getlocal_wc0); - //st_insert(gen_fns, (st_data_t)BIN(setlocal_WC_0), (st_data_t)&gen_setlocal_wc0); + st_insert(gen_fns, (st_data_t)BIN(setlocal_WC_0), (st_data_t)&gen_setlocal_wc0); }