diff --git a/misc/yjit_asm_tests.c b/misc/yjit_asm_tests.c
index 0bd11e4752..5708d3abad 100644
--- a/misc/yjit_asm_tests.c
+++ b/misc/yjit_asm_tests.c
@@ -426,10 +426,6 @@ void run_runtime_tests(void)
 
 int main(int argc, char** argv)
 {
-    // suppress -Wunused-function
-    (void)alloc_code_page;
-    (void)free_code_page;
-
     run_assembler_tests();
     run_runtime_tests();
 
diff --git a/yjit_asm.c b/yjit_asm.c
index 0d074d5e4d..49844145cb 100644
--- a/yjit_asm.c
+++ b/yjit_asm.c
@@ -147,7 +147,7 @@ static uint8_t *align_ptr(uint8_t *ptr, uint32_t multiple)
 }
 
 // Allocate a block of executable memory
-uint8_t *alloc_exec_mem(uint32_t mem_size)
+static uint8_t *alloc_exec_mem(uint32_t mem_size)
 {
 #ifndef _WIN32
     uint8_t *mem_block;
@@ -221,41 +221,6 @@ uint8_t *alloc_exec_mem(uint32_t mem_size)
 #endif
 }
 
-// Head of the list of free code pages
-static code_page_t *freelist = NULL;
-
-// Allocate a single code page from a pool of free pages
-code_page_t *alloc_code_page(void)
-{
-    // If the free list is empty
-    if (!freelist) {
-        // Allocate many pages at once
-        uint8_t *code_chunk = alloc_exec_mem(PAGES_PER_ALLOC * CODE_PAGE_SIZE);
-
-        // Do this in reverse order so we allocate our pages in order
-        for (int i = PAGES_PER_ALLOC - 1; i >= 0; --i) {
-            code_page_t *code_page = malloc(sizeof(code_page_t));
-            code_page->mem_block = code_chunk + i * CODE_PAGE_SIZE;
-            assert ((intptr_t)code_page->mem_block % CODE_PAGE_SIZE == 0);
-            code_page->page_size = CODE_PAGE_SIZE;
-            code_page->_next = freelist;
-            freelist = code_page;
-        }
-    }
-
-    code_page_t *free_page = freelist;
-    freelist = freelist->_next;
-
-    return free_page;
-}
-
-// Put a code page back into the allocation pool
-void free_code_page(code_page_t *code_page)
-{
-    code_page->_next = freelist;
-    freelist = code_page;
-}
-
 // Initialize a code block object
 void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size)
 {
diff --git a/yjit_asm.h b/yjit_asm.h
index 30682b5078..b1b2baae2e 100644
--- a/yjit_asm.h
+++ b/yjit_asm.h
@@ -5,12 +5,6 @@
 #include
 #include
 
-// Size of code pages to allocate
-#define CODE_PAGE_SIZE 16 * 1024
-
-// How many code pages to allocate at once
-#define PAGES_PER_ALLOC 512
-
 // Maximum number of labels to link
 #define MAX_LABELS 32
 
@@ -137,20 +131,6 @@ typedef struct X86Opnd
 
 } x86opnd_t;
 
-// Struct representing a code page
-typedef struct code_page_struct
-{
-    // Chunk of executable memory
-    uint8_t *mem_block;
-
-    // Size of the executable memory chunk
-    uint32_t page_size;
-
-    // Next node in the free list (private)
-    struct code_page_struct *_next;
-
-} code_page_t;
-
 // Dummy none/null operand
 static const x86opnd_t NO_OPND = { OPND_NONE, 0, .as.imm = 0 };
 
@@ -264,12 +244,10 @@ static inline x86opnd_t const_ptr_opnd(const void *ptr);
         sizeof(((struct_type*)0)->member_name[0]) * idx) \
 )
 
-// Machine code allocation
+// Allocate executable memory
 static uint8_t *alloc_exec_mem(uint32_t mem_size);
 
-static code_page_t *alloc_code_page(void);
-static void free_code_page(code_page_t *code_page);
-
+// Code block functions
 static inline void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size);
 static inline void cb_align_pos(codeblock_t *cb, uint32_t multiple);
 static inline void cb_set_pos(codeblock_t *cb, uint32_t pos);
diff --git a/yjit_codegen.c b/yjit_codegen.c
index 327e74b811..ac79ce1e75 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -23,7 +23,7 @@ static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
 // Map from method entries to code generation functions
 static st_table *yjit_method_codegen_table = NULL;
 
-// Code for exiting back to the interpreter from the leave insn
+// Code for exiting back to the interpreter from the leave instruction
 static void *leave_exit_code;
 
 // Code for full logic of returning from C method and exiting to the interpreter
@@ -303,22 +303,22 @@ _gen_counter_inc(codeblock_t *cb, int64_t *counter)
 }
 
 // Increment a counter then take an existing side exit.
-#define COUNTED_EXIT(side_exit, counter_name) _counted_side_exit(side_exit, &(yjit_runtime_counters . counter_name))
+#define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
 static uint8_t *
-_counted_side_exit(uint8_t *existing_side_exit, int64_t *counter)
+_counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
 {
     if (!rb_yjit_opts.gen_stats) return existing_side_exit;
 
-    uint8_t *start = cb_get_ptr(ocb, ocb->write_pos);
-    _gen_counter_inc(ocb, counter);
-    jmp_ptr(ocb, existing_side_exit);
+    uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
+    _gen_counter_inc(jit->ocb, counter);
+    jmp_ptr(jit->ocb, existing_side_exit);
     return start;
 }
 
 #else
 
 #define GEN_COUNTER_INC(cb, counter_name) ((void)0)
-#define COUNTED_EXIT(side_exit, counter_name) side_exit
+#define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
 
 #endif // if YJIT_STATS
 
@@ -512,7 +512,7 @@ yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
     cb_align_pos(cb, 64);
     uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
 
-    ADD_COMMENT(cb, "yjit prolog");
+    ADD_COMMENT(cb, "yjit entry");
 
     push(cb, REG_CFP);
     push(cb, REG_EC);
@@ -571,8 +571,8 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
 
     // We are at the end of the current instruction. Record the boundary.
     if (jit->record_boundary_patch_point) {
-        uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, ocb);
-        record_global_inval_patch(cb, exit_pos);
+        uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
+        record_global_inval_patch(jit->cb, exit_pos);
         jit->record_boundary_patch_point = false;
     }
 
@@ -620,7 +620,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
     };
 
     // Mark the start position of the block
-    block->start_pos = cb->write_pos;
+    block->start_addr = cb_get_write_ptr(cb);
 
     // For each instruction to compile
     for (;;) {
@@ -704,7 +704,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
     }
 
     // Mark the end position of the block
-    block->end_pos = cb->write_pos;
+    block->end_addr = cb_get_write_ptr(cb);
 
     // Store the index of the last instruction in the block
     block->end_idx = insn_idx;
@@ -1030,8 +1030,8 @@ gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 
     // Move the array from the stack into REG0 and check that it's an array.
     mov(cb, REG0, array_opnd);
-    guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(side_exit, expandarray_not_array));
-    guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(side_exit, expandarray_not_array));
+    guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
+    guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
 
     // If we don't actually want any values, then just return.
     if (num == 0) {
@@ -1053,7 +1053,7 @@ gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
     // Only handle the case where the number of values in the array is greater
     // than or equal to the number of values requested.
     cmp(cb, REG1, imm_opnd(num));
-    jl_ptr(cb, COUNTED_EXIT(side_exit, expandarray_rhs_too_small));
+    jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
 
     // Load the address of the embedded array into REG1.
     // (struct RArray *)(obj)->as.ary
@@ -1663,7 +1663,7 @@ gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE compt
         // Check that the slot is inside the extended table (num_slots > index)
         x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
         cmp(cb, num_slots, imm_opnd(ivar_index));
-        jle_ptr(cb, COUNTED_EXIT(side_exit, getivar_idx_out_of_range));
+        jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
     }
 
     // Get a pointer to the extended table
@@ -1708,7 +1708,7 @@ gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 
     // Guard that the receiver has the same class as the one from compile time.
     mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
-    guard_self_is_heap(cb, REG0, COUNTED_EXIT(side_exit, getivar_se_self_not_heap), ctx);
+    guard_self_is_heap(cb, REG0, COUNTED_EXIT(jit, side_exit, getivar_se_self_not_heap), ctx);
 
     jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
 
@@ -2153,7 +2153,7 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
         // Bail if idx is not a FIXNUM
         mov(cb, REG1, idx_opnd);
         test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
-        jz_ptr(cb, COUNTED_EXIT(side_exit, oaref_arg_not_fixnum));
+        jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
 
         // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
         // It never raises or allocates, so we don't need to write to cfp->pc.
@@ -2948,7 +2948,7 @@ jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_c
     // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
     call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
     test(cb, RAX, RAX);
-    jz_ptr(cb, COUNTED_EXIT(side_exit, send_se_protected_check_failed));
+    jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
 }
 
 // Return true when the codegen function generates code.
@@ -3195,7 +3195,7 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
     // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
     lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
     cmp(cb, REG_CFP, REG0);
-    jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
+    jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
 
     // Points to the receiver operand on the stack
     x86opnd_t recv = ctx_stack_opnd(ctx, argc);
@@ -3597,7 +3597,7 @@ gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const r
     ADD_COMMENT(cb, "stack overflow check");
     lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
     cmp(cb, REG_CFP, REG0);
-    jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
+    jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
 
     if (doing_kw_call) {
         // Here we're calling a method with keyword arguments and specifying
@@ -4126,7 +4126,7 @@ gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
     x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
     jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
     cmp(cb, ep_me_opnd, REG1);
-    jne_ptr(cb, COUNTED_EXIT(side_exit, invokesuper_me_changed));
+    jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
 
     if (!block) {
         // Guard no block passed
@@ -4139,7 +4139,7 @@ gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
         // EP is in REG0 from above
         x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
         cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
-        jne_ptr(cb, COUNTED_EXIT(side_exit, invokesuper_block));
+        jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
     }
 
     // Points to the receiver operand on the stack
@@ -4180,7 +4180,7 @@ gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 
     // Check for interrupts
     ADD_COMMENT(cb, "check for interrupts");
-    yjit_check_ints(cb, COUNTED_EXIT(side_exit, leave_se_interrupt));
+    yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
 
     // Load the return value
     mov(cb, REG0, ctx_stack_pop(ctx, 1));
@@ -4434,7 +4434,7 @@ gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 
     // Check the result. _Bool is one byte in SysV.
     test(cb, AL, AL);
-    jz_ptr(cb, COUNTED_EXIT(side_exit, opt_getinlinecache_miss));
+    jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
 
     // Push ic->entry->value
     mov(cb, REG0, const_ptr_opnd((void *)ic));
@@ -4487,7 +4487,7 @@ gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 
     // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
     test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
-    jnz_ptr(cb, COUNTED_EXIT(side_exit, gbpp_block_param_modified));
+    jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
 
     // Load the block handler for the current frame
     // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
@@ -4498,7 +4498,7 @@ gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
 
     // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
     cmp(cb, REG0_8, imm_opnd(0x1));
-    jnz_ptr(cb, COUNTED_EXIT(side_exit, gbpp_block_handler_not_iseq));
+    jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
 
     // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
     mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
diff --git a/yjit_core.c b/yjit_core.c
index 2b6fb47cd2..8d9c267908 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -716,11 +716,12 @@ gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_contex
         add_block_version(block->blockid, block);
 
         // Patch the last branch address
-        last_branch->dst_addrs[0] = cb_get_ptr(cb, block->start_pos);
+        last_branch->dst_addrs[0] = block->start_addr;
         rb_darray_append(&block->incoming, last_branch);
         last_branch->blocks[0] = block;
 
-        RUBY_ASSERT(block->start_pos == last_branch->end_pos);
+        // This block should immediately follow the last branch
+        RUBY_ASSERT(block->start_addr == last_branch->end_addr);
     }
 
     return first_block;
@@ -801,41 +802,41 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
     // If this block hasn't yet been compiled
     if (!p_block) {
         // If the new block can be generated right after the branch (at cb->write_pos)
-        if (cb->write_pos == branch->end_pos && branch->start_pos >= yjit_codepage_frozen_bytes) {
+        if (cb_get_write_ptr(cb) == branch->end_addr && branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
             // This branch should be terminating its block
-            RUBY_ASSERT(branch->end_pos == branch->block->end_pos);
+            RUBY_ASSERT(branch->end_addr == branch->block->end_addr);
 
             // Change the branch shape to indicate the target block will be placed next
             branch->shape = (uint8_t)target_idx;
 
             // Rewrite the branch with the new, potentially more compact shape
-            cb_set_pos(cb, branch->start_pos);
+            cb_set_write_ptr(cb, branch->start_addr);
             branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-            RUBY_ASSERT(cb->write_pos <= branch->end_pos && "can't enlarge branches");
-            branch->end_pos = cb->write_pos;
-            branch->block->end_pos = cb->write_pos;
+            RUBY_ASSERT(cb_get_write_ptr(cb) <= branch->end_addr && "can't enlarge branches");
+            branch->end_addr = cb_get_write_ptr(cb);
+            branch->block->end_addr = cb_get_write_ptr(cb);
         }
 
         // Compile the new block version
         p_block = gen_block_version(target, target_ctx, ec);
         RUBY_ASSERT(p_block);
-        RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_pos != branch->end_pos));
+        RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));
     }
 
     // Add this branch to the list of incoming branches for the target
     rb_darray_append(&p_block->incoming, branch);
 
     // Update the branch target address
-    dst_addr = cb_get_ptr(cb, p_block->start_pos);
+    dst_addr = p_block->start_addr;
     branch->dst_addrs[target_idx] = dst_addr;
 
     // Rewrite the branch with the new jump target address
-    if (branch->start_pos >= yjit_codepage_frozen_bytes) {
+    if (branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
         RUBY_ASSERT(branch->dst_addrs[0] != NULL);
 
         uint32_t cur_pos = cb->write_pos;
-        cb_set_pos(cb, branch->start_pos);
+        cb_set_write_ptr(cb, branch->start_addr);
         branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-        RUBY_ASSERT(cb->write_pos == branch->end_pos && "branch can't change size");
+        RUBY_ASSERT(cb_get_write_ptr(cb) == branch->end_addr && "branch can't change size");
         cb_set_pos(cb, cur_pos);
     }
 
@@ -873,7 +874,7 @@ get_branch_target(
         branch->blocks[target_idx] = p_block;
 
         // Return a pointer to the compiled code
-        return cb_get_ptr(cb, p_block->start_pos);
+        return p_block->start_addr;
     }
 
     // Generate an outlined stub that will call branch_stub_hit()
@@ -916,9 +917,9 @@ gen_branch(
     branch->dst_addrs[1] = ctx1? get_branch_target(target1, ctx1, branch, 1):NULL;
 
     // Call the branch generation function
-    branch->start_pos = cb->write_pos;
+    branch->start_addr = cb_get_write_ptr(cb);
     gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], SHAPE_DEFAULT);
-    branch->end_pos = cb->write_pos;
+    branch->end_addr = cb_get_write_ptr(cb);
 }
 
 static void
@@ -957,22 +958,22 @@ gen_direct_jump(
     if (p_block) {
         rb_darray_append(&p_block->incoming, branch);
 
-        branch->dst_addrs[0] = cb_get_ptr(cb, p_block->start_pos);
+        branch->dst_addrs[0] = p_block->start_addr;
         branch->blocks[0] = p_block;
         branch->shape = SHAPE_DEFAULT;
 
         // Call the branch generation function
-        branch->start_pos = cb->write_pos;
+        branch->start_addr = cb_get_write_ptr(cb);
         gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
-        branch->end_pos = cb->write_pos;
+        branch->end_addr = cb_get_write_ptr(cb);
     }
     else {
         // This NULL target address signals gen_block_version() to compile the
        // target block right after this one (fallthrough).
         branch->dst_addrs[0] = NULL;
         branch->shape = SHAPE_NEXT0;
-        branch->start_pos = cb->write_pos;
-        branch->end_pos = cb->write_pos;
+        branch->start_addr = cb_get_write_ptr(cb);
+        branch->end_addr = cb_get_write_ptr(cb);
     }
 }
 
@@ -1006,9 +1007,9 @@ defer_compilation(
 
     // Call the branch generation function
     codeblock_t *cb = jit->cb;
-    branch->start_pos = cb->write_pos;
+    branch->start_addr = cb_get_write_ptr(cb);
    gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
-    branch->end_pos = cb->write_pos;
+    branch->end_addr = cb_get_write_ptr(cb);
 }
 
 // Remove all references to a block then free it.
@@ -1096,7 +1097,7 @@ invalidate_block_version(block_t *block)
     block_array_remove(versions, block);
 
     // Get a pointer to the generated code for this block
-    uint8_t *code_ptr = cb_get_ptr(cb, block->start_pos);
+    uint8_t *code_ptr = block->start_addr;
 
     // For each incoming branch
     rb_darray_for(block->incoming, incoming_idx) {
@@ -1109,7 +1110,7 @@ invalidate_block_version(block_t *block)
         branch->blocks[target_idx] = NULL;
 
         // Don't patch frozen code region
-        if (branch->start_pos < yjit_codepage_frozen_bytes) {
+        if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
             continue;
         }
 
@@ -1122,7 +1123,7 @@ invalidate_block_version(block_t *block)
         );
 
         // Check if the invalidated block immediately follows
-        bool target_next = block->start_pos == branch->end_pos;
+        bool target_next = (block->start_addr == branch->end_addr);
 
         if (target_next) {
             // The new block will no longer be adjacent
@@ -1132,18 +1133,18 @@ invalidate_block_version(block_t *block)
         // Rewrite the branch with the new jump target address
         RUBY_ASSERT(branch->dst_addrs[0] != NULL);
 
         uint32_t cur_pos = cb->write_pos;
-        cb_set_pos(cb, branch->start_pos);
+        cb_set_write_ptr(cb, branch->start_addr);
         branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-        branch->end_pos = cb->write_pos;
-        branch->block->end_pos = cb->write_pos;
+        branch->end_addr = cb_get_write_ptr(cb);
+        branch->block->end_addr = cb_get_write_ptr(cb);
         cb_set_pos(cb, cur_pos);
 
-        if (target_next && branch->end_pos > block->end_pos) {
-            fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%d block_size=%d\n",
+        if (target_next && branch->end_addr > block->end_addr) {
+            fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%ld block_size=%ld\n",
                 branch->block->blockid.idx, block->blockid.idx,
-                branch->end_pos - block->end_pos,
-                block->end_pos - block->start_pos);
+                branch->end_addr - block->end_addr,
+                block->end_addr - block->start_addr);
             yjit_print_iseq(branch->block->blockid.iseq);
             rb_bug("yjit invalidate rewrote branch past end of invalidated block");
         }
diff --git a/yjit_core.h b/yjit_core.h
index e623ed72aa..6cd3ec0095 100644
--- a/yjit_core.h
+++ b/yjit_core.h
@@ -12,11 +12,10 @@
 
 // Scratch registers used by YJIT
 #define REG0 RAX
-#define REG1 RCX
 #define REG0_32 EAX
-#define REG1_32 ECX
-
 #define REG0_8 AL
+#define REG1 RCX
+#define REG1_32 ECX
 
 // Maximum number of temp value types we keep track of
 #define MAX_TEMP_TYPES 8
@@ -193,8 +192,8 @@ typedef struct yjit_branch_entry
     struct yjit_block_version *block;
 
     // Positions where the generated code starts and ends
-    uint32_t start_pos;
-    uint32_t end_pos;
+    uint8_t* start_addr;
+    uint8_t* end_addr;
 
     // Context right after the branch instruction
     ctx_t src_ctx;
@@ -242,8 +241,8 @@ typedef struct yjit_block_version
     ctx_t ctx;
 
     // Positions where the generated code starts and ends
-    uint32_t start_pos;
-    uint32_t end_pos;
+    uint8_t* start_addr;
+    uint8_t* end_addr;
 
     // List of incoming branches (from predecessors)
     branch_array_t incoming;
@@ -259,9 +258,6 @@ typedef struct yjit_block_version
     // block in the system.
     cme_dependency_array_t cme_dependencies;
 
-    // Code page this block lives on
-    VALUE code_page;
-
     // Index one past the last instruction in the iseq
     uint32_t end_idx;
 
diff --git a/yjit_iface.c b/yjit_iface.c
index 9c52c4d2e3..82477c65b3 100644
--- a/yjit_iface.c
+++ b/yjit_iface.c
@@ -36,6 +36,12 @@
 extern st_table *rb_encoded_insn_data;
 struct rb_yjit_options rb_yjit_opts;
 
+// Size of code pages to allocate
+#define CODE_PAGE_SIZE 16 * 1024
+
+// How many code pages to allocate at once
+#define PAGES_PER_ALLOC 512
+
 static const rb_data_type_t yjit_block_type = {
     "YJIT/Block",
     {0, 0, 0, },
@@ -54,6 +60,7 @@ yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx)
 }
 
 // For debugging. Print the disassembly of an iseq.
+RBIMPL_ATTR_MAYBE_UNUSED()
 static void
 yjit_print_iseq(const rb_iseq_t *iseq)
 {
@@ -520,8 +527,7 @@ block_address(VALUE self)
 {
     block_t * block;
     TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
-    uint8_t *code_addr = cb_get_ptr(cb, block->start_pos);
-    return LONG2NUM((intptr_t)code_addr);
+    return LONG2NUM((intptr_t)block->start_addr);
 }
 
 /* Get the machine code for YJIT::Block as a binary string */
@@ -532,8 +538,8 @@ block_code(VALUE self)
     TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
 
     return (VALUE)rb_str_new(
-        (const char*)cb->mem_block + block->start_pos,
-        block->end_pos - block->start_pos
+        (const char*)block->start_addr,
+        block->end_addr - block->start_addr
     );
 }
 
@@ -880,7 +886,7 @@ rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body)
             }
 
             // Mark the machine code page this block lives on
-            rb_gc_mark_movable(block->code_page);
+            //rb_gc_mark_movable(block->code_page);
         }
     }
 }
@@ -926,7 +932,7 @@ rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body)
             }
 
             // Update the machine code page this block lives on
-            block->code_page = rb_gc_location(block->code_page);
+            //block->code_page = rb_gc_location(block->code_page);
         }
     }
 }
@@ -949,10 +955,39 @@ rb_yjit_iseq_free(const struct rb_iseq_constant_body *body)
     rb_darray_free(body->yjit_blocks);
 }
 
-static void
-yjit_code_page_free(void *code_page)
+// Struct representing a code page
+typedef struct code_page_struct
 {
-    free_code_page((code_page_t*)code_page);
+    // Chunk of executable memory
+    uint8_t* mem_block;
+
+    // Size of the executable memory chunk
+    uint32_t page_size;
+
+    // Inline code block
+    codeblock_t cb;
+
+    // Outlined code block
+    codeblock_t ocb;
+
+    // Next node in the free list (private)
+    struct code_page_struct* _next;
+
+} code_page_t;
+
+// Current code page we are writing machine code into
+static VALUE yjit_cur_code_page = Qfalse;
+
+// Head of the list of free code pages
+static code_page_t *code_page_freelist = NULL;
+
+// Free a code page, add it to the free list
+static void
+yjit_code_page_free(void *voidp)
+{
+    code_page_t* code_page = (code_page_t*)voidp;
+    code_page->_next = code_page_freelist;
+    code_page_freelist = code_page;
 }
 
 // Custom type for interacting with the GC
@@ -963,19 +998,47 @@ static const rb_data_type_t yjit_code_page_type = {
 };
 
 // Allocate a code page and wrap it into a Ruby object owned by the GC
-VALUE rb_yjit_code_page_alloc(void)
+static VALUE
+rb_yjit_code_page_alloc(void)
 {
-    code_page_t *code_page = alloc_code_page();
-    VALUE cp_obj = TypedData_Wrap_Struct(0, &yjit_code_page_type, code_page);
+    // If the free list is empty
+    if (!code_page_freelist) {
+        // Allocate many pages at once
+        uint8_t* code_chunk = alloc_exec_mem(PAGES_PER_ALLOC * CODE_PAGE_SIZE);
 
-    // Write a pointer to the wrapper object at the beginning of the code page
-    *((VALUE*)code_page->mem_block) = cp_obj;
+        // Do this in reverse order so we allocate our pages in order
+        for (int i = PAGES_PER_ALLOC - 1; i >= 0; --i) {
+            code_page_t* code_page = malloc(sizeof(code_page_t));
+            code_page->mem_block = code_chunk + i * CODE_PAGE_SIZE;
+            assert ((intptr_t)code_page->mem_block % CODE_PAGE_SIZE == 0);
+            code_page->page_size = CODE_PAGE_SIZE;
+            code_page->_next = code_page_freelist;
+            code_page_freelist = code_page;
+        }
+    }
 
-    return cp_obj;
+    code_page_t* code_page = code_page_freelist;
+    code_page_freelist = code_page_freelist->_next;
+
+    // Create a Ruby wrapper struct for the code page object
+    VALUE wrapper = TypedData_Wrap_Struct(0, &yjit_code_page_type, code_page);
+
+    // Write a pointer to the wrapper object on the page
+    *((VALUE*)code_page->mem_block) = wrapper;
+
+    // Initialize the code blocks
+    uint8_t* page_start = code_page->mem_block + sizeof(VALUE);
+    uint8_t* page_end = code_page->mem_block + CODE_PAGE_SIZE;
+    uint32_t halfsize = (uint32_t)(page_end - page_start) / 2;
+    cb_init(&code_page->cb, page_start, halfsize);
+    cb_init(&code_page->ocb, page_start + halfsize, halfsize);
+
+    return wrapper;
 }
 
 // Unwrap the Ruby object representing a code page
-code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj)
+static code_page_t *
+rb_yjit_code_page_unwrap(VALUE cp_obj)
 {
     code_page_t * code_page;
     TypedData_Get_Struct(cp_obj, code_page_t, &yjit_code_page_type, code_page);
@@ -983,21 +1046,23 @@ code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj)
 }
 
 // Get the code page wrapper object for a code pointer
-VALUE rb_yjit_code_page_from_ptr(uint8_t *code_ptr)
+static VALUE
+rb_yjit_code_page_from_ptr(uint8_t* code_ptr)
 {
-    VALUE *page_start = (VALUE*)((intptr_t)code_ptr & ~(CODE_PAGE_SIZE - 1));
+    VALUE* page_start = (VALUE*)((intptr_t)code_ptr & ~(CODE_PAGE_SIZE - 1));
     VALUE wrapper = *page_start;
     return wrapper;
 }
 
 // Get the inline code block corresponding to a code pointer
-void rb_yjit_get_cb(codeblock_t *cb, uint8_t *code_ptr)
+static void
+yjit_get_cb(codeblock_t* cb, uint8_t* code_ptr)
 {
     VALUE page_wrapper = rb_yjit_code_page_from_ptr(code_ptr);
     code_page_t *code_page = rb_yjit_code_page_unwrap(page_wrapper);
 
     // A pointer to the page wrapper object is written at the start of the code page
-    uint8_t *mem_block = code_page->mem_block + sizeof(VALUE);
+    uint8_t* mem_block = code_page->mem_block + sizeof(VALUE);
     uint32_t mem_size = (code_page->page_size/2) - sizeof(VALUE);
     RUBY_ASSERT(mem_block);
 
@@ -1006,13 +1071,14 @@ void rb_yjit_get_cb(codeblock_t *cb, uint8_t *code_ptr)
 }
 
 // Get the outlined code block corresponding to a code pointer
-void rb_yjit_get_ocb(codeblock_t *cb, uint8_t *code_ptr)
+static void
+yjit_get_ocb(codeblock_t* cb, uint8_t* code_ptr)
 {
     VALUE page_wrapper = rb_yjit_code_page_from_ptr(code_ptr);
     code_page_t *code_page = rb_yjit_code_page_unwrap(page_wrapper);
 
     // A pointer to the page wrapper object is written at the start of the code page
-    uint8_t *mem_block = code_page->mem_block + (code_page->page_size/2);
+    uint8_t* mem_block = code_page->mem_block + (code_page->page_size/2);
     uint32_t mem_size = code_page->page_size/2;
     RUBY_ASSERT(mem_block);
 
@@ -1020,6 +1086,39 @@ void rb_yjit_get_ocb(codeblock_t *cb, uint8_t *code_ptr)
     cb_init(cb, mem_block, mem_size);
 }
 
+// Get the current code page or allocate a new one
+static VALUE
+yjit_get_code_page(uint32_t cb_bytes_needed, uint32_t ocb_bytes_needed)
+{
+    // If this is the first code page
+    if (yjit_cur_code_page == Qfalse) {
+        yjit_cur_code_page = rb_yjit_code_page_alloc();
+    }
+
+    // Get the current code page
+    code_page_t *code_page = rb_yjit_code_page_unwrap(yjit_cur_code_page);
+
+    // Compute how many bytes are left in the code blocks
+    uint32_t cb_bytes_left = code_page->cb.mem_size - code_page->cb.write_pos;
+    uint32_t ocb_bytes_left = code_page->ocb.mem_size - code_page->ocb.write_pos;
+    RUBY_ASSERT_ALWAYS(cb_bytes_needed <= code_page->cb.mem_size);
+    RUBY_ASSERT_ALWAYS(ocb_bytes_needed <= code_page->ocb.mem_size);
+
+    // If there's enough space left in the current code page
+    if (cb_bytes_needed <= cb_bytes_left && ocb_bytes_needed <= ocb_bytes_left) {
+        return yjit_cur_code_page;
+    }
+
+    // Allocate a new code page
+    yjit_cur_code_page = rb_yjit_code_page_alloc();
+    code_page_t *new_code_page = rb_yjit_code_page_unwrap(yjit_cur_code_page);
+
+    // Jump to the new code page
+    jmp_ptr(&code_page->cb, new_code_page->cb.mem_block);
+
+    return yjit_cur_code_page;
+}
+
 bool
 rb_yjit_enabled_p(void)
 {
diff --git a/yjit_iface.h b/yjit_iface.h
index 92443dceb6..7fed846b4d 100644
--- a/yjit_iface.h
+++ b/yjit_iface.h
@@ -30,9 +30,9 @@ static const VALUE *yjit_count_side_exit_op(const VALUE *exit_pc);
 static void yjit_unlink_method_lookup_dependency(block_t *block);
 static void yjit_block_assumptions_free(block_t *block);
 
-VALUE rb_yjit_code_page_alloc(void);
-code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj);
-void rb_yjit_get_cb(codeblock_t *cb, uint8_t *code_ptr);
-void rb_yjit_get_ocb(codeblock_t *cb, uint8_t *code_ptr);
+static VALUE yjit_get_code_page(uint32_t cb_bytes_needed, uint32_t ocb_bytes_needed);
+//code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj);
+//void rb_yjit_get_cb(codeblock_t* cb, uint8_t* code_ptr);
+//void rb_yjit_get_ocb(codeblock_t* cb, uint8_t* code_ptr);
 
 #endif // #ifndef YJIT_IFACE_H
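
Note (not part of the patch): a minimal standalone sketch of how the new code page layout is carved up, following rb_yjit_code_page_alloc() above. The wrapper VALUE pointer sits at the start of the page, and the remaining bytes are split evenly between the inline (cb) and outlined (ocb) halves. The type and function names below are illustrative only, not YJIT identifiers.

#include <stdint.h>
#include <stdio.h>

#define CODE_PAGE_SIZE (16 * 1024)   /* matches the constant added in yjit_iface.c */

/* Illustrative layout descriptor (not a YJIT type) */
typedef struct {
    uint32_t cb_offset;   /* start of the inline half, relative to the page base */
    uint32_t ocb_offset;  /* start of the outlined half */
    uint32_t half_size;   /* capacity of each half */
} page_layout_t;

static page_layout_t carve_code_page(void)
{
    /* The first sizeof(VALUE) bytes hold a pointer back to the wrapper object;
       sizeof(uintptr_t) stands in for sizeof(VALUE) here. */
    uint32_t header_size = (uint32_t)sizeof(uintptr_t);
    uint32_t half_size = (CODE_PAGE_SIZE - header_size) / 2;

    page_layout_t layout;
    layout.cb_offset = header_size;               /* inline code right after the header */
    layout.ocb_offset = header_size + half_size;  /* outlined code in the second half */
    layout.half_size = half_size;
    return layout;
}

int main(void)
{
    page_layout_t layout = carve_code_page();
    printf("cb at +%u, ocb at +%u, %u bytes each\n",
           layout.cb_offset, layout.ocb_offset, layout.half_size);
    return 0;
}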