mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
YJIT code pages refactoring for code GC (#5073)
* New code page allocation logic
* Fix leaked globals
* Fix leaked symbols, yjit asm tests
* Make COUNTED_EXIT take a jit argument, so we can eliminate global ocb
* Remove extra whitespace
* Change block start_pos/end_pos to be pointers instead of uint32_t
* Change branch end_pos and start_pos to end_addr, start_addr
This commit is contained in:
parent
85b4cf16e2
commit
2421527d6e
Notes:
git
2021-11-05 05:06:08 +09:00
Merged-By: maximecb <maximecb@ruby-lang.org>
8 changed files with 195 additions and 160 deletions
|
@ -426,10 +426,6 @@ void run_runtime_tests(void)
|
|||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// suppress -Wunused-function
|
||||
(void)alloc_code_page;
|
||||
(void)free_code_page;
|
||||
|
||||
run_assembler_tests();
|
||||
run_runtime_tests();
|
||||
|
||||
|
|
37
yjit_asm.c
37
yjit_asm.c
|
@ -147,7 +147,7 @@ static uint8_t *align_ptr(uint8_t *ptr, uint32_t multiple)
|
|||
}
|
||||
|
||||
// Allocate a block of executable memory
|
||||
uint8_t *alloc_exec_mem(uint32_t mem_size)
|
||||
static uint8_t *alloc_exec_mem(uint32_t mem_size)
|
||||
{
|
||||
#ifndef _WIN32
|
||||
uint8_t *mem_block;
|
||||
|
@ -221,41 +221,6 @@ uint8_t *alloc_exec_mem(uint32_t mem_size)
|
|||
#endif
|
||||
}
|
||||
|
||||
// Head of the list of free code pages
|
||||
static code_page_t *freelist = NULL;
|
||||
|
||||
// Allocate a single code page from a pool of free pages
|
||||
code_page_t *alloc_code_page(void)
|
||||
{
|
||||
// If the free list is empty
|
||||
if (!freelist) {
|
||||
// Allocate many pages at once
|
||||
uint8_t *code_chunk = alloc_exec_mem(PAGES_PER_ALLOC * CODE_PAGE_SIZE);
|
||||
|
||||
// Do this in reverse order so we allocate our pages in order
|
||||
for (int i = PAGES_PER_ALLOC - 1; i >= 0; --i) {
|
||||
code_page_t *code_page = malloc(sizeof(code_page_t));
|
||||
code_page->mem_block = code_chunk + i * CODE_PAGE_SIZE;
|
||||
assert ((intptr_t)code_page->mem_block % CODE_PAGE_SIZE == 0);
|
||||
code_page->page_size = CODE_PAGE_SIZE;
|
||||
code_page->_next = freelist;
|
||||
freelist = code_page;
|
||||
}
|
||||
}
|
||||
|
||||
code_page_t *free_page = freelist;
|
||||
freelist = freelist->_next;
|
||||
|
||||
return free_page;
|
||||
}
|
||||
|
||||
// Put a code page back into the allocation pool
|
||||
void free_code_page(code_page_t *code_page)
|
||||
{
|
||||
code_page->_next = freelist;
|
||||
freelist = code_page;
|
||||
}
|
||||
|
||||
// Initialize a code block object
|
||||
void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size)
|
||||
{
|
||||
|
|
26
yjit_asm.h
26
yjit_asm.h
|
@ -5,12 +5,6 @@
|
|||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// Size of code pages to allocate
|
||||
#define CODE_PAGE_SIZE 16 * 1024
|
||||
|
||||
// How many code pages to allocate at once
|
||||
#define PAGES_PER_ALLOC 512
|
||||
|
||||
// Maximum number of labels to link
|
||||
#define MAX_LABELS 32
|
||||
|
||||
|
@ -137,20 +131,6 @@ typedef struct X86Opnd
|
|||
|
||||
} x86opnd_t;
|
||||
|
||||
// Struct representing a code page
|
||||
typedef struct code_page_struct
|
||||
{
|
||||
// Chunk of executable memory
|
||||
uint8_t *mem_block;
|
||||
|
||||
// Size of the executable memory chunk
|
||||
uint32_t page_size;
|
||||
|
||||
// Next node in the free list (private)
|
||||
struct code_page_struct *_next;
|
||||
|
||||
} code_page_t;
|
||||
|
||||
// Dummy none/null operand
|
||||
static const x86opnd_t NO_OPND = { OPND_NONE, 0, .as.imm = 0 };
|
||||
|
||||
|
@ -264,12 +244,10 @@ static inline x86opnd_t const_ptr_opnd(const void *ptr);
|
|||
sizeof(((struct_type*)0)->member_name[0]) * idx) \
|
||||
)
|
||||
|
||||
// Machine code allocation
|
||||
// Allocate executable memory
|
||||
static uint8_t *alloc_exec_mem(uint32_t mem_size);
|
||||
static code_page_t *alloc_code_page(void);
|
||||
static void free_code_page(code_page_t *code_page);
|
||||
|
||||
|
||||
// Code block functions
|
||||
static inline void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size);
|
||||
static inline void cb_align_pos(codeblock_t *cb, uint32_t multiple);
|
||||
static inline void cb_set_pos(codeblock_t *cb, uint32_t pos);
|
||||
|
|
|
@ -23,7 +23,7 @@ static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
|
|||
// Map from method entries to code generation functions
|
||||
static st_table *yjit_method_codegen_table = NULL;
|
||||
|
||||
// Code for exiting back to the interpreter from the leave insn
|
||||
// Code for exiting back to the interpreter from the leave instruction
|
||||
static void *leave_exit_code;
|
||||
|
||||
// Code for full logic of returning from C method and exiting to the interpreter
|
||||
|
@ -303,22 +303,22 @@ _gen_counter_inc(codeblock_t *cb, int64_t *counter)
|
|||
}
|
||||
|
||||
// Increment a counter then take an existing side exit.
|
||||
#define COUNTED_EXIT(side_exit, counter_name) _counted_side_exit(side_exit, &(yjit_runtime_counters . counter_name))
|
||||
#define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
|
||||
static uint8_t *
|
||||
_counted_side_exit(uint8_t *existing_side_exit, int64_t *counter)
|
||||
_counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
|
||||
{
|
||||
if (!rb_yjit_opts.gen_stats) return existing_side_exit;
|
||||
|
||||
uint8_t *start = cb_get_ptr(ocb, ocb->write_pos);
|
||||
_gen_counter_inc(ocb, counter);
|
||||
jmp_ptr(ocb, existing_side_exit);
|
||||
uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
|
||||
_gen_counter_inc(jit->ocb, counter);
|
||||
jmp_ptr(jit->ocb, existing_side_exit);
|
||||
return start;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define GEN_COUNTER_INC(cb, counter_name) ((void)0)
|
||||
#define COUNTED_EXIT(side_exit, counter_name) side_exit
|
||||
#define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
|
||||
|
||||
#endif // if YJIT_STATS
|
||||
|
||||
|
@ -512,7 +512,7 @@ yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
|
|||
cb_align_pos(cb, 64);
|
||||
|
||||
uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
|
||||
ADD_COMMENT(cb, "yjit prolog");
|
||||
ADD_COMMENT(cb, "yjit entry");
|
||||
|
||||
push(cb, REG_CFP);
|
||||
push(cb, REG_EC);
|
||||
|
@ -571,8 +571,8 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
|
|||
|
||||
// We are at the end of the current instruction. Record the boundary.
|
||||
if (jit->record_boundary_patch_point) {
|
||||
uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, ocb);
|
||||
record_global_inval_patch(cb, exit_pos);
|
||||
uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
|
||||
record_global_inval_patch(jit->cb, exit_pos);
|
||||
jit->record_boundary_patch_point = false;
|
||||
}
|
||||
|
||||
|
@ -620,7 +620,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
|
|||
};
|
||||
|
||||
// Mark the start position of the block
|
||||
block->start_pos = cb->write_pos;
|
||||
block->start_addr = cb_get_write_ptr(cb);
|
||||
|
||||
// For each instruction to compile
|
||||
for (;;) {
|
||||
|
@ -704,7 +704,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
|
|||
}
|
||||
|
||||
// Mark the end position of the block
|
||||
block->end_pos = cb->write_pos;
|
||||
block->end_addr = cb_get_write_ptr(cb);
|
||||
|
||||
// Store the index of the last instruction in the block
|
||||
block->end_idx = insn_idx;
|
||||
|
@ -1030,8 +1030,8 @@ gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
|
||||
// Move the array from the stack into REG0 and check that it's an array.
|
||||
mov(cb, REG0, array_opnd);
|
||||
guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(side_exit, expandarray_not_array));
|
||||
guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(side_exit, expandarray_not_array));
|
||||
guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
|
||||
guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
|
||||
|
||||
// If we don't actually want any values, then just return.
|
||||
if (num == 0) {
|
||||
|
@ -1053,7 +1053,7 @@ gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
// Only handle the case where the number of values in the array is greater
|
||||
// than or equal to the number of values requested.
|
||||
cmp(cb, REG1, imm_opnd(num));
|
||||
jl_ptr(cb, COUNTED_EXIT(side_exit, expandarray_rhs_too_small));
|
||||
jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
|
||||
|
||||
// Load the address of the embedded array into REG1.
|
||||
// (struct RArray *)(obj)->as.ary
|
||||
|
@ -1663,7 +1663,7 @@ gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE compt
|
|||
// Check that the slot is inside the extended table (num_slots > index)
|
||||
x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
|
||||
cmp(cb, num_slots, imm_opnd(ivar_index));
|
||||
jle_ptr(cb, COUNTED_EXIT(side_exit, getivar_idx_out_of_range));
|
||||
jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
|
||||
}
|
||||
|
||||
// Get a pointer to the extended table
|
||||
|
@ -1708,7 +1708,7 @@ gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
|
||||
// Guard that the receiver has the same class as the one from compile time.
|
||||
mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
|
||||
guard_self_is_heap(cb, REG0, COUNTED_EXIT(side_exit, getivar_se_self_not_heap), ctx);
|
||||
guard_self_is_heap(cb, REG0, COUNTED_EXIT(jit, side_exit, getivar_se_self_not_heap), ctx);
|
||||
|
||||
jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
|
||||
|
||||
|
@ -2153,7 +2153,7 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
// Bail if idx is not a FIXNUM
|
||||
mov(cb, REG1, idx_opnd);
|
||||
test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
|
||||
jz_ptr(cb, COUNTED_EXIT(side_exit, oaref_arg_not_fixnum));
|
||||
jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
|
||||
|
||||
// Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
|
||||
// It never raises or allocates, so we don't need to write to cfp->pc.
|
||||
|
@ -2948,7 +2948,7 @@ jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_c
|
|||
// VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
|
||||
call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
|
||||
test(cb, RAX, RAX);
|
||||
jz_ptr(cb, COUNTED_EXIT(side_exit, send_se_protected_check_failed));
|
||||
jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
|
||||
}
|
||||
|
||||
// Return true when the codegen function generates code.
|
||||
|
@ -3195,7 +3195,7 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
|
|||
// REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
|
||||
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
|
||||
cmp(cb, REG_CFP, REG0);
|
||||
jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
|
||||
jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
|
||||
|
||||
// Points to the receiver operand on the stack
|
||||
x86opnd_t recv = ctx_stack_opnd(ctx, argc);
|
||||
|
@ -3597,7 +3597,7 @@ gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const r
|
|||
ADD_COMMENT(cb, "stack overflow check");
|
||||
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
|
||||
cmp(cb, REG_CFP, REG0);
|
||||
jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
|
||||
jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
|
||||
|
||||
if (doing_kw_call) {
|
||||
// Here we're calling a method with keyword arguments and specifying
|
||||
|
@ -4126,7 +4126,7 @@ gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
|
||||
jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
|
||||
cmp(cb, ep_me_opnd, REG1);
|
||||
jne_ptr(cb, COUNTED_EXIT(side_exit, invokesuper_me_changed));
|
||||
jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
|
||||
|
||||
if (!block) {
|
||||
// Guard no block passed
|
||||
|
@ -4139,7 +4139,7 @@ gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
// EP is in REG0 from above
|
||||
x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
|
||||
cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
|
||||
jne_ptr(cb, COUNTED_EXIT(side_exit, invokesuper_block));
|
||||
jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
|
||||
}
|
||||
|
||||
// Points to the receiver operand on the stack
|
||||
|
@ -4180,7 +4180,7 @@ gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
|
||||
// Check for interrupts
|
||||
ADD_COMMENT(cb, "check for interrupts");
|
||||
yjit_check_ints(cb, COUNTED_EXIT(side_exit, leave_se_interrupt));
|
||||
yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
|
||||
|
||||
// Load the return value
|
||||
mov(cb, REG0, ctx_stack_pop(ctx, 1));
|
||||
|
@ -4434,7 +4434,7 @@ gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
|
||||
// Check the result. _Bool is one byte in SysV.
|
||||
test(cb, AL, AL);
|
||||
jz_ptr(cb, COUNTED_EXIT(side_exit, opt_getinlinecache_miss));
|
||||
jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
|
||||
|
||||
// Push ic->entry->value
|
||||
mov(cb, REG0, const_ptr_opnd((void *)ic));
|
||||
|
@ -4487,7 +4487,7 @@ gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
|
||||
// Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
|
||||
test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
|
||||
jnz_ptr(cb, COUNTED_EXIT(side_exit, gbpp_block_param_modified));
|
||||
jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
|
||||
|
||||
// Load the block handler for the current frame
|
||||
// note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
|
||||
|
@ -4498,7 +4498,7 @@ gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
|
|||
|
||||
// Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
|
||||
cmp(cb, REG0_8, imm_opnd(0x1));
|
||||
jnz_ptr(cb, COUNTED_EXIT(side_exit, gbpp_block_handler_not_iseq));
|
||||
jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
|
||||
|
||||
// Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
|
||||
mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
|
||||
|
|
67
yjit_core.c
67
yjit_core.c
|
@ -716,11 +716,12 @@ gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_contex
|
|||
add_block_version(block->blockid, block);
|
||||
|
||||
// Patch the last branch address
|
||||
last_branch->dst_addrs[0] = cb_get_ptr(cb, block->start_pos);
|
||||
last_branch->dst_addrs[0] = block->start_addr;
|
||||
rb_darray_append(&block->incoming, last_branch);
|
||||
last_branch->blocks[0] = block;
|
||||
|
||||
RUBY_ASSERT(block->start_pos == last_branch->end_pos);
|
||||
// This block should immediately follow the last branch
|
||||
RUBY_ASSERT(block->start_addr == last_branch->end_addr);
|
||||
}
|
||||
|
||||
return first_block;
|
||||
|
@ -801,41 +802,41 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
|
|||
// If this block hasn't yet been compiled
|
||||
if (!p_block) {
|
||||
// If the new block can be generated right after the branch (at cb->write_pos)
|
||||
if (cb->write_pos == branch->end_pos && branch->start_pos >= yjit_codepage_frozen_bytes) {
|
||||
if (cb_get_write_ptr(cb) == branch->end_addr && branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
|
||||
// This branch should be terminating its block
|
||||
RUBY_ASSERT(branch->end_pos == branch->block->end_pos);
|
||||
RUBY_ASSERT(branch->end_addr == branch->block->end_addr);
|
||||
|
||||
// Change the branch shape to indicate the target block will be placed next
|
||||
branch->shape = (uint8_t)target_idx;
|
||||
|
||||
// Rewrite the branch with the new, potentially more compact shape
|
||||
cb_set_pos(cb, branch->start_pos);
|
||||
cb_set_write_ptr(cb, branch->start_addr);
|
||||
branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
|
||||
RUBY_ASSERT(cb->write_pos <= branch->end_pos && "can't enlarge branches");
|
||||
branch->end_pos = cb->write_pos;
|
||||
branch->block->end_pos = cb->write_pos;
|
||||
RUBY_ASSERT(cb_get_write_ptr(cb) <= branch->end_addr && "can't enlarge branches");
|
||||
branch->end_addr = cb_get_write_ptr(cb);
|
||||
branch->block->end_addr = cb_get_write_ptr(cb);
|
||||
}
|
||||
|
||||
// Compile the new block version
|
||||
p_block = gen_block_version(target, target_ctx, ec);
|
||||
RUBY_ASSERT(p_block);
|
||||
RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_pos != branch->end_pos));
|
||||
RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));
|
||||
}
|
||||
|
||||
// Add this branch to the list of incoming branches for the target
|
||||
rb_darray_append(&p_block->incoming, branch);
|
||||
|
||||
// Update the branch target address
|
||||
dst_addr = cb_get_ptr(cb, p_block->start_pos);
|
||||
dst_addr = p_block->start_addr;
|
||||
branch->dst_addrs[target_idx] = dst_addr;
|
||||
|
||||
// Rewrite the branch with the new jump target address
|
||||
if (branch->start_pos >= yjit_codepage_frozen_bytes) {
|
||||
if (branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
|
||||
RUBY_ASSERT(branch->dst_addrs[0] != NULL);
|
||||
uint32_t cur_pos = cb->write_pos;
|
||||
cb_set_pos(cb, branch->start_pos);
|
||||
cb_set_write_ptr(cb, branch->start_addr);
|
||||
branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
|
||||
RUBY_ASSERT(cb->write_pos == branch->end_pos && "branch can't change size");
|
||||
RUBY_ASSERT(cb_get_write_ptr(cb) == branch->end_addr && "branch can't change size");
|
||||
cb_set_pos(cb, cur_pos);
|
||||
}
|
||||
|
||||
|
@ -873,7 +874,7 @@ get_branch_target(
|
|||
branch->blocks[target_idx] = p_block;
|
||||
|
||||
// Return a pointer to the compiled code
|
||||
return cb_get_ptr(cb, p_block->start_pos);
|
||||
return p_block->start_addr;
|
||||
}
|
||||
|
||||
// Generate an outlined stub that will call branch_stub_hit()
|
||||
|
@ -916,9 +917,9 @@ gen_branch(
|
|||
branch->dst_addrs[1] = ctx1? get_branch_target(target1, ctx1, branch, 1):NULL;
|
||||
|
||||
// Call the branch generation function
|
||||
branch->start_pos = cb->write_pos;
|
||||
branch->start_addr = cb_get_write_ptr(cb);
|
||||
gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], SHAPE_DEFAULT);
|
||||
branch->end_pos = cb->write_pos;
|
||||
branch->end_addr = cb_get_write_ptr(cb);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -957,22 +958,22 @@ gen_direct_jump(
|
|||
if (p_block) {
|
||||
rb_darray_append(&p_block->incoming, branch);
|
||||
|
||||
branch->dst_addrs[0] = cb_get_ptr(cb, p_block->start_pos);
|
||||
branch->dst_addrs[0] = p_block->start_addr;
|
||||
branch->blocks[0] = p_block;
|
||||
branch->shape = SHAPE_DEFAULT;
|
||||
|
||||
// Call the branch generation function
|
||||
branch->start_pos = cb->write_pos;
|
||||
branch->start_addr = cb_get_write_ptr(cb);
|
||||
gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
|
||||
branch->end_pos = cb->write_pos;
|
||||
branch->end_addr = cb_get_write_ptr(cb);
|
||||
}
|
||||
else {
|
||||
// This NULL target address signals gen_block_version() to compile the
|
||||
// target block right after this one (fallthrough).
|
||||
branch->dst_addrs[0] = NULL;
|
||||
branch->shape = SHAPE_NEXT0;
|
||||
branch->start_pos = cb->write_pos;
|
||||
branch->end_pos = cb->write_pos;
|
||||
branch->start_addr = cb_get_write_ptr(cb);
|
||||
branch->end_addr = cb_get_write_ptr(cb);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1006,9 +1007,9 @@ defer_compilation(
|
|||
|
||||
// Call the branch generation function
|
||||
codeblock_t *cb = jit->cb;
|
||||
branch->start_pos = cb->write_pos;
|
||||
branch->start_addr = cb_get_write_ptr(cb);
|
||||
gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
|
||||
branch->end_pos = cb->write_pos;
|
||||
branch->end_addr = cb_get_write_ptr(cb);
|
||||
}
|
||||
|
||||
// Remove all references to a block then free it.
|
||||
|
@ -1096,7 +1097,7 @@ invalidate_block_version(block_t *block)
|
|||
block_array_remove(versions, block);
|
||||
|
||||
// Get a pointer to the generated code for this block
|
||||
uint8_t *code_ptr = cb_get_ptr(cb, block->start_pos);
|
||||
uint8_t *code_ptr = block->start_addr;
|
||||
|
||||
// For each incoming branch
|
||||
rb_darray_for(block->incoming, incoming_idx) {
|
||||
|
@ -1109,7 +1110,7 @@ invalidate_block_version(block_t *block)
|
|||
branch->blocks[target_idx] = NULL;
|
||||
|
||||
// Don't patch frozen code region
|
||||
if (branch->start_pos < yjit_codepage_frozen_bytes) {
|
||||
if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1122,7 +1123,7 @@ invalidate_block_version(block_t *block)
|
|||
);
|
||||
|
||||
// Check if the invalidated block immediately follows
|
||||
bool target_next = block->start_pos == branch->end_pos;
|
||||
bool target_next = (block->start_addr == branch->end_addr);
|
||||
|
||||
if (target_next) {
|
||||
// The new block will no longer be adjacent
|
||||
|
@ -1132,18 +1133,18 @@ invalidate_block_version(block_t *block)
|
|||
// Rewrite the branch with the new jump target address
|
||||
RUBY_ASSERT(branch->dst_addrs[0] != NULL);
|
||||
uint32_t cur_pos = cb->write_pos;
|
||||
cb_set_pos(cb, branch->start_pos);
|
||||
cb_set_write_ptr(cb, branch->start_addr);
|
||||
branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
|
||||
branch->end_pos = cb->write_pos;
|
||||
branch->block->end_pos = cb->write_pos;
|
||||
branch->end_addr = cb_get_write_ptr(cb);
|
||||
branch->block->end_addr = cb_get_write_ptr(cb);
|
||||
cb_set_pos(cb, cur_pos);
|
||||
|
||||
if (target_next && branch->end_pos > block->end_pos) {
|
||||
fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%d block_size=%d\n",
|
||||
if (target_next && branch->end_addr > block->end_addr) {
|
||||
fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%ld block_size=%ld\n",
|
||||
branch->block->blockid.idx,
|
||||
block->blockid.idx,
|
||||
branch->end_pos - block->end_pos,
|
||||
block->end_pos - block->start_pos);
|
||||
branch->end_addr - block->end_addr,
|
||||
block->end_addr - block->start_addr);
|
||||
yjit_print_iseq(branch->block->blockid.iseq);
|
||||
rb_bug("yjit invalidate rewrote branch past end of invalidated block");
|
||||
}
|
||||
|
|
16
yjit_core.h
16
yjit_core.h
|
@ -12,11 +12,10 @@
|
|||
|
||||
// Scratch registers used by YJIT
|
||||
#define REG0 RAX
|
||||
#define REG1 RCX
|
||||
#define REG0_32 EAX
|
||||
#define REG1_32 ECX
|
||||
|
||||
#define REG0_8 AL
|
||||
#define REG1 RCX
|
||||
#define REG1_32 ECX
|
||||
|
||||
// Maximum number of temp value types we keep track of
|
||||
#define MAX_TEMP_TYPES 8
|
||||
|
@ -193,8 +192,8 @@ typedef struct yjit_branch_entry
|
|||
struct yjit_block_version *block;
|
||||
|
||||
// Positions where the generated code starts and ends
|
||||
uint32_t start_pos;
|
||||
uint32_t end_pos;
|
||||
uint8_t* start_addr;
|
||||
uint8_t* end_addr;
|
||||
|
||||
// Context right after the branch instruction
|
||||
ctx_t src_ctx;
|
||||
|
@ -242,8 +241,8 @@ typedef struct yjit_block_version
|
|||
ctx_t ctx;
|
||||
|
||||
// Positions where the generated code starts and ends
|
||||
uint32_t start_pos;
|
||||
uint32_t end_pos;
|
||||
uint8_t* start_addr;
|
||||
uint8_t* end_addr;
|
||||
|
||||
// List of incoming branches (from predecessors)
|
||||
branch_array_t incoming;
|
||||
|
@ -259,9 +258,6 @@ typedef struct yjit_block_version
|
|||
// block in the system.
|
||||
cme_dependency_array_t cme_dependencies;
|
||||
|
||||
// Code page this block lives on
|
||||
VALUE code_page;
|
||||
|
||||
// Index one past the last instruction in the iseq
|
||||
uint32_t end_idx;
|
||||
|
||||
|
|
143
yjit_iface.c
143
yjit_iface.c
|
@ -36,6 +36,12 @@ extern st_table *rb_encoded_insn_data;
|
|||
|
||||
struct rb_yjit_options rb_yjit_opts;
|
||||
|
||||
// Size of code pages to allocate
|
||||
// Parenthesized so the macro expands as a single value inside larger
// expressions. Without the parentheses, `x % CODE_PAGE_SIZE` parses as
// `(x % 16) * 1024` because `%` and `*` share precedence and associate
// left-to-right, which silently weakens page-alignment checks.
#define CODE_PAGE_SIZE (16 * 1024)
|
||||
|
||||
// How many code pages to allocate at once
|
||||
#define PAGES_PER_ALLOC 512
|
||||
|
||||
static const rb_data_type_t yjit_block_type = {
|
||||
"YJIT/Block",
|
||||
{0, 0, 0, },
|
||||
|
@ -54,6 +60,7 @@ yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx)
|
|||
}
|
||||
|
||||
// For debugging. Print the disassembly of an iseq.
|
||||
RBIMPL_ATTR_MAYBE_UNUSED()
|
||||
static void
|
||||
yjit_print_iseq(const rb_iseq_t *iseq)
|
||||
{
|
||||
|
@ -520,8 +527,7 @@ block_address(VALUE self)
|
|||
{
|
||||
block_t * block;
|
||||
TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
|
||||
uint8_t *code_addr = cb_get_ptr(cb, block->start_pos);
|
||||
return LONG2NUM((intptr_t)code_addr);
|
||||
return LONG2NUM((intptr_t)block->start_addr);
|
||||
}
|
||||
|
||||
/* Get the machine code for YJIT::Block as a binary string */
|
||||
|
@ -532,8 +538,8 @@ block_code(VALUE self)
|
|||
TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
|
||||
|
||||
return (VALUE)rb_str_new(
|
||||
(const char*)cb->mem_block + block->start_pos,
|
||||
block->end_pos - block->start_pos
|
||||
(const char*)block->start_addr,
|
||||
block->end_addr - block->start_addr
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -880,7 +886,7 @@ rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body)
|
|||
}
|
||||
|
||||
// Mark the machine code page this block lives on
|
||||
rb_gc_mark_movable(block->code_page);
|
||||
//rb_gc_mark_movable(block->code_page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -926,7 +932,7 @@ rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body)
|
|||
}
|
||||
|
||||
// Update the machine code page this block lives on
|
||||
block->code_page = rb_gc_location(block->code_page);
|
||||
//block->code_page = rb_gc_location(block->code_page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -949,10 +955,39 @@ rb_yjit_iseq_free(const struct rb_iseq_constant_body *body)
|
|||
rb_darray_free(body->yjit_blocks);
|
||||
}
|
||||
|
||||
static void
|
||||
yjit_code_page_free(void *code_page)
|
||||
// Struct representing a code page
|
||||
typedef struct code_page_struct
|
||||
{
|
||||
free_code_page((code_page_t*)code_page);
|
||||
// Chunk of executable memory
|
||||
uint8_t* mem_block;
|
||||
|
||||
// Size of the executable memory chunk
|
||||
uint32_t page_size;
|
||||
|
||||
// Inline code block
|
||||
codeblock_t cb;
|
||||
|
||||
// Outlined code block
|
||||
codeblock_t ocb;
|
||||
|
||||
// Next node in the free list (private)
|
||||
struct code_page_struct* _next;
|
||||
|
||||
} code_page_t;
|
||||
|
||||
// Current code page we are writing machine code into
|
||||
static VALUE yjit_cur_code_page = Qfalse;
|
||||
|
||||
// Head of the list of free code pages
|
||||
static code_page_t *code_page_freelist = NULL;
|
||||
|
||||
// Free a code page, add it to the free list
|
||||
static void
|
||||
yjit_code_page_free(void *voidp)
|
||||
{
|
||||
code_page_t* code_page = (code_page_t*)voidp;
|
||||
code_page->_next = code_page_freelist;
|
||||
code_page_freelist = code_page;
|
||||
}
|
||||
|
||||
// Custom type for interacting with the GC
|
||||
|
@ -963,19 +998,47 @@ static const rb_data_type_t yjit_code_page_type = {
|
|||
};
|
||||
|
||||
// Allocate a code page and wrap it into a Ruby object owned by the GC
|
||||
VALUE rb_yjit_code_page_alloc(void)
|
||||
static VALUE
|
||||
rb_yjit_code_page_alloc(void)
|
||||
{
|
||||
code_page_t *code_page = alloc_code_page();
|
||||
VALUE cp_obj = TypedData_Wrap_Struct(0, &yjit_code_page_type, code_page);
|
||||
// If the free list is empty
|
||||
if (!code_page_freelist) {
|
||||
// Allocate many pages at once
|
||||
uint8_t* code_chunk = alloc_exec_mem(PAGES_PER_ALLOC * CODE_PAGE_SIZE);
|
||||
|
||||
// Write a pointer to the wrapper object at the beginning of the code page
|
||||
*((VALUE*)code_page->mem_block) = cp_obj;
|
||||
// Do this in reverse order so we allocate our pages in order
|
||||
for (int i = PAGES_PER_ALLOC - 1; i >= 0; --i) {
|
||||
code_page_t* code_page = malloc(sizeof(code_page_t));
|
||||
code_page->mem_block = code_chunk + i * CODE_PAGE_SIZE;
|
||||
assert ((intptr_t)code_page->mem_block % CODE_PAGE_SIZE == 0);
|
||||
code_page->page_size = CODE_PAGE_SIZE;
|
||||
code_page->_next = code_page_freelist;
|
||||
code_page_freelist = code_page;
|
||||
}
|
||||
}
|
||||
|
||||
return cp_obj;
|
||||
code_page_t* code_page = code_page_freelist;
|
||||
code_page_freelist = code_page_freelist->_next;
|
||||
|
||||
// Create a Ruby wrapper struct for the code page object
|
||||
VALUE wrapper = TypedData_Wrap_Struct(0, &yjit_code_page_type, code_page);
|
||||
|
||||
// Write a pointer to the wrapper object on the page
|
||||
*((VALUE*)code_page->mem_block) = wrapper;
|
||||
|
||||
// Initialize the code blocks
|
||||
uint8_t* page_start = code_page->mem_block + sizeof(VALUE);
|
||||
uint8_t* page_end = code_page->mem_block + CODE_PAGE_SIZE;
|
||||
uint32_t halfsize = (uint32_t)(page_end - page_start) / 2;
|
||||
cb_init(&code_page->cb, page_start, halfsize);
|
||||
cb_init(&code_page->cb, page_start + halfsize, halfsize);
|
||||
|
||||
return wrapper;
|
||||
}
|
||||
|
||||
// Unwrap the Ruby object representing a code page
|
||||
code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj)
|
||||
static code_page_t *
|
||||
rb_yjit_code_page_unwrap(VALUE cp_obj)
|
||||
{
|
||||
code_page_t * code_page;
|
||||
TypedData_Get_Struct(cp_obj, code_page_t, &yjit_code_page_type, code_page);
|
||||
|
@ -983,21 +1046,23 @@ code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj)
|
|||
}
|
||||
|
||||
// Get the code page wrapper object for a code pointer
|
||||
VALUE rb_yjit_code_page_from_ptr(uint8_t *code_ptr)
|
||||
static VALUE
|
||||
rb_yjit_code_page_from_ptr(uint8_t* code_ptr)
|
||||
{
|
||||
VALUE *page_start = (VALUE*)((intptr_t)code_ptr & ~(CODE_PAGE_SIZE - 1));
|
||||
VALUE* page_start = (VALUE*)((intptr_t)code_ptr & ~(CODE_PAGE_SIZE - 1));
|
||||
VALUE wrapper = *page_start;
|
||||
return wrapper;
|
||||
}
|
||||
|
||||
// Get the inline code block corresponding to a code pointer
|
||||
void rb_yjit_get_cb(codeblock_t *cb, uint8_t *code_ptr)
|
||||
static void
|
||||
yjit_get_cb(codeblock_t* cb, uint8_t* code_ptr)
|
||||
{
|
||||
VALUE page_wrapper = rb_yjit_code_page_from_ptr(code_ptr);
|
||||
code_page_t *code_page = rb_yjit_code_page_unwrap(page_wrapper);
|
||||
|
||||
// A pointer to the page wrapper object is written at the start of the code page
|
||||
uint8_t *mem_block = code_page->mem_block + sizeof(VALUE);
|
||||
uint8_t* mem_block = code_page->mem_block + sizeof(VALUE);
|
||||
uint32_t mem_size = (code_page->page_size/2) - sizeof(VALUE);
|
||||
RUBY_ASSERT(mem_block);
|
||||
|
||||
|
@ -1006,13 +1071,14 @@ void rb_yjit_get_cb(codeblock_t *cb, uint8_t *code_ptr)
|
|||
}
|
||||
|
||||
// Get the outlined code block corresponding to a code pointer
|
||||
void rb_yjit_get_ocb(codeblock_t *cb, uint8_t *code_ptr)
|
||||
static void
|
||||
yjit_get_ocb(codeblock_t* cb, uint8_t* code_ptr)
|
||||
{
|
||||
VALUE page_wrapper = rb_yjit_code_page_from_ptr(code_ptr);
|
||||
code_page_t *code_page = rb_yjit_code_page_unwrap(page_wrapper);
|
||||
|
||||
// A pointer to the page wrapper object is written at the start of the code page
|
||||
uint8_t *mem_block = code_page->mem_block + (code_page->page_size/2);
|
||||
uint8_t* mem_block = code_page->mem_block + (code_page->page_size/2);
|
||||
uint32_t mem_size = code_page->page_size/2;
|
||||
RUBY_ASSERT(mem_block);
|
||||
|
||||
|
@ -1020,6 +1086,39 @@ void rb_yjit_get_ocb(codeblock_t *cb, uint8_t *code_ptr)
|
|||
cb_init(cb, mem_block, mem_size);
|
||||
}
|
||||
|
||||
// Get the current code page or allocate a new one
|
||||
static VALUE
|
||||
yjit_get_code_page(uint32_t cb_bytes_needed, uint32_t ocb_bytes_needed)
|
||||
{
|
||||
// If this is the first code page
|
||||
if (yjit_cur_code_page == Qfalse) {
|
||||
yjit_cur_code_page = rb_yjit_code_page_alloc();
|
||||
}
|
||||
|
||||
// Get the current code page
|
||||
code_page_t *code_page = rb_yjit_code_page_unwrap(yjit_cur_code_page);
|
||||
|
||||
// Compute how many bytes are left in the code blocks
|
||||
uint32_t cb_bytes_left = code_page->cb.mem_size - code_page->cb.write_pos;
|
||||
uint32_t ocb_bytes_left = code_page->ocb.mem_size - code_page->ocb.write_pos;
|
||||
RUBY_ASSERT_ALWAYS(cb_bytes_needed <= code_page->cb.mem_size);
|
||||
RUBY_ASSERT_ALWAYS(ocb_bytes_needed <= code_page->ocb.mem_size);
|
||||
|
||||
// If there's enough space left in the current code page
|
||||
if (cb_bytes_needed <= cb_bytes_left && ocb_bytes_needed <= ocb_bytes_left) {
|
||||
return yjit_cur_code_page;
|
||||
}
|
||||
|
||||
// Allocate a new code page
|
||||
yjit_cur_code_page = rb_yjit_code_page_alloc();
|
||||
code_page_t *new_code_page = rb_yjit_code_page_unwrap(yjit_cur_code_page);
|
||||
|
||||
// Jump to the new code page
|
||||
jmp_ptr(&code_page->cb, new_code_page->cb.mem_block);
|
||||
|
||||
return yjit_cur_code_page;
|
||||
}
|
||||
|
||||
bool
|
||||
rb_yjit_enabled_p(void)
|
||||
{
|
||||
|
|
|
@ -30,9 +30,9 @@ static const VALUE *yjit_count_side_exit_op(const VALUE *exit_pc);
|
|||
static void yjit_unlink_method_lookup_dependency(block_t *block);
|
||||
static void yjit_block_assumptions_free(block_t *block);
|
||||
|
||||
VALUE rb_yjit_code_page_alloc(void);
|
||||
code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj);
|
||||
void rb_yjit_get_cb(codeblock_t *cb, uint8_t *code_ptr);
|
||||
void rb_yjit_get_ocb(codeblock_t *cb, uint8_t *code_ptr);
|
||||
static VALUE yjit_get_code_page(uint32_t cb_bytes_needed, uint32_t ocb_bytes_needed);
|
||||
//code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj);
|
||||
//void rb_yjit_get_cb(codeblock_t* cb, uint8_t* code_ptr);
|
||||
//void rb_yjit_get_ocb(codeblock_t* cb, uint8_t* code_ptr);
|
||||
|
||||
#endif // #ifndef YJIT_IFACE_H
|
||||
|
|
Loading…
Reference in a new issue