mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
WIP JIT-to-JIT returns
This commit is contained in:
parent
6341fc21b2
commit
9d8cc01b75
6 changed files with 91 additions and 20 deletions
|
@ -114,7 +114,7 @@ Compile an interpreter entry block to be inserted into an iseq
|
|||
Returns `NULL` if compilation fails.
|
||||
*/
|
||||
uint8_t*
|
||||
ujit_entry_prologue()
|
||||
ujit_entry_prologue(void)
|
||||
{
|
||||
RUBY_ASSERT(cb != NULL);
|
||||
|
||||
|
@ -248,9 +248,9 @@ gen_dup(jitstate_t* jit, ctx_t* ctx)
|
|||
x86opnd_t dup_val = ctx_stack_pop(ctx, 1);
|
||||
x86opnd_t loc0 = ctx_stack_push(ctx, T_NONE);
|
||||
x86opnd_t loc1 = ctx_stack_push(ctx, T_NONE);
|
||||
mov(cb, RAX, dup_val);
|
||||
mov(cb, loc0, RAX);
|
||||
mov(cb, loc1, RAX);
|
||||
mov(cb, REG0, dup_val);
|
||||
mov(cb, loc0, REG0);
|
||||
mov(cb, loc1, REG0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1191,6 +1191,23 @@ gen_opt_swb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const r
|
|||
|
||||
bool rb_simple_iseq_p(const rb_iseq_t *iseq);
|
||||
|
||||
void
|
||||
gen_return_branch(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape)
|
||||
{
|
||||
switch (shape)
|
||||
{
|
||||
case SHAPE_NEXT0:
|
||||
case SHAPE_NEXT1:
|
||||
RUBY_ASSERT(false);
|
||||
break;
|
||||
|
||||
case SHAPE_DEFAULT:
|
||||
mov(cb, REG0, const_ptr_opnd(target0));
|
||||
mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_callable_method_entry_t *cme, int32_t argc)
|
||||
{
|
||||
|
@ -1251,13 +1268,32 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
|
|||
cmp(cb, klass_opnd, REG1);
|
||||
jne_ptr(cb, side_exit);
|
||||
|
||||
// Store incremented PC into current control frame in case callee raises.
|
||||
// Store the updated SP on the current frame (pop arguments and receiver)
|
||||
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
|
||||
mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
|
||||
|
||||
// Store the next PC in the current frame
|
||||
mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block))));
|
||||
mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), REG0);
|
||||
|
||||
// Store the updated SP on the CFP (pop arguments and receiver)
|
||||
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
|
||||
mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
|
||||
// Stub so we can return to JITted code
|
||||
blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
|
||||
|
||||
// Pop arguments and receiver in return context, push the return value
|
||||
// After the return, the JIT and interpreter SP will match up
|
||||
ctx_t return_ctx = *ctx;
|
||||
ctx_stack_pop(&return_ctx, argc);
|
||||
return_ctx.sp_offset = 0;
|
||||
|
||||
// Write the JIT return address on the current frame
|
||||
gen_branch(
|
||||
ctx,
|
||||
return_block,
|
||||
&return_ctx,
|
||||
return_block,
|
||||
&return_ctx,
|
||||
gen_return_branch
|
||||
);
|
||||
|
||||
// Stack overflow check
|
||||
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
|
||||
|
@ -1327,7 +1363,6 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
|
|||
&DEFAULT_CTX,
|
||||
(blockid_t){ iseq, 0 }
|
||||
);
|
||||
|
||||
|
||||
|
||||
// TODO: create stub for call continuation
|
||||
|
@ -1432,7 +1467,31 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
|
|||
mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
|
||||
mov(cb, mem_opnd(64, REG_SP, -SIZEOF_VALUE), REG0);
|
||||
|
||||
// Write the post call bytes
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Load the JIT return address
|
||||
mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, jit_return));
|
||||
|
||||
// If the return address is NULL, fall back to the interpreter
|
||||
int FALLBACK_LABEL = cb_new_label(cb, "FALLBACK");
|
||||
cmp(cb, REG0, imm_opnd(0));
|
||||
jz(cb, FALLBACK_LABEL);
|
||||
|
||||
// Jump to the JIT return address
|
||||
jmp_rm(cb, REG0);
|
||||
|
||||
// Fall back to the interpreter
|
||||
cb_write_label(cb, FALLBACK_LABEL);
|
||||
cb_link_labels(cb);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
cb_write_post_call_bytes(cb);
|
||||
|
||||
return true;
|
||||
|
|
22
ujit_core.c
22
ujit_core.c
|
@ -32,7 +32,7 @@ Get an operand for the adjusted stack pointer address
|
|||
x86opnd_t
|
||||
ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes)
|
||||
{
|
||||
int32_t offset = (ctx->stack_size) * sizeof(VALUE) + offset_bytes;
|
||||
int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
|
||||
return mem_opnd(64, REG_SP, offset);
|
||||
}
|
||||
|
||||
|
@ -49,9 +49,10 @@ ctx_stack_push(ctx_t* ctx, int type)
|
|||
ctx->temp_types[ctx->stack_size] = type;
|
||||
|
||||
ctx->stack_size += 1;
|
||||
ctx->sp_offset += 1;
|
||||
|
||||
// SP points just above the topmost value
|
||||
int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
|
||||
int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
|
||||
return mem_opnd(64, REG_SP, offset);
|
||||
}
|
||||
|
||||
|
@ -65,7 +66,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
|
|||
RUBY_ASSERT(n <= ctx->stack_size);
|
||||
|
||||
// SP points just above the topmost value
|
||||
int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
|
||||
int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
|
||||
x86opnd_t top = mem_opnd(64, REG_SP, offset);
|
||||
|
||||
// Clear the types of the popped values
|
||||
|
@ -77,6 +78,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
|
|||
}
|
||||
|
||||
ctx->stack_size -= n;
|
||||
ctx->sp_offset -= n;
|
||||
|
||||
return top;
|
||||
}
|
||||
|
@ -88,7 +90,7 @@ x86opnd_t
|
|||
ctx_stack_opnd(ctx_t* ctx, int32_t idx)
|
||||
{
|
||||
// SP points just above the topmost value
|
||||
int32_t offset = (ctx->stack_size - 1 - idx) * sizeof(VALUE);
|
||||
int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
|
||||
x86opnd_t opnd = mem_opnd(64, REG_SP, offset);
|
||||
|
||||
return opnd;
|
||||
|
@ -120,6 +122,9 @@ int ctx_diff(const ctx_t* src, const ctx_t* dst)
|
|||
if (dst->stack_size != src->stack_size)
|
||||
return INT_MAX;
|
||||
|
||||
if (dst->sp_offset != src->sp_offset)
|
||||
return INT_MAX;
|
||||
|
||||
if (dst->self_is_object != src->self_is_object)
|
||||
return INT_MAX;
|
||||
|
||||
|
@ -345,6 +350,7 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
|
|||
// Limit the number of block versions
|
||||
ctx_t generic_ctx = DEFAULT_CTX;
|
||||
generic_ctx.stack_size = target_ctx->stack_size;
|
||||
generic_ctx.sp_offset = target_ctx->sp_offset;
|
||||
if (count_block_versions(target) >= MAX_VERSIONS - 1)
|
||||
{
|
||||
fprintf(stderr, "version limit hit in branch_stub_hit\n");
|
||||
|
@ -383,7 +389,6 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
|
|||
}
|
||||
|
||||
// Get a version or stub corresponding to a branch target
|
||||
// TODO: need incoming and target contexts
|
||||
uint8_t* get_branch_target(
|
||||
blockid_t target,
|
||||
const ctx_t* ctx,
|
||||
|
@ -440,13 +445,13 @@ void gen_branch(
|
|||
)
|
||||
{
|
||||
RUBY_ASSERT(target0.iseq != NULL);
|
||||
RUBY_ASSERT(target1.iseq != NULL);
|
||||
//RUBY_ASSERT(target1.iseq != NULL);
|
||||
RUBY_ASSERT(num_branches < MAX_BRANCHES);
|
||||
uint32_t branch_idx = num_branches++;
|
||||
|
||||
// Get the branch targets or stubs
|
||||
uint8_t* dst_addr0 = get_branch_target(target0, ctx0, branch_idx, 0);
|
||||
uint8_t* dst_addr1 = get_branch_target(target1, ctx1, branch_idx, 1);
|
||||
uint8_t* dst_addr1 = ctx1? get_branch_target(target1, ctx1, branch_idx, 1):NULL;
|
||||
|
||||
// Call the branch generation function
|
||||
uint32_t start_pos = cb->write_pos;
|
||||
|
@ -459,7 +464,7 @@ void gen_branch(
|
|||
end_pos,
|
||||
*src_ctx,
|
||||
{ target0, target1 },
|
||||
{ *ctx0, *ctx1 },
|
||||
{ *ctx0, ctx1? *ctx1:DEFAULT_CTX },
|
||||
{ dst_addr0, dst_addr1 },
|
||||
gen_fn,
|
||||
SHAPE_DEFAULT
|
||||
|
@ -508,6 +513,7 @@ void gen_direct_jump(
|
|||
// Limit the number of block versions
|
||||
ctx_t generic_ctx = DEFAULT_CTX;
|
||||
generic_ctx.stack_size = ctx->stack_size;
|
||||
generic_ctx.sp_offset = ctx->sp_offset;
|
||||
if (count_block_versions(target0) >= MAX_VERSIONS - 1)
|
||||
{
|
||||
fprintf(stderr, "version limit hit in branch_stub_hit\n");
|
||||
|
|
|
@ -31,9 +31,12 @@ typedef struct CtxStruct
|
|||
// T_NONE==0 is the unknown type
|
||||
uint8_t temp_types[MAX_TEMP_TYPES];
|
||||
|
||||
// Number of values pushed on the temporary stack
|
||||
// Number of values currently on the temporary stack
|
||||
uint16_t stack_size;
|
||||
|
||||
// Offset of the JIT SP relative to the interpreter SP
|
||||
int16_t sp_offset;
|
||||
|
||||
// Whether we know self is a heap object
|
||||
bool self_is_object : 1;
|
||||
|
||||
|
|
2
vm.c
2
vm.c
|
@ -202,7 +202,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
|
|||
{
|
||||
rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
|
||||
VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
|
||||
VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
|
||||
VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
|
||||
return cfp;
|
||||
}
|
||||
|
||||
|
|
|
@ -790,6 +790,8 @@ typedef struct rb_control_frame_struct {
|
|||
#if VM_DEBUG_BP_CHECK
|
||||
VALUE *bp_check; /* cfp[7] */
|
||||
#endif
|
||||
// Return address for uJIT code
|
||||
void *jit_return;
|
||||
} rb_control_frame_t;
|
||||
|
||||
extern const rb_data_type_t ruby_threadptr_data_type;
|
||||
|
|
|
@ -390,6 +390,7 @@ vm_push_frame(rb_execution_context_t *ec,
|
|||
#if VM_DEBUG_BP_CHECK
|
||||
.bp_check = sp,
|
||||
#endif
|
||||
.jit_return = NULL
|
||||
};
|
||||
|
||||
ec->cfp = cfp;
|
||||
|
|
Loading…
Reference in a new issue