1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Runtime counters for bailing from opt_send_without_block

This allows counting things in generated code such as side exits or
different reasons for failing to compile. Runtime counters are exposed
to Ruby as `UJIT.runtime_stats` for ease of reporting and analysis.
This commit is contained in:
Alan Wu 2021-03-02 18:27:50 -05:00
parent 2e8db8c35e
commit 316f757c44
6 changed files with 282 additions and 67 deletions

34
ujit.rb
View file

@ -35,4 +35,38 @@ module UJIT
str
end if defined?(Disasm)
# Return a hash for statistics generated for the --ujit-stats command line option.
# Return nil when option is not passed or unavailable.
def self.runtime_stats
# defined in ujit_iface.c
Primitive.get_stat_counters
end
class << self
private
# Format and print out counters
def _print_stats
counters = runtime_stats
return unless counters
$stderr.puts("***uJIT: Printing runtime counters from ujit.rb***")
$stderr.puts("opt_send_without_block exit reasons: ")
counters.filter! { |key, _| key.start_with?('oswb_') }
counters.transform_keys! { |key| key.to_s.delete_prefix('oswb_') }
counters = counters.to_a
counters.sort_by! { |(_, counter_value)| counter_value }
longest_name_length = counters.max_by { |(name, _)| name.length }.first.length
total = counters.sum { |(_, counter_value)| counter_value }
counters.reverse_each do |(name, value)|
percentage = value.fdiv(total) * 100
$stderr.printf(" %*s %10d (%4.1f%%)\n", longest_name_length, name, value, percentage);
end
end
end
end

View file

@ -1640,3 +1640,9 @@ void xor(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1)
opnd1
);
}
// LOCK - lock prefix for atomic shared memory operations
// Emits the 0xF0 prefix byte so that the immediately following
// read-modify-write instruction executes atomically.
void cb_write_lock_prefix(codeblock_t* cb)
{
    cb_write_byte(cb, 0xF0);
}

View file

@ -379,4 +379,6 @@ void test(codeblock_t* cb, x86opnd_t rm_opnd, x86opnd_t test_opnd);
void ud2(codeblock_t* cb);
void xor(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1);
void cb_write_lock_prefix(codeblock_t* cb);
#endif

View file

@ -162,6 +162,37 @@ ujit_side_exit(jitstate_t* jit, ctx_t* ctx)
return code_ptr;
}
#if RUBY_DEBUG

// Increment a profiling counter with counter_name.
// Bakes the counter's address into the generated code.
// NOTE: file-scope identifiers starting with an underscore are reserved
// in C, so the helper functions use unprefixed internal names.
#define GEN_COUNTER_INC(cb, counter_name) gen_counter_inc(cb, &(ujit_runtime_counters . counter_name))
static void
gen_counter_inc(codeblock_t *cb, int64_t *counter)
{
    // Counters are only wired into generated code under --ujit-stats
    if (!rb_ujit_opts.gen_stats) return;

    mov(cb, REG0, const_ptr_opnd(counter));
    cb_write_lock_prefix(cb); // for ractors.
    add(cb, mem_opnd(64, REG0, 0), imm_opnd(1));
}

// Increment a counter then take an existing side exit.
#define COUNTED_EXIT(side_exit, counter_name) counted_side_exit(side_exit, &(ujit_runtime_counters . counter_name))
static uint8_t *
counted_side_exit(uint8_t *existing_side_exit, int64_t *counter)
{
    // Without stats collection, jump straight to the existing exit
    if (!rb_ujit_opts.gen_stats) return existing_side_exit;

    // NOTE(review): this emits into the outlined codeblock `ocb`
    // (a file-level global), not a caller-supplied codeblock.
    uint8_t *start = cb_get_ptr(ocb, ocb->write_pos);
    gen_counter_inc(ocb, counter);
    jmp_ptr(ocb, existing_side_exit);
    return start;
}

#else

// Stats are compiled out in release builds: counting is a no-op and a
// counted exit degenerates to the plain side exit.
#define GEN_COUNTER_INC(cb, counter_name) ((void)0)
#define COUNTED_EXIT(side_exit, counter_name) side_exit

#endif // if RUBY_DEBUG
/*
Compile an interpreter entry block to be inserted into an iseq
Returns `NULL` if compilation fails.
@ -255,29 +286,23 @@ ujit_gen_block(ctx_t* ctx, block_t* block, rb_execution_context_t* ec)
break;
}
#if RUBY_DEBUG
// Accumulate stats about instructions executed
if (rb_ujit_opts.gen_stats) {
// Count instructions executed by the JIT
mov(cb, REG0, const_ptr_opnd((void *)&rb_ujit_exec_insns_count));
add(cb, mem_opnd(64, REG0, 0), imm_opnd(1));
}
#endif
//fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
//print_str(cb, insn_name(opcode));
// Call the code generation function
codegen_status_t status = gen_fn(&jit, ctx);
// Count bytecode instructions that execute in generated code
// FIXME: when generation function returns false, we shouldn't increment
// this counter.
GEN_COUNTER_INC(cb, exec_instruction);
// If we can't compile this instruction
// exit to the interpreter and stop compiling
if (status == UJIT_CANT_COMPILE) {
ujit_gen_exit(&jit, ctx, cb, jit.pc);
// Call the code generation function
bool continue_generating = p_desc->gen_fn(&jit, ctx);
if (!continue_generating) {
break;
}
// Move to the next instruction to compile
// Move to the next instruction
p_last_op = p_desc;
insn_idx += insn_len(opcode);
// If the instruction terminates this block
@ -1060,22 +1085,27 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
// If the function expects a Ruby array of arguments
if (cfunc->argc < 0 && cfunc->argc != -1) {
return UJIT_CANT_COMPILE;
if (cfunc->argc < 0 && cfunc->argc != -1)
{
GEN_COUNTER_INC(cb, oswb_cfunc_ruby_array_varg);
return false;
}
// If the argument count doesn't match
if (cfunc->argc >= 0 && cfunc->argc != argc) {
return UJIT_CANT_COMPILE;
if (cfunc->argc >= 0 && cfunc->argc != argc)
{
GEN_COUNTER_INC(cb, oswb_cfunc_argc_mismatch);
return false;
}
// Don't JIT functions that need C stack arguments for now
if (argc + 1 > NUM_C_ARG_REGS) {
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_cfunc_toomany_args);
return false;
}
// Create a side-exit to fall back to the interpreter
uint8_t* side_exit = ujit_side_exit(jit, ctx);
uint8_t *side_exit = ujit_side_exit(jit, ctx);
// Check for interrupts
ujit_check_ints(cb, side_exit);
@ -1094,12 +1124,15 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
//print_ptr(cb, recv);
// Check that the receiver is a heap object
test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
jnz_ptr(cb, side_exit);
cmp(cb, REG0, imm_opnd(Qfalse));
je_ptr(cb, side_exit);
cmp(cb, REG0, imm_opnd(Qnil));
je_ptr(cb, side_exit);
{
uint8_t *receiver_not_heap = COUNTED_EXIT(side_exit, oswb_se_receiver_not_heap);
test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
jnz_ptr(cb, receiver_not_heap);
cmp(cb, REG0, imm_opnd(Qfalse));
je_ptr(cb, receiver_not_heap);
cmp(cb, REG0, imm_opnd(Qnil));
je_ptr(cb, receiver_not_heap);
}
// Pointer to the klass field of the receiver &(recv->klass)
x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
@ -1110,7 +1143,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
// Bail if receiver class is different from compile-time call cache class
jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cd->cc->klass);
cmp(cb, klass_opnd, REG1);
jne_ptr(cb, side_exit);
jne_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cc_klass_differ));
// Store incremented PC into current control frame in case callee raises.
mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block))));
@ -1124,7 +1157,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
// REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
cmp(cb, REG_CFP, REG0);
jle_ptr(cb, side_exit);
jle_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cf_overflow));
// Increment the stack pointer by 3 (in the callee)
// sp += 3
@ -1285,19 +1318,21 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca
int num_locals = iseq->body->local_table_size - num_params;
if (num_params != argc) {
//fprintf(stderr, "param argc mismatch\n");
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_iseq_argc_mismatch);
return false;
}
if (!rb_simple_iseq_p(iseq)) {
// Only handle iseqs that have simple parameters.
// See vm_callee_setup_arg().
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_iseq_not_simple);
return false;
}
if (vm_ci_flag(cd->ci) & VM_CALL_TAILCALL) {
// We can't handle tailcalls
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_iseq_tailcall);
return false;
}
rb_gc_register_mark_object((VALUE)iseq); // FIXME: intentional LEAK!
@ -1320,12 +1355,15 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca
//print_ptr(cb, recv);
// Check that the receiver is a heap object
test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
jnz_ptr(cb, side_exit);
cmp(cb, REG0, imm_opnd(Qfalse));
je_ptr(cb, side_exit);
cmp(cb, REG0, imm_opnd(Qnil));
je_ptr(cb, side_exit);
{
uint8_t *receiver_not_heap = COUNTED_EXIT(side_exit, oswb_se_receiver_not_heap);
test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
jnz_ptr(cb, receiver_not_heap);
cmp(cb, REG0, imm_opnd(Qfalse));
je_ptr(cb, receiver_not_heap);
cmp(cb, REG0, imm_opnd(Qnil));
je_ptr(cb, receiver_not_heap);
}
// Pointer to the klass field of the receiver &(recv->klass)
x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
@ -1335,7 +1373,7 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca
// Bail if receiver class is different from compile-time call cache class
jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cd->cc->klass);
cmp(cb, klass_opnd, REG1);
jne_ptr(cb, side_exit);
jne_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cc_klass_differ));
// Store the updated SP on the current frame (pop arguments and receiver)
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
@ -1349,7 +1387,7 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + sizeof(rb_control_frame_t)));
cmp(cb, REG_CFP, REG0);
jle_ptr(cb, side_exit);
jle_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cf_overflow));
// Adjust the callee's stack pointer
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals)));
@ -1426,7 +1464,7 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca
// Load the updated SP
mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
// Directly jump to the entry point of the callee
gen_direct_jump(
&DEFAULT_CTX,
@ -1460,42 +1498,73 @@ gen_opt_send_without_block(jitstate_t* jit, ctx_t* ctx)
// Don't JIT calls with keyword splat
if (vm_ci_flag(cd->ci) & VM_CALL_KW_SPLAT) {
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_kw_splat);
return false;
}
// Don't JIT calls that aren't simple
if (!(vm_ci_flag(cd->ci) & VM_CALL_ARGS_SIMPLE)) {
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_callsite_not_simple);
return false;
}
// Don't JIT if the inline cache is not set
if (!cd->cc || !cd->cc->klass) {
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_ic_empty);
return false;
}
const rb_callable_method_entry_t *cme = vm_cc_cme(cd->cc);
// Don't JIT if the method entry is out of date
if (METHOD_ENTRY_INVALIDATED(cme)) {
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_invalid_cme);
return false;
}
// We don't generate code to check protected method calls
if (METHOD_ENTRY_VISI(cme) == METHOD_VISI_PROTECTED) {
return UJIT_CANT_COMPILE;
GEN_COUNTER_INC(cb, oswb_protected);
return false;
}
// If this is a C call
if (cme->def->type == VM_METHOD_TYPE_CFUNC) {
return gen_oswb_cfunc(jit, ctx, cd, cme, argc);
}
// If this is a Ruby call
if (cme->def->type == VM_METHOD_TYPE_ISEQ) {
switch (cme->def->type) {
case VM_METHOD_TYPE_ISEQ:
return gen_oswb_iseq(jit, ctx, cd, cme, argc);
case VM_METHOD_TYPE_CFUNC:
return gen_oswb_cfunc(jit, ctx, cd, cme, argc);
case VM_METHOD_TYPE_ATTRSET:
GEN_COUNTER_INC(cb, oswb_ivar_set_method);
return false;
case VM_METHOD_TYPE_BMETHOD:
GEN_COUNTER_INC(cb, oswb_bmethod);
return false;
case VM_METHOD_TYPE_IVAR:
GEN_COUNTER_INC(cb, oswb_ivar_get_method);
return false;
case VM_METHOD_TYPE_ZSUPER:
GEN_COUNTER_INC(cb, oswb_zsuper_method);
return false;
case VM_METHOD_TYPE_ALIAS:
GEN_COUNTER_INC(cb, oswb_alias_method);
return false;
case VM_METHOD_TYPE_UNDEF:
GEN_COUNTER_INC(cb, oswb_undef_method);
return false;
case VM_METHOD_TYPE_NOTIMPLEMENTED:
GEN_COUNTER_INC(cb, oswb_not_implemented_method);
return false;
case VM_METHOD_TYPE_OPTIMIZED:
GEN_COUNTER_INC(cb, oswb_optimized_method);
return false;
case VM_METHOD_TYPE_MISSING:
GEN_COUNTER_INC(cb, oswb_missing_method);
return false;
case VM_METHOD_TYPE_REFINED:
GEN_COUNTER_INC(cb, oswb_refined_method);
return false;
// no default case so compiler issues a warning if this is not exhaustive
}
return UJIT_CANT_COMPILE;
}
static codegen_status_t

View file

@ -13,22 +13,22 @@
#include "ujit_codegen.h"
#include "ujit_core.h"
#include "ujit_hooks.inc"
#include "ujit.rbinc"
#include "darray.h"
#if HAVE_LIBCAPSTONE
#include <capstone/capstone.h>
#endif
VALUE cUjitBlock;
VALUE cUjitDisasm;
VALUE cUjitDisasmInsn;
static VALUE mUjit;
static VALUE cUjitBlock;
static VALUE cUjitDisasm;
static VALUE cUjitDisasmInsn;
#if RUBY_DEBUG
static int64_t vm_insns_count = 0;
int64_t rb_ujit_exec_insns_count = 0;
static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 };
int64_t rb_compiled_iseq_count = 0;
struct rb_ujit_runtime_counters ujit_runtime_counters = { 0 };
#endif
// Machine code blocks (executable memory)
@ -506,6 +506,63 @@ ujit_disasm(VALUE self, VALUE code, VALUE from)
}
#endif
// Kernel#at_exit callback registered by rb_ujit_init when --ujit-stats
// is enabled; forwards to the Ruby-side formatter.
static VALUE
at_exit_print_stats(RB_BLOCK_CALL_FUNC_ARGLIST(yieldarg, data))
{
    // UJIT._print_stats is defined in ujit.rb
    rb_funcall(mUjit, rb_intern("_print_stats"), 0);
    return Qnil;
}
// Primitive called in ujit.rb. Export all runtime counters as a Ruby hash
// mapping counter name symbols to Integer values. Returns nil when stats
// collection is disabled or compiled out.
static VALUE
get_stat_counters(rb_execution_context_t *ec, VALUE self)
{
#if RUBY_DEBUG
    if (!rb_ujit_opts.gen_stats) return Qnil;

    VALUE hash = rb_hash_new();
    RB_VM_LOCK_ENTER();
    {
        // The counters struct is a flat sequence of int64_t members, so it
        // can be walked pointer-wise up to the last_member sentinel.
        int64_t *counter_reader = (int64_t *)&ujit_runtime_counters;
        int64_t *counter_reader_end = &ujit_runtime_counters.last_member;

        // Iterate through the comma separated counter name list produced
        // by stringizing the member list in UJIT_DECLARE_COUNTERS; names
        // advance in lockstep with the counter values above.
        char *name_reader = ujit_counter_names;
        char *counter_name_end = ujit_counter_names + sizeof(ujit_counter_names);
        while (name_reader < counter_name_end && counter_reader < counter_reader_end) {
            // Skip separators between names
            if (*name_reader == ',' || *name_reader == ' ') {
                name_reader++;
                continue;
            }

            // Compute the length of the current counter name
            int name_len;
            char *name_end;
            {
                name_end = strchr(name_reader, ',');
                // No trailing comma after the final (sentinel) name
                if (name_end == NULL) break;
                name_len = (int)(name_end - name_reader);
            }

            // Put counter into hash
            VALUE key = ID2SYM(rb_intern2(name_reader, name_len));
            VALUE value = LL2NUM((long long)*counter_reader);
            rb_hash_aset(hash, key, value);

            counter_reader++;
            name_reader = name_end;
        }
    }
    RB_VM_LOCK_LEAVE();

    return hash;
#else
    return Qnil;
#endif // if RUBY_DEBUG
}
#include "ujit.rbinc"
#if RUBY_DEBUG
// implementation for --ujit-stats
@ -590,16 +647,17 @@ print_ujit_stats(void)
const struct insn_count *sorted_exit_ops = sort_insn_count_array(exit_op_count);
double total_insns_count = vm_insns_count + rb_ujit_exec_insns_count;
double ratio = rb_ujit_exec_insns_count / total_insns_count;
double total_insns_count = vm_insns_count + ujit_runtime_counters.exec_instruction;
double ratio = ujit_runtime_counters.exec_instruction / total_insns_count;
fprintf(stderr, "compiled_iseq_count: %10" PRId64 "\n", rb_compiled_iseq_count);
fprintf(stderr, "main_block_code_size: %6.1f MiB\n", ((double)cb->write_pos) / 1048576.0);
fprintf(stderr, "side_block_code_size: %6.1f MiB\n", ((double)ocb->write_pos) / 1048576.0);
fprintf(stderr, "vm_insns_count: %10" PRId64 "\n", vm_insns_count);
fprintf(stderr, "ujit_exec_insns_count: %10" PRId64 "\n", rb_ujit_exec_insns_count);
fprintf(stderr, "ujit_exec_insns_count: %10" PRId64 "\n", ujit_runtime_counters.exec_instruction);
fprintf(stderr, "ratio_in_ujit: %9.1f%%\n", ratio * 100);
print_insn_count_buffer(sorted_exit_ops, 10, 4);
//print_runtime_counters();
}
#endif // if RUBY_DEBUG
@ -708,7 +766,8 @@ rb_ujit_init(struct rb_ujit_options *options)
ujit_init_codegen();
// UJIT Ruby module
VALUE mUjit = rb_define_module("UJIT");
mUjit = rb_define_module("UJIT");
rb_define_module_function(mUjit, "install_entry", ujit_install_entry, 1);
rb_define_module_function(mUjit, "blocks_for", ujit_blocks_for, 1);
// UJIT::Block (block version, code block)
@ -726,6 +785,11 @@ rb_ujit_init(struct rb_ujit_options *options)
cUjitDisasmInsn = rb_struct_define_under(cUjitDisasm, "Insn", "address", "mnemonic", "op_str", NULL);
#endif
if (RUBY_DEBUG && rb_ujit_opts.gen_stats) {
// Setup at_exit callback for printing out counters
rb_block_call(rb_mKernel, rb_intern("at_exit"), 0, NULL, at_exit_print_stats, Qfalse);
}
// Initialize the GC hooks
method_lookup_dependency = st_init_numtable();
struct ujit_root_struct *root;

View file

@ -21,9 +21,49 @@ struct rb_callcache;
#define rb_callcache rb_callcache
#endif
// Declare the runtime counters struct (one int64_t per name) together
// with a string literal of the comma separated member names. The names
// string is walked by get_stat_counters() in ujit_iface.c to export the
// counters as a Ruby hash, so the two stay in sync automatically.
#define UJIT_DECLARE_COUNTERS(...) struct rb_ujit_runtime_counters { \
    int64_t __VA_ARGS__; \
}; \
static char ujit_counter_names[] = #__VA_ARGS__;

UJIT_DECLARE_COUNTERS(
    exec_instruction,

    // opt_send_without_block: reasons compilation was refused
    oswb_callsite_not_simple,
    oswb_kw_splat,
    oswb_ic_empty,
    oswb_invalid_cme,
    oswb_protected,
    oswb_ivar_set_method,
    oswb_ivar_get_method,
    oswb_zsuper_method,
    oswb_alias_method,
    oswb_undef_method,
    oswb_optimized_method,
    oswb_missing_method,
    oswb_bmethod,
    oswb_refined_method,
    oswb_unknown_method_type,
    oswb_cfunc_ruby_array_varg,
    oswb_cfunc_argc_mismatch,
    oswb_cfunc_toomany_args,
    oswb_iseq_tailcall,
    oswb_iseq_argc_mismatch,
    oswb_iseq_not_simple,
    oswb_not_implemented_method,

    // opt_send_without_block: runtime side-exit reasons
    oswb_se_receiver_not_heap,
    oswb_se_cf_overflow,
    oswb_se_cc_klass_differ,

    // Member with known name for iterating over counters
    last_member
)

#undef UJIT_DECLARE_COUNTERS
RUBY_EXTERN struct rb_ujit_options rb_ujit_opts;
RUBY_EXTERN int64_t rb_ujit_exec_insns_count;
RUBY_EXTERN int64_t rb_compiled_iseq_count;
RUBY_EXTERN struct rb_ujit_runtime_counters ujit_runtime_counters;
void cb_write_pre_call_bytes(codeblock_t* cb);
void cb_write_post_call_bytes(codeblock_t* cb);