From 316f757c44513b43a831f44a1a9b64337b69b181 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 2 Mar 2021 18:27:50 -0500 Subject: [PATCH] Runtime counters for bailing from opt_send_without_block This allows counting things in generated code such as side exits or different reasons for failing to compile. Runtime counters are exposed to Ruby as `UJIT.runtime_stats` for ease of reporting and analysis. --- ujit.rb | 34 +++++++++ ujit_asm.c | 6 ++ ujit_asm.h | 2 + ujit_codegen.c | 183 ++++++++++++++++++++++++++++++++++--------------- ujit_iface.c | 82 +++++++++++++++++++--- ujit_iface.h | 42 +++++++++++- 6 files changed, 282 insertions(+), 67 deletions(-) diff --git a/ujit.rb b/ujit.rb index 7c2a6522c9..5d0e3e37b4 100644 --- a/ujit.rb +++ b/ujit.rb @@ -35,4 +35,38 @@ module UJIT str end if defined?(Disasm) + + # Return a hash for statistics generated for the --ujit-stats command line option. + # Return nil when option is not passed or unavailable. + def self.runtime_stats + # defined in ujit_iface.c + Primitive.get_stat_counters + end + + class << self + private + + # Format and print out counters + def _print_stats + counters = runtime_stats + + return unless counters + + $stderr.puts("***uJIT: Printing runtime counters from ujit.rb***") + $stderr.puts("opt_send_without_block exit reasons: ") + + counters.filter! { |key, _| key.start_with?('oswb_') } + counters.transform_keys! { |key| key.to_s.delete_prefix('oswb_') } + + counters = counters.to_a + counters.sort_by! { |(_, counter_value)| counter_value } + longest_name_length = counters.max_by { |(name, _)| name.length }.first.length + total = counters.sum { |(_, counter_value)| counter_value } + + counters.reverse_each do |(name, value)| + percentage = value.fdiv(total) * 100 + $stderr.printf(" %*s %10d (%4.1f%%)\n", longest_name_length, name, value, percentage); + end + end + end end diff --git a/ujit_asm.c b/ujit_asm.c index 6b85b58075..3eea61cf69 100644 --- a/ujit_asm.c +++ b/ujit_asm.c @@ -1640,3 +1640,9 @@ void xor(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1) opnd1 ); } + +// LOCK - lock prefix for atomic shared memory operations +void cb_write_lock_prefix(codeblock_t* cb) +{ + cb_write_byte(cb, 0xF0); +} diff --git a/ujit_asm.h b/ujit_asm.h index 282cd294a9..e72aa477ef 100644 --- a/ujit_asm.h +++ b/ujit_asm.h @@ -379,4 +379,6 @@ void test(codeblock_t* cb, x86opnd_t rm_opnd, x86opnd_t test_opnd); void ud2(codeblock_t* cb); void xor(codeblock_t* cb, x86opnd_t opnd0, x86opnd_t opnd1); +void cb_write_lock_prefix(codeblock_t* cb); + #endif diff --git a/ujit_codegen.c b/ujit_codegen.c index 05114f85af..25328c6ee8 100644 --- a/ujit_codegen.c +++ b/ujit_codegen.c @@ -162,6 +162,37 @@ ujit_side_exit(jitstate_t* jit, ctx_t* ctx) return code_ptr; } +#if RUBY_DEBUG + +// Increment a profiling counter with counter_name +#define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(ujit_runtime_counters . counter_name)) +static void +_gen_counter_inc(codeblock_t *cb, int64_t *counter) +{ + if (!rb_ujit_opts.gen_stats) return; + mov(cb, REG0, const_ptr_opnd(counter)); + cb_write_lock_prefix(cb); // for ractors. + add(cb, mem_opnd(64, REG0, 0), imm_opnd(1)); +} + +// Increment a counter then take an existing side exit. +#define COUNTED_EXIT(side_exit, counter_name) _counted_side_exit(side_exit, &(ujit_runtime_counters . 
counter_name)) +static uint8_t * +_counted_side_exit(uint8_t *existing_side_exit, int64_t *counter) +{ + if (!rb_ujit_opts.gen_stats) return existing_side_exit; + + uint8_t *start = cb_get_ptr(ocb, ocb->write_pos); + _gen_counter_inc(ocb, counter); + jmp_ptr(ocb, existing_side_exit); + return start; +} + +#else +#define GEN_COUNTER_INC(cb, counter_name) ((void)0) +#define COUNTED_EXIT(side_exit, counter_name) side_exit +#endif // if RUBY_DEBUG + /* Compile an interpreter entry block to be inserted into an iseq Returns `NULL` if compilation fails. @@ -255,29 +286,23 @@ ujit_gen_block(ctx_t* ctx, block_t* block, rb_execution_context_t* ec) break; } -#if RUBY_DEBUG - // Accumulate stats about instructions executed - if (rb_ujit_opts.gen_stats) { - // Count instructions executed by the JIT - mov(cb, REG0, const_ptr_opnd((void *)&rb_ujit_exec_insns_count)); - add(cb, mem_opnd(64, REG0, 0), imm_opnd(1)); - } -#endif - //fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode)); //print_str(cb, insn_name(opcode)); - // Call the code generation function - codegen_status_t status = gen_fn(&jit, ctx); + // Count bytecode instructions that execute in generated code + // FIXME: when generation function returns false, we shouldn't increment + // this counter. + GEN_COUNTER_INC(cb, exec_instruction); - // If we can't compile this instruction - // exit to the interpreter and stop compiling - if (status == UJIT_CANT_COMPILE) { - ujit_gen_exit(&jit, ctx, cb, jit.pc); + // Call the code generation function + bool continue_generating = p_desc->gen_fn(&jit, ctx); + + if (!continue_generating) { break; } - // Move to the next instruction to compile + // Move to the next instruction + p_last_op = p_desc; insn_idx += insn_len(opcode); // If the instruction terminates this block @@ -1060,22 +1085,27 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc); // If the function expects a Ruby array of arguments - if (cfunc->argc < 0 && cfunc->argc != -1) { - return UJIT_CANT_COMPILE; + if (cfunc->argc < 0 && cfunc->argc != -1) + { + GEN_COUNTER_INC(cb, oswb_cfunc_ruby_array_varg); + return false; } // If the argument count doesn't match - if (cfunc->argc >= 0 && cfunc->argc != argc) { - return UJIT_CANT_COMPILE; + if (cfunc->argc >= 0 && cfunc->argc != argc) + { + GEN_COUNTER_INC(cb, oswb_cfunc_argc_mismatch); + return false; } // Don't JIT functions that need C stack arguments for now if (argc + 1 > NUM_C_ARG_REGS) { - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_cfunc_toomany_args); + return false; } // Create a size-exit to fall back to the interpreter - uint8_t* side_exit = ujit_side_exit(jit, ctx); + uint8_t *side_exit = ujit_side_exit(jit, ctx); // Check for interrupts ujit_check_ints(cb, side_exit); @@ -1094,12 +1124,15 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c //print_ptr(cb, recv); // Check that the receiver is a heap object - test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK)); - jnz_ptr(cb, side_exit); - cmp(cb, REG0, imm_opnd(Qfalse)); - je_ptr(cb, side_exit); - cmp(cb, REG0, imm_opnd(Qnil)); - je_ptr(cb, side_exit); + { + uint8_t *receiver_not_heap = COUNTED_EXIT(side_exit, oswb_se_receiver_not_heap); + test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK)); + jnz_ptr(cb, receiver_not_heap); + cmp(cb, REG0, imm_opnd(Qfalse)); + je_ptr(cb, receiver_not_heap); + cmp(cb, REG0, imm_opnd(Qnil)); + je_ptr(cb, receiver_not_heap); + } // Pointer to the klass field of 
the receiver &(recv->klass) x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass)); @@ -1110,7 +1143,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c // Bail if receiver class is different from compile-time call cache class jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cd->cc->klass); cmp(cb, klass_opnd, REG1); - jne_ptr(cb, side_exit); + jne_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cc_klass_differ)); // Store incremented PC into current control frame in case callee raises. mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block)))); @@ -1124,7 +1157,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t) lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t))); cmp(cb, REG_CFP, REG0); - jle_ptr(cb, side_exit); + jle_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cf_overflow)); // Increment the stack pointer by 3 (in the callee) // sp += 3 @@ -1285,19 +1318,21 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca int num_locals = iseq->body->local_table_size - num_params; if (num_params != argc) { - //fprintf(stderr, "param argc mismatch\n"); - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_iseq_argc_mismatch); + return false; } if (!rb_simple_iseq_p(iseq)) { // Only handle iseqs that have simple parameters. // See vm_callee_setup_arg(). - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_iseq_not_simple); + return false; } if (vm_ci_flag(cd->ci) & VM_CALL_TAILCALL) { // We can't handle tailcalls - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_iseq_tailcall); + return false; } rb_gc_register_mark_object((VALUE)iseq); // FIXME: intentional LEAK! 
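A note on the two macros introduced at the top of ujit_codegen.c and used throughout the hunks above: GEN_COUNTER_INC emits a LOCK-prefixed 64-bit add against the address of one int64_t member of ujit_runtime_counters (the LOCK prefix keeps the increment atomic across ractors), and COUNTED_EXIT reserves a small stub in the outlined code block (ocb) that bumps a counter and then jumps on to the pre-existing side exit, so a counted exit costs nothing on the fast path beyond the redirected jump target. The standalone sketch below models the same shape in plain C rather than in generated machine code; the names counter_inc, counted_exit and plain_side_exit are invented for illustration and are not part of the patch, and __atomic_fetch_add stands in for the lock add the assembler emits.

// Illustrative sketch only: the counted-exit pattern expressed in plain C.
#include <stdint.h>
#include <stdio.h>

// Stands in for one member of struct rb_ujit_runtime_counters.
static int64_t oswb_se_cc_klass_differ = 0;

// Roughly what GEN_COUNTER_INC generates: an atomic 64-bit increment of the
// counter at a fixed address.
static void counter_inc(int64_t *counter)
{
    __atomic_fetch_add(counter, 1, __ATOMIC_RELAXED);
}

// Roughly what a COUNTED_EXIT stub does: bump the counter, then continue to
// the original side exit (modelled here as a function pointer).
static void counted_exit(int64_t *counter, void (*side_exit)(void))
{
    counter_inc(counter);
    side_exit();
}

static void plain_side_exit(void)
{
    puts("side exit taken");
}

int main(void)
{
    counted_exit(&oswb_se_cc_klass_differ, plain_side_exit);
    printf("oswb_se_cc_klass_differ = %lld\n",
           (long long)oswb_se_cc_klass_differ);
    return 0;
}

When RUBY_DEBUG is off, GEN_COUNTER_INC collapses to ((void)0) and COUNTED_EXIT returns the original side exit unchanged; with RUBY_DEBUG but without --ujit-stats, the helpers bail out early, so release and non-stats builds generate the same code as before.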
@@ -1320,12 +1355,15 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca //print_ptr(cb, recv); // Check that the receiver is a heap object - test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK)); - jnz_ptr(cb, side_exit); - cmp(cb, REG0, imm_opnd(Qfalse)); - je_ptr(cb, side_exit); - cmp(cb, REG0, imm_opnd(Qnil)); - je_ptr(cb, side_exit); + { + uint8_t *receiver_not_heap = COUNTED_EXIT(side_exit, oswb_se_receiver_not_heap); + test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK)); + jnz_ptr(cb, receiver_not_heap); + cmp(cb, REG0, imm_opnd(Qfalse)); + je_ptr(cb, receiver_not_heap); + cmp(cb, REG0, imm_opnd(Qnil)); + je_ptr(cb, receiver_not_heap); + } // Pointer to the klass field of the receiver &(recv->klass) x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass)); @@ -1335,7 +1373,7 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca // Bail if receiver class is different from compile-time call cache class jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cd->cc->klass); cmp(cb, klass_opnd, REG1); - jne_ptr(cb, side_exit); + jne_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cc_klass_differ)); // Store the updated SP on the current frame (pop arguments and receiver) lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1))); @@ -1349,7 +1387,7 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + sizeof(rb_control_frame_t))); cmp(cb, REG_CFP, REG0); - jle_ptr(cb, side_exit); + jle_ptr(cb, COUNTED_EXIT(side_exit, oswb_se_cf_overflow)); // Adjust the callee's stack pointer lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals))); @@ -1426,7 +1464,7 @@ gen_oswb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_ca // Load the updated SP mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp)); - + // Directly jump to the entry point of the callee gen_direct_jump( &DEFAULT_CTX, @@ -1460,42 +1498,73 @@ gen_opt_send_without_block(jitstate_t* jit, ctx_t* ctx) // Don't JIT calls with keyword splat if (vm_ci_flag(cd->ci) & VM_CALL_KW_SPLAT) { - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_kw_splat); + return false; } // Don't JIT calls that aren't simple if (!(vm_ci_flag(cd->ci) & VM_CALL_ARGS_SIMPLE)) { - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_callsite_not_simple); + return false; } // Don't JIT if the inline cache is not set if (!cd->cc || !cd->cc->klass) { - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_ic_empty); + return false; } const rb_callable_method_entry_t *cme = vm_cc_cme(cd->cc); // Don't JIT if the method entry is out of date if (METHOD_ENTRY_INVALIDATED(cme)) { - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_invalid_cme); + return false; } // We don't generate code to check protected method calls if (METHOD_ENTRY_VISI(cme) == METHOD_VISI_PROTECTED) { - return UJIT_CANT_COMPILE; + GEN_COUNTER_INC(cb, oswb_protected); + return false; } - // If this is a C call - if (cme->def->type == VM_METHOD_TYPE_CFUNC) { - return gen_oswb_cfunc(jit, ctx, cd, cme, argc); - } - - // If this is a Ruby call - if (cme->def->type == VM_METHOD_TYPE_ISEQ) { + switch (cme->def->type) { + case VM_METHOD_TYPE_ISEQ: return gen_oswb_iseq(jit, ctx, cd, cme, argc); + case VM_METHOD_TYPE_CFUNC: + return gen_oswb_cfunc(jit, ctx, cd, cme, argc); + case VM_METHOD_TYPE_ATTRSET: + GEN_COUNTER_INC(cb, oswb_ivar_set_method); + return false; + 
case VM_METHOD_TYPE_BMETHOD: + GEN_COUNTER_INC(cb, oswb_bmethod); + return false; + case VM_METHOD_TYPE_IVAR: + GEN_COUNTER_INC(cb, oswb_ivar_get_method); + return false; + case VM_METHOD_TYPE_ZSUPER: + GEN_COUNTER_INC(cb, oswb_zsuper_method); + return false; + case VM_METHOD_TYPE_ALIAS: + GEN_COUNTER_INC(cb, oswb_alias_method); + return false; + case VM_METHOD_TYPE_UNDEF: + GEN_COUNTER_INC(cb, oswb_undef_method); + return false; + case VM_METHOD_TYPE_NOTIMPLEMENTED: + GEN_COUNTER_INC(cb, oswb_not_implemented_method); + return false; + case VM_METHOD_TYPE_OPTIMIZED: + GEN_COUNTER_INC(cb, oswb_optimized_method); + return false; + case VM_METHOD_TYPE_MISSING: + GEN_COUNTER_INC(cb, oswb_missing_method); + return false; + case VM_METHOD_TYPE_REFINED: + GEN_COUNTER_INC(cb, oswb_refined_method); + return false; + // no default case so compiler issues a warning if this is not exhaustive } - - return UJIT_CANT_COMPILE; } static codegen_status_t diff --git a/ujit_iface.c b/ujit_iface.c index b5cdfee0cf..352ff392cb 100644 --- a/ujit_iface.c +++ b/ujit_iface.c @@ -13,22 +13,22 @@ #include "ujit_codegen.h" #include "ujit_core.h" #include "ujit_hooks.inc" -#include "ujit.rbinc" #include "darray.h" #if HAVE_LIBCAPSTONE #include #endif -VALUE cUjitBlock; -VALUE cUjitDisasm; -VALUE cUjitDisasmInsn; +static VALUE mUjit; +static VALUE cUjitBlock; +static VALUE cUjitDisasm; +static VALUE cUjitDisasmInsn; #if RUBY_DEBUG static int64_t vm_insns_count = 0; -int64_t rb_ujit_exec_insns_count = 0; static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 }; int64_t rb_compiled_iseq_count = 0; +struct rb_ujit_runtime_counters ujit_runtime_counters = { 0 }; #endif // Machine code blocks (executable memory) @@ -506,6 +506,63 @@ ujit_disasm(VALUE self, VALUE code, VALUE from) } #endif +static VALUE +at_exit_print_stats(RB_BLOCK_CALL_FUNC_ARGLIST(yieldarg, data)) +{ + // Defined in ujit.rb + rb_funcall(mUjit, rb_intern("_print_stats"), 0); + return Qnil; +} + +// Primitive called in ujit.rb. Export all runtime counters as a Ruby hash. 
+static VALUE +get_stat_counters(rb_execution_context_t *ec, VALUE self) +{ +#if RUBY_DEBUG + if (!rb_ujit_opts.gen_stats) return Qnil; + + VALUE hash = rb_hash_new(); + RB_VM_LOCK_ENTER(); + { + int64_t *counter_reader = (int64_t *)&ujit_runtime_counters; + int64_t *counter_reader_end = &ujit_runtime_counters.last_member; + + // Iterate through comma separated counter name list + char *name_reader = ujit_counter_names; + char *counter_name_end = ujit_counter_names + sizeof(ujit_counter_names); + while (name_reader < counter_name_end && counter_reader < counter_reader_end) { + if (*name_reader == ',' || *name_reader == ' ') { + name_reader++; + continue; + } + + // Compute name of counter name + int name_len; + char *name_end; + { + name_end = strchr(name_reader, ','); + if (name_end == NULL) break; + name_len = (int)(name_end - name_reader); + } + + // Put counter into hash + VALUE key = ID2SYM(rb_intern2(name_reader, name_len)); + VALUE value = LL2NUM((long long)*counter_reader); + rb_hash_aset(hash, key, value); + + counter_reader++; + name_reader = name_end; + } + } + RB_VM_LOCK_LEAVE(); + return hash; +#else + return Qnil; +#endif // if RUBY_DEBUG +} + +#include "ujit.rbinc" + #if RUBY_DEBUG // implementation for --ujit-stats @@ -590,16 +647,17 @@ print_ujit_stats(void) const struct insn_count *sorted_exit_ops = sort_insn_count_array(exit_op_count); - double total_insns_count = vm_insns_count + rb_ujit_exec_insns_count; - double ratio = rb_ujit_exec_insns_count / total_insns_count; + double total_insns_count = vm_insns_count + ujit_runtime_counters.exec_instruction; + double ratio = ujit_runtime_counters.exec_instruction / total_insns_count; fprintf(stderr, "compiled_iseq_count: %10" PRId64 "\n", rb_compiled_iseq_count); fprintf(stderr, "main_block_code_size: %6.1f MiB\n", ((double)cb->write_pos) / 1048576.0); fprintf(stderr, "side_block_code_size: %6.1f MiB\n", ((double)ocb->write_pos) / 1048576.0); fprintf(stderr, "vm_insns_count: %10" PRId64 "\n", vm_insns_count); - fprintf(stderr, "ujit_exec_insns_count: %10" PRId64 "\n", rb_ujit_exec_insns_count); + fprintf(stderr, "ujit_exec_insns_count: %10" PRId64 "\n", ujit_runtime_counters.exec_instruction); fprintf(stderr, "ratio_in_ujit: %9.1f%%\n", ratio * 100); print_insn_count_buffer(sorted_exit_ops, 10, 4); + //print_runtime_counters(); } #endif // if RUBY_DEBUG @@ -708,7 +766,8 @@ rb_ujit_init(struct rb_ujit_options *options) ujit_init_codegen(); // UJIT Ruby module - VALUE mUjit = rb_define_module("UJIT"); + mUjit = rb_define_module("UJIT"); + rb_define_module_function(mUjit, "install_entry", ujit_install_entry, 1); rb_define_module_function(mUjit, "blocks_for", ujit_blocks_for, 1); // UJIT::Block (block version, code block) @@ -726,6 +785,11 @@ rb_ujit_init(struct rb_ujit_options *options) cUjitDisasmInsn = rb_struct_define_under(cUjitDisasm, "Insn", "address", "mnemonic", "op_str", NULL); #endif + if (RUBY_DEBUG && rb_ujit_opts.gen_stats) { + // Setup at_exit callback for printing out counters + rb_block_call(rb_mKernel, rb_intern("at_exit"), 0, NULL, at_exit_print_stats, Qfalse); + } + // Initialize the GC hooks method_lookup_dependency = st_init_numtable(); struct ujit_root_struct *root; diff --git a/ujit_iface.h b/ujit_iface.h index c1f34ed459..2542e21590 100644 --- a/ujit_iface.h +++ b/ujit_iface.h @@ -21,9 +21,49 @@ struct rb_callcache; #define rb_callcache rb_callcache #endif +#define UJIT_DECLARE_COUNTERS(...) 
struct rb_ujit_runtime_counters { \ + int64_t __VA_ARGS__; \ +}; \ +static char ujit_counter_names[] = #__VA_ARGS__; + +UJIT_DECLARE_COUNTERS( + exec_instruction, + + oswb_callsite_not_simple, + oswb_kw_splat, + oswb_ic_empty, + oswb_invalid_cme, + oswb_protected, + oswb_ivar_set_method, + oswb_ivar_get_method, + oswb_zsuper_method, + oswb_alias_method, + oswb_undef_method, + oswb_optimized_method, + oswb_missing_method, + oswb_bmethod, + oswb_refined_method, + oswb_unknown_method_type, + oswb_cfunc_ruby_array_varg, + oswb_cfunc_argc_mismatch, + oswb_cfunc_toomany_args, + oswb_iseq_tailcall, + oswb_iseq_argc_mismatch, + oswb_iseq_not_simple, + oswb_not_implemented_method, + oswb_se_receiver_not_heap, + oswb_se_cf_overflow, + oswb_se_cc_klass_differ, + + // Member with known name for iterating over counters + last_member +) + +#undef UJIT_DECLARE_COUNTERS + RUBY_EXTERN struct rb_ujit_options rb_ujit_opts; -RUBY_EXTERN int64_t rb_ujit_exec_insns_count; RUBY_EXTERN int64_t rb_compiled_iseq_count; +RUBY_EXTERN struct rb_ujit_runtime_counters ujit_runtime_counters; void cb_write_pre_call_bytes(codeblock_t* cb); void cb_write_post_call_bytes(codeblock_t* cb);
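One closing note on the UJIT_DECLARE_COUNTERS macro above: a single argument list produces both the int64_t members of struct rb_ujit_runtime_counters and, via #__VA_ARGS__, the comma-separated ujit_counter_names string. That is what lets get_stat_counters() in ujit_iface.c advance an int64_t pointer through the struct while scanning the name string, with last_member marking the end. The self-contained sketch below demonstrates the same single-declaration trick; DECLARE_COUNTERS, counter_names and the example counter names are invented for illustration and are not the patch's identifiers.

// Illustrative sketch only: declare counters once, get both the storage and
// the printable names from the same argument list.
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DECLARE_COUNTERS(...) struct counters { int64_t __VA_ARGS__; }; \
    static char counter_names[] = #__VA_ARGS__;

DECLARE_COUNTERS(exec_instruction, oswb_kw_splat, last_member)

static struct counters counters = { 0 };

int main(void)
{
    counters.exec_instruction = 42;
    counters.oswb_kw_splat = 7;

    // Walk the struct members and the stringified name list in parallel,
    // the same way get_stat_counters() builds its Ruby hash.
    int64_t *value = (int64_t *)&counters;
    int64_t *end = &counters.last_member;
    char *name = counter_names;
    while (value < end) {
        while (*name == ',' || *name == ' ') name++;
        char *name_end = strchr(name, ',');
        if (name_end == NULL) break;
        printf("%.*s = %lld\n", (int)(name_end - name), name, (long long)*value);
        value++;
        name = name_end;
    }
    return 0;
}

The pointer walk assumes the members are laid out consecutively with no padding, which holds for a struct made entirely of int64_t fields and is the same assumption the patch relies on when it iterates from &ujit_runtime_counters to &ujit_runtime_counters.last_member.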