From 13df05acfa02aeccac89e93f8789933b23004113 Mon Sep 17 00:00:00 2001 From: k0kubun Date: Sun, 21 Oct 2018 14:23:24 +0000 Subject: [PATCH] mjit.c: copy call cache values to MJIT worker same as r65275 but for call cache. === Optcarrot Benchmark === $ benchmark-driver benchmark.yml --rbenv 'before::before --disable-gems --jit;after::after --disable-gems --jit' -v --repeat-count 24 before: ruby 2.6.0dev (2018-10-21 trunk 65277) +JIT [x86_64-linux] after: ruby 2.6.0dev (2018-10-21 trunk 65277) +JIT [x86_64-linux] last_commit=mjit.c: copy call cache values to MJIT worker Calculating ------------------------------------- before after Optcarrot Lan_Master.nes 85.372 85.359 fps Comparison: Optcarrot Lan_Master.nes before: 85.4 fps after: 85.4 fps - 1.00x slower git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65279 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- mjit.c | 7 +++- mjit.h | 2 +- mjit_compile.c | 6 ++- mjit_worker.c | 47 ++++++++++++++--------- tool/ruby_vm/views/_mjit_compile_send.erb | 13 ++++--- 5 files changed, 48 insertions(+), 27 deletions(-) diff --git a/mjit.c b/mjit.c index f2b2581f7a..215057ae30 100644 --- a/mjit.c +++ b/mjit.c @@ -25,7 +25,12 @@ static void mjit_copy_job_handler(void *data) { struct mjit_copy_job *job = (struct mjit_copy_job *)data; - memcpy(job->is_entries, job->body->is_entries, sizeof(union iseq_inline_storage_entry) * job->body->is_size); + if (job->cc_entries) { + memcpy(job->cc_entries, job->body->cc_entries, sizeof(struct rb_call_cache) * (job->body->ci_size + job->body->ci_kw_size)); + } + if (job->is_entries) { + memcpy(job->is_entries, job->body->is_entries, sizeof(union iseq_inline_storage_entry) * job->body->is_size); + } CRITICAL_SECTION_START(3, "in MJIT copy job wait"); job->finish_p = TRUE; diff --git a/mjit.h b/mjit.h index 9209389e99..325fe555a7 100644 --- a/mjit.h +++ b/mjit.h @@ -64,7 +64,7 @@ extern void mjit_add_iseq_to_process(const rb_iseq_t *iseq); extern VALUE mjit_wait_call(rb_execution_context_t *ec, struct 
rb_iseq_constant_body *body); RUBY_SYMBOL_EXPORT_END -extern int mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *funcname, union iseq_inline_storage_entry *is_entries); +extern int mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *funcname, struct rb_call_cache *cc_entries, union iseq_inline_storage_entry *is_entries); extern void mjit_init(struct mjit_options *opts); extern void mjit_finish(void); extern void mjit_gc_start_hook(void); diff --git a/mjit_compile.c b/mjit_compile.c index 7cf3a47267..07e417e75b 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -34,8 +34,9 @@ struct compile_status { /* If TRUE, JIT-ed code will use local variables to store pushed values instead of using VM's stack and moving stack pointer. */ int local_stack_p; - /* Safely-accessible is_entries copied from main thread. */ + /* Safely-accessible cache entries copied from main thread. */ union iseq_inline_storage_entry *is_entries; + struct rb_call_cache *cc_entries; }; /* Storage to keep data which is consistent in each conditional branch. @@ -197,7 +198,7 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct /* Compile ISeq to C code in F. It returns 1 if it succeeds to compile. 
*/ int -mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *funcname, union iseq_inline_storage_entry *is_entries) +mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *funcname, struct rb_call_cache *cc_entries, union iseq_inline_storage_entry *is_entries) { struct compile_status status; status.success = TRUE; @@ -206,6 +207,7 @@ mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *func if (status.stack_size_for_pos == NULL) return FALSE; memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size); + status.cc_entries = cc_entries; status.is_entries = is_entries; /* For performance, we verify stack size only on compilation time (mjit_compile.inc.erb) without --jit-debug */ diff --git a/mjit_worker.c b/mjit_worker.c index 0dcd25321b..2234dcd8f1 100644 --- a/mjit_worker.c +++ b/mjit_worker.c @@ -1029,7 +1029,7 @@ compile_prelude(FILE *f) /* Compile ISeq in UNIT and return function pointer of JIT-ed code. It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong. 
*/ static mjit_func_t -convert_unit_to_func(struct rb_mjit_unit *unit, union iseq_inline_storage_entry *is_entries) +convert_unit_to_func(struct rb_mjit_unit *unit, struct rb_call_cache *cc_entries, union iseq_inline_storage_entry *is_entries) { char c_file_buff[MAXPATHLEN], *c_file = c_file_buff, *so_file, funcname[35]; /* TODO: reconsider `35` */ int success; @@ -1097,7 +1097,7 @@ convert_unit_to_func(struct rb_mjit_unit *unit, union iseq_inline_storage_entry verbose(2, "start compilation: %s@%s:%d -> %s", label, path, lineno, c_file); fprintf(f, "/* %s@%s:%d */\n\n", label, path, lineno); } - success = mjit_compile(f, unit->iseq->body, funcname, is_entries); + success = mjit_compile(f, unit->iseq->body, funcname, cc_entries, is_entries); /* release blocking mjit_gc_start_hook */ CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC"); @@ -1163,12 +1163,29 @@ convert_unit_to_func(struct rb_mjit_unit *unit, union iseq_inline_storage_entry struct mjit_copy_job { const struct rb_iseq_constant_body *body; + struct rb_call_cache *cc_entries; union iseq_inline_storage_entry *is_entries; int finish_p; }; static void mjit_copy_job_handler(void *data); +/* We're lazily copying cache values from main thread because these cache values + could be different between ones on enqueue timing and ones on dequeue timing. */ +static void +copy_cache_from_main_thread(struct mjit_copy_job *job) +{ + job->finish_p = FALSE; + + rb_postponed_job_register(0, mjit_copy_job_handler, (void *)job); + CRITICAL_SECTION_START(3, "in MJIT copy job wait"); + while (!job->finish_p) { + rb_native_cond_wait(&mjit_worker_wakeup, &mjit_engine_mutex); + verbose(3, "Getting wakeup from client"); + } + CRITICAL_SECTION_FINISH(3, "in MJIT copy job wait"); +} + /* The function implementing a worker. It is executed in a separate thread by rb_thread_create_mjit_thread. It compiles precompiled header and then compiles requested ISeqs. 
*/ @@ -1207,24 +1224,21 @@ mjit_worker(void) mjit_func_t func; struct mjit_copy_job job; - /* Copy ISeq's inline caches from main thread. */ - job.is_entries = NULL; job.body = node->unit->iseq->body; - if (job.body->is_size > 0) { - job.is_entries = malloc(sizeof(union iseq_inline_storage_entry) * job.body->is_size); - job.finish_p = FALSE; + job.cc_entries = NULL; + if (job.body->ci_size > 0 || job.body->ci_kw_size > 0) + job.cc_entries = alloca(sizeof(struct rb_call_cache) * (job.body->ci_size + job.body->ci_kw_size)); + job.is_entries = NULL; + if (job.body->is_size > 0) + job.is_entries = alloca(sizeof(union iseq_inline_storage_entry) * job.body->is_size); - rb_postponed_job_register(0, mjit_copy_job_handler, (void *)&job); - CRITICAL_SECTION_START(3, "in MJIT copy job wait"); - while (!job.finish_p) { - rb_native_cond_wait(&mjit_worker_wakeup, &mjit_engine_mutex); - verbose(3, "Getting wakeup from client"); - } - CRITICAL_SECTION_FINISH(3, "in MJIT copy job wait"); + /* Copy ISeq's inline caches values to avoid race condition. 
*/ + if (job.cc_entries != NULL || job.is_entries != NULL) { + copy_cache_from_main_thread(&job); } /* JIT compile */ - func = convert_unit_to_func(node->unit, job.is_entries); + func = convert_unit_to_func(node->unit, job.cc_entries, job.is_entries); CRITICAL_SECTION_START(3, "in jit func replace"); if (node->unit->iseq) { /* Check whether GCed or not */ @@ -1241,9 +1255,6 @@ mjit_worker(void) compact_all_jit_code(); } #endif - if (job.is_entries != NULL) { - free(job.is_entries); - } } } diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb index 4104a7646e..4ba97acd7f 100644 --- a/tool/ruby_vm/views/_mjit_compile_send.erb +++ b/tool/ruby_vm/views/_mjit_compile_send.erb @@ -13,6 +13,8 @@ % insn.opes.each_with_index do |ope, i| MAYBE_UNUSED(<%= ope.fetch(:decl) %>) = (<%= ope.fetch(:type) %>)operands[<%= i %>]; % end +% # compiler: Use copied cc to avoid race condition + CALL_CACHE cc_copy = status->cc_entries + (cc - body->cc_entries); % if (has_valid_method_type(cc)) { const rb_iseq_t *iseq; @@ -21,7 +23,7 @@ argc += ((ci->flag & VM_CALL_ARGS_BLOCKARG) ? 1 : 0); % end - if (cc->me->def->type == VM_METHOD_TYPE_ISEQ && inlinable_iseq_p(ci, cc, iseq = rb_iseq_check(cc->me->def->body.iseq.iseqptr))) { /* CC_SET_FASTPATH in vm_callee_setup_arg */ + if (cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && inlinable_iseq_p(ci, cc, iseq = rb_iseq_check(cc_copy->me->def->body.iseq.iseqptr))) { /* CC_SET_FASTPATH in vm_callee_setup_arg */ int param_size = iseq->body->param.size; /* TODO: check calling->argc for argument_arity_error */ fprintf(f, "{\n"); @@ -31,8 +33,8 @@ } % # JIT: Invalidate call cache if it requires vm_search_method. This allows to inline some of following things. 
- fprintf(f, " if (UNLIKELY(GET_GLOBAL_METHOD_STATE() != %"PRI_SERIALT_PREFIX"u ||\n", cc->method_state); - fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc->class_serial); + fprintf(f, " if (UNLIKELY(GET_GLOBAL_METHOD_STATE() != %"PRI_SERIALT_PREFIX"u ||\n", cc_copy->method_state); + fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial); fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1); fprintf(f, " goto cancel;\n"); @@ -52,11 +54,11 @@ fprintf(f, " calling.argc = %d;\n", ci->orig_argc); fprintf(f, " calling.recv = stack[%d];\n", b->stack_size - 1 - argc); -% # JIT: Special CALL_METHOD. Bypass cc->call and inline vm_call_iseq_setup_normal for vm_call_iseq_setup_func FASTPATH. +% # JIT: Special CALL_METHOD. Bypass cc_copy->call and inline vm_call_iseq_setup_normal for vm_call_iseq_setup_func FASTPATH. fprintf(f, " {\n"); fprintf(f, " VALUE v;\n"); fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x%"PRIxVALUE", 0, %d, %d);\n", - (VALUE)cc->me, param_size, iseq->body->local_table_size); /* rb_simple_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE */ + (VALUE)cc_copy->me, param_size, iseq->body->local_table_size); /* rb_simple_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE */ if (iseq->body->catch_except_p) { fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); fprintf(f, " v = vm_exec(ec, TRUE);\n");