From 4bcd5981e80d3e1852c8723741a0069779464128 Mon Sep 17 00:00:00 2001
From: Takashi Kokubun <takashikkbn@gmail.com>
Date: Tue, 10 Mar 2020 00:19:19 -0700
Subject: [PATCH] Capture inlined iseq's cc entries in root iseq's jit_unit

to avoid marking wrong cc entries when inlined iseq is compiled multiple
times, resolving the TODO added by daf7c48d88.

This obviates pseudo jit_unit in inlined iseq introduced by 7ec2359374
and fixes memory leak of the adhoc unit.
---
 mjit.c                                    | 16 -------
 mjit_compile.c                            | 39 ++++++++++------
 mjit_worker.c                             | 57 +++++++++++++++--------
 tool/ruby_vm/views/_mjit_compile_send.erb |  2 +-
 tool/ruby_vm/views/mjit_compile.inc.erb   |  2 +-
 5 files changed, 64 insertions(+), 52 deletions(-)

diff --git a/mjit.c b/mjit.c
index a5461414d3..514d04f085 100644
--- a/mjit.c
+++ b/mjit.c
@@ -26,7 +26,6 @@
 #include "internal/warnings.h"
 
 #include "mjit_worker.c"
-#include "vm_callinfo.h"
 
 // Copy ISeq's states so that race condition does not happen on compilation.
 static void
@@ -53,18 +52,6 @@ mjit_copy_job_handler(void *data)
     }
 
     const struct rb_iseq_constant_body *body = job->iseq->body;
-    const unsigned int ci_size = body->ci_size;
-    if (ci_size > 0) {
-        VM_ASSERT(body->jit_unit != NULL);
-        VM_ASSERT(body->jit_unit->cc_entries != NULL);
-
-        const struct rb_callcache **cc_entries = body->jit_unit->cc_entries;
-
-        for (unsigned int i=0; i<ci_size; i++) {
-            cc_entries[i] = body->call_data[i].cc;
-        }
-    }
-
     if (job->is_entries) {
         memcpy(job->is_entries, body->is_entries, sizeof(union iseq_inline_storage_entry) * body->is_size);
     }
@@ -293,9 +280,6 @@ create_unit(const rb_iseq_t *iseq)
 
     unit->id = current_unit_num++;
     unit->iseq = (rb_iseq_t *)iseq;
-    if (iseq->body->ci_size > 0) {
-        unit->cc_entries = ZALLOC_N(const struct rb_callcache *, iseq->body->ci_size);
-    }
     iseq->body->jit_unit = unit;
 }
 
diff --git a/mjit_compile.c b/mjit_compile.c
index c0e5b18856..a75e3fe738 100644
--- a/mjit_compile.c
+++ b/mjit_compile.c
@@ -32,12 +32,6 @@
 #define NOT_COMPILED_STACK_SIZE -1
 #define ALREADY_COMPILED_P(status, pos) \
     (status->stack_size_for_pos[pos] != NOT_COMPILED_STACK_SIZE)
-static size_t
-call_data_index(CALL_DATA cd, const struct rb_iseq_constant_body *body)
-{
-    return cd - body->call_data;
-}
-
 // For propagating information needed for lazily pushing a frame.
 struct inlined_call_context {
     int orig_argc; // ci->orig_argc
@@ -55,8 +49,12 @@ struct compile_status {
     // If true, JIT-ed code will use local variables to store pushed values instead of
     // using VM's stack and moving stack pointer.
     bool local_stack_p;
-    // Safely-accessible cache entries copied from main thread.
+    // Safely-accessible ivar cache entries copied from main thread.
     union iseq_inline_storage_entry *is_entries;
+    // Safely-accessible call cache entries captured to compiled_iseq to be marked on GC
+    const struct rb_callcache **cc_entries;
+    // A pointer to root (i.e. not inlined) iseq being compiled.
+    const struct rb_iseq_constant_body *compiled_iseq;
     // Mutated optimization levels
     struct rb_mjit_compile_info *compile_info;
     // If `inlined_iseqs[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there.
@@ -78,6 +76,12 @@ struct case_dispatch_var {
     VALUE last_value;
 };
 
+static size_t
+call_data_index(CALL_DATA cd, const struct rb_iseq_constant_body *body)
+{
+    return cd - body->call_data;
+}
+
 // Returns true if call cache is still not obsoleted and vm_cc_cme(cc)->def->type is available.
 static bool
 has_valid_method_type(CALL_CACHE cc)
@@ -273,6 +277,9 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct
     fprintf(f, "    return Qundef;\n");
 }
 
+extern const struct rb_callcache **
+mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq);
+
 extern bool
 mjit_copy_cache_from_main_thread(const rb_iseq_t *iseq, union iseq_inline_storage_entry *is_entries);
 
@@ -368,6 +375,9 @@ inlinable_iseq_p(const struct rb_iseq_constant_body *body)
         alloca(sizeof(const struct rb_iseq_constant_body *) * body->iseq_size) : NULL, \
     .is_entries = (body->is_size > 0) ? \
         alloca(sizeof(union iseq_inline_storage_entry) * body->is_size) : NULL, \
+    .cc_entries = (body->ci_size > 0) ? \
+        mjit_capture_cc_entries(status.compiled_iseq, body) : NULL, \
+    .compiled_iseq = status.compiled_iseq, \
     .compile_info = compile_root_p ? \
         rb_mjit_iseq_compile_info(body) : alloca(sizeof(struct rb_mjit_compile_info)) \
 }; \
@@ -393,7 +403,7 @@ precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status
         if (insn == BIN(opt_send_without_block)) { // `compile_inlined_cancel_handler` supports only `opt_send_without_block`
             CALL_DATA cd = (CALL_DATA)body->iseq_encoded[pos + 1];
             const struct rb_callinfo *ci = cd->ci;
-            const struct rb_callcache *cc = mjit_iseq_cc_entries(iseq->body)[call_data_index(cd, body)]; // use copy to avoid race condition
+            const struct rb_callcache *cc = status->cc_entries[call_data_index(cd, body)]; // use copy to avoid race condition
 
             const rb_iseq_t *child_iseq;
             if (has_valid_method_type(cc) &&
@@ -411,7 +421,7 @@ precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status
                             RSTRING_PTR(child_iseq->body->location.label),
                             RSTRING_PTR(rb_iseq_path(child_iseq)), FIX2INT(child_iseq->body->location.first_lineno));
 
-            struct compile_status child_status;
+            struct compile_status child_status = { .compiled_iseq = status->compiled_iseq };
             INIT_COMPILE_STATUS(child_status, child_iseq->body, false);
             child_status.inline_context = (struct inlined_call_context){
                 .orig_argc = vm_ci_argc(ci),
@@ -419,9 +429,10 @@ precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status
                 .param_size = child_iseq->body->param.size,
                 .local_size = child_iseq->body->local_table_size
             };
-            if ((child_iseq->body->ci_size > 0 || child_status.is_entries != NULL)
-                && !mjit_copy_cache_from_main_thread(child_iseq, child_status.is_entries))
+            if ((child_iseq->body->ci_size > 0 && child_status.cc_entries == NULL)
+                || (child_status.is_entries != NULL && !mjit_copy_cache_from_main_thread(child_iseq, child_status.is_entries))) {
                 return false;
+            }
 
             fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self, const rb_iseq_t *original_iseq));\n", pos);
             fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self, const rb_iseq_t *original_iseq)\n{\n", pos);
@@ -449,10 +460,10 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname)
         fprintf(f, "#define OPT_CHECKED_RUN 0\n\n");
     }
 
-    struct compile_status status;
+    struct compile_status status = { .compiled_iseq = iseq->body };
     INIT_COMPILE_STATUS(status, iseq->body, true);
-    if ((iseq->body->ci_size > 0 || status.is_entries != NULL)
-        && !mjit_copy_cache_from_main_thread(iseq, status.is_entries)) {
+    if ((iseq->body->ci_size > 0 && status.cc_entries == NULL)
+        || (status.is_entries != NULL && !mjit_copy_cache_from_main_thread(iseq, status.is_entries))) {
         return false;
     }
 
diff --git a/mjit_worker.c b/mjit_worker.c
index fcc75a35f5..e9ba24b0af 100644
--- a/mjit_worker.c
+++ b/mjit_worker.c
@@ -86,6 +86,7 @@
 #endif
 
 #include "vm_core.h"
+#include "vm_callinfo.h"
 #include "mjit.h"
 #include "gc.h"
 #include "ruby_assert.h"
@@ -160,7 +161,8 @@ struct rb_mjit_unit {
     // mjit_compile's optimization switches
     struct rb_mjit_compile_info compile_info;
     // captured CC values, they should be marked with iseq.
-    const struct rb_callcache **cc_entries; // size: iseq->body->ci_size
+    const struct rb_callcache **cc_entries;
+    unsigned int cc_entries_size; // iseq->body->ci_size + ones of inlined iseqs
 };
 
 // Linked list of struct rb_mjit_unit.
@@ -1149,6 +1151,40 @@ static void mjit_copy_job_handler(void *data);
 // vm_trace.c
 int rb_workqueue_register(unsigned flags, rb_postponed_job_func_t , void *);
 
+// Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`.
+// This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc.
+//
+// This assumes that it's safe to reference cc without acquiring GVL.
+const struct rb_callcache **
+mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq)
+{
+    struct rb_mjit_unit *unit = compiled_iseq->jit_unit;
+    unsigned int new_entries_size = unit->cc_entries_size + captured_iseq->ci_size;
+    VM_ASSERT(captured_iseq->ci_size > 0);
+
+    // Allocate new cc_entries and append them to unit->cc_entries
+    const struct rb_callcache **cc_entries;
+    if (unit->cc_entries_size == 0) {
+        VM_ASSERT(unit->cc_entries == NULL);
+        unit->cc_entries = cc_entries = malloc(sizeof(struct rb_callcache *) * new_entries_size);
+        if (cc_entries == NULL) return NULL;
+    }
+    else {
+        cc_entries = realloc(unit->cc_entries, sizeof(struct rb_callcache *) * new_entries_size);
+        if (cc_entries == NULL) return NULL;
+        unit->cc_entries = cc_entries;
+        cc_entries += unit->cc_entries_size;
+    }
+    unit->cc_entries_size = new_entries_size;
+
+    // Capture cc to cc_entries
+    for (unsigned int i = 0; i < captured_iseq->ci_size; i++) {
+        cc_entries[i] = captured_iseq->call_data[i].cc;
+    }
+
+    return cc_entries;
+}
+
 // Copy inline cache values of `iseq` to `cc_entries` and `is_entries`.
 // These buffers should be pre-allocated properly prior to calling this function.
 // Return true if copy succeeds or is not needed.
@@ -1176,25 +1212,6 @@ mjit_copy_cache_from_main_thread(const rb_iseq_t *iseq, union iseq_inline_storag
     CRITICAL_SECTION_FINISH(3, "in mjit_copy_cache_from_main_thread");
 
     if (UNLIKELY(mjit_opts.wait)) {
-        // setup pseudo jit_unit
-        //
-        // Usually jit_unit is created in `rb_mjit_add_iseq_to_process`.
-        // However, this copy job can be used for inlined ISeqs too, and
-        // inlined ISeq doesn't have a jit_unit.
-        // TODO: Manage the cc in outer ISeq's jit_unit.
-        if (iseq->body->jit_unit == NULL) {
-            // This function is invoked in mjit worker thread, so GC should not be invoked.
-            // To prevent GC with xmalloc(), use malloc() directly here.
-            // However, mixing xmalloc() and malloc() will cause another issue.
-            // TODO: fix this allocation code.
-            iseq->body->jit_unit = (struct rb_mjit_unit *)malloc(sizeof(struct rb_mjit_unit));
-            if (iseq->body->jit_unit == NULL) rb_fatal("malloc failed");
-            if (iseq->body->ci_size > 0) {
-                iseq->body->jit_unit->cc_entries =
-                    (const struct rb_callcache **)calloc(iseq->body->ci_size, sizeof(const struct rb_callcache *));
-                if (iseq->body->jit_unit->cc_entries == NULL) rb_fatal("malloc failed");
-            }
-        }
         mjit_copy_job_handler((void *)job);
     }
     else if (rb_workqueue_register(0, mjit_copy_job_handler, (void *)job)) {
diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb
index 9f51f856fe..4a2872302d 100644
--- a/tool/ruby_vm/views/_mjit_compile_send.erb
+++ b/tool/ruby_vm/views/_mjit_compile_send.erb
@@ -14,7 +14,7 @@
         MAYBE_UNUSED(<%= ope.fetch(:decl) %>) = (<%= ope.fetch(:type) %>)operands[<%= i %>];
 % end
 % # compiler: Use copied cc to avoid race condition
-    const struct rb_callcache *captured_cc = mjit_iseq_cc_entries(body)[call_data_index(cd, body)];
+    const struct rb_callcache *captured_cc = status->cc_entries[call_data_index(cd, body)];
 %
     if (!status->compile_info->disable_send_cache && has_valid_method_type(captured_cc)) {
         const rb_iseq_t *iseq;
diff --git a/tool/ruby_vm/views/mjit_compile.inc.erb b/tool/ruby_vm/views/mjit_compile.inc.erb
index de0dfde604..0b9fa3f222 100644
--- a/tool/ruby_vm/views/mjit_compile.inc.erb
+++ b/tool/ruby_vm/views/mjit_compile.inc.erb
@@ -57,7 +57,7 @@ switch (insn) {
 % when *send_compatible_opt_insns
 % # To avoid cancel, just emit `opt_send_without_block` instead of `opt_*` insn if call cache is populated.
 %   cd_index = insn.opes.index { |o| o.fetch(:type) == 'CALL_DATA' }
-    if (has_valid_method_type(mjit_iseq_cc_entries(body)[call_data_index((CALL_DATA)operands[<%= cd_index %>], body)])) {
+    if (has_valid_method_type(status->cc_entries[call_data_index((CALL_DATA)operands[<%= cd_index %>], body)])) {
 <%= render 'mjit_compile_send', locals: { insn: opt_send_without_block } -%>
 <%= render 'mjit_compile_insn', locals: { insn: opt_send_without_block } -%>
         break;