From e35c528d721d209ed8531b10b46c2ac725ea7bf5 Mon Sep 17 00:00:00 2001 From: Koichi Sasada <ko1@atdot.net> Date: Mon, 17 Oct 2022 17:50:42 +0900 Subject: [PATCH] push dummy frame for loading process This patch pushes dummy frames when loading code for the profiling purpose. The following methods push a dummy frame: * `Kernel#require` * `Kernel#load` * `RubyVM::InstructionSequence.compile_file` * `RubyVM::InstructionSequence.load_from_binary` https://bugs.ruby-lang.org/issues/18559 --- compile.c | 64 ++++++++++++++++++++-------------- internal/vm.h | 1 + iseq.c | 6 ++++ load.c | 4 +++ test/objspace/test_objspace.rb | 59 +++++++++++++++++++++++++++++++ vm.c | 25 +++++++------ vm_backtrace.c | 12 ++++--- vm_core.h | 3 +- vm_dump.c | 20 +++++++---- vm_insnhelper.c | 32 ++++++++++++++++- 10 files changed, 175 insertions(+), 51 deletions(-) diff --git a/compile.c b/compile.c index 9051ecfcd6..359d55c0f2 100644 --- a/compile.c +++ b/compile.c @@ -12157,6 +12157,40 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) const char catch_except_p = (char)ibf_load_small_value(load, &reading_pos); const bool builtin_inline_p = (bool)ibf_load_small_value(load, &reading_pos); + // setup fname and dummy frame + VALUE path = ibf_load_object(load, location_pathobj_index); + { + VALUE realpath = Qnil; + + if (RB_TYPE_P(path, T_STRING)) { + realpath = path = rb_fstring(path); + } + else if (RB_TYPE_P(path, T_ARRAY)) { + VALUE pathobj = path; + if (RARRAY_LEN(pathobj) != 2) { + rb_raise(rb_eRuntimeError, "path object size mismatch"); + } + path = rb_fstring(RARRAY_AREF(pathobj, 0)); + realpath = RARRAY_AREF(pathobj, 1); + if (!NIL_P(realpath)) { + if (!RB_TYPE_P(realpath, T_STRING)) { + rb_raise(rb_eArgError, "unexpected realpath %"PRIxVALUE + "(%x), path=%+"PRIsVALUE, + realpath, TYPE(realpath), path); + } + realpath = rb_fstring(realpath); + } + } + else { + rb_raise(rb_eRuntimeError, "unexpected path object"); + } + rb_iseq_pathobj_set(iseq, path, realpath); + } + + // push dummy frame + rb_execution_context_t *ec = GET_EC(); + VALUE dummy_frame = rb_vm_push_frame_fname(ec, path); + #undef IBF_BODY_OFFSET load_body->type = type; @@ -12225,33 +12259,6 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) load->current_buffer = &load->global_buffer; #endif - { - VALUE realpath = Qnil, path = ibf_load_object(load, location_pathobj_index); - if (RB_TYPE_P(path, T_STRING)) { - realpath = path = rb_fstring(path); - } - else if (RB_TYPE_P(path, T_ARRAY)) { - VALUE pathobj = path; - if (RARRAY_LEN(pathobj) != 2) { - rb_raise(rb_eRuntimeError, "path object size mismatch"); - } - path = rb_fstring(RARRAY_AREF(pathobj, 0)); - realpath = RARRAY_AREF(pathobj, 1); - if (!NIL_P(realpath)) { - if (!RB_TYPE_P(realpath, T_STRING)) { - rb_raise(rb_eArgError, "unexpected realpath %"PRIxVALUE - "(%x), path=%+"PRIsVALUE, - realpath, TYPE(realpath), path); - } - realpath = rb_fstring(realpath); - } - } - else { - rb_raise(rb_eRuntimeError, "unexpected path object"); - } - rb_iseq_pathobj_set(iseq, path, realpath); - } - RB_OBJ_WRITE(iseq, &load_body->location.base_label, ibf_load_location_str(load, location_base_label_index)); RB_OBJ_WRITE(iseq, &load_body->location.label, ibf_load_location_str(load, location_label_index)); @@ -12259,6 +12266,9 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset) load->current_buffer = saved_buffer; #endif verify_call_cache(iseq); + + RB_GC_GUARD(dummy_frame); + rb_vm_pop_frame(ec); } struct ibf_dump_iseq_list_arg diff --git a/internal/vm.h b/internal/vm.h index c6c6b2ccc2..9480406a2b 100644 --- a/internal/vm.h +++ b/internal/vm.h @@ -41,6 +41,7 @@ enum method_missing_reason { /* vm_insnhelper.h */ rb_serial_t rb_next_class_serial(void); +VALUE rb_vm_push_frame_fname(struct rb_execution_context_struct *ec, VALUE fname); /* vm.c */ VALUE rb_obj_is_thread(VALUE obj); diff --git a/iseq.c b/iseq.c index 5b1d9de106..1d0c96dba1 100644 --- a/iseq.c +++ b/iseq.c @@ -1435,6 +1435,9 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) f = rb_file_open_str(file, "r"); + rb_execution_context_t *ec = GET_EC(); + VALUE v = rb_vm_push_frame_fname(ec, file); + parser = rb_parser_new(); rb_parser_set_context(parser, NULL, FALSE); ast = (rb_ast_t *)rb_parser_load_file(parser, file); @@ -1453,6 +1456,9 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self) rb_realpath_internal(Qnil, file, 1), 1, NULL, 0, ISEQ_TYPE_TOP, &option)); rb_ast_dispose(ast); + + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); return ret; } diff --git a/load.c b/load.c index 62b9b2d9bd..0e75c1ab83 100644 --- a/load.c +++ b/load.c @@ -681,6 +681,8 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname) const rb_iseq_t *iseq = rb_iseq_load_iseq(fname); if (!iseq) { + rb_execution_context_t *ec = GET_EC(); + VALUE v = rb_vm_push_frame_fname(ec, fname); rb_ast_t *ast; VALUE parser = rb_parser_new(); rb_parser_set_context(parser, NULL, FALSE); @@ -688,6 +690,8 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname) iseq = rb_iseq_new_top(&ast->body, rb_fstring_lit("<top (required)>"), fname, rb_realpath_internal(Qnil, fname, 1), NULL); rb_ast_dispose(ast); + rb_vm_pop_frame(ec); + RB_GC_GUARD(v); } rb_exec_event_hook_script_compiled(ec, iseq, Qnil); rb_iseq_eval(iseq); diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index 3b90319858..5994fadeff 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -726,6 +726,65 @@ class TestObjSpace < Test::Unit::TestCase end end + def load_allocation_path_helper method, to_binary: false + + Tempfile.create(["test_ruby_load_allocation_path", ".rb"]) do |t| + path = t.path + str = "#{Time.now.to_f.to_s}_#{rand.to_s}" + t.puts script = <<~RUBY + # frozen-string-literal: true + return if Time.now.to_i > 0 + $gv = 'rnd-#{str}' # unreachable, but the string literal was written + RUBY + + t.close + + if to_binary + bin = RubyVM::InstructionSequence.compile_file(t.path).to_binary + bt = Tempfile.new(['test_ruby_load_allocation_path', '.yarb'], mode: File::Constants::WRONLY) + bt.write bin + bt.close + + path = bt.path + end + + assert_separately(%w[-robjspace -rtempfile], <<~RUBY) + GC.disable + path = "#{path}" + ObjectSpace.trace_object_allocations do + #{method} + end + + n = 0 + dump = ObjectSpace.dump_all(output: :string) + dump.each_line do |line| + if /"value":"rnd-#{str}"/ =~ line && /"frozen":true/ =~ line + assert Regexp.new('"file":"' + "#{path}") =~ line + assert Regexp.new('"line":') !~ line + n += 1 + end + rescue ArgumentError + end + + assert_equal(1, n) + RUBY + ensure + bt.unlink if bt + end + end + + def test_load_allocation_path_load + load_allocation_path_helper 'load(path)' + end + + def test_load_allocation_path_compile_file + load_allocation_path_helper 'RubyVM::InstructionSequence.compile_file(path)' + end + + def test_load_allocation_path_load_from_binary + # load_allocation_path_helper 'iseq = RubyVM::InstructionSequence.load_from_binary(File.binread(path))', to_binary: true + end + def test_utf8_method_names name = "utf8_❨╯°□°❩╯︵┻━┻" obj = ObjectSpace.trace_object_allocations do diff --git a/vm.c b/vm.c index ed38192670..15dae878df 100644 --- a/vm.c +++ b/vm.c @@ -3149,19 +3149,22 @@ rb_execution_context_mark(const rb_execution_context_t *ec) while (cfp != limit_cfp) { const VALUE *ep = cfp->ep; VM_ASSERT(!!VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED) == vm_ep_in_heap_p_(ec, ep)); - rb_gc_mark_movable(cfp->self); - rb_gc_mark_movable((VALUE)cfp->iseq); - rb_gc_mark_movable((VALUE)cfp->block_code); - if (!VM_ENV_LOCAL_P(ep)) { - const VALUE *prev_ep = VM_ENV_PREV_EP(ep); - if (VM_ENV_FLAGS(prev_ep, VM_ENV_FLAG_ESCAPED)) { - rb_gc_mark_movable(prev_ep[VM_ENV_DATA_INDEX_ENV]); - } + if (VM_FRAME_TYPE(cfp) != VM_FRAME_MAGIC_DUMMY) { + rb_gc_mark_movable(cfp->self); + rb_gc_mark_movable((VALUE)cfp->iseq); + rb_gc_mark_movable((VALUE)cfp->block_code); - if (VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)) { - rb_gc_mark_movable(ep[VM_ENV_DATA_INDEX_ENV]); - rb_gc_mark(ep[VM_ENV_DATA_INDEX_ME_CREF]); + if (!VM_ENV_LOCAL_P(ep)) { + const VALUE *prev_ep = VM_ENV_PREV_EP(ep); + if (VM_ENV_FLAGS(prev_ep, VM_ENV_FLAG_ESCAPED)) { + rb_gc_mark_movable(prev_ep[VM_ENV_DATA_INDEX_ENV]); + } + + if (VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)) { + rb_gc_mark_movable(ep[VM_ENV_DATA_INDEX_ENV]); + rb_gc_mark(ep[VM_ENV_DATA_INDEX_ME_CREF]); + } } } diff --git a/vm_backtrace.c b/vm_backtrace.c index 3aae59bf68..12157ef707 100644 --- a/vm_backtrace.c +++ b/vm_backtrace.c @@ -37,10 +37,8 @@ inline static int calc_pos(const rb_iseq_t *iseq, const VALUE *pc, int *lineno, int *node_id) { VM_ASSERT(iseq); - VM_ASSERT(ISEQ_BODY(iseq)); - VM_ASSERT(ISEQ_BODY(iseq)->iseq_encoded); - VM_ASSERT(ISEQ_BODY(iseq)->iseq_size); - if (! pc) { + + if (pc == NULL) { if (ISEQ_BODY(iseq)->type == ISEQ_TYPE_TOP) { VM_ASSERT(! ISEQ_BODY(iseq)->local_table); VM_ASSERT(! ISEQ_BODY(iseq)->local_table_size); @@ -53,6 +51,10 @@ calc_pos(const rb_iseq_t *iseq, const VALUE *pc, int *lineno, int *node_id) return 1; } else { + VM_ASSERT(ISEQ_BODY(iseq)); + VM_ASSERT(ISEQ_BODY(iseq)->iseq_encoded); + VM_ASSERT(ISEQ_BODY(iseq)->iseq_size); + ptrdiff_t n = pc - ISEQ_BODY(iseq)->iseq_encoded; VM_ASSERT(n <= ISEQ_BODY(iseq)->iseq_size); VM_ASSERT(n >= 0); @@ -1557,7 +1559,7 @@ rb_profile_frames(int start, int limit, VALUE *buff, int *lines) end_cfp = RUBY_VM_NEXT_CONTROL_FRAME(end_cfp); for (i=0; i<limit && cfp != end_cfp;) { - if (VM_FRAME_RUBYFRAME_P(cfp)) { + if (VM_FRAME_RUBYFRAME_P(cfp) && cfp->pc != 0) { if (start > 0) { start--; continue; diff --git a/vm_core.h b/vm_core.h index 3bd907b227..bdfff95bd5 100644 --- a/vm_core.h +++ b/vm_core.h @@ -1371,7 +1371,8 @@ static inline int VM_FRAME_CFRAME_P(const rb_control_frame_t *cfp) { int cframe_p = VM_ENV_FLAGS(cfp->ep, VM_FRAME_FLAG_CFRAME) != 0; - VM_ASSERT(RUBY_VM_NORMAL_ISEQ_P(cfp->iseq) != cframe_p); + VM_ASSERT(RUBY_VM_NORMAL_ISEQ_P(cfp->iseq) != cframe_p || + (VM_FRAME_TYPE(cfp) & VM_FRAME_MAGIC_MASK) == VM_FRAME_MAGIC_DUMMY); return cframe_p; } diff --git a/vm_dump.c b/vm_dump.c index 607488388a..93179fedc8 100644 --- a/vm_dump.c +++ b/vm_dump.c @@ -89,6 +89,9 @@ control_frame_dump(const rb_execution_context_t *ec, const rb_control_frame_t *c case VM_FRAME_MAGIC_RESCUE: magic = "RESCUE"; break; + case VM_FRAME_MAGIC_DUMMY: + magic = "DUMMY"; + break; case 0: magic = "------"; break; @@ -117,12 +120,17 @@ control_frame_dump(const rb_execution_context_t *ec, const rb_control_frame_t *c line = -1; } else { - iseq = cfp->iseq; - pc = cfp->pc - ISEQ_BODY(iseq)->iseq_encoded; - iseq_name = RSTRING_PTR(ISEQ_BODY(iseq)->location.label); - line = rb_vm_get_sourceline(cfp); - if (line) { - snprintf(posbuf, MAX_POSBUF, "%s:%d", RSTRING_PTR(rb_iseq_path(iseq)), line); + if (cfp->pc) { + iseq = cfp->iseq; + pc = cfp->pc - ISEQ_BODY(iseq)->iseq_encoded; + iseq_name = RSTRING_PTR(ISEQ_BODY(iseq)->location.label); + line = rb_vm_get_sourceline(cfp); + if (line) { + snprintf(posbuf, MAX_POSBUF, "%s:%d", RSTRING_PTR(rb_iseq_path(iseq)), line); + } + } + else { + iseq_name = "<dummy_frame>"; } } } diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 8b19c6c10d..2434c5c3c6 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -201,7 +201,9 @@ vm_check_frame_detail(VALUE type, int req_block, int req_me, int req_cref, VALUE if ((type & VM_FRAME_MAGIC_MASK) == VM_FRAME_MAGIC_DUMMY) { VM_ASSERT(iseq == NULL || - RUBY_VM_NORMAL_ISEQ_P(iseq) /* argument error. it should be fixed */); + RBASIC_CLASS((VALUE)iseq) == 0 || // dummy frame for loading + RUBY_VM_NORMAL_ISEQ_P(iseq) //argument error + ); } else { VM_ASSERT(is_cframe == !RUBY_VM_NORMAL_ISEQ_P(iseq)); @@ -429,6 +431,34 @@ rb_vm_pop_frame(rb_execution_context_t *ec) vm_pop_frame(ec, ec->cfp, ec->cfp->ep); } +// it pushes pseudo-frame with fname filename. +VALUE +rb_vm_push_frame_fname(rb_execution_context_t *ec, VALUE fname) +{ + VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer(); + void *ptr = ruby_xcalloc(sizeof(struct rb_iseq_constant_body) + sizeof(struct rb_iseq_struct), 1); + rb_imemo_tmpbuf_set_ptr(tmpbuf, ptr); + + struct rb_iseq_struct *dmy_iseq = (struct rb_iseq_struct *)ptr; + struct rb_iseq_constant_body *dmy_body = (struct rb_iseq_constant_body *)&dmy_iseq[1]; + dmy_iseq->body = dmy_body; + dmy_body->type = ISEQ_TYPE_TOP; + dmy_body->location.pathobj = fname; + + vm_push_frame(ec, + dmy_iseq, //const rb_iseq_t *iseq, + VM_FRAME_MAGIC_DUMMY | VM_ENV_FLAG_LOCAL | VM_FRAME_FLAG_FINISH, // VALUE type, + ec->cfp->self, // VALUE self, + VM_BLOCK_HANDLER_NONE, // VALUE specval, + Qfalse, // VALUE cref_or_me, + NULL, // const VALUE *pc, + ec->cfp->sp, // VALUE *sp, + 0, // int local_size, + 0); // int stack_max + + return tmpbuf; +} + /* method dispatch */ static inline VALUE rb_arity_error_new(int argc, int min, int max)