From e35c528d721d209ed8531b10b46c2ac725ea7bf5 Mon Sep 17 00:00:00 2001
From: Koichi Sasada <ko1@atdot.net>
Date: Mon, 17 Oct 2022 17:50:42 +0900
Subject: [PATCH] push dummy frame for loading process

This patch pushes dummy frames when loading code for the
profiling purpose.

The following methods push a dummy frame:
* `Kernel#require`
* `Kernel#load`
* `RubyVM::InstructionSequence.compile_file`
* `RubyVM::InstructionSequence.load_from_binary`

https://bugs.ruby-lang.org/issues/18559
---
 compile.c                      | 64 ++++++++++++++++++++--------------
 internal/vm.h                  |  1 +
 iseq.c                         |  6 ++++
 load.c                         |  4 +++
 test/objspace/test_objspace.rb | 59 +++++++++++++++++++++++++++++++
 vm.c                           | 25 +++++++------
 vm_backtrace.c                 | 12 ++++---
 vm_core.h                      |  3 +-
 vm_dump.c                      | 20 +++++++----
 vm_insnhelper.c                | 32 ++++++++++++++++-
 10 files changed, 175 insertions(+), 51 deletions(-)

diff --git a/compile.c b/compile.c
index 9051ecfcd6..359d55c0f2 100644
--- a/compile.c
+++ b/compile.c
@@ -12157,6 +12157,40 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset)
     const char catch_except_p = (char)ibf_load_small_value(load, &reading_pos);
     const bool builtin_inline_p = (bool)ibf_load_small_value(load, &reading_pos);
 
+    // setup fname and dummy frame
+    VALUE path = ibf_load_object(load, location_pathobj_index);
+    {
+        VALUE realpath = Qnil;
+
+        if (RB_TYPE_P(path, T_STRING)) {
+            realpath = path = rb_fstring(path);
+        }
+        else if (RB_TYPE_P(path, T_ARRAY)) {
+            VALUE pathobj = path;
+            if (RARRAY_LEN(pathobj) != 2) {
+                rb_raise(rb_eRuntimeError, "path object size mismatch");
+            }
+            path = rb_fstring(RARRAY_AREF(pathobj, 0));
+            realpath = RARRAY_AREF(pathobj, 1);
+            if (!NIL_P(realpath)) {
+                if (!RB_TYPE_P(realpath, T_STRING)) {
+                    rb_raise(rb_eArgError, "unexpected realpath %"PRIxVALUE
+                             "(%x), path=%+"PRIsVALUE,
+                             realpath, TYPE(realpath), path);
+                }
+                realpath = rb_fstring(realpath);
+            }
+        }
+        else {
+            rb_raise(rb_eRuntimeError, "unexpected path object");
+        }
+        rb_iseq_pathobj_set(iseq, path, realpath);
+    }
+
+    // push dummy frame
+    rb_execution_context_t *ec = GET_EC();
+    VALUE dummy_frame = rb_vm_push_frame_fname(ec, path);
+
 #undef IBF_BODY_OFFSET
 
     load_body->type = type;
@@ -12225,33 +12259,6 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset)
     load->current_buffer = &load->global_buffer;
 #endif
 
-    {
-        VALUE realpath = Qnil, path = ibf_load_object(load, location_pathobj_index);
-        if (RB_TYPE_P(path, T_STRING)) {
-            realpath = path = rb_fstring(path);
-        }
-        else if (RB_TYPE_P(path, T_ARRAY)) {
-            VALUE pathobj = path;
-            if (RARRAY_LEN(pathobj) != 2) {
-                rb_raise(rb_eRuntimeError, "path object size mismatch");
-            }
-            path = rb_fstring(RARRAY_AREF(pathobj, 0));
-            realpath = RARRAY_AREF(pathobj, 1);
-            if (!NIL_P(realpath)) {
-                if (!RB_TYPE_P(realpath, T_STRING)) {
-                    rb_raise(rb_eArgError, "unexpected realpath %"PRIxVALUE
-                             "(%x), path=%+"PRIsVALUE,
-                             realpath, TYPE(realpath), path);
-                }
-                realpath = rb_fstring(realpath);
-            }
-        }
-        else {
-            rb_raise(rb_eRuntimeError, "unexpected path object");
-        }
-        rb_iseq_pathobj_set(iseq, path, realpath);
-    }
-
     RB_OBJ_WRITE(iseq, &load_body->location.base_label,    ibf_load_location_str(load, location_base_label_index));
     RB_OBJ_WRITE(iseq, &load_body->location.label,         ibf_load_location_str(load, location_label_index));
 
@@ -12259,6 +12266,9 @@ ibf_load_iseq_each(struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t offset)
     load->current_buffer = saved_buffer;
 #endif
     verify_call_cache(iseq);
+
+    RB_GC_GUARD(dummy_frame);
+    rb_vm_pop_frame(ec);
 }
 
 struct ibf_dump_iseq_list_arg
diff --git a/internal/vm.h b/internal/vm.h
index c6c6b2ccc2..9480406a2b 100644
--- a/internal/vm.h
+++ b/internal/vm.h
@@ -41,6 +41,7 @@ enum method_missing_reason {
 
 /* vm_insnhelper.h */
 rb_serial_t rb_next_class_serial(void);
+VALUE rb_vm_push_frame_fname(struct rb_execution_context_struct *ec, VALUE fname);
 
 /* vm.c */
 VALUE rb_obj_is_thread(VALUE obj);
diff --git a/iseq.c b/iseq.c
index 5b1d9de106..1d0c96dba1 100644
--- a/iseq.c
+++ b/iseq.c
@@ -1435,6 +1435,9 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
 
     f = rb_file_open_str(file, "r");
 
+    rb_execution_context_t *ec = GET_EC();
+    VALUE v = rb_vm_push_frame_fname(ec, file);
+
     parser = rb_parser_new();
     rb_parser_set_context(parser, NULL, FALSE);
     ast = (rb_ast_t *)rb_parser_load_file(parser, file);
@@ -1453,6 +1456,9 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
                                          rb_realpath_internal(Qnil, file, 1),
                                          1, NULL, 0, ISEQ_TYPE_TOP, &option));
     rb_ast_dispose(ast);
+
+    rb_vm_pop_frame(ec);
+    RB_GC_GUARD(v);
     return ret;
 }
 
diff --git a/load.c b/load.c
index 62b9b2d9bd..0e75c1ab83 100644
--- a/load.c
+++ b/load.c
@@ -681,6 +681,8 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname)
     const rb_iseq_t *iseq = rb_iseq_load_iseq(fname);
 
     if (!iseq) {
+        rb_execution_context_t *ec = GET_EC();
+        VALUE v = rb_vm_push_frame_fname(ec, fname);
         rb_ast_t *ast;
         VALUE parser = rb_parser_new();
         rb_parser_set_context(parser, NULL, FALSE);
@@ -688,6 +690,8 @@ load_iseq_eval(rb_execution_context_t *ec, VALUE fname)
         iseq = rb_iseq_new_top(&ast->body, rb_fstring_lit("<top (required)>"),
                                fname, rb_realpath_internal(Qnil, fname, 1), NULL);
         rb_ast_dispose(ast);
+        rb_vm_pop_frame(ec);
+        RB_GC_GUARD(v);
     }
     rb_exec_event_hook_script_compiled(ec, iseq, Qnil);
     rb_iseq_eval(iseq);
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 3b90319858..5994fadeff 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -726,6 +726,65 @@ class TestObjSpace < Test::Unit::TestCase
     end
   end
 
+  def load_allocation_path_helper method, to_binary: false
+
+    Tempfile.create(["test_ruby_load_allocation_path", ".rb"]) do |t|
+      path = t.path
+      str = "#{Time.now.to_f.to_s}_#{rand.to_s}"
+      t.puts script = <<~RUBY
+        # frozen-string-literal: true
+        return if Time.now.to_i > 0
+        $gv = 'rnd-#{str}' # unreachable, but the string literal was written
+      RUBY
+
+      t.close
+
+      if to_binary
+        bin = RubyVM::InstructionSequence.compile_file(t.path).to_binary
+        bt = Tempfile.new(['test_ruby_load_allocation_path', '.yarb'], mode: File::Constants::WRONLY)
+        bt.write bin
+        bt.close
+
+        path = bt.path
+      end
+
+      assert_separately(%w[-robjspace -rtempfile], <<~RUBY)
+        GC.disable
+        path = "#{path}"
+        ObjectSpace.trace_object_allocations do
+          #{method}
+        end
+
+        n = 0
+        dump = ObjectSpace.dump_all(output: :string)
+        dump.each_line do |line|
+          if /"value":"rnd-#{str}"/ =~ line && /"frozen":true/ =~ line
+            assert Regexp.new('"file":"' + "#{path}") =~ line
+            assert Regexp.new('"line":') !~ line
+            n += 1
+          end
+        rescue ArgumentError
+        end
+
+        assert_equal(1, n)
+      RUBY
+    ensure
+      bt.unlink if bt
+    end
+  end
+
+  def test_load_allocation_path_load
+    load_allocation_path_helper 'load(path)'
+  end
+
+  def test_load_allocation_path_compile_file
+    load_allocation_path_helper 'RubyVM::InstructionSequence.compile_file(path)'
+  end
+
+  def test_load_allocation_path_load_from_binary
+    # load_allocation_path_helper 'iseq = RubyVM::InstructionSequence.load_from_binary(File.binread(path))', to_binary: true
+  end
+
   def test_utf8_method_names
     name = "utf8_❨╯°□°❩╯︵┻━┻"
     obj = ObjectSpace.trace_object_allocations do
diff --git a/vm.c b/vm.c
index ed38192670..15dae878df 100644
--- a/vm.c
+++ b/vm.c
@@ -3149,19 +3149,22 @@ rb_execution_context_mark(const rb_execution_context_t *ec)
         while (cfp != limit_cfp) {
             const VALUE *ep = cfp->ep;
             VM_ASSERT(!!VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED) == vm_ep_in_heap_p_(ec, ep));
-            rb_gc_mark_movable(cfp->self);
-            rb_gc_mark_movable((VALUE)cfp->iseq);
-            rb_gc_mark_movable((VALUE)cfp->block_code);
 
-            if (!VM_ENV_LOCAL_P(ep)) {
-                const VALUE *prev_ep = VM_ENV_PREV_EP(ep);
-                if (VM_ENV_FLAGS(prev_ep, VM_ENV_FLAG_ESCAPED)) {
-                    rb_gc_mark_movable(prev_ep[VM_ENV_DATA_INDEX_ENV]);
-                }
+            if (VM_FRAME_TYPE(cfp) != VM_FRAME_MAGIC_DUMMY) {
+                rb_gc_mark_movable(cfp->self);
+                rb_gc_mark_movable((VALUE)cfp->iseq);
+                rb_gc_mark_movable((VALUE)cfp->block_code);
 
-                if (VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)) {
-                    rb_gc_mark_movable(ep[VM_ENV_DATA_INDEX_ENV]);
-                    rb_gc_mark(ep[VM_ENV_DATA_INDEX_ME_CREF]);
+                if (!VM_ENV_LOCAL_P(ep)) {
+                    const VALUE *prev_ep = VM_ENV_PREV_EP(ep);
+                    if (VM_ENV_FLAGS(prev_ep, VM_ENV_FLAG_ESCAPED)) {
+                        rb_gc_mark_movable(prev_ep[VM_ENV_DATA_INDEX_ENV]);
+                    }
+
+                    if (VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)) {
+                        rb_gc_mark_movable(ep[VM_ENV_DATA_INDEX_ENV]);
+                        rb_gc_mark(ep[VM_ENV_DATA_INDEX_ME_CREF]);
+                    }
                 }
             }
 
diff --git a/vm_backtrace.c b/vm_backtrace.c
index 3aae59bf68..12157ef707 100644
--- a/vm_backtrace.c
+++ b/vm_backtrace.c
@@ -37,10 +37,8 @@ inline static int
 calc_pos(const rb_iseq_t *iseq, const VALUE *pc, int *lineno, int *node_id)
 {
     VM_ASSERT(iseq);
-    VM_ASSERT(ISEQ_BODY(iseq));
-    VM_ASSERT(ISEQ_BODY(iseq)->iseq_encoded);
-    VM_ASSERT(ISEQ_BODY(iseq)->iseq_size);
-    if (! pc) {
+
+    if (pc == NULL) {
         if (ISEQ_BODY(iseq)->type == ISEQ_TYPE_TOP) {
             VM_ASSERT(! ISEQ_BODY(iseq)->local_table);
             VM_ASSERT(! ISEQ_BODY(iseq)->local_table_size);
@@ -53,6 +51,10 @@ calc_pos(const rb_iseq_t *iseq, const VALUE *pc, int *lineno, int *node_id)
         return 1;
     }
     else {
+        VM_ASSERT(ISEQ_BODY(iseq));
+        VM_ASSERT(ISEQ_BODY(iseq)->iseq_encoded);
+        VM_ASSERT(ISEQ_BODY(iseq)->iseq_size);
+
         ptrdiff_t n = pc - ISEQ_BODY(iseq)->iseq_encoded;
         VM_ASSERT(n <= ISEQ_BODY(iseq)->iseq_size);
         VM_ASSERT(n >= 0);
@@ -1557,7 +1559,7 @@ rb_profile_frames(int start, int limit, VALUE *buff, int *lines)
     end_cfp = RUBY_VM_NEXT_CONTROL_FRAME(end_cfp);
 
     for (i=0; i<limit && cfp != end_cfp;) {
-        if (VM_FRAME_RUBYFRAME_P(cfp)) {
+        if (VM_FRAME_RUBYFRAME_P(cfp) && cfp->pc != 0) {
             if (start > 0) {
                 start--;
                 continue;
diff --git a/vm_core.h b/vm_core.h
index 3bd907b227..bdfff95bd5 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -1371,7 +1371,8 @@ static inline int
 VM_FRAME_CFRAME_P(const rb_control_frame_t *cfp)
 {
     int cframe_p = VM_ENV_FLAGS(cfp->ep, VM_FRAME_FLAG_CFRAME) != 0;
-    VM_ASSERT(RUBY_VM_NORMAL_ISEQ_P(cfp->iseq) != cframe_p);
+    VM_ASSERT(RUBY_VM_NORMAL_ISEQ_P(cfp->iseq) != cframe_p ||
+              (VM_FRAME_TYPE(cfp) & VM_FRAME_MAGIC_MASK) == VM_FRAME_MAGIC_DUMMY);
     return cframe_p;
 }
 
diff --git a/vm_dump.c b/vm_dump.c
index 607488388a..93179fedc8 100644
--- a/vm_dump.c
+++ b/vm_dump.c
@@ -89,6 +89,9 @@ control_frame_dump(const rb_execution_context_t *ec, const rb_control_frame_t *c
       case VM_FRAME_MAGIC_RESCUE:
         magic = "RESCUE";
         break;
+      case VM_FRAME_MAGIC_DUMMY:
+        magic = "DUMMY";
+        break;
       case 0:
         magic = "------";
         break;
@@ -117,12 +120,17 @@ control_frame_dump(const rb_execution_context_t *ec, const rb_control_frame_t *c
             line = -1;
         }
         else {
-            iseq = cfp->iseq;
-            pc = cfp->pc - ISEQ_BODY(iseq)->iseq_encoded;
-            iseq_name = RSTRING_PTR(ISEQ_BODY(iseq)->location.label);
-            line = rb_vm_get_sourceline(cfp);
-            if (line) {
-                snprintf(posbuf, MAX_POSBUF, "%s:%d", RSTRING_PTR(rb_iseq_path(iseq)), line);
+            if (cfp->pc) {
+                iseq = cfp->iseq;
+                pc = cfp->pc - ISEQ_BODY(iseq)->iseq_encoded;
+                iseq_name = RSTRING_PTR(ISEQ_BODY(iseq)->location.label);
+                line = rb_vm_get_sourceline(cfp);
+                if (line) {
+                    snprintf(posbuf, MAX_POSBUF, "%s:%d", RSTRING_PTR(rb_iseq_path(iseq)), line);
+                }
+            }
+            else {
+                iseq_name = "<dummy_frame>";
             }
         }
     }
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 8b19c6c10d..2434c5c3c6 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -201,7 +201,9 @@ vm_check_frame_detail(VALUE type, int req_block, int req_me, int req_cref, VALUE
 
     if ((type & VM_FRAME_MAGIC_MASK) == VM_FRAME_MAGIC_DUMMY) {
         VM_ASSERT(iseq == NULL ||
-                  RUBY_VM_NORMAL_ISEQ_P(iseq) /* argument error. it should be fixed */);
+                  RBASIC_CLASS((VALUE)iseq) == 0 || // dummy frame for loading
+                  RUBY_VM_NORMAL_ISEQ_P(iseq) //argument error
+                  );
     }
     else {
         VM_ASSERT(is_cframe == !RUBY_VM_NORMAL_ISEQ_P(iseq));
@@ -429,6 +431,34 @@ rb_vm_pop_frame(rb_execution_context_t *ec)
     vm_pop_frame(ec, ec->cfp, ec->cfp->ep);
 }
 
+// it pushes pseudo-frame with fname filename.
+VALUE
+rb_vm_push_frame_fname(rb_execution_context_t *ec, VALUE fname)
+{
+    VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer();
+    void *ptr = ruby_xcalloc(sizeof(struct rb_iseq_constant_body) + sizeof(struct rb_iseq_struct), 1);
+    rb_imemo_tmpbuf_set_ptr(tmpbuf, ptr);
+
+    struct rb_iseq_struct *dmy_iseq = (struct rb_iseq_struct *)ptr;
+    struct rb_iseq_constant_body *dmy_body = (struct rb_iseq_constant_body *)&dmy_iseq[1];
+    dmy_iseq->body = dmy_body;
+    dmy_body->type = ISEQ_TYPE_TOP;
+    dmy_body->location.pathobj = fname;
+
+    vm_push_frame(ec,
+                  dmy_iseq, //const rb_iseq_t *iseq,
+                  VM_FRAME_MAGIC_DUMMY | VM_ENV_FLAG_LOCAL | VM_FRAME_FLAG_FINISH, // VALUE type,
+                  ec->cfp->self, // VALUE self,
+                  VM_BLOCK_HANDLER_NONE, // VALUE specval,
+                  Qfalse, // VALUE cref_or_me,
+                  NULL, // const VALUE *pc,
+                  ec->cfp->sp, // VALUE *sp,
+                  0, // int local_size,
+                  0); // int stack_max
+
+    return tmpbuf;
+}
+
 /* method dispatch */
 static inline VALUE
 rb_arity_error_new(int argc, int min, int max)