From 529fc204af84f825f98f83c34b004acbaa802615 Mon Sep 17 00:00:00 2001 From: Jean byroot Boussier Date: Thu, 30 Sep 2021 16:50:31 +0200 Subject: [PATCH] marshal.c: don't call the proc with partially initialized objects. (#4866) For cyclic objects, it requires to keep a st_table of the partially initialized objects. --- marshal.c | 75 +++++++++++++++------------ spec/ruby/core/marshal/shared/load.rb | 75 +++++++++++++++------------ test/ruby/test_marshal.rb | 12 +++++ 3 files changed, 97 insertions(+), 65 deletions(-) diff --git a/marshal.c b/marshal.c index f8110da5c6..1643d84d72 100644 --- a/marshal.c +++ b/marshal.c @@ -1156,6 +1156,7 @@ struct load_arg { long offset; st_table *symbols; st_table *data; + st_table *partial_objects; VALUE proc; st_table *compat_tbl; }; @@ -1182,6 +1183,7 @@ mark_load_arg(void *ptr) return; rb_mark_tbl(p->symbols); rb_mark_tbl(p->data); + rb_mark_tbl(p->partial_objects); rb_mark_hash(p->compat_tbl); } @@ -1535,6 +1537,7 @@ r_entry0(VALUE v, st_index_t num, struct load_arg *arg) st_lookup(arg->compat_tbl, v, &real_obj); } st_insert(arg->data, num, real_obj); + st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue); return v; } @@ -1565,10 +1568,15 @@ r_post_proc(VALUE v, struct load_arg *arg) } static VALUE -r_leave(VALUE v, struct load_arg *arg) +r_leave(VALUE v, struct load_arg *arg, bool partial) { v = r_fixup_compat(v, arg); - v = r_post_proc(v, arg); + if (!partial) { + st_data_t data; + st_data_t key = (st_data_t)v; + st_delete(arg->partial_objects, &key, &data); + v = r_post_proc(v, arg); + } return v; } @@ -1695,7 +1703,7 @@ append_extmod(VALUE obj, VALUE extmod) } while (0) static VALUE -r_object0(struct load_arg *arg, int *ivp, VALUE extmod) +r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod) { VALUE v = Qnil; int type = r_byte(arg); @@ -1709,18 +1717,18 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) rb_raise(rb_eArgError, "dump format error (unlinked)"); } v = (VALUE)link; - v = r_post_proc(v, arg); + if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) { + v = r_post_proc(v, arg); + } break; case TYPE_IVAR: { int ivar = TRUE; - v = r_object0(arg, &ivar, extmod); + v = r_object0(arg, true, &ivar, extmod); if (ivar) r_ivar(v, NULL, arg); - if (RB_TYPE_P(v, T_STRING)) { - v = r_leave(v, arg); - } + v = r_leave(v, arg, partial); } break; @@ -1733,7 +1741,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) if (RB_TYPE_P(m, T_CLASS)) { /* prepended */ VALUE c; - v = r_object0(arg, 0, Qnil); + v = r_object0(arg, true, 0, Qnil); c = CLASS_OF(v); if (c != m || FL_TEST(c, FL_SINGLETON)) { rb_raise(rb_eArgError, @@ -1750,7 +1758,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) must_be_module(m, path); rb_ary_push(extmod, m); - v = r_object0(arg, 0, extmod); + v = r_object0(arg, true, 0, extmod); while (RARRAY_LEN(extmod) > 0) { m = rb_ary_pop(extmod); rb_extend_object(v, m); @@ -1766,7 +1774,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) if (FL_TEST(c, FL_SINGLETON)) { rb_raise(rb_eTypeError, "singleton can't be loaded"); } - v = r_object0(arg, 0, extmod); + v = r_object0(arg, partial, 0, extmod); if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) { goto format_error; } @@ -1784,17 +1792,17 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) case TYPE_NIL: v = Qnil; - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_TRUE: v = Qtrue; - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_FALSE: v = Qfalse; - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_FIXNUM: @@ -1802,7 +1810,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) long i = r_long(arg); v = LONG2FIX(i); } - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_FLOAT: @@ -1827,7 +1835,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) } v = DBL2NUM(d); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, false); } break; @@ -1844,15 +1852,13 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); rb_str_resize(data, 0L); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, false); } break; case TYPE_STRING: v = r_entry(r_string(arg), arg); - if (!ivp) { - v = r_leave(v, arg); - } + v = r_leave(v, arg, partial); break; case TYPE_REGEXP: @@ -1887,7 +1893,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) rb_str_set_len(str, dst - ptr); } v = r_entry0(rb_reg_new_str(str, options), idx, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -1902,7 +1908,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) rb_ary_push(v, r_object(arg)); arg->readable--; } - v = r_leave(v, arg); + v = r_leave(v, arg, partial); arg->readable++; } break; @@ -1925,7 +1931,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) if (type == TYPE_HASH_DEF) { RHASH_SET_IFNONE(v, r_object(arg)); } - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -1977,7 +1983,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) } } rb_struct_initialize(v, values); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); arg->readable += 2; } break; @@ -2004,7 +2010,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) marshal_compat_t *compat = (marshal_compat_t*)d; v = compat->loader(klass, v); } - v = r_post_proc(v, arg); + if (!partial) v = r_post_proc(v, arg); } break; @@ -2046,7 +2052,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) } v = r_entry0(v, idx, arg); r_ivar(v, NULL, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2067,9 +2073,9 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) "class %"PRIsVALUE" needs to have instance method `_load_data'", name); } - r = r_object0(arg, 0, extmod); + r = r_object0(arg, partial, 0, extmod); load_funcall(arg, v, s_load_data, 1, &r); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2080,7 +2086,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = rb_path_to_class(str); prohibit_ivar("class/module", str); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2091,7 +2097,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = path2class(str); prohibit_ivar("class", str); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2102,7 +2108,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = path2module(str); prohibit_ivar("module", str); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2115,7 +2121,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = r_symreal(arg, 0); } v = rb_str_intern(v); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); break; case TYPE_SYMLINK: @@ -2137,7 +2143,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) static VALUE r_object(struct load_arg *arg) { - return r_object0(arg, 0, Qnil); + return r_object0(arg, false, 0, Qnil); } static void @@ -2155,6 +2161,8 @@ clear_load_arg(struct load_arg *arg) arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + st_free_table(arg->partial_objects); + arg->partial_objects = 0; if (arg->compat_tbl) { st_free_table(arg->compat_tbl); arg->compat_tbl = 0; @@ -2209,6 +2217,7 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc) arg->offset = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->partial_objects = rb_init_identtable(); arg->compat_tbl = 0; arg->proc = 0; arg->readable = 0; diff --git a/spec/ruby/core/marshal/shared/load.rb b/spec/ruby/core/marshal/shared/load.rb index 18f11d70d6..37c29d7cc6 100644 --- a/spec/ruby/core/marshal/shared/load.rb +++ b/spec/ruby/core/marshal/shared/load.rb @@ -42,18 +42,17 @@ describe :marshal_load, shared: true do Marshal.send(@method, Marshal.dump([1,2]), proc { [3,4] }).should == [3,4] end - it "calls the proc for recursively visited data" do - a = [1] - a << a - ret = [] - Marshal.send(@method, Marshal.dump(a), proc { |arg| ret << arg; arg }) - ret.first.should == 1 - ret[1].should == [1,a] - ret[2].should == a - ret.size.should == 3 - end - ruby_bug "#18141", ""..."3.1" do + it "calls the proc for recursively visited data" do + a = [1] + a << a + ret = [] + Marshal.send(@method, Marshal.dump(a), proc { |arg| ret << arg.inspect; arg }) + ret[0].should == 1.inspect + ret[1].should == a.inspect + ret.size.should == 2 + end + it "loads an Array with proc" do arr = [] s = 'hi' @@ -67,13 +66,14 @@ describe :marshal_load, shared: true do a.instance_variable_set(:@two, 2) obj = [s, 10, s, s, st, a] obj.instance_variable_set(:@zoo, 'ant') - proc = Proc.new { |o| arr << o; o} + proc = Proc.new { |o| arr << o.dup; o} Marshal.send(@method, "\x04\bI[\vI\"\ahi\a:\x06EF:\t@fooi\ni\x0F@\x06@\x06IS:\x14Struct::Brittle\x06:\x06af\x060\x06:\n@clueI\"\tnone\x06;\x00FI[\b;\b:\x06b:\x06c\x06:\t@twoi\a\x06:\t@zooI\"\bant\x06;\x00F", proc) - arr.should == [false, 5, "hi", 10, "hi", "hi", 0.0, st, false, "none", - :b, :c, a, 2, ["hi", 10, "hi", "hi", st, [:a, :b, :c]], false, "ant"] - + arr.should == [ + false, 5, "hi", 10, "hi", "hi", 0.0, false, "none", st, + :b, :c, 2, a, false, "ant", ["hi", 10, "hi", "hi", st, [:a, :b, :c]], + ] Struct.send(:remove_const, :Brittle) end end @@ -139,28 +139,39 @@ describe :marshal_load, shared: true do end end - it "loads an array containing objects having _dump method, and with proc" do - arr = [] - myproc = Proc.new { |o| arr << o; o } - o1 = UserDefined.new; - o2 = UserDefinedWithIvar.new - obj = [o1, o2, o1, o2] + ruby_bug "#18141", ""..."3.1" do + it "loads an array containing objects having _dump method, and with proc" do + arr = [] + myproc = Proc.new { |o| arr << o.dup; o } + o1 = UserDefined.new; + o2 = UserDefinedWithIvar.new + obj = [o1, o2, o1, o2] - Marshal.send(@method, "\x04\b[\tu:\x10UserDefined\x18\x04\b[\aI\"\nstuff\x06:\x06EF@\x06u:\x18UserDefinedWithIvar>\x04\b[\bI\"\nstuff\a:\x06EF:\t@foo:\x18UserDefinedWithIvarI\"\tmore\x06;\x00F@\a@\x06@\a", myproc) + Marshal.send(@method, "\x04\b[\tu:\x10UserDefined\x18\x04\b[\aI\"\nstuff\x06:\x06EF@\x06u:\x18UserDefinedWithIvar>\x04\b[\bI\"\nstuff\a:\x06EF:\t@foo:\x18UserDefinedWithIvarI\"\tmore\x06;\x00F@\a@\x06@\a", myproc) - arr.should == [o1, o2, o1, o2, obj] - end + arr[0].should == o1 + arr[1].should == o2 + arr[2].should == obj + arr.size.should == 3 + end - it "loads an array containing objects having marshal_dump method, and with proc" do - arr = [] - proc = Proc.new { |o| arr << o; o } - o1 = UserMarshal.new - o2 = UserMarshalWithIvar.new - obj = [o1, o2, o1, o2] + it "loads an array containing objects having marshal_dump method, and with proc" do + arr = [] + proc = Proc.new { |o| arr << o.dup; o } + o1 = UserMarshal.new + o2 = UserMarshalWithIvar.new - Marshal.send(@method, "\004\b[\tU:\020UserMarshal\"\nstuffU:\030UserMarshalWithIvar[\006\"\fmy data@\006@\b", proc) + Marshal.send(@method, "\004\b[\tU:\020UserMarshal\"\nstuffU:\030UserMarshalWithIvar[\006\"\fmy data@\006@\b", proc) - arr.should == ['stuff', o1, 'my data', ['my data'], o2, o1, o2, obj] + arr[0].should == 'stuff' + arr[1].should == o1 + arr[2].should == 'my data' + arr[3].should == ['my data'] + arr[4].should == o2 + arr[5].should == [o1, o2, o1, o2] + + arr.size.should == 6 + end end it "assigns classes to nested subclasses of Array correctly" do diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb index f7f273af42..131ebd5fa6 100644 --- a/test/ruby/test_marshal.rb +++ b/test/ruby/test_marshal.rb @@ -858,4 +858,16 @@ class TestMarshal < Test::Unit::TestCase assert_nil(e2.backtrace_locations) # temporal end end + + class TestMarshalFreezeProc < Test::Unit::TestCase + include MarshalTestLib + + def encode(o) + Marshal.dump(o) + end + + def decode(s) + Marshal.load(s, :freeze.to_proc) + end + end end