diff --git a/marshal.c b/marshal.c index 5f0ac7e7bb..ed096f52fe 100644 --- a/marshal.c +++ b/marshal.c @@ -1137,6 +1137,7 @@ struct load_arg { long offset; st_table *symbols; st_table *data; + st_table *partial_objects; VALUE proc; st_table *compat_tbl; }; @@ -1163,6 +1164,7 @@ mark_load_arg(void *ptr) return; rb_mark_tbl(p->symbols); rb_mark_tbl(p->data); + rb_mark_tbl(p->partial_objects); rb_mark_hash(p->compat_tbl); } @@ -1516,6 +1518,7 @@ r_entry0(VALUE v, st_index_t num, struct load_arg *arg) st_lookup(arg->compat_tbl, v, &real_obj); } st_insert(arg->data, num, real_obj); + st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue); return v; } @@ -1546,10 +1549,15 @@ r_post_proc(VALUE v, struct load_arg *arg) } static VALUE -r_leave(VALUE v, struct load_arg *arg) +r_leave(VALUE v, struct load_arg *arg, bool partial) { v = r_fixup_compat(v, arg); - v = r_post_proc(v, arg); + if (!partial) { + st_data_t data; + st_data_t key = (st_data_t)v; + st_delete(arg->partial_objects, &key, &data); + v = r_post_proc(v, arg); + } return v; } @@ -1676,7 +1684,7 @@ append_extmod(VALUE obj, VALUE extmod) } while (0) static VALUE -r_object0(struct load_arg *arg, int *ivp, VALUE extmod) +r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod) { VALUE v = Qnil; int type = r_byte(arg); @@ -1690,15 +1698,18 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) rb_raise(rb_eArgError, "dump format error (unlinked)"); } v = (VALUE)link; - v = r_post_proc(v, arg); + if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) { + v = r_post_proc(v, arg); + } break; case TYPE_IVAR: { int ivar = TRUE; - v = r_object0(arg, &ivar, extmod); + v = r_object0(arg, true, &ivar, extmod); if (ivar) r_ivar(v, NULL, arg); + v = r_leave(v, arg, partial); } break; @@ -1711,7 +1722,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) if (RB_TYPE_P(m, T_CLASS)) { /* prepended */ VALUE c; - v = r_object0(arg, 0, Qnil); + v = r_object0(arg, true, 0, Qnil); c = CLASS_OF(v); if (c != m || FL_TEST(c, FL_SINGLETON)) { rb_raise(rb_eArgError, @@ -1728,7 +1739,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) must_be_module(m, path); rb_ary_push(extmod, m); - v = r_object0(arg, 0, extmod); + v = r_object0(arg, true, 0, extmod); while (RARRAY_LEN(extmod) > 0) { m = rb_ary_pop(extmod); rb_extend_object(v, m); @@ -1744,7 +1755,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) if (FL_TEST(c, FL_SINGLETON)) { rb_raise(rb_eTypeError, "singleton can't be loaded"); } - v = r_object0(arg, 0, extmod); + v = r_object0(arg, partial, 0, extmod); if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) { goto format_error; } @@ -1762,17 +1773,17 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) case TYPE_NIL: v = Qnil; - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_TRUE: v = Qtrue; - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_FALSE: v = Qfalse; - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_FIXNUM: @@ -1780,7 +1791,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) long i = r_long(arg); v = LONG2FIX(i); } - v = r_leave(v, arg); + v = r_leave(v, arg, false); break; case TYPE_FLOAT: @@ -1805,7 +1816,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) } v = DBL2NUM(d); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, false); } break; @@ -1822,13 +1833,13 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); rb_str_resize(data, 0L); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, false); } break; case TYPE_STRING: v = r_entry(r_string(arg), arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); break; case TYPE_REGEXP: @@ -1863,7 +1874,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) rb_str_set_len(str, dst - ptr); } v = r_entry0(rb_reg_new_str(str, options), idx, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -1878,7 +1889,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) rb_ary_push(v, r_object(arg)); arg->readable--; } - v = r_leave(v, arg); + v = r_leave(v, arg, partial); arg->readable++; } break; @@ -1901,7 +1912,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) if (type == TYPE_HASH_DEF) { RHASH_SET_IFNONE(v, r_object(arg)); } - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -1953,7 +1964,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) } } rb_struct_initialize(v, values); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); arg->readable += 2; } break; @@ -1980,7 +1991,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) marshal_compat_t *compat = (marshal_compat_t*)d; v = compat->loader(klass, v); } - v = r_post_proc(v, arg); + if (!partial) v = r_post_proc(v, arg); } break; @@ -2022,7 +2033,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) } v = r_entry0(v, idx, arg); r_ivar(v, NULL, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2043,9 +2054,9 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) "class %"PRIsVALUE" needs to have instance method `_load_data'", name); } - r = r_object0(arg, 0, extmod); + r = r_object0(arg, partial, 0, extmod); load_funcall(arg, v, s_load_data, 1, &r); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2056,7 +2067,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = rb_path_to_class(str); prohibit_ivar("class/module", str); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2067,7 +2078,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = path2class(str); prohibit_ivar("class", str); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2078,7 +2089,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = path2module(str); prohibit_ivar("module", str); v = r_entry(v, arg); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); } break; @@ -2091,7 +2102,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) v = r_symreal(arg, 0); } v = rb_str_intern(v); - v = r_leave(v, arg); + v = r_leave(v, arg, partial); break; case TYPE_SYMLINK: @@ -2113,7 +2124,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod) static VALUE r_object(struct load_arg *arg) { - return r_object0(arg, 0, Qnil); + return r_object0(arg, false, 0, Qnil); } static void @@ -2131,6 +2142,8 @@ clear_load_arg(struct load_arg *arg) arg->symbols = 0; st_free_table(arg->data); arg->data = 0; + st_free_table(arg->partial_objects); + arg->partial_objects = 0; if (arg->compat_tbl) { st_free_table(arg->compat_tbl); arg->compat_tbl = 0; @@ -2185,6 +2198,7 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc) arg->offset = 0; arg->symbols = st_init_numtable(); arg->data = rb_init_identtable(); + arg->partial_objects = rb_init_identtable(); arg->compat_tbl = 0; arg->proc = 0; arg->readable = 0; diff --git a/spec/ruby/core/marshal/shared/load.rb b/spec/ruby/core/marshal/shared/load.rb index 9d42b9df4a..97023c1640 100644 --- a/spec/ruby/core/marshal/shared/load.rb +++ b/spec/ruby/core/marshal/shared/load.rb @@ -20,42 +20,62 @@ describe :marshal_load, shared: true do end describe "when called with a proc" do + ruby_bug "#18141", ""..."3.1" do + it "call the proc with fully initialized strings" do + utf8_string = "foo".encode(Encoding::UTF_8) + Marshal.send(@method, Marshal.dump(utf8_string), proc { |arg| + if arg.is_a?(String) + arg.should == utf8_string + arg.encoding.should == Encoding::UTF_8 + end + arg + }) + end + + it "no longer mutate the object after it was passed to the proc" do + string = Marshal.load(Marshal.dump("foo"), :freeze.to_proc) + string.should.frozen? + end + end + it "returns the value of the proc" do Marshal.send(@method, Marshal.dump([1,2]), proc { [3,4] }).should == [3,4] end - it "calls the proc for recursively visited data" do - a = [1] - a << a - ret = [] - Marshal.send(@method, Marshal.dump(a), proc { |arg| ret << arg; arg }) - ret.first.should == 1 - ret[1].should == [1,a] - ret[2].should == a - ret.size.should == 3 - end + ruby_bug "#18141", ""..."3.1" do + it "calls the proc for recursively visited data" do + a = [1] + a << a + ret = [] + Marshal.send(@method, Marshal.dump(a), proc { |arg| ret << arg.inspect; arg }) + ret[0].should == 1.inspect + ret[1].should == a.inspect + ret.size.should == 2 + end - it "loads an Array with proc" do - arr = [] - s = 'hi' - s.instance_variable_set(:@foo, 5) - st = Struct.new("Brittle", :a).new - st.instance_variable_set(:@clue, 'none') - st.a = 0.0 - h = Hash.new('def') - h['nine'] = 9 - a = [:a, :b, :c] - a.instance_variable_set(:@two, 2) - obj = [s, 10, s, s, st, a] - obj.instance_variable_set(:@zoo, 'ant') - proc = Proc.new { |o| arr << o; o} + it "loads an Array with proc" do + arr = [] + s = 'hi' + s.instance_variable_set(:@foo, 5) + st = Struct.new("Brittle", :a).new + st.instance_variable_set(:@clue, 'none') + st.a = 0.0 + h = Hash.new('def') + h['nine'] = 9 + a = [:a, :b, :c] + a.instance_variable_set(:@two, 2) + obj = [s, 10, s, s, st, a] + obj.instance_variable_set(:@zoo, 'ant') + proc = Proc.new { |o| arr << o.dup; o} - Marshal.send(@method, "\x04\bI[\vI\"\ahi\a:\x06EF:\t@fooi\ni\x0F@\x06@\x06IS:\x14Struct::Brittle\x06:\x06af\x060\x06:\n@clueI\"\tnone\x06;\x00FI[\b;\b:\x06b:\x06c\x06:\t@twoi\a\x06:\t@zooI\"\bant\x06;\x00F", proc) + Marshal.send(@method, "\x04\bI[\vI\"\ahi\a:\x06EF:\t@fooi\ni\x0F@\x06@\x06IS:\x14Struct::Brittle\x06:\x06af\x060\x06:\n@clueI\"\tnone\x06;\x00FI[\b;\b:\x06b:\x06c\x06:\t@twoi\a\x06:\t@zooI\"\bant\x06;\x00F", proc) - arr.should == ["hi", false, 5, 10, "hi", "hi", 0.0, st, "none", false, - :b, :c, a, 2, ["hi", 10, "hi", "hi", st, [:a, :b, :c]], "ant", false] - - Struct.send(:remove_const, :Brittle) + arr.should == [ + false, 5, "hi", 10, "hi", "hi", 0.0, false, "none", st, + :b, :c, 2, a, false, "ant", ["hi", 10, "hi", "hi", st, [:a, :b, :c]], + ] + Struct.send(:remove_const, :Brittle) + end end end @@ -119,28 +139,39 @@ describe :marshal_load, shared: true do end end - it "loads an array containing objects having _dump method, and with proc" do - arr = [] - myproc = Proc.new { |o| arr << o; o } - o1 = UserDefined.new; - o2 = UserDefinedWithIvar.new - obj = [o1, o2, o1, o2] + ruby_bug "#18141", ""..."3.1" do + it "loads an array containing objects having _dump method, and with proc" do + arr = [] + myproc = Proc.new { |o| arr << o.dup; o } + o1 = UserDefined.new; + o2 = UserDefinedWithIvar.new + obj = [o1, o2, o1, o2] - Marshal.send(@method, "\x04\b[\tu:\x10UserDefined\x18\x04\b[\aI\"\nstuff\x06:\x06EF@\x06u:\x18UserDefinedWithIvar>\x04\b[\bI\"\nstuff\a:\x06EF:\t@foo:\x18UserDefinedWithIvarI\"\tmore\x06;\x00F@\a@\x06@\a", myproc) + Marshal.send(@method, "\x04\b[\tu:\x10UserDefined\x18\x04\b[\aI\"\nstuff\x06:\x06EF@\x06u:\x18UserDefinedWithIvar>\x04\b[\bI\"\nstuff\a:\x06EF:\t@foo:\x18UserDefinedWithIvarI\"\tmore\x06;\x00F@\a@\x06@\a", myproc) - arr.should == [o1, o2, o1, o2, obj] - end + arr[0].should == o1 + arr[1].should == o2 + arr[2].should == obj + arr.size.should == 3 + end - it "loads an array containing objects having marshal_dump method, and with proc" do - arr = [] - proc = Proc.new { |o| arr << o; o } - o1 = UserMarshal.new - o2 = UserMarshalWithIvar.new - obj = [o1, o2, o1, o2] + it "loads an array containing objects having marshal_dump method, and with proc" do + arr = [] + proc = Proc.new { |o| arr << o.dup; o } + o1 = UserMarshal.new + o2 = UserMarshalWithIvar.new - Marshal.send(@method, "\004\b[\tU:\020UserMarshal\"\nstuffU:\030UserMarshalWithIvar[\006\"\fmy data@\006@\b", proc) + Marshal.send(@method, "\004\b[\tU:\020UserMarshal\"\nstuffU:\030UserMarshalWithIvar[\006\"\fmy data@\006@\b", proc) - arr.should == ['stuff', o1, 'my data', ['my data'], o2, o1, o2, obj] + arr[0].should == 'stuff' + arr[1].should == o1 + arr[2].should == 'my data' + arr[3].should == ['my data'] + arr[4].should == o2 + arr[5].should == [o1, o2, o1, o2] + + arr.size.should == 6 + end end it "assigns classes to nested subclasses of Array correctly" do diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb index 43bdadccb0..ae6e8708ee 100644 --- a/test/ruby/test_marshal.rb +++ b/test/ruby/test_marshal.rb @@ -672,6 +672,23 @@ class TestMarshal < Test::Unit::TestCase assert_equal(['X', 'X'], Marshal.load(Marshal.dump(obj), ->(v) { v == str ? v.upcase : v })) end + def test_marshal_proc_string_encoding + string = "foo" + payload = Marshal.dump(string) + Marshal.load(payload, ->(v) { + if v.is_a?(String) + assert_equal(string, v) + assert_equal(string.encoding, v.encoding) + end + v + }) + end + + def test_marshal_proc_freeze + object = { foo: [42, "bar"] } + assert_equal object, Marshal.load(Marshal.dump(object), :freeze.to_proc) + end + def test_marshal_load_extended_class_crash assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}") begin; @@ -833,4 +850,16 @@ class TestMarshal < Test::Unit::TestCase assert_nil(e2.backtrace_locations) # temporal end end + + class TestMarshalFreezeProc < Test::Unit::TestCase + include MarshalTestLib + + def encode(o) + Marshal.dump(o) + end + + def decode(s) + Marshal.load(s, :freeze.to_proc) + end + end end diff --git a/version.h b/version.h index 99ed4691ce..862232ba9c 100644 --- a/version.h +++ b/version.h @@ -12,11 +12,11 @@ # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR #define RUBY_VERSION_TEENY 3 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR -#define RUBY_PATCHLEVEL 140 +#define RUBY_PATCHLEVEL 141 #define RUBY_RELEASE_YEAR 2021 #define RUBY_RELEASE_MONTH 10 -#define RUBY_RELEASE_DAY 3 +#define RUBY_RELEASE_DAY 9 #include "ruby/version.h"