1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

merge revision(s) 89242279e61b023a81c58065c62a82de8829d0b3,529fc204af84f825f98f83c34b004acbaa802615: [Backport #18141]

Marshal.load: do not call the proc until strings have their encoding

	Ref: https://bugs.ruby-lang.org/issues/18141
	---
	 marshal.c                             |  7 +++-
	 spec/ruby/core/marshal/shared/load.rb | 62 +++++++++++++++++++++++------------
	 test/ruby/test_marshal.rb             | 17 ++++++++++
	 3 files changed, 64 insertions(+), 22 deletions(-)

	marshal.c: don't call the proc with partially initialized objects.
	 (#4866)

	For cyclic objects, it requires to keep a st_table of the partially
	initialized objects.
	---
	 marshal.c                             | 75 ++++++++++++++++++++---------------
	 spec/ruby/core/marshal/shared/load.rb | 75 ++++++++++++++++++++---------------
	 test/ruby/test_marshal.rb             | 12 ++++++
	 3 files changed, 97 insertions(+), 65 deletions(-)
This commit is contained in:
nagachika 2021-10-09 15:05:24 +09:00
parent f192e01233
commit fe9d33beb7
4 changed files with 150 additions and 76 deletions

View file

@ -1137,6 +1137,7 @@ struct load_arg {
long offset; long offset;
st_table *symbols; st_table *symbols;
st_table *data; st_table *data;
st_table *partial_objects;
VALUE proc; VALUE proc;
st_table *compat_tbl; st_table *compat_tbl;
}; };
@ -1163,6 +1164,7 @@ mark_load_arg(void *ptr)
return; return;
rb_mark_tbl(p->symbols); rb_mark_tbl(p->symbols);
rb_mark_tbl(p->data); rb_mark_tbl(p->data);
rb_mark_tbl(p->partial_objects);
rb_mark_hash(p->compat_tbl); rb_mark_hash(p->compat_tbl);
} }
@ -1516,6 +1518,7 @@ r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
st_lookup(arg->compat_tbl, v, &real_obj); st_lookup(arg->compat_tbl, v, &real_obj);
} }
st_insert(arg->data, num, real_obj); st_insert(arg->data, num, real_obj);
st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue);
return v; return v;
} }
@ -1546,10 +1549,15 @@ r_post_proc(VALUE v, struct load_arg *arg)
} }
static VALUE static VALUE
r_leave(VALUE v, struct load_arg *arg) r_leave(VALUE v, struct load_arg *arg, bool partial)
{ {
v = r_fixup_compat(v, arg); v = r_fixup_compat(v, arg);
if (!partial) {
st_data_t data;
st_data_t key = (st_data_t)v;
st_delete(arg->partial_objects, &key, &data);
v = r_post_proc(v, arg); v = r_post_proc(v, arg);
}
return v; return v;
} }
@ -1676,7 +1684,7 @@ append_extmod(VALUE obj, VALUE extmod)
} while (0) } while (0)
static VALUE static VALUE
r_object0(struct load_arg *arg, int *ivp, VALUE extmod) r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
{ {
VALUE v = Qnil; VALUE v = Qnil;
int type = r_byte(arg); int type = r_byte(arg);
@ -1690,15 +1698,18 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
rb_raise(rb_eArgError, "dump format error (unlinked)"); rb_raise(rb_eArgError, "dump format error (unlinked)");
} }
v = (VALUE)link; v = (VALUE)link;
if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
v = r_post_proc(v, arg); v = r_post_proc(v, arg);
}
break; break;
case TYPE_IVAR: case TYPE_IVAR:
{ {
int ivar = TRUE; int ivar = TRUE;
v = r_object0(arg, &ivar, extmod); v = r_object0(arg, true, &ivar, extmod);
if (ivar) r_ivar(v, NULL, arg); if (ivar) r_ivar(v, NULL, arg);
v = r_leave(v, arg, partial);
} }
break; break;
@ -1711,7 +1722,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
if (RB_TYPE_P(m, T_CLASS)) { /* prepended */ if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
VALUE c; VALUE c;
v = r_object0(arg, 0, Qnil); v = r_object0(arg, true, 0, Qnil);
c = CLASS_OF(v); c = CLASS_OF(v);
if (c != m || FL_TEST(c, FL_SINGLETON)) { if (c != m || FL_TEST(c, FL_SINGLETON)) {
rb_raise(rb_eArgError, rb_raise(rb_eArgError,
@ -1728,7 +1739,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
must_be_module(m, path); must_be_module(m, path);
rb_ary_push(extmod, m); rb_ary_push(extmod, m);
v = r_object0(arg, 0, extmod); v = r_object0(arg, true, 0, extmod);
while (RARRAY_LEN(extmod) > 0) { while (RARRAY_LEN(extmod) > 0) {
m = rb_ary_pop(extmod); m = rb_ary_pop(extmod);
rb_extend_object(v, m); rb_extend_object(v, m);
@ -1744,7 +1755,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
if (FL_TEST(c, FL_SINGLETON)) { if (FL_TEST(c, FL_SINGLETON)) {
rb_raise(rb_eTypeError, "singleton can't be loaded"); rb_raise(rb_eTypeError, "singleton can't be loaded");
} }
v = r_object0(arg, 0, extmod); v = r_object0(arg, partial, 0, extmod);
if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) { if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
goto format_error; goto format_error;
} }
@ -1762,17 +1773,17 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
case TYPE_NIL: case TYPE_NIL:
v = Qnil; v = Qnil;
v = r_leave(v, arg); v = r_leave(v, arg, false);
break; break;
case TYPE_TRUE: case TYPE_TRUE:
v = Qtrue; v = Qtrue;
v = r_leave(v, arg); v = r_leave(v, arg, false);
break; break;
case TYPE_FALSE: case TYPE_FALSE:
v = Qfalse; v = Qfalse;
v = r_leave(v, arg); v = r_leave(v, arg, false);
break; break;
case TYPE_FIXNUM: case TYPE_FIXNUM:
@ -1780,7 +1791,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
long i = r_long(arg); long i = r_long(arg);
v = LONG2FIX(i); v = LONG2FIX(i);
} }
v = r_leave(v, arg); v = r_leave(v, arg, false);
break; break;
case TYPE_FLOAT: case TYPE_FLOAT:
@ -1805,7 +1816,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
} }
v = DBL2NUM(d); v = DBL2NUM(d);
v = r_entry(v, arg); v = r_entry(v, arg);
v = r_leave(v, arg); v = r_leave(v, arg, false);
} }
break; break;
@ -1822,13 +1833,13 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0)); INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
rb_str_resize(data, 0L); rb_str_resize(data, 0L);
v = r_entry(v, arg); v = r_entry(v, arg);
v = r_leave(v, arg); v = r_leave(v, arg, false);
} }
break; break;
case TYPE_STRING: case TYPE_STRING:
v = r_entry(r_string(arg), arg); v = r_entry(r_string(arg), arg);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
break; break;
case TYPE_REGEXP: case TYPE_REGEXP:
@ -1863,7 +1874,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
rb_str_set_len(str, dst - ptr); rb_str_set_len(str, dst - ptr);
} }
v = r_entry0(rb_reg_new_str(str, options), idx, arg); v = r_entry0(rb_reg_new_str(str, options), idx, arg);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -1878,7 +1889,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
rb_ary_push(v, r_object(arg)); rb_ary_push(v, r_object(arg));
arg->readable--; arg->readable--;
} }
v = r_leave(v, arg); v = r_leave(v, arg, partial);
arg->readable++; arg->readable++;
} }
break; break;
@ -1901,7 +1912,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
if (type == TYPE_HASH_DEF) { if (type == TYPE_HASH_DEF) {
RHASH_SET_IFNONE(v, r_object(arg)); RHASH_SET_IFNONE(v, r_object(arg));
} }
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -1953,7 +1964,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
} }
} }
rb_struct_initialize(v, values); rb_struct_initialize(v, values);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
arg->readable += 2; arg->readable += 2;
} }
break; break;
@ -1980,7 +1991,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
marshal_compat_t *compat = (marshal_compat_t*)d; marshal_compat_t *compat = (marshal_compat_t*)d;
v = compat->loader(klass, v); v = compat->loader(klass, v);
} }
v = r_post_proc(v, arg); if (!partial) v = r_post_proc(v, arg);
} }
break; break;
@ -2022,7 +2033,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
} }
v = r_entry0(v, idx, arg); v = r_entry0(v, idx, arg);
r_ivar(v, NULL, arg); r_ivar(v, NULL, arg);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -2043,9 +2054,9 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
"class %"PRIsVALUE" needs to have instance method `_load_data'", "class %"PRIsVALUE" needs to have instance method `_load_data'",
name); name);
} }
r = r_object0(arg, 0, extmod); r = r_object0(arg, partial, 0, extmod);
load_funcall(arg, v, s_load_data, 1, &r); load_funcall(arg, v, s_load_data, 1, &r);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -2056,7 +2067,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
v = rb_path_to_class(str); v = rb_path_to_class(str);
prohibit_ivar("class/module", str); prohibit_ivar("class/module", str);
v = r_entry(v, arg); v = r_entry(v, arg);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -2067,7 +2078,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
v = path2class(str); v = path2class(str);
prohibit_ivar("class", str); prohibit_ivar("class", str);
v = r_entry(v, arg); v = r_entry(v, arg);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -2078,7 +2089,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
v = path2module(str); v = path2module(str);
prohibit_ivar("module", str); prohibit_ivar("module", str);
v = r_entry(v, arg); v = r_entry(v, arg);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
} }
break; break;
@ -2091,7 +2102,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
v = r_symreal(arg, 0); v = r_symreal(arg, 0);
} }
v = rb_str_intern(v); v = rb_str_intern(v);
v = r_leave(v, arg); v = r_leave(v, arg, partial);
break; break;
case TYPE_SYMLINK: case TYPE_SYMLINK:
@ -2113,7 +2124,7 @@ r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
static VALUE static VALUE
r_object(struct load_arg *arg) r_object(struct load_arg *arg)
{ {
return r_object0(arg, 0, Qnil); return r_object0(arg, false, 0, Qnil);
} }
static void static void
@ -2131,6 +2142,8 @@ clear_load_arg(struct load_arg *arg)
arg->symbols = 0; arg->symbols = 0;
st_free_table(arg->data); st_free_table(arg->data);
arg->data = 0; arg->data = 0;
st_free_table(arg->partial_objects);
arg->partial_objects = 0;
if (arg->compat_tbl) { if (arg->compat_tbl) {
st_free_table(arg->compat_tbl); st_free_table(arg->compat_tbl);
arg->compat_tbl = 0; arg->compat_tbl = 0;
@ -2185,6 +2198,7 @@ rb_marshal_load_with_proc(VALUE port, VALUE proc)
arg->offset = 0; arg->offset = 0;
arg->symbols = st_init_numtable(); arg->symbols = st_init_numtable();
arg->data = rb_init_identtable(); arg->data = rb_init_identtable();
arg->partial_objects = rb_init_identtable();
arg->compat_tbl = 0; arg->compat_tbl = 0;
arg->proc = 0; arg->proc = 0;
arg->readable = 0; arg->readable = 0;

View file

@ -20,19 +20,37 @@ describe :marshal_load, shared: true do
end end
describe "when called with a proc" do describe "when called with a proc" do
ruby_bug "#18141", ""..."3.1" do
it "call the proc with fully initialized strings" do
utf8_string = "foo".encode(Encoding::UTF_8)
Marshal.send(@method, Marshal.dump(utf8_string), proc { |arg|
if arg.is_a?(String)
arg.should == utf8_string
arg.encoding.should == Encoding::UTF_8
end
arg
})
end
it "no longer mutate the object after it was passed to the proc" do
string = Marshal.load(Marshal.dump("foo"), :freeze.to_proc)
string.should.frozen?
end
end
it "returns the value of the proc" do it "returns the value of the proc" do
Marshal.send(@method, Marshal.dump([1,2]), proc { [3,4] }).should == [3,4] Marshal.send(@method, Marshal.dump([1,2]), proc { [3,4] }).should == [3,4]
end end
ruby_bug "#18141", ""..."3.1" do
it "calls the proc for recursively visited data" do it "calls the proc for recursively visited data" do
a = [1] a = [1]
a << a a << a
ret = [] ret = []
Marshal.send(@method, Marshal.dump(a), proc { |arg| ret << arg; arg }) Marshal.send(@method, Marshal.dump(a), proc { |arg| ret << arg.inspect; arg })
ret.first.should == 1 ret[0].should == 1.inspect
ret[1].should == [1,a] ret[1].should == a.inspect
ret[2].should == a ret.size.should == 2
ret.size.should == 3
end end
it "loads an Array with proc" do it "loads an Array with proc" do
@ -48,16 +66,18 @@ describe :marshal_load, shared: true do
a.instance_variable_set(:@two, 2) a.instance_variable_set(:@two, 2)
obj = [s, 10, s, s, st, a] obj = [s, 10, s, s, st, a]
obj.instance_variable_set(:@zoo, 'ant') obj.instance_variable_set(:@zoo, 'ant')
proc = Proc.new { |o| arr << o; o} proc = Proc.new { |o| arr << o.dup; o}
Marshal.send(@method, "\x04\bI[\vI\"\ahi\a:\x06EF:\t@fooi\ni\x0F@\x06@\x06IS:\x14Struct::Brittle\x06:\x06af\x060\x06:\n@clueI\"\tnone\x06;\x00FI[\b;\b:\x06b:\x06c\x06:\t@twoi\a\x06:\t@zooI\"\bant\x06;\x00F", proc) Marshal.send(@method, "\x04\bI[\vI\"\ahi\a:\x06EF:\t@fooi\ni\x0F@\x06@\x06IS:\x14Struct::Brittle\x06:\x06af\x060\x06:\n@clueI\"\tnone\x06;\x00FI[\b;\b:\x06b:\x06c\x06:\t@twoi\a\x06:\t@zooI\"\bant\x06;\x00F", proc)
arr.should == ["hi", false, 5, 10, "hi", "hi", 0.0, st, "none", false, arr.should == [
:b, :c, a, 2, ["hi", 10, "hi", "hi", st, [:a, :b, :c]], "ant", false] false, 5, "hi", 10, "hi", "hi", 0.0, false, "none", st,
:b, :c, 2, a, false, "ant", ["hi", 10, "hi", "hi", st, [:a, :b, :c]],
]
Struct.send(:remove_const, :Brittle) Struct.send(:remove_const, :Brittle)
end end
end end
end
describe "when called with nil for the proc argument" do describe "when called with nil for the proc argument" do
it "behaves as if no proc argument was passed" do it "behaves as if no proc argument was passed" do
@ -119,28 +139,39 @@ describe :marshal_load, shared: true do
end end
end end
ruby_bug "#18141", ""..."3.1" do
it "loads an array containing objects having _dump method, and with proc" do it "loads an array containing objects having _dump method, and with proc" do
arr = [] arr = []
myproc = Proc.new { |o| arr << o; o } myproc = Proc.new { |o| arr << o.dup; o }
o1 = UserDefined.new; o1 = UserDefined.new;
o2 = UserDefinedWithIvar.new o2 = UserDefinedWithIvar.new
obj = [o1, o2, o1, o2] obj = [o1, o2, o1, o2]
Marshal.send(@method, "\x04\b[\tu:\x10UserDefined\x18\x04\b[\aI\"\nstuff\x06:\x06EF@\x06u:\x18UserDefinedWithIvar>\x04\b[\bI\"\nstuff\a:\x06EF:\t@foo:\x18UserDefinedWithIvarI\"\tmore\x06;\x00F@\a@\x06@\a", myproc) Marshal.send(@method, "\x04\b[\tu:\x10UserDefined\x18\x04\b[\aI\"\nstuff\x06:\x06EF@\x06u:\x18UserDefinedWithIvar>\x04\b[\bI\"\nstuff\a:\x06EF:\t@foo:\x18UserDefinedWithIvarI\"\tmore\x06;\x00F@\a@\x06@\a", myproc)
arr.should == [o1, o2, o1, o2, obj] arr[0].should == o1
arr[1].should == o2
arr[2].should == obj
arr.size.should == 3
end end
it "loads an array containing objects having marshal_dump method, and with proc" do it "loads an array containing objects having marshal_dump method, and with proc" do
arr = [] arr = []
proc = Proc.new { |o| arr << o; o } proc = Proc.new { |o| arr << o.dup; o }
o1 = UserMarshal.new o1 = UserMarshal.new
o2 = UserMarshalWithIvar.new o2 = UserMarshalWithIvar.new
obj = [o1, o2, o1, o2]
Marshal.send(@method, "\004\b[\tU:\020UserMarshal\"\nstuffU:\030UserMarshalWithIvar[\006\"\fmy data@\006@\b", proc) Marshal.send(@method, "\004\b[\tU:\020UserMarshal\"\nstuffU:\030UserMarshalWithIvar[\006\"\fmy data@\006@\b", proc)
arr.should == ['stuff', o1, 'my data', ['my data'], o2, o1, o2, obj] arr[0].should == 'stuff'
arr[1].should == o1
arr[2].should == 'my data'
arr[3].should == ['my data']
arr[4].should == o2
arr[5].should == [o1, o2, o1, o2]
arr.size.should == 6
end
end end
it "assigns classes to nested subclasses of Array correctly" do it "assigns classes to nested subclasses of Array correctly" do

View file

@ -672,6 +672,23 @@ class TestMarshal < Test::Unit::TestCase
assert_equal(['X', 'X'], Marshal.load(Marshal.dump(obj), ->(v) { v == str ? v.upcase : v })) assert_equal(['X', 'X'], Marshal.load(Marshal.dump(obj), ->(v) { v == str ? v.upcase : v }))
end end
def test_marshal_proc_string_encoding
string = "foo"
payload = Marshal.dump(string)
Marshal.load(payload, ->(v) {
if v.is_a?(String)
assert_equal(string, v)
assert_equal(string.encoding, v.encoding)
end
v
})
end
def test_marshal_proc_freeze
object = { foo: [42, "bar"] }
assert_equal object, Marshal.load(Marshal.dump(object), :freeze.to_proc)
end
def test_marshal_load_extended_class_crash def test_marshal_load_extended_class_crash
assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}") assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}")
begin; begin;
@ -833,4 +850,16 @@ class TestMarshal < Test::Unit::TestCase
assert_nil(e2.backtrace_locations) # temporal assert_nil(e2.backtrace_locations) # temporal
end end
end end
class TestMarshalFreezeProc < Test::Unit::TestCase
include MarshalTestLib
def encode(o)
Marshal.dump(o)
end
def decode(s)
Marshal.load(s, :freeze.to_proc)
end
end
end end

View file

@ -12,11 +12,11 @@
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
#define RUBY_VERSION_TEENY 3 #define RUBY_VERSION_TEENY 3
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
#define RUBY_PATCHLEVEL 140 #define RUBY_PATCHLEVEL 141
#define RUBY_RELEASE_YEAR 2021 #define RUBY_RELEASE_YEAR 2021
#define RUBY_RELEASE_MONTH 10 #define RUBY_RELEASE_MONTH 10
#define RUBY_RELEASE_DAY 3 #define RUBY_RELEASE_DAY 9
#include "ruby/version.h" #include "ruby/version.h"