mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Use a monotonically increasing number for object_id
This changes object_id from being based on the objects location in memory (or a nearby memory location in the case of a conflict) to be based on an always increasing number. This number is a Ruby Integer which allows it to overflow the size of a pointer without issue (very unlikely to happen in real programs especially on 64-bit, but a nice guarantee). This changes obj_to_id_tbl and id_to_obj_tbl to both be maps of Ruby objects to Ruby objects (previously they were Ruby object to C integer) which simplifies updating them after compaction as we can run them through gc_update_table_refs. Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
This commit is contained in:
parent
d62abc47c8
commit
b99833baec
5 changed files with 90 additions and 151 deletions
|
@ -44,3 +44,12 @@ assert_normal_exit %q{
|
||||||
Thread.new {}
|
Thread.new {}
|
||||||
end
|
end
|
||||||
}, '[ruby-core:37858]'
|
}, '[ruby-core:37858]'
|
||||||
|
|
||||||
|
assert_equal 'ok', %q{
|
||||||
|
objects_and_ids = 1000.times.map { o = Object.new; [o, o.object_id] }
|
||||||
|
objects_and_ids.each { |expected, id|
|
||||||
|
actual = ObjectSpace._id2ref(id)
|
||||||
|
raise "expected #{expected.inspect}, got #{actual.inspect}" unless actual.equal?(expected)
|
||||||
|
}
|
||||||
|
'ok'
|
||||||
|
}
|
||||||
|
|
126
gc.c
126
gc.c
|
@ -700,6 +700,7 @@ typedef struct rb_objspace {
|
||||||
|
|
||||||
rb_event_flag_t hook_events;
|
rb_event_flag_t hook_events;
|
||||||
size_t total_allocated_objects;
|
size_t total_allocated_objects;
|
||||||
|
VALUE next_object_id;
|
||||||
|
|
||||||
rb_heap_t eden_heap;
|
rb_heap_t eden_heap;
|
||||||
rb_heap_t tomb_heap; /* heap for zombies and ghosts */
|
rb_heap_t tomb_heap; /* heap for zombies and ghosts */
|
||||||
|
@ -747,7 +748,6 @@ typedef struct rb_objspace {
|
||||||
#if USE_RGENGC
|
#if USE_RGENGC
|
||||||
size_t minor_gc_count;
|
size_t minor_gc_count;
|
||||||
size_t major_gc_count;
|
size_t major_gc_count;
|
||||||
size_t object_id_collisions;
|
|
||||||
#if RGENGC_PROFILE > 0
|
#if RGENGC_PROFILE > 0
|
||||||
size_t total_generated_normal_object_count;
|
size_t total_generated_normal_object_count;
|
||||||
size_t total_generated_shady_object_count;
|
size_t total_generated_shady_object_count;
|
||||||
|
@ -2846,12 +2846,41 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define OBJ_ID_INCREMENT (sizeof(RVALUE) / 2)
|
||||||
|
#define OBJ_ID_INITIAL (OBJ_ID_INCREMENT * 2)
|
||||||
|
|
||||||
|
static int
|
||||||
|
object_id_cmp(st_data_t x, st_data_t y)
|
||||||
|
{
|
||||||
|
if (RB_TYPE_P(x, T_BIGNUM)) {
|
||||||
|
return !rb_big_eql(x, y);
|
||||||
|
} else {
|
||||||
|
return x != y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static st_index_t
|
||||||
|
object_id_hash(st_data_t n)
|
||||||
|
{
|
||||||
|
if (RB_TYPE_P(n, T_BIGNUM)) {
|
||||||
|
return FIX2LONG(rb_big_hash(n));
|
||||||
|
} else {
|
||||||
|
return st_numhash(n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static const struct st_hash_type object_id_hash_type = {
|
||||||
|
object_id_cmp,
|
||||||
|
object_id_hash,
|
||||||
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
Init_heap(void)
|
Init_heap(void)
|
||||||
{
|
{
|
||||||
rb_objspace_t *objspace = &rb_objspace;
|
rb_objspace_t *objspace = &rb_objspace;
|
||||||
|
|
||||||
objspace->id_to_obj_tbl = st_init_numtable();
|
objspace->next_object_id = INT2FIX(OBJ_ID_INITIAL);
|
||||||
|
objspace->id_to_obj_tbl = st_init_table(&object_id_hash_type);
|
||||||
objspace->obj_to_id_tbl = st_init_numtable();
|
objspace->obj_to_id_tbl = st_init_numtable();
|
||||||
|
|
||||||
#if RGENGC_ESTIMATE_OLDMALLOC
|
#if RGENGC_ESTIMATE_OLDMALLOC
|
||||||
|
@ -3576,37 +3605,33 @@ id2ref(VALUE obj, VALUE objid)
|
||||||
VALUE orig;
|
VALUE orig;
|
||||||
void *p0;
|
void *p0;
|
||||||
|
|
||||||
ptr = NUM2PTR(objid);
|
if (FIXNUM_P(objid) || rb_big_size(objid) <= SIZEOF_VOIDP) {
|
||||||
p0 = (void *)ptr;
|
ptr = NUM2PTR(objid);
|
||||||
|
if (ptr == Qtrue) return Qtrue;
|
||||||
|
if (ptr == Qfalse) return Qfalse;
|
||||||
|
if (ptr == Qnil) return Qnil;
|
||||||
|
if (FIXNUM_P(ptr)) return (VALUE)ptr;
|
||||||
|
if (FLONUM_P(ptr)) return (VALUE)ptr;
|
||||||
|
|
||||||
if (ptr == Qtrue) return Qtrue;
|
ptr = obj_id_to_ref(objid);
|
||||||
if (ptr == Qfalse) return Qfalse;
|
if ((ptr % sizeof(RVALUE)) == (4 << 2)) {
|
||||||
if (ptr == Qnil) return Qnil;
|
ID symid = ptr / sizeof(RVALUE);
|
||||||
if (FIXNUM_P(ptr)) return (VALUE)ptr;
|
p0 = (void *)ptr;
|
||||||
if (FLONUM_P(ptr)) return (VALUE)ptr;
|
if (rb_id2str(symid) == 0)
|
||||||
ptr = obj_id_to_ref(objid);
|
rb_raise(rb_eRangeError, "%p is not symbol id value", p0);
|
||||||
|
return ID2SYM(symid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (st_lookup(objspace->id_to_obj_tbl, ptr, &orig)) {
|
if (st_lookup(objspace->id_to_obj_tbl, objid, &orig)) {
|
||||||
return orig;
|
return orig;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((ptr % sizeof(RVALUE)) == (4 << 2)) {
|
if (rb_int_ge(objid, objspace->next_object_id)) {
|
||||||
ID symid = ptr / sizeof(RVALUE);
|
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not id value", rb_int2str(objid, 10));
|
||||||
if (rb_id2str(symid) == 0)
|
} else {
|
||||||
rb_raise(rb_eRangeError, "%p is not symbol id value", p0);
|
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is recycled object", rb_int2str(objid, 10));
|
||||||
return ID2SYM(symid);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_id_value(objspace, ptr)) {
|
|
||||||
rb_raise(rb_eRangeError, "%p is not id value", p0);
|
|
||||||
}
|
|
||||||
if (!is_live_object(objspace, ptr)) {
|
|
||||||
rb_raise(rb_eRangeError, "%p is recycled object", p0);
|
|
||||||
}
|
|
||||||
if (RBASIC(ptr)->klass == 0) {
|
|
||||||
rb_raise(rb_eRangeError, "%p is internal object", p0);
|
|
||||||
}
|
|
||||||
return (VALUE)ptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -3637,27 +3662,20 @@ cached_object_id(VALUE obj)
|
||||||
|
|
||||||
if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &id)) {
|
if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &id)) {
|
||||||
GC_ASSERT(FL_TEST(obj, FL_SEEN_OBJ_ID));
|
GC_ASSERT(FL_TEST(obj, FL_SEEN_OBJ_ID));
|
||||||
return nonspecial_obj_id(id);
|
return id;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
GC_ASSERT(!FL_TEST(obj, FL_SEEN_OBJ_ID));
|
GC_ASSERT(!FL_TEST(obj, FL_SEEN_OBJ_ID));
|
||||||
id = obj;
|
|
||||||
|
|
||||||
while (1) {
|
id = objspace->next_object_id;
|
||||||
/* id is the object id */
|
objspace->next_object_id = rb_int_plus(id, INT2FIX(OBJ_ID_INCREMENT));
|
||||||
if (st_is_member(objspace->id_to_obj_tbl, (st_data_t)id)) {
|
|
||||||
objspace->profile.object_id_collisions++;
|
st_insert(objspace->obj_to_id_tbl, (st_data_t)obj, (st_data_t)id);
|
||||||
id += sizeof(VALUE);
|
st_insert(objspace->id_to_obj_tbl, (st_data_t)id, (st_data_t)obj);
|
||||||
}
|
FL_SET(obj, FL_SEEN_OBJ_ID);
|
||||||
else {
|
|
||||||
st_insert(objspace->obj_to_id_tbl, (st_data_t)obj, (st_data_t)id);
|
return id;
|
||||||
st_insert(objspace->id_to_obj_tbl, (st_data_t)id, (st_data_t)obj);
|
|
||||||
FL_SET(obj, FL_SEEN_OBJ_ID);
|
|
||||||
return nonspecial_obj_id(id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return nonspecial_obj_id(obj);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -5566,6 +5584,10 @@ gc_mark_roots(rb_objspace_t *objspace, const char **categoryp)
|
||||||
MARK_CHECKPOINT("global_tbl");
|
MARK_CHECKPOINT("global_tbl");
|
||||||
rb_gc_mark_global_tbl();
|
rb_gc_mark_global_tbl();
|
||||||
|
|
||||||
|
MARK_CHECKPOINT("object_id");
|
||||||
|
rb_gc_mark(objspace->next_object_id);
|
||||||
|
mark_tbl_no_pin(objspace, objspace->obj_to_id_tbl); /* Only mark ids */
|
||||||
|
|
||||||
if (stress_to_class) rb_gc_mark(stress_to_class);
|
if (stress_to_class) rb_gc_mark(stress_to_class);
|
||||||
|
|
||||||
MARK_CHECKPOINT("finish");
|
MARK_CHECKPOINT("finish");
|
||||||
|
@ -7551,18 +7573,6 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
update_id_to_obj(st_data_t *key, st_data_t *value, st_data_t arg, int exists)
|
|
||||||
{
|
|
||||||
if (exists) {
|
|
||||||
*value = arg;
|
|
||||||
return ST_CONTINUE;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return ST_STOP;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, VALUE moved_list)
|
gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, VALUE moved_list)
|
||||||
{
|
{
|
||||||
|
@ -7604,7 +7614,6 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, VALUE moved_list)
|
||||||
gc_report(4, objspace, "Moving object with seen id: %p -> %p\n", (void *)src, (void *)dest);
|
gc_report(4, objspace, "Moving object with seen id: %p -> %p\n", (void *)src, (void *)dest);
|
||||||
st_delete(objspace->obj_to_id_tbl, (st_data_t *)&src, 0);
|
st_delete(objspace->obj_to_id_tbl, (st_data_t *)&src, 0);
|
||||||
st_insert(objspace->obj_to_id_tbl, (VALUE)dest, id);
|
st_insert(objspace->obj_to_id_tbl, (VALUE)dest, id);
|
||||||
st_update(objspace->id_to_obj_tbl, (st_data_t)id, update_id_to_obj, (st_data_t)dest);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Move the object */
|
/* Move the object */
|
||||||
|
@ -8422,6 +8431,8 @@ gc_update_references(rb_objspace_t * objspace)
|
||||||
rb_transient_heap_update_references();
|
rb_transient_heap_update_references();
|
||||||
global_symbols.ids = rb_gc_location(global_symbols.ids);
|
global_symbols.ids = rb_gc_location(global_symbols.ids);
|
||||||
global_symbols.dsymbol_fstr_hash = rb_gc_location(global_symbols.dsymbol_fstr_hash);
|
global_symbols.dsymbol_fstr_hash = rb_gc_location(global_symbols.dsymbol_fstr_hash);
|
||||||
|
gc_update_tbl_refs(objspace, objspace->obj_to_id_tbl);
|
||||||
|
gc_update_table_refs(objspace, objspace->id_to_obj_tbl);
|
||||||
gc_update_table_refs(objspace, global_symbols.str_sym);
|
gc_update_table_refs(objspace, global_symbols.str_sym);
|
||||||
gc_update_table_refs(objspace, finalizer_table);
|
gc_update_table_refs(objspace, finalizer_table);
|
||||||
}
|
}
|
||||||
|
@ -8879,7 +8890,6 @@ enum gc_stat_sym {
|
||||||
#if USE_RGENGC
|
#if USE_RGENGC
|
||||||
gc_stat_sym_minor_gc_count,
|
gc_stat_sym_minor_gc_count,
|
||||||
gc_stat_sym_major_gc_count,
|
gc_stat_sym_major_gc_count,
|
||||||
gc_stat_sym_object_id_collisions,
|
|
||||||
gc_stat_sym_remembered_wb_unprotected_objects,
|
gc_stat_sym_remembered_wb_unprotected_objects,
|
||||||
gc_stat_sym_remembered_wb_unprotected_objects_limit,
|
gc_stat_sym_remembered_wb_unprotected_objects_limit,
|
||||||
gc_stat_sym_old_objects,
|
gc_stat_sym_old_objects,
|
||||||
|
@ -8955,7 +8965,6 @@ setup_gc_stat_symbols(void)
|
||||||
S(malloc_increase_bytes_limit);
|
S(malloc_increase_bytes_limit);
|
||||||
#if USE_RGENGC
|
#if USE_RGENGC
|
||||||
S(minor_gc_count);
|
S(minor_gc_count);
|
||||||
S(object_id_collisions);
|
|
||||||
S(major_gc_count);
|
S(major_gc_count);
|
||||||
S(remembered_wb_unprotected_objects);
|
S(remembered_wb_unprotected_objects);
|
||||||
S(remembered_wb_unprotected_objects_limit);
|
S(remembered_wb_unprotected_objects_limit);
|
||||||
|
@ -9128,7 +9137,6 @@ gc_stat_internal(VALUE hash_or_sym)
|
||||||
SET(malloc_increase_bytes_limit, malloc_limit);
|
SET(malloc_increase_bytes_limit, malloc_limit);
|
||||||
#if USE_RGENGC
|
#if USE_RGENGC
|
||||||
SET(minor_gc_count, objspace->profile.minor_gc_count);
|
SET(minor_gc_count, objspace->profile.minor_gc_count);
|
||||||
SET(object_id_collisions, objspace->profile.object_id_collisions);
|
|
||||||
SET(major_gc_count, objspace->profile.major_gc_count);
|
SET(major_gc_count, objspace->profile.major_gc_count);
|
||||||
SET(remembered_wb_unprotected_objects, objspace->rgengc.uncollectible_wb_unprotected_objects);
|
SET(remembered_wb_unprotected_objects, objspace->rgengc.uncollectible_wb_unprotected_objects);
|
||||||
SET(remembered_wb_unprotected_objects_limit, objspace->rgengc.uncollectible_wb_unprotected_objects_limit);
|
SET(remembered_wb_unprotected_objects_limit, objspace->rgengc.uncollectible_wb_unprotected_objects_limit);
|
||||||
|
|
8
hash.c
8
hash.c
|
@ -273,10 +273,14 @@ rb_objid_hash(st_index_t index)
|
||||||
static st_index_t
|
static st_index_t
|
||||||
objid_hash(VALUE obj)
|
objid_hash(VALUE obj)
|
||||||
{
|
{
|
||||||
|
VALUE object_id = rb_obj_id(obj);
|
||||||
|
if (!FIXNUM_P(object_id))
|
||||||
|
object_id = rb_big_hash(object_id);
|
||||||
|
|
||||||
#if SIZEOF_LONG == SIZEOF_VOIDP
|
#if SIZEOF_LONG == SIZEOF_VOIDP
|
||||||
return (st_index_t)st_index_hash((st_index_t)NUM2LONG(rb_obj_id(obj)));
|
return (st_index_t)st_index_hash((st_index_t)NUM2LONG(object_id));
|
||||||
#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
|
#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
|
||||||
return (st_index_t)st_index_hash((st_index_t)NUM2LL(rb_obj_id(obj)));
|
return (st_index_t)st_index_hash((st_index_t)NUM2LL(object_id));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -461,4 +461,12 @@ class TestGc < Test::Unit::TestCase
|
||||||
skip "finalizers did not get run" if @result.empty?
|
skip "finalizers did not get run" if @result.empty?
|
||||||
assert_equal([:c1, :c2], @result)
|
assert_equal([:c1, :c2], @result)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_object_ids_never_repeat
|
||||||
|
GC.start
|
||||||
|
a = 1000.times.map { Object.new.object_id }
|
||||||
|
GC.start
|
||||||
|
b = 1000.times.map { Object.new.object_id }
|
||||||
|
assert_empty(a & b)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -7,13 +7,6 @@ class TestGCCompact < Test::Unit::TestCase
|
||||||
(Fiddle.dlwrap(obj) >> 1)
|
(Fiddle.dlwrap(obj) >> 1)
|
||||||
end
|
end
|
||||||
|
|
||||||
def assert_object_ids(list)
|
|
||||||
same_count = list.find_all { |obj|
|
|
||||||
memory_location(obj) == obj.object_id
|
|
||||||
}.count
|
|
||||||
list.count - same_count
|
|
||||||
end
|
|
||||||
|
|
||||||
def big_list(level = 10)
|
def big_list(level = 10)
|
||||||
if level > 0
|
if level > 0
|
||||||
big_list(level - 1)
|
big_list(level - 1)
|
||||||
|
@ -41,89 +34,6 @@ class TestGCCompact < Test::Unit::TestCase
|
||||||
new_object
|
new_object
|
||||||
end
|
end
|
||||||
|
|
||||||
def try_to_move_objects
|
|
||||||
10.times do
|
|
||||||
list_of_objects = big_list
|
|
||||||
|
|
||||||
ids = list_of_objects.map(&:object_id) # store id in map
|
|
||||||
addresses = list_of_objects.map(&self.:memory_location)
|
|
||||||
|
|
||||||
assert_equal ids, addresses
|
|
||||||
|
|
||||||
# All object ids should be equal
|
|
||||||
assert_equal 0, assert_object_ids(list_of_objects) # should be 0
|
|
||||||
|
|
||||||
GC.verify_compaction_references(toward: :empty)
|
|
||||||
|
|
||||||
# Some should have moved
|
|
||||||
id_count = assert_object_ids(list_of_objects)
|
|
||||||
skip "couldn't get objects to move" if id_count == 0
|
|
||||||
assert_operator id_count, :>, 0
|
|
||||||
|
|
||||||
new_ids = list_of_objects.map(&:object_id)
|
|
||||||
|
|
||||||
# Object ids should not change after compaction
|
|
||||||
assert_equal ids, new_ids
|
|
||||||
|
|
||||||
new_tenant = find_object_in_recycled_slot(addresses)
|
|
||||||
return [list_of_objects, addresses, new_tenant] if new_tenant
|
|
||||||
end
|
|
||||||
|
|
||||||
flunk "Couldn't get objects to move"
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_find_collided_object
|
|
||||||
skip "figure out how to guarantee move"
|
|
||||||
|
|
||||||
list_of_objects, addresses, new_tenant = try_to_move_objects
|
|
||||||
|
|
||||||
# This is the object that used to be in new_object's position
|
|
||||||
loc = memory_location(new_tenant)
|
|
||||||
assert loc, "should have a memory location"
|
|
||||||
|
|
||||||
if (ENV['TRAVIS'] && RUBY_PLATFORM =~ /darwin/)
|
|
||||||
skip "tests are failing on Travis osx / Wercker from here"
|
|
||||||
end
|
|
||||||
|
|
||||||
address_idx = addresses.index(loc)
|
|
||||||
assert address_idx, "should have an address index"
|
|
||||||
|
|
||||||
previous_tenant = list_of_objects[address_idx]
|
|
||||||
assert previous_tenant, "should have a previous tenant"
|
|
||||||
|
|
||||||
assert_not_equal previous_tenant.object_id, new_tenant.object_id
|
|
||||||
|
|
||||||
# Should be able to look up object by object_id
|
|
||||||
assert_equal new_tenant, ObjectSpace._id2ref(new_tenant.object_id)
|
|
||||||
|
|
||||||
# Should be able to look up object by object_id
|
|
||||||
assert_equal previous_tenant, ObjectSpace._id2ref(previous_tenant.object_id)
|
|
||||||
|
|
||||||
int = (new_tenant.object_id >> 1)
|
|
||||||
# These two should be the same! but they are not :(
|
|
||||||
assert_equal int, ObjectSpace._id2ref(int.object_id)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_many_collisions
|
|
||||||
list_of_objects = big_list
|
|
||||||
ids = list_of_objects.map(&:object_id)
|
|
||||||
addresses = list_of_objects.map(&self.:memory_location)
|
|
||||||
|
|
||||||
GC.verify_compaction_references(toward: :empty)
|
|
||||||
|
|
||||||
skip "time consuming"
|
|
||||||
|
|
||||||
new_tenants = 10.times.map {
|
|
||||||
find_object_in_recycled_slot(addresses)
|
|
||||||
}
|
|
||||||
|
|
||||||
collisions = GC.stat(:object_id_collisions)
|
|
||||||
skip "couldn't get objects to collide" if collisions == 0
|
|
||||||
assert_operator collisions, :>, 0
|
|
||||||
ids.clear
|
|
||||||
new_tenants.clear
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_complex_hash_keys
|
def test_complex_hash_keys
|
||||||
list_of_objects = big_list
|
list_of_objects = big_list
|
||||||
hash = list_of_objects.hash
|
hash = list_of_objects.hash
|
||||||
|
|
Loading…
Reference in a new issue