
Move String RVALUES between pools

And re-embed any strings that can now fit inside the slot they've been moved to.
Matt Valentine-House 2022-04-06 09:55:23 +01:00 committed by Aaron Patterson
parent f8502a2699
commit 56cc3e99b6
Notes: git 2022-06-14 02:11:57 +09:00
5 changed files with 196 additions and 33 deletions
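
The core mechanics are in gc.c below: try_move() no longer asserts that the source and destination pages share a slot size, and gc_move() copies only the bytes both slots can hold, then clears the whole source slot. A minimal standalone sketch of that copy rule (toy code, not Ruby's gc.c; the slot sizes and payload are invented):

    /* Toy model: moving a payload between slots of different sizes copies
     * MIN(src, dest) bytes and clears the entire source slot, mirroring
     * the memcpy/memset change to gc_move() in this commit. */
    #include <stdio.h>
    #include <string.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    static void
    toy_move(char *dest, size_t dest_slot_size, char *src, size_t src_slot_size)
    {
        /* never read past the end of the smaller slot */
        memcpy(dest, src, MIN(src_slot_size, dest_slot_size));
        /* clear the whole source slot so it can rejoin the freelist */
        memset(src, 0, src_slot_size);
    }

    int
    main(void)
    {
        char small_slot[40] = "embedded payload";
        char large_slot[80] = {0};

        /* "move up": 40-byte slot -> 80-byte slot */
        toy_move(large_slot, sizeof(large_slot), small_slot, sizeof(small_slot));
        printf("%s\n", large_slot);
        return 0;
    }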

gc.c (106 changes)

@@ -837,6 +837,8 @@ typedef struct rb_objspace {
     struct {
         size_t considered_count_table[T_MASK];
         size_t moved_count_table[T_MASK];
+        size_t moved_up_count_table[T_MASK];
+        size_t moved_down_count_table[T_MASK];
         size_t total_moved;
     } rcompactor;
@@ -5091,7 +5093,7 @@ gc_setup_mark_bits(struct heap_page *page)
 }
 
 static int gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj);
-static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size);
+static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size);
 
 static void
 lock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
@@ -5130,6 +5132,7 @@ unlock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
 static bool
 try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page, VALUE src)
 {
+    struct heap_page *src_page = GET_HEAP_PAGE(src);
     if (!free_page) {
         return false;
     }
@@ -5150,12 +5153,16 @@ try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page,
     free_page->freelist = RANY(dest)->as.free.next;
 
     GC_ASSERT(RB_BUILTIN_TYPE(dest) == T_NONE);
-    GC_ASSERT(free_page->slot_size == GET_HEAP_PAGE(src)->slot_size);
+
+    if (src_page->slot_size > free_page->slot_size) {
+        objspace->rcompactor.moved_down_count_table[BUILTIN_TYPE(src)]++;
+    } else if (free_page->slot_size > src_page->slot_size) {
+        objspace->rcompactor.moved_up_count_table[BUILTIN_TYPE(src)]++;
+    }
 
     objspace->rcompactor.moved_count_table[BUILTIN_TYPE(src)]++;
     objspace->rcompactor.total_moved++;
 
-    gc_move(objspace, src, dest, free_page->slot_size);
+    gc_move(objspace, src, dest, src_page->slot_size, free_page->slot_size);
     gc_pin(objspace, src);
     free_page->free_slots--;
 }
@@ -5907,7 +5914,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
             object = rb_gc_location(forwarding_object);
 
-            gc_move(objspace, object, forwarding_object, page->slot_size);
+            gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size);
             /* forwarding_object is now our actual object, and "object"
              * is the free slot for the original page */
             struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -5976,6 +5983,8 @@ gc_compact_start(rb_objspace_t *objspace)
 
     memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t));
     memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t));
+    memset(objspace->rcompactor.moved_up_count_table, 0, T_MASK * sizeof(size_t));
+    memset(objspace->rcompactor.moved_down_count_table, 0, T_MASK * sizeof(size_t));
 
     /* Set up read barrier for pages containing MOVED objects */
     install_handlers();
@@ -8224,14 +8233,34 @@ gc_compact_heap_cursors_met_p(rb_heap_t *heap)
     return heap->sweeping_page == heap->compact_cursor;
 }
 
+static rb_size_pool_t *
+gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src)
+{
+    size_t obj_size;
+
+    switch (BUILTIN_TYPE(src)) {
+      case T_STRING:
+        obj_size = rb_str_size_as_embedded(src);
+
+        if (rb_gc_size_allocatable_p(obj_size)) {
+            return &size_pools[size_pool_idx_for_size(obj_size)];
+        }
+        else {
+            GC_ASSERT(!STR_EMBED_P(src));
+            return &size_pools[0];
+        }
+
+      default:
+        return src_pool;
+    }
+}
+
 static bool
-gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
+gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src)
 {
     GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED);
 
-    rb_heap_t *dheap = heap;
+    rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src));
 
     if (gc_compact_heap_cursors_met_p(dheap)) {
-        return false;
+        return dheap != heap;
     }
 
     while (!try_move(objspace, dheap, dheap->free_pages, src)) {
         struct gc_sweep_context ctx = {
@@ -8254,7 +8283,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
 }
 
 static bool
-gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
+gc_compact_plane(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
     short slot_size = page->slot_size;
     short slot_bits = slot_size / BASE_SLOT_SIZE;
     GC_ASSERT(slot_bits > 0);
@@ -8266,7 +8295,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
         if (bitset & 1) {
             objspace->rcompactor.considered_count_table[BUILTIN_TYPE(vp)]++;
 
-            if (!gc_compact_move(objspace, heap, vp)) {
+            if (!gc_compact_move(objspace, heap, size_pool, vp)) {
                 //the cursors met. bubble up
                 return false;
             }
@@ -8295,7 +8324,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
         bitset = (mark_bits[0] & ~pin_bits[0]);
         bitset >>= NUM_IN_PAGE(p);
         if (bitset) {
-            if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+            if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
                 return false;
         }
         p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
@@ -8303,7 +8332,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
         for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
             bitset = (mark_bits[j] & ~pin_bits[j]);
             if (bitset) {
-                if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+                if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
                     return false;
             }
             p += BITS_BITLENGTH * BASE_SLOT_SIZE;
@@ -8347,7 +8376,6 @@ gc_sweep_compact(rb_objspace_t *objspace)
             struct heap_page *start_page = heap->compact_cursor;
 
             if (!gc_compact_page(objspace, size_pool, heap, start_page)) {
-                GC_ASSERT(heap->sweeping_page == heap->compact_cursor);
                 lock_page_body(objspace, GET_PAGE_BODY(start_page->start));
 
                 continue;
@@ -9626,7 +9654,7 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
 }
 
 static VALUE
-gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
+gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size)
 {
     int marked;
     int wb_unprotected;
@@ -9676,8 +9704,8 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
     }
 
     /* Move the object */
-    memcpy(dest, src, slot_size);
-    memset(src, 0, slot_size);
+    memcpy(dest, src, MIN(src_slot_size, slot_size));
+    memset(src, 0, src_slot_size);
 
     /* Set bits for object in new location */
     if (marking) {
@@ -10271,23 +10299,31 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
         break;
 
       case T_STRING:
-        if (STR_SHARED_P(obj)) {
+        {
+            if (STR_SHARED_P(obj)) {
 #if USE_RVARGC
-            VALUE orig_shared = any->as.string.as.heap.aux.shared;
+                VALUE old_root = any->as.string.as.heap.aux.shared;
 #endif
-            UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+                UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
 #if USE_RVARGC
-            VALUE shared = any->as.string.as.heap.aux.shared;
-            if (STR_EMBED_P(shared)) {
-                size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
-                GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
-                GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
-                any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
-            }
-#endif
-        }
-        break;
+                VALUE new_root = any->as.string.as.heap.aux.shared;
+                rb_str_update_shared_ary(obj, old_root, new_root);
+
+                // if, after the move, the string is not embedded but can fit in
+                // the slot it's been placed in, then re-embed it
+                if ((size_t)GET_HEAP_PAGE(obj)->slot_size >= rb_str_size_as_embedded(obj)) {
+                    if (!STR_EMBED_P(obj) && rb_str_reembeddable_p(obj)) {
+                        rb_str_make_embedded(obj);
+                    }
+                }
+#endif
+            }
+
+            break;
+        }
 
       case T_DATA:
         /* Call the compaction callback, if it exists */
         {
@@ -10479,6 +10515,8 @@ gc_compact_stats(VALUE self)
     VALUE h = rb_hash_new();
     VALUE considered = rb_hash_new();
     VALUE moved = rb_hash_new();
+    VALUE moved_up = rb_hash_new();
+    VALUE moved_down = rb_hash_new();
 
     for (i=0; i<T_MASK; i++) {
         if (objspace->rcompactor.considered_count_table[i]) {
@@ -10488,10 +10526,20 @@ gc_compact_stats(VALUE self)
         if (objspace->rcompactor.moved_count_table[i]) {
             rb_hash_aset(moved, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_count_table[i]));
         }
+
+        if (objspace->rcompactor.moved_up_count_table[i]) {
+            rb_hash_aset(moved_up, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_up_count_table[i]));
+        }
+
+        if (objspace->rcompactor.moved_down_count_table[i]) {
+            rb_hash_aset(moved_down, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_down_count_table[i]));
+        }
     }
 
     rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered);
     rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved);
+    rb_hash_aset(h, ID2SYM(rb_intern("moved_up")), moved_up);
+    rb_hash_aset(h, ID2SYM(rb_intern("moved_down")), moved_down);
 
     return h;
 }
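
gc_compact_destination_pool() above routes a T_STRING to the pool whose slot size fits rb_str_size_as_embedded(src), falling back to pool 0 when no slot is large enough. A minimal sketch of the index calculation, assuming (as with Ruby's USE_RVARGC heaps on 64-bit) pool slot sizes that double from a 40-byte base; the real lookup is gc.c's size_pool_idx_for_size(), and the constants here are illustrative:

    #include <assert.h>
    #include <stdio.h>

    #define BASE_SLOT_SIZE  40  /* illustrative: sizeof(RVALUE) on 64-bit */
    #define SIZE_POOL_COUNT 5   /* pools of 40, 80, 160, 320, 640 bytes */

    static int
    toy_pool_idx_for_size(size_t size)
    {
        int idx = 0;
        size_t slot_size = BASE_SLOT_SIZE;

        /* walk up the doubling slot sizes until the object fits */
        while (slot_size < size) {
            slot_size <<= 1;
            idx++;
        }
        /* the real caller checks rb_gc_size_allocatable_p() first */
        assert(idx < SIZE_POOL_COUNT);
        return idx;
    }

    int
    main(void)
    {
        /* a string needing 100 bytes as-embedded goes to the 160-byte pool */
        printf("pool idx: %d\n", toy_pool_idx_for_size(100));  /* => 2 */
        return 0;
    }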

include/ruby/internal/core/rstring.h

@@ -556,6 +556,9 @@ RSTRING_LENINT(VALUE str)
     return rb_long2int(RSTRING_LEN(str));
 }
 
+bool
+rb_str_shared_root_p(VALUE str);
+
 /**
  * Convenient macro to obtain the contents and length at once.
  *

internal/string.h

@@ -59,6 +59,10 @@ void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
 VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
 VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
 VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
+void rb_str_make_embedded(VALUE);
+size_t rb_str_size_as_embedded(VALUE);
+bool rb_str_reembeddable_p(VALUE);
+void rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root);
 
 RUBY_SYMBOL_EXPORT_END
 
 MJIT_SYMBOL_EXPORT_BEGIN
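
Of these four helpers, rb_str_make_embedded() performs the actual representation change: copy the bytes from the heap buffer into the slot's inline array, flip the embed flag, and free the buffer. A toy model of that step, with invented field names standing in for RString's embed/heap union (the real code needs memmove because the union's fields overlap):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct toy_string {
        int embedded;        /* flag: bytes stored inline? */
        long len;
        char *heap_ptr;      /* used when !embedded */
        char inline_ary[48]; /* used when embedded */
    };

    static void
    toy_make_embedded(struct toy_string *s)
    {
        char *buf = s->heap_ptr;  /* save before flipping representation */
        memmove(s->inline_ary, buf, s->len);
        s->embedded = 1;
        free(buf);                /* the external buffer is no longer needed */
    }

    int
    main(void)
    {
        struct toy_string s = { 0, 5, malloc(6), {0} };
        memcpy(s.heap_ptr, "hello", 6);
        toy_make_embedded(&s);
        printf("%.*s embedded=%d\n", (int)s.len, s.inline_ary, s.embedded);
        return 0;
    }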

string.c

@@ -221,17 +221,51 @@ str_embed_capa(VALUE str)
 #endif
 }
 
+bool
+rb_str_reembeddable_p(VALUE str)
+{
+    return !FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
+}
+
 static inline size_t
-str_embed_size(long capa)
+rb_str_embed_size(long capa)
 {
     return offsetof(struct RString, as.embed.ary) + capa;
 }
 
+bool
+rb_str_shared_root_p(VALUE str)
+{
+    return FL_TEST_RAW(str, STR_SHARED_ROOT);
+}
+
+size_t
+rb_str_size_as_embedded(VALUE str)
+{
+    size_t real_size;
+#if USE_RVARGC
+    if (STR_EMBED_P(str)) {
+        real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
+    }
+    /* if the string is not currently embedded, but it can be embedded, how
+     * much space would it require */
+    else if (rb_str_reembeddable_p(str)) {
+        real_size = rb_str_embed_size(RSTRING(str)->as.heap.len) + TERM_LEN(str);
+    }
+    else {
+#endif
+        real_size = sizeof(struct RString);
+#if USE_RVARGC
+    }
+#endif
+    return real_size;
+}
+
 static inline bool
 STR_EMBEDDABLE_P(long len, long termlen)
 {
 #if USE_RVARGC
-    return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
+    return rb_gc_size_allocatable_p(rb_str_embed_size(len + termlen));
 #else
     return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
 #endif
@@ -264,6 +298,41 @@ rb_str_make_independent(VALUE str)
     }
 }
 
+void
+rb_str_make_embedded(VALUE str) {
+    RUBY_ASSERT(rb_str_reembeddable_p(str));
+    RUBY_ASSERT(!STR_EMBED_P(str));
+
+    char *buf = RSTRING_PTR(str);
+    long len = RSTRING_LEN(str);
+
+    STR_SET_EMBED(str);
+    STR_SET_EMBED_LEN(str, len);
+
+    memmove(RSTRING_PTR(str), buf, len);
+    ruby_xfree(buf);
+}
+
+void
+rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root)
+{
+    // if the root location hasn't changed, we don't need to update
+    if (new_root == old_root) {
+        return;
+    }
+
+    // if the root string isn't embedded, we don't need to touch the pointer.
+    // it already points to the same shared buffer
+    if (!STR_EMBED_P(new_root)) {
+        return;
+    }
+
+    size_t offset = (size_t)((uintptr_t)RSTRING(str)->as.heap.ptr - (uintptr_t)RSTRING(old_root)->as.embed.ary);
+
+    RUBY_ASSERT(RSTRING(str)->as.heap.ptr >= RSTRING(old_root)->as.embed.ary);
+    RSTRING(str)->as.heap.ptr = RSTRING(new_root)->as.embed.ary + offset;
+}
+
 void
 rb_debug_rstring_null_ptr(const char *func)
 {
@@ -849,7 +918,7 @@ str_alloc(VALUE klass, size_t size)
 static inline VALUE
 str_alloc_embed(VALUE klass, size_t capa)
 {
-    size_t size = str_embed_size(capa);
+    size_t size = rb_str_embed_size(capa);
     assert(rb_gc_size_allocatable_p(size));
 #if !USE_RVARGC
     assert(size <= sizeof(struct RString));
@@ -1693,7 +1762,7 @@ ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
 static inline VALUE
 ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
 {
-    size_t size = str_embed_size(capa);
+    size_t size = rb_str_embed_size(capa);
     assert(rb_gc_size_allocatable_p(size));
 #if !USE_RVARGC
     assert(size <= sizeof(struct RString));
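
rb_str_update_shared_ary() above keeps a dependent string valid when its shared root's embedded buffer moves: the dependent's data pointer is rebased into the new buffer at the same byte offset it had in the old one. The same arithmetic in a self-contained toy, with plain char arrays standing in for the two RString slots:

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        char old_root[16] = "hello world";
        char *dependent = old_root + 6;   /* shares the root's buffer: "world" */

        char new_root[32];                /* the root was compacted to here */
        memcpy(new_root, old_root, sizeof(old_root));
        memset(old_root, 0, sizeof(old_root));

        /* preserve the dependent's offset into the root's buffer */
        size_t offset = (size_t)(dependent - old_root);
        assert(dependent >= old_root);
        dependent = new_root + offset;

        printf("%s\n", dependent);        /* => world */
        return 0;
    }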

test/ruby/test_gc_compact.rb

@@ -208,4 +208,43 @@ class TestGCCompact < Test::Unit::TestCase
 
     assert_equal([:call, :line], results)
   end
+
+  def test_moving_strings_between_size_pools
+    assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
+    begin;
+      moveables = []
+      small_slots = []
+      large_slots = []
+
+      # Ensure fragmentation in the large heap
+      base_slot_size = GC.stat_heap[0].fetch(:slot_size)
+      500.times {
+        String.new(+"a" * base_slot_size).downcase
+        large_slots << String.new(+"a" * base_slot_size).downcase
+      }
+
+      # Ensure fragmentation in the smaller heap
+      500.times {
+        small_slots << Object.new
+        Object.new
+      }
+
+      500.times {
+        # strings are created as shared strings when initialized from literals
+        # use downcase to force the creation of an embedded string (it calls
+        # rb_str_new internally)
+        moveables << String.new(+"a" * base_slot_size).downcase
+        moveables << String.new("a").downcase
+      }
+
+      moveables.map { |s| s << ("bc" * base_slot_size) }
+      moveables.map { |s| s.squeeze! }
+
+      stats = GC.compact
+
+      moved_strings = (stats.dig(:moved_up, :T_STRING) || 0) +
+                      (stats.dig(:moved_down, :T_STRING) || 0)
+
+      assert_operator(moved_strings, :>, 0)
+    end;
+  end
 end