
Move String RVALUES between pools

And re-embed any strings that can now fit inside the slot they've been
moved to
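
For illustration only (not part of this commit's diff): GC.compact returns the hash built by gc_compact_stats in the gc.c changes below, so after this change the result also carries the new :moved_up and :moved_down tables. A minimal Ruby sketch of observing that, assuming a build with variable-width allocation (USE_RVARGC) so more than one size pool exists; the exact keys and counts depend on heap layout and build options:

# Illustrative sketch only; counts vary by build and heap state.
strings = 500.times.map { ("a" * 64).downcase } # large enough to land in a bigger size pool
strings.each(&:squeeze!)                        # shrink contents so each string can re-embed in a smaller slot
stats = GC.compact
p stats[:moved_down]                            # e.g. {:T_STRING=>120, ...} strings moved to smaller pools
p stats[:moved_up]                              # strings that needed a larger slot instead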
Matt Valentine-House 2022-04-06 09:55:23 +01:00 committed by Aaron Patterson
parent f8502a2699
commit 56cc3e99b6
5 changed files with 196 additions and 33 deletions

gc.c (106 changed lines)

@@ -837,6 +837,8 @@ typedef struct rb_objspace {
struct {
size_t considered_count_table[T_MASK];
size_t moved_count_table[T_MASK];
size_t moved_up_count_table[T_MASK];
size_t moved_down_count_table[T_MASK];
size_t total_moved;
} rcompactor;
@@ -5091,7 +5093,7 @@ gc_setup_mark_bits(struct heap_page *page)
}
static int gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj);
static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size);
static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size);
static void
lock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
@@ -5130,6 +5132,7 @@ unlock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
static bool
try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page, VALUE src)
{
struct heap_page *src_page = GET_HEAP_PAGE(src);
if (!free_page) {
return false;
}
@@ -5150,12 +5153,16 @@ try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page,
free_page->freelist = RANY(dest)->as.free.next;
GC_ASSERT(RB_BUILTIN_TYPE(dest) == T_NONE);
GC_ASSERT(free_page->slot_size == GET_HEAP_PAGE(src)->slot_size);
if (src_page->slot_size > free_page->slot_size) {
objspace->rcompactor.moved_down_count_table[BUILTIN_TYPE(src)]++;
} else if (free_page->slot_size > src_page->slot_size) {
objspace->rcompactor.moved_up_count_table[BUILTIN_TYPE(src)]++;
}
objspace->rcompactor.moved_count_table[BUILTIN_TYPE(src)]++;
objspace->rcompactor.total_moved++;
gc_move(objspace, src, dest, free_page->slot_size);
gc_move(objspace, src, dest, src_page->slot_size, free_page->slot_size);
gc_pin(objspace, src);
free_page->free_slots--;
}
@@ -5907,7 +5914,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
object = rb_gc_location(forwarding_object);
gc_move(objspace, object, forwarding_object, page->slot_size);
gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size);
/* forwarding_object is now our actual object, and "object"
* is the free slot for the original page */
struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -5976,6 +5983,8 @@ gc_compact_start(rb_objspace_t *objspace)
memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t));
memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t));
memset(objspace->rcompactor.moved_up_count_table, 0, T_MASK * sizeof(size_t));
memset(objspace->rcompactor.moved_down_count_table, 0, T_MASK * sizeof(size_t));
/* Set up read barrier for pages containing MOVED objects */
install_handlers();
@@ -8224,14 +8233,34 @@ gc_compact_heap_cursors_met_p(rb_heap_t *heap)
return heap->sweeping_page == heap->compact_cursor;
}
static rb_size_pool_t *
gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src)
{
size_t obj_size;
switch (BUILTIN_TYPE(src)) {
case T_STRING:
obj_size = rb_str_size_as_embedded(src);
if (rb_gc_size_allocatable_p(obj_size)){
return &size_pools[size_pool_idx_for_size(obj_size)];
}
else {
GC_ASSERT(!STR_EMBED_P(src));
return &size_pools[0];
}
default:
return src_pool;
}
}
static bool
gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src)
{
GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED);
rb_heap_t *dheap = heap;
rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src));
if (gc_compact_heap_cursors_met_p(dheap)) {
return false;
return dheap != heap;
}
while (!try_move(objspace, dheap, dheap->free_pages, src)) {
struct gc_sweep_context ctx = {
@@ -8254,7 +8283,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
}
static bool
gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
gc_compact_plane(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
short slot_size = page->slot_size;
short slot_bits = slot_size / BASE_SLOT_SIZE;
GC_ASSERT(slot_bits > 0);
@@ -8266,7 +8295,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
if (bitset & 1) {
objspace->rcompactor.considered_count_table[BUILTIN_TYPE(vp)]++;
if (!gc_compact_move(objspace, heap, vp)) {
if (!gc_compact_move(objspace, heap, size_pool, vp)) {
//the cursors met. bubble up
return false;
}
@@ -8295,7 +8324,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
bitset = (mark_bits[0] & ~pin_bits[0]);
bitset >>= NUM_IN_PAGE(p);
if (bitset) {
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
return false;
}
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
@@ -8303,7 +8332,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
bitset = (mark_bits[j] & ~pin_bits[j]);
if (bitset) {
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
return false;
}
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
@@ -8347,7 +8376,6 @@ gc_sweep_compact(rb_objspace_t *objspace)
struct heap_page *start_page = heap->compact_cursor;
if (!gc_compact_page(objspace, size_pool, heap, start_page)) {
GC_ASSERT(heap->sweeping_page == heap->compact_cursor);
lock_page_body(objspace, GET_PAGE_BODY(start_page->start));
continue;
@@ -9626,7 +9654,7 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
}
static VALUE
gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size)
{
int marked;
int wb_unprotected;
@@ -9676,8 +9704,8 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
}
/* Move the object */
memcpy(dest, src, slot_size);
memset(src, 0, slot_size);
memcpy(dest, src, MIN(src_slot_size, slot_size));
memset(src, 0, src_slot_size);
/* Set bits for object in new location */
if (marking) {
@@ -10271,23 +10299,31 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
break;
case T_STRING:
if (STR_SHARED_P(obj)) {
{
#if USE_RVARGC
VALUE orig_shared = any->as.string.as.heap.aux.shared;
#endif
UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
#if USE_RVARGC
VALUE shared = any->as.string.as.heap.aux.shared;
if (STR_EMBED_P(shared)) {
size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
}
#endif
}
break;
if (STR_SHARED_P(obj)) {
#if USE_RVARGC
VALUE old_root = any->as.string.as.heap.aux.shared;
#endif
UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
#if USE_RVARGC
VALUE new_root = any->as.string.as.heap.aux.shared;
rb_str_update_shared_ary(obj, old_root, new_root);
// if, after move the string is not embedded, and can fit in the
// slot it's been placed in, then re-embed it
if ((size_t)GET_HEAP_PAGE(obj)->slot_size >= rb_str_size_as_embedded(obj)) {
if (!STR_EMBED_P(obj) && rb_str_reembeddable_p(obj)) {
rb_str_make_embedded(obj);
}
}
#endif
}
break;
}
case T_DATA:
/* Call the compaction callback, if it exists */
{
@@ -10479,6 +10515,8 @@ gc_compact_stats(VALUE self)
VALUE h = rb_hash_new();
VALUE considered = rb_hash_new();
VALUE moved = rb_hash_new();
VALUE moved_up = rb_hash_new();
VALUE moved_down = rb_hash_new();
for (i=0; i<T_MASK; i++) {
if (objspace->rcompactor.considered_count_table[i]) {
@@ -10488,10 +10526,20 @@ gc_compact_stats(VALUE self)
if (objspace->rcompactor.moved_count_table[i]) {
rb_hash_aset(moved, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_count_table[i]));
}
if (objspace->rcompactor.moved_up_count_table[i]) {
rb_hash_aset(moved_up, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_up_count_table[i]));
}
if (objspace->rcompactor.moved_down_count_table[i]) {
rb_hash_aset(moved_down, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_down_count_table[i]));
}
}
rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered);
rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved);
rb_hash_aset(h, ID2SYM(rb_intern("moved_up")), moved_up);
rb_hash_aset(h, ID2SYM(rb_intern("moved_down")), moved_down);
return h;
}

include/ruby/internal/core/rstring.h

@@ -556,6 +556,9 @@ RSTRING_LENINT(VALUE str)
return rb_long2int(RSTRING_LEN(str));
}
bool
rb_str_shared_root_p(VALUE str);
/**
* Convenient macro to obtain the contents and length at once.
*

internal/string.h

@@ -59,6 +59,10 @@ void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
void rb_str_make_embedded(VALUE);
size_t rb_str_size_as_embedded(VALUE);
bool rb_str_reembeddable_p(VALUE);
void rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root);
RUBY_SYMBOL_EXPORT_END
MJIT_SYMBOL_EXPORT_BEGIN

string.c

@@ -221,17 +221,51 @@ str_embed_capa(VALUE str)
#endif
}
bool
rb_str_reembeddable_p(VALUE str)
{
return !FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
}
static inline size_t
str_embed_size(long capa)
rb_str_embed_size(long capa)
{
return offsetof(struct RString, as.embed.ary) + capa;
}
bool
rb_str_shared_root_p(VALUE str)
{
return FL_TEST_RAW(str, STR_SHARED_ROOT);
}
size_t
rb_str_size_as_embedded(VALUE str)
{
size_t real_size;
#if USE_RVARGC
if (STR_EMBED_P(str)) {
real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
}
/* if the string is not currently embedded, but it can be embedded, how
* much space would it require */
else if (rb_str_reembeddable_p(str)) {
real_size = rb_str_embed_size(RSTRING(str)->as.heap.len) + TERM_LEN(str);
}
else {
#endif
real_size = sizeof(struct RString);
#if USE_RVARGC
}
#endif
return real_size;
}
static inline bool
STR_EMBEDDABLE_P(long len, long termlen)
{
#if USE_RVARGC
return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
return rb_gc_size_allocatable_p(rb_str_embed_size(len + termlen));
#else
return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
#endif
@@ -264,6 +298,41 @@ rb_str_make_independent(VALUE str)
}
}
void
rb_str_make_embedded(VALUE str) {
RUBY_ASSERT(rb_str_reembeddable_p(str));
RUBY_ASSERT(!STR_EMBED_P(str));
char *buf = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, len);
memmove(RSTRING_PTR(str), buf, len);
ruby_xfree(buf);
}
void
rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root)
{
// if the root location hasn't changed, we don't need to update
if (new_root == old_root) {
return;
}
// if the root string isn't embedded, we don't need to touch the pointer.
// it already points to the same shared buffer
if (!STR_EMBED_P(new_root)) {
return;
}
size_t offset = (size_t)((uintptr_t)RSTRING(str)->as.heap.ptr - (uintptr_t)RSTRING(old_root)->as.embed.ary);
RUBY_ASSERT(RSTRING(str)->as.heap.ptr >= RSTRING(old_root)->as.embed.ary);
RSTRING(str)->as.heap.ptr = RSTRING(new_root)->as.embed.ary + offset;
}
void
rb_debug_rstring_null_ptr(const char *func)
{
@@ -849,7 +918,7 @@ str_alloc(VALUE klass, size_t size)
static inline VALUE
str_alloc_embed(VALUE klass, size_t capa)
{
size_t size = str_embed_size(capa);
size_t size = rb_str_embed_size(capa);
assert(rb_gc_size_allocatable_p(size));
#if !USE_RVARGC
assert(size <= sizeof(struct RString));
@@ -1693,7 +1762,7 @@ ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
static inline VALUE
ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
{
size_t size = str_embed_size(capa);
size_t size = rb_str_embed_size(capa);
assert(rb_gc_size_allocatable_p(size));
#if !USE_RVARGC
assert(size <= sizeof(struct RString));

test/ruby/test_gc_compact.rb

@@ -208,4 +208,43 @@ class TestGCCompact < Test::Unit::TestCase
assert_equal([:call, :line], results)
end
def test_moving_strings_between_size_pools
assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
begin;
moveables = []
small_slots = []
large_slots = []
# Ensure fragmentation in the large heap
base_slot_size = GC.stat_heap[0].fetch(:slot_size)
500.times {
String.new(+"a" * base_slot_size).downcase
large_slots << String.new(+"a" * base_slot_size).downcase
}
# Ensure fragmentation in the smaller heap
500.times {
small_slots << Object.new
Object.new
}
500.times {
# strings are created as shared strings when initialized from literals
# use downcase to force the creation of an embedded string (it calls
# rb_str_new internally)
moveables << String.new(+"a" * base_slot_size).downcase
moveables << String.new("a").downcase
}
moveables.map { |s| s << ("bc" * base_slot_size) }
moveables.map { |s| s.squeeze! }
stats = GC.compact
moved_strings = (stats.dig(:moved_up, :T_STRING) || 0) +
(stats.dig(:moved_down, :T_STRING) || 0)
assert_operator(moved_strings, :>, 0)
end;
end
end