Move String RVALUES between pools
And re-embed any strings that can now fit inside the slot they've been moved to
commit 56cc3e99b6 (parent f8502a2699)
Notes: git 2022-06-14 02:11:57 +09:00
5 changed files with 196 additions and 33 deletions
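Before the file-by-file diff, a minimal sketch of the policy this commit implements, condensed here for orientation; it is not part of the change itself. move_to_pool() is a hypothetical stand-in for the try_move()/gc_compact_move() machinery, while the other names are functions the diff introduces.

/* Sketch only -- condensed from the gc.c/string.c changes below.
 * move_to_pool() is a hypothetical helper standing in for the
 * try_move()/gc_compact_move() machinery. */
static void
compact_string_sketch(VALUE str)
{
    /* bytes needed to hold the string inline in a heap slot */
    size_t needed = rb_str_size_as_embedded(str);

    /* pick the smallest size pool whose slots fit it; strings too big
     * for any pool stay in pool 0 and keep their malloc'd buffer */
    if (rb_gc_size_allocatable_p(needed)) {
        move_to_pool(str, size_pool_idx_for_size(needed));
    }

    /* after the move: if the string still points at an external buffer
     * but now fits in its slot, copy the bytes inline and free it */
    if ((size_t)GET_HEAP_PAGE(str)->slot_size >= needed &&
        !STR_EMBED_P(str) && rb_str_reembeddable_p(str)) {
        rb_str_make_embedded(str);
    }
}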
gc.c (106 changes)
@@ -837,6 +837,8 @@ typedef struct rb_objspace {
     struct {
         size_t considered_count_table[T_MASK];
         size_t moved_count_table[T_MASK];
+        size_t moved_up_count_table[T_MASK];
+        size_t moved_down_count_table[T_MASK];
         size_t total_moved;
     } rcompactor;
 
@@ -5091,7 +5093,7 @@ gc_setup_mark_bits(struct heap_page *page)
 }
 
 static int gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj);
-static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size);
+static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size);
 
 static void
 lock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
@@ -5130,6 +5132,7 @@ unlock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
 static bool
 try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page, VALUE src)
 {
+    struct heap_page *src_page = GET_HEAP_PAGE(src);
     if (!free_page) {
         return false;
     }
@@ -5150,12 +5153,16 @@ try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page,
         free_page->freelist = RANY(dest)->as.free.next;
 
         GC_ASSERT(RB_BUILTIN_TYPE(dest) == T_NONE);
-        GC_ASSERT(free_page->slot_size == GET_HEAP_PAGE(src)->slot_size);
 
+        if (src_page->slot_size > free_page->slot_size) {
+            objspace->rcompactor.moved_down_count_table[BUILTIN_TYPE(src)]++;
+        } else if (free_page->slot_size > src_page->slot_size) {
+            objspace->rcompactor.moved_up_count_table[BUILTIN_TYPE(src)]++;
+        }
         objspace->rcompactor.moved_count_table[BUILTIN_TYPE(src)]++;
         objspace->rcompactor.total_moved++;
 
-        gc_move(objspace, src, dest, free_page->slot_size);
+        gc_move(objspace, src, dest, src_page->slot_size, free_page->slot_size);
         gc_pin(objspace, src);
         free_page->free_slots--;
     }
@@ -5907,7 +5914,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
 
             object = rb_gc_location(forwarding_object);
 
-            gc_move(objspace, object, forwarding_object, page->slot_size);
+            gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size);
             /* forwarding_object is now our actual object, and "object"
              * is the free slot for the original page */
             struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -5976,6 +5983,8 @@ gc_compact_start(rb_objspace_t *objspace)
 
     memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t));
     memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t));
+    memset(objspace->rcompactor.moved_up_count_table, 0, T_MASK * sizeof(size_t));
+    memset(objspace->rcompactor.moved_down_count_table, 0, T_MASK * sizeof(size_t));
 
     /* Set up read barrier for pages containing MOVED objects */
     install_handlers();
@@ -8224,14 +8233,34 @@ gc_compact_heap_cursors_met_p(rb_heap_t *heap)
     return heap->sweeping_page == heap->compact_cursor;
 }
 
+static rb_size_pool_t *
+gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src)
+{
+    size_t obj_size;
+
+    switch (BUILTIN_TYPE(src)) {
+      case T_STRING:
+        obj_size = rb_str_size_as_embedded(src);
+        if (rb_gc_size_allocatable_p(obj_size)) {
+            return &size_pools[size_pool_idx_for_size(obj_size)];
+        }
+        else {
+            GC_ASSERT(!STR_EMBED_P(src));
+            return &size_pools[0];
+        }
+      default:
+        return src_pool;
+    }
+}
+
 static bool
-gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
+gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src)
 {
     GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED);
-    rb_heap_t *dheap = heap;
+    rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src));
 
     if (gc_compact_heap_cursors_met_p(dheap)) {
-        return false;
+        return dheap != heap;
     }
     while (!try_move(objspace, dheap, dheap->free_pages, src)) {
         struct gc_sweep_context ctx = {
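gc_compact_destination_pool() relies on size_pool_idx_for_size(), which predates this commit and is not shown in the diff. A simplified stand-in, assuming the RVARGC layout in which pool i holds slots of BASE_SLOT_SIZE << i:

/* Simplified stand-in for size_pool_idx_for_size (not in this diff).
 * Assumes pool i has slots of BASE_SLOT_SIZE << i, the RVARGC layout. */
static size_t
size_pool_idx_for_size_sketch(size_t size)
{
    size_t idx = 0;
    /* smallest pool whose slot size covers `size` */
    while (((size_t)BASE_SLOT_SIZE << idx) < size) {
        idx++;
    }
    GC_ASSERT(idx < SIZE_POOL_COUNT);  /* caller checked rb_gc_size_allocatable_p */
    return idx;
}

With a 40-byte base slot, for example, a string that needs 55 bytes when embedded lands in pool 1, whose slots are 80 bytes.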
@@ -8254,7 +8283,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
 }
 
 static bool
-gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
+gc_compact_plane(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
     short slot_size = page->slot_size;
     short slot_bits = slot_size / BASE_SLOT_SIZE;
     GC_ASSERT(slot_bits > 0);
@@ -8266,7 +8295,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
         if (bitset & 1) {
             objspace->rcompactor.considered_count_table[BUILTIN_TYPE(vp)]++;
 
-            if (!gc_compact_move(objspace, heap, vp)) {
+            if (!gc_compact_move(objspace, heap, size_pool, vp)) {
                 //the cursors met. bubble up
                 return false;
             }
@@ -8295,7 +8324,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
     bitset = (mark_bits[0] & ~pin_bits[0]);
     bitset >>= NUM_IN_PAGE(p);
     if (bitset) {
-        if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+        if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
             return false;
     }
     p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
@@ -8303,7 +8332,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
     for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
         bitset = (mark_bits[j] & ~pin_bits[j]);
         if (bitset) {
-            if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+            if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
                 return false;
         }
         p += BITS_BITLENGTH * BASE_SLOT_SIZE;
@@ -8347,7 +8376,6 @@ gc_sweep_compact(rb_objspace_t *objspace)
             struct heap_page *start_page = heap->compact_cursor;
 
             if (!gc_compact_page(objspace, size_pool, heap, start_page)) {
-                GC_ASSERT(heap->sweeping_page == heap->compact_cursor);
                 lock_page_body(objspace, GET_PAGE_BODY(start_page->start));
 
                 continue;
@@ -9626,7 +9654,7 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
 }
 
 static VALUE
-gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
+gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size)
 {
     int marked;
     int wb_unprotected;
@@ -9676,8 +9704,8 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
     }
 
     /* Move the object */
-    memcpy(dest, src, slot_size);
-    memset(src, 0, slot_size);
+    memcpy(dest, src, MIN(src_slot_size, slot_size));
+    memset(src, 0, src_slot_size);
 
     /* Set bits for object in new location */
     if (marking) {
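The MIN() above is the heart of the change to gc_move(): source and destination slots may now differ in size, so copying the destination's slot_size out of a smaller source slot would read past its end. A self-contained illustration of the rule (not Ruby's code):

#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Illustration of the new copy rule: never read past the source slot,
 * never write past the destination slot, and always clear the whole
 * source slot so the sweeper sees no stale object bytes. */
static void
move_between_slots(void *dest, size_t dest_slot_size,
                   void *src, size_t src_slot_size)
{
    memcpy(dest, src, MIN(src_slot_size, dest_slot_size));
    memset(src, 0, src_slot_size);
}

Copying only MIN(src, dest) bytes is safe because gc_compact_destination_pool() has already guaranteed the object's payload fits the destination slot.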
@@ -10271,23 +10299,31 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
         break;
 
       case T_STRING:
-        if (STR_SHARED_P(obj)) {
+        {
+            if (STR_SHARED_P(obj)) {
 #if USE_RVARGC
-            VALUE orig_shared = any->as.string.as.heap.aux.shared;
+                VALUE old_root = any->as.string.as.heap.aux.shared;
 #endif
-            UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+                UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
 #if USE_RVARGC
-            VALUE shared = any->as.string.as.heap.aux.shared;
-            if (STR_EMBED_P(shared)) {
-                size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
-                GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
-                GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
-                any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
-            }
+                VALUE new_root = any->as.string.as.heap.aux.shared;
+                rb_str_update_shared_ary(obj, old_root, new_root);
+
+                // if, after the move, the string is not embedded and can fit in
+                // the slot it's been placed in, then re-embed it
+                if ((size_t)GET_HEAP_PAGE(obj)->slot_size >= rb_str_size_as_embedded(obj)) {
+                    if (!STR_EMBED_P(obj) && rb_str_reembeddable_p(obj)) {
+                        rb_str_make_embedded(obj);
+                    }
+                }
 #endif
-        }
-        break;
+            }
+
+            break;
+        }
 
       case T_DATA:
         /* Call the compaction callback, if it exists */
         {
@@ -10479,6 +10515,8 @@ gc_compact_stats(VALUE self)
     VALUE h = rb_hash_new();
     VALUE considered = rb_hash_new();
     VALUE moved = rb_hash_new();
+    VALUE moved_up = rb_hash_new();
+    VALUE moved_down = rb_hash_new();
 
     for (i=0; i<T_MASK; i++) {
         if (objspace->rcompactor.considered_count_table[i]) {
@@ -10488,10 +10526,20 @@ gc_compact_stats(VALUE self)
         if (objspace->rcompactor.moved_count_table[i]) {
             rb_hash_aset(moved, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_count_table[i]));
         }
+
+        if (objspace->rcompactor.moved_up_count_table[i]) {
+            rb_hash_aset(moved_up, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_up_count_table[i]));
+        }
+
+        if (objspace->rcompactor.moved_down_count_table[i]) {
+            rb_hash_aset(moved_down, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_down_count_table[i]));
+        }
     }
 
     rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered);
     rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved);
+    rb_hash_aset(h, ID2SYM(rb_intern("moved_up")), moved_up);
+    rb_hash_aset(h, ID2SYM(rb_intern("moved_down")), moved_down);
 
     return h;
 }
include/ruby/internal/core/rstring.h (3 changes)
@@ -556,6 +556,9 @@ RSTRING_LENINT(VALUE str)
     return rb_long2int(RSTRING_LEN(str));
 }
 
+bool
+rb_str_shared_root_p(VALUE str);
+
 /**
  * Convenient macro to obtain the contents and length at once.
 *
internal/string.h (4 changes)
@@ -59,6 +59,10 @@ void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
 VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
 VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
 VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
+void rb_str_make_embedded(VALUE);
+size_t rb_str_size_as_embedded(VALUE);
+bool rb_str_reembeddable_p(VALUE);
+void rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root);
 RUBY_SYMBOL_EXPORT_END
 
 MJIT_SYMBOL_EXPORT_BEGIN
string.c (77 changes)
@@ -221,17 +221,51 @@ str_embed_capa(VALUE str)
 #endif
 }
 
+bool
+rb_str_reembeddable_p(VALUE str)
+{
+    return !FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
+}
+
 static inline size_t
-str_embed_size(long capa)
+rb_str_embed_size(long capa)
 {
     return offsetof(struct RString, as.embed.ary) + capa;
 }
 
+bool
+rb_str_shared_root_p(VALUE str)
+{
+    return FL_TEST_RAW(str, STR_SHARED_ROOT);
+}
+
+size_t
+rb_str_size_as_embedded(VALUE str)
+{
+    size_t real_size;
+#if USE_RVARGC
+    if (STR_EMBED_P(str)) {
+        real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
+    }
+    /* if the string is not currently embedded, but it can be embedded, how
+     * much space would it require */
+    else if (rb_str_reembeddable_p(str)) {
+        real_size = rb_str_embed_size(RSTRING(str)->as.heap.len) + TERM_LEN(str);
+    }
+    else {
+#endif
+        real_size = sizeof(struct RString);
+#if USE_RVARGC
+    }
+#endif
+    return real_size;
+}
+
 static inline bool
 STR_EMBEDDABLE_P(long len, long termlen)
 {
 #if USE_RVARGC
-    return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
+    return rb_gc_size_allocatable_p(rb_str_embed_size(len + termlen));
 #else
     return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
 #endif
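To make the arithmetic in rb_str_size_as_embedded() concrete, a standalone example using an RString-like layout; the field sizes here are illustrative, not Ruby's exact ones:

#include <stddef.h>
#include <stdio.h>

/* RString-like layout for illustration only */
struct rstring_like {
    unsigned long flags;   /* RBasic stand-in */
    unsigned long klass;
    long len;
    char ary[1];           /* inline (embedded) buffer */
};

int main(void)
{
    /* rb_str_embed_size(capa) == offsetof(..., ary) + capa */
    size_t len = 30, termlen = 1;
    size_t size_as_embedded = offsetof(struct rstring_like, ary) + len + termlen;

    /* on LP64 this prints 55: fits an 80-byte slot, not a 40-byte one */
    printf("bytes needed when embedded: %zu\n", size_as_embedded);
    return 0;
}

That byte count is what gc_compact_destination_pool() feeds to size_pool_idx_for_size() to pick the destination pool.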
@@ -264,6 +298,41 @@ rb_str_make_independent(VALUE str)
     }
 }
 
+void
+rb_str_make_embedded(VALUE str) {
+    RUBY_ASSERT(rb_str_reembeddable_p(str));
+    RUBY_ASSERT(!STR_EMBED_P(str));
+
+    char *buf = RSTRING_PTR(str);
+    long len = RSTRING_LEN(str);
+
+    STR_SET_EMBED(str);
+    STR_SET_EMBED_LEN(str, len);
+
+    memmove(RSTRING_PTR(str), buf, len);
+    ruby_xfree(buf);
+}
+
+void
+rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root)
+{
+    // if the root location hasn't changed, we don't need to update
+    if (new_root == old_root) {
+        return;
+    }
+
+    // if the root string isn't embedded, we don't need to touch the pointer.
+    // it already points to the same shared buffer
+    if (!STR_EMBED_P(new_root)) {
+        return;
+    }
+
+    size_t offset = (size_t)((uintptr_t)RSTRING(str)->as.heap.ptr - (uintptr_t)RSTRING(old_root)->as.embed.ary);
+
+    RUBY_ASSERT(RSTRING(str)->as.heap.ptr >= RSTRING(old_root)->as.embed.ary);
+    RSTRING(str)->as.heap.ptr = RSTRING(new_root)->as.embed.ary + offset;
+}
+
 void
 rb_debug_rstring_null_ptr(const char *func)
 {
@@ -849,7 +918,7 @@ str_alloc(VALUE klass, size_t size)
 static inline VALUE
 str_alloc_embed(VALUE klass, size_t capa)
 {
-    size_t size = str_embed_size(capa);
+    size_t size = rb_str_embed_size(capa);
     assert(rb_gc_size_allocatable_p(size));
 #if !USE_RVARGC
     assert(size <= sizeof(struct RString));
@@ -1693,7 +1762,7 @@ ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
 static inline VALUE
 ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
 {
-    size_t size = str_embed_size(capa);
+    size_t size = rb_str_embed_size(capa);
     assert(rb_gc_size_allocatable_p(size));
 #if !USE_RVARGC
     assert(size <= sizeof(struct RString));
test/ruby/test_gc_compact.rb (39 changes)
@@ -208,4 +208,43 @@ class TestGCCompact < Test::Unit::TestCase
 
     assert_equal([:call, :line], results)
   end
+
+  def test_moving_strings_between_size_pools
+    assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
+    begin;
+      moveables = []
+      small_slots = []
+      large_slots = []
+
+      # Ensure fragmentation in the large heap
+      base_slot_size = GC.stat_heap[0].fetch(:slot_size)
+      500.times {
+        String.new(+"a" * base_slot_size).downcase
+        large_slots << String.new(+"a" * base_slot_size).downcase
+      }
+
+      # Ensure fragmentation in the smaller heap
+      500.times {
+        small_slots << Object.new
+        Object.new
+      }
+
+      500.times {
+        # strings are created as shared strings when initialized from literals
+        # use downcase to force the creation of an embedded string (it calls
+        # rb_str_new internally)
+        moveables << String.new(+"a" * base_slot_size).downcase
+
+        moveables << String.new("a").downcase
+      }
+      moveables.map { |s| s << ("bc" * base_slot_size) }
+      moveables.map { |s| s.squeeze! }
+      stats = GC.compact
+
+      moved_strings = (stats.dig(:moved_up, :T_STRING) || 0) +
+        (stats.dig(:moved_down, :T_STRING) || 0)
+
+      assert_operator(moved_strings, :>, 0)
+    end;
+  end
 end