From a5b6598192c30187b19b892af3110a46f6a70d76 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 26 Aug 2021 10:06:32 -0400 Subject: [PATCH] [Feature #18239] Implement VWA for strings This commit adds support for embedded strings with variable capacity and uses Variable Width Allocation to allocate strings. --- debug.c | 2 + ext/-test-/string/capacity.c | 9 +- ext/-test-/string/cstr.c | 10 +- gc.c | 147 ++++++----- gc.rb | 10 + include/ruby/internal/config.h | 4 + include/ruby/internal/core/rstring.h | 20 ++ internal/gc.h | 27 +-- misc/lldb_cruby.py | 3 +- ruby.c | 7 +- spec/ruby/optional/capi/string_spec.rb | 12 +- string.c | 323 +++++++++++++++++++------ test/-ext-/string/test_capacity.rb | 37 ++- test/-ext-/string/test_rb_str_dup.rb | 6 +- test/objspace/test_objspace.rb | 4 +- transcode.c | 4 + 16 files changed, 453 insertions(+), 172 deletions(-) diff --git a/debug.c b/debug.c index 52bd0f7fb7..a5e6ce475a 100644 --- a/debug.c +++ b/debug.c @@ -56,7 +56,9 @@ const union { enum ruby_robject_consts robject_consts; enum ruby_rmodule_flags rmodule_flags; enum ruby_rstring_flags rstring_flags; +#if !USE_RVARGC enum ruby_rstring_consts rstring_consts; +#endif enum ruby_rarray_flags rarray_flags; enum ruby_rarray_consts rarray_consts; enum { diff --git a/ext/-test-/string/capacity.c b/ext/-test-/string/capacity.c index cb8d2c2b3a..33b2023fd3 100644 --- a/ext/-test-/string/capacity.c +++ b/ext/-test-/string/capacity.c @@ -4,10 +4,11 @@ static VALUE bug_str_capacity(VALUE klass, VALUE str) { - return - STR_EMBED_P(str) ? INT2FIX(RSTRING_EMBED_LEN_MAX) : \ - STR_SHARED_P(str) ? 
INT2FIX(0) : \ - LONG2FIX(RSTRING(str)->as.heap.aux.capa); + if (!STR_EMBED_P(str) && STR_SHARED_P(str)) { + return INT2FIX(0); + } + + return LONG2FIX(rb_str_capacity(str)); } void diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c index 4f837998d7..1eadb8b4fd 100644 --- a/ext/-test-/string/cstr.c +++ b/ext/-test-/string/cstr.c @@ -62,9 +62,13 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen) if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len); str = rb_str_new_shared(str); if (STR_EMBED_P(str)) { +#if USE_RVARGC + RSTRING(str)->as.embed.len = (short)len; +#else RSTRING(str)->basic.flags &= ~RSTRING_EMBED_LEN_MASK; RSTRING(str)->basic.flags |= len << RSTRING_EMBED_LEN_SHIFT; - memmove(RSTRING(str)->as.ary, RSTRING(str)->as.ary + beg, len); +#endif + memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len); } else { RSTRING(str)->as.heap.ptr += beg; @@ -112,7 +116,11 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str) Check_Type(str, T_STRING); FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); +#if USE_RVARGC + RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6); +#else RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK; +#endif RSTRING(str2)->as.heap.aux.capa = capacity; RSTRING(str2)->as.heap.ptr = buf; RSTRING(str2)->as.heap.len = RSTRING_LEN(str); diff --git a/gc.c b/gc.c index 04337e4440..0c739ba709 100644 --- a/gc.c +++ b/gc.c @@ -888,6 +888,7 @@ static const bool USE_MMAP_ALIGNED_ALLOC = false; #endif struct heap_page { + short slot_size; short total_slots; short free_slots; short pinned_slots; @@ -1849,7 +1850,7 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj if (RGENGC_CHECK_MODE && /* obj should belong to page */ !(&page->start[0] <= (RVALUE *)obj && - (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size)) && + (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * 
page->slot_size)) && obj % sizeof(RVALUE) == 0)) { rb_bug("heap_page_add_freeobj: %p is not rvalue.", (void *)p); } @@ -1938,7 +1939,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace) } struct heap_page *hipage = heap_pages_sorted[heap_allocated_pages - 1]; - uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->size_pool->slot_size); + uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->slot_size); GC_ASSERT(himem <= (uintptr_t)heap_pages_himem); heap_pages_himem = (RVALUE *)himem; @@ -2034,6 +2035,7 @@ heap_page_allocate(rb_objspace_t *objspace, rb_size_pool_t *size_pool) page->start = (RVALUE *)start; page->total_slots = limit; + page->slot_size = size_pool->slot_size; page->size_pool = size_pool; page_body->header.page = page; @@ -2091,7 +2093,6 @@ heap_add_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea { /* Adding to eden heap during incremental sweeping is forbidden */ GC_ASSERT(!(heap == SIZE_POOL_EDEN_HEAP(size_pool) && heap->sweeping_page)); - GC_ASSERT(page->size_pool == size_pool); page->flags.in_tomb = (heap == SIZE_POOL_TOMB_HEAP(size_pool)); list_add_tail(&heap->pages, &page->page_node); heap->total_pages++; @@ -2324,18 +2325,37 @@ static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page); static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap); static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page); -#if USE_RVARGC -void * -rb_gc_rvargc_object_data(VALUE obj) +size_t +rb_gc_obj_slot_size(VALUE obj) { - return (void *)(obj + sizeof(RVALUE)); + return GET_HEAP_PAGE(obj)->slot_size; } + +static inline size_t +size_pool_slot_size(char pool_id) +{ + GC_ASSERT(pool_id < SIZE_POOL_COUNT); + + size_t slot_size = (1 << pool_id) * sizeof(RVALUE); + +#if RGENGC_CHECK_MODE + rb_objspace_t *objspace = &rb_objspace; + GC_ASSERT(size_pools[pool_id].slot_size == slot_size); #endif + return 
slot_size; +} + +bool +rb_gc_size_allocatable_p(size_t size) +{ + return size <= size_pool_slot_size(SIZE_POOL_COUNT - 1); +} + static inline VALUE ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size) { - if (size != sizeof(RVALUE)) { + if (size > sizeof(RVALUE)) { return Qfalse; } @@ -2409,6 +2429,25 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3) } #if USE_RVARGC +static inline rb_size_pool_t * +size_pool_for_size(rb_objspace_t *objspace, size_t size) +{ + size_t slot_count = CEILDIV(size, sizeof(RVALUE)); + + /* size_pool_idx is ceil(log2(slot_count)) */ + size_t size_pool_idx = 64 - nlz_int64(slot_count - 1); + if (size_pool_idx >= SIZE_POOL_COUNT) { + rb_bug("size_pool_for_size: allocation size too large"); + } + + rb_size_pool_t *size_pool = &size_pools[size_pool_idx]; + GC_ASSERT(size_pool->slot_size >= (short)size); + GC_ASSERT(size_pool_idx == 0 || size_pools[size_pool_idx - 1].slot_size < (short)size); + + return size_pool; +} + + static inline VALUE heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap) { @@ -2430,25 +2469,6 @@ heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t * return (VALUE)p; } - -static inline rb_size_pool_t * -size_pool_for_size(rb_objspace_t *objspace, size_t size) -{ - size_t slot_count = CEILDIV(size, sizeof(RVALUE)); - - /* size_pool_idx is ceil(log2(slot_count)) */ - size_t size_pool_idx = 64 - nlz_int64(slot_count - 1); - GC_ASSERT(size_pool_idx > 0); - if (size_pool_idx >= SIZE_POOL_COUNT) { - rb_bug("size_pool_for_size: allocation size too large"); - } - - rb_size_pool_t *size_pool = &size_pools[size_pool_idx]; - GC_ASSERT(size_pool->slot_size >= (short)size); - GC_ASSERT(size_pools[size_pool_idx - 1].slot_size < (short)size); - - return size_pool; -} #endif ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size)); @@ -2574,7 +2594,6 @@ 
VALUE rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of(klass, flags, 0, 0, 0, FALSE, size); } @@ -2582,7 +2601,6 @@ VALUE rb_wb_protected_newobj_of(VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of(klass, flags, 0, 0, 0, TRUE, size); } @@ -2590,7 +2608,6 @@ VALUE rb_ec_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t size) { GC_ASSERT((flags & FL_WB_PROTECTED) == 0); - size = size + sizeof(RVALUE); return newobj_of_cr(rb_ec_ractor_ptr(ec), klass, flags, 0, 0, 0, TRUE, size); } @@ -2830,14 +2847,14 @@ is_pointer_to_heap(rb_objspace_t *objspace, void *ptr) mid = (lo + hi) / 2; page = heap_pages_sorted[mid]; if (page->start <= p) { - if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size))) { + if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->slot_size))) { RB_DEBUG_COUNTER_INC(gc_isptr_maybe); if (page->flags.in_tomb) { return FALSE; } else { - if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->size_pool->slot_size != 0) return FALSE; + if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->slot_size != 0) return FALSE; return TRUE; } @@ -4183,7 +4200,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace) /* run data/file object's finalizers */ for (i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; - short stride = page->size_pool->slot_size; + short stride = page->slot_size; uintptr_t p = (uintptr_t)page->start; uintptr_t pend = p + page->total_slots * stride; @@ -4780,13 +4797,13 @@ count_objects(int argc, VALUE *argv, VALUE os) for (i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; - short stride = page->size_pool->slot_size; + short stride = page->slot_size; uintptr_t p = (uintptr_t)page->start; uintptr_t pend = p + 
page->total_slots * stride; for (;p < pend; p += stride) { VALUE vp = (VALUE)p; - GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->size_pool->slot_size == 0); + GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->slot_size == 0); void *poisoned = asan_poisoned_object_p(vp); asan_unpoison_object(vp, false); @@ -4916,7 +4933,7 @@ try_move_in_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa from_freelist = true; } - gc_move(objspace, (VALUE)p, dest, page->size_pool->slot_size); + gc_move(objspace, (VALUE)p, dest, page->slot_size); gc_pin(objspace, (VALUE)p); heap->compact_cursor_index = (RVALUE *)p; if (from_freelist) { @@ -5216,7 +5233,7 @@ gc_fill_swept_page_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, struct heap_page * sweep_page = ctx->page; if (bitset) { - short slot_size = sweep_page->size_pool->slot_size; + short slot_size = sweep_page->slot_size; short slot_bits = slot_size / sizeof(RVALUE); do { @@ -5307,7 +5324,7 @@ static inline void gc_plane_sweep(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct gc_sweep_context *ctx) { struct heap_page * sweep_page = ctx->page; - short slot_size = sweep_page->size_pool->slot_size; + short slot_size = sweep_page->slot_size; short slot_bits = slot_size / sizeof(RVALUE); GC_ASSERT(slot_bits > 0); @@ -5385,7 +5402,6 @@ static inline void gc_page_sweep(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, struct gc_sweep_context *ctx) { struct heap_page *sweep_page = ctx->page; - GC_ASSERT(sweep_page->size_pool == size_pool); int i; @@ -5603,7 +5619,23 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool) size_t min_free_slots = (size_t)(total_slots * gc_params.heap_free_slots_min_ratio); if (swept_slots < min_free_slots) { - if (is_full_marking(objspace)) { + bool grow_heap = is_full_marking(objspace); + + if (!is_full_marking(objspace)) { + /* The heap is a growth heap if it freed more slots than had empty slots. 
*/ + bool is_growth_heap = size_pool->empty_slots == 0 || + size_pool->freed_slots > size_pool->empty_slots; + + if (objspace->profile.count - objspace->rgengc.last_major_gc < RVALUE_OLD_AGE) { + grow_heap = TRUE; + } + else if (is_growth_heap) { /* Only growth heaps are allowed to start a major GC. */ + objspace->rgengc.need_major_gc |= GPR_FLAG_MAJOR_BY_NOFREE; + size_pool->force_major_gc_count++; + } + } + + if (grow_heap) { size_t extend_page_count = heap_extend_pages(objspace, swept_slots, total_slots, total_pages); if (extend_page_count > size_pool->allocatable_pages) { @@ -5612,18 +5644,6 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool) heap_increment(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool)); } - else { - /* The heap is a growth heap if it freed more slots than had empty slots. */ - bool is_growth_heap = size_pool->empty_slots == 0 || - size_pool->freed_slots > size_pool->empty_slots; - - /* Only growth heaps are allowed to start a major GC. 
*/ - if (is_growth_heap && - objspace->profile.count - objspace->rgengc.last_major_gc >= RVALUE_OLD_AGE) { - objspace->rgengc.need_major_gc |= GPR_FLAG_MAJOR_BY_NOFREE; - size_pool->force_major_gc_count++; - } - } } } #endif @@ -5660,6 +5680,7 @@ gc_sweep_finish(rb_objspace_t *objspace) else { eden_heap->free_pages = eden_heap->pooled_pages; } + eden_heap->pooled_pages = NULL; objspace->rincgc.pooled_slots = 0; } #endif @@ -5701,8 +5722,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea #endif do { - GC_ASSERT(sweep_page->size_pool == size_pool); - RUBY_DEBUG_LOG("sweep_page:%p", (void *)sweep_page); struct gc_sweep_context ctx = { @@ -5831,7 +5850,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_ bool from_freelist = FL_TEST_RAW(forwarding_object, FL_FROM_FREELIST); object = rb_gc_location(forwarding_object); - gc_move(objspace, object, forwarding_object, page->size_pool->slot_size); + gc_move(objspace, object, forwarding_object, page->slot_size); /* forwarding_object is now our actual object, and "object" * is the free slot for the original page */ struct heap_page *orig_page = GET_HEAP_PAGE(object); @@ -7654,7 +7673,7 @@ gc_verify_heap_page(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) int remembered_old_objects = 0; int free_objects = 0; int zombie_objects = 0; - int stride = page->size_pool->slot_size / sizeof(RVALUE); + int stride = page->slot_size / sizeof(RVALUE); for (i=0; i<page->total_slots; i+=stride) { VALUE val = (VALUE)&page->start[i]; @@ -7776,7 +7795,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace) /* check relations */ for (size_t i = 0; i < heap_allocated_pages; i++) { struct heap_page *page = heap_pages_sorted[i]; - short slot_size = page->size_pool->slot_size; + short slot_size = page->slot_size; uintptr_t start = (uintptr_t)page->start; uintptr_t end = start + page->total_slots * slot_size; @@ -10019,7 +10038,19 @@ 
gc_update_object_references(rb_objspace_t *objspace, VALUE obj) case T_STRING: if (STR_SHARED_P(obj)) { +#if USE_RVARGC + VALUE orig_shared = any->as.string.as.heap.aux.shared; +#endif UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared); +#if USE_RVARGC + VALUE shared = any->as.string.as.heap.aux.shared; + if (STR_EMBED_P(shared)) { + size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary; + GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary); + GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len); + any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset; + } +#endif } break; @@ -13561,6 +13592,8 @@ Init_GC(void) rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_SIZE")), SIZET2NUM(HEAP_PAGE_BITMAP_SIZE)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_PLANES")), SIZET2NUM(HEAP_PAGE_BITMAP_PLANES)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_SIZE")), SIZET2NUM(HEAP_PAGE_SIZE)); + rb_hash_aset(gc_constants, ID2SYM(rb_intern("SIZE_POOL_COUNT")), LONG2FIX(SIZE_POOL_COUNT)); + rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(size_pool_slot_size(SIZE_POOL_COUNT - 1))); OBJ_FREEZE(gc_constants); /* internal constants */ rb_define_const(rb_mGC, "INTERNAL_CONSTANTS", gc_constants); diff --git a/gc.rb b/gc.rb index c0459b0beb..e80d6635a7 100644 --- a/gc.rb +++ b/gc.rb @@ -256,6 +256,16 @@ module GC def self.verify_compaction_references(toward: nil, double_heap: false) Primitive.gc_verify_compaction_references(double_heap, toward == :empty) end + + # :nodoc: + # call-seq: + # GC.using_rvargc? -> true or false + # + # Returns true if using experimental feature Variable Width Allocation, false + # otherwise. + def self.using_rvargc? 
+ GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] > 1 + end end module ObjectSpace diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h index b6134c6165..51f863fc29 100644 --- a/include/ruby/internal/config.h +++ b/include/ruby/internal/config.h @@ -146,4 +146,8 @@ # undef RBIMPL_TEST3 #endif /* HAVE_VA_ARGS_MACRO */ +#ifndef USE_RVARGC +# define USE_RVARGC 0 +#endif + #endif /* RBIMPL_CONFIG_H */ diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h index f7d0539a0c..b7cf142156 100644 --- a/include/ruby/internal/core/rstring.h +++ b/include/ruby/internal/core/rstring.h @@ -42,9 +42,11 @@ /** @cond INTERNAL_MACRO */ #define RSTRING_NOEMBED RSTRING_NOEMBED +#if !USE_RVARGC #define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK #define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT #define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX +#endif #define RSTRING_FSTR RSTRING_FSTR #define RSTRING_EMBED_LEN RSTRING_EMBED_LEN #define RSTRING_LEN RSTRING_LEN @@ -160,6 +162,7 @@ enum ruby_rstring_flags { */ RSTRING_NOEMBED = RUBY_FL_USER1, +#if !USE_RVARGC /** * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these * bits are used to store the number of bytes actually filled into @@ -172,6 +175,7 @@ enum ruby_rstring_flags { */ RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 | RUBY_FL_USER5 | RUBY_FL_USER6, +#endif /* Actually, string encodings are also encoded into the flags, using * remaining bits.*/ @@ -198,6 +202,7 @@ enum ruby_rstring_flags { RSTRING_FSTR = RUBY_FL_USER17 }; +#if !USE_RVARGC /** * This is an enum because GDB wants it (rather than a macro). People need not * bother. @@ -209,6 +214,7 @@ enum ruby_rstring_consts { /** Max possible number of characters that can be embedded. */ RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1 }; +#endif /** * Ruby's String. A string in ruby conceptually has these information: @@ -278,7 +284,17 @@ struct RString { * here. 
Could be sufficiently large. In this case the length is * encoded into the flags. */ +#if USE_RVARGC + short len; + /* This is a length 1 array because: + * 1. GCC has a bug that does not optimize C flexible array members + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452) + * 2. Zero length arrays are not supported by all compilers + */ + char ary[1]; +#else char ary[RSTRING_EMBED_LEN_MAX + 1]; +#endif } embed; } as; }; @@ -407,9 +423,13 @@ RSTRING_EMBED_LEN(VALUE str) RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING); RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED)); +#if USE_RVARGC + short f = RSTRING(str)->as.embed.len; +#else VALUE f = RBASIC(str)->flags; f &= RSTRING_EMBED_LEN_MASK; f >>= RSTRING_EMBED_LEN_SHIFT; +#endif return RBIMPL_CAST((long)f); } diff --git a/internal/gc.h b/internal/gc.h index 233af1ce2c..49b12db2df 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -18,10 +18,6 @@ struct rb_execution_context_struct; /* in vm_core.h */ struct rb_objspace; /* in vm_core.h */ -#ifndef USE_RVARGC -#define USE_RVARGC 0 -#endif - #ifdef NEWOBJ_OF # undef NEWOBJ_OF # undef RB_NEWOBJ_OF @@ -30,22 +26,21 @@ struct rb_objspace; /* in vm_core.h */ #define RVALUE_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX])) -/* optimized version of NEWOBJ() */ -#define RB_NEWOBJ_OF(var, T, c, f) \ - T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \ - rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \ - rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE)) - -#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) \ - T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \ - rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \ - rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE)) - #define RB_RVARGC_NEWOBJ_OF(var, T, c, f, s) \ T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? 
\ rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, s) : \ rb_wb_unprotected_newobj_of((c), (f), s)) +#define RB_RVARGC_EC_NEWOBJ_OF(ec, var, T, c, f, s) \ + T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \ + rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, s) : \ + rb_wb_unprotected_newobj_of((c), (f), s)) + +/* optimized version of NEWOBJ() */ +#define RB_NEWOBJ_OF(var, T, c, f) RB_RVARGC_NEWOBJ_OF(var, T, c, f, RVALUE_SIZE) + +#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) RB_RVARGC_EC_NEWOBJ_OF(ec, var, T, c, f, RVALUE_SIZE) + #define NEWOBJ_OF(var, T, c, f) RB_NEWOBJ_OF((var), T, (c), (f)) #define RVARGC_NEWOBJ_OF(var, T, c, f, s) RB_RVARGC_NEWOBJ_OF((var), T, (c), (f), (s)) #define RB_OBJ_GC_FLAGS_MAX 6 /* used in ext/objspace */ @@ -102,6 +97,8 @@ static inline void *ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, si static inline void ruby_sized_xfree_inlined(void *ptr, size_t size); VALUE rb_class_allocate_instance(VALUE klass); void rb_gc_ractor_newobj_cache_clear(rb_ractor_newobj_cache_t *newobj_cache); +size_t rb_gc_obj_slot_size(VALUE obj); +bool rb_gc_size_allocatable_p(size_t size); RUBY_SYMBOL_EXPORT_BEGIN /* gc.c (export) */ diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py index c046e7fbb1..b6ac231cee 100755 --- a/misc/lldb_cruby.py +++ b/misc/lldb_cruby.py @@ -190,6 +190,8 @@ def string2cstr(rstring): cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0) clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0) else: + # cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0) + # clen = int(rstring.GetValueForExpressionPath(".as.embed.len").value, 0) cptr = int(rstring.GetValueForExpressionPath(".as.ary").location, 0) clen = (flags & RSTRING_EMBED_LEN_MASK) >> RSTRING_EMBED_LEN_SHIFT return cptr, clen @@ -315,7 +317,6 @@ def lldb_inspect(debugger, target, result, val): else: len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned() ptr = 
val.GetValueForExpressionPath("->as.heap.ptr") - #print(val.GetValueForExpressionPath("->as.heap"), file=result) result.write("T_ARRAY: %slen=%d" % (flaginfo, len)) if flags & RUBY_FL_USER1: result.write(" (embed)") diff --git a/ruby.c b/ruby.c index ce0a96abd8..0c64d4e1ce 100644 --- a/ruby.c +++ b/ruby.c @@ -566,7 +566,12 @@ static VALUE runtime_libruby_path(void) { #if defined _WIN32 || defined __CYGWIN__ - DWORD len = RSTRING_EMBED_LEN_MAX, ret; + DWORD len, ret; +#if USE_RVARGC + len = 32; +#else + len = RSTRING_EMBED_LEN_MAX; +#endif VALUE path; VALUE wsopath = rb_str_new(0, len*sizeof(WCHAR)); WCHAR *wlibpath; diff --git a/spec/ruby/optional/capi/string_spec.rb b/spec/ruby/optional/capi/string_spec.rb index 5575ade07b..3cd88a7390 100644 --- a/spec/ruby/optional/capi/string_spec.rb +++ b/spec/ruby/optional/capi/string_spec.rb @@ -108,7 +108,7 @@ describe "C-API String function" do it "returns a string with the given capacity" do buf = @s.rb_str_buf_new(256, nil) - @s.rb_str_capacity(buf).should == 256 + @s.rb_str_capacity(buf).should >= 256 end it "returns a string that can be appended to" do @@ -682,27 +682,27 @@ describe "C-API String function" do describe "rb_str_modify_expand" do it "grows the capacity to bytesize + expand, not changing the bytesize" do str = @s.rb_str_buf_new(256, "abcd") - @s.rb_str_capacity(str).should == 256 + @s.rb_str_capacity(str).should >= 256 @s.rb_str_set_len(str, 3) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 256 + @s.rb_str_capacity(str).should >= 256 @s.rb_str_modify_expand(str, 4) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 7 + @s.rb_str_capacity(str).should >= 7 @s.rb_str_modify_expand(str, 1024) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - @s.rb_str_capacity(str).should == 1027 + @s.rb_str_capacity(str).should >= 1027 @s.rb_str_modify_expand(str, 1) str.bytesize.should == 3 @s.RSTRING_LEN(str).should == 3 - 
@s.rb_str_capacity(str).should == 4 + @s.rb_str_capacity(str).should >= 4 end it "raises an error if the string is frozen" do diff --git a/string.c b/string.c index 48c92072d8..b815b12c54 100644 --- a/string.c +++ b/string.c @@ -106,14 +106,26 @@ VALUE rb_cSymbol; #define STR_SET_NOEMBED(str) do {\ FL_SET((str), STR_NOEMBED);\ - STR_SET_EMBED_LEN((str), 0);\ + if (USE_RVARGC) {\ + FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\ + }\ + else {\ + STR_SET_EMBED_LEN((str), 0);\ + }\ } while (0) #define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE)) -#define STR_SET_EMBED_LEN(str, n) do { \ +#if USE_RVARGC +# define STR_SET_EMBED_LEN(str, n) do { \ + assert(str_embed_capa(str) > (n));\ + RSTRING(str)->as.embed.len = (n);\ +} while (0) +#else +# define STR_SET_EMBED_LEN(str, n) do { \ long tmp_n = (n);\ RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\ RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\ } while (0) +#endif #define STR_SET_LEN(str, n) do { \ if (STR_EMBED_P(str)) {\ @@ -150,7 +162,7 @@ VALUE rb_cSymbol; } while (0) #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\ if (STR_EMBED_P(str)) {\ - if (!STR_EMBEDDABLE_P(capacity, termlen)) {\ + if (str_embed_capa(str) < capacity + termlen) {\ char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\ const long tlen = RSTRING_LEN(str);\ memcpy(tmp, RSTRING_PTR(str), tlen);\ @@ -170,6 +182,8 @@ VALUE rb_cSymbol; #define STR_SET_SHARED(str, shared_str) do { \ if (!FL_TEST(str, STR_FAKESTR)) { \ + assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \ + assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \ RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \ FL_SET((str), STR_SHARED); \ FL_SET((shared_str), STR_SHARED_ROOT); \ @@ -193,8 +207,32 @@ VALUE rb_cSymbol; #define SHARABLE_SUBSTRING_P(beg, len, end) 1 #endif -#define STR_EMBEDDABLE_P(len, termlen) \ - ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen)) + +static inline 
long +str_embed_capa(VALUE str) +{ +#if USE_RVARGC + return rb_gc_obj_slot_size(str) - offsetof(struct RString, as.embed.ary); +#else + return RSTRING_EMBED_LEN_MAX + 1; +#endif +} + +static inline size_t +str_embed_size(long capa) +{ + return offsetof(struct RString, as.embed.ary) + capa; +} + +static inline bool +STR_EMBEDDABLE_P(long len, long termlen) +{ +#if USE_RVARGC + return rb_gc_size_allocatable_p(str_embed_size(len + termlen)); +#else + return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen; +#endif +} static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str); static VALUE str_new_frozen(VALUE klass, VALUE orig); @@ -768,7 +806,11 @@ static size_t str_capacity(VALUE str, const int termlen) { if (STR_EMBED_P(str)) { +#if USE_RVARGC + return str_embed_capa(str) - termlen; +#else return (RSTRING_EMBED_LEN_MAX + 1 - termlen); +#endif } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { return RSTRING(str)->as.heap.len; @@ -793,17 +835,36 @@ must_not_null(const char *ptr) } static inline VALUE -str_alloc(VALUE klass) +str_alloc(VALUE klass, size_t size) { - NEWOBJ_OF(str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0)); + assert(size > 0); + RVARGC_NEWOBJ_OF(str, struct RString, klass, + T_STRING | (RGENGC_WB_PROTECTED_STRING ? 
FL_WB_PROTECTED : 0), size); return (VALUE)str; } +static inline VALUE +str_alloc_embed(VALUE klass, size_t capa) +{ + size_t size = str_embed_size(capa); + assert(rb_gc_size_allocatable_p(size)); +#if !USE_RVARGC + assert(size <= sizeof(struct RString)); +#endif + return str_alloc(klass, size); +} + +static inline VALUE +str_alloc_heap(VALUE klass) +{ + return str_alloc(klass, sizeof(struct RString)); +} + static inline VALUE empty_str_alloc(VALUE klass) { RUBY_DTRACE_CREATE_HOOK(STRING, 0); - return str_alloc(klass); + return str_alloc_embed(klass, 0); } static VALUE @@ -817,8 +878,14 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) RUBY_DTRACE_CREATE_HOOK(STRING, len); - str = str_alloc(klass); - if (!STR_EMBEDDABLE_P(len, termlen)) { + if (STR_EMBEDDABLE_P(len, termlen)) { + str = str_alloc_embed(klass, len + termlen); + if (len == 0) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } + } + else { + str = str_alloc_heap(klass); RSTRING(str)->as.heap.aux.capa = len; /* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never * integer overflow. 
If we can STATIC_ASSERT that, the following @@ -827,9 +894,6 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen) rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen); STR_SET_NOEMBED(str); } - else if (len == 0) { - ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); - } if (ptr) { memcpy(RSTRING_PTR(str), ptr, len); } @@ -931,7 +995,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex) } else { RUBY_DTRACE_CREATE_HOOK(STRING, len); - str = str_alloc(klass); + str = str_alloc_heap(klass); RSTRING(str)->as.heap.len = len; RSTRING(str)->as.heap.ptr = (char *)ptr; RSTRING(str)->as.heap.aux.capa = len; @@ -1228,8 +1292,8 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) long len; RSTRING_GETMEM(str, ptr, len); - if (STR_EMBEDDABLE_P(len, termlen)) { - char *ptr2 = RSTRING(str2)->as.embed.ary; + if (str_embed_capa(str2) >= len + termlen) { + char *ptr2 = RSTRING(str2)->as.embed.ary; STR_SET_EMBED(str2); memcpy(ptr2, RSTRING_PTR(str), len); STR_SET_EMBED_LEN(str2, len); @@ -1245,6 +1309,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str) root = rb_str_new_frozen(str); RSTRING_GETMEM(root, ptr, len); } + assert(OBJ_FROZEN(root)); if (!STR_EMBED_P(str2) && !FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) { if (FL_TEST_RAW(str2, STR_SHARED_ROOT)) { rb_fatal("about to free a possible shared root"); @@ -1273,7 +1338,7 @@ str_replace_shared(VALUE str2, VALUE str) static VALUE str_new_shared(VALUE klass, VALUE str) { - return str_replace_shared(str_alloc(klass), str); + return str_replace_shared(str_alloc_heap(klass), str); } VALUE @@ -1335,26 +1400,54 @@ str_new_frozen(VALUE klass, VALUE orig) return str_new_frozen_buffer(klass, orig, TRUE); } +static VALUE +heap_str_make_shared(VALUE klass, VALUE orig) +{ + assert(!STR_EMBED_P(orig)); + assert(!STR_SHARED_P(orig)); + + VALUE str = str_alloc_heap(klass); + STR_SET_NOEMBED(str); + RSTRING(str)->as.heap.len = RSTRING_LEN(orig); + RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig); + 
RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa; + RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; + RBASIC(orig)->flags &= ~STR_NOFREE; + STR_SET_SHARED(orig, str); + if (klass == 0) + FL_UNSET_RAW(str, STR_BORROWED); + return str; +} + static VALUE str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) { VALUE str; - if (STR_EMBED_P(orig)) { - str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig)); + long len = RSTRING_LEN(orig); + + if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(len, 1)) { + str = str_new(klass, RSTRING_PTR(orig), len); + assert(STR_EMBED_P(str)); } else { if (FL_TEST_RAW(orig, STR_SHARED)) { VALUE shared = RSTRING(orig)->as.heap.aux.shared; - long ofs = RSTRING(orig)->as.heap.ptr - RSTRING(shared)->as.heap.ptr; - long rest = RSTRING(shared)->as.heap.len - ofs - RSTRING(orig)->as.heap.len; + long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared); + long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len; + assert(ofs >= 0); + assert(rest >= 0); + assert(ofs + rest <= RSTRING_LEN(shared)); +#if !USE_RVARGC assert(!STR_EMBED_P(shared)); +#endif assert(OBJ_FROZEN(shared)); if ((ofs > 0) || (rest > 0) || (klass != RBASIC(shared)->klass) || ENCODING_GET(shared) != ENCODING_GET(orig)) { str = str_new_shared(klass, shared); + assert(!STR_EMBED_P(str)); RSTRING(str)->as.heap.ptr += ofs; RSTRING(str)->as.heap.len -= ofs + rest; } @@ -1364,24 +1457,15 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding) return shared; } } - else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) { - str = str_alloc(klass); + else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) { + str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig)); STR_SET_EMBED(str); memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig)); STR_SET_EMBED_LEN(str, RSTRING_LEN(orig)); TERM_FILL(RSTRING_END(str), TERM_LEN(orig)); } else { - str = str_alloc(klass); - STR_SET_NOEMBED(str); - 
RSTRING(str)->as.heap.len = RSTRING_LEN(orig); - RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig); - RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa; - RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE; - RBASIC(orig)->flags &= ~STR_NOFREE; - STR_SET_SHARED(orig, str); - if (klass == 0) - FL_UNSET_RAW(str, STR_BORROWED); + str = heap_str_make_shared(klass, orig); } } @@ -1405,17 +1489,24 @@ str_new_empty_String(VALUE str) } #define STR_BUF_MIN_SIZE 63 +#if !USE_RVARGC STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX); +#endif VALUE rb_str_buf_new(long capa) { - VALUE str = str_alloc(rb_cString); + if (STR_EMBEDDABLE_P(capa, 1)) { + return str_alloc_embed(rb_cString, capa + 1); + } - if (capa <= RSTRING_EMBED_LEN_MAX) return str; + VALUE str = str_alloc_heap(rb_cString); + +#if !USE_RVARGC if (capa < STR_BUF_MIN_SIZE) { capa = STR_BUF_MIN_SIZE; } +#endif FL_SET(str, STR_NOEMBED); RSTRING(str)->as.heap.aux.capa = capa; RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1); @@ -1508,7 +1599,7 @@ str_shared_replace(VALUE str, VALUE str2) str_discard(str); termlen = rb_enc_mbminlen(enc); - if (STR_EMBEDDABLE_P(RSTRING_LEN(str2), termlen)) { + if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) { STR_SET_EMBED(str); memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen); STR_SET_EMBED_LEN(str, RSTRING_LEN(str2)); @@ -1516,6 +1607,21 @@ str_shared_replace(VALUE str, VALUE str2) ENC_CODERANGE_SET(str, cr); } else { +#if USE_RVARGC + if (STR_EMBED_P(str2)) { + assert(!FL_TEST(str2, STR_SHARED)); + long len = RSTRING(str2)->as.embed.len; + assert(len + termlen <= str_embed_capa(str2)); + + char *new_ptr = ALLOC_N(char, len + termlen); + memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen); + RSTRING(str2)->as.heap.ptr = new_ptr; + RSTRING(str2)->as.heap.len = len; + RSTRING(str2)->as.heap.aux.capa = len; + STR_SET_NOEMBED(str2); + } +#endif + STR_SET_NOEMBED(str); FL_UNSET(str, STR_SHARED); 
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); @@ -1581,42 +1687,77 @@ str_replace(VALUE str, VALUE str2) } static inline VALUE -ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass) +ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size) { - RB_EC_NEWOBJ_OF(ec, str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0)); + assert(size > 0); + RB_RVARGC_EC_NEWOBJ_OF(ec, str, struct RString, klass, + T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size); return (VALUE)str; } +static inline VALUE +ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa) +{ + size_t size = str_embed_size(capa); + assert(rb_gc_size_allocatable_p(size)); +#if !USE_RVARGC + assert(size <= sizeof(struct RString)); +#endif + return ec_str_alloc(ec, klass, size); +} + +static inline VALUE +ec_str_alloc_heap(struct rb_execution_context_struct *ec, VALUE klass) +{ + return ec_str_alloc(ec, klass, sizeof(struct RString)); +} + static inline VALUE str_duplicate_setup(VALUE klass, VALUE str, VALUE dup) { - enum {embed_size = RSTRING_EMBED_LEN_MAX + 1}; const VALUE flag_mask = +#if !USE_RVARGC RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK | - ENC_CODERANGE_MASK | ENCODING_MASK | +#endif + ENC_CODERANGE_MASK | ENCODING_MASK | FL_FREEZE ; VALUE flags = FL_TEST_RAW(str, flag_mask); int encidx = 0; - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, - char, embed_size); - if (flags & STR_NOEMBED) { + if (STR_EMBED_P(str)) { + assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str)); + STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str)); + MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, + char, RSTRING_EMBED_LEN(str)); + flags &= ~RSTRING_NOEMBED; + } + else { + VALUE root = str; if (FL_TEST_RAW(str, STR_SHARED)) { - str = RSTRING(str)->as.heap.aux.shared; + root = RSTRING(str)->as.heap.aux.shared; } else if (UNLIKELY(!(flags & FL_FREEZE))) { - str = str_new_frozen(klass, str); 
+ root = str = str_new_frozen(klass, str); flags = FL_TEST_RAW(str, flag_mask); - } - if (flags & STR_NOEMBED) { - RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str); - flags |= STR_SHARED; - } - else { - MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, - char, embed_size); - } + } + assert(!STR_SHARED_P(root)); + assert(RB_OBJ_FROZEN_RAW(root)); +#if USE_RVARGC + if (1) { +#else + if (STR_EMBED_P(root)) { + MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(root)->as.embed.ary, + char, RSTRING_EMBED_LEN_MAX + 1); + } + else { +#endif + RSTRING(dup)->as.heap.len = RSTRING_LEN(str); + RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str); + RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root); + flags |= RSTRING_NOEMBED | STR_SHARED; + } } + if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<as.embed.len + 1 <= str_embed_capa(str)); + memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1); +#else memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_EMBED_LEN_MAX + 1); +#endif RSTRING(str)->as.heap.ptr = new_ptr; } else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) { @@ -2133,7 +2293,7 @@ rb_str_times(VALUE str, VALUE times) return str_duplicate(rb_cString, str); } if (times == INT2FIX(0)) { - str2 = str_alloc(rb_cString); + str2 = str_alloc_embed(rb_cString, 0); rb_enc_copy(str2, str); return str2; } @@ -2142,8 +2302,11 @@ rb_str_times(VALUE str, VALUE times) rb_raise(rb_eArgError, "negative argument"); } if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) { - str2 = str_alloc(rb_cString); - if (!STR_EMBEDDABLE_P(len, 1)) { + if (STR_EMBEDDABLE_P(len, 1)) { + str2 = str_alloc_embed(rb_cString, len + 1); + } + else { + str2 = str_alloc_heap(rb_cString); RSTRING(str2)->as.heap.aux.capa = len; RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1); STR_SET_NOEMBED(str2); @@ -2244,11 +2407,11 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen) if (len > capa) len = capa; - if (!STR_EMBED_P(str) && 
STR_EMBEDDABLE_P(capa, termlen)) { + if (!STR_EMBED_P(str) && str_embed_capa(str) >= capa + termlen) { ptr = RSTRING(str)->as.heap.ptr; STR_SET_EMBED(str); - memcpy(RSTRING(str)->as.embed.ary, ptr, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + memcpy(RSTRING(str)->as.embed.ary, ptr, len); + TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); STR_SET_EMBED_LEN(str, len); return; } @@ -2646,7 +2809,7 @@ rb_str_subseq(VALUE str, long beg, long len) } else { str2 = rb_str_new(RSTRING_PTR(str)+beg, len); - RB_GC_GUARD(str); + RB_GC_GUARD(str); } rb_enc_cr_str_copy_for_substr(str2, str); @@ -2885,19 +3048,19 @@ rb_str_resize(VALUE str, long len) const int termlen = TERM_LEN(str); if (STR_EMBED_P(str)) { if (len == slen) return str; - if (STR_EMBEDDABLE_P(len, termlen)) { + if (str_embed_capa(str) >= len + termlen) { STR_SET_EMBED_LEN(str, len); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); return str; } str_make_independent_expand(str, slen, len - slen, termlen); } - else if (STR_EMBEDDABLE_P(len, termlen)) { + else if (str_embed_capa(str) >= len + termlen) { char *ptr = STR_HEAP_PTR(str); STR_SET_EMBED(str); if (slen > len) slen = len; - if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen); - TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); + if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen); + TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen); STR_SET_EMBED_LEN(str, len); if (independent) ruby_xfree(ptr); return str; @@ -2925,7 +3088,9 @@ str_buf_cat(VALUE str, const char *ptr, long len) long capa, total, olen, off = -1; char *sptr; const int termlen = TERM_LEN(str); +#if !USE_RVARGC assert(termlen < RSTRING_EMBED_LEN_MAX + 1); /* < (LONG_MAX/2) */ +#endif RSTRING_GETMEM(str, sptr, olen); if (ptr >= sptr && ptr <= sptr + olen) { @@ -2934,8 +3099,8 @@ str_buf_cat(VALUE str, const char *ptr, long len) rb_str_modify(str); if (len == 0) return 0; if 
(STR_EMBED_P(str)) { - capa = RSTRING_EMBED_LEN_MAX + 1 - termlen; - sptr = RSTRING(str)->as.embed.ary; + capa = str_embed_capa(str) - termlen; + sptr = RSTRING(str)->as.embed.ary; olen = RSTRING_EMBED_LEN(str); } else { @@ -4797,17 +4962,21 @@ rb_str_drop_bytes(VALUE str, long len) str_modifiable(str); if (len > olen) len = olen; nlen = olen - len; - if (STR_EMBEDDABLE_P(nlen, TERM_LEN(str))) { + if (str_embed_capa(str) >= nlen + TERM_LEN(str)) { char *oldptr = ptr; int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE)); STR_SET_EMBED(str); STR_SET_EMBED_LEN(str, nlen); - ptr = RSTRING(str)->as.embed.ary; + ptr = RSTRING(str)->as.embed.ary; memmove(ptr, oldptr + len, nlen); if (fl == STR_NOEMBED) xfree(oldptr); } else { - if (!STR_SHARED_P(str)) rb_str_new_frozen(str); + if (!STR_SHARED_P(str)) { + VALUE shared = heap_str_make_shared(rb_obj_class(str), str); + rb_enc_cr_str_exact_copy(shared, str); + OBJ_FREEZE(shared); + } ptr = RSTRING(str)->as.heap.ptr += len; RSTRING(str)->as.heap.len = nlen; } @@ -10465,7 +10634,13 @@ rb_str_force_encoding(VALUE str, VALUE enc) static VALUE rb_str_b(VALUE str) { - VALUE str2 = str_alloc(rb_cString); + VALUE str2; + if (FL_TEST(str, STR_NOEMBED)) { + str2 = str_alloc_heap(rb_cString); + } + else { + str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str)); + } str_replace_shared_without_enc(str2, str); ENC_CODERANGE_CLEAR(str2); return str2; diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb index df59e76778..583c98fca4 100644 --- a/test/-ext-/string/test_capacity.rb +++ b/test/-ext-/string/test_capacity.rb @@ -4,13 +4,10 @@ require '-test-/string' require 'rbconfig/sizeof' class Test_StringCapacity < Test::Unit::TestCase - def capa(str) - Bug::String.capacity(str) - end - def test_capacity_embedded - size = RbConfig::SIZEOF['void*'] * 3 - 1 - assert_equal size, capa('foo') + assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] - embed_header_size - 1, 
capa('foo') + assert_equal max_embed_len, capa('1' * max_embed_len) + assert_equal max_embed_len, capa('1' * (max_embed_len - 1)) end def test_capacity_shared @@ -18,7 +15,8 @@ class Test_StringCapacity < Test::Unit::TestCase end def test_capacity_normal - assert_equal 128, capa('1'*128) + assert_equal max_embed_len + 1, capa('1' * (max_embed_len + 1)) + assert_equal max_embed_len + 100, capa('1' * (max_embed_len + 100)) end def test_s_new_capacity @@ -39,7 +37,10 @@ class Test_StringCapacity < Test::Unit::TestCase end def test_literal_capacity - s = "I am testing string literal capacity" + s = eval(%{ + # frozen_string_literal: true + "#{"a" * (max_embed_len + 1)}" + }) assert_equal(s.length, capa(s)) end @@ -51,9 +52,27 @@ class Test_StringCapacity < Test::Unit::TestCase end def test_capacity_fstring - s = String.new("I am testing", capacity: 1000) + s = String.new("a" * max_embed_len, capacity: 1000) s << "fstring capacity" s = -s assert_equal(s.length, capa(s)) end + + private + + def capa(str) + Bug::String.capacity(str) + end + + def embed_header_size + if GC.using_rvargc? 
+ 2 * RbConfig::SIZEOF['void*'] + RbConfig::SIZEOF['short'] + else + 2 * RbConfig::SIZEOF['void*'] + end + end + + def max_embed_len + GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] - embed_header_size - 1 + end end diff --git a/test/-ext-/string/test_rb_str_dup.rb b/test/-ext-/string/test_rb_str_dup.rb index 49b6af9598..c76a90252f 100644 --- a/test/-ext-/string/test_rb_str_dup.rb +++ b/test/-ext-/string/test_rb_str_dup.rb @@ -3,13 +3,15 @@ require '-test-/string' class Test_RbStrDup < Test::Unit::TestCase def test_nested_shared_non_frozen - str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50)) + orig_str = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] + str = Bug::String.rb_str_dup(Bug::String.rb_str_dup(orig_str)) assert_send([Bug::String, :shared_string?, str]) assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]') end def test_nested_shared_frozen - str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50).freeze) + orig_str = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] + str = Bug::String.rb_str_dup(Bug::String.rb_str_dup(orig_str).freeze) assert_send([Bug::String, :shared_string?, str]) assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]') end diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index 8ed04f59c9..a1954d56a9 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -29,12 +29,12 @@ class TestObjSpace < Test::Unit::TestCase end def test_memsize_of_root_shared_string - a = "hello" * 5 + a = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] b = a.dup c = nil ObjectSpace.each_object(String) {|x| break c = x if x == a and x.frozen?} rv_size = GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] - assert_equal([rv_size, rv_size, 26 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)}) + assert_equal([rv_size, rv_size, a.length + 1 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)}) end def test_argf_memsize diff --git 
a/transcode.c b/transcode.c index 0681288346..d7011443f8 100644 --- a/transcode.c +++ b/transcode.c @@ -3769,7 +3769,11 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self) rb_str_modify(output); if (NIL_P(output_bytesize_v)) { +#if USE_RVARGC + output_bytesize = rb_str_capacity(output); +#else output_bytesize = RSTRING_EMBED_LEN_MAX; +#endif if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input)) output_bytesize = RSTRING_LEN(input); }