ruby--ruby/id_table.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

327 lines
7.4 KiB
C
Raw Normal View History

/* This file is included by symbol.c */
#include "id_table.h"
#ifndef ID_TABLE_DEBUG
#define ID_TABLE_DEBUG 0
#endif
#if ID_TABLE_DEBUG == 0
#undef NDEBUG
#define NDEBUG
#endif
#include "ruby_assert.h"
typedef rb_id_serial_t id_key_t;
static inline ID
key2id(id_key_t key)
{
return rb_id_serial_to_id(key);
}
static inline id_key_t
id2key(ID id)
{
return rb_id_to_serial(id);
}
/* simple open addressing with quadratic probing.
uses mark-bit on collisions - need extra 1 bit,
ID is strictly 3 bits larger than rb_id_serial_t */
typedef struct rb_id_item {
id_key_t key;
#if SIZEOF_VALUE == 8
int collision;
#endif
VALUE val;
} item_t;
struct rb_id_table {
int capa;
int num;
int used;
item_t *items;
};
#if SIZEOF_VALUE == 8
#define ITEM_GET_KEY(tbl, i) ((tbl)->items[i].key)
#define ITEM_KEY_ISSET(tbl, i) ((tbl)->items[i].key)
#define ITEM_COLLIDED(tbl, i) ((tbl)->items[i].collision)
#define ITEM_SET_COLLIDED(tbl, i) ((tbl)->items[i].collision = 1)
static inline void
ITEM_SET_KEY(struct rb_id_table *tbl, int i, id_key_t key)
{
tbl->items[i].key = key;
}
#else
#define ITEM_GET_KEY(tbl, i) ((tbl)->items[i].key >> 1)
#define ITEM_KEY_ISSET(tbl, i) ((tbl)->items[i].key > 1)
#define ITEM_COLLIDED(tbl, i) ((tbl)->items[i].key & 1)
#define ITEM_SET_COLLIDED(tbl, i) ((tbl)->items[i].key |= 1)
static inline void
ITEM_SET_KEY(struct rb_id_table *tbl, int i, id_key_t key)
{
tbl->items[i].key = (key << 1) | ITEM_COLLIDED(tbl, i);
}
#endif
static inline int
round_capa(int capa)
{
/* minsize is 4 */
capa >>= 2;
capa |= capa >> 1;
capa |= capa >> 2;
capa |= capa >> 4;
capa |= capa >> 8;
capa |= capa >> 16;
return (capa + 1) << 2;
}
static struct rb_id_table *
rb_id_table_init(struct rb_id_table *tbl, int capa)
{
MEMZERO(tbl, struct rb_id_table, 1);
if (capa > 0) {
capa = round_capa(capa);
tbl->capa = (int)capa;
tbl->items = ZALLOC_N(item_t, capa);
}
return tbl;
}
Add a cache for class variables Redo of 34a2acdac788602c14bf05fb616215187badd504 and 931138b00696419945dc03e10f033b1f53cd50f3 which were reverted. GitHub PR #4340. This change implements a cache for class variables. Previously there was no cache for cvars. Cvar access is slow due to needing to travel all the way up th ancestor tree before returning the cvar value. The deeper the ancestor tree the slower cvar access will be. The benefits of the cache are more visible with a higher number of included modules due to the way Ruby looks up class variables. The benchmark here includes 26 modules and shows with the cache, this branch is 6.5x faster when accessing class variables. ``` compare-ruby: ruby 3.1.0dev (2021-03-15T06:22:34Z master 9e5105c) [x86_64-darwin19] built-ruby: ruby 3.1.0dev (2021-03-15T12:12:44Z add-cache-for-clas.. c6be009) [x86_64-darwin19] | |compare-ruby|built-ruby| |:--------|-----------:|---------:| |vm_cvar | 5.681M| 36.980M| | | -| 6.51x| ``` Benchmark.ips calling `ActiveRecord::Base.logger` from within a Rails application. ActiveRecord::Base.logger has 71 ancestors. The more ancestors a tree has, the more clear the speed increase. IE if Base had only one ancestor we'd see no improvement. This benchmark is run on a vanilla Rails application. Benchmark code: ```ruby require "benchmark/ips" require_relative "config/environment" Benchmark.ips do |x| x.report "logger" do ActiveRecord::Base.logger end end ``` Ruby 3.0 master / Rails 6.1: ``` Warming up -------------------------------------- logger 155.251k i/100ms Calculating ------------------------------------- ``` Ruby 3.0 with cvar cache / Rails 6.1: ``` Warming up -------------------------------------- logger 1.546M i/100ms Calculating ------------------------------------- logger 14.857M (± 4.8%) i/s - 74.198M in 5.006202s ``` Lastly we ran a benchmark to demonstate the difference between master and our cache when the number of modules increases. This benchmark measures 1 ancestor, 30 ancestors, and 100 ancestors. Ruby 3.0 master: ``` Warming up -------------------------------------- 1 module 1.231M i/100ms 30 modules 432.020k i/100ms 100 modules 145.399k i/100ms Calculating ------------------------------------- 1 module 12.210M (± 2.1%) i/s - 61.553M in 5.043400s 30 modules 4.354M (± 2.7%) i/s - 22.033M in 5.063839s 100 modules 1.434M (± 2.9%) i/s - 7.270M in 5.072531s Comparison: 1 module: 12209958.3 i/s 30 modules: 4354217.8 i/s - 2.80x (± 0.00) slower 100 modules: 1434447.3 i/s - 8.51x (± 0.00) slower ``` Ruby 3.0 with cvar cache: ``` Warming up -------------------------------------- 1 module 1.641M i/100ms 30 modules 1.655M i/100ms 100 modules 1.620M i/100ms Calculating ------------------------------------- 1 module 16.279M (± 3.8%) i/s - 82.038M in 5.046923s 30 modules 15.891M (± 3.9%) i/s - 79.459M in 5.007958s 100 modules 16.087M (± 3.6%) i/s - 81.005M in 5.041931s Comparison: 1 module: 16279458.0 i/s 100 modules: 16087484.6 i/s - same-ish: difference falls within error 30 modules: 15891406.2 i/s - same-ish: difference falls within error ``` Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2021-06-01 17:34:06 +00:00
MJIT_FUNC_EXPORTED struct rb_id_table *
rb_id_table_create(size_t capa)
{
struct rb_id_table *tbl = ALLOC(struct rb_id_table);
return rb_id_table_init(tbl, (int)capa);
}
void
rb_id_table_free(struct rb_id_table *tbl)
{
xfree(tbl->items);
xfree(tbl);
}
void
rb_id_table_clear(struct rb_id_table *tbl)
{
tbl->num = 0;
tbl->used = 0;
MEMZERO(tbl->items, item_t, tbl->capa);
}
size_t
rb_id_table_size(const struct rb_id_table *tbl)
{
return (size_t)tbl->num;
}
size_t
rb_id_table_memsize(const struct rb_id_table *tbl)
{
return sizeof(item_t) * tbl->capa + sizeof(struct rb_id_table);
}
static int
hash_table_index(struct rb_id_table* tbl, id_key_t key)
{
if (tbl->capa > 0) {
int mask = tbl->capa - 1;
int ix = key & mask;
int d = 1;
while (key != ITEM_GET_KEY(tbl, ix)) {
if (!ITEM_COLLIDED(tbl, ix))
return -1;
ix = (ix + d) & mask;
d++;
}
return ix;
}
return -1;
}
static void
hash_table_raw_insert(struct rb_id_table *tbl, id_key_t key, VALUE val)
{
int mask = tbl->capa - 1;
int ix = key & mask;
int d = 1;
assert(key != 0);
while (ITEM_KEY_ISSET(tbl, ix)) {
ITEM_SET_COLLIDED(tbl, ix);
ix = (ix + d) & mask;
d++;
}
tbl->num++;
if (!ITEM_COLLIDED(tbl, ix)) {
tbl->used++;
}
ITEM_SET_KEY(tbl, ix, key);
tbl->items[ix].val = val;
}
static int
hash_delete_index(struct rb_id_table *tbl, int ix)
{
if (ix >= 0) {
if (!ITEM_COLLIDED(tbl, ix)) {
tbl->used--;
}
tbl->num--;
ITEM_SET_KEY(tbl, ix, 0);
tbl->items[ix].val = 0;
return TRUE;
}
else {
return FALSE;
}
}
static void
hash_table_extend(struct rb_id_table* tbl)
{
if (tbl->used + (tbl->used >> 1) >= tbl->capa) {
int new_cap = round_capa(tbl->num + (tbl->num >> 1));
int i;
item_t* old;
struct rb_id_table tmp_tbl = {0, 0, 0};
if (new_cap < tbl->capa) {
new_cap = round_capa(tbl->used + (tbl->used >> 1));
}
tmp_tbl.capa = new_cap;
tmp_tbl.items = ZALLOC_N(item_t, new_cap);
for (i = 0; i < tbl->capa; i++) {
id_key_t key = ITEM_GET_KEY(tbl, i);
if (key != 0) {
hash_table_raw_insert(&tmp_tbl, key, tbl->items[i].val);
}
}
old = tbl->items;
*tbl = tmp_tbl;
xfree(old);
}
}
#if ID_TABLE_DEBUG && 0
static void
hash_table_show(struct rb_id_table *tbl)
{
const id_key_t *keys = tbl->keys;
const int capa = tbl->capa;
int i;
fprintf(stderr, "tbl: %p (capa: %d, num: %d, used: %d)\n", tbl, tbl->capa, tbl->num, tbl->used);
for (i=0; i<capa; i++) {
if (ITEM_KEY_ISSET(tbl, i)) {
fprintf(stderr, " -> [%d] %s %d\n", i, rb_id2name(key2id(keys[i])), (int)keys[i]);
}
}
}
#endif
Add a cache for class variables Redo of 34a2acdac788602c14bf05fb616215187badd504 and 931138b00696419945dc03e10f033b1f53cd50f3 which were reverted. GitHub PR #4340. This change implements a cache for class variables. Previously there was no cache for cvars. Cvar access is slow due to needing to travel all the way up th ancestor tree before returning the cvar value. The deeper the ancestor tree the slower cvar access will be. The benefits of the cache are more visible with a higher number of included modules due to the way Ruby looks up class variables. The benchmark here includes 26 modules and shows with the cache, this branch is 6.5x faster when accessing class variables. ``` compare-ruby: ruby 3.1.0dev (2021-03-15T06:22:34Z master 9e5105c) [x86_64-darwin19] built-ruby: ruby 3.1.0dev (2021-03-15T12:12:44Z add-cache-for-clas.. c6be009) [x86_64-darwin19] | |compare-ruby|built-ruby| |:--------|-----------:|---------:| |vm_cvar | 5.681M| 36.980M| | | -| 6.51x| ``` Benchmark.ips calling `ActiveRecord::Base.logger` from within a Rails application. ActiveRecord::Base.logger has 71 ancestors. The more ancestors a tree has, the more clear the speed increase. IE if Base had only one ancestor we'd see no improvement. This benchmark is run on a vanilla Rails application. Benchmark code: ```ruby require "benchmark/ips" require_relative "config/environment" Benchmark.ips do |x| x.report "logger" do ActiveRecord::Base.logger end end ``` Ruby 3.0 master / Rails 6.1: ``` Warming up -------------------------------------- logger 155.251k i/100ms Calculating ------------------------------------- ``` Ruby 3.0 with cvar cache / Rails 6.1: ``` Warming up -------------------------------------- logger 1.546M i/100ms Calculating ------------------------------------- logger 14.857M (± 4.8%) i/s - 74.198M in 5.006202s ``` Lastly we ran a benchmark to demonstate the difference between master and our cache when the number of modules increases. This benchmark measures 1 ancestor, 30 ancestors, and 100 ancestors. Ruby 3.0 master: ``` Warming up -------------------------------------- 1 module 1.231M i/100ms 30 modules 432.020k i/100ms 100 modules 145.399k i/100ms Calculating ------------------------------------- 1 module 12.210M (± 2.1%) i/s - 61.553M in 5.043400s 30 modules 4.354M (± 2.7%) i/s - 22.033M in 5.063839s 100 modules 1.434M (± 2.9%) i/s - 7.270M in 5.072531s Comparison: 1 module: 12209958.3 i/s 30 modules: 4354217.8 i/s - 2.80x (± 0.00) slower 100 modules: 1434447.3 i/s - 8.51x (± 0.00) slower ``` Ruby 3.0 with cvar cache: ``` Warming up -------------------------------------- 1 module 1.641M i/100ms 30 modules 1.655M i/100ms 100 modules 1.620M i/100ms Calculating ------------------------------------- 1 module 16.279M (± 3.8%) i/s - 82.038M in 5.046923s 30 modules 15.891M (± 3.9%) i/s - 79.459M in 5.007958s 100 modules 16.087M (± 3.6%) i/s - 81.005M in 5.041931s Comparison: 1 module: 16279458.0 i/s 100 modules: 16087484.6 i/s - same-ish: difference falls within error 30 modules: 15891406.2 i/s - same-ish: difference falls within error ``` Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2021-06-01 17:34:06 +00:00
MJIT_FUNC_EXPORTED int
rb_id_table_lookup(struct rb_id_table *tbl, ID id, VALUE *valp)
{
id_key_t key = id2key(id);
int index = hash_table_index(tbl, key);
if (index >= 0) {
*valp = tbl->items[index].val;
return TRUE;
}
else {
return FALSE;
}
}
static int
rb_id_table_insert_key(struct rb_id_table *tbl, const id_key_t key, const VALUE val)
{
const int index = hash_table_index(tbl, key);
if (index >= 0) {
tbl->items[index].val = val;
}
else {
hash_table_extend(tbl);
hash_table_raw_insert(tbl, key, val);
}
return TRUE;
}
Add a cache for class variables Redo of 34a2acdac788602c14bf05fb616215187badd504 and 931138b00696419945dc03e10f033b1f53cd50f3 which were reverted. GitHub PR #4340. This change implements a cache for class variables. Previously there was no cache for cvars. Cvar access is slow due to needing to travel all the way up th ancestor tree before returning the cvar value. The deeper the ancestor tree the slower cvar access will be. The benefits of the cache are more visible with a higher number of included modules due to the way Ruby looks up class variables. The benchmark here includes 26 modules and shows with the cache, this branch is 6.5x faster when accessing class variables. ``` compare-ruby: ruby 3.1.0dev (2021-03-15T06:22:34Z master 9e5105c) [x86_64-darwin19] built-ruby: ruby 3.1.0dev (2021-03-15T12:12:44Z add-cache-for-clas.. c6be009) [x86_64-darwin19] | |compare-ruby|built-ruby| |:--------|-----------:|---------:| |vm_cvar | 5.681M| 36.980M| | | -| 6.51x| ``` Benchmark.ips calling `ActiveRecord::Base.logger` from within a Rails application. ActiveRecord::Base.logger has 71 ancestors. The more ancestors a tree has, the more clear the speed increase. IE if Base had only one ancestor we'd see no improvement. This benchmark is run on a vanilla Rails application. Benchmark code: ```ruby require "benchmark/ips" require_relative "config/environment" Benchmark.ips do |x| x.report "logger" do ActiveRecord::Base.logger end end ``` Ruby 3.0 master / Rails 6.1: ``` Warming up -------------------------------------- logger 155.251k i/100ms Calculating ------------------------------------- ``` Ruby 3.0 with cvar cache / Rails 6.1: ``` Warming up -------------------------------------- logger 1.546M i/100ms Calculating ------------------------------------- logger 14.857M (± 4.8%) i/s - 74.198M in 5.006202s ``` Lastly we ran a benchmark to demonstate the difference between master and our cache when the number of modules increases. This benchmark measures 1 ancestor, 30 ancestors, and 100 ancestors. Ruby 3.0 master: ``` Warming up -------------------------------------- 1 module 1.231M i/100ms 30 modules 432.020k i/100ms 100 modules 145.399k i/100ms Calculating ------------------------------------- 1 module 12.210M (± 2.1%) i/s - 61.553M in 5.043400s 30 modules 4.354M (± 2.7%) i/s - 22.033M in 5.063839s 100 modules 1.434M (± 2.9%) i/s - 7.270M in 5.072531s Comparison: 1 module: 12209958.3 i/s 30 modules: 4354217.8 i/s - 2.80x (± 0.00) slower 100 modules: 1434447.3 i/s - 8.51x (± 0.00) slower ``` Ruby 3.0 with cvar cache: ``` Warming up -------------------------------------- 1 module 1.641M i/100ms 30 modules 1.655M i/100ms 100 modules 1.620M i/100ms Calculating ------------------------------------- 1 module 16.279M (± 3.8%) i/s - 82.038M in 5.046923s 30 modules 15.891M (± 3.9%) i/s - 79.459M in 5.007958s 100 modules 16.087M (± 3.6%) i/s - 81.005M in 5.041931s Comparison: 1 module: 16279458.0 i/s 100 modules: 16087484.6 i/s - same-ish: difference falls within error 30 modules: 15891406.2 i/s - same-ish: difference falls within error ``` Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2021-06-01 17:34:06 +00:00
MJIT_FUNC_EXPORTED int
rb_id_table_insert(struct rb_id_table *tbl, ID id, VALUE val)
{
return rb_id_table_insert_key(tbl, id2key(id), val);
}
int
rb_id_table_delete(struct rb_id_table *tbl, ID id)
{
const id_key_t key = id2key(id);
int index = hash_table_index(tbl, key);
return hash_delete_index(tbl, index);
}
void
rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, void *data)
{
int i, capa = tbl->capa;
for (i=0; i<capa; i++) {
if (ITEM_KEY_ISSET(tbl, i)) {
const id_key_t key = ITEM_GET_KEY(tbl, i);
enum rb_id_table_iterator_result ret = (*func)(key2id(key), tbl->items[i].val, data);
assert(key != 0);
2022-07-21 16:23:58 +00:00
if (ret == ID_TABLE_DELETE)
hash_delete_index(tbl, i);
else if (ret == ID_TABLE_STOP)
return;
}
}
}
void
rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, void *data)
{
int i, capa = tbl->capa;
for (i=0; i<capa; i++) {
if (ITEM_KEY_ISSET(tbl, i)) {
enum rb_id_table_iterator_result ret = (*func)(tbl->items[i].val, data);
2022-07-21 16:23:58 +00:00
if (ret == ID_TABLE_DELETE)
hash_delete_index(tbl, i);
else if (ret == ID_TABLE_STOP)
return;
}
}
}
void
rb_id_table_foreach_values_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, rb_id_table_update_value_callback_func_t *replace, void *data)
{
int i, capa = tbl->capa;
for (i = 0; i < capa; i++) {
if (ITEM_KEY_ISSET(tbl, i)) {
enum rb_id_table_iterator_result ret = (*func)(tbl->items[i].val, data);
if (ret == ID_TABLE_REPLACE) {
VALUE val = tbl->items[i].val;
ret = (*replace)(&val, data, TRUE);
tbl->items[i].val = val;
}
if (ret == ID_TABLE_STOP)
return;
}
}
}