diff --git a/st.c b/st.c index 56ae30ce47..0c52e7a2ef 100644 --- a/st.c +++ b/st.c @@ -90,6 +90,11 @@ o To save more memory we use 8-, 16-, 32- and 64- bit indexes in bins depending on the current hash table size. + o The implementation takes into account that the table can be + rebuilt during hashing or comparison functions. It can happen if + the functions are implemented in Ruby and a thread switch occurs + during their execution. + This implementation speeds up the Ruby hash table benchmarks in average by more 40% on Intel Haswell CPU. @@ -174,6 +179,15 @@ static const struct st_hash_type type_strcasehash = { #define PTR_EQUAL(tab, ptr, hash_val, key_) \ ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) +/* Like PTR_EQUAL, but the result is returned in RES. REBUILT_P is set + to TRUE if the table is rebuilt during the comparison. */ +#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ + do { \ + unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \ + res = PTR_EQUAL(tab, ptr, hash_val, key); \ + rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \ + } while (FALSE) + /* Features of a table. */ struct st_features { /* Power of 2 used for number of allocated entries. */ @@ -380,6 +394,11 @@ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) #define UNDEFINED_ENTRY_IND (~(st_index_t) 0) #define UNDEFINED_BIN_IND (~(st_index_t) 0) +/* Entry and bin values returned when we found a table rebuild during + the search. */ +#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1) +#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1) + /* Mark I-th bin of table TAB as corresponding to a deleted table entry. Update number of entries in the table and number of bins corresponding to deleted entries. */ @@ -823,17 +842,22 @@ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb) /* Find an entry with HASH_VALUE and KEY in TABLE using a linear search. Return the index of the found entry in array `entries`.
- If it is not found, return UNDEFINED_ENTRY_IND. */ + If it is not found, return UNDEFINED_ENTRY_IND. If the table was + rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ static inline st_index_t find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) { + int eq_p, rebuilt_p; st_index_t i, bound; st_table_entry *entries; bound = tab->entries_bound; entries = tab->entries; for (i = tab->entries_start; i < bound; i++) { - if (PTR_EQUAL(tab, &entries[i], hash_value, key)) + DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_ENTRY_IND; + if (eq_p) return i; } return UNDEFINED_ENTRY_IND; @@ -845,10 +869,12 @@ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) /*#define QUADRATIC_PROBE*/ /* Return index of entry with HASH_VALUE and KEY in table TAB. If - there is no such entry, return UNDEFINED_ENTRY_IND. */ + there is no such entry, return UNDEFINED_ENTRY_IND. If the table + was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ static st_index_t find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) { + int eq_p, rebuilt_p; st_index_t ind; #ifdef QUADRATIC_PROBE st_index_t d; @@ -869,10 +895,13 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) FOUND_BIN; for (;;) { bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (! EMPTY_OR_DELETED_BIN_P(bin) - && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) - break; - else if (EMPTY_BIN_P(bin)) + if (! 
EMPTY_OR_DELETED_BIN_P(bin)) { + DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_ENTRY_IND; + if (eq_p) + break; + } else if (EMPTY_BIN_P(bin)) return UNDEFINED_ENTRY_IND; #ifdef QUADRATIC_PROBE ind = hash_bin(ind + d, tab); @@ -887,10 +916,12 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) /* Find and return index of table TAB bin corresponding to an entry with HASH_VALUE and KEY. If there is no such bin, return - UNDEFINED_BIN_IND. */ + UNDEFINED_BIN_IND. If the table was rebuilt during the search, + return REBUILT_TABLE_BIN_IND. */ static st_index_t find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) { + int eq_p, rebuilt_p; st_index_t ind; #ifdef QUADRATIC_PROBE st_index_t d; @@ -911,10 +942,13 @@ find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) FOUND_BIN; for (;;) { bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (! EMPTY_OR_DELETED_BIN_P(bin) - && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) - break; - else if (EMPTY_BIN_P(bin)) + if (! EMPTY_OR_DELETED_BIN_P(bin)) { + DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_BIN_IND; + if (eq_p) + break; + } else if (EMPTY_BIN_P(bin)) return UNDEFINED_BIN_IND; #ifdef QUADRATIC_PROBE ind = hash_bin(ind + d, tab); @@ -955,7 +989,7 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) bin = get_bin(tab->bins, get_size_ind(tab), ind); if (EMPTY_OR_DELETED_BIN_P(bin)) return ind; - st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)); + st_assert (entries[bin - ENTRY_BASE].hash != hash_value); #ifdef QUADRATIC_PROBE ind = hash_bin(ind + d, tab); d++; @@ -973,11 +1007,13 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) bigger entries array. 
Although we can reuse a deleted bin, the result bin value is always empty if the table has no entry with KEY. Return the entries array index of the found entry or - UNDEFINED_ENTRY_IND if it is not found. */ + UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt + during the search, return REBUILT_TABLE_ENTRY_IND. */ static st_index_t find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, st_data_t key, st_index_t *bin_ind) { + int eq_p, rebuilt_p; st_index_t ind; st_hash_t curr_hash_value = *hash_value; #ifdef QUADRATIC_PROBE @@ -1015,7 +1051,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, break; } else if (! DELETED_BIN_P(entry_index)) { - if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) + DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_ENTRY_IND; + if (eq_p) break; } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) @@ -1040,13 +1079,18 @@ st_lookup(st_table *tab, st_data_t key, st_data_t *value) st_index_t bin; st_hash_t hash = do_hash(key, tab); + retry: if (tab->bins == NULL) { bin = find_entry(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; if (bin == UNDEFINED_ENTRY_IND) return 0; } else { bin = find_table_entry_ind(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; if (bin == UNDEFINED_ENTRY_IND) return 0; bin -= ENTRY_BASE; @@ -1064,13 +1108,18 @@ st_get_key(st_table *tab, st_data_t key, st_data_t *result) st_index_t bin; st_hash_t hash = do_hash(key, tab); + retry: if (tab->bins == NULL) { bin = find_entry(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; if (bin == UNDEFINED_ENTRY_IND) return 0; } else { bin = find_table_entry_ind(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; if (bin == UNDEFINED_ENTRY_IND) return 0; bin -= ENTRY_BASE; @@ 
-1104,10 +1153,13 @@ st_insert(st_table *tab, st_data_t key, st_data_t value) st_index_t bin_ind; int new_p; - rebuild_table_if_necessary(tab); hash_value = do_hash(key, tab); + retry: + rebuild_table_if_necessary(tab); if (tab->bins == NULL) { bin = find_entry(tab, hash_value, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; new_p = bin == UNDEFINED_ENTRY_IND; if (new_p) tab->num_entries++; @@ -1116,6 +1168,8 @@ st_insert(st_table *tab, st_data_t key, st_data_t value) else { bin = find_table_bin_ptr_and_reserve(tab, &hash_value, key, &bin_ind); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; new_p = bin == UNDEFINED_ENTRY_IND; bin -= ENTRY_BASE; } @@ -1192,10 +1246,13 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, st_index_t bin_ind; int new_p; - rebuild_table_if_necessary (tab); hash_value = do_hash(key, tab); + retry: + rebuild_table_if_necessary (tab); if (tab->bins == NULL) { bin = find_entry(tab, hash_value, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; new_p = bin == UNDEFINED_ENTRY_IND; if (new_p) tab->num_entries++; @@ -1204,6 +1261,8 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, else { bin = find_table_bin_ptr_and_reserve(tab, &hash_value, key, &bin_ind); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; new_p = bin == UNDEFINED_ENTRY_IND; bin -= ENTRY_BASE; } @@ -1212,7 +1271,6 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, check = tab->rebuilds_num; key = (*func)(key); st_assert(check == tab->rebuilds_num); - st_assert(do_hash(key, tab) == hash_value); ind = tab->entries_bound++; entry = &tab->entries[ind]; entry->hash = hash_value; @@ -1220,6 +1278,7 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value, entry->record = value; if (bin_ind != UNDEFINED_BIN_IND) set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); + st_assert(do_hash(key, tab) == hash_value); #ifdef ST_DEBUG st_check(tab); #endif @@ -1281,8 +1340,11 @@ 
st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) st_assert(tab != NULL); hash = do_hash(*key, tab); + retry: if (tab->bins == NULL) { bin = find_entry(tab, hash, *key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; if (bin == UNDEFINED_ENTRY_IND) { if (value != 0) *value = 0; return 0; @@ -1290,6 +1352,8 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) } else { bin_ind = find_table_bin_ind(tab, hash, *key); + if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) + goto retry; if (bin_ind == UNDEFINED_BIN_IND) { if (value != 0) *value = 0; return 0; @@ -1344,21 +1408,33 @@ st_shift(st_table *tab, st_data_t *key, st_data_t *value) for (i = tab->entries_start; i < bound; i++) { curr_entry_ptr = &entries[i]; if (! DELETED_ENTRY_P(curr_entry_ptr)) { + st_hash_t entry_hash = curr_entry_ptr->hash; + st_data_t entry_key = curr_entry_ptr->key; + if (value != 0) *value = curr_entry_ptr->record; - *key = curr_entry_ptr->key; + *key = entry_key; + retry: if (tab->bins == NULL) { - bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key); + bin = find_entry(tab, entry_hash, entry_key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) { + entries = tab->entries; + goto retry; + } st_assert(bin != UNDEFINED_ENTRY_IND); - st_assert(&entries[bin] == curr_entry_ptr); + curr_entry_ptr = &entries[bin]; } else { - bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash, - curr_entry_ptr->key); + bin_ind = find_table_bin_ind(tab, entry_hash, entry_key); + if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) { + entries = tab->entries; + goto retry; + } st_assert(bin_ind != UNDEFINED_BIN_IND); - st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) - - ENTRY_BASE] == curr_entry_ptr); + curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) + - ENTRY_BASE]; MARK_BIN_DELETED(tab, bin_ind); } + st_assert(entry_hash == curr_entry_ptr->hash && entry_key == curr_entry_ptr->key);
MARK_ENTRY_DELETED(curr_entry_ptr); tab->num_entries--; update_range_for_deleted(tab, i); @@ -1402,15 +1478,20 @@ st_update(st_table *tab, st_data_t key, int retval, existing; st_hash_t hash = do_hash(key, tab); + retry: entries = tab->entries; if (tab->bins == NULL) { bin = find_entry(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; existing = bin != UNDEFINED_ENTRY_IND; entry = &entries[bin]; bin_ind = UNDEFINED_BIN_IND; } else { bin_ind = find_table_bin_ind(tab, hash, key); + if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) + goto retry; existing = bin_ind != UNDEFINED_BIN_IND; if (existing) { bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; @@ -1489,14 +1570,19 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg, hash = curr_entry_ptr->hash; retval = (*func)(key, curr_entry_ptr->record, arg, 0); if (rebuilds_num != tab->rebuilds_num) { + retry: entries = tab->entries; packed_p = tab->bins == NULL; if (packed_p) { i = find_entry(tab, hash, key); + if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; error_p = i == UNDEFINED_ENTRY_IND; } else { i = find_table_entry_ind(tab, hash, key); + if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; error_p = i == UNDEFINED_ENTRY_IND; i -= ENTRY_BASE; } @@ -1512,36 +1598,44 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg, } switch (retval) { case ST_CONTINUE: - break; + break; case ST_CHECK: - if (check_p) - break; + if (check_p) + break; case ST_STOP: #ifdef ST_DEBUG - st_check(tab); + st_check(tab); #endif - return 0; - case ST_DELETE: - if (packed_p) { - bin = find_entry(tab, hash, curr_entry_ptr->key); - if (bin == UNDEFINED_ENTRY_IND) - break; - } - else { - bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key); - if (bin_ind == UNDEFINED_BIN_IND) - break; - bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; - MARK_BIN_DELETED(tab, bin_ind); - } - st_assert(&entries[bin] == curr_entry_ptr); 
- MARK_ENTRY_DELETED(curr_entry_ptr); - tab->num_entries--; - update_range_for_deleted(tab, bin); + return 0; + case ST_DELETE: { + st_data_t key = curr_entry_ptr->key; + + again: + if (packed_p) { + bin = find_entry(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto again; + if (bin == UNDEFINED_ENTRY_IND) + break; + } + else { + bin_ind = find_table_bin_ind(tab, hash, key); + if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) + goto again; + if (bin_ind == UNDEFINED_BIN_IND) + break; + bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; + MARK_BIN_DELETED(tab, bin_ind); + } + curr_entry_ptr = &entries[bin]; + MARK_ENTRY_DELETED(curr_entry_ptr); + tab->num_entries--; + update_range_for_deleted(tab, bin); #ifdef ST_DEBUG - st_check(tab); + st_check(tab); #endif - break; + break; + } } } #ifdef ST_DEBUG @@ -2021,10 +2115,12 @@ st_expand_table(st_table *tab, st_index_t siz) free(tmp); } -/* Rehash using linear search. */ -static void +/* Rehash using linear search. Return TRUE if we found that the table + was rebuilt. */ +static int st_rehash_linear(st_table *tab) { + int eq_p, rebuilt_p; st_index_t i, j; st_table_entry *p, *q; if (tab->bins) { @@ -2039,7 +2135,10 @@ st_rehash_linear(st_table *tab) q = &tab->entries[j]; if (DELETED_ENTRY_P(q)) continue; - if (PTR_EQUAL(tab, p, q->hash, q->key)) { + DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return TRUE; + if (eq_p) { st_assert(p < q); *p = *q; MARK_ENTRY_DELETED(q); @@ -2048,12 +2147,15 @@ st_rehash_linear(st_table *tab) } } } + return FALSE; } -/* Rehash using index */ -static void +/* Rehash using index. Return TRUE if we found that the table was + rebuilt. 
*/ +static int st_rehash_indexed(st_table *tab) { + int eq_p, rebuilt_p; st_index_t i; st_index_t const n = bins_size(tab); unsigned int const size_ind = get_size_ind(tab); @@ -2082,26 +2184,32 @@ st_rehash_indexed(st_table *tab) set_bin(bins, size_ind, ind, i + ENTRY_BASE); break; } - else if (PTR_EQUAL(tab, q, p->hash, p->key)) { - /* duplicated key; delete it */ - st_assert(q < p); - q->record = p->record; - MARK_ENTRY_DELETED(p); - tab->num_entries--; - update_range_for_deleted(tab, bin); - break; - } else { - /* hash collision; skip it */ + DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return TRUE; + if (eq_p) { + /* duplicated key; delete it */ + st_assert(q < p); + q->record = p->record; + MARK_ENTRY_DELETED(p); + tab->num_entries--; + update_range_for_deleted(tab, bin); + break; + } + else { + /* hash collision; skip it */ #ifdef QUADRATIC_PROBE - ind = hash_bin(ind + d, tab); - d++; + ind = hash_bin(ind + d, tab); + d++; #else - ind = secondary_hash(ind, tab, &peterb); + ind = secondary_hash(ind, tab, &peterb); #endif - } + } + } } } + return FALSE; } /* Reconstruct TAB's bins according to TAB's entries. This function @@ -2110,10 +2218,14 @@ st_rehash_indexed(st_table *tab) static void st_rehash(st_table *tab) { - if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) - st_rehash_linear(tab); - else - st_rehash_indexed(tab); + int rebuilt_p; + + do { + if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) + rebuilt_p = st_rehash_linear(tab); + else + rebuilt_p = st_rehash_indexed(tab); + } while (rebuilt_p); } #ifdef RUBY