mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
switching hash removal
* st.h (struct st_hash_type): Remove strong_hash.
  (struct st_table): Remove inside_rebuild_p and curr_hash.

* st.c (do_hash): Use type->hash instead of curr_hash.
  (make_tab_empty): Remove setting up curr_hash.
  (st_init_table_with_size): Remove setting up inside_rebuild_p.
  (rebuild_table): Remove clearing inside_rebuild_p.
  (reset_entry_hashes, HIT_THRESHOULD_FOR_STRONG_HASH): Remove code
  recognizing a denial-of-service attack and switching to strong hash.

* hash.c (rb_dbl_long_hash, rb_objid_hash, rb_ident_hash): Use
  rb_hash_start to randomize the hash.
  (str_seed): Remove.
  (any_hash): Remove strong_p and always use rb_str_hash for strings.
  (any_hash_weak, rb_any_hash_weak): Remove.
  (st_hash_type objhash): Remove rb_any_hash_weak.

  Based on the patch by Vladimir N Makarov <vmakarov@redhat.com> at
  [ruby-core:78490]. [Bug #13002]

* test/ruby/test_hash.rb (test_wrapper): Objects other than special
  constants should be able to be wrapped.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56992 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
1e95f9da88
commit
5714a26b90
5 changed files with 46 additions and 98 deletions
38
hash.c
38
hash.c
|
@ -157,19 +157,13 @@ rb_dbl_long_hash(double d)
|
||||||
union {double d; uint64_t i;} u;
|
union {double d; uint64_t i;} u;
|
||||||
|
|
||||||
u.d = d;
|
u.d = d;
|
||||||
return rb_objid_hash(u.i);
|
return rb_objid_hash(rb_hash_start(u.i));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SIZEOF_INT == SIZEOF_VOIDP
|
|
||||||
static const st_index_t str_seed = 0xfa835867;
|
|
||||||
#else
|
|
||||||
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline st_index_t
|
static inline st_index_t
|
||||||
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
|
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
|
||||||
{
|
{
|
||||||
VALUE hval;
|
VALUE hval;
|
||||||
st_index_t hnum;
|
st_index_t hnum;
|
||||||
|
@ -177,6 +171,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
|
||||||
if (SPECIAL_CONST_P(a)) {
|
if (SPECIAL_CONST_P(a)) {
|
||||||
if (STATIC_SYM_P(a)) {
|
if (STATIC_SYM_P(a)) {
|
||||||
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
|
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
|
||||||
|
hnum = rb_hash_start(hnum);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
else if (FLONUM_P(a)) {
|
else if (FLONUM_P(a)) {
|
||||||
|
@ -186,9 +181,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
|
||||||
hnum = rb_objid_hash((st_index_t)a);
|
hnum = rb_objid_hash((st_index_t)a);
|
||||||
}
|
}
|
||||||
else if (BUILTIN_TYPE(a) == T_STRING) {
|
else if (BUILTIN_TYPE(a) == T_STRING) {
|
||||||
hnum = (strong_p
|
hnum = rb_str_hash(a);
|
||||||
? rb_str_hash(a)
|
|
||||||
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
|
|
||||||
}
|
}
|
||||||
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
|
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
|
||||||
hnum = RSYMBOL(a)->hashval;
|
hnum = RSYMBOL(a)->hashval;
|
||||||
|
@ -216,24 +209,6 @@ obj_any_hash(VALUE obj)
|
||||||
return FIX2LONG(obj);
|
return FIX2LONG(obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline st_index_t
|
|
||||||
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE))
|
|
||||||
{
|
|
||||||
return any_hash_general(a, FALSE, other_func);
|
|
||||||
}
|
|
||||||
|
|
||||||
static st_index_t
|
|
||||||
rb_any_hash_weak(VALUE a)
|
|
||||||
{
|
|
||||||
return any_hash_weak(a, obj_any_hash);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline st_index_t
|
|
||||||
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
|
|
||||||
{
|
|
||||||
return any_hash_general(a, TRUE, other_func);
|
|
||||||
}
|
|
||||||
|
|
||||||
static st_index_t
|
static st_index_t
|
||||||
rb_any_hash(VALUE a)
|
rb_any_hash(VALUE a)
|
||||||
{
|
{
|
||||||
|
@ -275,7 +250,7 @@ key64_hash(uint64_t key, uint32_t seed)
|
||||||
long
|
long
|
||||||
rb_objid_hash(st_index_t index)
|
rb_objid_hash(st_index_t index)
|
||||||
{
|
{
|
||||||
return (long)key64_hash(index, (uint32_t)prime2);
|
return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static st_index_t
|
static st_index_t
|
||||||
|
@ -299,7 +274,6 @@ rb_hash_iter_lev(VALUE h)
|
||||||
|
|
||||||
static const struct st_hash_type objhash = {
|
static const struct st_hash_type objhash = {
|
||||||
rb_any_cmp,
|
rb_any_cmp,
|
||||||
rb_any_hash_weak,
|
|
||||||
rb_any_hash,
|
rb_any_hash,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -319,7 +293,7 @@ rb_ident_hash(st_data_t n)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
|
return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), (uint32_t)prime2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct st_hash_type identhash = {
|
static const struct st_hash_type identhash = {
|
||||||
|
|
|
@ -61,11 +61,6 @@ typedef char st_check_for_sizeof_st_index_t[SIZEOF_VOIDP == (int)sizeof(st_index
|
||||||
struct st_hash_type {
|
struct st_hash_type {
|
||||||
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
|
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
|
||||||
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
|
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
|
||||||
/* The following is an optional func for stronger hash. When we
|
|
||||||
have many different keys with the same hash we can switch to
|
|
||||||
use it to prevent a denial attack with usage of hash table
|
|
||||||
collisions. */
|
|
||||||
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
|
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
|
||||||
|
@ -82,12 +77,8 @@ struct st_table_entry; /* defined in st.c */
|
||||||
struct st_table {
|
struct st_table {
|
||||||
/* Cached features of the table -- see st.c for more details. */
|
/* Cached features of the table -- see st.c for more details. */
|
||||||
unsigned char entry_power, bin_power, size_ind;
|
unsigned char entry_power, bin_power, size_ind;
|
||||||
/* True when we are rebuilding the table. */
|
|
||||||
unsigned char inside_rebuild_p;
|
|
||||||
/* How many times the table was rebuilt. */
|
/* How many times the table was rebuilt. */
|
||||||
unsigned int rebuilds_num;
|
unsigned int rebuilds_num;
|
||||||
/* Currently used hash function. */
|
|
||||||
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
|
|
||||||
const struct st_hash_type *type;
|
const struct st_hash_type *type;
|
||||||
/* Number of entries currently in the table. */
|
/* Number of entries currently in the table. */
|
||||||
st_index_t num_entries;
|
st_index_t num_entries;
|
||||||
|
|
44
st.c
44
st.c
|
@ -461,7 +461,6 @@ initialize_bins(st_table *tab)
|
||||||
static void
|
static void
|
||||||
make_tab_empty(st_table *tab)
|
make_tab_empty(st_table *tab)
|
||||||
{
|
{
|
||||||
tab->curr_hash = tab->type->hash;
|
|
||||||
tab->num_entries = 0;
|
tab->num_entries = 0;
|
||||||
tab->entries_start = tab->entries_bound = 0;
|
tab->entries_start = tab->entries_bound = 0;
|
||||||
if (tab->bins != NULL)
|
if (tab->bins != NULL)
|
||||||
|
@ -575,7 +574,6 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
|
||||||
tab->entry_power = n;
|
tab->entry_power = n;
|
||||||
tab->bin_power = features[n].bin_power;
|
tab->bin_power = features[n].bin_power;
|
||||||
tab->size_ind = features[n].size_ind;
|
tab->size_ind = features[n].size_ind;
|
||||||
tab->inside_rebuild_p = FALSE;
|
|
||||||
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
||||||
tab->bins = NULL;
|
tab->bins = NULL;
|
||||||
else
|
else
|
||||||
|
@ -744,7 +742,6 @@ rebuild_table(st_table *tab)
|
||||||
st_assert(tab != NULL);
|
st_assert(tab != NULL);
|
||||||
bound = tab->entries_bound;
|
bound = tab->entries_bound;
|
||||||
entries = tab->entries;
|
entries = tab->entries;
|
||||||
tab->inside_rebuild_p = TRUE;
|
|
||||||
if ((2 * tab->num_entries <= get_allocated_entries(tab)
|
if ((2 * tab->num_entries <= get_allocated_entries(tab)
|
||||||
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|
||||||
|| tab->num_entries < (1 << MINIMAL_POWER2)) {
|
|| tab->num_entries < (1 << MINIMAL_POWER2)) {
|
||||||
|
@ -758,8 +755,6 @@ rebuild_table(st_table *tab)
|
||||||
else {
|
else {
|
||||||
new_tab = st_init_table_with_size(tab->type,
|
new_tab = st_init_table_with_size(tab->type,
|
||||||
2 * tab->num_entries - 1);
|
2 * tab->num_entries - 1);
|
||||||
st_assert(new_tab->curr_hash == new_tab->type->hash);
|
|
||||||
new_tab->curr_hash = tab->curr_hash;
|
|
||||||
new_entries = new_tab->entries;
|
new_entries = new_tab->entries;
|
||||||
}
|
}
|
||||||
ni = 0;
|
ni = 0;
|
||||||
|
@ -798,7 +793,6 @@ rebuild_table(st_table *tab)
|
||||||
tab->entries_start = 0;
|
tab->entries_start = 0;
|
||||||
tab->entries_bound = tab->num_entries;
|
tab->entries_bound = tab->num_entries;
|
||||||
tab->rebuilds_num++;
|
tab->rebuilds_num++;
|
||||||
tab->inside_rebuild_p = FALSE;
|
|
||||||
#ifdef ST_DEBUG
|
#ifdef ST_DEBUG
|
||||||
st_check(tab);
|
st_check(tab);
|
||||||
#endif
|
#endif
|
||||||
|
@ -966,28 +960,6 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Recalculate hashes of entries in table TAB. */
|
|
||||||
static void
|
|
||||||
reset_entry_hashes (st_table *tab)
|
|
||||||
{
|
|
||||||
st_index_t i, bound;
|
|
||||||
st_table_entry *entries, *curr_entry_ptr;
|
|
||||||
|
|
||||||
bound = tab->entries_bound;
|
|
||||||
entries = tab->entries;
|
|
||||||
for (i = tab->entries_start; i < bound; i++) {
|
|
||||||
curr_entry_ptr = &entries[i];
|
|
||||||
if (! DELETED_ENTRY_P(curr_entry_ptr))
|
|
||||||
curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If we have the following number of collisions with different keys
|
|
||||||
but with the same hash during finding a bin for new entry
|
|
||||||
inclusions, possibly a denial attack is going on. Start to use a
|
|
||||||
stronger hash. */
|
|
||||||
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
|
|
||||||
|
|
||||||
/* Return index of table TAB bin for HASH_VALUE and KEY through
|
/* Return index of table TAB bin for HASH_VALUE and KEY through
|
||||||
BIN_IND and the pointed value as the function result. Reserve the
|
BIN_IND and the pointed value as the function result. Reserve the
|
||||||
bin for inclusion of the corresponding entry into the table if it
|
bin for inclusion of the corresponding entry into the table if it
|
||||||
|
@ -1009,12 +981,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
|
||||||
st_index_t entry_index;
|
st_index_t entry_index;
|
||||||
st_index_t first_deleted_bin_ind;
|
st_index_t first_deleted_bin_ind;
|
||||||
st_table_entry *entries;
|
st_table_entry *entries;
|
||||||
int hit;
|
|
||||||
|
|
||||||
st_assert(tab != NULL && tab->bins != NULL
|
st_assert(tab != NULL && tab->bins != NULL
|
||||||
&& tab->entries_bound <= get_allocated_entries(tab)
|
&& tab->entries_bound <= get_allocated_entries(tab)
|
||||||
&& tab->entries_start <= tab->entries_bound);
|
&& tab->entries_start <= tab->entries_bound);
|
||||||
repeat:
|
|
||||||
ind = hash_bin(curr_hash_value, tab);
|
ind = hash_bin(curr_hash_value, tab);
|
||||||
#ifdef QUADRATIC_PROBE
|
#ifdef QUADRATIC_PROBE
|
||||||
d = 1;
|
d = 1;
|
||||||
|
@ -1024,7 +994,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
|
||||||
FOUND_BIN;
|
FOUND_BIN;
|
||||||
first_deleted_bin_ind = UNDEFINED_BIN_IND;
|
first_deleted_bin_ind = UNDEFINED_BIN_IND;
|
||||||
entries = tab->entries;
|
entries = tab->entries;
|
||||||
hit = 0;
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
|
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
|
||||||
if (EMPTY_BIN_P(entry_index)) {
|
if (EMPTY_BIN_P(entry_index)) {
|
||||||
|
@ -1039,19 +1008,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
|
||||||
} else if (! DELETED_BIN_P(entry_index)) {
|
} else if (! DELETED_BIN_P(entry_index)) {
|
||||||
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
|
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
|
||||||
break;
|
break;
|
||||||
if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
|
|
||||||
hit++;
|
|
||||||
if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
|
|
||||||
&& tab->curr_hash != tab->type->strong_hash
|
|
||||||
&& tab->type->strong_hash != NULL
|
|
||||||
&& ! tab->inside_rebuild_p) {
|
|
||||||
tab->curr_hash = tab->type->strong_hash;
|
|
||||||
*hash_value = curr_hash_value = do_hash(key, tab);
|
|
||||||
reset_entry_hashes(tab);
|
|
||||||
rebuild_table(tab);
|
|
||||||
goto repeat;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
|
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
|
||||||
first_deleted_bin_ind = ind;
|
first_deleted_bin_ind = ind;
|
||||||
#ifdef QUADRATIC_PROBE
|
#ifdef QUADRATIC_PROBE
|
||||||
|
|
|
@ -1302,7 +1302,7 @@ class TestHash < Test::Unit::TestCase
|
||||||
assert_no_memory_leak([], prepare, code, bug9187)
|
assert_no_memory_leak([], prepare, code, bug9187)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_wrapper_of_special_const
|
def test_wrapper
|
||||||
bug9381 = '[ruby-core:59638] [Bug #9381]'
|
bug9381 = '[ruby-core:59638] [Bug #9381]'
|
||||||
|
|
||||||
wrapper = Class.new do
|
wrapper = Class.new do
|
||||||
|
@ -1323,6 +1323,7 @@ class TestHash < Test::Unit::TestCase
|
||||||
5, true, false, nil,
|
5, true, false, nil,
|
||||||
0.0, 1.72723e-77,
|
0.0, 1.72723e-77,
|
||||||
:foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym,
|
:foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym,
|
||||||
|
"str",
|
||||||
].select do |x|
|
].select do |x|
|
||||||
hash = {x => bug9381}
|
hash = {x => bug9381}
|
||||||
hash[wrapper.new(x)] != bug9381
|
hash[wrapper.new(x)] != bug9381
|
||||||
|
@ -1330,6 +1331,44 @@ class TestHash < Test::Unit::TestCase
|
||||||
assert_empty(bad, bug9381)
|
assert_empty(bad, bug9381)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def assert_hash_random(obj, dump = obj.inspect)
|
||||||
|
a = [obj.hash.to_s]
|
||||||
|
3.times {
|
||||||
|
assert_in_out_err(["-e", "print #{dump}.hash"], "") do |r, e|
|
||||||
|
a += r
|
||||||
|
assert_equal([], e)
|
||||||
|
end
|
||||||
|
}
|
||||||
|
assert_not_equal([obj.hash.to_s], a.uniq)
|
||||||
|
assert_operator(a.uniq.size, :>, 2, proc {a.inspect})
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_string_hash_random
|
||||||
|
assert_hash_random('abc')
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_symbol_hash_random
|
||||||
|
assert_hash_random(:-)
|
||||||
|
assert_hash_random(:foo)
|
||||||
|
assert_hash_random("dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym)
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_integer_hash_random
|
||||||
|
assert_hash_random(0)
|
||||||
|
assert_hash_random(+1)
|
||||||
|
assert_hash_random(-1)
|
||||||
|
assert_hash_random(+(1<<100))
|
||||||
|
assert_hash_random(-(1<<100))
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_float_hash_random
|
||||||
|
assert_hash_random(0.0)
|
||||||
|
assert_hash_random(+1.0)
|
||||||
|
assert_hash_random(-1.0)
|
||||||
|
assert_hash_random(1.72723e-77)
|
||||||
|
assert_hash_random(Float::INFINITY, "Float::INFINITY")
|
||||||
|
end
|
||||||
|
|
||||||
def test_label_syntax
|
def test_label_syntax
|
||||||
return unless @cls == Hash
|
return unless @cls == Hash
|
||||||
|
|
||||||
|
|
|
@ -979,18 +979,6 @@ CODE
|
||||||
assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172)
|
assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_hash_random
|
|
||||||
str = 'abc'
|
|
||||||
a = [str.hash.to_s]
|
|
||||||
3.times {
|
|
||||||
assert_in_out_err(["-e", "print #{str.dump}.hash"], "") do |r, e|
|
|
||||||
a += r
|
|
||||||
assert_equal([], e)
|
|
||||||
end
|
|
||||||
}
|
|
||||||
assert_not_equal([str.hash.to_s], a.uniq)
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_hex
|
def test_hex
|
||||||
assert_equal(255, S("0xff").hex)
|
assert_equal(255, S("0xff").hex)
|
||||||
assert_equal(-255, S("-0xff").hex)
|
assert_equal(-255, S("-0xff").hex)
|
||||||
|
|
Loading…
Reference in a new issue