
switching hash removal

* st.h (struct st_hash_type): Remove strong_hash.
  (struct st_table): Remove inside_rebuild_p and curr_hash.
* st.c (do_hash): Use type->hash instead of curr_hash.
  (make_tab_empty): Remove setting up curr_hash.
  (st_init_table_with_size): Remove setting up inside_rebuild_p.
  (rebuild_table): Remove clearing inside_rebuild_p.
  (reset_entry_hashes, HIT_THRESHOULD_FOR_STRONG_HASH): Remove code
  recognizing a denial attack and switching to strong hash.
* hash.c (rb_dbl_long_hash, rb_objid_hash, rb_ident_hash): Use
  rb_hash_start to randomize the hash.
  (str_seed): Remove.
  (any_hash): Remove strong_p and always use rb_str_hash for
  strings.
  (any_hash_weak, rb_any_hash_weak): Remove.
  (st_hash_type objhash): Remove rb_any_hash_weak.

based on the patch by Vladimir N Makarov <vmakarov@redhat.com> at
[ruby-core:78490].  [Bug #13002]

* test/ruby/test_hash.rb (test_wrapper): objects other than special
  constants should be able to be wrapped.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56992 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
nobu 2016-12-06 04:43:48 +00:00
parent 1e95f9da88
commit 5714a26b90
5 changed files with 46 additions and 98 deletions
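The idea behind the change, in brief: rather than keeping a second, stronger hash per table and switching to it once too many distinct keys collide on one hash value (the removed inside_rebuild_p / curr_hash / strong_hash machinery), every hash is now perturbed up front with a per-process random seed via rb_hash_start, so a precomputed set of colliding keys is useless against another process. A minimal standalone C sketch of that approach follows; the names hash_seed, init_hash_seed, mix64 and seeded_hash are illustrative only, and Ruby's real seed comes from a proper entropy source rather than time()/rand().

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Per-process random seed, chosen once at startup (sketch only). */
static uint64_t hash_seed;

static void
init_hash_seed(void)
{
    srand((unsigned)time(NULL));
    hash_seed = ((uint64_t)rand() << 32) ^ (uint64_t)rand();
}

/* A simple 64-bit mixer standing in for the real hash function. */
static uint64_t
mix64(uint64_t x)
{
    x ^= x >> 33; x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}

/* Analogue of folding in rb_hash_start(): the seed enters every hash,
   so hash values differ between processes and colliding keys cannot be
   prepared in advance. */
static uint64_t
seeded_hash(uint64_t key)
{
    return mix64(key + hash_seed);
}

int
main(void)
{
    init_hash_seed();
    printf("%llu\n", (unsigned long long)seeded_hash(42));  /* differs per run */
    return 0;
}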

hash.c

@@ -157,19 +157,13 @@ rb_dbl_long_hash(double d)
union {double d; uint64_t i;} u;
u.d = d;
return rb_objid_hash(u.i);
return rb_objid_hash(rb_hash_start(u.i));
}
#endif
}
#if SIZEOF_INT == SIZEOF_VOIDP
static const st_index_t str_seed = 0xfa835867;
#else
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
#endif
static inline st_index_t
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
{
VALUE hval;
st_index_t hnum;
@@ -177,6 +171,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
if (SPECIAL_CONST_P(a)) {
if (STATIC_SYM_P(a)) {
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
hnum = rb_hash_start(hnum);
goto out;
}
else if (FLONUM_P(a)) {
@@ -186,9 +181,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
hnum = rb_objid_hash((st_index_t)a);
}
else if (BUILTIN_TYPE(a) == T_STRING) {
hnum = (strong_p
? rb_str_hash(a)
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
hnum = rb_str_hash(a);
}
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
hnum = RSYMBOL(a)->hashval;
@@ -216,24 +209,6 @@ obj_any_hash(VALUE obj)
return FIX2LONG(obj);
}
static inline st_index_t
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE))
{
return any_hash_general(a, FALSE, other_func);
}
static st_index_t
rb_any_hash_weak(VALUE a)
{
return any_hash_weak(a, obj_any_hash);
}
static inline st_index_t
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
{
return any_hash_general(a, TRUE, other_func);
}
static st_index_t
rb_any_hash(VALUE a)
{
@@ -275,7 +250,7 @@ key64_hash(uint64_t key, uint32_t seed)
long
rb_objid_hash(st_index_t index)
{
return (long)key64_hash(index, (uint32_t)prime2);
return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2);
}
static st_index_t
@@ -299,7 +274,6 @@ rb_hash_iter_lev(VALUE h)
static const struct st_hash_type objhash = {
rb_any_cmp,
rb_any_hash_weak,
rb_any_hash,
};
@@ -319,7 +293,7 @@ rb_ident_hash(st_data_t n)
}
#endif
return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), (uint32_t)prime2);
}
static const struct st_hash_type identhash = {

st.h

@@ -61,11 +61,6 @@ typedef char st_check_for_sizeof_st_index_t[SIZEOF_VOIDP == (int)sizeof(st_index
struct st_hash_type {
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
/* The following is an optional func for stronger hash. When we
have many different keys with the same hash we can switch to
use it to prevent a denial attack with usage of hash table
collisions. */
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
};
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
@@ -82,12 +77,8 @@ struct st_table_entry; /* defined in st.c */
struct st_table {
/* Cached features of the table -- see st.c for more details. */
unsigned char entry_power, bin_power, size_ind;
/* True when we are rebuilding the table. */
unsigned char inside_rebuild_p;
/* How many times the table was rebuilt. */
unsigned int rebuilds_num;
/* Currently used hash function. */
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
const struct st_hash_type *type;
/* Number of entries currently in the table. */
st_index_t num_entries;
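With the st.h change above, an st_hash_type carries only the comparison and a single hash callback, and st_table no longer tracks a "currently used" hash. A sketch of client code against the trimmed struct, for a hypothetical table keyed by C strings (cstr_cmp and cstr_hash are made-up helpers; st_init_table, st_insert, st_hash and st_free_table are the long-standing st.c API):

#include <string.h>
#include "st.h"

/* Hypothetical callbacks for NUL-terminated C string keys. */
static int
cstr_cmp(st_data_t x, st_data_t y)
{
    return strcmp((const char *)x, (const char *)y);
}

static st_index_t
cstr_hash(st_data_t key)
{
    const char *s = (const char *)key;
    return st_hash(s, strlen(s), 0);  /* any starting value works here */
}

/* Only two members remain now that strong_hash is gone. */
static const struct st_hash_type cstr_hash_type = {
    cstr_cmp,
    cstr_hash,
};

static void
example(void)
{
    st_table *tab = st_init_table(&cstr_hash_type);
    st_insert(tab, (st_data_t)"answer", (st_data_t)42);
    st_free_table(tab);
}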

st.c

@@ -461,7 +461,6 @@ initialize_bins(st_table *tab)
static void
make_tab_empty(st_table *tab)
{
tab->curr_hash = tab->type->hash;
tab->num_entries = 0;
tab->entries_start = tab->entries_bound = 0;
if (tab->bins != NULL)
@@ -575,7 +574,6 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
tab->entry_power = n;
tab->bin_power = features[n].bin_power;
tab->size_ind = features[n].size_ind;
tab->inside_rebuild_p = FALSE;
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
tab->bins = NULL;
else
@@ -744,7 +742,6 @@ rebuild_table(st_table *tab)
st_assert(tab != NULL);
bound = tab->entries_bound;
entries = tab->entries;
tab->inside_rebuild_p = TRUE;
if ((2 * tab->num_entries <= get_allocated_entries(tab)
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|| tab->num_entries < (1 << MINIMAL_POWER2)) {
@@ -758,8 +755,6 @@
else {
new_tab = st_init_table_with_size(tab->type,
2 * tab->num_entries - 1);
st_assert(new_tab->curr_hash == new_tab->type->hash);
new_tab->curr_hash = tab->curr_hash;
new_entries = new_tab->entries;
}
ni = 0;
@@ -798,7 +793,6 @@ rebuild_table(st_table *tab)
tab->entries_start = 0;
tab->entries_bound = tab->num_entries;
tab->rebuilds_num++;
tab->inside_rebuild_p = FALSE;
#ifdef ST_DEBUG
st_check(tab);
#endif
@@ -966,28 +960,6 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
}
}
/* Recalculate hashes of entries in table TAB. */
static void
reset_entry_hashes (st_table *tab)
{
st_index_t i, bound;
st_table_entry *entries, *curr_entry_ptr;
bound = tab->entries_bound;
entries = tab->entries;
for (i = tab->entries_start; i < bound; i++) {
curr_entry_ptr = &entries[i];
if (! DELETED_ENTRY_P(curr_entry_ptr))
curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
}
}
/* If we have the following number of collisions with different keys
but with the same hash during finding a bin for new entry
inclusions, possibly a denial attack is going on. Start to use a
stronger hash. */
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
/* Return index of table TAB bin for HASH_VALUE and KEY through
BIN_IND and the pointed value as the function result. Reserve the
bin for inclusion of the corresponding entry into the table if it
@@ -1009,12 +981,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_index_t entry_index;
st_index_t first_deleted_bin_ind;
st_table_entry *entries;
int hit;
st_assert(tab != NULL && tab->bins != NULL
&& tab->entries_bound <= get_allocated_entries(tab)
&& tab->entries_start <= tab->entries_bound);
repeat:
ind = hash_bin(curr_hash_value, tab);
#ifdef QUADRATIC_PROBE
d = 1;
@@ -1024,7 +994,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
FOUND_BIN;
first_deleted_bin_ind = UNDEFINED_BIN_IND;
entries = tab->entries;
hit = 0;
for (;;) {
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_BIN_P(entry_index)) {
@@ -1039,19 +1008,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
} else if (! DELETED_BIN_P(entry_index)) {
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
break;
if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
hit++;
if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
&& tab->curr_hash != tab->type->strong_hash
&& tab->type->strong_hash != NULL
&& ! tab->inside_rebuild_p) {
tab->curr_hash = tab->type->strong_hash;
*hash_value = curr_hash_value = do_hash(key, tab);
reset_entry_hashes(tab);
rebuild_table(tab);
goto repeat;
}
}
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
first_deleted_bin_ind = ind;
#ifdef QUADRATIC_PROBE

test/ruby/test_hash.rb

@@ -1302,7 +1302,7 @@ class TestHash < Test::Unit::TestCase
assert_no_memory_leak([], prepare, code, bug9187)
end
def test_wrapper_of_special_const
def test_wrapper
bug9381 = '[ruby-core:59638] [Bug #9381]'
wrapper = Class.new do
@@ -1323,6 +1323,7 @@ class TestHash < Test::Unit::TestCase
5, true, false, nil,
0.0, 1.72723e-77,
:foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym,
"str",
].select do |x|
hash = {x => bug9381}
hash[wrapper.new(x)] != bug9381
@@ -1330,6 +1331,44 @@ class TestHash < Test::Unit::TestCase
assert_empty(bad, bug9381)
end
def assert_hash_random(obj, dump = obj.inspect)
a = [obj.hash.to_s]
3.times {
assert_in_out_err(["-e", "print #{dump}.hash"], "") do |r, e|
a += r
assert_equal([], e)
end
}
assert_not_equal([obj.hash.to_s], a.uniq)
assert_operator(a.uniq.size, :>, 2, proc {a.inspect})
end
def test_string_hash_random
assert_hash_random('abc')
end
def test_symbol_hash_random
assert_hash_random(:-)
assert_hash_random(:foo)
assert_hash_random("dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym)
end
def test_integer_hash_random
assert_hash_random(0)
assert_hash_random(+1)
assert_hash_random(-1)
assert_hash_random(+(1<<100))
assert_hash_random(-(1<<100))
end
def test_float_hash_random
assert_hash_random(0.0)
assert_hash_random(+1.0)
assert_hash_random(-1.0)
assert_hash_random(1.72723e-77)
assert_hash_random(Float::INFINITY, "Float::INFINITY")
end
def test_label_syntax
return unless @cls == Hash

test/ruby/test_string.rb

@@ -979,18 +979,6 @@ CODE
assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172)
end
def test_hash_random
str = 'abc'
a = [str.hash.to_s]
3.times {
assert_in_out_err(["-e", "print #{str.dump}.hash"], "") do |r, e|
a += r
assert_equal([], e)
end
}
assert_not_equal([str.hash.to_s], a.uniq)
end
def test_hex
assert_equal(255, S("0xff").hex)
assert_equal(-255, S("-0xff").hex)