1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Remove hash function switching

* st.h (struct st_hash_type): Remove strong_hash.
  (struct st_table): Remove inside_rebuild_p and curr_hash.
* st.c (do_hash): Use type->hash instead of curr_hash.
  (make_tab_empty): Remove setting up curr_hash.
  (st_init_table_with_size): Remove setting up inside_rebuild_p.
  (rebuild_table): Remove clearing inside_rebuild_p.
  (reset_entry_hashes, HIT_THRESHOULD_FOR_STRONG_HASH): Remove code
  that recognizes a denial-of-service attack and switches to the
  strong hash.
* hash.c (rb_dbl_long_hash, rb_objid_hash, rb_ident_hash): Use
  rb_hash_start to randomize the hash.
  (str_seed): Remove.
  (any_hash): Remove strong_p and always use rb_str_hash for
  strings.
  (any_hash_weak, rb_any_hash_weak): Remove.
  (st_hash_type objhash): Remove rb_any_hash_weak.

based on the patch by Vladimir N Makarov <vmakarov@redhat.com> at
[ruby-core:78490].  [Bug #13002]

* test/ruby/test_hash.rb (test_wrapper): objects other than special
  constants should be able to be wrapped.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56992 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2016-12-06 04:43:48 +00:00
parent 1e95f9da88
commit 5714a26b90
5 changed files with 46 additions and 98 deletions

38
hash.c
View file

@ -157,19 +157,13 @@ rb_dbl_long_hash(double d)
union {double d; uint64_t i;} u; union {double d; uint64_t i;} u;
u.d = d; u.d = d;
return rb_objid_hash(u.i); return rb_objid_hash(rb_hash_start(u.i));
} }
#endif #endif
} }
#if SIZEOF_INT == SIZEOF_VOIDP
static const st_index_t str_seed = 0xfa835867;
#else
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
#endif
static inline st_index_t static inline st_index_t
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE)) any_hash(VALUE a, st_index_t (*other_func)(VALUE))
{ {
VALUE hval; VALUE hval;
st_index_t hnum; st_index_t hnum;
@ -177,6 +171,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
if (SPECIAL_CONST_P(a)) { if (SPECIAL_CONST_P(a)) {
if (STATIC_SYM_P(a)) { if (STATIC_SYM_P(a)) {
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
hnum = rb_hash_start(hnum);
goto out; goto out;
} }
else if (FLONUM_P(a)) { else if (FLONUM_P(a)) {
@ -186,9 +181,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
hnum = rb_objid_hash((st_index_t)a); hnum = rb_objid_hash((st_index_t)a);
} }
else if (BUILTIN_TYPE(a) == T_STRING) { else if (BUILTIN_TYPE(a) == T_STRING) {
hnum = (strong_p hnum = rb_str_hash(a);
? rb_str_hash(a)
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
} }
else if (BUILTIN_TYPE(a) == T_SYMBOL) { else if (BUILTIN_TYPE(a) == T_SYMBOL) {
hnum = RSYMBOL(a)->hashval; hnum = RSYMBOL(a)->hashval;
@ -216,24 +209,6 @@ obj_any_hash(VALUE obj)
return FIX2LONG(obj); return FIX2LONG(obj);
} }
static inline st_index_t
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE))
{
return any_hash_general(a, FALSE, other_func);
}
static st_index_t
rb_any_hash_weak(VALUE a)
{
return any_hash_weak(a, obj_any_hash);
}
static inline st_index_t
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
{
return any_hash_general(a, TRUE, other_func);
}
static st_index_t static st_index_t
rb_any_hash(VALUE a) rb_any_hash(VALUE a)
{ {
@ -275,7 +250,7 @@ key64_hash(uint64_t key, uint32_t seed)
long long
rb_objid_hash(st_index_t index) rb_objid_hash(st_index_t index)
{ {
return (long)key64_hash(index, (uint32_t)prime2); return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2);
} }
static st_index_t static st_index_t
@ -299,7 +274,6 @@ rb_hash_iter_lev(VALUE h)
static const struct st_hash_type objhash = { static const struct st_hash_type objhash = {
rb_any_cmp, rb_any_cmp,
rb_any_hash_weak,
rb_any_hash, rb_any_hash,
}; };
@ -319,7 +293,7 @@ rb_ident_hash(st_data_t n)
} }
#endif #endif
return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2); return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), (uint32_t)prime2);
} }
static const struct st_hash_type identhash = { static const struct st_hash_type identhash = {

View file

@ -61,11 +61,6 @@ typedef char st_check_for_sizeof_st_index_t[SIZEOF_VOIDP == (int)sizeof(st_index
struct st_hash_type { struct st_hash_type {
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */ int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */ st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
/* The following is an optional func for stronger hash. When we
have many different keys with the same hash we can switch to
use it to prevent a denial attack with usage of hash table
collisions. */
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
}; };
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P) #if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
@ -82,12 +77,8 @@ struct st_table_entry; /* defined in st.c */
struct st_table { struct st_table {
/* Cached features of the table -- see st.c for more details. */ /* Cached features of the table -- see st.c for more details. */
unsigned char entry_power, bin_power, size_ind; unsigned char entry_power, bin_power, size_ind;
/* True when we are rebuilding the table. */
unsigned char inside_rebuild_p;
/* How many times the table was rebuilt. */ /* How many times the table was rebuilt. */
unsigned int rebuilds_num; unsigned int rebuilds_num;
/* Currently used hash function. */
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
const struct st_hash_type *type; const struct st_hash_type *type;
/* Number of entries currently in the table. */ /* Number of entries currently in the table. */
st_index_t num_entries; st_index_t num_entries;

44
st.c
View file

@ -461,7 +461,6 @@ initialize_bins(st_table *tab)
static void static void
make_tab_empty(st_table *tab) make_tab_empty(st_table *tab)
{ {
tab->curr_hash = tab->type->hash;
tab->num_entries = 0; tab->num_entries = 0;
tab->entries_start = tab->entries_bound = 0; tab->entries_start = tab->entries_bound = 0;
if (tab->bins != NULL) if (tab->bins != NULL)
@ -575,7 +574,6 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
tab->entry_power = n; tab->entry_power = n;
tab->bin_power = features[n].bin_power; tab->bin_power = features[n].bin_power;
tab->size_ind = features[n].size_ind; tab->size_ind = features[n].size_ind;
tab->inside_rebuild_p = FALSE;
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
tab->bins = NULL; tab->bins = NULL;
else else
@ -744,7 +742,6 @@ rebuild_table(st_table *tab)
st_assert(tab != NULL); st_assert(tab != NULL);
bound = tab->entries_bound; bound = tab->entries_bound;
entries = tab->entries; entries = tab->entries;
tab->inside_rebuild_p = TRUE;
if ((2 * tab->num_entries <= get_allocated_entries(tab) if ((2 * tab->num_entries <= get_allocated_entries(tab)
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|| tab->num_entries < (1 << MINIMAL_POWER2)) { || tab->num_entries < (1 << MINIMAL_POWER2)) {
@ -758,8 +755,6 @@ rebuild_table(st_table *tab)
else { else {
new_tab = st_init_table_with_size(tab->type, new_tab = st_init_table_with_size(tab->type,
2 * tab->num_entries - 1); 2 * tab->num_entries - 1);
st_assert(new_tab->curr_hash == new_tab->type->hash);
new_tab->curr_hash = tab->curr_hash;
new_entries = new_tab->entries; new_entries = new_tab->entries;
} }
ni = 0; ni = 0;
@ -798,7 +793,6 @@ rebuild_table(st_table *tab)
tab->entries_start = 0; tab->entries_start = 0;
tab->entries_bound = tab->num_entries; tab->entries_bound = tab->num_entries;
tab->rebuilds_num++; tab->rebuilds_num++;
tab->inside_rebuild_p = FALSE;
#ifdef ST_DEBUG #ifdef ST_DEBUG
st_check(tab); st_check(tab);
#endif #endif
@ -966,28 +960,6 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
} }
} }
/* Recalculate hashes of entries in table TAB. */
static void
reset_entry_hashes (st_table *tab)
{
st_index_t i, bound;
st_table_entry *entries, *curr_entry_ptr;
bound = tab->entries_bound;
entries = tab->entries;
for (i = tab->entries_start; i < bound; i++) {
curr_entry_ptr = &entries[i];
if (! DELETED_ENTRY_P(curr_entry_ptr))
curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
}
}
/* If we have the following number of collisions with different keys
but with the same hash during finding a bin for new entry
inclusions, possibly a denial attack is going on. Start to use a
stronger hash. */
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
/* Return index of table TAB bin for HASH_VALUE and KEY through /* Return index of table TAB bin for HASH_VALUE and KEY through
BIN_IND and the pointed value as the function result. Reserve the BIN_IND and the pointed value as the function result. Reserve the
bin for inclusion of the corresponding entry into the table if it bin for inclusion of the corresponding entry into the table if it
@ -1009,12 +981,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_index_t entry_index; st_index_t entry_index;
st_index_t first_deleted_bin_ind; st_index_t first_deleted_bin_ind;
st_table_entry *entries; st_table_entry *entries;
int hit;
st_assert(tab != NULL && tab->bins != NULL st_assert(tab != NULL && tab->bins != NULL
&& tab->entries_bound <= get_allocated_entries(tab) && tab->entries_bound <= get_allocated_entries(tab)
&& tab->entries_start <= tab->entries_bound); && tab->entries_start <= tab->entries_bound);
repeat:
ind = hash_bin(curr_hash_value, tab); ind = hash_bin(curr_hash_value, tab);
#ifdef QUADRATIC_PROBE #ifdef QUADRATIC_PROBE
d = 1; d = 1;
@ -1024,7 +994,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
FOUND_BIN; FOUND_BIN;
first_deleted_bin_ind = UNDEFINED_BIN_IND; first_deleted_bin_ind = UNDEFINED_BIN_IND;
entries = tab->entries; entries = tab->entries;
hit = 0;
for (;;) { for (;;) {
entry_index = get_bin(tab->bins, get_size_ind(tab), ind); entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_BIN_P(entry_index)) { if (EMPTY_BIN_P(entry_index)) {
@ -1039,19 +1008,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
} else if (! DELETED_BIN_P(entry_index)) { } else if (! DELETED_BIN_P(entry_index)) {
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
break; break;
if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
hit++;
if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
&& tab->curr_hash != tab->type->strong_hash
&& tab->type->strong_hash != NULL
&& ! tab->inside_rebuild_p) {
tab->curr_hash = tab->type->strong_hash;
*hash_value = curr_hash_value = do_hash(key, tab);
reset_entry_hashes(tab);
rebuild_table(tab);
goto repeat;
}
}
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
first_deleted_bin_ind = ind; first_deleted_bin_ind = ind;
#ifdef QUADRATIC_PROBE #ifdef QUADRATIC_PROBE

View file

@ -1302,7 +1302,7 @@ class TestHash < Test::Unit::TestCase
assert_no_memory_leak([], prepare, code, bug9187) assert_no_memory_leak([], prepare, code, bug9187)
end end
def test_wrapper_of_special_const def test_wrapper
bug9381 = '[ruby-core:59638] [Bug #9381]' bug9381 = '[ruby-core:59638] [Bug #9381]'
wrapper = Class.new do wrapper = Class.new do
@ -1323,6 +1323,7 @@ class TestHash < Test::Unit::TestCase
5, true, false, nil, 5, true, false, nil,
0.0, 1.72723e-77, 0.0, 1.72723e-77,
:foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym, :foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym,
"str",
].select do |x| ].select do |x|
hash = {x => bug9381} hash = {x => bug9381}
hash[wrapper.new(x)] != bug9381 hash[wrapper.new(x)] != bug9381
@ -1330,6 +1331,44 @@ class TestHash < Test::Unit::TestCase
assert_empty(bad, bug9381) assert_empty(bad, bug9381)
end end
def assert_hash_random(obj, dump = obj.inspect)
a = [obj.hash.to_s]
3.times {
assert_in_out_err(["-e", "print #{dump}.hash"], "") do |r, e|
a += r
assert_equal([], e)
end
}
assert_not_equal([obj.hash.to_s], a.uniq)
assert_operator(a.uniq.size, :>, 2, proc {a.inspect})
end
def test_string_hash_random
assert_hash_random('abc')
end
def test_symbol_hash_random
assert_hash_random(:-)
assert_hash_random(:foo)
assert_hash_random("dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym)
end
def test_integer_hash_random
assert_hash_random(0)
assert_hash_random(+1)
assert_hash_random(-1)
assert_hash_random(+(1<<100))
assert_hash_random(-(1<<100))
end
def test_float_hash_random
assert_hash_random(0.0)
assert_hash_random(+1.0)
assert_hash_random(-1.0)
assert_hash_random(1.72723e-77)
assert_hash_random(Float::INFINITY, "Float::INFINITY")
end
def test_label_syntax def test_label_syntax
return unless @cls == Hash return unless @cls == Hash

View file

@ -979,18 +979,6 @@ CODE
assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172) assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172)
end end
def test_hash_random
str = 'abc'
a = [str.hash.to_s]
3.times {
assert_in_out_err(["-e", "print #{str.dump}.hash"], "") do |r, e|
a += r
assert_equal([], e)
end
}
assert_not_equal([str.hash.to_s], a.uniq)
end
def test_hex def test_hex
assert_equal(255, S("0xff").hex) assert_equal(255, S("0xff").hex)
assert_equal(-255, S("-0xff").hex) assert_equal(-255, S("-0xff").hex)