From 5714a26b90b40846733fb2a5764d4c61285f5ea1 Mon Sep 17 00:00:00 2001 From: nobu Date: Tue, 6 Dec 2016 04:43:48 +0000 Subject: [PATCH] switching hash removal * st.h (struct st_hash_type): Remove strong_hash. (struct st_table): Remove inside_rebuild_p and curr_hash. * st.c (do_hash): Use type->hash instead of curr_hash. (make_tab_empty): Remove setting up curr_hash. (st_init_table_with_size): Remove setting up inside_rebuild_p. (rebuild_table): Remove clearing inside_rebuild_p. (reset_entry_hashes, HIT_THRESHOULD_FOR_STRONG_HASH): Remove code recognizing a denial attack and switching to strong hash. * hash.c (rb_dbl_long_hash, rb_objid_hash, rb_ident_hash): Use rb_hash_start to randomize the hash. (str_seed): Remove. (any_hash): Remove strong_p and use always rb_str_hash for strings. (any_hash_weak, rb_any_hash_weak): Remove. (st_hash_type objhash): Remove rb_any_hash_weak. based on the patch by Vladimir N Makarov at [ruby-core:78490]. [Bug #13002] * test/ruby/test_hash.rb (test_wrapper): objects other than special constants should be able to be wrapped. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56992 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- hash.c | 38 ++++++---------------------------- include/ruby/st.h | 9 -------- st.c | 44 ---------------------------------------- test/ruby/test_hash.rb | 41 ++++++++++++++++++++++++++++++++++++- test/ruby/test_string.rb | 12 ----------- 5 files changed, 46 insertions(+), 98 deletions(-) diff --git a/hash.c b/hash.c index 388446efad..afbc5220fb 100644 --- a/hash.c +++ b/hash.c @@ -157,19 +157,13 @@ rb_dbl_long_hash(double d) union {double d; uint64_t i;} u; u.d = d; - return rb_objid_hash(u.i); + return rb_objid_hash(rb_hash_start(u.i)); } #endif } -#if SIZEOF_INT == SIZEOF_VOIDP -static const st_index_t str_seed = 0xfa835867; -#else -static const st_index_t str_seed = 0xc42b5e2e6480b23bULL; -#endif - static inline st_index_t -any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE)) +any_hash(VALUE a, st_index_t (*other_func)(VALUE)) { VALUE hval; st_index_t hnum; @@ -177,6 +171,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE)) if (SPECIAL_CONST_P(a)) { if (STATIC_SYM_P(a)) { hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); + hnum = rb_hash_start(hnum); goto out; } else if (FLONUM_P(a)) { @@ -186,9 +181,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE)) hnum = rb_objid_hash((st_index_t)a); } else if (BUILTIN_TYPE(a) == T_STRING) { - hnum = (strong_p - ? rb_str_hash(a) - : st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed)); + hnum = rb_str_hash(a); } else if (BUILTIN_TYPE(a) == T_SYMBOL) { hnum = RSYMBOL(a)->hashval; @@ -216,24 +209,6 @@ obj_any_hash(VALUE obj) return FIX2LONG(obj); } -static inline st_index_t -any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) -{ - return any_hash_general(a, FALSE, other_func); -} - -static st_index_t -rb_any_hash_weak(VALUE a) -{ - return any_hash_weak(a, obj_any_hash); -} - -static inline st_index_t -any_hash(VALUE a, st_index_t (*other_func)(VALUE)) -{ - return any_hash_general(a, TRUE, other_func); -} - static st_index_t rb_any_hash(VALUE a) { @@ -275,7 +250,7 @@ key64_hash(uint64_t key, uint32_t seed) long rb_objid_hash(st_index_t index) { - return (long)key64_hash(index, (uint32_t)prime2); + return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2); } static st_index_t @@ -299,7 +274,6 @@ rb_hash_iter_lev(VALUE h) static const struct st_hash_type objhash = { rb_any_cmp, - rb_any_hash_weak, rb_any_hash, }; @@ -319,7 +293,7 @@ rb_ident_hash(st_data_t n) } #endif - return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2); + return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), (uint32_t)prime2); } static const struct st_hash_type identhash = { diff --git a/include/ruby/st.h b/include/ruby/st.h index ede75458c4..41f2913b80 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -61,11 +61,6 @@ typedef char st_check_for_sizeof_st_index_t[SIZEOF_VOIDP == (int)sizeof(st_index struct st_hash_type { int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */ st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */ - /* The following is an optional func for stronger hash. When we - have many different keys with the same hash we can switch to - use it to prevent a denial attack with usage of hash table - collisions. */ - st_index_t (*strong_hash)(ANYARGS /*st_data_t*/); }; #if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P) @@ -82,12 +77,8 @@ struct st_table_entry; /* defined in st.c */ struct st_table { /* Cached features of the table -- see st.c for more details. */ unsigned char entry_power, bin_power, size_ind; - /* True when we are rebuilding the table. */ - unsigned char inside_rebuild_p; /* How many times the table was rebuilt. */ unsigned int rebuilds_num; - /* Currently used hash function. */ - st_index_t (*curr_hash)(ANYARGS /*st_data_t*/); const struct st_hash_type *type; /* Number of entries currently in the table. */ st_index_t num_entries; diff --git a/st.c b/st.c index 89dc248969..887b81bc03 100644 --- a/st.c +++ b/st.c @@ -461,7 +461,6 @@ initialize_bins(st_table *tab) static void make_tab_empty(st_table *tab) { - tab->curr_hash = tab->type->hash; tab->num_entries = 0; tab->entries_start = tab->entries_bound = 0; if (tab->bins != NULL) @@ -575,7 +574,6 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size) tab->entry_power = n; tab->bin_power = features[n].bin_power; tab->size_ind = features[n].size_ind; - tab->inside_rebuild_p = FALSE; if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) tab->bins = NULL; else @@ -744,7 +742,6 @@ rebuild_table(st_table *tab) st_assert(tab != NULL); bound = tab->entries_bound; entries = tab->entries; - tab->inside_rebuild_p = TRUE; if ((2 * tab->num_entries <= get_allocated_entries(tab) && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) || tab->num_entries < (1 << MINIMAL_POWER2)) { @@ -758,8 +755,6 @@ rebuild_table(st_table *tab) else { new_tab = st_init_table_with_size(tab->type, 2 * tab->num_entries - 1); - st_assert(new_tab->curr_hash == new_tab->type->hash); - new_tab->curr_hash = tab->curr_hash; new_entries = new_tab->entries; } ni = 0; @@ -798,7 +793,6 @@ rebuild_table(st_table *tab) tab->entries_start = 0; tab->entries_bound = tab->num_entries; tab->rebuilds_num++; - tab->inside_rebuild_p = FALSE; #ifdef ST_DEBUG st_check(tab); #endif @@ -966,28 +960,6 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) } } -/* Recalculate hashes of entries in table TAB. */ -static void -reset_entry_hashes (st_table *tab) -{ - st_index_t i, bound; - st_table_entry *entries, *curr_entry_ptr; - - bound = tab->entries_bound; - entries = tab->entries; - for (i = tab->entries_start; i < bound; i++) { - curr_entry_ptr = &entries[i]; - if (! DELETED_ENTRY_P(curr_entry_ptr)) - curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab); - } -} - -/* If we have the following number of collisions with different keys - but with the same hash during finding a bin for new entry - inclusions, possibly a denial attack is going on. Start to use a - stronger hash. */ -#define HIT_THRESHOULD_FOR_STRONG_HASH 10 - /* Return index of table TAB bin for HASH_VALUE and KEY through BIN_IND and the pointed value as the function result. Reserve the bin for inclusion of the corresponding entry into the table if it @@ -1009,12 +981,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, st_index_t entry_index; st_index_t first_deleted_bin_ind; st_table_entry *entries; - int hit; st_assert(tab != NULL && tab->bins != NULL && tab->entries_bound <= get_allocated_entries(tab) && tab->entries_start <= tab->entries_bound); - repeat: ind = hash_bin(curr_hash_value, tab); #ifdef QUADRATIC_PROBE d = 1; @@ -1024,7 +994,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, FOUND_BIN; first_deleted_bin_ind = UNDEFINED_BIN_IND; entries = tab->entries; - hit = 0; for (;;) { entry_index = get_bin(tab->bins, get_size_ind(tab), ind); if (EMPTY_BIN_P(entry_index)) { @@ -1039,19 +1008,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, } else if (! DELETED_BIN_P(entry_index)) { if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) break; - if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) { - hit++; - if (hit > HIT_THRESHOULD_FOR_STRONG_HASH - && tab->curr_hash != tab->type->strong_hash - && tab->type->strong_hash != NULL - && ! tab->inside_rebuild_p) { - tab->curr_hash = tab->type->strong_hash; - *hash_value = curr_hash_value = do_hash(key, tab); - reset_entry_hashes(tab); - rebuild_table(tab); - goto repeat; - } - } } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) first_deleted_bin_ind = ind; #ifdef QUADRATIC_PROBE diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index fdb65ae6ca..679bd2702a 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -1302,7 +1302,7 @@ class TestHash < Test::Unit::TestCase assert_no_memory_leak([], prepare, code, bug9187) end - def test_wrapper_of_special_const + def test_wrapper bug9381 = '[ruby-core:59638] [Bug #9381]' wrapper = Class.new do @@ -1323,6 +1323,7 @@ class TestHash < Test::Unit::TestCase 5, true, false, nil, 0.0, 1.72723e-77, :foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym, + "str", ].select do |x| hash = {x => bug9381} hash[wrapper.new(x)] != bug9381 @@ -1330,6 +1331,44 @@ class TestHash < Test::Unit::TestCase assert_empty(bad, bug9381) end + def assert_hash_random(obj, dump = obj.inspect) + a = [obj.hash.to_s] + 3.times { + assert_in_out_err(["-e", "print #{dump}.hash"], "") do |r, e| + a += r + assert_equal([], e) + end + } + assert_not_equal([obj.hash.to_s], a.uniq) + assert_operator(a.uniq.size, :>, 2, proc {a.inspect}) + end + + def test_string_hash_random + assert_hash_random('abc') + end + + def test_symbol_hash_random + assert_hash_random(:-) + assert_hash_random(:foo) + assert_hash_random("dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym) + end + + def test_integer_hash_random + assert_hash_random(0) + assert_hash_random(+1) + assert_hash_random(-1) + assert_hash_random(+(1<<100)) + assert_hash_random(-(1<<100)) + end + + def test_float_hash_random + assert_hash_random(0.0) + assert_hash_random(+1.0) + assert_hash_random(-1.0) + assert_hash_random(1.72723e-77) + assert_hash_random(Float::INFINITY, "Float::INFINITY") + end + def test_label_syntax return unless @cls == Hash diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 7ee7fa7e97..2bd35b1703 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -979,18 +979,6 @@ CODE assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172) end - def test_hash_random - str = 'abc' - a = [str.hash.to_s] - 3.times { - assert_in_out_err(["-e", "print #{str.dump}.hash"], "") do |r, e| - a += r - assert_equal([], e) - end - } - assert_not_equal([str.hash.to_s], a.uniq) - end - def test_hex assert_equal(255, S("0xff").hex) assert_equal(-255, S("-0xff").hex)