mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* string.c (hash): updated to MurmurHash 2.0 2009-09-19.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25101 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
8367245839
commit
bf4ab7f610
2 changed files with 92 additions and 71 deletions
|
@ -1,4 +1,6 @@
|
||||||
Sat Sep 26 16:32:01 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sat Sep 26 16:48:32 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* string.c (hash): updated to MurmurHash 2.0 2009-09-19.
|
||||||
|
|
||||||
* string.c (rb_hash_start): fixed shift width on 128bit platform.
|
* string.c (rb_hash_start): fixed shift width on 128bit platform.
|
||||||
|
|
||||||
|
|
159
string.c
159
string.c
|
@ -1990,12 +1990,23 @@ rb_str_concat(VALUE str1, VALUE str2)
|
||||||
#define MURMUR 2
|
#define MURMUR 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MurmurMagic 0x7fd652ad
|
typedef char check_murmur_voidp[SIZEOF_VOIDP == (int)sizeof(st_index_t) ? 1 : -1];
|
||||||
|
#define SIZEOF_ST_INDEX_T SIZEOF_VOIDP
|
||||||
|
|
||||||
static inline unsigned long
|
#if MURMUR == 1
|
||||||
murmur(unsigned long h, unsigned long k, int r)
|
#define MurmurMagic 0xc6a4a793
|
||||||
|
#elif MURMUR == 2
|
||||||
|
#if SIZEOF_ST_INDEX_T > 4
|
||||||
|
#define MurmurMagic 0xc6a4a7935bd1e995
|
||||||
|
#else
|
||||||
|
#define MurmurMagic 0x5bd1e995
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline st_index_t
|
||||||
|
murmur(st_index_t h, st_index_t k, int r)
|
||||||
{
|
{
|
||||||
const unsigned int m = MurmurMagic;
|
const st_index_t m = MurmurMagic;
|
||||||
#if MURMUR == 1
|
#if MURMUR == 1
|
||||||
h += k;
|
h += k;
|
||||||
h *= m;
|
h *= m;
|
||||||
|
@ -2010,10 +2021,9 @@ murmur(unsigned long h, unsigned long k, int r)
|
||||||
#endif
|
#endif
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
#define murmur16(h) murmur_step(h, 16)
|
|
||||||
|
|
||||||
static inline unsigned long
|
static inline st_index_t
|
||||||
murmur_finish(unsigned long h)
|
murmur_finish(st_index_t h)
|
||||||
{
|
{
|
||||||
#if MURMUR == 1
|
#if MURMUR == 1
|
||||||
h = murmur(h, 0, 10);
|
h = murmur(h, 0, 10);
|
||||||
|
@ -2028,35 +2038,50 @@ murmur_finish(unsigned long h)
|
||||||
|
|
||||||
#define murmur_step(h, k) murmur(h, k, 16)
|
#define murmur_step(h, k) murmur(h, k, 16)
|
||||||
|
|
||||||
|
#if MURMUR == 1
|
||||||
|
#define murmur1(h) murmur_step(h, 16)
|
||||||
|
#else
|
||||||
|
#define murmur1(h) murmur_step(h, 24)
|
||||||
|
#endif
|
||||||
|
|
||||||
static st_index_t
|
static st_index_t
|
||||||
hash(const unsigned char * data, size_t len, st_index_t h)
|
hash(const void *ptr, size_t len, st_index_t h)
|
||||||
{
|
{
|
||||||
uint32_t t = 0;
|
const char *data = ptr;
|
||||||
|
st_index_t t = 0;
|
||||||
|
|
||||||
h += 0xdeadbeef;
|
h += 0xdeadbeef;
|
||||||
|
|
||||||
#ifdef WORDS_BIGENDIAN
|
#define data_at(n) (st_index_t)((unsigned char)data[n])
|
||||||
# define SHIFT_OFFSET(i) ((i)*CHAR_BIT)
|
#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
|
||||||
#else
|
#if SIZEOF_ST_INDEX_T > 4
|
||||||
# define SHIFT_OFFSET(i) (32-(i)*CHAR_BIT)
|
#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
|
||||||
|
#if SIZEOF_ST_INDEX_T > 8
|
||||||
|
#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
|
||||||
|
UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
|
||||||
|
#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
|
||||||
#endif
|
#endif
|
||||||
if (len >= sizeof(uint32_t)) {
|
#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
|
||||||
|
#else
|
||||||
|
#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
|
||||||
|
#endif
|
||||||
|
if (len >= sizeof(st_index_t)) {
|
||||||
#if !UNALIGNED_WORD_ACCESS
|
#if !UNALIGNED_WORD_ACCESS
|
||||||
int align = (int)((VALUE)data % sizeof(uint32_t));
|
int align = (int)((st_data_t)data % sizeof(st_index_t));
|
||||||
if (align) {
|
if (align) {
|
||||||
uint32_t d = 0;
|
st_index_t d = 0;
|
||||||
int sl, sr, pack;
|
int sl, sr, pack;
|
||||||
|
|
||||||
switch (align) {
|
switch (align) {
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
case 1: t |= data[2];
|
# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
|
||||||
case 2: t |= data[1] << CHAR_BIT;
|
t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
|
||||||
case 3: t |= data[0] << CHAR_BIT*2;
|
|
||||||
#else
|
#else
|
||||||
case 1: t |= data[2] << CHAR_BIT*2;
|
# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
|
||||||
case 2: t |= data[1] << CHAR_BIT;
|
t |= data_at(n) << CHAR_BIT*(n)
|
||||||
case 3: t |= data[0];
|
|
||||||
#endif
|
#endif
|
||||||
|
UNALIGNED_ADD_ALL;
|
||||||
|
#undef UNALIGNED_ADD
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
|
@ -2065,14 +2090,14 @@ hash(const unsigned char * data, size_t len, st_index_t h)
|
||||||
t <<= (CHAR_BIT * align);
|
t <<= (CHAR_BIT * align);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
data += sizeof(uint32_t)-align;
|
data += sizeof(st_index_t)-align;
|
||||||
len -= sizeof(uint32_t)-align;
|
len -= sizeof(st_index_t)-align;
|
||||||
|
|
||||||
sl = CHAR_BIT * ((int)sizeof(uint32_t)-align);
|
sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
|
||||||
sr = CHAR_BIT * align;
|
sr = CHAR_BIT * align;
|
||||||
|
|
||||||
while (len >= sizeof(uint32_t)) {
|
while (len >= sizeof(st_index_t)) {
|
||||||
d = *(uint32_t *)data;
|
d = *(st_index_t *)data;
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
t = (t << sr) | (d >> sl);
|
t = (t << sr) | (d >> sl);
|
||||||
#else
|
#else
|
||||||
|
@ -2080,22 +2105,22 @@ hash(const unsigned char * data, size_t len, st_index_t h)
|
||||||
#endif
|
#endif
|
||||||
h = murmur_step(h, t);
|
h = murmur_step(h, t);
|
||||||
t = d;
|
t = d;
|
||||||
data += sizeof(uint32_t);
|
data += sizeof(st_index_t);
|
||||||
len -= sizeof(uint32_t);
|
len -= sizeof(st_index_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
pack = len < (size_t)align ? (int)len : align;
|
pack = len < (size_t)align ? (int)len : align;
|
||||||
d = 0;
|
d = 0;
|
||||||
switch (pack) {
|
switch (pack) {
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
case 3: d |= data[2] << CHAR_BIT;
|
# define UNALIGNED_ADD(n) case (n) + 1: \
|
||||||
case 2: d |= data[1] << CHAR_BIT*2;
|
d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
|
||||||
case 1: d |= data[0] << CHAR_BIT*3;
|
|
||||||
#else
|
#else
|
||||||
case 3: d |= data[2] << CHAR_BIT*2;
|
# define UNALIGNED_ADD(n) case (n) + 1: \
|
||||||
case 2: d |= data[1] << CHAR_BIT;
|
d |= data_at(n) << CHAR_BIT*(n)
|
||||||
case 1: d |= data[0];
|
|
||||||
#endif
|
#endif
|
||||||
|
UNALIGNED_ADD_ALL;
|
||||||
|
#undef UNALIGNED_ADD
|
||||||
}
|
}
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
t = (t << sr) | (d >> sl);
|
t = (t << sr) | (d >> sl);
|
||||||
|
@ -2114,30 +2139,24 @@ hash(const unsigned char * data, size_t len, st_index_t h)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
h = murmur_step(h, *(uint32_t *)data);
|
h = murmur_step(h, *(st_index_t *)data);
|
||||||
data += sizeof(uint32_t);
|
data += sizeof(st_index_t);
|
||||||
len -= sizeof(uint32_t);
|
len -= sizeof(st_index_t);
|
||||||
} while (len >= sizeof(uint32_t));
|
} while (len >= sizeof(st_index_t));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
t = 0;
|
t = 0;
|
||||||
switch (len) {
|
switch (len) {
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
case 3:
|
# define UNALIGNED_ADD(n) case (n) + 1: \
|
||||||
t |= data[2] << CHAR_BIT;
|
t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
|
||||||
case 2:
|
|
||||||
t |= data[1] << CHAR_BIT*2;
|
|
||||||
case 1:
|
|
||||||
t |= data[0] << CHAR_BIT*3;
|
|
||||||
#else
|
#else
|
||||||
case 3:
|
# define UNALIGNED_ADD(n) case (n) + 1: \
|
||||||
t |= data[2] << CHAR_BIT*2;
|
t |= data_at(n) << CHAR_BIT*(n)
|
||||||
case 2:
|
|
||||||
t |= data[1] << CHAR_BIT;
|
|
||||||
case 1:
|
|
||||||
t |= data[0];
|
|
||||||
#endif
|
#endif
|
||||||
|
UNALIGNED_ADD_ALL;
|
||||||
|
#undef UNALIGNED_ADD
|
||||||
#if MURMUR == 1
|
#if MURMUR == 1
|
||||||
h = murmur_step(h, t);
|
h = murmur_step(h, t);
|
||||||
#elif MURMUR == 2
|
#elif MURMUR == 2
|
||||||
|
@ -2153,7 +2172,7 @@ hash(const unsigned char * data, size_t len, st_index_t h)
|
||||||
}
|
}
|
||||||
|
|
||||||
st_index_t
|
st_index_t
|
||||||
rb_hash_uint32(st_index_t h, unsigned int i)
|
rb_hash_uint32(st_index_t h, uint32_t i)
|
||||||
{
|
{
|
||||||
return murmur_step(h + i, 16);
|
return murmur_step(h + i, 16);
|
||||||
}
|
}
|
||||||
|
@ -2161,29 +2180,29 @@ rb_hash_uint32(st_index_t h, unsigned int i)
|
||||||
st_index_t
|
st_index_t
|
||||||
rb_hash_uint(st_index_t h, st_index_t i)
|
rb_hash_uint(st_index_t h, st_index_t i)
|
||||||
{
|
{
|
||||||
unsigned long v = 0;
|
st_index_t v = 0;
|
||||||
h += i;
|
h += i;
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 12*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
|
||||||
v = murmur16(v + (h >> 12*8));
|
v = murmur1(v + (h >> 12*8));
|
||||||
#endif
|
#endif
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 8*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
|
||||||
v = murmur16(v + (h >> 8*8));
|
v = murmur1(v + (h >> 8*8));
|
||||||
#endif
|
#endif
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 4*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
|
||||||
v = murmur16(v + (h >> 4*8));
|
v = murmur1(v + (h >> 4*8));
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
v = murmur16(v + h);
|
v = murmur1(v + h);
|
||||||
#ifndef WORDS_BIGENDIAN
|
#ifndef WORDS_BIGENDIAN
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 4*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
|
||||||
v = murmur16(v + (h >> 4*8));
|
v = murmur1(v + (h >> 4*8));
|
||||||
#endif
|
#endif
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 8*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
|
||||||
v = murmur16(v + (h >> 8*8));
|
v = murmur1(v + (h >> 8*8));
|
||||||
#endif
|
#endif
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 12*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
|
||||||
v = murmur16(v + (h >> 12*8));
|
v = murmur1(v + (h >> 12*8));
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
return v;
|
return v;
|
||||||
|
@ -2201,19 +2220,19 @@ st_index_t
|
||||||
rb_hash_start(st_index_t h)
|
rb_hash_start(st_index_t h)
|
||||||
{
|
{
|
||||||
static int hashseed_init = 0;
|
static int hashseed_init = 0;
|
||||||
static VALUE hashseed;
|
static st_index_t hashseed;
|
||||||
|
|
||||||
if (!hashseed_init) {
|
if (!hashseed_init) {
|
||||||
hashseed = rb_genrand_int32();
|
hashseed = rb_genrand_int32();
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 4*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
|
||||||
hashseed <<= 32;
|
hashseed <<= 32;
|
||||||
hashseed |= rb_genrand_int32();
|
hashseed |= rb_genrand_int32();
|
||||||
#endif
|
#endif
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 8*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
|
||||||
hashseed <<= 32;
|
hashseed <<= 32;
|
||||||
hashseed |= rb_genrand_int32();
|
hashseed |= rb_genrand_int32();
|
||||||
#endif
|
#endif
|
||||||
#if SIZEOF_VALUE*CHAR_BIT > 12*8
|
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
|
||||||
hashseed <<= 32;
|
hashseed <<= 32;
|
||||||
hashseed |= rb_genrand_int32();
|
hashseed |= rb_genrand_int32();
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Reference in a new issue