SpookyV2.hpp
//
// SpookyHash: a 128-bit noncryptographic hash function
// By Bob Jenkins, public domain
//   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
//   Oct 31 2011: alpha again, Mix only good to 2^69 but rest appears right
//   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
//   Feb  2 2012: production, same bits as beta
//   Feb  5 2012: adjusted definitions of uint* to be more portable
//   Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough.
//   Aug  5 2012: SpookyV2 (different results)
//
// Up to 3 bytes/cycle for long messages. Reasonably fast for short messages.
// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
//
// This was developed for and tested on 64-bit x86-compatible processors.
// It assumes the processor is little-endian. There is a macro
// controlling whether unaligned reads are allowed (by default they are).
// This should be an equally good hash on big-endian machines, but it will
// compute different results on them than on little-endian machines.
//
// Google's CityHash has similar specs to SpookyHash, and CityHash is faster
// on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders
// of magnitude slower. CRCs are two or more times slower, but unlike
// SpookyHash, they have nice math for combining the CRCs of pieces to form
// the CRCs of wholes. There are also cryptographic hashes, but those are even
// slower than MD5.
//

#include <cstddef>
#include <cstdint>
#include <cstring>

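// Set this to 0 on targets where unaligned 8-byte loads are unsafe or slow;
// the hash functions below then memcpy the input into an aligned buffer first.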
#define ALLOW_UNALIGNED_READS 1

class SpookyHash {
 private:
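  // Left-rotate x by k bits. All call sites use 1 <= k <= 63, so both shift
  // amounts are well defined.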
  static inline uint64_t Rot64(uint64_t x, int k) { return (x << k) | (x >> (64 - k)); }

  //
  // This is used if the input is 96 bytes long or longer.
  //
  // The internal state is fully overwritten every 96 bytes.
  // Every input bit appears to cause at least 128 bits of entropy
  // before 96 other bytes are combined, when run forward or backward:
  //   for every input bit,
  //   for two inputs differing in just that input bit
  //   (where "differ" means xor or subtraction),
  //   with a random base value,
  //   when run forward or backward one Mix,
  // I tried 3 pairs of each; they all differed by at least 212 bits.
  //
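  // (One round per line in the body below: each round consumes one 64-bit
  // input word and rotates one state word by a fixed per-round constant.)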
  static inline void Mix(const uint64_t *data, uint64_t &s0, uint64_t &s1, uint64_t &s2, uint64_t &s3, uint64_t &s4, uint64_t &s5, uint64_t &s6, uint64_t &s7, uint64_t &s8, uint64_t &s9,
                         uint64_t &s10, uint64_t &s11) {
    s0 += data[0];    s2 ^= s10;   s11 ^= s0;   s0 = Rot64(s0, 11);    s11 += s1;
    s1 += data[1];    s3 ^= s11;   s0 ^= s1;    s1 = Rot64(s1, 32);    s0 += s2;
    s2 += data[2];    s4 ^= s0;    s1 ^= s2;    s2 = Rot64(s2, 43);    s1 += s3;
    s3 += data[3];    s5 ^= s1;    s2 ^= s3;    s3 = Rot64(s3, 31);    s2 += s4;
    s4 += data[4];    s6 ^= s2;    s3 ^= s4;    s4 = Rot64(s4, 17);    s3 += s5;
    s5 += data[5];    s7 ^= s3;    s4 ^= s5;    s5 = Rot64(s5, 28);    s4 += s6;
    s6 += data[6];    s8 ^= s4;    s5 ^= s6;    s6 = Rot64(s6, 39);    s5 += s7;
    s7 += data[7];    s9 ^= s5;    s6 ^= s7;    s7 = Rot64(s7, 57);    s6 += s8;
    s8 += data[8];    s10 ^= s6;   s7 ^= s8;    s8 = Rot64(s8, 55);    s7 += s9;
    s9 += data[9];    s11 ^= s7;   s8 ^= s9;    s9 = Rot64(s9, 54);    s8 += s10;
    s10 += data[10];  s0 ^= s8;    s9 ^= s10;   s10 = Rot64(s10, 22);  s9 += s11;
    s11 += data[11];  s1 ^= s9;    s10 ^= s11;  s11 = Rot64(s11, 46);  s10 += s0;
  }

  //
  // Mix all 12 inputs together so that h0, h1 are a hash of them all.
  //
  // For two inputs differing in just the input bits
  //   (where "differ" means xor or subtraction,
  //   and the base value is random, or a counting value starting at that bit),
  // the final result will have each bit of h0, h1 flip
  //   for every input bit, with probability 50% +- .3%, and
  //   for every pair of input bits, with probability 50% +- 3%.
  //
  // This does not rely on the last Mix() call having already mixed some.
  // Two iterations were almost good enough for a 64-bit result, but a
  // 128-bit result is reported, so End() does three iterations.
  //
  static inline void EndPartial(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3, uint64_t &h4, uint64_t &h5, uint64_t &h6, uint64_t &h7, uint64_t &h8, uint64_t &h9, uint64_t &h10,
                                uint64_t &h11) {
    h11 += h1;   h2 ^= h11;   h1 = Rot64(h1, 44);
    h0 += h2;    h3 ^= h0;    h2 = Rot64(h2, 15);
    h1 += h3;    h4 ^= h1;    h3 = Rot64(h3, 34);
    h2 += h4;    h5 ^= h2;    h4 = Rot64(h4, 21);
    h3 += h5;    h6 ^= h3;    h5 = Rot64(h5, 38);
    h4 += h6;    h7 ^= h4;    h6 = Rot64(h6, 33);
    h5 += h7;    h8 ^= h5;    h7 = Rot64(h7, 10);
    h6 += h8;    h9 ^= h6;    h8 = Rot64(h8, 13);
    h7 += h9;    h10 ^= h7;   h9 = Rot64(h9, 38);
    h8 += h10;   h11 ^= h8;   h10 = Rot64(h10, 53);
    h9 += h11;   h0 ^= h9;    h11 = Rot64(h11, 42);
    h10 += h0;   h1 ^= h10;   h0 = Rot64(h0, 54);
  }

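  // End absorbs the final (padded) 96-byte block, then applies EndPartial
  // three times to finish avalanching the state into h0, h1.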
  static inline void End(const uint64_t *data, uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3, uint64_t &h4, uint64_t &h5, uint64_t &h6, uint64_t &h7, uint64_t &h8, uint64_t &h9,
                         uint64_t &h10, uint64_t &h11) {
    h0 += data[0];    h1 += data[1];    h2 += data[2];     h3 += data[3];
    h4 += data[4];    h5 += data[5];    h6 += data[6];     h7 += data[7];
    h8 += data[8];    h9 += data[9];    h10 += data[10];   h11 += data[11];
    EndPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
    EndPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
    EndPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
  }

  //
  // The goal is for each bit of the input to expand into 128 bits of
  // apparent entropy before it is fully overwritten.
  // n trials both set and cleared at least m bits of h0 h1 h2 h3:
  //   n: 2  m: 29
  //   n: 3  m: 46
  //   n: 4  m: 57
  //   n: 5  m: 107
  //   n: 6  m: 146
  //   n: 7  m: 152
  // when run forward or backward,
  // for all 1-bit and 2-bit diffs,
  // with diffs defined by either xor or subtraction,
  // with a base of all zeros plus a counter, or plus another bit, or random.
  //
  static inline void ShortMix(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3) {
    h2 = Rot64(h2, 50);  h2 += h3;  h0 ^= h2;
    h3 = Rot64(h3, 52);  h3 += h0;  h1 ^= h3;
    h0 = Rot64(h0, 30);  h0 += h1;  h2 ^= h0;
    h1 = Rot64(h1, 41);  h1 += h2;  h3 ^= h1;
    h2 = Rot64(h2, 54);  h2 += h3;  h0 ^= h2;
    h3 = Rot64(h3, 48);  h3 += h0;  h1 ^= h3;
    h0 = Rot64(h0, 38);  h0 += h1;  h2 ^= h0;
    h1 = Rot64(h1, 37);  h1 += h2;  h3 ^= h1;
    h2 = Rot64(h2, 62);  h2 += h3;  h0 ^= h2;
    h3 = Rot64(h3, 34);  h3 += h0;  h1 ^= h3;
    h0 = Rot64(h0, 5);   h0 += h1;  h2 ^= h0;
    h1 = Rot64(h1, 36);  h1 += h2;  h3 ^= h1;
  }

  //
  // Mix all 4 inputs together so that h0, h1 are a hash of them all.
  //
  // For two inputs differing in just the input bits
  //   (where "differ" means xor or subtraction,
  //   and the base value is random, or a counting value starting at that bit),
  // the final result will have each bit of h0, h1 flip
  //   for every input bit, with probability 50% +- .3%
  //     (it is probably better than that), and
  //   for every pair of input bits, with probability 50% +- .75%
  //     (the worst case is approximately that).
  //
  static inline void ShortEnd(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3) {
    h3 ^= h2;  h2 = Rot64(h2, 15);  h3 += h2;
    h0 ^= h3;  h3 = Rot64(h3, 52);  h0 += h3;
    h1 ^= h0;  h0 = Rot64(h0, 26);  h1 += h0;
    h2 ^= h1;  h1 = Rot64(h1, 51);  h2 += h1;
    h3 ^= h2;  h2 = Rot64(h2, 28);  h3 += h2;
    h0 ^= h3;  h3 = Rot64(h3, 9);   h0 += h3;
    h1 ^= h0;  h0 = Rot64(h0, 47);  h1 += h0;
    h2 ^= h1;  h1 = Rot64(h1, 54);  h2 += h1;
    h3 ^= h2;  h2 = Rot64(h2, 32);  h3 += h2;
    h0 ^= h3;  h3 = Rot64(h3, 25);  h0 += h3;
    h1 ^= h0;  h0 = Rot64(h0, 63);  h1 += h0;
  }

 private:
  // number of uint64_t's in the internal state
  static const size_t sc_numVars = 12;

  // size of the internal state, in bytes
  static const size_t sc_blockSize = sc_numVars * 8;

  // size of the buffer of unhashed data, in bytes
  static const size_t sc_bufSize = 2 * sc_blockSize;

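  // With sc_numVars = 12: sc_blockSize = 96 bytes and sc_bufSize = 192 bytes.
  // Hash128() below routes any input shorter than sc_bufSize to Short128().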
  //
  // sc_const: a constant which:
  //  * is not zero
  //  * is odd
  //  * is a not-very-regular mix of 1's and 0's
  //  * does not need any other special mathematical properties
  //
  static const uint64_t sc_const = 0xdeadbeefdeadbeefULL;

 public:
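  //
  // Short128: 128-bit hash of a short message; used by Hash128() for inputs
  // shorter than sc_bufSize. (With unaligned reads disabled it copies the
  // input into a 192-byte stack buffer, so it must only see short inputs.)
  //
  // data:   message to hash
  // length: length of the message, in bytes
  // hash1:  in - seed 1, out - first 64 bits of the hash
  // hash2:  in - seed 2, out - second 64 bits of the hash
  //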
  static void Short128(const void *data, size_t length, uint64_t *hash1, uint64_t *hash2) {
    uint64_t buf[2 * sc_numVars];
    union {
      const uint8_t *p8;
      uint32_t *p32;
      uint64_t *p64;
      size_t i;
    } u;

    u.p8 = (const uint8_t *)data;

    if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) {
      memcpy(buf, data, length);
      u.p64 = buf;
    }

    size_t remainder = length % 32;
    uint64_t a = *hash1;
    uint64_t b = *hash2;
    uint64_t c = sc_const;
    uint64_t d = sc_const;

    if (length > 15) {
      const uint64_t *end = u.p64 + (length / 32) * 4;

      // handle all complete sets of 32 bytes
      for (; u.p64 < end; u.p64 += 4) {
        c += u.p64[0];
        d += u.p64[1];
        ShortMix(a, b, c, d);
        a += u.p64[2];
        b += u.p64[3];
      }

      // handle the case of 16+ remaining bytes
      if (remainder >= 16) {
        c += u.p64[0];
        d += u.p64[1];
        ShortMix(a, b, c, d);
        u.p64 += 2;
        remainder -= 16;
      }
    }

    // Handle the last 0..15 bytes, and fold in the message length.
    d += ((uint64_t)length) << 56;
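    // The cases below fall through deliberately: each case adds its byte(s)
    // and continues into the next until a 4- or 8-byte word boundary.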
    switch (remainder) {
      case 15:
        d += ((uint64_t)u.p8[14]) << 48;
      case 14:
        d += ((uint64_t)u.p8[13]) << 40;
      case 13:
        d += ((uint64_t)u.p8[12]) << 32;
      case 12:
        d += u.p32[2];
        c += u.p64[0];
        break;
      case 11:
        d += ((uint64_t)u.p8[10]) << 16;
      case 10:
        d += ((uint64_t)u.p8[9]) << 8;
      case 9:
        d += (uint64_t)u.p8[8];
      case 8:
        c += u.p64[0];
        break;
      case 7:
        c += ((uint64_t)u.p8[6]) << 48;
      case 6:
        c += ((uint64_t)u.p8[5]) << 40;
      case 5:
        c += ((uint64_t)u.p8[4]) << 32;
      case 4:
        c += u.p32[0];
        break;
      case 3:
        c += ((uint64_t)u.p8[2]) << 16;
      case 2:
        c += ((uint64_t)u.p8[1]) << 8;
      case 1:
        c += (uint64_t)u.p8[0];
        break;
      case 0:
        c += sc_const;
        d += sc_const;
    }
    ShortEnd(a, b, c, d);
    *hash1 = a;
    *hash2 = b;
  }

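  //
  // Short64: 64-bit hash of a short message, given a 64-bit seed.
  // Convenience wrapper around Short128().
  //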
  static uint64_t Short64(const void *data, size_t length, uint64_t seed) {
    uint64_t hash1 = seed;
    Short128(data, length, &hash1, &seed);
    return hash1;
  }

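  //
  // Hash128: hash a single message in one call, producing a 128-bit result.
  //
  // data:   message to hash
  // length: length of the message, in bytes
  // hash1:  in - seed 1, out - first 64 bits of the hash
  // hash2:  in - seed 2, out - second 64 bits of the hash
  //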
  static void Hash128(const void *data, size_t length, uint64_t *hash1, uint64_t *hash2) {
    // short messages take the (faster for them) short-input path
    if (length < sc_bufSize) {
      Short128(data, length, hash1, hash2);
      return;
    }

    uint64_t h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11;
    uint64_t buf[sc_numVars];
    uint64_t *end;
    union {
      const uint8_t *p8;
      uint64_t *p64;
      size_t i;
    } u;
    size_t remainder;

    h0 = h3 = h6 = h9 = *hash1;
    h1 = h4 = h7 = h10 = *hash2;
    h2 = h5 = h8 = h11 = sc_const;

    u.p8 = (const uint8_t *)data;
    end = u.p64 + (length / sc_blockSize) * sc_numVars;

    // handle all whole sc_blockSize blocks of bytes
    if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) {
      while (u.p64 < end) {
        Mix(u.p64, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
        u.p64 += sc_numVars;
      }
    } else {
      while (u.p64 < end) {
        memcpy(buf, u.p64, sc_blockSize);
        Mix(buf, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
        u.p64 += sc_numVars;
      }
    }

    // handle the last partial block of sc_blockSize bytes:
    // zero-pad it and record the remainder length in its last byte
    remainder = (length - ((const uint8_t *)end - (const uint8_t *)data));
    memcpy(buf, end, remainder);
    memset(((uint8_t *)buf) + remainder, 0, sc_blockSize - remainder);
    ((uint8_t *)buf)[sc_blockSize - 1] = (uint8_t)remainder;

    // do some final mixing
    End(buf, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
    *hash1 = h0;
    *hash2 = h1;
  }

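  //
  // Hash64: hash a single message in one call, returning 64 bits of the
  // 128-bit result. Convenience wrapper around Hash128().
  //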
  static uint64_t Hash64(const void *data, size_t length, uint64_t seed) {
    uint64_t hash1 = seed;
    Hash128(data, length, &hash1, &seed);
    return hash1;
  }
};
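
//
// Usage sketch (editorial example, not part of the original header; the
// message and seed values below are arbitrary):
//
//   #include "SpookyV2.hpp"
//
//   const char msg[] = "hello world";
//   // 64-bit hash with a caller-chosen seed
//   uint64_t h = SpookyHash::Hash64(msg, sizeof msg - 1, 0x12345678);
//   // full 128-bit hash: seeds go in through hash1/hash2, the two
//   // 64-bit halves of the result come back out through them
//   uint64_t h1 = 1, h2 = 2;
//   SpookyHash::Hash128(msg, sizeof msg - 1, &h1, &h2);
//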