da_jenkins.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. /*
  2. * Copyright [2021] JD.com, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include"da_hashkit.h"
  17. #define hashsize(n) ((uint32_t)1<<(n))
  18. #define hashmask(n) (hashsize(n)-1)
  19. #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
  20. #define mix(a,b,c) \
  21. { \
  22. a -= c; a ^= rot(c, 4); c += b; \
  23. b -= a; b ^= rot(a, 6); a += c; \
  24. c -= b; c ^= rot(b, 8); b += a; \
  25. a -= c; a ^= rot(c,16); c += b; \
  26. b -= a; b ^= rot(a,19); a += c; \
  27. c -= b; c ^= rot(b, 4); b += a; \
  28. }
  29. #define final(a,b,c) \
  30. { \
  31. c ^= b; c -= rot(b,14); \
  32. a ^= c; a -= rot(c,11); \
  33. b ^= a; b -= rot(a,25); \
  34. c ^= b; c -= rot(b,16); \
  35. a ^= c; a -= rot(c,4); \
  36. b ^= a; b -= rot(a,14); \
  37. c ^= b; c -= rot(b,24); \
  38. }
  39. #define JENKINS_INITVAL 13
  40. /*
  41. * jenkins_hash() -- hash a variable-length key into a 32-bit value
  42. * k : the key (the unaligned variable-length array of bytes)
  43. * length : the length of the key, counting by bytes
  44. * initval : can be any 4-byte value
  45. * Returns a 32-bit value. Every bit of the key affects every bit of
  46. * the return value. Two keys differing by one or two bits will have
  47. * totally different hash values.
  48. * The best hash table sizes are powers of 2. There is no need to do
  49. * mod a prime (mod is sooo slow!). If you need less than 32 bits,
  50. * use a bitmask. For example, if you need only 10 bits, do
  51. * h = (h & hashmask(10));
  52. * In which case, the hash table should have hashsize(10) elements.
  53. */
  54. uint32_t
  55. hash_jenkins(const char *key, size_t length)
  56. {
  57. uint32_t a,b,c; /* internal state */
  58. union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
  59. /* Set up the internal state */
  60. a = b = c = 0xdeadbeef + ((uint32_t)length) + JENKINS_INITVAL;
  61. u.ptr = key;
  62. #ifndef WORDS_BIGENDIAN
  63. if ((u.i & 0x3) == 0)
  64. {
  65. const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
  66. /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
  67. while (length > 12)
  68. {
  69. a += k[0];
  70. b += k[1];
  71. c += k[2];
  72. mix(a,b,c);
  73. length -= 12;
  74. k += 3;
  75. }
  76. /*----------------------------- handle the last (probably partial) block */
  77. /*
  78. * "k[2]&0xffffff" actually reads beyond the end of the string, but
  79. * then masks off the part it's not allowed to read. Because the
  80. * string is aligned, the masked-off tail is in the same word as the
  81. * rest of the string. Every machine with memory protection I've seen
  82. * does it on word boundaries, so is OK with this. But VALGRIND will
  83. * still catch it and complain. The masking trick does make the hash
  84. * noticably faster for short strings (like English words).
  85. */
  86. switch(length)
  87. {
  88. case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  89. case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
  90. case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
  91. case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
  92. case 8 : b+=k[1]; a+=k[0]; break;
  93. case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
  94. case 6 : b+=k[1]&0xffff; a+=k[0]; break;
  95. case 5 : b+=k[1]&0xff; a+=k[0]; break;
  96. case 4 : a+=k[0]; break;
  97. case 3 : a+=k[0]&0xffffff; break;
  98. case 2 : a+=k[0]&0xffff; break;
  99. case 1 : a+=k[0]&0xff; break;
  100. case 0 : return c; /* zero length strings require no mixing */
  101. default: return c;
  102. }
  103. }
  104. else if ((u.i & 0x1) == 0)
  105. {
  106. const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
  107. const uint8_t *k8;
  108. /*--------------- all but last block: aligned reads and different mixing */
  109. while (length > 12)
  110. {
  111. a += k[0] + (((uint32_t)k[1])<<16);
  112. b += k[2] + (((uint32_t)k[3])<<16);
  113. c += k[4] + (((uint32_t)k[5])<<16);
  114. mix(a,b,c);
  115. length -= 12;
  116. k += 6;
  117. }
  118. /*----------------------------- handle the last (probably partial) block */
  119. k8 = (const uint8_t *)k;
  120. switch(length)
  121. {
  122. case 12: c+=k[4]+(((uint32_t)k[5])<<16);
  123. b+=k[2]+(((uint32_t)k[3])<<16);
  124. a+=k[0]+(((uint32_t)k[1])<<16);
  125. break;
  126. case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
  127. case 10: c+=k[4];
  128. b+=k[2]+(((uint32_t)k[3])<<16);
  129. a+=k[0]+(((uint32_t)k[1])<<16);
  130. break;
  131. case 9 : c+=k8[8]; /* fall through */
  132. case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
  133. a+=k[0]+(((uint32_t)k[1])<<16);
  134. break;
  135. case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
  136. case 6 : b+=k[2];
  137. a+=k[0]+(((uint32_t)k[1])<<16);
  138. break;
  139. case 5 : b+=k8[4]; /* fall through */
  140. case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
  141. break;
  142. case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
  143. case 2 : a+=k[0];
  144. break;
  145. case 1 : a+=k8[0];
  146. break;
  147. case 0 : return c; /* zero length requires no mixing */
  148. default: return c;
  149. }
  150. }
  151. else
  152. { /* need to read the key one byte at a time */
  153. #endif /* little endian */
  154. const uint8_t *k = (const uint8_t *)key;
  155. /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
  156. while (length > 12)
  157. {
  158. a += k[0];
  159. a += ((uint32_t)k[1])<<8;
  160. a += ((uint32_t)k[2])<<16;
  161. a += ((uint32_t)k[3])<<24;
  162. b += k[4];
  163. b += ((uint32_t)k[5])<<8;
  164. b += ((uint32_t)k[6])<<16;
  165. b += ((uint32_t)k[7])<<24;
  166. c += k[8];
  167. c += ((uint32_t)k[9])<<8;
  168. c += ((uint32_t)k[10])<<16;
  169. c += ((uint32_t)k[11])<<24;
  170. mix(a,b,c);
  171. length -= 12;
  172. k += 12;
  173. }
  174. /*-------------------------------- last block: affect all 32 bits of (c) */
  175. switch(length) /* all the case statements fall through */
  176. {
  177. case 12: c+=((uint32_t)k[11])<<24;
  178. case 11: c+=((uint32_t)k[10])<<16;
  179. case 10: c+=((uint32_t)k[9])<<8;
  180. case 9 : c+=k[8];
  181. case 8 : b+=((uint32_t)k[7])<<24;
  182. case 7 : b+=((uint32_t)k[6])<<16;
  183. case 6 : b+=((uint32_t)k[5])<<8;
  184. case 5 : b+=k[4];
  185. case 4 : a+=((uint32_t)k[3])<<24;
  186. case 3 : a+=((uint32_t)k[2])<<16;
  187. case 2 : a+=((uint32_t)k[1])<<8;
  188. case 1 : a+=k[0];
  189. break;
  190. case 0 : return c;
  191. default : return c;
  192. }
  193. #ifndef WORDS_BIGENDIAN
  194. }
  195. #endif
  196. final(a,b,c);
  197. return c;
  198. }