1 : /*
2 : * hashfn_unstable.h
3 : *
4 : * Building blocks for creating fast inlineable hash functions. The
5 : * functions in this file are not guaranteed to be stable between versions,
6 : * and may differ by hardware platform. Hence they must not be used in
7 : * indexes or other on-disk structures. See hashfn.h if you need stability.
8 : *
9 : *
10 : * Portions Copyright (c) 2024-2025, PostgreSQL Global Development Group
11 : *
12 : * src/include/common/hashfn_unstable.h
13 : */
14 : #ifndef HASHFN_UNSTABLE_H
15 : #define HASHFN_UNSTABLE_H
16 :
17 :
18 : /*
19 : * fasthash is a modification of code taken from
20 : * https://code.google.com/archive/p/fast-hash/source/default/source
21 : * under the terms of the MIT license. The original copyright
22 : * notice follows:
23 : */
24 :
25 : /* The MIT License
26 :
27 : Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
28 :
29 : Permission is hereby granted, free of charge, to any person
30 : obtaining a copy of this software and associated documentation
31 : files (the "Software"), to deal in the Software without
32 : restriction, including without limitation the rights to use, copy,
33 : modify, merge, publish, distribute, sublicense, and/or sell copies
34 : of the Software, and to permit persons to whom the Software is
35 : furnished to do so, subject to the following conditions:
36 :
37 : The above copyright notice and this permission notice shall be
38 : included in all copies or substantial portions of the Software.
39 :
40 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
41 : EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42 : MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
43 : NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
44 : BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
45 : ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
46 : CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
47 : SOFTWARE.
48 : */
49 :
50 : /*
51 : * fasthash as implemented here has two interfaces:
52 : *
53 : * 1) Standalone functions, e.g. fasthash32() for a single value with a
54 : * known length. These return the same hash code as the original, at
55 : * least on little-endian machines.
56 : *
57 : * 2) Incremental interface. This can be used for incorporating multiple
58 : * inputs. First, initialize the hash state (here with a zero seed):
59 : *
60 : * fasthash_state hs;
61 : * fasthash_init(&hs, 0);
62 : *
63 : * If the inputs are of types that can be trivially cast to uint64, it's
64 : * sufficient to do:
65 : *
66 : * hs.accum = value1;
67 : * fasthash_combine(&hs);
68 : * hs.accum = value2;
69 : * fasthash_combine(&hs);
70 : * ...
71 : *
72 : * For longer or variable-length input, fasthash_accum() is a more
73 : * flexible, but more verbose, method. The standalone functions use it
74 : * internally; see fasthash64() for an example.
75 : *
76 : * After all inputs have been mixed in, finalize the hash:
77 : *
78 : * hashcode = fasthash_final32(&hs, 0);
79 : *
80 : * The incremental interface allows an optimization for NUL-terminated
81 : * C strings:
82 : *
83 : * len = fasthash_accum_cstring(&hs, str);
84 : * hashcode = fasthash_final32(&hs, len);
85 : *
86 : * By handling the terminator on the fly, we can avoid needing a strlen()
87 : * call to tell us how many bytes to hash. Experimentation has found that
88 : * SMHasher fails unless we incorporate the length, so it is passed to
89 : * the finalizer as a tweak.
90 : */
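/*
 * As a quick illustration (a hypothetical caller, not part of this header),
 * a hash function for a two-field lookup key could combine both fields and
 * then finalize with a zero seed and zero tweak:
 *
 *     fasthash_state hs;
 *
 *     fasthash_init(&hs, 0);
 *     hs.accum = key->field1;
 *     fasthash_combine(&hs);
 *     hs.accum = key->field2;
 *     fasthash_combine(&hs);
 *     return fasthash_final32(&hs, 0);
 *
 * Here 'key', 'field1', and 'field2' are made-up names; the fields are
 * assumed to be of types trivially castable to uint64.
 */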
91 :
92 :
93 : typedef struct fasthash_state
94 : {
95 : /* staging area for chunks of input */
96 : uint64 accum;
97 :
98 : uint64 hash;
99 : } fasthash_state;
100 :
101 : #define FH_SIZEOF_ACCUM sizeof(uint64)
102 :
103 :
104 : /*
105 : * Initialize the hash state.
106 : *
107 : * 'seed' can be zero.
108 : */
109 : static inline void
110 15630612 : fasthash_init(fasthash_state *hs, uint64 seed)
111 : {
112 15630612 : memset(hs, 0, sizeof(fasthash_state));
113 15630612 : hs->hash = seed ^ 0x880355f21e6d1965;
114 15630612 : }
115 :
116 : /* both the finalizer and part of the combining step */
117 : static inline uint64
118 47142650 : fasthash_mix(uint64 h, uint64 tweak)
119 : {
120 47142650 : h ^= (h >> 23) + tweak;
121 47142650 : h *= 0x2127599bf4325c37;
122 47142650 : h ^= h >> 47;
123 47142650 : return h;
124 : }
125 :
126 : /* combine one chunk of input into the hash */
127 : static inline void
128 31512038 : fasthash_combine(fasthash_state *hs)
129 : {
130 31512038 : hs->hash ^= fasthash_mix(hs->accum, 0);
131 31512038 : hs->hash *= 0x880355f21e6d1965;
132 31512038 : }
133 :
134 : /* accumulate up to 8 bytes of input and combine it into the hash */
135 : static inline void
136 45087598 : fasthash_accum(fasthash_state *hs, const char *k, size_t len)
137 : {
138 : uint32 lower_four;
139 :
140 : Assert(len <= FH_SIZEOF_ACCUM);
141 45087598 : hs->accum = 0;
142 :
143 : /*
144 : * For consistency, bytewise loads must match the platform's endianness.
145 : */
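/*
 * Worked example (illustrative, not from the original source): accumulating
 * the 3-byte chunk "abc" leaves accum as 0x0000000000636261 on little-endian
 * and 0x6162630000000000 on big-endian, i.e. the same value an 8-byte load
 * of "abc" followed by five zero bytes would produce on that platform.  This
 * keeps partial chunks consistent with the whole-word loads done in
 * fasthash_accum_cstring_aligned().
 */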
146 : #ifdef WORDS_BIGENDIAN
147 : switch (len)
148 : {
149 : case 8:
150 : memcpy(&hs->accum, k, 8);
151 : break;
152 : case 7:
153 : hs->accum |= (uint64) k[6] << 8;
154 : /* FALLTHROUGH */
155 : case 6:
156 : hs->accum |= (uint64) k[5] << 16;
157 : /* FALLTHROUGH */
158 : case 5:
159 : hs->accum |= (uint64) k[4] << 24;
160 : /* FALLTHROUGH */
161 : case 4:
162 : memcpy(&lower_four, k, sizeof(lower_four));
163 : hs->accum |= (uint64) lower_four << 32;
164 : break;
165 : case 3:
166 : hs->accum |= (uint64) k[2] << 40;
167 : /* FALLTHROUGH */
168 : case 2:
169 : hs->accum |= (uint64) k[1] << 48;
170 : /* FALLTHROUGH */
171 : case 1:
172 : hs->accum |= (uint64) k[0] << 56;
173 : break;
174 : case 0:
175 : return;
176 : }
177 : #else
178 45087598 : switch (len)
179 : {
180 29476724 : case 8:
181 29476724 : memcpy(&hs->accum, k, 8);
182 29476724 : break;
183 200884 : case 7:
184 200884 : hs->accum |= (uint64) k[6] << 48;
185 : /* FALLTHROUGH */
186 256844 : case 6:
187 256844 : hs->accum |= (uint64) k[5] << 40;
188 : /* FALLTHROUGH */
189 259114 : case 5:
190 259114 : hs->accum |= (uint64) k[4] << 32;
191 : /* FALLTHROUGH */
192 442738 : case 4:
193 442738 : memcpy(&lower_four, k, sizeof(lower_four));
194 442738 : hs->accum |= lower_four;
195 442738 : break;
196 397678 : case 3:
197 397678 : hs->accum |= (uint64) k[2] << 16;
198 : /* FALLTHROUGH */
199 425372 : case 2:
200 425372 : hs->accum |= (uint64) k[1] << 8;
201 : /* FALLTHROUGH */
202 429774 : case 1:
203 429774 : hs->accum |= (uint64) k[0];
204 429774 : break;
205 14738362 : case 0:
206 14738362 : return;
207 : }
208 : #endif
209 :
210 30349236 : fasthash_combine(hs);
211 : }
212 :
213 : /*
214 : * Set high bit in lowest byte where the input is zero, from:
215 : * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
216 : */
217 : #define haszero64(v) \
218 : (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
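/*
 * For instance (a hand-worked check, not part of the original source), with
 * v = 0x1122330044556677 only the byte at bits 32..39 is zero, and
 *
 *     (v - 0x0101010101010101) & ~v & 0x8080808080808080
 *
 * evaluates to 0x0000008000000000: the high bit of that zero byte, and only
 * that bit, is set.  A nonzero result is all the string loop below needs in
 * order to stop and hand off the final chunk.
 */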
219 :
220 : /*
221 : * all-purpose workhorse for fasthash_accum_cstring
222 : */
223 : static inline size_t
224 892250 : fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
225 : {
226 892250 : const char *const start = str;
227 :
228 1764762 : while (*str)
229 : {
230 872512 : size_t chunk_len = 0;
231 :
232 4613130 : while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
233 3740618 : chunk_len++;
234 :
235 872512 : fasthash_accum(hs, str, chunk_len);
236 872512 : str += chunk_len;
237 : }
238 :
239 892250 : return str - start;
240 : }
241 :
242 : /*
243 : * specialized workhorse for fasthash_accum_cstring
244 : *
245 : * With an aligned pointer, we consume the string a word at a time.
246 : * Loading the word containing the NUL terminator cannot segfault since
247 : * allocation boundaries are suitably aligned. To keep from setting
248 : * off alarms with address sanitizers, exclude this function from
249 : * such testing.
250 : */
251 : pg_attribute_no_sanitize_address()
252 : static inline size_t
253 892250 : fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
254 : {
255 892250 : const char *const start = str;
256 : size_t remainder;
257 : uint64 zero_byte_low;
258 :
259 : Assert(PointerIsAligned(start, uint64));
260 :
261 : /*
262 : * For every chunk of input, check for zero bytes before mixing into the
263 : * hash. The chunk with zeros must contain the NUL terminator.
264 : */
265 : for (;;)
266 1007956 : {
267 1900206 : uint64 chunk = *(uint64 *) str;
268 :
269 1900206 : zero_byte_low = haszero64(chunk);
270 1900206 : if (zero_byte_low)
271 892250 : break;
272 :
273 1007956 : hs->accum = chunk;
274 1007956 : fasthash_combine(hs);
275 1007956 : str += FH_SIZEOF_ACCUM;
276 : }
277 :
278 : /* mix in remaining bytes */
279 892250 : remainder = fasthash_accum_cstring_unaligned(hs, str);
280 892250 : str += remainder;
281 :
282 892250 : return str - start;
283 : }
284 :
285 : /*
286 : * Mix 'str' into the hash state and return the length of the string.
287 : */
288 : static inline size_t
289 892250 : fasthash_accum_cstring(fasthash_state *hs, const char *str)
290 : {
291 : #if SIZEOF_VOID_P >= 8
292 :
293 : size_t len;
294 : #ifdef USE_ASSERT_CHECKING
295 : size_t len_check;
296 : fasthash_state hs_check;
297 :
298 : memcpy(&hs_check, hs, sizeof(fasthash_state));
299 : len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
300 : #endif
301 892250 : if (PointerIsAligned(str, uint64))
302 : {
303 892250 : len = fasthash_accum_cstring_aligned(hs, str);
304 : Assert(len_check == len);
305 : Assert(hs_check.hash == hs->hash);
306 892250 : return len;
307 : }
308 : #endif /* SIZEOF_VOID_P */
309 :
310 : /*
311 : * It's not worth trying to make the word-at-a-time optimization work
312 : * on 32-bit platforms.
313 : */
314 0 : return fasthash_accum_cstring_unaligned(hs, str);
315 : }
316 :
317 : /*
318 : * The finalizer
319 : *
320 : * 'tweak' is intended to be the input length when the caller doesn't know
321 : * the length ahead of time (as for NUL-terminated strings), and zero
322 : * otherwise.
323 : */
324 : static inline uint64
325 15630612 : fasthash_final64(fasthash_state *hs, uint64 tweak)
326 : {
327 15630612 : return fasthash_mix(hs->hash, tweak);
328 : }
329 :
330 : /*
331 : * Reduce a 64-bit hash to a 32-bit hash.
332 : *
333 : * This optional step provides a bit of additional mixing compared to
334 : * just taking the lower 32 bits.
335 : */
336 : static inline uint32
337 15630612 : fasthash_reduce32(uint64 h)
338 : {
339 : /*
340 : * Convert the 64-bit hashcode to its Fermat residue, which retains
341 : * information from both the higher and lower parts of the hashcode.
342 : */
343 15630612 : return h - (h >> 32);
344 : }
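/*
 * A brief sketch of why fasthash_reduce32() works (reasoning added here, not
 * taken from the original source): writing h = hi * 2^32 + lo and noting
 * that 2^32 is congruent to -1 modulo 2^32 + 1, h is congruent to lo - hi.
 * The uint32 result of h - (h >> 32) is exactly (lo - hi) modulo 2^32, so
 * both halves of the 64-bit hashcode contribute to the 32-bit result.
 */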
345 :
346 : /* finalize and reduce */
347 : static inline uint32
348 892250 : fasthash_final32(fasthash_state *hs, uint64 tweak)
349 : {
350 892250 : return fasthash_reduce32(fasthash_final64(hs, tweak));
351 : }
352 :
353 : /*
354 : * The original fasthash64 function, re-implemented using the incremental
355 : * interface. Returns a 64-bit hashcode. 'len' not only controls how
356 : * many bytes to hash, but also modifies the internal seed.
357 : * 'seed' can be zero.
358 : */
359 : static inline uint64
360 14738362 : fasthash64(const char *k, size_t len, uint64 seed)
361 : {
362 : fasthash_state hs;
363 :
364 14738362 : fasthash_init(&hs, 0);
365 :
366 : /* re-initialize the seed according to input length */
367 14738362 : hs.hash = seed ^ (len * 0x880355f21e6d1965);
368 :
369 44215086 : while (len >= FH_SIZEOF_ACCUM)
370 : {
371 29476724 : fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
372 29476724 : k += FH_SIZEOF_ACCUM;
373 29476724 : len -= FH_SIZEOF_ACCUM;
374 : }
375 :
376 14738362 : fasthash_accum(&hs, k, len);
377 14738362 : return fasthash_final64(&hs, 0);
378 : }
379 :
380 : /* like fasthash64, but returns a 32-bit hashcode */
381 : static inline uint32
382 14738362 : fasthash32(const char *k, size_t len, uint64 seed)
383 : {
384 14738362 : return fasthash_reduce32(fasthash64(k, len, seed));
385 : }
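/*
 * For example (hypothetical usage, not part of this header), hashing a
 * fixed-size binary key with a zero seed might look like:
 *
 *     uint32 h = fasthash32((const char *) &key, sizeof(key), 0);
 *
 * where 'key' is a made-up variable whose in-memory bytes are the input;
 * any padding bytes would need to have well-defined contents.
 */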
386 :
387 : /*
388 : * Convenience function for hashing NUL-terminated strings
389 : */
390 : static inline uint32
391 737404 : hash_string(const char *s)
392 : {
393 : fasthash_state hs;
394 : size_t s_len;
395 :
396 737404 : fasthash_init(&hs, 0);
397 :
398 : /*
399 : * Combine the string into the hash and save its length for tweaking the
400 : * final mix.
401 : */
402 737404 : s_len = fasthash_accum_cstring(&hs, s);
403 :
404 737404 : return fasthash_final32(&hs, s_len);
405 : }
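/*
 * For example (hypothetical usage, not part of this header):
 *
 *     uint32 h = hash_string("some identifier");
 */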
406 :
407 : #endif /* HASHFN_UNSTABLE_H */