LCOV - code coverage report
Current view: top level - src/include/common - hashfn_unstable.h (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 98.9 % 87 86
Test Date: 2026-03-15 15:14:35 Functions: 100.0 % 13 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * hashfn_unstable.h
       3              :  *
       4              :  * Building blocks for creating fast inlineable hash functions. The
       5              :  * functions in this file are not guaranteed to be stable between versions,
       6              :  * and may differ by hardware platform. Hence they must not be used in
       7              :  * indexes or other on-disk structures. See hashfn.h if you need stability.
       8              :  *
       9              :  *
      10              :  * Portions Copyright (c) 2024-2026, PostgreSQL Global Development Group
      11              :  *
      12              :  * src/include/common/hashfn_unstable.h
      13              :  */
      14              : #ifndef HASHFN_UNSTABLE_H
      15              : #define HASHFN_UNSTABLE_H
      16              : 
      17              : 
      18              : /*
      19              :  * fasthash is a modification of code taken from
      20              :  * https://code.google.com/archive/p/fast-hash/source/default/source
      21              :  * under the terms of the MIT license. The original copyright
      22              :  * notice follows:
      23              :  */
      24              : 
      25              : /* The MIT License
      26              : 
      27              :    Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
      28              : 
      29              :    Permission is hereby granted, free of charge, to any person
      30              :    obtaining a copy of this software and associated documentation
      31              :    files (the "Software"), to deal in the Software without
      32              :    restriction, including without limitation the rights to use, copy,
      33              :    modify, merge, publish, distribute, sublicense, and/or sell copies
      34              :    of the Software, and to permit persons to whom the Software is
      35              :    furnished to do so, subject to the following conditions:
      36              : 
      37              :    The above copyright notice and this permission notice shall be
      38              :    included in all copies or substantial portions of the Software.
      39              : 
      40              :    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      41              :    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      42              :    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      43              :    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
      44              :    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
      45              :    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
      46              :    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      47              :    SOFTWARE.
      48              : */
      49              : 
      50              : /*
      51              :  * fasthash as implemented here has two interfaces:
      52              :  *
      53              :  * 1) Standalone functions that take a single input.
      54              :  *
      55              :  * 2) Incremental interface. This can used for incorporating multiple
      56              :  * inputs. First, initialize the hash state (here with a zero seed):
      57              :  *
      58              :  * fasthash_state hs;
      59              :  * fasthash_init(&hs, 0);
      60              :  *
      61              :  * Next, accumulate input into the hash state.
      62              :  * If the inputs are of types that can be trivially cast to uint64, it's
      63              :  * sufficient to do:
      64              :  *
      65              :  * hs.accum = value1;
      66              :  * fasthash_combine(&hs);
      67              :  * hs.accum = value2;
      68              :  * fasthash_combine(&hs);
      69              :  * ...
      70              :  *
      71              :  * For longer or variable-length input, fasthash_accum() is a more
      72              :  * flexible, but more verbose method. The standalone functions use this
      73              :  * internally, so see fasthash64() for an example of this.
      74              :  *
      75              :  * After all inputs have been mixed in, finalize the hash and optionally
      76              :  * reduce to 32 bits. If all inputs are fixed-length, it's sufficient
      77              :  * to pass zero for the tweak:
      78              :  *
      79              :  * hashcode = fasthash_final32(&hs, 0);
      80              :  *
      81              :  * For variable length input, experimentation has found that SMHasher
      82              :  * fails unless we pass the length for the tweak. When accumulating
      83              :  * multiple varlen values, it's probably safest to calculate a tweak
      84              :  * such that the bits of all individual lengths are present, for example:
      85              :  *
      86              :  * lengths = len1 + (len2 << 10) + (len3 << 20);
      87              :  * hashcode = fasthash_final32(&hs, lengths);
      88              :  *
      89              :  * The incremental interface allows an optimization for NUL-terminated
      90              :  * C strings:
      91              :  *
      92              :  * len = fasthash_accum_cstring(&hs, str);
      93              :  * hashcode = fasthash_final32(&hs, len);
      94              :  *
      95              :  * By computing the length on-the-fly, we can avoid needing a strlen()
      96              :  * call to tell us how many bytes to hash.
      97              :  */
      98              : 
      99              : 
     100              : typedef struct fasthash_state
     101              : {
     102              :     /* staging area for chunks of input */
     103              :     uint64      accum;
     104              : 
     105              :     uint64      hash;
     106              : } fasthash_state;
     107              : 
     108              : #define FH_SIZEOF_ACCUM sizeof(uint64)
     109              : 
     110              : 
     111              : /*
     112              :  * Initialize the hash state.
     113              :  *
     114              :  * 'seed' can be zero.
     115              :  */
     116              : static inline void
     117      9633494 : fasthash_init(fasthash_state *hs, uint64 seed)
     118              : {
     119      9633494 :     memset(hs, 0, sizeof(fasthash_state));
     120      9633494 :     hs->hash = seed ^ 0x880355f21e6d1965;
     121      9633494 : }
     122              : 
     123              : /* both the finalizer and part of the combining step */
     124              : static inline uint64
     125     29040163 : fasthash_mix(uint64 h, uint64 tweak)
     126              : {
     127     29040163 :     h ^= (h >> 23) + tweak;
     128     29040163 :     h *= 0x2127599bf4325c37;
     129     29040163 :     h ^= h >> 47;
     130     29040163 :     return h;
     131              : }
     132              : 
     133              : /* combine one chunk of input into the hash */
     134              : static inline void
     135     19406669 : fasthash_combine(fasthash_state *hs)
     136              : {
     137     19406669 :     hs->hash ^= fasthash_mix(hs->accum, 0);
     138     19406669 :     hs->hash *= 0x880355f21e6d1965;
     139     19406669 : }
     140              : 
     141              : /* accumulate up to 8 bytes of input and combine it into the hash */
     142              : static inline void
     143     27953781 : fasthash_accum(fasthash_state *hs, const char *k, size_t len)
     144              : {
     145              :     uint32      lower_four;
     146              : 
     147              :     Assert(len <= FH_SIZEOF_ACCUM);
     148     27953781 :     hs->accum = 0;
     149              : 
     150              :     /*
     151              :      * For consistency, bytewise loads must match the platform's endianness.
     152              :      */
     153              : #ifdef WORDS_BIGENDIAN
     154              :     switch (len)
     155              :     {
     156              :         case 8:
     157              :             memcpy(&hs->accum, k, 8);
     158              :             break;
     159              :         case 7:
     160              :             hs->accum |= (uint64) k[6] << 8;
     161              :             pg_fallthrough;
     162              :         case 6:
     163              :             hs->accum |= (uint64) k[5] << 16;
     164              :             pg_fallthrough;
     165              :         case 5:
     166              :             hs->accum |= (uint64) k[4] << 24;
     167              :             pg_fallthrough;
     168              :         case 4:
     169              :             memcpy(&lower_four, k, sizeof(lower_four));
     170              :             hs->accum |= (uint64) lower_four << 32;
     171              :             break;
     172              :         case 3:
     173              :             hs->accum |= (uint64) k[2] << 40;
     174              :             pg_fallthrough;
     175              :         case 2:
     176              :             hs->accum |= (uint64) k[1] << 48;
     177              :             pg_fallthrough;
     178              :         case 1:
     179              :             hs->accum |= (uint64) k[0] << 56;
     180              :             break;
     181              :         case 0:
     182              :             return;
     183              :     }
     184              : #else
     185     27953781 :     switch (len)
     186              :     {
     187     18330712 :         case 8:
     188     18330712 :             memcpy(&hs->accum, k, 8);
     189     18330712 :             break;
     190       107162 :         case 7:
     191       107162 :             hs->accum |= (uint64) k[6] << 48;
     192              :             pg_fallthrough;
     193       137022 :         case 6:
     194       137022 :             hs->accum |= (uint64) k[5] << 40;
     195              :             pg_fallthrough;
     196       138330 :         case 5:
     197       138330 :             hs->accum |= (uint64) k[4] << 32;
     198              :             pg_fallthrough;
     199       232023 :         case 4:
     200       232023 :             memcpy(&lower_four, k, sizeof(lower_four));
     201       232023 :             hs->accum |= lower_four;
     202       232023 :             break;
     203       208328 :         case 3:
     204       208328 :             hs->accum |= (uint64) k[2] << 16;
     205              :             pg_fallthrough;
     206       223036 :         case 2:
     207       223036 :             hs->accum |= (uint64) k[1] << 8;
     208              :             pg_fallthrough;
     209       225690 :         case 1:
     210       225690 :             hs->accum |= (uint64) k[0];
     211       225690 :             break;
     212      9165356 :         case 0:
     213      9165356 :             return;
     214              :     }
     215              : #endif
     216              : 
     217     18788425 :     fasthash_combine(hs);
     218              : }
     219              : 
     220              : /*
     221              :  * Set high bit in lowest byte where the input is zero, from:
     222              :  * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
     223              :  */
     224              : #define haszero64(v) \
     225              :     (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
     226              : 
     227              : /*
     228              :  * all-purpose workhorse for fasthash_accum_cstring
     229              :  */
     230              : static inline size_t
     231       468541 : fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
     232              : {
     233       468541 :     const char *const start = str;
     234              : 
     235       926254 :     while (*str)
     236              :     {
     237       457713 :         size_t      chunk_len = 0;
     238              : 
     239      2425373 :         while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
     240      1967660 :             chunk_len++;
     241              : 
     242       457713 :         fasthash_accum(hs, str, chunk_len);
     243       457713 :         str += chunk_len;
     244              :     }
     245              : 
     246       468541 :     return str - start;
     247              : }
     248              : 
     249              : /*
     250              :  * specialized workhorse for fasthash_accum_cstring
     251              :  *
     252              :  * With an aligned pointer, we consume the string a word at a time.
     253              :  * Loading the word containing the NUL terminator cannot segfault since
     254              :  * allocation boundaries are suitably aligned. To keep from setting
     255              :  * off alarms with address sanitizers, exclude this function from
     256              :  * such testing.
     257              :  */
     258              : pg_attribute_no_sanitize_address()
     259              : static inline size_t
     260       468541 : fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
     261              : {
     262       468541 :     const char *const start = str;
     263              :     size_t      remainder;
     264              :     uint64      zero_byte_low;
     265              : 
     266              :     Assert(PointerIsAligned(start, uint64));
     267              : 
     268              :     /*
     269              :      * For every chunk of input, check for zero bytes before mixing into the
     270              :      * hash. The chunk with zeros must contain the NUL terminator.
     271              :      */
     272              :     for (;;)
     273       530821 :     {
     274       999362 :         uint64      chunk = *(const uint64 *) str;
     275              : 
     276       999362 :         zero_byte_low = haszero64(chunk);
     277       999362 :         if (zero_byte_low)
     278       468541 :             break;
     279              : 
     280       530821 :         hs->accum = chunk;
     281       530821 :         fasthash_combine(hs);
     282       530821 :         str += FH_SIZEOF_ACCUM;
     283              :     }
     284              : 
     285              :     /* mix in remaining bytes */
     286       468541 :     remainder = fasthash_accum_cstring_unaligned(hs, str);
     287       468541 :     str += remainder;
     288              : 
     289       468541 :     return str - start;
     290              : }
     291              : 
     292              : /*
     293              :  * Mix 'str' into the hash state and return the length of the string.
     294              :  */
     295              : static inline size_t
     296       468541 : fasthash_accum_cstring(fasthash_state *hs, const char *str)
     297              : {
     298              : #if SIZEOF_VOID_P >= 8
     299              : 
     300              :     size_t      len;
     301              : #ifdef USE_ASSERT_CHECKING
     302              :     size_t      len_check;
     303              :     fasthash_state hs_check;
     304              : 
     305              :     memcpy(&hs_check, hs, sizeof(fasthash_state));
     306              :     len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
     307              : #endif
     308       468541 :     if (PointerIsAligned(str, uint64))
     309              :     {
     310       468541 :         len = fasthash_accum_cstring_aligned(hs, str);
     311              :         Assert(len_check == len);
     312              :         Assert(hs_check.hash == hs->hash);
     313       468541 :         return len;
     314              :     }
     315              : #endif                          /* SIZEOF_VOID_P */
     316              : 
     317              :     /*
     318              :      * It's not worth it to try to make the word-at-a-time optimization work
     319              :      * on 32-bit platforms.
     320              :      */
     321            0 :     return fasthash_accum_cstring_unaligned(hs, str);
     322              : }
     323              : 
     324              : /*
     325              :  * The finalizer
     326              :  *
     327              :  * 'tweak' is intended to be the input length when the caller doesn't know
     328              :  * the length ahead of time, such as for NUL-terminated strings, otherwise
     329              :  * zero.
     330              :  */
     331              : static inline uint64
     332      9633494 : fasthash_final64(fasthash_state *hs, uint64 tweak)
     333              : {
     334      9633494 :     return fasthash_mix(hs->hash, tweak);
     335              : }
     336              : 
     337              : /*
     338              :  * Reduce a 64-bit hash to a 32-bit hash.
     339              :  *
     340              :  * This optional step provides a bit more additional mixing compared to
     341              :  * just taking the lower 32-bits.
     342              :  */
     343              : static inline uint32
     344      9633494 : fasthash_reduce32(uint64 h)
     345              : {
     346              :     /*
     347              :      * Convert the 64-bit hashcode to Fermat residue, which shall retain
     348              :      * information from both the higher and lower parts of hashcode.
     349              :      */
     350      9633494 :     return h - (h >> 32);
     351              : }
     352              : 
     353              : /* finalize and reduce */
     354              : static inline uint32
     355       468138 : fasthash_final32(fasthash_state *hs, uint64 tweak)
     356              : {
     357       468138 :     return fasthash_reduce32(fasthash_final64(hs, tweak));
     358              : }
     359              : 
     360              : 
     361              : /* Standalone functions */
     362              : 
     363              : /*
     364              :  * The original fasthash64 function, re-implemented using the incremental
     365              :  * interface. Returns the same 64-bit hashcode as the original,
     366              :  * at least on little-endian machines. 'len' controls not only how
     367              :  * many bytes to hash, but also modifies the internal seed.
     368              :  * 'seed' can be zero.
     369              :  */
     370              : static inline uint64
     371      9165356 : fasthash64(const char *k, size_t len, uint64 seed)
     372              : {
     373              :     fasthash_state hs;
     374              : 
     375      9165356 :     fasthash_init(&hs, 0);
     376              : 
     377              :     /* re-initialize the seed according to input length */
     378      9165356 :     hs.hash = seed ^ (len * 0x880355f21e6d1965);
     379              : 
     380     27496068 :     while (len >= FH_SIZEOF_ACCUM)
     381              :     {
     382     18330712 :         fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
     383     18330712 :         k += FH_SIZEOF_ACCUM;
     384     18330712 :         len -= FH_SIZEOF_ACCUM;
     385              :     }
     386              : 
     387      9165356 :     fasthash_accum(&hs, k, len);
     388              : 
     389              :     /*
     390              :      * Since we already mixed the input length into the seed, we can just pass
     391              :      * zero here. This matches upstream behavior as well.
     392              :      */
     393      9165356 :     return fasthash_final64(&hs, 0);
     394              : }
     395              : 
     396              : /* like fasthash64, but returns a 32-bit hashcode */
     397              : static inline uint32
     398      9165356 : fasthash32(const char *k, size_t len, uint64 seed)
     399              : {
     400      9165356 :     return fasthash_reduce32(fasthash64(k, len, seed));
     401              : }
     402              : 
     403              : /*
     404              :  * Convenience function for hashing NUL-terminated strings
     405              :  *
     406              :  * Note: This is faster than, and computes a different result from,
     407              :  * "fasthash32(s, strlen(s))"
     408              :  */
     409              : static inline uint32
     410       379258 : hash_string(const char *s)
     411              : {
     412              :     fasthash_state hs;
     413              :     size_t      s_len;
     414              : 
     415       379258 :     fasthash_init(&hs, 0);
     416              : 
     417              :     /*
     418              :      * Combine string into the hash and save the length for tweaking the final
     419              :      * mix.
     420              :      */
     421       379258 :     s_len = fasthash_accum_cstring(&hs, s);
     422              : 
     423       379258 :     return fasthash_final32(&hs, s_len);
     424              : }
     425              : 
     426              : #endif                          /* HASHFN_UNSTABLE_H */
        

Generated by: LCOV version 2.0-1