LCOV - code coverage report
Current view: top level - src/include/common - hashfn_unstable.h (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 98.9 % 87 86
Test Date: 2026-05-17 14:16:24 Functions: 100.0 % 13 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * hashfn_unstable.h
       3              :  *
       4              :  * Building blocks for creating fast inlineable hash functions. The
       5              :  * functions in this file are not guaranteed to be stable between versions,
       6              :  * and may differ by hardware platform. Hence they must not be used in
       7              :  * indexes or other on-disk structures. See hashfn.h if you need stability.
       8              :  *
       9              :  *
      10              :  * Portions Copyright (c) 2024-2026, PostgreSQL Global Development Group
      11              :  *
      12              :  * src/include/common/hashfn_unstable.h
      13              :  */
      14              : #ifndef HASHFN_UNSTABLE_H
      15              : #define HASHFN_UNSTABLE_H
      16              : 
      17              : 
      18              : /*
      19              :  * fasthash is a modification of code taken from
      20              :  * https://code.google.com/archive/p/fast-hash/source/default/source
      21              :  * under the terms of the MIT license. The original copyright
      22              :  * notice follows:
      23              :  */
      24              : 
      25              : /*
      26              :  * The MIT License
      27              :  *
      28              :  * Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
      29              :  *
      30              :  * Permission is hereby granted, free of charge, to any person
      31              :  * obtaining a copy of this software and associated documentation
      32              :  * files (the "Software"), to deal in the Software without
      33              :  * restriction, including without limitation the rights to use, copy,
      34              :  * modify, merge, publish, distribute, sublicense, and/or sell copies
      35              :  * of the Software, and to permit persons to whom the Software is
      36              :  * furnished to do so, subject to the following conditions:
      37              :  *
      38              :  * The above copyright notice and this permission notice shall be
      39              :  * included in all copies or substantial portions of the Software.
      40              :  *
      41              :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      42              :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      43              :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      44              :  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
      45              :  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
      46              :  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
      47              :  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      48              :  * SOFTWARE.
      49              :  */
      50              : 
      51              : /*
      52              :  * fasthash as implemented here has two interfaces:
      53              :  *
      54              :  * 1) Standalone functions that take a single input.
      55              :  *
      56              :  * 2) Incremental interface. This can used for incorporating multiple
      57              :  * inputs. First, initialize the hash state (here with a zero seed):
      58              :  *
      59              :  * fasthash_state hs;
      60              :  * fasthash_init(&hs, 0);
      61              :  *
      62              :  * Next, accumulate input into the hash state.
      63              :  * If the inputs are of types that can be trivially cast to uint64, it's
      64              :  * sufficient to do:
      65              :  *
      66              :  * hs.accum = value1;
      67              :  * fasthash_combine(&hs);
      68              :  * hs.accum = value2;
      69              :  * fasthash_combine(&hs);
      70              :  * ...
      71              :  *
      72              :  * For longer or variable-length input, fasthash_accum() is a more
      73              :  * flexible, but more verbose method. The standalone functions use this
      74              :  * internally, so see fasthash64() for an example of this.
      75              :  *
      76              :  * After all inputs have been mixed in, finalize the hash and optionally
      77              :  * reduce to 32 bits. If all inputs are fixed-length, it's sufficient
      78              :  * to pass zero for the tweak:
      79              :  *
      80              :  * hashcode = fasthash_final32(&hs, 0);
      81              :  *
      82              :  * For variable length input, experimentation has found that SMHasher
      83              :  * fails unless we pass the length for the tweak. When accumulating
      84              :  * multiple varlen values, it's probably safest to calculate a tweak
      85              :  * such that the bits of all individual lengths are present, for example:
      86              :  *
      87              :  * lengths = len1 + (len2 << 10) + (len3 << 20);
      88              :  * hashcode = fasthash_final32(&hs, lengths);
      89              :  *
      90              :  * The incremental interface allows an optimization for NUL-terminated
      91              :  * C strings:
      92              :  *
      93              :  * len = fasthash_accum_cstring(&hs, str);
      94              :  * hashcode = fasthash_final32(&hs, len);
      95              :  *
      96              :  * By computing the length on-the-fly, we can avoid needing a strlen()
      97              :  * call to tell us how many bytes to hash.
      98              :  */
      99              : 
     100              : 
     101              : typedef struct fasthash_state
     102              : {
     103              :     /* staging area for chunks of input */
     104              :     uint64      accum;
     105              : 
     106              :     uint64      hash;
     107              : } fasthash_state;
     108              : 
     109              : #define FH_SIZEOF_ACCUM sizeof(uint64)
     110              : 
     111              : 
     112              : /*
     113              :  * Initialize the hash state.
     114              :  *
     115              :  * 'seed' can be zero.
     116              :  */
     117              : static inline void
     118     10804536 : fasthash_init(fasthash_state *hs, uint64 seed)
     119              : {
     120     10804536 :     memset(hs, 0, sizeof(fasthash_state));
     121     10804536 :     hs->hash = seed ^ 0x880355f21e6d1965;
     122     10804536 : }
     123              : 
     124              : /* both the finalizer and part of the combining step */
     125              : static inline uint64
     126     32443689 : fasthash_mix(uint64 h, uint64 tweak)
     127              : {
     128     32443689 :     h ^= (h >> 23) + tweak;
     129     32443689 :     h *= 0x2127599bf4325c37;
     130     32443689 :     h ^= h >> 47;
     131     32443689 :     return h;
     132              : }
     133              : 
     134              : /* combine one chunk of input into the hash */
     135              : static inline void
     136     21639153 : fasthash_combine(fasthash_state *hs)
     137              : {
     138     21639153 :     hs->hash ^= fasthash_mix(hs->accum, 0);
     139     21639153 :     hs->hash *= 0x880355f21e6d1965;
     140     21639153 : }
     141              : 
     142              : /* accumulate up to 8 bytes of input and combine it into the hash */
     143              : static inline void
     144     30540210 : fasthash_accum(fasthash_state *hs, const char *k, size_t len)
     145              : {
     146              :     uint32      lower_four;
     147              : 
     148              :     Assert(len <= FH_SIZEOF_ACCUM);
     149     30540210 :     hs->accum = 0;
     150              : 
     151              :     /*
     152              :      * For consistency, bytewise loads must match the platform's endianness.
     153              :      */
     154              : #ifdef WORDS_BIGENDIAN
     155              :     switch (len)
     156              :     {
     157              :         case 8:
     158              :             memcpy(&hs->accum, k, 8);
     159              :             break;
     160              :         case 7:
     161              :             hs->accum |= (uint64) k[6] << 8;
     162              :             pg_fallthrough;
     163              :         case 6:
     164              :             hs->accum |= (uint64) k[5] << 16;
     165              :             pg_fallthrough;
     166              :         case 5:
     167              :             hs->accum |= (uint64) k[4] << 24;
     168              :             pg_fallthrough;
     169              :         case 4:
     170              :             memcpy(&lower_four, k, sizeof(lower_four));
     171              :             hs->accum |= (uint64) lower_four << 32;
     172              :             break;
     173              :         case 3:
     174              :             hs->accum |= (uint64) k[2] << 40;
     175              :             pg_fallthrough;
     176              :         case 2:
     177              :             hs->accum |= (uint64) k[1] << 48;
     178              :             pg_fallthrough;
     179              :         case 1:
     180              :             hs->accum |= (uint64) k[0] << 56;
     181              :             break;
     182              :         case 0:
     183              :             return;
     184              :     }
     185              : #else
     186     30540210 :     switch (len)
     187              :     {
     188     19696776 :         case 8:
     189     19696776 :             memcpy(&hs->accum, k, 8);
     190     19696776 :             break;
     191       190399 :         case 7:
     192       190399 :             hs->accum |= (uint64) k[6] << 48;
     193              :             pg_fallthrough;
     194       265129 :         case 6:
     195       265129 :             hs->accum |= (uint64) k[5] << 40;
     196              :             pg_fallthrough;
     197       282302 :         case 5:
     198       282302 :             hs->accum |= (uint64) k[4] << 32;
     199              :             pg_fallthrough;
     200       402066 :         case 4:
     201       402066 :             memcpy(&lower_four, k, sizeof(lower_four));
     202       402066 :             hs->accum |= lower_four;
     203       402066 :             break;
     204       264171 :         case 3:
     205       264171 :             hs->accum |= (uint64) k[2] << 16;
     206              :             pg_fallthrough;
     207       416648 :         case 2:
     208       416648 :             hs->accum |= (uint64) k[1] << 8;
     209              :             pg_fallthrough;
     210       592980 :         case 1:
     211       592980 :             hs->accum |= (uint64) k[0];
     212       592980 :             break;
     213      9848388 :         case 0:
     214      9848388 :             return;
     215              :     }
     216              : #endif
     217              : 
     218     20691822 :     fasthash_combine(hs);
     219              : }
     220              : 
     221              : /*
     222              :  * Set high bit in lowest byte where the input is zero, from:
     223              :  * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
     224              :  */
     225              : #define haszero64(v) \
     226              :     (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
     227              : 
     228              : /*
     229              :  * all-purpose workhorse for fasthash_accum_cstring
     230              :  */
     231              : static inline size_t
     232      1111937 : fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
     233              : {
     234      1111937 :     const char *const start = str;
     235              : 
     236      2106983 :     while (*str)
     237              :     {
     238       995046 :         size_t      chunk_len = 0;
     239              : 
     240      4614939 :         while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
     241      3619893 :             chunk_len++;
     242              : 
     243       995046 :         fasthash_accum(hs, str, chunk_len);
     244       995046 :         str += chunk_len;
     245              :     }
     246              : 
     247      1111937 :     return str - start;
     248              : }
     249              : 
     250              : /*
     251              :  * specialized workhorse for fasthash_accum_cstring
     252              :  *
     253              :  * With an aligned pointer, we consume the string a word at a time.
     254              :  * Loading the word containing the NUL terminator cannot segfault since
     255              :  * allocation boundaries are suitably aligned. To keep from setting
     256              :  * off alarms with address sanitizers, exclude this function from
     257              :  * such testing.
     258              :  */
     259              : pg_attribute_no_sanitize_address()
     260              : static inline size_t
     261      1111937 : fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
     262              : {
     263      1111937 :     const char *const start = str;
     264              :     size_t      remainder;
     265              :     uint64      zero_byte_low;
     266              : 
     267              :     Assert(PointerIsAligned(start, uint64));
     268              : 
     269              :     /*
     270              :      * For every chunk of input, check for zero bytes before mixing into the
     271              :      * hash. The chunk with zeros must contain the NUL terminator.
     272              :      */
     273              :     for (;;)
     274       839239 :     {
     275      1951176 :         uint64      chunk = *(const uint64 *) str;
     276              : 
     277      1951176 :         zero_byte_low = haszero64(chunk);
     278      1951176 :         if (zero_byte_low)
     279      1111937 :             break;
     280              : 
     281       839239 :         hs->accum = chunk;
     282       839239 :         fasthash_combine(hs);
     283       839239 :         str += FH_SIZEOF_ACCUM;
     284              :     }
     285              : 
     286              :     /* mix in remaining bytes */
     287      1111937 :     remainder = fasthash_accum_cstring_unaligned(hs, str);
     288      1111937 :     str += remainder;
     289              : 
     290      1111937 :     return str - start;
     291              : }
     292              : 
     293              : /*
     294              :  * Mix 'str' into the hash state and return the length of the string.
     295              :  */
     296              : static inline size_t
     297      1111937 : fasthash_accum_cstring(fasthash_state *hs, const char *str)
     298              : {
     299              : #if SIZEOF_VOID_P >= 8
     300              : 
     301              :     size_t      len;
     302              : #ifdef USE_ASSERT_CHECKING
     303              :     size_t      len_check;
     304              :     fasthash_state hs_check;
     305              : 
     306              :     memcpy(&hs_check, hs, sizeof(fasthash_state));
     307              :     len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
     308              : #endif
     309      1111937 :     if (PointerIsAligned(str, uint64))
     310              :     {
     311      1111937 :         len = fasthash_accum_cstring_aligned(hs, str);
     312              :         Assert(len_check == len);
     313              :         Assert(hs_check.hash == hs->hash);
     314      1111937 :         return len;
     315              :     }
     316              : #endif                          /* SIZEOF_VOID_P */
     317              : 
     318              :     /*
     319              :      * It's not worth it to try to make the word-at-a-time optimization work
     320              :      * on 32-bit platforms.
     321              :      */
     322            0 :     return fasthash_accum_cstring_unaligned(hs, str);
     323              : }
     324              : 
     325              : /*
     326              :  * The finalizer
     327              :  *
     328              :  * 'tweak' is intended to be the input length when the caller doesn't know
     329              :  * the length ahead of time, such as for NUL-terminated strings, otherwise
     330              :  * zero.
     331              :  */
     332              : static inline uint64
     333     10804536 : fasthash_final64(fasthash_state *hs, uint64 tweak)
     334              : {
     335     10804536 :     return fasthash_mix(hs->hash, tweak);
     336              : }
     337              : 
     338              : /*
     339              :  * Reduce a 64-bit hash to a 32-bit hash.
     340              :  *
     341              :  * This optional step provides a bit more additional mixing compared to
     342              :  * just taking the lower 32-bits.
     343              :  */
     344              : static inline uint32
     345     10804536 : fasthash_reduce32(uint64 h)
     346              : {
     347              :     /*
     348              :      * Convert the 64-bit hashcode to Fermat residue, which shall retain
     349              :      * information from both the higher and lower parts of hashcode.
     350              :      */
     351     10804536 :     return h - (h >> 32);
     352              : }
     353              : 
     354              : /* finalize and reduce */
     355              : static inline uint32
     356       956148 : fasthash_final32(fasthash_state *hs, uint64 tweak)
     357              : {
     358       956148 :     return fasthash_reduce32(fasthash_final64(hs, tweak));
     359              : }
     360              : 
     361              : 
     362              : /* Standalone functions */
     363              : 
     364              : /*
     365              :  * The original fasthash64 function, re-implemented using the incremental
     366              :  * interface. Returns the same 64-bit hashcode as the original,
     367              :  * at least on little-endian machines. 'len' controls not only how
     368              :  * many bytes to hash, but also modifies the internal seed.
     369              :  * 'seed' can be zero.
     370              :  */
     371              : static inline uint64
     372      9848388 : fasthash64(const char *k, size_t len, uint64 seed)
     373              : {
     374              :     fasthash_state hs;
     375              : 
     376      9848388 :     fasthash_init(&hs, 0);
     377              : 
     378              :     /* re-initialize the seed according to input length */
     379      9848388 :     hs.hash = seed ^ (len * 0x880355f21e6d1965);
     380              : 
     381     29545164 :     while (len >= FH_SIZEOF_ACCUM)
     382              :     {
     383     19696776 :         fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
     384     19696776 :         k += FH_SIZEOF_ACCUM;
     385     19696776 :         len -= FH_SIZEOF_ACCUM;
     386              :     }
     387              : 
     388      9848388 :     fasthash_accum(&hs, k, len);
     389              : 
     390              :     /*
     391              :      * Since we already mixed the input length into the seed, we can just pass
     392              :      * zero here. This matches upstream behavior as well.
     393              :      */
     394      9848388 :     return fasthash_final64(&hs, 0);
     395              : }
     396              : 
     397              : /* like fasthash64, but returns a 32-bit hashcode */
     398              : static inline uint32
     399      9848388 : fasthash32(const char *k, size_t len, uint64 seed)
     400              : {
     401      9848388 :     return fasthash_reduce32(fasthash64(k, len, seed));
     402              : }
     403              : 
     404              : /*
     405              :  * Convenience function for hashing NUL-terminated strings
     406              :  *
     407              :  * Note: This is faster than, and computes a different result from,
     408              :  * "fasthash32(s, strlen(s))"
     409              :  */
     410              : static inline uint32
     411       399863 : hash_string(const char *s)
     412              : {
     413              :     fasthash_state hs;
     414              :     size_t      s_len;
     415              : 
     416       399863 :     fasthash_init(&hs, 0);
     417              : 
     418              :     /*
     419              :      * Combine string into the hash and save the length for tweaking the final
     420              :      * mix.
     421              :      */
     422       399863 :     s_len = fasthash_accum_cstring(&hs, s);
     423              : 
     424       399863 :     return fasthash_final32(&hs, s_len);
     425              : }
     426              : 
     427              : #endif                          /* HASHFN_UNSTABLE_H */
        

Generated by: LCOV version 2.0-1