LCOV - code coverage report
Current view: top level - src/include/common - hashfn_unstable.h (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 86 87 98.9 %
Date: 2026-01-25 07:16:42 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * hashfn_unstable.h
       3             :  *
       4             :  * Building blocks for creating fast inlineable hash functions. The
       5             :  * functions in this file are not guaranteed to be stable between versions,
       6             :  * and may differ by hardware platform. Hence they must not be used in
       7             :  * indexes or other on-disk structures. See hashfn.h if you need stability.
       8             :  *
       9             :  *
      10             :  * Portions Copyright (c) 2024-2026, PostgreSQL Global Development Group
      11             :  *
      12             :  * src/include/common/hashfn_unstable.h
      13             :  */
      14             : #ifndef HASHFN_UNSTABLE_H
      15             : #define HASHFN_UNSTABLE_H
      16             : 
      17             : 
      18             : /*
      19             :  * fasthash is a modification of code taken from
      20             :  * https://code.google.com/archive/p/fast-hash/source/default/source
      21             :  * under the terms of the MIT license. The original copyright
      22             :  * notice follows:
      23             :  */
      24             : 
      25             : /* The MIT License
      26             : 
      27             :    Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
      28             : 
      29             :    Permission is hereby granted, free of charge, to any person
      30             :    obtaining a copy of this software and associated documentation
      31             :    files (the "Software"), to deal in the Software without
      32             :    restriction, including without limitation the rights to use, copy,
      33             :    modify, merge, publish, distribute, sublicense, and/or sell copies
      34             :    of the Software, and to permit persons to whom the Software is
      35             :    furnished to do so, subject to the following conditions:
      36             : 
      37             :    The above copyright notice and this permission notice shall be
      38             :    included in all copies or substantial portions of the Software.
      39             : 
      40             :    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      41             :    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      42             :    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      43             :    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
      44             :    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
      45             :    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
      46             :    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      47             :    SOFTWARE.
      48             : */
      49             : 
      50             : /*
      51             :  * fasthash as implemented here has two interfaces:
      52             :  *
      53             :  * 1) Standalone functions that take a single input.
      54             :  *
      55             :  * 2) Incremental interface. This can be used for incorporating multiple
      56             :  * inputs. First, initialize the hash state (here with a zero seed):
      57             :  *
      58             :  * fasthash_state hs;
      59             :  * fasthash_init(&hs, 0);
      60             :  *
      61             :  * Next, accumulate input into the hash state.
      62             :  * If the inputs are of types that can be trivially cast to uint64, it's
      63             :  * sufficient to do:
      64             :  *
      65             :  * hs.accum = value1;
      66             :  * fasthash_combine(&hs);
      67             :  * hs.accum = value2;
      68             :  * fasthash_combine(&hs);
      69             :  * ...
      70             :  *
      71             :  * For longer or variable-length input, fasthash_accum() is a more
      72             :  * flexible, but more verbose method. The standalone functions use this
      73             :  * internally, so see fasthash64() for an example of this.
      74             :  *
      75             :  * After all inputs have been mixed in, finalize the hash and optionally
      76             :  * reduce to 32 bits. If all inputs are fixed-length, it's sufficient
      77             :  * to pass zero for the tweak:
      78             :  *
      79             :  * hashcode = fasthash_final32(&hs, 0);
      80             :  *
      81             :  * For variable length input, experimentation has found that SMHasher
      82             :  * fails unless we pass the length for the tweak. When accumulating
      83             :  * multiple varlen values, it's probably safest to calculate a tweak
      84             :  * such that the bits of all individual lengths are present, for example:
      85             :  *
      86             :  * lengths = len1 + (len2 << 10) + (len3 << 20);
      87             :  * hashcode = fasthash_final32(&hs, lengths);
      88             :  *
      89             :  * The incremental interface allows an optimization for NUL-terminated
      90             :  * C strings:
      91             :  *
      92             :  * len = fasthash_accum_cstring(&hs, str);
      93             :  * hashcode = fasthash_final32(&hs, len);
      94             :  *
      95             :  * By computing the length on-the-fly, we can avoid needing a strlen()
      96             :  * call to tell us how many bytes to hash.
      97             :  */
      98             : 
      99             : 
     100             : typedef struct fasthash_state
     101             : {
     102             :     /* staging area for one chunk of input; fasthash_combine() mixes it into 'hash' */
     103             :     uint64      accum;
     104             :     /* running hash value: seeded by fasthash_init(), updated by fasthash_combine() */
     105             :     uint64      hash;
     106             : } fasthash_state;
     107             : /* size in bytes of the uint64 'accum' staging area, i.e. the largest chunk fasthash_accum() accepts */
     108             : #define FH_SIZEOF_ACCUM sizeof(uint64)
     109             : 
     110             : 
     111             : /*
     112             :  * Initialize the hash state pointed to by 'hs': the whole struct is zeroed
     113             :  * (so 'accum' starts at 0) and 'hash' is set to 'seed' XORed with a fixed constant.
     114             :  * 'seed' can be zero.
     115             :  */
     116             : static inline void
     117    16199038 : fasthash_init(fasthash_state *hs, uint64 seed)
     118             : {
     119    16199038 :     memset(hs, 0, sizeof(fasthash_state));
     120    16199038 :     hs->hash = seed ^ 0x880355f21e6d1965;
     121    16199038 : }
     122             : 
     123             : /* both the finalizer and part of the combining step: returns 'h' after a shift-xor (with 'tweak' added in), a multiply and a second shift-xor */
     124             : static inline uint64
     125    48873072 : fasthash_mix(uint64 h, uint64 tweak)
     126             : {
     127    48873072 :     h ^= (h >> 23) + tweak;     /* 'tweak' is added to the shifted copy before the xor */
     128    48873072 :     h *= 0x2127599bf4325c37;    /* multiply by a fixed constant */
     129    48873072 :     h ^= h >> 47;               /* fold the high bits back into the low bits */
     130    48873072 :     return h;
     131             : }
     132             : 
     133             : /* combine one chunk of input, already staged in hs->accum, into hs->hash; 'accum' itself is left unchanged */
     134             : static inline void
     135    32674034 : fasthash_combine(fasthash_state *hs)
     136             : {
     137    32674034 :     hs->hash ^= fasthash_mix(hs->accum, 0);     /* mix the chunk with a zero tweak, then xor it in */
     138    32674034 :     hs->hash *= 0x880355f21e6d1965;             /* same constant that fasthash_init() xors into the seed */
     139    32674034 : }
     140             : 
     141             : /* accumulate up to 8 bytes of input ('len' <= FH_SIZEOF_ACCUM) from 'k' into hs->accum and combine it into the hash; a zero 'len' resets 'accum' but leaves hs->hash untouched */
     142             : static inline void
     143    46712006 : fasthash_accum(fasthash_state *hs, const char *k, size_t len)
     144             : {
     145             :     uint32      lower_four;
     146             : 
     147             :     Assert(len <= FH_SIZEOF_ACCUM);
     148    46712006 :     hs->accum = 0;
     149             : 
     150             :     /*
     151             :      * For consistency, bytewise loads must match the platform's endianness. Each byte is converted through unsigned char first: where plain char is signed, a byte >= 0x80 would otherwise be sign-extended and its 1-bits would overwrite the other bytes of the chunk.
     152             :      */
     153             : #ifdef WORDS_BIGENDIAN
     154             :     switch (len)
     155             :     {
     156             :         case 8:
     157             :             memcpy(&hs->accum, k, 8);
     158             :             break;
     159             :         case 7:
     160             :             hs->accum |= (uint64) (unsigned char) k[6] << 8;
     161             :             /* FALLTHROUGH */
     162             :         case 6:
     163             :             hs->accum |= (uint64) (unsigned char) k[5] << 16;
     164             :             /* FALLTHROUGH */
     165             :         case 5:
     166             :             hs->accum |= (uint64) (unsigned char) k[4] << 24;
     167             :             /* FALLTHROUGH */
     168             :         case 4:
     169             :             memcpy(&lower_four, k, sizeof(lower_four));
     170             :             hs->accum |= (uint64) lower_four << 32;
     171             :             break;
     172             :         case 3:
     173             :             hs->accum |= (uint64) (unsigned char) k[2] << 40;
     174             :             /* FALLTHROUGH */
     175             :         case 2:
     176             :             hs->accum |= (uint64) (unsigned char) k[1] << 48;
     177             :             /* FALLTHROUGH */
     178             :         case 1:
     179             :             hs->accum |= (uint64) (unsigned char) k[0] << 56;
     180             :             break;
     181             :         case 0:
     182             :             return;             /* nothing to mix in */
     183             :     }
     184             : #else
     185    46712006 :     switch (len)
     186             :     {
     187    30533432 :         case 8:
     188    30533432 :             memcpy(&hs->accum, k, 8);
     189    30533432 :             break;
     190      211966 :         case 7:
     191      211966 :             hs->accum |= (uint64) (unsigned char) k[6] << 48;
     192             :             /* FALLTHROUGH */
     193      272422 :         case 6:
     194      272422 :             hs->accum |= (uint64) (unsigned char) k[5] << 40;
     195             :             /* FALLTHROUGH */
     196      274870 :         case 5:
     197      274870 :             hs->accum |= (uint64) (unsigned char) k[4] << 32;
     198             :             /* FALLTHROUGH */
     199      461722 :         case 4:
     200      461722 :             memcpy(&lower_four, k, sizeof(lower_four));
     201      461722 :             hs->accum |= lower_four;
     202      461722 :             break;
     203      417092 :         case 3:
     204      417092 :             hs->accum |= (uint64) (unsigned char) k[2] << 16;
     205             :             /* FALLTHROUGH */
     206      445958 :         case 2:
     207      445958 :             hs->accum |= (uint64) (unsigned char) k[1] << 8;
     208             :             /* FALLTHROUGH */
     209      450136 :         case 1:
     210      450136 :             hs->accum |= (uint64) (unsigned char) k[0];
     211      450136 :             break;
     212    15266716 :         case 0:
     213    15266716 :             return;             /* nothing to mix in */
     214             :     }
     215             : #endif
     216             : 
     217    31445290 :     fasthash_combine(hs);
     218             : }
     219             : 
     220             : /*
     221             :  * Evaluates to nonzero if any byte of 'v' is zero, setting the high bit in the lowest such byte. Note that the argument 'v' is expanded twice. From:
     222             :  * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
     223             :  */
     224             : #define haszero64(v) \
     225             :     (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
     226             : 
     227             : /*
     228             :  * all-purpose workhorse for fasthash_accum_cstring: reads 'str' bytewise, mixes it into 'hs' in chunks of at most FH_SIZEOF_ACCUM bytes, and returns the string length (terminator not counted)
     229             :  */
     230             : static inline size_t
     231      932322 : fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
     232             : {
     233      932322 :     const char *const start = str;
     234             : 
     235     1844180 :     while (*str)
     236             :     {
     237      911858 :         size_t      chunk_len = 0;
     238             :         /* measure the next chunk: stop at the NUL terminator or at a full accumulator */
     239     4831190 :         while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
     240     3919332 :             chunk_len++;
     241             : 
     242      911858 :         fasthash_accum(hs, str, chunk_len);
     243      911858 :         str += chunk_len;
     244             :     }
     245             :     /* 'str' now points at the NUL terminator */
     246      932322 :     return str - start;
     247             : }
     248             : 
     249             : /*
     250             :  * specialized workhorse for fasthash_accum_cstring, for 'str' aligned to uint64; mixes the string into 'hs' and returns its length
     251             :  *
     252             :  * With an aligned pointer, we consume the string a word at a time.
     253             :  * Loading the word containing the NUL terminator cannot segfault since
     254             :  * allocation boundaries are suitably aligned. To keep from setting
     255             :  * off alarms with address sanitizers, exclude this function from
     256             :  * such testing.
     257             :  */
     258             : pg_attribute_no_sanitize_address()
     259             : static inline size_t
     260      932322 : fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
     261             : {
     262      932322 :     const char *const start = str;
     263             :     size_t      remainder;
     264             :     uint64      zero_byte_low;
     265             : 
     266             :     Assert(PointerIsAligned(start, uint64));
     267             : 
     268             :     /*
     269             :      * For every chunk of input, check for zero bytes before mixing into the
     270             :      * hash. The chunk with zeros must contain the NUL terminator.
     271             :      */
     272             :     for (;;)
     273     1057866 :     {
     274     1990188 :         uint64      chunk = *(uint64 *) str;
     275             : 
     276     1990188 :         zero_byte_low = haszero64(chunk);    /* used below only as a zero/nonzero flag */
     277     1990188 :         if (zero_byte_low)
     278      932322 :             break;
     279             :         /* no NUL in this word: mix all eight bytes in one step */
     280     1057866 :         hs->accum = chunk;
     281     1057866 :         fasthash_combine(hs);
     282     1057866 :         str += FH_SIZEOF_ACCUM;
     283             :     }
     284             : 
     285             :     /* mix in remaining bytes (fewer than a full word) via the bytewise routine, which also locates the terminator */
     286      932322 :     remainder = fasthash_accum_cstring_unaligned(hs, str);
     287      932322 :     str += remainder;
     288             : 
     289      932322 :     return str - start;
     290             : }
     291             : 
     292             : /*
     293             :  * Mix the NUL-terminated string 'str' into the hash state and return the length of the string. The word-at-a-time routine is used when pointers are at least 8 bytes wide and 'str' is uint64-aligned; otherwise the bytewise routine is used.
     294             :  */
     295             : static inline size_t
     296      932322 : fasthash_accum_cstring(fasthash_state *hs, const char *str)
     297             : {
     298             : #if SIZEOF_VOID_P >= 8
     299             : 
     300             :     size_t      len;
     301             : #ifdef USE_ASSERT_CHECKING
     302             :     size_t      len_check;
     303             :     fasthash_state hs_check;
     304             :     /* run the bytewise routine on a copy of the state so the aligned result can be cross-checked below */
     305             :     memcpy(&hs_check, hs, sizeof(fasthash_state));
     306             :     len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
     307             : #endif
     308      932322 :     if (PointerIsAligned(str, uint64))
     309             :     {
     310      932322 :         len = fasthash_accum_cstring_aligned(hs, str);
     311             :         Assert(len_check == len);
     312             :         Assert(hs_check.hash == hs->hash);
     313      932322 :         return len;
     314             :     }
     315             : #endif                          /* SIZEOF_VOID_P */
     316             : 
     317             :     /*
     318             :      * Fallback for unaligned pointers, and for 32-bit platforms, where it's not
     319             :      * worth it to try to make the word-at-a-time optimization work.
     320             :      */
     321           0 :     return fasthash_accum_cstring_unaligned(hs, str);
     322             : }
     323             : 
     324             : /*
     325             :  * The finalizer: returns the 64-bit hashcode, i.e. fasthash_mix() applied to the accumulated hs->hash and 'tweak'. The state itself is not modified.
     326             :  *
     327             :  * 'tweak' is intended to be the input length when the caller doesn't know
     328             :  * the length ahead of time, such as for NUL-terminated strings, otherwise
     329             :  * zero.
     330             :  */
     331             : static inline uint64
     332    16199038 : fasthash_final64(fasthash_state *hs, uint64 tweak)
     333             : {
     334    16199038 :     return fasthash_mix(hs->hash, tweak);
     335             : }
     336             : 
     337             : /*
     338             :  * Reduce a 64-bit hash to a 32-bit hash.
     339             :  *
     340             :  * This optional step provides a bit of additional mixing compared to
     341             :  * just taking the lower 32 bits.
     342             :  */
     343             : static inline uint32
     344    16199038 : fasthash_reduce32(uint64 h)
     345             : {
     346             :     /*
     347             :      * Convert the 64-bit hashcode to a Fermat residue, which retains
     348             :      * information from both the higher and lower parts of the hashcode.
     349             :      */
     350    16199038 :     return h - (h >> 32);       /* the 64-bit difference is converted to uint32 by the return type */
     351             : }
     352             : 
     353             : /* finalize with 'tweak' via fasthash_final64(), then reduce the 64-bit result to 32 bits via fasthash_reduce32() */
     354             : static inline uint32
     355      932322 : fasthash_final32(fasthash_state *hs, uint64 tweak)
     356             : {
     357      932322 :     return fasthash_reduce32(fasthash_final64(hs, tweak));
     358             : }
     359             : 
     360             : 
     361             : /* Standalone functions */
     362             : 
     363             : /*
     364             :  * The original fasthash64 function, re-implemented using the incremental
     365             :  * interface. Returns the same 64-bit hashcode as the original,
     366             :  * at least on little-endian machines. 'len' controls not only how
     367             :  * many bytes of 'k' to hash, but also modifies the internal seed.
     368             :  * 'seed' can be zero.
     369             :  */
     370             : static inline uint64
     371    15266716 : fasthash64(const char *k, size_t len, uint64 seed)
     372             : {
     373             :     fasthash_state hs;
     374             : 
     375    15266716 :     fasthash_init(&hs, 0);      /* zeroes the state; the hash value it sets is overwritten just below */
     376             : 
     377             :     /* re-initialize the seed according to input length */
     378    15266716 :     hs.hash = seed ^ (len * 0x880355f21e6d1965);
     379             :     /* consume the input one full accumulator (FH_SIZEOF_ACCUM bytes) at a time */
     380    45800148 :     while (len >= FH_SIZEOF_ACCUM)
     381             :     {
     382    30533432 :         fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
     383    30533432 :         k += FH_SIZEOF_ACCUM;
     384    30533432 :         len -= FH_SIZEOF_ACCUM;
     385             :     }
     386             :     /* mix in the trailing bytes, if any; fasthash_accum() does not touch the hash when 'len' is zero */
     387    15266716 :     fasthash_accum(&hs, k, len);
     388             : 
     389             :     /*
     390             :      * Since we already mixed the input length into the seed, we can just pass
     391             :      * zero here. This matches upstream behavior as well.
     392             :      */
     393    15266716 :     return fasthash_final64(&hs, 0);
     394             : }
     395             : 
     396             : /* like fasthash64, but returns a 32-bit hashcode, obtained by passing the 64-bit result through fasthash_reduce32() */
     397             : static inline uint32
     398    15266716 : fasthash32(const char *k, size_t len, uint64 seed)
     399             : {
     400    15266716 :     return fasthash_reduce32(fasthash64(k, len, seed));
     401             : }
     402             : 
     403             : /*
     404             :  * Convenience function for hashing NUL-terminated strings: returns a 32-bit hashcode computed with a zero seed and the string length as the final tweak
     405             :  *
     406             :  * Note: This is faster than, and computes a different result from,
     407             :  * "fasthash32(s, strlen(s), 0)"
     408             :  */
     409             : static inline uint32
     410      761444 : hash_string(const char *s)
     411             : {
     412             :     fasthash_state hs;
     413             :     size_t      s_len;
     414             : 
     415      761444 :     fasthash_init(&hs, 0);
     416             : 
     417             :     /*
     418             :      * Combine string into the hash and save the length for tweaking the final
     419             :      * mix.
     420             :      */
     421      761444 :     s_len = fasthash_accum_cstring(&hs, s);
     422             : 
     423      761444 :     return fasthash_final32(&hs, s_len);
     424             : }
     425             : 
     426             : #endif                          /* HASHFN_UNSTABLE_H */

Generated by: LCOV version 1.16