LCOV - code coverage report
Current view: top level - src/include/lib - simplehash.h (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 242 284 85.2 %
Date: 2025-11-29 10:18:11 Functions: 228 290 78.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * simplehash.h
       3             :  *
       4             :  *    When included this file generates a "templated" (by way of macros)
       5             :  *    open-addressing hash table implementation specialized to user-defined
       6             :  *    types.
       7             :  *
       8             :  *    It's probably not worthwhile to generate such a specialized implementation
       9             :  *    for hash tables that aren't performance or space sensitive.
      10             :  *
      11             :  *    Compared to dynahash, simplehash has the following benefits:
      12             :  *
      13             :  *    - Due to the "templated" code generation has known structure sizes and no
      14             :  *      indirect function calls (which show up substantially in dynahash
      15             :  *      profiles). These features considerably increase speed for small
      16             :  *      entries.
      17             :  *    - Open addressing has better CPU cache behavior than dynahash's chained
      18             :  *      hashtables.
      19             :  *    - The generated interface is type-safe and easier to use than dynahash,
      20             :  *      though at the cost of more complex setup.
      21             :  *    - Allocates memory in a MemoryContext or another allocator with a
      22             :  *      malloc/free style interface (which isn't easily usable in a shared
      23             :  *      memory context)
      24             :  *    - Does not require the overhead of a separate memory context.
      25             :  *
      26             :  * Usage notes:
      27             :  *
      28             :  *    To generate a hash-table and associated functions for a use case several
      29             :  *    macros have to be #define'ed before this file is included.  Including
      30             :  *    the file #undef's all those, so a new hash table can be generated
      31             :  *    afterwards.
      32             :  *    The relevant parameters are:
      33             :  *    - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
      34             :  *      will result in hash table type 'foo_hash' and functions like
      35             :  *      'foo_insert'/'foo_lookup' and so forth.
      36             :  *    - SH_ELEMENT_TYPE - type of the contained elements
      37             :  *    - SH_KEY_TYPE - type of the hashtable's key
      38             :  *    - SH_DECLARE - if defined function prototypes and type declarations are
      39             :  *      generated
      40             :  *    - SH_DEFINE - if defined function definitions are generated
      41             :  *    - SH_SCOPE - in which scope (e.g. extern, static inline) do function
      42             :  *      declarations reside
      43             :  *    - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
      44             :  *      use this to allocate bytes. The allocator must zero the returned space.
      45             :  *    - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
      46             :  *      are defined, so you can supply your own
      47             :  *    The following parameters are only relevant when SH_DEFINE is defined:
      48             :  *    - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
      49             :  *    - SH_EQUAL(table, a, b) - compare two table keys
      50             :  *    - SH_HASH_KEY(table, key) - generate hash for the key
      51             :  *    - SH_STORE_HASH - if defined the hash is stored in the elements
      52             :  *    - SH_GET_HASH(tb, a) - return the field to store the hash in
      53             :  *
      54             :  *    The element type is required to contain a "status" member that can store
      55             :  *    the range of values defined in the SH_STATUS enum.
      56             :  *
      57             :  *    While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
      58             :  *    the hash table implementation needs to compare hashes to move elements
      59             :  *    (particularly when growing the hash), it's preferable, if possible, to
      60             :  *    store the element's hash in the element's data type. If the hash is so
      61             :  *    stored, the hash table will also compare hashes before calling SH_EQUAL
      62             :  *    when comparing two keys.
      63             :  *
      64             :  *    For convenience the hash table create functions accept a void pointer
      65             :  *    that will be stored in the hash table type's member private_data. This
      66             :  *    allows callbacks to reference caller provided data.
      67             :  *
      68             :  *    For examples of usage look at tidbitmap.c (file local definition) and
      69             :  *    execnodes.h/execGrouping.c (exposed declaration, file local
      70             :  *    implementation).
      71             :  *
      72             :  * Hash table design:
      73             :  *
      74             :  *    The hash table design chosen is a variant of linear open-addressing. The
      75             :  *    reason for doing so is that linear addressing is CPU cache & pipeline
      76             :  *    friendly. The biggest disadvantage of simple linear addressing schemes
      77             :  *    are highly variable lookup times due to clustering, and deletions
      78             :  *    leaving a lot of tombstones around.  To address these issues a variant
      79             :  *    of "robin hood" hashing is employed.  Robin hood hashing optimizes
      80             :  *    chaining lengths by moving elements close to their optimal bucket
      81             :  *    ("rich" elements), out of the way if a to-be-inserted element is further
      82             :  *    away from its optimal position (i.e. it's "poor").  While that can make
      83             :  *    insertions slower, the average lookup performance is a lot better, and
      84             :  *    higher fill factors can be used in a still performant manner.  To avoid
      85             :  *    tombstones - which normally solve the issue that a deleted node's
      86             :  *    presence is relevant to determine whether a lookup needs to continue
      87             :  *    looking or is done - buckets following a deleted element are shifted
      88             :  *    backwards, unless they're empty or already at their optimal position.
      89             :  *
      90             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      91             :  * Portions Copyright (c) 1994, Regents of the University of California
      92             :  *
      93             :  * src/include/lib/simplehash.h
      94             :  */
      95             : 
      96             : #include "port/pg_bitutils.h"
      97             : 
      98             : /* helpers */
      99             : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
     100             : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
     101             : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
     102             : 
     103             : /* name macros for: */
     104             : 
     105             : /* type declarations */
     106             : #define SH_TYPE SH_MAKE_NAME(hash)
     107             : #define SH_STATUS SH_MAKE_NAME(status)
     108             : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
     109             : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
     110             : #define SH_ITERATOR SH_MAKE_NAME(iterator)
     111             : 
     112             : /* function declarations */
     113             : #define SH_CREATE SH_MAKE_NAME(create)
     114             : #define SH_DESTROY SH_MAKE_NAME(destroy)
     115             : #define SH_RESET SH_MAKE_NAME(reset)
     116             : #define SH_INSERT SH_MAKE_NAME(insert)
     117             : #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
     118             : #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
     119             : #define SH_DELETE SH_MAKE_NAME(delete)
     120             : #define SH_LOOKUP SH_MAKE_NAME(lookup)
     121             : #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
     122             : #define SH_GROW SH_MAKE_NAME(grow)
     123             : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
     124             : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
     125             : #define SH_ITERATE SH_MAKE_NAME(iterate)
     126             : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
     127             : #define SH_FREE SH_MAKE_NAME(free)
     128             : #define SH_ESTIMATE_SPACE SH_MAKE_NAME(estimate_space)
     129             : #define SH_STAT SH_MAKE_NAME(stat)
     130             : 
     131             : /* internal helper functions (no externally visible prototypes) */
     132             : #define SH_COMPUTE_SIZE SH_MAKE_NAME(compute_size)
     133             : #define SH_UPDATE_PARAMETERS SH_MAKE_NAME(update_parameters)
     134             : #define SH_NEXT SH_MAKE_NAME(next)
     135             : #define SH_PREV SH_MAKE_NAME(prev)
     136             : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
     137             : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
     138             : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
     139             : #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
     140             : #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
     141             : 
     142             : /* generate forward declarations necessary to use the hash table */
     143             : #ifdef SH_DECLARE
     144             : 
     145             : /* type definitions */
     146             : typedef struct SH_TYPE
     147             : {
     148             :     /*
     149             :      * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
     150             :      * tables.  Note that the maximum number of elements is lower
     151             :      * (SH_MAX_FILLFACTOR)
     152             :      */
     153             :     uint64      size;
     154             : 
     155             :     /* how many elements have valid contents */
     156             :     uint32      members;
     157             : 
     158             :     /* mask for bucket and size calculations, based on size */
     159             :     uint32      sizemask;
     160             : 
     161             :     /* boundary after which to grow hashtable */
     162             :     uint32      grow_threshold;
     163             : 
     164             :     /* hash buckets */
     165             :     SH_ELEMENT_TYPE *data;
     166             : 
     167             : #ifndef SH_RAW_ALLOCATOR
     168             :     /* memory context to use for allocations */
     169             :     MemoryContext ctx;
     170             : #endif
     171             : 
     172             :     /* user defined data, useful for callbacks */
     173             :     void       *private_data;
     174             : }           SH_TYPE;
     175             : 
     176             : typedef enum SH_STATUS
     177             : {
     178             :     SH_STATUS_EMPTY = 0x00,
     179             :     SH_STATUS_IN_USE = 0x01
     180             : } SH_STATUS;
     181             : 
     182             : typedef struct SH_ITERATOR
     183             : {
     184             :     uint32      cur;            /* current element */
     185             :     uint32      end;
     186             :     bool        done;           /* iterator exhausted? */
     187             : }           SH_ITERATOR;
     188             : 
     189             : /* externally visible function prototypes */
     190             : #ifdef SH_RAW_ALLOCATOR
     191             : /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
     192             : SH_SCOPE    SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
     193             : #else
     194             : /*
     195             :  * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
     196             :  *                               void *private_data)
     197             :  */
     198             : SH_SCOPE    SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
     199             :                                void *private_data);
     200             : #endif
     201             : 
     202             : /* void <prefix>_destroy(<prefix>_hash *tb) */
     203             : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
     204             : 
     205             : /* void <prefix>_reset(<prefix>_hash *tb) */
     206             : SH_SCOPE void SH_RESET(SH_TYPE * tb);
     207             : 
     208             : /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
     209             : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
     210             : 
     211             : /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
     212             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
     213             : 
     214             : /*
     215             :  * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
     216             :  *                                bool *found)
     217             :  */
     218             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
     219             :                                             uint32 hash, bool *found);
     220             : 
     221             : /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
     222             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
     223             : 
     224             : /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
     225             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
     226             :                                             uint32 hash);
     227             : 
     228             : /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
     229             : SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
     230             : 
     231             : /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
     232             : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
     233             : 
     234             : /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
     235             : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     236             : 
     237             : /*
     238             :  * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
     239             :  *                                uint32 at)
     240             :  */
     241             : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
     242             : 
     243             : /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
     244             : SH_SCOPE    SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     245             : 
     246             : /* size_t <prefix>_estimate_space(double nentries) */
     247             : SH_SCOPE size_t SH_ESTIMATE_SPACE(double nentries);
     248             : 
     249             : /* void <prefix>_stat(<prefix>_hash *tb) */
     250             : SH_SCOPE void SH_STAT(SH_TYPE * tb);
     251             : 
     252             : #endif                          /* SH_DECLARE */
     253             : 
     254             : 
     255             : /* generate implementation of the hash table */
     256             : #ifdef SH_DEFINE
     257             : 
     258             : #ifndef SH_RAW_ALLOCATOR
     259             : #include "utils/memutils.h"
     260             : #endif
     261             : 
     262             : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
     263             : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
     264             : 
     265             : /* normal fillfactor, unless already close to maximum */
     266             : #ifndef SH_FILLFACTOR
     267             : #define SH_FILLFACTOR (0.9)
     268             : #endif
     269             : /* increase fillfactor if we otherwise would error out */
     270             : #define SH_MAX_FILLFACTOR (0.98)
     271             : /* grow if actual and optimal location bigger than */
     272             : #ifndef SH_GROW_MAX_DIB
     273             : #define SH_GROW_MAX_DIB 25
     274             : #endif
     275             : /* grow if more than elements to move when inserting */
     276             : #ifndef SH_GROW_MAX_MOVE
     277             : #define SH_GROW_MAX_MOVE 150
     278             : #endif
     279             : #ifndef SH_GROW_MIN_FILLFACTOR
     280             : /* but do not grow due to SH_GROW_MAX_* if below */
     281             : #define SH_GROW_MIN_FILLFACTOR 0.1
     282             : #endif
     283             : 
     284             : #ifdef SH_STORE_HASH
     285             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
     286             : #else
     287             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
     288             : #endif
     289             : 
     290             : /*
     291             :  * Wrap the following definitions in include guards, to avoid multiple
     292             :  * definition errors if this header is included more than once.  The rest of
     293             :  * the file deliberately has no include guards, because it can be included
     294             :  * with different parameters to define functions and types with non-colliding
     295             :  * names.
     296             :  */
     297             : #ifndef SIMPLEHASH_H
     298             : #define SIMPLEHASH_H
     299             : 
     300             : #ifdef FRONTEND
     301             : #define sh_error(...) pg_fatal(__VA_ARGS__)
     302             : #define sh_log(...) pg_log_info(__VA_ARGS__)
     303             : #else
     304             : #define sh_error(...) elog(ERROR, __VA_ARGS__)
     305             : #define sh_log(...) elog(LOG, __VA_ARGS__)
     306             : #endif
     307             : 
     308             : #endif
     309             : 
     310             : /*
     311             :  * Compute allocation size for hashtable. Result can be passed to
     312             :  * SH_UPDATE_PARAMETERS.  (Keep SH_ESTIMATE_SPACE in sync with this!)
     313             :  */
     314             : static inline uint64
     315      205540 : SH_COMPUTE_SIZE(uint64 newsize)
     316             : {
     317             :     uint64      size;
     318             : 
     319             :     /* supporting zero sized hashes would complicate matters */
     320      205540 :     size = Max(newsize, 2);
     321             : 
     322             :     /* round up size to the next power of 2, that's how bucketing works */
     323      205540 :     size = pg_nextpower2_64(size);
     324             :     Assert(size <= SH_MAX_SIZE);
     325             : 
     326             :     /*
     327             :      * Verify that allocation of ->data is possible on this platform, without
     328             :      * overflowing Size.
     329             :      */
     330      205540 :     if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
     331           0 :         sh_error("hash table too large");
     332             : 
     333      205540 :     return size;
     334             : }
     335             : 
     336             : /*
     337             :  * Update sizing parameters for hashtable. Called when creating and growing
     338             :  * the hashtable.
     339             :  */
     340             : static inline void
     341      102770 : SH_UPDATE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
     342             : {
     343      102770 :     uint64      size = SH_COMPUTE_SIZE(newsize);
     344             : 
     345             :     /* now set size */
     346      102770 :     tb->size = size;
     347      102770 :     tb->sizemask = (uint32) (size - 1);
     348             : 
     349             :     /*
     350             :      * Compute the next threshold at which we need to grow the hash table
     351             :      * again.
     352             :      */
     353      102770 :     if (tb->size == SH_MAX_SIZE)
     354           0 :         tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
     355             :     else
     356      102770 :         tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
     357      102770 : }
     358             : 
     359             : /* return the optimal bucket for the hash */
     360             : static inline uint32
     361    53117758 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
     362             : {
     363    53117758 :     return hash & tb->sizemask;
     364             : }
     365             : 
     366             : /* return next bucket after the current, handling wraparound */
     367             : static inline uint32
     368    22921272 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     369             : {
     370    22921272 :     curelem = (curelem + 1) & tb->sizemask;
     371             : 
     372             :     Assert(curelem != startelem);
     373             : 
     374    22921272 :     return curelem;
     375             : }
     376             : 
     377             : /* return bucket before the current, handling wraparound */
     378             : static inline uint32
     379     3573082 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     380             : {
     381     3573082 :     curelem = (curelem - 1) & tb->sizemask;
     382             : 
     383             :     Assert(curelem != startelem);
     384             : 
     385     3573082 :     return curelem;
     386             : }
     387             : 
     388             : /* return distance between bucket and its optimal position */
     389             : static inline uint32
     390    10789432 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
     391             : {
     392    10789432 :     if (optimal <= bucket)
     393    10727512 :         return bucket - optimal;
     394             :     else
     395       61920 :         return (tb->size + bucket) - optimal;
     396             : }
     397             : 
     398             : static inline uint32
     399    12308544 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
     400             : {
     401             : #ifdef SH_STORE_HASH
     402     4866702 :     return SH_GET_HASH(tb, entry);
     403             : #else
     404     7441842 :     return SH_HASH_KEY(tb, entry->SH_KEY);
     405             : #endif
     406             : }
     407             : 
     408             : /* default memory allocator function */
     409             : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
     410             : static inline void SH_FREE(SH_TYPE * type, void *pointer);
     411             : 
     412             : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
     413             : 
     414             : /* default memory allocator function */
     415             : static inline void *
     416       93270 : SH_ALLOCATE(SH_TYPE * type, Size size)
     417             : {
     418             : #ifdef SH_RAW_ALLOCATOR
     419         722 :     return SH_RAW_ALLOCATOR(size);
     420             : #else
     421       92548 :     return MemoryContextAllocExtended(type->ctx, size,
     422             :                                       MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
     423             : #endif
     424             : }
     425             : 
     426             : /* default memory free function */
     427             : static inline void
     428       44786 : SH_FREE(SH_TYPE * type, void *pointer)
     429             : {
     430       44786 :     pfree(pointer);
     431       44786 : }
     432             : 
     433             : #endif
     434             : 
     435             : /*
     436             :  * Create a hash table with enough space for `nelements` distinct members.
     437             :  * Memory for the hash table is allocated from the passed-in context.  If
     438             :  * desired, the array of elements can be allocated using a passed-in allocator;
     439             :  * this could be useful in order to place the array of elements in a shared
     440             :  * memory, or in a context that will outlive the rest of the hash table.
     441             :  * Memory other than for the array of elements will still be allocated from
     442             :  * the passed-in context.
     443             :  */
     444             : #ifdef SH_RAW_ALLOCATOR
     445             : SH_SCOPE    SH_TYPE *
     446         712 : SH_CREATE(uint32 nelements, void *private_data)
     447             : #else
     448             : SH_SCOPE    SH_TYPE *
     449       96774 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
     450             : #endif
     451             : {
     452             :     SH_TYPE    *tb;
     453             :     uint64      size;
     454             : 
     455             : #ifdef SH_RAW_ALLOCATOR
     456         712 :     tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
     457             : #else
     458       96774 :     tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
     459       96774 :     tb->ctx = ctx;
     460             : #endif
     461       97486 :     tb->private_data = private_data;
     462             : 
     463             :     /* increase nelements by fillfactor, want to store nelements elements */
     464       97486 :     size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
     465             : 
     466       97486 :     size = SH_COMPUTE_SIZE(size);
     467             : 
     468       97486 :     tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * size);
     469             : 
     470       97486 :     SH_UPDATE_PARAMETERS(tb, size);
     471       97486 :     return tb;
     472             : }
     473             : 
     474             : /* destroy a previously created hash table */
     475             : SH_SCOPE void
     476       48996 : SH_DESTROY(SH_TYPE * tb)
     477             : {
     478       48996 :     SH_FREE(tb, tb->data);
     479       48996 :     pfree(tb);
     480       48996 : }
     481             : 
     482             : /* reset the contents of a previously created hash table */
     483             : SH_SCOPE void
     484      194740 : SH_RESET(SH_TYPE * tb)
     485             : {
     486      194740 :     memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
     487      194740 :     tb->members = 0;
     488      194740 : }
     489             : 
     490             : /*
     491             :  * Grow a hash table to at least `newsize` buckets.
     492             :  *
     493             :  * Usually this will automatically be called by insertions/deletions, when
     494             :  * necessary. But resizing to the exact input size can be advantageous
     495             :  * performance-wise, when known at some point.
     496             :  */
     497             : SH_SCOPE void
     498        5284 : SH_GROW(SH_TYPE * tb, uint64 newsize)
     499             : {
     500        5284 :     uint64      oldsize = tb->size;
     501        5284 :     SH_ELEMENT_TYPE *olddata = tb->data;
     502             :     SH_ELEMENT_TYPE *newdata;
     503             :     uint32      i;
     504        5284 :     uint32      startelem = 0;
     505             :     uint32      copyelem;
     506             : 
     507             :     Assert(oldsize == pg_nextpower2_64(oldsize));
     508             :     Assert(oldsize != SH_MAX_SIZE);
     509             :     Assert(oldsize < newsize);
     510             : 
     511        5284 :     newsize = SH_COMPUTE_SIZE(newsize);
     512             : 
     513        5284 :     tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * newsize);
     514             : 
     515             :     /*
     516             :      * Update parameters for new table after allocation succeeds to avoid
     517             :      * inconsistent state on OOM.
     518             :      */
     519        5284 :     SH_UPDATE_PARAMETERS(tb, newsize);
     520             : 
     521        5284 :     newdata = tb->data;
     522             : 
     523             :     /*
     524             :      * Copy entries from the old data to newdata. We theoretically could use
     525             :      * SH_INSERT here, to avoid code duplication, but that's more general than
     526             :      * we need. We neither want tb->members increased, nor do we need to do
     527             :      * deal with deleted elements, nor do we need to compare keys. So a
     528             :      * special-cased implementation is lot faster. As resizing can be time
     529             :      * consuming and frequent, that's worthwhile to optimize.
     530             :      *
     531             :      * To be able to simply move entries over, we have to start not at the
     532             :      * first bucket (i.e olddata[0]), but find the first bucket that's either
     533             :      * empty, or is occupied by an entry at its optimal position. Such a
     534             :      * bucket has to exist in any table with a load factor under 1, as not all
     535             :      * buckets are occupied, i.e. there always has to be an empty bucket.  By
     536             :      * starting at such a bucket we can move the entries to the larger table,
     537             :      * without having to deal with conflicts.
     538             :      */
     539             : 
     540             :     /* search for the first element in the hash that's not wrapped around */
     541       76690 :     for (i = 0; i < oldsize; i++)
     542             :     {
     543       76690 :         SH_ELEMENT_TYPE *oldentry = &olddata[i];
     544             :         uint32      hash;
     545             :         uint32      optimal;
     546             : 
     547       76690 :         if (oldentry->status != SH_STATUS_IN_USE)
     548             :         {
     549        3148 :             startelem = i;
     550        3148 :             break;
     551             :         }
     552             : 
     553       73542 :         hash = SH_ENTRY_HASH(tb, oldentry);
     554       73542 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     555             : 
     556       73542 :         if (optimal == i)
     557             :         {
     558        2136 :             startelem = i;
     559        2136 :             break;
     560             :         }
     561             :     }
     562             : 
     563             :     /* and copy all elements in the old table */
     564        5284 :     copyelem = startelem;
     565     1395820 :     for (i = 0; i < oldsize; i++)
     566             :     {
     567     1390536 :         SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
     568             : 
     569     1390536 :         if (oldentry->status == SH_STATUS_IN_USE)
     570             :         {
     571             :             uint32      hash;
     572             :             uint32      startelem2;
     573             :             uint32      curelem;
     574             :             SH_ELEMENT_TYPE *newentry;
     575             : 
     576     1223748 :             hash = SH_ENTRY_HASH(tb, oldentry);
     577     1223748 :             startelem2 = SH_INITIAL_BUCKET(tb, hash);
     578     1223748 :             curelem = startelem2;
     579             : 
     580             :             /* find empty element to put data into */
     581             :             while (true)
     582             :             {
     583     1695390 :                 newentry = &newdata[curelem];
     584             : 
     585     1695390 :                 if (newentry->status == SH_STATUS_EMPTY)
     586             :                 {
     587     1223748 :                     break;
     588             :                 }
     589             : 
     590      471642 :                 curelem = SH_NEXT(tb, curelem, startelem2);
     591             :             }
     592             : 
     593             :             /* copy entry to new slot */
     594     1223748 :             memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
     595             :         }
     596             : 
     597             :         /* can't use SH_NEXT here, would use new size */
     598     1390536 :         copyelem++;
     599     1390536 :         if (copyelem >= oldsize)
     600             :         {
     601        5284 :             copyelem = 0;
     602             :         }
     603             :     }
     604             : 
     605        5284 :     SH_FREE(tb, olddata);
     606        5284 : }
     607             : 
     608             : /*
     609             :  * This is a separate static inline function, so it can be reliably be inlined
     610             :  * into its wrapper functions even if SH_SCOPE is extern.
     611             :  */
     612             : static inline SH_ELEMENT_TYPE *
     613    25889652 : SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
     614             : {
     615             :     uint32      startelem;
     616             :     uint32      curelem;
     617             :     SH_ELEMENT_TYPE *data;
     618             :     uint32      insertdist;
     619             : 
     620         178 : restart:
     621    25889830 :     insertdist = 0;
     622             : 
     623             :     /*
     624             :      * We do the grow check even if the key is actually present, to avoid
     625             :      * doing the check inside the loop. This also lets us avoid having to
     626             :      * re-find our position in the hashtable after resizing.
     627             :      *
     628             :      * Note that this also reached when resizing the table due to
     629             :      * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
     630             :      */
     631    25889830 :     if (unlikely(tb->members >= tb->grow_threshold))
     632             :     {
     633        5284 :         if (unlikely(tb->size == SH_MAX_SIZE))
     634           0 :             sh_error("hash table size exceeded");
     635             : 
     636             :         /*
     637             :          * When optimizing, it can be very useful to print these out.
     638             :          */
     639             :         /* SH_STAT(tb); */
     640        5284 :         SH_GROW(tb, tb->size * 2);
     641             :         /* SH_STAT(tb); */
     642             :     }
     643             : 
     644             :     /* perform insert, start bucket search at optimal location */
     645    25889830 :     data = tb->data;
     646    25889830 :     startelem = SH_INITIAL_BUCKET(tb, hash);
     647    25889830 :     curelem = startelem;
     648             :     while (true)
     649    10202374 :     {
     650             :         uint32      curdist;
     651             :         uint32      curhash;
     652             :         uint32      curoptimal;
     653    36092204 :         SH_ELEMENT_TYPE *entry = &data[curelem];
     654             : 
     655             :         /* any empty bucket can directly be used */
     656    36092204 :         if (entry->status == SH_STATUS_EMPTY)
     657             :         {
     658     5038854 :             tb->members++;
     659     5038854 :             entry->SH_KEY = key;
     660             : #ifdef SH_STORE_HASH
     661     2330538 :             SH_GET_HASH(tb, entry) = hash;
     662             : #endif
     663     5038854 :             entry->status = SH_STATUS_IN_USE;
     664     5038854 :             *found = false;
     665     5038854 :             return entry;
     666             :         }
     667             : 
     668             :         /*
     669             :          * If the bucket is not empty, we either found a match (in which case
     670             :          * we're done), or we have to decide whether to skip over or move the
     671             :          * colliding entry. When the colliding element's distance to its
     672             :          * optimal position is smaller than the to-be-inserted entry's, we
     673             :          * shift the colliding entry (and its followers) forward by one.
     674             :          */
     675             : 
     676    31053350 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     677             :         {
     678             :             Assert(entry->status == SH_STATUS_IN_USE);
     679    20263918 :             *found = true;
     680    20263918 :             return entry;
     681             :         }
     682             : 
     683    10789432 :         curhash = SH_ENTRY_HASH(tb, entry);
     684    10789432 :         curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     685    10789432 :         curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
     686             : 
     687    10789432 :         if (insertdist > curdist)
     688             :         {
     689      587058 :             SH_ELEMENT_TYPE *lastentry = entry;
     690      587058 :             uint32      emptyelem = curelem;
     691             :             uint32      moveelem;
     692      587058 :             int32       emptydist = 0;
     693             : 
     694             :             /* find next empty bucket */
     695             :             while (true)
     696     3012902 :             {
     697             :                 SH_ELEMENT_TYPE *emptyentry;
     698             : 
     699     3599960 :                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
     700     3599960 :                 emptyentry = &data[emptyelem];
     701             : 
     702     3599960 :                 if (emptyentry->status == SH_STATUS_EMPTY)
     703             :                 {
     704      586880 :                     lastentry = emptyentry;
     705      586880 :                     break;
     706             :                 }
     707             : 
     708             :                 /*
     709             :                  * To avoid negative consequences from overly imbalanced
     710             :                  * hashtables, grow the hashtable if collisions would require
     711             :                  * us to move a lot of entries.  The most likely cause of such
     712             :                  * imbalance is filling a (currently) small table, from a
     713             :                  * currently big one, in hash-table order.  Don't grow if the
     714             :                  * hashtable would be too empty, to prevent quick space
     715             :                  * explosion for some weird edge cases.
     716             :                  */
     717     3013080 :                 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
     718         178 :                     ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     719             :                 {
     720         178 :                     tb->grow_threshold = 0;
     721         178 :                     goto restart;
     722             :                 }
     723             :             }
     724             : 
     725             :             /* shift forward, starting at last occupied element */
     726             : 
     727             :             /*
     728             :              * TODO: This could be optimized to be one memcpy in many cases,
     729             :              * excepting wrapping around at the end of ->data. Hasn't shown up
     730             :              * in profiles so far though.
     731             :              */
     732      586880 :             moveelem = emptyelem;
     733     4159962 :             while (moveelem != curelem)
     734             :             {
     735             :                 SH_ELEMENT_TYPE *moveentry;
     736             : 
     737     3573082 :                 moveelem = SH_PREV(tb, moveelem, startelem);
     738     3573082 :                 moveentry = &data[moveelem];
     739             : 
     740     3573082 :                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
     741     3573082 :                 lastentry = moveentry;
     742             :             }
     743             : 
     744             :             /* and fill the now empty spot */
     745      586880 :             tb->members++;
     746             : 
     747      586880 :             entry->SH_KEY = key;
     748             : #ifdef SH_STORE_HASH
     749      304298 :             SH_GET_HASH(tb, entry) = hash;
     750             : #endif
     751      586880 :             entry->status = SH_STATUS_IN_USE;
     752      586880 :             *found = false;
     753      586880 :             return entry;
     754             :         }
     755             : 
     756    10202374 :         curelem = SH_NEXT(tb, curelem, startelem);
     757    10202374 :         insertdist++;
     758             : 
     759             :         /*
     760             :          * To avoid negative consequences from overly imbalanced hashtables,
     761             :          * grow the hashtable if collisions lead to large runs. The most
     762             :          * likely cause of such imbalance is filling a (currently) small
     763             :          * table, from a currently big one, in hash-table order.  Don't grow
     764             :          * if the hashtable would be too empty, to prevent quick space
     765             :          * explosion for some weird edge cases.
     766             :          */
     767    10202374 :         if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
     768           0 :             ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     769             :         {
     770           0 :             tb->grow_threshold = 0;
     771           0 :             goto restart;
     772             :         }
     773             :     }
     774             : }
     775             : 
     776             : /*
     777             :  * Insert the key into the hash-table, set *found to true if the key already
     778             :  * exists, false otherwise. Returns the hash-table entry in either case.
     779             :  */
     780             : SH_SCOPE    SH_ELEMENT_TYPE *
     781    18331698 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
     782             : {
     783    18331698 :     uint32      hash = SH_HASH_KEY(tb, key);
     784             : 
     785    18331698 :     return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
     786             : }
     787             : 
     788             : /*
     789             :  * Insert the key into the hash-table using an already-calculated hash. Set
     790             :  * *found to true if the key already exists, false otherwise. Returns the
     791             :  * hash-table entry in either case.
     792             :  */
     793             : SH_SCOPE    SH_ELEMENT_TYPE *
     794     7557954 : SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
     795             : {
     796     7557954 :     return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
     797             : }
     798             : 
     799             : /*
     800             :  * This is a separate static inline function, so it can be reliably be inlined
     801             :  * into its wrapper functions even if SH_SCOPE is extern.
     802             :  */
     803             : static inline SH_ELEMENT_TYPE *
     804    12403612 : SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
     805             : {
     806    12403612 :     const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
     807    12403612 :     uint32      curelem = startelem;
     808             : 
     809             :     while (true)
     810     5331468 :     {
     811    17735080 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     812             : 
     813    17735080 :         if (entry->status == SH_STATUS_EMPTY)
     814             :         {
     815     3822560 :             return NULL;
     816             :         }
     817             : 
     818             :         Assert(entry->status == SH_STATUS_IN_USE);
     819             : 
     820    13912520 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     821     8581052 :             return entry;
     822             : 
     823             :         /*
     824             :          * TODO: we could stop search based on distance. If the current
     825             :          * buckets's distance-from-optimal is smaller than what we've skipped
     826             :          * already, the entry doesn't exist. Probably only do so if
     827             :          * SH_STORE_HASH is defined, to avoid re-computing hashes?
     828             :          */
     829             : 
     830     5331468 :         curelem = SH_NEXT(tb, curelem, startelem);
     831             :     }
     832             : }
     833             : 
     834             : /*
     835             :  * Lookup entry in hash table.  Returns NULL if key not present.
     836             :  */
     837             : SH_SCOPE    SH_ELEMENT_TYPE *
     838    10670002 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
     839             : {
     840    10670002 :     uint32      hash = SH_HASH_KEY(tb, key);
     841             : 
     842    10670002 :     return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
     843             : }
     844             : 
     845             : /*
     846             :  * Lookup entry in hash table using an already-calculated hash.
     847             :  *
     848             :  * Returns NULL if key not present.
     849             :  */
     850             : SH_SCOPE    SH_ELEMENT_TYPE *
     851     1733610 : SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
     852             : {
     853     1733610 :     return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
     854             : }
     855             : 
     856             : /*
     857             :  * Delete entry from hash table by key.  Returns whether to-be-deleted key was
     858             :  * present.
     859             :  */
     860             : SH_SCOPE bool
     861     2515772 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
     862             : {
     863     2515772 :     uint32      hash = SH_HASH_KEY(tb, key);
     864     2515772 :     uint32      startelem = SH_INITIAL_BUCKET(tb, hash);
     865     2515772 :     uint32      curelem = startelem;
     866             : 
     867             :     while (true)
     868      802168 :     {
     869     3317940 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     870             : 
     871     3317940 :         if (entry->status == SH_STATUS_EMPTY)
     872      145642 :             return false;
     873             : 
     874     6298968 :         if (entry->status == SH_STATUS_IN_USE &&
     875     3172298 :             SH_COMPARE_KEYS(tb, hash, key, entry))
     876             :         {
     877     2370130 :             SH_ELEMENT_TYPE *lastentry = entry;
     878             : 
     879     2370130 :             tb->members--;
     880             : 
     881             :             /*
     882             :              * Backward shift following elements till either an empty element
     883             :              * or an element at its optimal position is encountered.
     884             :              *
     885             :              * While that sounds expensive, the average chain length is short,
     886             :              * and deletions would otherwise require tombstones.
     887             :              */
     888             :             while (true)
     889      136366 :             {
     890             :                 SH_ELEMENT_TYPE *curentry;
     891             :                 uint32      curhash;
     892             :                 uint32      curoptimal;
     893             : 
     894     2506496 :                 curelem = SH_NEXT(tb, curelem, startelem);
     895     2506496 :                 curentry = &tb->data[curelem];
     896             : 
     897     2506496 :                 if (curentry->status != SH_STATUS_IN_USE)
     898             :                 {
     899     2292996 :                     lastentry->status = SH_STATUS_EMPTY;
     900     2292996 :                     break;
     901             :                 }
     902             : 
     903      213500 :                 curhash = SH_ENTRY_HASH(tb, curentry);
     904      213500 :                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     905             : 
     906             :                 /* current is at optimal position, done */
     907      213500 :                 if (curoptimal == curelem)
     908             :                 {
     909       77134 :                     lastentry->status = SH_STATUS_EMPTY;
     910       77134 :                     break;
     911             :                 }
     912             : 
     913             :                 /* shift */
     914      136366 :                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
     915             : 
     916      136366 :                 lastentry = curentry;
     917             :             }
     918             : 
     919     2370130 :             return true;
     920             :         }
     921             : 
     922             :         /* TODO: return false; if distance too big */
     923             : 
     924      802168 :         curelem = SH_NEXT(tb, curelem, startelem);
     925             :     }
     926             : }
     927             : 
     928             : /*
     929             :  * Delete entry from hash table by entry pointer
     930             :  */
     931             : SH_SCOPE void
     932        2388 : SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
     933             : {
     934        2388 :     SH_ELEMENT_TYPE *lastentry = entry;
     935        2388 :     uint32      hash = SH_ENTRY_HASH(tb, entry);
     936        2388 :     uint32      startelem = SH_INITIAL_BUCKET(tb, hash);
     937             :     uint32      curelem;
     938             : 
     939             :     /* Calculate the index of 'entry' */
     940        2388 :     curelem = entry - &tb->data[0];
     941             : 
     942        2388 :     tb->members--;
     943             : 
     944             :     /*
     945             :      * Backward shift following elements till either an empty element or an
     946             :      * element at its optimal position is encountered.
     947             :      *
     948             :      * While that sounds expensive, the average chain length is short, and
     949             :      * deletions would otherwise require tombstones.
     950             :      */
     951             :     while (true)
     952        4776 :     {
     953             :         SH_ELEMENT_TYPE *curentry;
     954             :         uint32      curhash;
     955             :         uint32      curoptimal;
     956             : 
     957        7164 :         curelem = SH_NEXT(tb, curelem, startelem);
     958        7164 :         curentry = &tb->data[curelem];
     959             : 
     960        7164 :         if (curentry->status != SH_STATUS_IN_USE)
     961             :         {
     962        1230 :             lastentry->status = SH_STATUS_EMPTY;
     963        1230 :             break;
     964             :         }
     965             : 
     966        5934 :         curhash = SH_ENTRY_HASH(tb, curentry);
     967        5934 :         curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     968             : 
     969             :         /* current is at optimal position, done */
     970        5934 :         if (curoptimal == curelem)
     971             :         {
     972        1158 :             lastentry->status = SH_STATUS_EMPTY;
     973        1158 :             break;
     974             :         }
     975             : 
     976             :         /* shift */
     977        4776 :         memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
     978             : 
     979        4776 :         lastentry = curentry;
     980             :     }
     981        2388 : }
     982             : 
     983             : /*
     984             :  * Initialize iterator.
     985             :  */
     986             : SH_SCOPE void
     987      206046 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     988             : {
     989      206046 :     uint64      startelem = PG_UINT64_MAX;
     990             : 
     991             :     /*
     992             :      * Search for the first empty element. As deletions during iterations are
     993             :      * supported, we want to start/end at an element that cannot be affected
     994             :      * by elements being shifted.
     995             :      */
     996      323150 :     for (uint32 i = 0; i < tb->size; i++)
     997             :     {
     998      323150 :         SH_ELEMENT_TYPE *entry = &tb->data[i];
     999             : 
    1000      323150 :         if (entry->status != SH_STATUS_IN_USE)
    1001             :         {
    1002      206046 :             startelem = i;
    1003      206046 :             break;
    1004             :         }
    1005             :     }
    1006             : 
    1007             :     /* we should have found an empty element */
    1008             :     Assert(startelem < SH_MAX_SIZE);
    1009             : 
    1010             :     /*
    1011             :      * Iterate backwards, that allows the current element to be deleted, even
    1012             :      * if there are backward shifts
    1013             :      */
    1014      206046 :     iter->cur = startelem;
    1015      206046 :     iter->end = iter->cur;
    1016      206046 :     iter->done = false;
    1017      206046 : }
    1018             : 
    1019             : /*
    1020             :  * Initialize iterator to a specific bucket. That's really only useful for
    1021             :  * cases where callers are partially iterating over the hashspace, and that
    1022             :  * iteration deletes and inserts elements based on visited entries. Doing that
    1023             :  * repeatedly could lead to an unbalanced keyspace when always starting at the
    1024             :  * same position.
    1025             :  */
    1026             : SH_SCOPE void
    1027          36 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
    1028             : {
    1029             :     /*
    1030             :      * Iterate backwards, that allows the current element to be deleted, even
    1031             :      * if there are backward shifts.
    1032             :      */
    1033          36 :     iter->cur = at & tb->sizemask;    /* ensure at is within a valid range */
    1034          36 :     iter->end = iter->cur;
    1035          36 :     iter->done = false;
    1036          36 : }
    1037             : 
    1038             : /*
    1039             :  * Iterate over all entries in the hash-table. Return the next occupied entry,
    1040             :  * or NULL if done.
    1041             :  *
    1042             :  * During iteration the current entry in the hash table may be deleted,
    1043             :  * without leading to elements being skipped or returned twice.  Additionally
    1044             :  * the rest of the table may be modified (i.e. there can be insertions or
    1045             :  * deletions), but if so, there's neither a guarantee that all nodes are
    1046             :  * visited at least once, nor a guarantee that a node is visited at most once.
    1047             :  */
    1048             : SH_SCOPE    SH_ELEMENT_TYPE *
    1049     4693598 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
    1050             : {
    1051             :     /* validate sanity of the given iterator */
    1052             :     Assert(iter->cur < tb->size);
    1053             :     Assert(iter->end < tb->size);
    1054             : 
    1055    23735904 :     while (!iter->done)
    1056             :     {
    1057             :         SH_ELEMENT_TYPE *elem;
    1058             : 
    1059    23529980 :         elem = &tb->data[iter->cur];
    1060             : 
    1061             :         /* next element in backward direction */
    1062    23529980 :         iter->cur = (iter->cur - 1) & tb->sizemask;
    1063             : 
    1064    23529980 :         if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
    1065      205924 :             iter->done = true;
    1066    23529980 :         if (elem->status == SH_STATUS_IN_USE)
    1067             :         {
    1068     4487674 :             return elem;
    1069             :         }
    1070             :     }
    1071             : 
    1072      205924 :     return NULL;
    1073             : }
    1074             : 
    1075             : /*
    1076             :  * Estimate the amount of space needed for a hashtable with nentries entries.
    1077             :  * Return SIZE_MAX if that's too many entries.
    1078             :  *
    1079             :  * nentries is "double" because this is meant for use by the planner,
    1080             :  * which typically works with double rowcount estimates.  So we'd need to
    1081             :  * clamp to integer somewhere and that might as well be here.  We do expect
    1082             :  * the value not to be NaN or negative, else the result will be garbage.
    1083             :  */
    1084             : SH_SCOPE size_t
    1085        4786 : SH_ESTIMATE_SPACE(double nentries)
    1086             : {
    1087             :     uint64      size;
    1088             :     uint64      space;
    1089             : 
    1090             :     /* scale request by SH_FILLFACTOR, as SH_CREATE does */
    1091        4786 :     nentries = nentries / SH_FILLFACTOR;
    1092             : 
    1093             :     /* fail if we'd overrun SH_MAX_SIZE entries */
    1094        4786 :     if (nentries >= SH_MAX_SIZE)
    1095           6 :         return SIZE_MAX;
    1096             : 
    1097             :     /* should be safe to convert to uint64 */
    1098        4780 :     size = (uint64) nentries;
    1099             : 
    1100             :     /* supporting zero sized hashes would complicate matters */
    1101        4780 :     size = Max(size, 2);
    1102             : 
    1103             :     /* round up size to the next power of 2, that's how bucketing works */
    1104        4780 :     size = pg_nextpower2_64(size);
    1105             : 
    1106             :     /* calculate space needed for ->data */
    1107        4780 :     space = ((uint64) sizeof(SH_ELEMENT_TYPE)) * size;
    1108             : 
    1109             :     /* verify that allocation of ->data is possible on this platform */
    1110        4780 :     if (space >= SIZE_MAX / 2)
    1111           0 :         return SIZE_MAX;
    1112             : 
    1113        4780 :     return (size_t) space + sizeof(SH_TYPE);
    1114             : }
    1115             : 
    1116             : /*
    1117             :  * Report some statistics about the state of the hashtable. For
    1118             :  * debugging/profiling purposes only.
    1119             :  */
    1120             : SH_SCOPE void
    1121           0 : SH_STAT(SH_TYPE * tb)
    1122             : {
    1123           0 :     uint32      max_chain_length = 0;
    1124           0 :     uint32      total_chain_length = 0;
    1125             :     double      avg_chain_length;
    1126             :     double      fillfactor;
    1127             :     uint32      i;
    1128             : 
    1129           0 :     uint32     *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32));
    1130           0 :     uint32      total_collisions = 0;
    1131           0 :     uint32      max_collisions = 0;
    1132             :     double      avg_collisions;
    1133             : 
    1134           0 :     for (i = 0; i < tb->size; i++)
    1135             :     {
    1136             :         uint32      hash;
    1137             :         uint32      optimal;
    1138             :         uint32      dist;
    1139             :         SH_ELEMENT_TYPE *elem;
    1140             : 
    1141           0 :         elem = &tb->data[i];
    1142             : 
    1143           0 :         if (elem->status != SH_STATUS_IN_USE)
    1144           0 :             continue;
    1145             : 
    1146           0 :         hash = SH_ENTRY_HASH(tb, elem);
    1147           0 :         optimal = SH_INITIAL_BUCKET(tb, hash);
    1148           0 :         dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
    1149             : 
    1150           0 :         if (dist > max_chain_length)
    1151           0 :             max_chain_length = dist;
    1152           0 :         total_chain_length += dist;
    1153             : 
    1154           0 :         collisions[optimal]++;
    1155             :     }
    1156             : 
    1157           0 :     for (i = 0; i < tb->size; i++)
    1158             :     {
    1159           0 :         uint32      curcoll = collisions[i];
    1160             : 
    1161           0 :         if (curcoll == 0)
    1162           0 :             continue;
    1163             : 
    1164             :         /* single contained element is not a collision */
    1165           0 :         curcoll--;
    1166           0 :         total_collisions += curcoll;
    1167           0 :         if (curcoll > max_collisions)
    1168           0 :             max_collisions = curcoll;
    1169             :     }
    1170             : 
    1171             :     /* large enough to be worth freeing, even if just used for debugging */
    1172           0 :     pfree(collisions);
    1173             : 
    1174           0 :     if (tb->members > 0)
    1175             :     {
    1176           0 :         fillfactor = tb->members / ((double) tb->size);
    1177           0 :         avg_chain_length = ((double) total_chain_length) / tb->members;
    1178           0 :         avg_collisions = ((double) total_collisions) / tb->members;
    1179             :     }
    1180             :     else
    1181             :     {
    1182           0 :         fillfactor = 0;
    1183           0 :         avg_chain_length = 0;
    1184           0 :         avg_collisions = 0;
    1185             :     }
    1186             : 
    1187           0 :     sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
    1188             :            tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
    1189             :            total_collisions, max_collisions, avg_collisions);
    1190           0 : }
    1191             : 
    1192             : #endif                          /* SH_DEFINE */
    1193             : 
    1194             : 
    1195             : /* undefine external parameters, so next hash table can be defined */
    1196             : #undef SH_PREFIX
    1197             : #undef SH_KEY_TYPE
    1198             : #undef SH_KEY
    1199             : #undef SH_ELEMENT_TYPE
    1200             : #undef SH_HASH_KEY
    1201             : #undef SH_SCOPE
    1202             : #undef SH_DECLARE
    1203             : #undef SH_DEFINE
    1204             : #undef SH_GET_HASH
    1205             : #undef SH_STORE_HASH
    1206             : #undef SH_USE_NONDEFAULT_ALLOCATOR
    1207             : #undef SH_EQUAL
    1208             : 
    1209             : /* undefine locally declared macros */
    1210             : #undef SH_MAKE_PREFIX
    1211             : #undef SH_MAKE_NAME
    1212             : #undef SH_MAKE_NAME_
    1213             : #undef SH_FILLFACTOR
    1214             : #undef SH_MAX_FILLFACTOR
    1215             : #undef SH_GROW_MAX_DIB
    1216             : #undef SH_GROW_MAX_MOVE
    1217             : #undef SH_GROW_MIN_FILLFACTOR
    1218             : #undef SH_MAX_SIZE
    1219             : 
    1220             : /* types */
    1221             : #undef SH_TYPE
    1222             : #undef SH_STATUS
    1223             : #undef SH_STATUS_EMPTY
    1224             : #undef SH_STATUS_IN_USE
    1225             : #undef SH_ITERATOR
    1226             : 
    1227             : /* external function names */
    1228             : #undef SH_CREATE
    1229             : #undef SH_DESTROY
    1230             : #undef SH_RESET
    1231             : #undef SH_INSERT
    1232             : #undef SH_INSERT_HASH
    1233             : #undef SH_DELETE_ITEM
    1234             : #undef SH_DELETE
    1235             : #undef SH_LOOKUP
    1236             : #undef SH_LOOKUP_HASH
    1237             : #undef SH_GROW
    1238             : #undef SH_START_ITERATE
    1239             : #undef SH_START_ITERATE_AT
    1240             : #undef SH_ITERATE
    1241             : #undef SH_ALLOCATE
    1242             : #undef SH_FREE
    1243             : #undef SH_ESTIMATE_SPACE
    1244             : #undef SH_STAT
    1245             : 
    1246             : /* internal function names */
    1247             : #undef SH_COMPUTE_SIZE
    1248             : #undef SH_UPDATE_PARAMETERS
    1249             : #undef SH_COMPARE_KEYS
    1250             : #undef SH_INITIAL_BUCKET
    1251             : #undef SH_NEXT
    1252             : #undef SH_PREV
    1253             : #undef SH_DISTANCE_FROM_OPTIMAL
    1254             : #undef SH_ENTRY_HASH
    1255             : #undef SH_INSERT_HASH_INTERNAL
    1256             : #undef SH_LOOKUP_HASH_INTERNAL

Generated by: LCOV version 1.16