LCOV - code coverage report
Current view: top level - src/include/lib - simplehash.h (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 205 250 82.0 %
Date: 2019-08-24 16:07:17 Functions: 36 42 85.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * simplehash.h
       3             :  *
       4             :  *    Hash table implementation which will be specialized to user-defined
       5             :  *    types, by including this file to generate the required code.  It's
       6             :  *    probably not worthwhile to do so for hash tables that aren't performance
       7             :  *    or space sensitive.
       8             :  *
       9             :  * Usage notes:
      10             :  *
      11             :  *    To generate a hash-table and associated functions for a use case several
      12             :  *    macros have to be #define'ed before this file is included.  Including
      13             :  *    the file #undef's all those, so a new hash table can be generated
      14             :  *    afterwards.
      15             :  *    The relevant parameters are:
      16             :  *    - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
      17             :  *      will result in hash table type 'foo_hash' and functions like
      18             :  *      'foo_insert'/'foo_lookup' and so forth.
      19             :  *    - SH_ELEMENT_TYPE - type of the contained elements
      20             :  *    - SH_KEY_TYPE - type of the hashtable's key
      21             :  *    - SH_DECLARE - if defined function prototypes and type declarations are
      22             :  *      generated
      23             :  *    - SH_DEFINE - if defined function definitions are generated
      24             :  *    - SH_SCOPE - in which scope (e.g. extern, static inline) do function
      25             :  *      declarations reside
      26             :  *    - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
      27             :  *      are defined, so you can supply your own
      28             :  *    The following parameters are only relevant when SH_DEFINE is defined:
      29             :  *    - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
      30             :  *    - SH_EQUAL(table, a, b) - compare two table keys
      31             :  *    - SH_HASH_KEY(table, key) - generate hash for the key
      32             :  *    - SH_STORE_HASH - if defined the hash is stored in the elements
      33             :  *    - SH_GET_HASH(tb, a) - return the field to store the hash in
      34             :  *
      35             :  *    For examples of usage look at tidbitmap.c (file local definition) and
      36             :  *    execnodes.h/execGrouping.c (exposed declaration, file local
      37             :  *    implementation).
      38             :  *
      39             :  * Hash table design:
      40             :  *
      41             :  *    The hash table design chosen is a variant of linear open-addressing. The
      42             :  *    reason for doing so is that linear addressing is CPU cache & pipeline
      43             :  *    friendly. The biggest disadvantage of simple linear addressing schemes
      44             :  *    are highly variable lookup times due to clustering, and deletions
      45             :  *    leaving a lot of tombstones around.  To address these issues a variant
      46             :  *    of "robin hood" hashing is employed.  Robin hood hashing optimizes
      47             :  *    chaining lengths by moving elements close to their optimal bucket
      48             :  *    ("rich" elements), out of the way if a to-be-inserted element is further
      49             :  *    away from its optimal position (i.e. it's "poor").  While that can make
      50             :  *    insertions slower, the average lookup performance is a lot better, and
      51             :  *    higher fill factors can be used in a still performant manner.  To avoid
      52             :  *    tombstones - which normally solve the issue that a deleted node's
      53             :  *    presence is relevant to determine whether a lookup needs to continue
      54             :  *    looking or is done - buckets following a deleted element are shifted
      55             :  *    backwards, unless they're empty or already at their optimal position.
      56             :  */
      57             : 
      58             : /* helpers */
      59             : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
      60             : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
      61             : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
      62             : 
      63             : /* name macros for: */
      64             : 
      65             : /* type declarations */
      66             : #define SH_TYPE SH_MAKE_NAME(hash)
      67             : #define SH_STATUS SH_MAKE_NAME(status)
      68             : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
      69             : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
      70             : #define SH_ITERATOR SH_MAKE_NAME(iterator)
      71             : 
      72             : /* function declarations */
      73             : #define SH_CREATE SH_MAKE_NAME(create)
      74             : #define SH_DESTROY SH_MAKE_NAME(destroy)
      75             : #define SH_RESET SH_MAKE_NAME(reset)
      76             : #define SH_INSERT SH_MAKE_NAME(insert)
      77             : #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
      78             : #define SH_DELETE SH_MAKE_NAME(delete)
      79             : #define SH_LOOKUP SH_MAKE_NAME(lookup)
      80             : #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
      81             : #define SH_GROW SH_MAKE_NAME(grow)
      82             : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
      83             : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
      84             : #define SH_ITERATE SH_MAKE_NAME(iterate)
      85             : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
      86             : #define SH_FREE SH_MAKE_NAME(free)
      87             : #define SH_STAT SH_MAKE_NAME(stat)
      88             : 
      89             : /* internal helper functions (no externally visible prototypes) */
      90             : #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
      91             : #define SH_NEXT SH_MAKE_NAME(next)
      92             : #define SH_PREV SH_MAKE_NAME(prev)
      93             : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
      94             : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
      95             : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
      96             : #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
      97             : #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
      98             : 
      99             : /* generate forward declarations necessary to use the hash table */
     100             : #ifdef SH_DECLARE
     101             : 
     102             : /* type definitions */
     103             : typedef struct SH_TYPE
     104             : {
     105             :     /*
     106             :      * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
     107             :      * tables.  Note that the maximum number of elements is lower
     108             :      * (SH_MAX_FILLFACTOR)
     109             :      */
     110             :     uint64      size;
     111             : 
     112             :     /* how many elements have valid contents */
     113             :     uint32      members;
     114             : 
     115             :     /* mask for bucket and size calculations, based on size */
     116             :     uint32      sizemask;
     117             : 
     118             :     /* boundary after which to grow hashtable */
     119             :     uint32      grow_threshold;
     120             : 
     121             :     /* hash buckets */
     122             :     SH_ELEMENT_TYPE *data;
     123             : 
     124             :     /* memory context to use for allocations */
     125             :     MemoryContext ctx;
     126             : 
     127             :     /* user defined data, useful for callbacks */
     128             :     void       *private_data;
     129             : }           SH_TYPE;
     130             : 
     131             : typedef enum SH_STATUS
     132             : {
     133             :     SH_STATUS_EMPTY = 0x00,
     134             :     SH_STATUS_IN_USE = 0x01
     135             : } SH_STATUS;
     136             : 
     137             : typedef struct SH_ITERATOR
     138             : {
     139             :     uint32      cur;            /* current element */
     140             :     uint32      end;
     141             :     bool        done;           /* iterator exhausted? */
     142             : }           SH_ITERATOR;
     143             : 
     144             : /* externally visible function prototypes */
     145             : SH_SCOPE    SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
     146             :                                void *private_data);
     147             : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
     148             : SH_SCOPE void SH_RESET(SH_TYPE * tb);
     149             : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
     150             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
     151             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
     152             :                                             uint32 hash, bool *found);
     153             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
     154             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
     155             :                                             uint32 hash);
     156             : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
     157             : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     158             : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
     159             : SH_SCOPE    SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     160             : SH_SCOPE void SH_STAT(SH_TYPE * tb);
     161             : 
     162             : #endif                          /* SH_DECLARE */
     163             : 
     164             : 
     165             : /* generate implementation of the hash table */
     166             : #ifdef SH_DEFINE
     167             : 
     168             : #include "utils/memutils.h"
     169             : 
     170             : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
     171             : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
     172             : 
     173             : /* normal fillfactor, unless already close to maximum */
     174             : #ifndef SH_FILLFACTOR
     175             : #define SH_FILLFACTOR (0.9)
     176             : #endif
     177             : /* increase fillfactor if we otherwise would error out */
     178             : #define SH_MAX_FILLFACTOR (0.98)
     179             : /* grow if actual and optimal location bigger than */
     180             : #ifndef SH_GROW_MAX_DIB
     181             : #define SH_GROW_MAX_DIB 25
     182             : #endif
     183             : /* grow if more than elements to move when inserting */
     184             : #ifndef SH_GROW_MAX_MOVE
     185             : #define SH_GROW_MAX_MOVE 150
     186             : #endif
     187             : #ifndef SH_GROW_MIN_FILLFACTOR
     188             : /* but do not grow due to SH_GROW_MAX_* if below */
     189             : #define SH_GROW_MIN_FILLFACTOR 0.1
     190             : #endif
     191             : 
     192             : #ifdef SH_STORE_HASH
     193             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
     194             : #else
     195             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
     196             : #endif
     197             : 
     198             : /*
     199             :  * Wrap the following definitions in include guards, to avoid multiple
     200             :  * definition errors if this header is included more than once.  The rest of
     201             :  * the file deliberately has no include guards, because it can be included
     202             :  * with different parameters to define functions and types with non-colliding
     203             :  * names.
     204             :  */
     205             : #ifndef SIMPLEHASH_H
     206             : #define SIMPLEHASH_H
     207             : 
     208             : /* FIXME: can we move these to a central location? */
     209             : 
     210             : /* calculate ceil(log base 2) of num */
     211             : static inline uint64
     212        7434 : sh_log2(uint64 num)
     213             : {
     214             :     int         i;
     215             :     uint64      limit;
     216             : 
     217        7434 :     for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
     218             :         ;
     219        7434 :     return i;
     220             : }
     221             : 
     222             : /* calculate first power of 2 >= num */
     223             : static inline uint64
     224        7434 : sh_pow2(uint64 num)
     225             : {
     226        7434 :     return ((uint64) 1) << sh_log2(num);
     227             : }
     228             : 
     229             : #endif
     230             : 
     231             : /*
     232             :  * Compute sizing parameters for hashtable. Called when creating and growing
     233             :  * the hashtable.
     234             :  */
     235             : static inline void
     236        7434 : SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
     237             : {
     238             :     uint64      size;
     239             : 
     240             :     /* supporting zero sized hashes would complicate matters */
     241        7434 :     size = Max(newsize, 2);
     242             : 
     243             :     /* round up size to the next power of 2, that's how bucketing works */
     244        7434 :     size = sh_pow2(size);
     245             :     Assert(size <= SH_MAX_SIZE);
     246             : 
     247             :     /*
     248             :      * Verify that allocation of ->data is possible on this platform, without
     249             :      * overflowing Size.
     250             :      */
     251        7434 :     if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= MaxAllocHugeSize)
     252           0 :         elog(ERROR, "hash table too large");
     253             : 
     254             :     /* now set size */
     255        7434 :     tb->size = size;
     256             : 
     257        7434 :     if (tb->size == SH_MAX_SIZE)
     258           0 :         tb->sizemask = 0;
     259             :     else
     260        7434 :         tb->sizemask = tb->size - 1;
     261             : 
     262             :     /*
     263             :      * Compute the next threshold at which we need to grow the hash table
     264             :      * again.
     265             :      */
     266        7434 :     if (tb->size == SH_MAX_SIZE)
     267           0 :         tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
     268             :     else
     269        7434 :         tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
     270        7434 : }
     271             : 
     272             : /* return the optimal bucket for the hash */
     273             : static inline uint32
     274     8942820 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
     275             : {
     276     8942820 :     return hash & tb->sizemask;
     277             : }
     278             : 
     279             : /* return next bucket after the current, handling wraparound */
     280             : static inline uint32
     281     3742736 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     282             : {
     283     3742736 :     curelem = (curelem + 1) & tb->sizemask;
     284             : 
     285             :     Assert(curelem != startelem);
     286             : 
     287     3742736 :     return curelem;
     288             : }
     289             : 
     290             : /* return bucket before the current, handling wraparound */
     291             : static inline uint32
     292      441834 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     293             : {
     294      441834 :     curelem = (curelem - 1) & tb->sizemask;
     295             : 
     296             :     Assert(curelem != startelem);
     297             : 
     298      441834 :     return curelem;
     299             : }
     300             : 
     301             : /* return distance between bucket and its optimal position */
     302             : static inline uint32
     303     2374628 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
     304             : {
     305     2374628 :     if (optimal <= bucket)
     306     2364146 :         return bucket - optimal;
     307             :     else
     308       10482 :         return (tb->size + bucket) - optimal;
     309             : }
     310             : 
     311             : static inline uint32
     312     2510390 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
     313             : {
     314             : #ifdef SH_STORE_HASH
     315     1322318 :     return SH_GET_HASH(tb, entry);
     316             : #else
     317     1188072 :     return SH_HASH_KEY(tb, entry->SH_KEY);
     318             : #endif
     319             : }
     320             : 
     321             : /* default memory allocator function */
     322             : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
     323             : static inline void SH_FREE(SH_TYPE * type, void *pointer);
     324             : 
     325             : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
     326             : 
     327             : /* default memory allocator function */
     328             : static inline void *
     329        4582 : SH_ALLOCATE(SH_TYPE * type, Size size)
     330             : {
     331        4582 :     return MemoryContextAllocExtended(type->ctx, size,
     332             :                                       MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
     333             : }
     334             : 
     335             : /* default memory free function */
     336             : static inline void
     337         704 : SH_FREE(SH_TYPE * type, void *pointer)
     338             : {
     339         704 :     pfree(pointer);
     340         704 : }
     341             : 
     342             : #endif
     343             : 
     344             : /*
     345             :  * Create a hash table with enough space for `nelements` distinct members.
     346             :  * Memory for the hash table is allocated from the passed-in context.  If
     347             :  * desired, the array of elements can be allocated using a passed-in allocator;
     348             :  * this could be useful in order to place the array of elements in a shared
     349             :  * memory, or in a context that will outlive the rest of the hash table.
     350             :  * Memory other than for the array of elements will still be allocated from
     351             :  * the passed-in context.
     352             :  */
     353             : SH_SCOPE    SH_TYPE *
     354        6626 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
     355             : {
     356             :     SH_TYPE    *tb;
     357             :     uint64      size;
     358             : 
     359        6626 :     tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
     360        6626 :     tb->ctx = ctx;
     361        6626 :     tb->private_data = private_data;
     362             : 
     363             :     /* increase nelements by fillfactor, want to store nelements elements */
     364        6626 :     size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
     365             : 
     366        6626 :     SH_COMPUTE_PARAMETERS(tb, size);
     367             : 
     368        6626 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     369             : 
     370        6626 :     return tb;
     371             : }
     372             : 
     373             : /* destroy a previously created hash table */
     374             : SH_SCOPE void
     375        2748 : SH_DESTROY(SH_TYPE * tb)
     376             : {
     377        2748 :     SH_FREE(tb, tb->data);
     378        2748 :     pfree(tb);
     379        2748 : }
     380             : 
     381             : /* reset the contents of a previously created hash table */
     382             : SH_SCOPE void
     383       36536 : SH_RESET(SH_TYPE * tb)
     384             : {
     385       36536 :     memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
     386       36536 :     tb->members = 0;
     387       36536 : }
     388             : 
     389             : /*
     390             :  * Grow a hash table to at least `newsize` buckets.
     391             :  *
     392             :  * Usually this will automatically be called by insertions/deletions, when
     393             :  * necessary. But resizing to the exact input size can be advantageous
     394             :  * performance-wise, when known at some point.
     395             :  */
     396             : SH_SCOPE void
     397         808 : SH_GROW(SH_TYPE * tb, uint32 newsize)
     398             : {
     399         808 :     uint64      oldsize = tb->size;
     400         808 :     SH_ELEMENT_TYPE *olddata = tb->data;
     401             :     SH_ELEMENT_TYPE *newdata;
     402             :     uint32      i;
     403         808 :     uint32      startelem = 0;
     404             :     uint32      copyelem;
     405             : 
     406             :     Assert(oldsize == sh_pow2(oldsize));
     407             :     Assert(oldsize != SH_MAX_SIZE);
     408             :     Assert(oldsize < newsize);
     409             : 
     410             :     /* compute parameters for new table */
     411         808 :     SH_COMPUTE_PARAMETERS(tb, newsize);
     412             : 
     413         808 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     414             : 
     415         808 :     newdata = tb->data;
     416             : 
     417             :     /*
     418             :      * Copy entries from the old data to newdata. We theoretically could use
     419             :      * SH_INSERT here, to avoid code duplication, but that's more general than
     420             :      * we need. We neither want tb->members increased, nor do we need to do
     421             :      * deal with deleted elements, nor do we need to compare keys. So a
     422             :      * special-cased implementation is lot faster. As resizing can be time
     423             :      * consuming and frequent, that's worthwhile to optimize.
     424             :      *
     425             :      * To be able to simply move entries over, we have to start not at the
     426             :      * first bucket (i.e olddata[0]), but find the first bucket that's either
     427             :      * empty, or is occupied by an entry at its optimal position. Such a
     428             :      * bucket has to exist in any table with a load factor under 1, as not all
     429             :      * buckets are occupied, i.e. there always has to be an empty bucket.  By
     430             :      * starting at such a bucket we can move the entries to the larger table,
     431             :      * without having to deal with conflicts.
     432             :      */
     433             : 
     434             :     /* search for the first element in the hash that's not wrapped around */
     435        5506 :     for (i = 0; i < oldsize; i++)
     436             :     {
     437        5506 :         SH_ELEMENT_TYPE *oldentry = &olddata[i];
     438             :         uint32      hash;
     439             :         uint32      optimal;
     440             : 
     441        5506 :         if (oldentry->status != SH_STATUS_IN_USE)
     442             :         {
     443         410 :             startelem = i;
     444         410 :             break;
     445             :         }
     446             : 
     447        5096 :         hash = SH_ENTRY_HASH(tb, oldentry);
     448        5096 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     449             : 
     450        5096 :         if (optimal == i)
     451             :         {
     452         398 :             startelem = i;
     453         398 :             break;
     454             :         }
     455             :     }
     456             : 
     457             :     /* and copy all elements in the old table */
     458         808 :     copyelem = startelem;
     459      148148 :     for (i = 0; i < oldsize; i++)
     460             :     {
     461      147340 :         SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
     462             : 
     463      147340 :         if (oldentry->status == SH_STATUS_IN_USE)
     464             :         {
     465             :             uint32      hash;
     466             :             uint32      startelem;
     467             :             uint32      curelem;
     468             :             SH_ELEMENT_TYPE *newentry;
     469             : 
     470      130522 :             hash = SH_ENTRY_HASH(tb, oldentry);
     471      130522 :             startelem = SH_INITIAL_BUCKET(tb, hash);
     472      130522 :             curelem = startelem;
     473             : 
     474             :             /* find empty element to put data into */
     475             :             while (true)
     476             :             {
     477      230830 :                 newentry = &newdata[curelem];
     478             : 
     479      180676 :                 if (newentry->status == SH_STATUS_EMPTY)
     480             :                 {
     481      130522 :                     break;
     482             :                 }
     483             : 
     484       50154 :                 curelem = SH_NEXT(tb, curelem, startelem);
     485             :             }
     486             : 
     487             :             /* copy entry to new slot */
     488      130522 :             memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
     489             :         }
     490             : 
     491             :         /* can't use SH_NEXT here, would use new size */
     492      147340 :         copyelem++;
     493      147340 :         if (copyelem >= oldsize)
     494             :         {
     495         808 :             copyelem = 0;
     496             :         }
     497             :     }
     498             : 
     499         808 :     SH_FREE(tb, olddata);
     500         808 : }
     501             : 
     502             : /*
     503             :  * This is a separate static inline function, so it can be reliably be inlined
     504             :  * into its wrapper functions even if SH_SCOPE is extern.
     505             :  */
     506             : static inline   SH_ELEMENT_TYPE *
     507     5701084 : SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
     508             : {
     509             :     uint32      startelem;
     510             :     uint32      curelem;
     511             :     SH_ELEMENT_TYPE *data;
     512             :     uint32      insertdist;
     513             : 
     514             : restart:
     515     5701084 :     insertdist = 0;
     516             : 
     517             :     /*
     518             :      * We do the grow check even if the key is actually present, to avoid
     519             :      * doing the check inside the loop. This also lets us avoid having to
     520             :      * re-find our position in the hashtable after resizing.
     521             :      *
     522             :      * Note that this also reached when resizing the table due to
     523             :      * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
     524             :      */
     525     5701084 :     if (unlikely(tb->members >= tb->grow_threshold))
     526             :     {
     527         808 :         if (tb->size == SH_MAX_SIZE)
     528             :         {
     529           0 :             elog(ERROR, "hash table size exceeded");
     530             :         }
     531             : 
     532             :         /*
     533             :          * When optimizing, it can be very useful to print these out.
     534             :          */
     535             :         /* SH_STAT(tb); */
     536         808 :         SH_GROW(tb, tb->size * 2);
     537             :         /* SH_STAT(tb); */
     538             :     }
     539             : 
     540             :     /* perform insert, start bucket search at optimal location */
     541     5701084 :     data = tb->data;
     542     5701084 :     startelem = SH_INITIAL_BUCKET(tb, hash);
     543     5701084 :     curelem = startelem;
     544             :     while (true)
     545     2309250 :     {
     546             :         uint32      curdist;
     547             :         uint32      curhash;
     548             :         uint32      curoptimal;
     549     8010334 :         SH_ELEMENT_TYPE *entry = &data[curelem];
     550             : 
     551             :         /* any empty bucket can directly be used */
     552     8010334 :         if (entry->status == SH_STATUS_EMPTY)
     553             :         {
     554      345466 :             tb->members++;
     555      345466 :             entry->SH_KEY = key;
     556             : #ifdef SH_STORE_HASH
     557      290570 :             SH_GET_HASH(tb, entry) = hash;
     558             : #endif
     559      345466 :             entry->status = SH_STATUS_IN_USE;
     560      345466 :             *found = false;
     561      345466 :             return entry;
     562             :         }
     563             : 
     564             :         /*
     565             :          * If the bucket is not empty, we either found a match (in which case
     566             :          * we're done), or we have to decide whether to skip over or move the
     567             :          * colliding entry. When the colliding element's distance to its
     568             :          * optimal position is smaller than the to-be-inserted entry's, we
     569             :          * shift the colliding entry (and its followers) forward by one.
     570             :          */
     571             : 
     572     7664868 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     573             :         {
     574             :             Assert(entry->status == SH_STATUS_IN_USE);
     575     5290240 :             *found = true;
     576     5290240 :             return entry;
     577             :         }
     578             : 
     579     2374628 :         curhash = SH_ENTRY_HASH(tb, entry);
     580     2374628 :         curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     581     2374628 :         curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
     582             : 
     583     2374628 :         if (insertdist > curdist)
     584             :         {
     585       65378 :             SH_ELEMENT_TYPE *lastentry = entry;
     586       65378 :             uint32      emptyelem = curelem;
     587             :             uint32      moveelem;
     588       65378 :             int32       emptydist = 0;
     589             : 
     590             :             /* find next empty bucket */
     591             :             while (true)
     592      378872 :             {
     593             :                 SH_ELEMENT_TYPE *emptyentry;
     594             : 
     595      444250 :                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
     596      444250 :                 emptyentry = &data[emptyelem];
     597             : 
     598      444250 :                 if (emptyentry->status == SH_STATUS_EMPTY)
     599             :                 {
     600       65362 :                     lastentry = emptyentry;
     601       65362 :                     break;
     602             :                 }
     603             : 
     604             :                 /*
     605             :                  * To avoid negative consequences from overly imbalanced
     606             :                  * hashtables, grow the hashtable if collisions would require
     607             :                  * us to move a lot of entries.  The most likely cause of such
     608             :                  * imbalance is filling a (currently) small table, from a
     609             :                  * currently big one, in hash-table order.  Don't grow if the
     610             :                  * hashtable would be too empty, to prevent quick space
     611             :                  * explosion for some weird edge cases.
     612             :                  */
     613      378904 :                 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
     614          16 :                     ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     615             :                 {
     616          16 :                     tb->grow_threshold = 0;
     617          16 :                     goto restart;
     618             :                 }
     619             :             }
     620             : 
     621             :             /* shift forward, starting at last occupied element */
     622             : 
     623             :             /*
     624             :              * TODO: This could be optimized to be one memcpy in may cases,
     625             :              * excepting wrapping around at the end of ->data. Hasn't shown up
     626             :              * in profiles so far though.
     627             :              */
     628       65362 :             moveelem = emptyelem;
     629      572558 :             while (moveelem != curelem)
     630             :             {
     631             :                 SH_ELEMENT_TYPE *moveentry;
     632             : 
     633      441834 :                 moveelem = SH_PREV(tb, moveelem, startelem);
     634      441834 :                 moveentry = &data[moveelem];
     635             : 
     636      441834 :                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
     637      441834 :                 lastentry = moveentry;
     638             :             }
     639             : 
     640             :             /* and fill the now empty spot */
     641       65362 :             tb->members++;
     642             : 
     643       65362 :             entry->SH_KEY = key;
     644             : #ifdef SH_STORE_HASH
     645       52870 :             SH_GET_HASH(tb, entry) = hash;
     646             : #endif
     647       65362 :             entry->status = SH_STATUS_IN_USE;
     648       65362 :             *found = false;
     649       65362 :             return entry;
     650             :         }
     651             : 
     652     2309250 :         curelem = SH_NEXT(tb, curelem, startelem);
     653     2309250 :         insertdist++;
     654             : 
     655             :         /*
     656             :          * To avoid negative consequences from overly imbalanced hashtables,
     657             :          * grow the hashtable if collisions lead to large runs. The most
     658             :          * likely cause of such imbalance is filling a (currently) small
     659             :          * table, from a currently big one, in hash-table order.  Don't grow
     660             :          * if the hashtable would be too empty, to prevent quick space
     661             :          * explosion for some weird edge cases.
     662             :          */
     663     2309250 :         if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
     664           0 :             ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     665             :         {
     666           0 :             tb->grow_threshold = 0;
     667           0 :             goto restart;
     668             :         }
     669             :     }
     670             : }
     671             : 
     672             : /*
     673             :  * Insert the key key into the hash-table, set *found to true if the key
     674             :  * already exists, false otherwise. Returns the hash-table entry in either
     675             :  * case.
     676             :  */
     677             : SH_SCOPE    SH_ELEMENT_TYPE *
     678     5701072 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
     679             : {
     680     5701072 :     uint32 hash = SH_HASH_KEY(tb, key);
     681             : 
     682     5701068 :     return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
     683             : }
     684             : 
     685             : /*
     686             :  * Insert the key key into the hash-table using an already-calculated
     687             :  * hash. Set *found to true if the key already exists, false
     688             :  * otherwise. Returns the hash-table entry in either case.
     689             :  */
     690             : SH_SCOPE    SH_ELEMENT_TYPE *
     691           0 : SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
     692             : {
     693           0 :     return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
     694             : }
     695             : 
     696             : /*
     697             :  * This is a separate static inline function, so it can be reliably be inlined
     698             :  * into its wrapper functions even if SH_SCOPE is extern.
     699             :  */
     700             : static inline   SH_ELEMENT_TYPE *
     701      647166 : SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
     702             : {
     703      647166 :     const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
     704      647166 :     uint32      curelem = startelem;
     705             : 
     706             :     while (true)
     707      921306 :     {
     708     1568472 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     709             : 
     710     1568472 :         if (entry->status == SH_STATUS_EMPTY)
     711             :         {
     712      488464 :             return NULL;
     713             :         }
     714             : 
     715             :         Assert(entry->status == SH_STATUS_IN_USE);
     716             : 
     717     1080008 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     718      158702 :             return entry;
     719             : 
     720             :         /*
     721             :          * TODO: we could stop search based on distance. If the current
     722             :          * buckets's distance-from-optimal is smaller than what we've skipped
     723             :          * already, the entry doesn't exist. Probably only do so if
     724             :          * SH_STORE_HASH is defined, to avoid re-computing hashes?
     725             :          */
     726             : 
     727      921306 :         curelem = SH_NEXT(tb, curelem, startelem);
     728             :     }
     729             : }
     730             : 
     731             : /*
     732             :  * Lookup up entry in hash table.  Returns NULL if key not present.
     733             :  */
     734             : SH_SCOPE    SH_ELEMENT_TYPE *
     735      647166 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
     736             : {
     737      647166 :     uint32 hash = SH_HASH_KEY(tb, key);
     738             : 
     739      647166 :     return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
     740             : }
     741             : 
     742             : /*
     743             :  * Lookup up entry in hash table using an already-calculated hash.
     744             :  *
     745             :  * Returns NULL if key not present.
     746             :  */
     747             : SH_SCOPE    SH_ELEMENT_TYPE *
     748           0 : SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
     749             : {
     750           0 :     return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
     751             : }
     752             : 
     753             : /*
     754             :  * Delete entry from hash table.  Returns whether to-be-deleted key was
     755             :  * present.
     756             :  */
     757             : SH_SCOPE bool
     758       84180 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
     759             : {
     760       84180 :     uint32      hash = SH_HASH_KEY(tb, key);
     761       84180 :     uint32      startelem = SH_INITIAL_BUCKET(tb, hash);
     762       84180 :     uint32      curelem = startelem;
     763             : 
     764             :     while (true)
     765        6536 :     {
     766       90716 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     767             : 
     768       90716 :         if (entry->status == SH_STATUS_EMPTY)
     769       73020 :             return false;
     770             : 
     771       35392 :         if (entry->status == SH_STATUS_IN_USE &&
     772       17696 :             SH_COMPARE_KEYS(tb, hash, key, entry))
     773             :         {
     774       11160 :             SH_ELEMENT_TYPE *lastentry = entry;
     775             : 
     776       11160 :             tb->members--;
     777             : 
     778             :             /*
     779             :              * Backward shift following elements till either an empty element
     780             :              * or an element at its optimal position is encountered.
     781             :              *
     782             :              * While that sounds expensive, the average chain length is short,
     783             :              * and deletions would otherwise require tombstones.
     784             :              */
     785             :             while (true)
     786          80 :             {
     787             :                 SH_ELEMENT_TYPE *curentry;
     788             :                 uint32      curhash;
     789             :                 uint32      curoptimal;
     790             : 
     791       11240 :                 curelem = SH_NEXT(tb, curelem, startelem);
     792       11240 :                 curentry = &tb->data[curelem];
     793             : 
     794       11240 :                 if (curentry->status != SH_STATUS_IN_USE)
     795             :                 {
     796       11096 :                     lastentry->status = SH_STATUS_EMPTY;
     797       11096 :                     break;
     798             :                 }
     799             : 
     800         144 :                 curhash = SH_ENTRY_HASH(tb, curentry);
     801         144 :                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     802             : 
     803             :                 /* current is at optimal position, done */
     804         144 :                 if (curoptimal == curelem)
     805             :                 {
     806          64 :                     lastentry->status = SH_STATUS_EMPTY;
     807          64 :                     break;
     808             :                 }
     809             : 
     810             :                 /* shift */
     811          80 :                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
     812             : 
     813          80 :                 lastentry = curentry;
     814             :             }
     815             : 
     816       11160 :             return true;
     817             :         }
     818             : 
     819             :         /* TODO: return false; if distance too big */
     820             : 
     821        6536 :         curelem = SH_NEXT(tb, curelem, startelem);
     822             :     }
     823             : }
     824             : 
     825             : /*
     826             :  * Initialize iterator.
     827             :  */
     828             : SH_SCOPE void
     829       42040 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     830             : {
     831             :     int         i;
     832       42040 :     uint64      startelem = PG_UINT64_MAX;
     833             : 
     834             :     /*
     835             :      * Search for the first empty element. As deletions during iterations are
     836             :      * supported, we want to start/end at an element that cannot be affected
     837             :      * by elements being shifted.
     838             :      */
     839       46674 :     for (i = 0; i < tb->size; i++)
     840             :     {
     841       46674 :         SH_ELEMENT_TYPE *entry = &tb->data[i];
     842             : 
     843       46674 :         if (entry->status != SH_STATUS_IN_USE)
     844             :         {
     845       42040 :             startelem = i;
     846       42040 :             break;
     847             :         }
     848             :     }
     849             : 
     850             :     Assert(startelem < SH_MAX_SIZE);
     851             : 
     852             :     /*
     853             :      * Iterate backwards, that allows the current element to be deleted, even
     854             :      * if there are backward shifts
     855             :      */
     856       42040 :     iter->cur = startelem;
     857       42040 :     iter->end = iter->cur;
     858       42040 :     iter->done = false;
     859       42040 : }
     860             : 
     861             : /*
     862             :  * Initialize iterator to a specific bucket. That's really only useful for
     863             :  * cases where callers are partially iterating over the hashspace, and that
     864             :  * iteration deletes and inserts elements based on visited entries. Doing that
     865             :  * repeatedly could lead to an unbalanced keyspace when always starting at the
     866             :  * same position.
     867             :  */
     868             : SH_SCOPE void
     869          16 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
     870             : {
     871             :     /*
     872             :      * Iterate backwards, that allows the current element to be deleted, even
     873             :      * if there are backward shifts.
     874             :      */
     875          16 :     iter->cur = at & tb->sizemask;    /* ensure at is within a valid range */
     876          16 :     iter->end = iter->cur;
     877          16 :     iter->done = false;
     878          16 : }
     879             : 
     880             : /*
     881             :  * Iterate over all entries in the hash-table. Return the next occupied entry,
     882             :  * or NULL if done.
     883             :  *
     884             :  * During iteration the current entry in the hash table may be deleted,
     885             :  * without leading to elements being skipped or returned twice.  Additionally
     886             :  * the rest of the table may be modified (i.e. there can be insertions or
     887             :  * deletions), but if so, there's neither a guarantee that all nodes are
     888             :  * visited at least once, nor a guarantee that a node is visited at most once.
     889             :  */
     890             : SH_SCOPE    SH_ELEMENT_TYPE *
     891      402994 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     892             : {
     893     2782316 :     while (!iter->done)
     894             :     {
     895             :         SH_ELEMENT_TYPE *elem;
     896             : 
     897     2337352 :         elem = &tb->data[iter->cur];
     898             : 
     899             :         /* next element in backward direction */
     900     2337352 :         iter->cur = (iter->cur - 1) & tb->sizemask;
     901             : 
     902     2337352 :         if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
     903       41970 :             iter->done = true;
     904     2337352 :         if (elem->status == SH_STATUS_IN_USE)
     905             :         {
     906      361024 :             return elem;
     907             :         }
     908             :     }
     909             : 
     910       41970 :     return NULL;
     911             : }
     912             : 
     913             : /*
     914             :  * Report some statistics about the state of the hashtable. For
     915             :  * debugging/profiling purposes only.
     916             :  */
     917             : SH_SCOPE void
     918           0 : SH_STAT(SH_TYPE * tb)
     919             : {
     920           0 :     uint32      max_chain_length = 0;
     921           0 :     uint32      total_chain_length = 0;
     922             :     double      avg_chain_length;
     923             :     double      fillfactor;
     924             :     uint32      i;
     925             : 
     926           0 :     uint32     *collisions = palloc0(tb->size * sizeof(uint32));
     927           0 :     uint32      total_collisions = 0;
     928           0 :     uint32      max_collisions = 0;
     929             :     double      avg_collisions;
     930             : 
     931           0 :     for (i = 0; i < tb->size; i++)
     932             :     {
     933             :         uint32      hash;
     934             :         uint32      optimal;
     935             :         uint32      dist;
     936             :         SH_ELEMENT_TYPE *elem;
     937             : 
     938           0 :         elem = &tb->data[i];
     939             : 
     940           0 :         if (elem->status != SH_STATUS_IN_USE)
     941           0 :             continue;
     942             : 
     943           0 :         hash = SH_ENTRY_HASH(tb, elem);
     944           0 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     945           0 :         dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
     946             : 
     947           0 :         if (dist > max_chain_length)
     948           0 :             max_chain_length = dist;
     949           0 :         total_chain_length += dist;
     950             : 
     951           0 :         collisions[optimal]++;
     952             :     }
     953             : 
     954           0 :     for (i = 0; i < tb->size; i++)
     955             :     {
     956           0 :         uint32      curcoll = collisions[i];
     957             : 
     958           0 :         if (curcoll == 0)
     959           0 :             continue;
     960             : 
     961             :         /* single contained element is not a collision */
     962           0 :         curcoll--;
     963           0 :         total_collisions += curcoll;
     964           0 :         if (curcoll > max_collisions)
     965           0 :             max_collisions = curcoll;
     966             :     }
     967             : 
     968           0 :     if (tb->members > 0)
     969             :     {
     970           0 :         fillfactor = tb->members / ((double) tb->size);
     971           0 :         avg_chain_length = ((double) total_chain_length) / tb->members;
     972           0 :         avg_collisions = ((double) total_collisions) / tb->members;
     973             :     }
     974             :     else
     975             :     {
     976           0 :         fillfactor = 0;
     977           0 :         avg_chain_length = 0;
     978           0 :         avg_collisions = 0;
     979             :     }
     980             : 
     981           0 :     elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
     982             :          tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
     983             :          total_collisions, max_collisions, avg_collisions);
     984           0 : }
     985             : 
     986             : #endif                          /* SH_DEFINE */
     987             : 
     988             : 
     989             : /* undefine external parameters, so next hash table can be defined */
     990             : #undef SH_PREFIX
     991             : #undef SH_KEY_TYPE
     992             : #undef SH_KEY
     993             : #undef SH_ELEMENT_TYPE
     994             : #undef SH_HASH_KEY
     995             : #undef SH_SCOPE
     996             : #undef SH_DECLARE
     997             : #undef SH_DEFINE
     998             : #undef SH_GET_HASH
     999             : #undef SH_STORE_HASH
    1000             : #undef SH_USE_NONDEFAULT_ALLOCATOR
    1001             : #undef SH_EQUAL
    1002             : 
    1003             : /* undefine locally declared macros */
    1004             : #undef SH_MAKE_PREFIX
    1005             : #undef SH_MAKE_NAME
    1006             : #undef SH_MAKE_NAME_
    1007             : #undef SH_FILLFACTOR
    1008             : #undef SH_MAX_FILLFACTOR
    1009             : #undef SH_GROW_MAX_DIB
    1010             : #undef SH_GROW_MAX_MOVE
    1011             : #undef SH_GROW_MIN_FILLFACTOR
    1012             : #undef SH_MAX_SIZE
    1013             : 
    1014             : /* types */
    1015             : #undef SH_TYPE
    1016             : #undef SH_STATUS
    1017             : #undef SH_STATUS_EMPTY
    1018             : #undef SH_STATUS_IN_USE
    1019             : #undef SH_ITERATOR
    1020             : 
    1021             : /* external function names */
    1022             : #undef SH_CREATE
    1023             : #undef SH_DESTROY
    1024             : #undef SH_RESET
    1025             : #undef SH_INSERT
    1026             : #undef SH_INSERT_HASH
    1027             : #undef SH_DELETE
    1028             : #undef SH_LOOKUP
    1029             : #undef SH_LOOKUP_HASH
    1030             : #undef SH_GROW
    1031             : #undef SH_START_ITERATE
    1032             : #undef SH_START_ITERATE_AT
    1033             : #undef SH_ITERATE
    1034             : #undef SH_ALLOCATE
    1035             : #undef SH_FREE
    1036             : #undef SH_STAT
    1037             : 
    1038             : /* internal function names */
    1039             : #undef SH_COMPUTE_PARAMETERS
    1040             : #undef SH_COMPARE_KEYS
    1041             : #undef SH_INITIAL_BUCKET
    1042             : #undef SH_NEXT
    1043             : #undef SH_PREV
    1044             : #undef SH_DISTANCE_FROM_OPTIMAL
    1045             : #undef SH_ENTRY_HASH
    1046             : #undef SH_INSERT_HASH_INTERNAL
    1047             : #undef SH_LOOKUP_HASH_INTERNAL

Generated by: LCOV version 1.13