LCOV - code coverage report
Current view: top level - src/include/lib - simplehash.h (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 208 249 83.5 %
Date: 2020-06-01 09:07:10 Functions: 51 56 91.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * simplehash.h
       3             :  *
       4             :  *    Hash table implementation which will be specialized to user-defined
       5             :  *    types, by including this file to generate the required code.  It's
       6             :  *    probably not worthwhile to do so for hash tables that aren't performance
       7             :  *    or space sensitive.
       8             :  *
       9             :  * Usage notes:
      10             :  *
      11             :  *    To generate a hash-table and associated functions for a use case several
      12             :  *    macros have to be #define'ed before this file is included.  Including
      13             :  *    the file #undef's all those, so a new hash table can be generated
      14             :  *    afterwards.
      15             :  *    The relevant parameters are:
      16             :  *    - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
      17             :  *      will result in hash table type 'foo_hash' and functions like
      18             :  *      'foo_insert'/'foo_lookup' and so forth.
      19             :  *    - SH_ELEMENT_TYPE - type of the contained elements
      20             :  *    - SH_KEY_TYPE - type of the hashtable's key
      21             :  *    - SH_DECLARE - if defined function prototypes and type declarations are
      22             :  *      generated
      23             :  *    - SH_DEFINE - if defined function definitions are generated
      24             :  *    - SH_SCOPE - in which scope (e.g. extern, static inline) do function
      25             :  *      declarations reside
      26             :  *    - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
      27             :  *      use this to allocate bytes
      28             :  *    - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
      29             :  *      are defined, so you can supply your own
      30             :  *    The following parameters are only relevant when SH_DEFINE is defined:
      31             :  *    - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
      32             :  *    - SH_EQUAL(table, a, b) - compare two table keys
      33             :  *    - SH_HASH_KEY(table, key) - generate hash for the key
      34             :  *    - SH_STORE_HASH - if defined the hash is stored in the elements
      35             :  *    - SH_GET_HASH(tb, a) - return the field to store the hash in
      36             :  *
      37             :  *    For examples of usage look at tidbitmap.c (file local definition) and
      38             :  *    execnodes.h/execGrouping.c (exposed declaration, file local
      39             :  *    implementation).
      40             :  *
      41             :  * Hash table design:
      42             :  *
      43             :  *    The hash table design chosen is a variant of linear open-addressing. The
      44             :  *    reason for doing so is that linear addressing is CPU cache & pipeline
      45             :  *    friendly. The biggest disadvantage of simple linear addressing schemes
      46             :  *    are highly variable lookup times due to clustering, and deletions
      47             :  *    leaving a lot of tombstones around.  To address these issues a variant
      48             :  *    of "robin hood" hashing is employed.  Robin hood hashing optimizes
      49             :  *    chaining lengths by moving elements close to their optimal bucket
      50             :  *    ("rich" elements), out of the way if a to-be-inserted element is further
      51             :  *    away from its optimal position (i.e. it's "poor").  While that can make
      52             :  *    insertions slower, the average lookup performance is a lot better, and
      53             :  *    higher fill factors can be used in a still performant manner.  To avoid
      54             :  *    tombstones - which normally solve the issue that a deleted node's
      55             :  *    presence is relevant to determine whether a lookup needs to continue
      56             :  *    looking or is done - buckets following a deleted element are shifted
      57             :  *    backwards, unless they're empty or already at their optimal position.
      58             :  */
      59             : 
      60             : #include "port/pg_bitutils.h"
      61             : 
      62             : /* helpers */
      63             : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
      64             : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
      65             : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
      66             : 
      67             : /* name macros for: */
      68             : 
      69             : /* type declarations */
      70             : #define SH_TYPE SH_MAKE_NAME(hash)
      71             : #define SH_STATUS SH_MAKE_NAME(status)
      72             : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
      73             : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
      74             : #define SH_ITERATOR SH_MAKE_NAME(iterator)
      75             : 
      76             : /* function declarations */
      77             : #define SH_CREATE SH_MAKE_NAME(create)
      78             : #define SH_DESTROY SH_MAKE_NAME(destroy)
      79             : #define SH_RESET SH_MAKE_NAME(reset)
      80             : #define SH_INSERT SH_MAKE_NAME(insert)
      81             : #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
      82             : #define SH_DELETE SH_MAKE_NAME(delete)
      83             : #define SH_LOOKUP SH_MAKE_NAME(lookup)
      84             : #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
      85             : #define SH_GROW SH_MAKE_NAME(grow)
      86             : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
      87             : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
      88             : #define SH_ITERATE SH_MAKE_NAME(iterate)
      89             : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
      90             : #define SH_FREE SH_MAKE_NAME(free)
      91             : #define SH_STAT SH_MAKE_NAME(stat)
      92             : 
      93             : /* internal helper functions (no externally visible prototypes) */
      94             : #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
      95             : #define SH_NEXT SH_MAKE_NAME(next)
      96             : #define SH_PREV SH_MAKE_NAME(prev)
      97             : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
      98             : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
      99             : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
     100             : #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
     101             : #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
     102             : 
     103             : /* generate forward declarations necessary to use the hash table */
     104             : #ifdef SH_DECLARE
     105             : 
     106             : /* type definitions */
     107             : typedef struct SH_TYPE
     108             : {
     109             :     /*
     110             :      * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
     111             :      * tables.  Note that the maximum number of elements is lower
     112             :      * (SH_MAX_FILLFACTOR)
     113             :      */
     114             :     uint64      size;
     115             : 
     116             :     /* how many elements have valid contents */
     117             :     uint32      members;
     118             : 
     119             :     /* mask for bucket and size calculations, based on size */
     120             :     uint32      sizemask;
     121             : 
     122             :     /* boundary after which to grow hashtable */
     123             :     uint32      grow_threshold;
     124             : 
     125             :     /* hash buckets */
     126             :     SH_ELEMENT_TYPE *data;
     127             : 
     128             : #ifndef SH_RAW_ALLOCATOR
     129             :     /* memory context to use for allocations */
     130             :     MemoryContext ctx;
     131             : #endif
     132             : 
     133             :     /* user defined data, useful for callbacks */
     134             :     void       *private_data;
     135             : }           SH_TYPE;
     136             : 
     137             : typedef enum SH_STATUS
     138             : {
     139             :     SH_STATUS_EMPTY = 0x00,
     140             :     SH_STATUS_IN_USE = 0x01
     141             : } SH_STATUS;
     142             : 
     143             : typedef struct SH_ITERATOR
     144             : {
     145             :     uint32      cur;            /* current element */
     146             :     uint32      end;
     147             :     bool        done;           /* iterator exhausted? */
     148             : }           SH_ITERATOR;
     149             : 
     150             : /* externally visible function prototypes */
     151             : #ifdef SH_RAW_ALLOCATOR
     152             : SH_SCOPE    SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
     153             : #else
     154             : SH_SCOPE    SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
     155             :                                void *private_data);
     156             : #endif
     157             : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
     158             : SH_SCOPE void SH_RESET(SH_TYPE * tb);
     159             : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
     160             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
     161             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
     162             :                                             uint32 hash, bool *found);
     163             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
     164             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
     165             :                                             uint32 hash);
     166             : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
     167             : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     168             : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
     169             : SH_SCOPE    SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     170             : SH_SCOPE void SH_STAT(SH_TYPE * tb);
     171             : 
     172             : #endif                          /* SH_DECLARE */
     173             : 
     174             : 
     175             : /* generate implementation of the hash table */
     176             : #ifdef SH_DEFINE
     177             : 
     178             : #ifndef SH_RAW_ALLOCATOR
     179             : #include "utils/memutils.h"
     180             : #endif
     181             : 
     182             : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
     183             : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
     184             : 
     185             : /* normal fillfactor, unless already close to maximum */
     186             : #ifndef SH_FILLFACTOR
     187             : #define SH_FILLFACTOR (0.9)
     188             : #endif
     189             : /* increase fillfactor if we otherwise would error out */
     190             : #define SH_MAX_FILLFACTOR (0.98)
     191             : /* grow if actual and optimal location bigger than */
     192             : #ifndef SH_GROW_MAX_DIB
     193             : #define SH_GROW_MAX_DIB 25
     194             : #endif
     195             : /* grow if more than elements to move when inserting */
     196             : #ifndef SH_GROW_MAX_MOVE
     197             : #define SH_GROW_MAX_MOVE 150
     198             : #endif
     199             : #ifndef SH_GROW_MIN_FILLFACTOR
     200             : /* but do not grow due to SH_GROW_MAX_* if below */
     201             : #define SH_GROW_MIN_FILLFACTOR 0.1
     202             : #endif
     203             : 
     204             : #ifdef SH_STORE_HASH
     205             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
     206             : #else
     207             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
     208             : #endif
     209             : 
     210             : /*
     211             :  * Wrap the following definitions in include guards, to avoid multiple
     212             :  * definition errors if this header is included more than once.  The rest of
     213             :  * the file deliberately has no include guards, because it can be included
     214             :  * with different parameters to define functions and types with non-colliding
     215             :  * names.
     216             :  */
     217             : #ifndef SIMPLEHASH_H
     218             : #define SIMPLEHASH_H
     219             : 
     220             : #ifdef FRONTEND
     221             : #define sh_error(...) pg_log_error(__VA_ARGS__)
     222             : #define sh_log(...) pg_log_info(__VA_ARGS__)
     223             : #else
     224             : #define sh_error(...) elog(ERROR, __VA_ARGS__)
     225             : #define sh_log(...) elog(LOG, __VA_ARGS__)
     226             : #endif
     227             : 
     228             : #endif
     229             : 
     230             : /*
     231             :  * Compute sizing parameters for hashtable. Called when creating and growing
     232             :  * the hashtable.
     233             :  */
     234             : static inline void
     235        8466 : SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
     236             : {
     237             :     uint64      size;
     238             : 
     239             :     /* supporting zero sized hashes would complicate matters */
     240        8466 :     size = Max(newsize, 2);
     241             : 
     242             :     /* round up size to the next power of 2, that's how bucketing works */
     243        8466 :     size = pg_nextpower2_64(size);
     244             :     Assert(size <= SH_MAX_SIZE);
     245             : 
     246             :     /*
     247             :      * Verify that allocation of ->data is possible on this platform, without
     248             :      * overflowing Size.
     249             :      */
     250        8466 :     if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2)
     251           0 :         sh_error("hash table too large");
     252             : 
     253             :     /* now set size */
     254        8466 :     tb->size = size;
     255             : 
     256        8466 :     if (tb->size == SH_MAX_SIZE)
     257           0 :         tb->sizemask = 0;
     258             :     else
     259        8466 :         tb->sizemask = tb->size - 1;
     260             : 
     261             :     /*
     262             :      * Compute the next threshold at which we need to grow the hash table
     263             :      * again.
     264             :      */
     265        8466 :     if (tb->size == SH_MAX_SIZE)
     266           0 :         tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
     267             :     else
     268        8466 :         tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
     269        8466 : }
     270             : 
     271             : /* return the optimal bucket for the hash */
     272             : static inline uint32
     273    10794568 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
     274             : {
     275    10794568 :     return hash & tb->sizemask;
     276             : }
     277             : 
     278             : /* return next bucket after the current, handling wraparound */
     279             : static inline uint32
     280     4081200 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     281             : {
     282     4081200 :     curelem = (curelem + 1) & tb->sizemask;
     283             : 
     284             :     Assert(curelem != startelem);
     285             : 
     286     4081200 :     return curelem;
     287             : }
     288             : 
     289             : /* return bucket before the current, handling wraparound */
     290             : static inline uint32
     291      963396 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     292             : {
     293      963396 :     curelem = (curelem - 1) & tb->sizemask;
     294             : 
     295             :     Assert(curelem != startelem);
     296             : 
     297      963396 :     return curelem;
     298             : }
     299             : 
     300             : /* return distance between bucket and its optimal position */
     301             : static inline uint32
     302     1860526 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
     303             : {
     304     1860526 :     if (optimal <= bucket)
     305     1857280 :         return bucket - optimal;
     306             :     else
     307        3246 :         return (tb->size + bucket) - optimal;
     308             : }
     309             : 
     310             : static inline uint32
     311     2219214 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
     312             : {
     313             : #ifdef SH_STORE_HASH
     314      981006 :     return SH_GET_HASH(tb, entry);
     315             : #else
     316     1238208 :     return SH_HASH_KEY(tb, entry->SH_KEY);
     317             : #endif
     318             : }
     319             : 
     320             : /* default memory allocator function */
     321             : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
     322             : static inline void SH_FREE(SH_TYPE * type, void *pointer);
     323             : 
     324             : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
     325             : 
     326             : /* default memory allocator function */
     327             : static inline void *
     328        4912 : SH_ALLOCATE(SH_TYPE * type, Size size)
     329             : {
     330             : #ifdef SH_RAW_ALLOCATOR
     331         144 :     return SH_RAW_ALLOCATOR(size);
     332             : #else
     333        4768 :     return MemoryContextAllocExtended(type->ctx, size,
     334             :                                       MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
     335             : #endif
     336             : }
     337             : 
     338             : /* default memory free function */
     339             : static inline void
     340         614 : SH_FREE(SH_TYPE * type, void *pointer)
     341             : {
     342         614 :     pfree(pointer);
     343         614 : }
     344             : 
     345             : #endif
     346             : 
     347             : /*
     348             :  * Create a hash table with enough space for `nelements` distinct members.
     349             :  * Memory for the hash table is allocated from the passed-in context.  If
     350             :  * desired, the array of elements can be allocated using a passed-in allocator;
     351             :  * this could be useful in order to place the array of elements in a shared
     352             :  * memory, or in a context that will outlive the rest of the hash table.
     353             :  * Memory other than for the array of elements will still be allocated from
     354             :  * the passed-in context.
     355             :  */
     356             : #ifdef SH_RAW_ALLOCATOR
     357             : SH_SCOPE    SH_TYPE *
     358         142 : SH_CREATE(uint32 nelements, void *private_data)
     359             : #else
     360             : SH_SCOPE    SH_TYPE *
     361        7606 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
     362             : #endif
     363             : {
     364             :     SH_TYPE    *tb;
     365             :     uint64      size;
     366             : 
     367             : #ifdef SH_RAW_ALLOCATOR
     368         142 :     tb = SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
     369             : #else
     370        7606 :     tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
     371        7606 :     tb->ctx = ctx;
     372             : #endif
     373        7748 :     tb->private_data = private_data;
     374             : 
     375             :     /* increase nelements by fillfactor, want to store nelements elements */
     376        7748 :     size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
     377             : 
     378        7748 :     SH_COMPUTE_PARAMETERS(tb, size);
     379             : 
     380        7748 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     381             : 
     382        7748 :     return tb;
     383             : }
     384             : 
     385             : /* destroy a previously created hash table */
     386             : SH_SCOPE void
     387        3450 : SH_DESTROY(SH_TYPE * tb)
     388             : {
     389        3450 :     SH_FREE(tb, tb->data);
     390        3450 :     pfree(tb);
     391        3450 : }
     392             : 
     393             : /* reset the contents of a previously created hash table */
     394             : SH_SCOPE void
     395      167184 : SH_RESET(SH_TYPE * tb)
     396             : {
     397      167184 :     memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
     398      167184 :     tb->members = 0;
     399      167184 : }
     400             : 
     401             : /*
     402             :  * Grow a hash table to at least `newsize` buckets.
     403             :  *
     404             :  * Usually this will automatically be called by insertions/deletions, when
     405             :  * necessary. But resizing to the exact input size can be advantageous
     406             :  * performance-wise, when known at some point.
     407             :  */
     408             : SH_SCOPE void
     409         718 : SH_GROW(SH_TYPE * tb, uint32 newsize)
     410             : {
     411         718 :     uint64      oldsize = tb->size;
     412         718 :     SH_ELEMENT_TYPE *olddata = tb->data;
     413             :     SH_ELEMENT_TYPE *newdata;
     414             :     uint32      i;
     415         718 :     uint32      startelem = 0;
     416             :     uint32      copyelem;
     417             : 
     418             :     Assert(oldsize == pg_nextpower2_64(oldsize));
     419             :     Assert(oldsize != SH_MAX_SIZE);
     420             :     Assert(oldsize < newsize);
     421             : 
     422             :     /* compute parameters for new table */
     423         718 :     SH_COMPUTE_PARAMETERS(tb, newsize);
     424             : 
     425         718 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     426             : 
     427         718 :     newdata = tb->data;
     428             : 
     429             :     /*
     430             :      * Copy entries from the old data to newdata. We theoretically could use
     431             :      * SH_INSERT here, to avoid code duplication, but that's more general than
     432             :      * we need. We neither want tb->members increased, nor do we need to do
     433             :      * deal with deleted elements, nor do we need to compare keys. So a
     434             :      * special-cased implementation is lot faster. As resizing can be time
     435             :      * consuming and frequent, that's worthwhile to optimize.
     436             :      *
     437             :      * To be able to simply move entries over, we have to start not at the
     438             :      * first bucket (i.e olddata[0]), but find the first bucket that's either
     439             :      * empty, or is occupied by an entry at its optimal position. Such a
     440             :      * bucket has to exist in any table with a load factor under 1, as not all
     441             :      * buckets are occupied, i.e. there always has to be an empty bucket.  By
     442             :      * starting at such a bucket we can move the entries to the larger table,
     443             :      * without having to deal with conflicts.
     444             :      */
     445             : 
     446             :     /* search for the first element in the hash that's not wrapped around */
     447        9540 :     for (i = 0; i < oldsize; i++)
     448             :     {
     449        9540 :         SH_ELEMENT_TYPE *oldentry = &olddata[i];
     450             :         uint32      hash;
     451             :         uint32      optimal;
     452             : 
     453        9540 :         if (oldentry->status != SH_STATUS_IN_USE)
     454             :         {
     455         250 :             startelem = i;
     456         250 :             break;
     457             :         }
     458             : 
     459        9290 :         hash = SH_ENTRY_HASH(tb, oldentry);
     460        9290 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     461             : 
     462        9290 :         if (optimal == i)
     463             :         {
     464         468 :             startelem = i;
     465         468 :             break;
     466             :         }
     467             :     }
     468             : 
     469             :     /* and copy all elements in the old table */
     470         718 :     copyelem = startelem;
     471      401438 :     for (i = 0; i < oldsize; i++)
     472             :     {
     473      400720 :         SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
     474             : 
     475      400720 :         if (oldentry->status == SH_STATUS_IN_USE)
     476             :         {
     477             :             uint32      hash;
     478             :             uint32      startelem;
     479             :             uint32      curelem;
     480             :             SH_ELEMENT_TYPE *newentry;
     481             : 
     482      349254 :             hash = SH_ENTRY_HASH(tb, oldentry);
     483      349254 :             startelem = SH_INITIAL_BUCKET(tb, hash);
     484      349254 :             curelem = startelem;
     485             : 
     486             :             /* find empty element to put data into */
     487             :             while (true)
     488             :             {
     489      129512 :                 newentry = &newdata[curelem];
     490             : 
     491      478766 :                 if (newentry->status == SH_STATUS_EMPTY)
     492             :                 {
     493      349254 :                     break;
     494             :                 }
     495             : 
     496      129512 :                 curelem = SH_NEXT(tb, curelem, startelem);
     497             :             }
     498             : 
     499             :             /* copy entry to new slot */
     500      349254 :             memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
     501             :         }
     502             : 
     503             :         /* can't use SH_NEXT here, would use new size */
     504      400720 :         copyelem++;
     505      400720 :         if (copyelem >= oldsize)
     506             :         {
     507         718 :             copyelem = 0;
     508             :         }
     509             :     }
     510             : 
     511         718 :     SH_FREE(tb, olddata);
     512         718 : }
     513             : 
     514             : /*
     515             :  * This is a separate static inline function, so it can be reliably be inlined
     516             :  * into its wrapper functions even if SH_SCOPE is extern.
     517             :  */
     518             : static inline SH_ELEMENT_TYPE *
     519     6684032 : SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
     520             : {
     521             :     uint32      startelem;
     522             :     uint32      curelem;
     523             :     SH_ELEMENT_TYPE *data;
     524             :     uint32      insertdist;
     525             : 
     526     6684032 : restart:
     527     6684032 :     insertdist = 0;
     528             : 
     529             :     /*
     530             :      * We do the grow check even if the key is actually present, to avoid
     531             :      * doing the check inside the loop. This also lets us avoid having to
     532             :      * re-find our position in the hashtable after resizing.
     533             :      *
     534             :      * Note that this also reached when resizing the table due to
     535             :      * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
     536             :      */
     537     6684032 :     if (unlikely(tb->members >= tb->grow_threshold))
     538             :     {
     539         718 :         if (tb->size == SH_MAX_SIZE)
     540             :         {
     541           0 :             sh_error("hash table size exceeded");
     542             :         }
     543             : 
     544             :         /*
     545             :          * When optimizing, it can be very useful to print these out.
     546             :          */
     547             :         /* SH_STAT(tb); */
     548         718 :         SH_GROW(tb, tb->size * 2);
     549             :         /* SH_STAT(tb); */
     550             :     }
     551             : 
     552             :     /* perform insert, start bucket search at optimal location */
     553     6684032 :     data = tb->data;
     554     6684032 :     startelem = SH_INITIAL_BUCKET(tb, hash);
     555     6684032 :     curelem = startelem;
     556             :     while (true)
     557     1733764 :     {
     558             :         uint32      curdist;
     559             :         uint32      curhash;
     560             :         uint32      curoptimal;
     561     8417796 :         SH_ELEMENT_TYPE *entry = &data[curelem];
     562             : 
     563             :         /* any empty bucket can directly be used */
     564     8417796 :         if (entry->status == SH_STATUS_EMPTY)
     565             :         {
     566      658842 :             tb->members++;
     567      658842 :             entry->SH_KEY = key;
     568             : #ifdef SH_STORE_HASH
     569      516180 :             SH_GET_HASH(tb, entry) = hash;
     570             : #endif
     571      658842 :             entry->status = SH_STATUS_IN_USE;
     572      658842 :             *found = false;
     573      658842 :             return entry;
     574             :         }
     575             : 
     576             :         /*
     577             :          * If the bucket is not empty, we either found a match (in which case
     578             :          * we're done), or we have to decide whether to skip over or move the
     579             :          * colliding entry. When the colliding element's distance to its
     580             :          * optimal position is smaller than the to-be-inserted entry's, we
     581             :          * shift the colliding entry (and its followers) forward by one.
     582             :          */
     583             : 
     584     7758954 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     585             :         {
     586             :             Assert(entry->status == SH_STATUS_IN_USE);
     587     5898428 :             *found = true;
     588     5898428 :             return entry;
     589             :         }
     590             : 
     591     1860526 :         curhash = SH_ENTRY_HASH(tb, entry);
     592     1860526 :         curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     593     1860526 :         curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
     594             : 
     595     1860526 :         if (insertdist > curdist)
     596             :         {
     597      126762 :             SH_ELEMENT_TYPE *lastentry = entry;
     598      126762 :             uint32      emptyelem = curelem;
     599             :             uint32      moveelem;
     600      126762 :             int32       emptydist = 0;
     601             : 
     602             :             /* find next empty bucket */
     603             :             while (true)
     604      847204 :             {
     605             :                 SH_ELEMENT_TYPE *emptyentry;
     606             : 
     607      973966 :                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
     608      973966 :                 emptyentry = &data[emptyelem];
     609             : 
     610      973966 :                 if (emptyentry->status == SH_STATUS_EMPTY)
     611             :                 {
     612      126692 :                     lastentry = emptyentry;
     613      126692 :                     break;
     614             :                 }
     615             : 
     616             :                 /*
     617             :                  * To avoid negative consequences from overly imbalanced
     618             :                  * hashtables, grow the hashtable if collisions would require
     619             :                  * us to move a lot of entries.  The most likely cause of such
     620             :                  * imbalance is filling a (currently) small table, from a
     621             :                  * currently big one, in hash-table order.  Don't grow if the
     622             :                  * hashtable would be too empty, to prevent quick space
     623             :                  * explosion for some weird edge cases.
     624             :                  */
     625      847274 :                 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
     626          70 :                     ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     627             :                 {
     628          70 :                     tb->grow_threshold = 0;
     629          70 :                     goto restart;
     630             :                 }
     631             :             }
     632             : 
     633             :             /* shift forward, starting at last occupied element */
     634             : 
     635             :             /*
     636             :              * TODO: This could be optimized to be one memcpy in many cases,
     637             :              * excepting wrapping around at the end of ->data. Hasn't shown up
     638             :              * in profiles so far though.
     639             :              */
     640      126692 :             moveelem = emptyelem;
     641     1090088 :             while (moveelem != curelem)
     642             :             {
     643             :                 SH_ELEMENT_TYPE *moveentry;
     644             : 
     645      963396 :                 moveelem = SH_PREV(tb, moveelem, startelem);
     646      963396 :                 moveentry = &data[moveelem];
     647             : 
     648      963396 :                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
     649      963396 :                 lastentry = moveentry;
     650             :             }
     651             : 
     652             :             /* and fill the now empty spot */
     653      126692 :             tb->members++;
     654             : 
     655      126692 :             entry->SH_KEY = key;
     656             : #ifdef SH_STORE_HASH
     657      108840 :             SH_GET_HASH(tb, entry) = hash;
     658             : #endif
     659      126692 :             entry->status = SH_STATUS_IN_USE;
     660      126692 :             *found = false;
     661      126692 :             return entry;
     662             :         }
     663             : 
     664     1733764 :         curelem = SH_NEXT(tb, curelem, startelem);
     665     1733764 :         insertdist++;
     666             : 
     667             :         /*
     668             :          * To avoid negative consequences from overly imbalanced hashtables,
     669             :          * grow the hashtable if collisions lead to large runs. The most
     670             :          * likely cause of such imbalance is filling a (currently) small
     671             :          * table, from a currently big one, in hash-table order.  Don't grow
     672             :          * if the hashtable would be too empty, to prevent quick space
     673             :          * explosion for some weird edge cases.
     674             :          */
     675     1733764 :         if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
     676           0 :             ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     677             :         {
     678           0 :             tb->grow_threshold = 0;
     679           0 :             goto restart;
     680             :         }
     681             :     }
     682             : }
     683             : 
     684             : /*
     685             :  * Insert the key key into the hash-table, set *found to true if the key
     686             :  * already exists, false otherwise. Returns the hash-table entry in either
     687             :  * case.
     688             :  */
     689             : SH_SCOPE    SH_ELEMENT_TYPE *
     690     3231862 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
     691             : {
     692     3231862 :     uint32      hash = SH_HASH_KEY(tb, key);
     693             : 
     694     3231862 :     return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
     695             : }
     696             : 
     697             : /*
     698             :  * Insert the key key into the hash-table using an already-calculated
     699             :  * hash. Set *found to true if the key already exists, false
     700             :  * otherwise. Returns the hash-table entry in either case.
     701             :  */
     702             : SH_SCOPE    SH_ELEMENT_TYPE *
     703     3452100 : SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
     704             : {
     705     3452100 :     return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
     706             : }
     707             : 
     708             : /*
     709             :  * This is a separate static inline function, so it can be reliably be inlined
     710             :  * into its wrapper functions even if SH_SCOPE is extern.
     711             :  */
     712             : static inline SH_ELEMENT_TYPE *
     713     1807142 : SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
     714             : {
     715     1807142 :     const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
     716     1807142 :     uint32      curelem = startelem;
     717             : 
     718             :     while (true)
     719     1226182 :     {
     720     3033324 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     721             : 
     722     3033324 :         if (entry->status == SH_STATUS_EMPTY)
     723             :         {
     724     1294676 :             return NULL;
     725             :         }
     726             : 
     727             :         Assert(entry->status == SH_STATUS_IN_USE);
     728             : 
     729     1738648 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     730      512466 :             return entry;
     731             : 
     732             :         /*
     733             :          * TODO: we could stop search based on distance. If the current
     734             :          * buckets's distance-from-optimal is smaller than what we've skipped
     735             :          * already, the entry doesn't exist. Probably only do so if
     736             :          * SH_STORE_HASH is defined, to avoid re-computing hashes?
     737             :          */
     738             : 
     739     1226182 :         curelem = SH_NEXT(tb, curelem, startelem);
     740             :     }
     741             : }
     742             : 
     743             : /*
     744             :  * Lookup up entry in hash table.  Returns NULL if key not present.
     745             :  */
     746             : SH_SCOPE    SH_ELEMENT_TYPE *
     747      631140 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
     748             : {
     749      631140 :     uint32      hash = SH_HASH_KEY(tb, key);
     750             : 
     751      631140 :     return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
     752             : }
     753             : 
     754             : /*
     755             :  * Lookup up entry in hash table using an already-calculated hash.
     756             :  *
     757             :  * Returns NULL if key not present.
     758             :  */
     759             : SH_SCOPE    SH_ELEMENT_TYPE *
     760     1176002 : SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
     761             : {
     762     1176002 :     return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
     763             : }
     764             : 
     765             : /*
     766             :  * Delete entry from hash table.  Returns whether to-be-deleted key was
     767             :  * present.
     768             :  */
     769             : SH_SCOPE bool
     770       84180 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
     771             : {
     772       84180 :     uint32      hash = SH_HASH_KEY(tb, key);
     773       84180 :     uint32      startelem = SH_INITIAL_BUCKET(tb, hash);
     774       84180 :     uint32      curelem = startelem;
     775             : 
     776             :     while (true)
     777        6536 :     {
     778       90716 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     779             : 
     780       90716 :         if (entry->status == SH_STATUS_EMPTY)
     781       73020 :             return false;
     782             : 
     783       17696 :         if (entry->status == SH_STATUS_IN_USE &&
     784       17696 :             SH_COMPARE_KEYS(tb, hash, key, entry))
     785             :         {
     786       11160 :             SH_ELEMENT_TYPE *lastentry = entry;
     787             : 
     788       11160 :             tb->members--;
     789             : 
     790             :             /*
     791             :              * Backward shift following elements till either an empty element
     792             :              * or an element at its optimal position is encountered.
     793             :              *
     794             :              * While that sounds expensive, the average chain length is short,
     795             :              * and deletions would otherwise require tombstones.
     796             :              */
     797             :             while (true)
     798          80 :             {
     799             :                 SH_ELEMENT_TYPE *curentry;
     800             :                 uint32      curhash;
     801             :                 uint32      curoptimal;
     802             : 
     803       11240 :                 curelem = SH_NEXT(tb, curelem, startelem);
     804       11240 :                 curentry = &tb->data[curelem];
     805             : 
     806       11240 :                 if (curentry->status != SH_STATUS_IN_USE)
     807             :                 {
     808       11096 :                     lastentry->status = SH_STATUS_EMPTY;
     809       11096 :                     break;
     810             :                 }
     811             : 
     812         144 :                 curhash = SH_ENTRY_HASH(tb, curentry);
     813         144 :                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     814             : 
     815             :                 /* current is at optimal position, done */
     816         144 :                 if (curoptimal == curelem)
     817             :                 {
     818          64 :                     lastentry->status = SH_STATUS_EMPTY;
     819          64 :                     break;
     820             :                 }
     821             : 
     822             :                 /* shift */
     823          80 :                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
     824             : 
     825          80 :                 lastentry = curentry;
     826             :             }
     827             : 
     828       11160 :             return true;
     829             :         }
     830             : 
     831             :         /* TODO: return false; if distance too big */
     832             : 
     833        6536 :         curelem = SH_NEXT(tb, curelem, startelem);
     834             :     }
     835             : }
     836             : 
     837             : /*
     838             :  * Initialize iterator.
     839             :  */
     840             : SH_SCOPE void
     841      137520 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     842             : {
     843             :     int         i;
     844      137520 :     uint64      startelem = PG_UINT64_MAX;
     845             : 
     846             :     /*
     847             :      * Search for the first empty element. As deletions during iterations are
     848             :      * supported, we want to start/end at an element that cannot be affected
     849             :      * by elements being shifted.
     850             :      */
     851      141674 :     for (i = 0; i < tb->size; i++)
     852             :     {
     853      141674 :         SH_ELEMENT_TYPE *entry = &tb->data[i];
     854             : 
     855      141674 :         if (entry->status != SH_STATUS_IN_USE)
     856             :         {
     857      137520 :             startelem = i;
     858      137520 :             break;
     859             :         }
     860             :     }
     861             : 
     862             :     Assert(startelem < SH_MAX_SIZE);
     863             : 
     864             :     /*
     865             :      * Iterate backwards, that allows the current element to be deleted, even
     866             :      * if there are backward shifts
     867             :      */
     868      137520 :     iter->cur = startelem;
     869      137520 :     iter->end = iter->cur;
     870      137520 :     iter->done = false;
     871      137520 : }
     872             : 
     873             : /*
     874             :  * Initialize iterator to a specific bucket. That's really only useful for
     875             :  * cases where callers are partially iterating over the hashspace, and that
     876             :  * iteration deletes and inserts elements based on visited entries. Doing that
     877             :  * repeatedly could lead to an unbalanced keyspace when always starting at the
     878             :  * same position.
     879             :  */
     880             : SH_SCOPE void
     881          16 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
     882             : {
     883             :     /*
     884             :      * Iterate backwards, that allows the current element to be deleted, even
     885             :      * if there are backward shifts.
     886             :      */
     887          16 :     iter->cur = at & tb->sizemask;    /* ensure at is within a valid range */
     888          16 :     iter->end = iter->cur;
     889          16 :     iter->done = false;
     890          16 : }
     891             : 
     892             : /*
     893             :  * Iterate over all entries in the hash-table. Return the next occupied entry,
     894             :  * or NULL if done.
     895             :  *
     896             :  * During iteration the current entry in the hash table may be deleted,
     897             :  * without leading to elements being skipped or returned twice.  Additionally
     898             :  * the rest of the table may be modified (i.e. there can be insertions or
     899             :  * deletions), but if so, there's neither a guarantee that all nodes are
     900             :  * visited at least once, nor a guarantee that a node is visited at most once.
     901             :  */
     902             : SH_SCOPE    SH_ELEMENT_TYPE *
     903      932166 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     904             : {
     905    72700194 :     while (!iter->done)
     906             :     {
     907             :         SH_ELEMENT_TYPE *elem;
     908             : 
     909    72562746 :         elem = &tb->data[iter->cur];
     910             : 
     911             :         /* next element in backward direction */
     912    72562746 :         iter->cur = (iter->cur - 1) & tb->sizemask;
     913             : 
     914    72562746 :         if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
     915      137448 :             iter->done = true;
     916    72562746 :         if (elem->status == SH_STATUS_IN_USE)
     917             :         {
     918      794718 :             return elem;
     919             :         }
     920             :     }
     921             : 
     922      137448 :     return NULL;
     923             : }
     924             : 
     925             : /*
     926             :  * Report some statistics about the state of the hashtable. For
     927             :  * debugging/profiling purposes only.
     928             :  */
     929             : SH_SCOPE void
     930           0 : SH_STAT(SH_TYPE * tb)
     931             : {
     932           0 :     uint32      max_chain_length = 0;
     933           0 :     uint32      total_chain_length = 0;
     934             :     double      avg_chain_length;
     935             :     double      fillfactor;
     936             :     uint32      i;
     937             : 
     938           0 :     uint32     *collisions = palloc0(tb->size * sizeof(uint32));
     939           0 :     uint32      total_collisions = 0;
     940           0 :     uint32      max_collisions = 0;
     941             :     double      avg_collisions;
     942             : 
     943           0 :     for (i = 0; i < tb->size; i++)
     944             :     {
     945             :         uint32      hash;
     946             :         uint32      optimal;
     947             :         uint32      dist;
     948             :         SH_ELEMENT_TYPE *elem;
     949             : 
     950           0 :         elem = &tb->data[i];
     951             : 
     952           0 :         if (elem->status != SH_STATUS_IN_USE)
     953           0 :             continue;
     954             : 
     955           0 :         hash = SH_ENTRY_HASH(tb, elem);
     956           0 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     957           0 :         dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
     958             : 
     959           0 :         if (dist > max_chain_length)
     960           0 :             max_chain_length = dist;
     961           0 :         total_chain_length += dist;
     962             : 
     963           0 :         collisions[optimal]++;
     964             :     }
     965             : 
     966           0 :     for (i = 0; i < tb->size; i++)
     967             :     {
     968           0 :         uint32      curcoll = collisions[i];
     969             : 
     970           0 :         if (curcoll == 0)
     971           0 :             continue;
     972             : 
     973             :         /* single contained element is not a collision */
     974           0 :         curcoll--;
     975           0 :         total_collisions += curcoll;
     976           0 :         if (curcoll > max_collisions)
     977           0 :             max_collisions = curcoll;
     978             :     }
     979             : 
     980           0 :     if (tb->members > 0)
     981             :     {
     982           0 :         fillfactor = tb->members / ((double) tb->size);
     983           0 :         avg_chain_length = ((double) total_chain_length) / tb->members;
     984           0 :         avg_collisions = ((double) total_collisions) / tb->members;
     985             :     }
     986             :     else
     987             :     {
     988           0 :         fillfactor = 0;
     989           0 :         avg_chain_length = 0;
     990           0 :         avg_collisions = 0;
     991             :     }
     992             : 
     993           0 :     sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
     994             :            tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
     995             :            total_collisions, max_collisions, avg_collisions);
     996           0 : }
     997             : 
     998             : #endif                          /* SH_DEFINE */
     999             : 
    1000             : 
    1001             : /* undefine external parameters, so next hash table can be defined */
    1002             : #undef SH_PREFIX
    1003             : #undef SH_KEY_TYPE
    1004             : #undef SH_KEY
    1005             : #undef SH_ELEMENT_TYPE
    1006             : #undef SH_HASH_KEY
    1007             : #undef SH_SCOPE
    1008             : #undef SH_DECLARE
    1009             : #undef SH_DEFINE
    1010             : #undef SH_GET_HASH
    1011             : #undef SH_STORE_HASH
    1012             : #undef SH_USE_NONDEFAULT_ALLOCATOR
    1013             : #undef SH_EQUAL
    1014             : 
    1015             : /* undefine locally declared macros */
    1016             : #undef SH_MAKE_PREFIX
    1017             : #undef SH_MAKE_NAME
    1018             : #undef SH_MAKE_NAME_
    1019             : #undef SH_FILLFACTOR
    1020             : #undef SH_MAX_FILLFACTOR
    1021             : #undef SH_GROW_MAX_DIB
    1022             : #undef SH_GROW_MAX_MOVE
    1023             : #undef SH_GROW_MIN_FILLFACTOR
    1024             : #undef SH_MAX_SIZE
    1025             : 
    1026             : /* types */
    1027             : #undef SH_TYPE
    1028             : #undef SH_STATUS
    1029             : #undef SH_STATUS_EMPTY
    1030             : #undef SH_STATUS_IN_USE
    1031             : #undef SH_ITERATOR
    1032             : 
    1033             : /* external function names */
    1034             : #undef SH_CREATE
    1035             : #undef SH_DESTROY
    1036             : #undef SH_RESET
    1037             : #undef SH_INSERT
    1038             : #undef SH_INSERT_HASH
    1039             : #undef SH_DELETE
    1040             : #undef SH_LOOKUP
    1041             : #undef SH_LOOKUP_HASH
    1042             : #undef SH_GROW
    1043             : #undef SH_START_ITERATE
    1044             : #undef SH_START_ITERATE_AT
    1045             : #undef SH_ITERATE
    1046             : #undef SH_ALLOCATE
    1047             : #undef SH_FREE
    1048             : #undef SH_STAT
    1049             : 
    1050             : /* internal function names */
    1051             : #undef SH_COMPUTE_PARAMETERS
    1052             : #undef SH_COMPARE_KEYS
    1053             : #undef SH_INITIAL_BUCKET
    1054             : #undef SH_NEXT
    1055             : #undef SH_PREV
    1056             : #undef SH_DISTANCE_FROM_OPTIMAL
    1057             : #undef SH_ENTRY_HASH
    1058             : #undef SH_INSERT_HASH_INTERNAL
    1059             : #undef SH_LOOKUP_HASH_INTERNAL

Generated by: LCOV version 1.13