LCOV - code coverage report
Current view: top level - src/include/lib - simplehash.h (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 201 242 83.1 %
Date: 2019-06-19 16:07:09 Functions: 32 36 88.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * simplehash.h
       3             :  *
       4             :  *    Hash table implementation which will be specialized to user-defined
       5             :  *    types, by including this file to generate the required code.  It's
       6             :  *    probably not worthwhile to do so for hash tables that aren't performance
       7             :  *    or space sensitive.
       8             :  *
       9             :  * Usage notes:
      10             :  *
      11             :  *    To generate a hash-table and associated functions for a use case several
      12             :  *    macros have to be #define'ed before this file is included.  Including
      13             :  *    the file #undef's all those, so a new hash table can be generated
      14             :  *    afterwards.
      15             :  *    The relevant parameters are:
      16             :  *    - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
      17             :  *      will result in hash table type 'foo_hash' and functions like
      18             :  *      'foo_insert'/'foo_lookup' and so forth.
      19             :  *    - SH_ELEMENT_TYPE - type of the contained elements
      20             :  *    - SH_KEY_TYPE - type of the hashtable's key
      21             :  *    - SH_DECLARE - if defined function prototypes and type declarations are
      22             :  *      generated
      23             :  *    - SH_DEFINE - if defined function definitions are generated
      24             :  *    - SH_SCOPE - in which scope (e.g. extern, static inline) do function
      25             :  *      declarations reside
      26             :  *    - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
      27             :  *      are defined, so you can supply your own
      28             :  *    The following parameters are only relevant when SH_DEFINE is defined:
      29             :  *    - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
      30             :  *    - SH_EQUAL(table, a, b) - compare two table keys
      31             :  *    - SH_HASH_KEY(table, key) - generate hash for the key
      32             :  *    - SH_STORE_HASH - if defined the hash is stored in the elements
      33             :  *    - SH_GET_HASH(tb, a) - return the field to store the hash in
      34             :  *
      35             :  *    For examples of usage look at tidbitmap.c (file local definition) and
      36             :  *    execnodes.h/execGrouping.c (exposed declaration, file local
      37             :  *    implementation).
      38             :  *
      39             :  * Hash table design:
      40             :  *
      41             :  *    The hash table design chosen is a variant of linear open-addressing. The
      42             :  *    reason for doing so is that linear addressing is CPU cache & pipeline
      43             :  *    friendly. The biggest disadvantage of simple linear addressing schemes
      44             :  *    are highly variable lookup times due to clustering, and deletions
      45             :  *    leaving a lot of tombstones around.  To address these issues a variant
      46             :  *    of "robin hood" hashing is employed.  Robin hood hashing optimizes
      47             :  *    chaining lengths by moving elements close to their optimal bucket
      48             :  *    ("rich" elements), out of the way if a to-be-inserted element is further
      49             :  *    away from its optimal position (i.e. it's "poor").  While that can make
      50             :  *    insertions slower, the average lookup performance is a lot better, and
      51             :  *    higher fill factors can be used in a still performant manner.  To avoid
      52             :  *    tombstones - which normally solve the issue that a deleted node's
      53             :  *    presence is relevant to determine whether a lookup needs to continue
      54             :  *    looking or is done - buckets following a deleted element are shifted
      55             :  *    backwards, unless they're empty or already at their optimal position.
      56             :  */
      57             : 
      58             : /* helpers */
      59             : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
      60             : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
      61             : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
      62             : 
      63             : /* name macros for: */
      64             : 
      65             : /* type declarations */
      66             : #define SH_TYPE SH_MAKE_NAME(hash)
      67             : #define SH_STATUS SH_MAKE_NAME(status)
      68             : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
      69             : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
      70             : #define SH_ITERATOR SH_MAKE_NAME(iterator)
      71             : 
      72             : /* function declarations */
      73             : #define SH_CREATE SH_MAKE_NAME(create)
      74             : #define SH_DESTROY SH_MAKE_NAME(destroy)
      75             : #define SH_RESET SH_MAKE_NAME(reset)
      76             : #define SH_INSERT SH_MAKE_NAME(insert)
      77             : #define SH_DELETE SH_MAKE_NAME(delete)
      78             : #define SH_LOOKUP SH_MAKE_NAME(lookup)
      79             : #define SH_GROW SH_MAKE_NAME(grow)
      80             : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
      81             : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
      82             : #define SH_ITERATE SH_MAKE_NAME(iterate)
      83             : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
      84             : #define SH_FREE SH_MAKE_NAME(free)
      85             : #define SH_STAT SH_MAKE_NAME(stat)
      86             : 
      87             : /* internal helper functions (no externally visible prototypes) */
      88             : #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
      89             : #define SH_NEXT SH_MAKE_NAME(next)
      90             : #define SH_PREV SH_MAKE_NAME(prev)
      91             : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
      92             : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
      93             : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
      94             : 
      95             : /* generate forward declarations necessary to use the hash table */
      96             : #ifdef SH_DECLARE
      97             : 
      98             : /* type definitions */
      99             : typedef struct SH_TYPE
     100             : {
     101             :     /*
     102             :      * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
     103             :      * tables.  Note that the maximum number of elements is lower
     104             :      * (SH_MAX_FILLFACTOR)
     105             :      */
     106             :     uint64      size;
     107             : 
     108             :     /* how many elements have valid contents */
     109             :     uint32      members;
     110             : 
     111             :     /* mask for bucket and size calculations, based on size */
     112             :     uint32      sizemask;
     113             : 
     114             :     /* boundary after which to grow hashtable */
     115             :     uint32      grow_threshold;
     116             : 
     117             :     /* hash buckets */
     118             :     SH_ELEMENT_TYPE *data;
     119             : 
     120             :     /* memory context to use for allocations */
     121             :     MemoryContext ctx;
     122             : 
     123             :     /* user defined data, useful for callbacks */
     124             :     void       *private_data;
     125             : }           SH_TYPE;
     126             : 
     127             : typedef enum SH_STATUS
     128             : {
     129             :     SH_STATUS_EMPTY = 0x00,
     130             :     SH_STATUS_IN_USE = 0x01
     131             : } SH_STATUS;
     132             : 
     133             : typedef struct SH_ITERATOR
     134             : {
     135             :     uint32      cur;            /* current element */
     136             :     uint32      end;
     137             :     bool        done;           /* iterator exhausted? */
     138             : }           SH_ITERATOR;
     139             : 
     140             : /* externally visible function prototypes */
     141             : SH_SCOPE    SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
     142             :                                void *private_data);
     143             : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
     144             : SH_SCOPE void SH_RESET(SH_TYPE * tb);
     145             : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
     146             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
     147             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
     148             : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
     149             : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     150             : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
     151             : SH_SCOPE    SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     152             : SH_SCOPE void SH_STAT(SH_TYPE * tb);
     153             : 
     154             : #endif                          /* SH_DECLARE */
     155             : 
     156             : 
     157             : /* generate implementation of the hash table */
     158             : #ifdef SH_DEFINE
     159             : 
     160             : #include "utils/memutils.h"
     161             : 
     162             : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
     163             : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
     164             : 
     165             : /* normal fillfactor, unless already close to maximum */
     166             : #ifndef SH_FILLFACTOR
     167             : #define SH_FILLFACTOR (0.9)
     168             : #endif
     169             : /* increase fillfactor if we otherwise would error out */
     170             : #define SH_MAX_FILLFACTOR (0.98)
     171             : /* grow if actual and optimal location bigger than */
     172             : #ifndef SH_GROW_MAX_DIB
     173             : #define SH_GROW_MAX_DIB 25
     174             : #endif
     175             : /* grow if more than elements to move when inserting */
     176             : #ifndef SH_GROW_MAX_MOVE
     177             : #define SH_GROW_MAX_MOVE 150
     178             : #endif
     179             : #ifndef SH_GROW_MIN_FILLFACTOR
     180             : /* but do not grow due to SH_GROW_MAX_* if below */
     181             : #define SH_GROW_MIN_FILLFACTOR 0.1
     182             : #endif
     183             : 
     184             : #ifdef SH_STORE_HASH
     185             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
     186             : #else
     187             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
     188             : #endif
     189             : 
     190             : /*
     191             :  * Wrap the following definitions in include guards, to avoid multiple
     192             :  * definition errors if this header is included more than once.  The rest of
     193             :  * the file deliberately has no include guards, because it can be included
     194             :  * with different parameters to define functions and types with non-colliding
     195             :  * names.
     196             :  */
     197             : #ifndef SIMPLEHASH_H
     198             : #define SIMPLEHASH_H
     199             : 
     200             : /* FIXME: can we move these to a central location? */
     201             : 
     202             : /* calculate ceil(log base 2) of num */
     203             : static inline uint64
     204        7452 : sh_log2(uint64 num)
     205             : {
     206             :     int         i;
     207             :     uint64      limit;
     208             : 
     209        7452 :     for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
     210             :         ;
     211        7452 :     return i;
     212             : }
     213             : 
     214             : /* calculate first power of 2 >= num */
     215             : static inline uint64
     216        7452 : sh_pow2(uint64 num)
     217             : {
     218        7452 :     return ((uint64) 1) << sh_log2(num);
     219             : }
     220             : 
     221             : #endif
     222             : 
     223             : /*
     224             :  * Compute sizing parameters for hashtable. Called when creating and growing
     225             :  * the hashtable.
     226             :  */
     227             : static inline void
     228        7452 : SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
     229             : {
     230             :     uint64      size;
     231             : 
     232             :     /* supporting zero sized hashes would complicate matters */
     233        7452 :     size = Max(newsize, 2);
     234             : 
     235             :     /* round up size to the next power of 2, that's how bucketing works */
     236        7452 :     size = sh_pow2(size);
     237             :     Assert(size <= SH_MAX_SIZE);
     238             : 
     239             :     /*
     240             :      * Verify that allocation of ->data is possible on this platform, without
     241             :      * overflowing Size.
     242             :      */
     243        7452 :     if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= MaxAllocHugeSize)
     244           0 :         elog(ERROR, "hash table too large");
     245             : 
     246             :     /* now set size */
     247        7452 :     tb->size = size;
     248             : 
     249        7452 :     if (tb->size == SH_MAX_SIZE)
     250           0 :         tb->sizemask = 0;
     251             :     else
     252        7452 :         tb->sizemask = tb->size - 1;
     253             : 
     254             :     /*
     255             :      * Compute the next threshold at which we need to grow the hash table
     256             :      * again.
     257             :      */
     258        7452 :     if (tb->size == SH_MAX_SIZE)
     259           0 :         tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
     260             :     else
     261        7452 :         tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
     262        7452 : }
     263             : 
     264             : /* return the optimal bucket for the hash */
     265             : static inline uint32
     266     8833132 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
     267             : {
     268     8833132 :     return hash & tb->sizemask;
     269             : }
     270             : 
     271             : /* return next bucket after the current, handling wraparound */
     272             : static inline uint32
     273     3729702 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     274             : {
     275     3729702 :     curelem = (curelem + 1) & tb->sizemask;
     276             : 
     277             :     Assert(curelem != startelem);
     278             : 
     279     3729702 :     return curelem;
     280             : }
     281             : 
     282             : /* return bucket before the current, handling wraparound */
     283             : static inline uint32
     284      443104 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     285             : {
     286      443104 :     curelem = (curelem - 1) & tb->sizemask;
     287             : 
     288             :     Assert(curelem != startelem);
     289             : 
     290      443104 :     return curelem;
     291             : }
     292             : 
     293             : /* return distance between bucket and its optimal position */
     294             : static inline uint32
     295     2359594 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
     296             : {
     297     2359594 :     if (optimal <= bucket)
     298     2349140 :         return bucket - optimal;
     299             :     else
     300       10454 :         return (tb->size + bucket) - optimal;
     301             : }
     302             : 
     303             : static inline uint32
     304     2495484 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
     305             : {
     306             : #ifdef SH_STORE_HASH
     307     1319292 :     return SH_GET_HASH(tb, entry);
     308             : #else
     309     1176192 :     return SH_HASH_KEY(tb, entry->SH_KEY);
     310             : #endif
     311             : }
     312             : 
     313             : /* default memory allocator function */
     314             : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
     315             : static inline void SH_FREE(SH_TYPE * type, void *pointer);
     316             : 
     317             : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
     318             : 
     319             : /* default memory allocator function */
     320             : static inline void *
     321        4566 : SH_ALLOCATE(SH_TYPE * type, Size size)
     322             : {
     323        4566 :     return MemoryContextAllocExtended(type->ctx, size,
     324             :                                       MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
     325             : }
     326             : 
     327             : /* default memory free function */
     328             : static inline void
     329         698 : SH_FREE(SH_TYPE * type, void *pointer)
     330             : {
     331         698 :     pfree(pointer);
     332         698 : }
     333             : 
     334             : #endif
     335             : 
     336             : /*
     337             :  * Create a hash table with enough space for `nelements` distinct members.
     338             :  * Memory for the hash table is allocated from the passed-in context.  If
     339             :  * desired, the array of elements can be allocated using a passed-in allocator;
     340             :  * this could be useful in order to place the array of elements in a shared
     341             :  * memory, or in a context that will outlive the rest of the hash table.
     342             :  * Memory other than for the array of elements will still be allocated from
     343             :  * the passed-in context.
     344             :  */
     345             : SH_SCOPE    SH_TYPE *
     346        6650 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
     347             : {
     348             :     SH_TYPE    *tb;
     349             :     uint64      size;
     350             : 
     351        6650 :     tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
     352        6650 :     tb->ctx = ctx;
     353        6650 :     tb->private_data = private_data;
     354             : 
     355             :     /* increase nelements by fillfactor, want to store nelements elements */
     356        6650 :     size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
     357             : 
     358        6650 :     SH_COMPUTE_PARAMETERS(tb, size);
     359             : 
     360        6650 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     361             : 
     362        6650 :     return tb;
     363             : }
     364             : 
     365             : /* destroy a previously created hash table */
     366             : SH_SCOPE void
     367        2782 : SH_DESTROY(SH_TYPE * tb)
     368             : {
     369        2782 :     SH_FREE(tb, tb->data);
     370        2782 :     pfree(tb);
     371        2782 : }
     372             : 
     373             : /* reset the contents of a previously created hash table */
     374             : SH_SCOPE void
     375       36308 : SH_RESET(SH_TYPE * tb)
     376             : {
     377       36308 :     memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
     378       36308 :     tb->members = 0;
     379       36308 : }
     380             : 
     381             : /*
     382             :  * Grow a hash table to at least `newsize` buckets.
     383             :  *
     384             :  * Usually this will automatically be called by insertions/deletions, when
     385             :  * necessary. But resizing to the exact input size can be advantageous
     386             :  * performance-wise, when known at some point.
     387             :  */
     388             : SH_SCOPE void
     389         802 : SH_GROW(SH_TYPE * tb, uint32 newsize)
     390             : {
     391         802 :     uint64      oldsize = tb->size;
     392         802 :     SH_ELEMENT_TYPE *olddata = tb->data;
     393             :     SH_ELEMENT_TYPE *newdata;
     394             :     uint32      i;
     395         802 :     uint32      startelem = 0;
     396             :     uint32      copyelem;
     397             : 
     398             :     Assert(oldsize == sh_pow2(oldsize));
     399             :     Assert(oldsize != SH_MAX_SIZE);
     400             :     Assert(oldsize < newsize);
     401             : 
     402             :     /* compute parameters for new table */
     403         802 :     SH_COMPUTE_PARAMETERS(tb, newsize);
     404             : 
     405         802 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     406             : 
     407         802 :     newdata = tb->data;
     408             : 
     409             :     /*
     410             :      * Copy entries from the old data to newdata. We theoretically could use
     411             :      * SH_INSERT here, to avoid code duplication, but that's more general than
     412             :      * we need. We neither want tb->members increased, nor do we need to do
     413             :      * deal with deleted elements, nor do we need to compare keys. So a
     414             :      * special-cased implementation is lot faster. As resizing can be time
     415             :      * consuming and frequent, that's worthwhile to optimize.
     416             :      *
     417             :      * To be able to simply move entries over, we have to start not at the
     418             :      * first bucket (i.e olddata[0]), but find the first bucket that's either
     419             :      * empty, or is occupied by an entry at its optimal position. Such a
     420             :      * bucket has to exist in any table with a load factor under 1, as not all
     421             :      * buckets are occupied, i.e. there always has to be an empty bucket.  By
     422             :      * starting at such a bucket we can move the entries to the larger table,
     423             :      * without having to deal with conflicts.
     424             :      */
     425             : 
     426             :     /* search for the first element in the hash that's not wrapped around */
     427        5478 :     for (i = 0; i < oldsize; i++)
     428             :     {
     429        5478 :         SH_ELEMENT_TYPE *oldentry = &olddata[i];
     430             :         uint32      hash;
     431             :         uint32      optimal;
     432             : 
     433        5478 :         if (oldentry->status != SH_STATUS_IN_USE)
     434             :         {
     435         472 :             startelem = i;
     436         472 :             break;
     437             :         }
     438             : 
     439        5006 :         hash = SH_ENTRY_HASH(tb, oldentry);
     440        5006 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     441             : 
     442        5006 :         if (optimal == i)
     443             :         {
     444         330 :             startelem = i;
     445         330 :             break;
     446             :         }
     447             :     }
     448             : 
     449             :     /* and copy all elements in the old table */
     450         802 :     copyelem = startelem;
     451      148378 :     for (i = 0; i < oldsize; i++)
     452             :     {
     453      147576 :         SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
     454             : 
     455      147576 :         if (oldentry->status == SH_STATUS_IN_USE)
     456             :         {
     457             :             uint32      hash;
     458             :             uint32      startelem;
     459             :             uint32      curelem;
     460             :             SH_ELEMENT_TYPE *newentry;
     461             : 
     462      130740 :             hash = SH_ENTRY_HASH(tb, oldentry);
     463      130740 :             startelem = SH_INITIAL_BUCKET(tb, hash);
     464      130740 :             curelem = startelem;
     465             : 
     466             :             /* find empty element to put data into */
     467             :             while (true)
     468             :             {
     469      231720 :                 newentry = &newdata[curelem];
     470             : 
     471      181230 :                 if (newentry->status == SH_STATUS_EMPTY)
     472             :                 {
     473      130740 :                     break;
     474             :                 }
     475             : 
     476       50490 :                 curelem = SH_NEXT(tb, curelem, startelem);
     477             :             }
     478             : 
     479             :             /* copy entry to new slot */
     480      130740 :             memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
     481             :         }
     482             : 
     483             :         /* can't use SH_NEXT here, would use new size */
     484      147576 :         copyelem++;
     485      147576 :         if (copyelem >= oldsize)
     486             :         {
     487         802 :             copyelem = 0;
     488             :         }
     489             :     }
     490             : 
     491         802 :     SH_FREE(tb, olddata);
     492         802 : }
     493             : 
     494             : /*
     495             :  * Insert the key key into the hash-table, set *found to true if the key
     496             :  * already exists, false otherwise. Returns the hash-table entry in either
     497             :  * case.
     498             :  */
     499             : SH_SCOPE    SH_ELEMENT_TYPE *
     500     5607102 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
     501             : {
     502     5607102 :     uint32      hash = SH_HASH_KEY(tb, key);
     503             :     uint32      startelem;
     504             :     uint32      curelem;
     505             :     SH_ELEMENT_TYPE *data;
     506             :     uint32      insertdist;
     507             : 
     508             : restart:
     509     5607114 :     insertdist = 0;
     510             : 
     511             :     /*
     512             :      * We do the grow check even if the key is actually present, to avoid
     513             :      * doing the check inside the loop. This also lets us avoid having to
     514             :      * re-find our position in the hashtable after resizing.
     515             :      *
     516             :      * Note that this also reached when resizing the table due to
     517             :      * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
     518             :      */
     519     5607114 :     if (unlikely(tb->members >= tb->grow_threshold))
     520             :     {
     521         802 :         if (tb->size == SH_MAX_SIZE)
     522             :         {
     523           0 :             elog(ERROR, "hash table size exceeded");
     524             :         }
     525             : 
     526             :         /*
     527             :          * When optimizing, it can be very useful to print these out.
     528             :          */
     529             :         /* SH_STAT(tb); */
     530         802 :         SH_GROW(tb, tb->size * 2);
     531             :         /* SH_STAT(tb); */
     532             :     }
     533             : 
     534             :     /* perform insert, start bucket search at optimal location */
     535     5607114 :     data = tb->data;
     536     5607114 :     startelem = SH_INITIAL_BUCKET(tb, hash);
     537     5607114 :     curelem = startelem;
     538             :     while (true)
     539     2293958 :     {
     540             :         uint32      curdist;
     541             :         uint32      curhash;
     542             :         uint32      curoptimal;
     543     7901072 :         SH_ELEMENT_TYPE *entry = &data[curelem];
     544             : 
     545             :         /* any empty bucket can directly be used */
     546     7901072 :         if (entry->status == SH_STATUS_EMPTY)
     547             :         {
     548      343686 :             tb->members++;
     549      343686 :             entry->SH_KEY = key;
     550             : #ifdef SH_STORE_HASH
     551      289786 :             SH_GET_HASH(tb, entry) = hash;
     552             : #endif
     553      343686 :             entry->status = SH_STATUS_IN_USE;
     554      343686 :             *found = false;
     555      343686 :             return entry;
     556             :         }
     557             : 
     558             :         /*
     559             :          * If the bucket is not empty, we either found a match (in which case
     560             :          * we're done), or we have to decide whether to skip over or move the
     561             :          * colliding entry. When the colliding element's distance to its
     562             :          * optimal position is smaller than the to-be-inserted entry's, we
     563             :          * shift the colliding entry (and its followers) forward by one.
     564             :          */
     565             : 
     566     7557386 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     567             :         {
     568             :             Assert(entry->status == SH_STATUS_IN_USE);
     569     5197792 :             *found = true;
     570     5197792 :             return entry;
     571             :         }
     572             : 
     573     2359594 :         curhash = SH_ENTRY_HASH(tb, entry);
     574     2359594 :         curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     575     2359594 :         curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
     576             : 
     577     2359594 :         if (insertdist > curdist)
     578             :         {
     579       65636 :             SH_ELEMENT_TYPE *lastentry = entry;
     580       65636 :             uint32      emptyelem = curelem;
     581             :             uint32      moveelem;
     582       65636 :             int32       emptydist = 0;
     583             : 
     584             :             /* find next empty bucket */
     585             :             while (true)
     586      379884 :             {
     587             :                 SH_ELEMENT_TYPE *emptyentry;
     588             : 
     589      445520 :                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
     590      445520 :                 emptyentry = &data[emptyelem];
     591             : 
     592      445520 :                 if (emptyentry->status == SH_STATUS_EMPTY)
     593             :                 {
     594       65620 :                     lastentry = emptyentry;
     595       65620 :                     break;
     596             :                 }
     597             : 
     598             :                 /*
     599             :                  * To avoid negative consequences from overly imbalanced
     600             :                  * hashtables, grow the hashtable if collisions would require
     601             :                  * us to move a lot of entries.  The most likely cause of such
     602             :                  * imbalance is filling a (currently) small table, from a
     603             :                  * currently big one, in hash-table order.  Don't grow if the
     604             :                  * hashtable would be too empty, to prevent quick space
     605             :                  * explosion for some weird edge cases.
     606             :                  */
     607      379916 :                 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
     608          16 :                     ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     609             :                 {
     610          16 :                     tb->grow_threshold = 0;
     611          16 :                     goto restart;
     612             :                 }
     613             :             }
     614             : 
     615             :             /* shift forward, starting at last occupied element */
     616             : 
     617             :             /*
     618             :              * TODO: This could be optimized to be one memcpy in may cases,
     619             :              * excepting wrapping around at the end of ->data. Hasn't shown up
     620             :              * in profiles so far though.
     621             :              */
     622       65620 :             moveelem = emptyelem;
     623      574344 :             while (moveelem != curelem)
     624             :             {
     625             :                 SH_ELEMENT_TYPE *moveentry;
     626             : 
     627      443104 :                 moveelem = SH_PREV(tb, moveelem, startelem);
     628      443104 :                 moveentry = &data[moveelem];
     629             : 
     630      443104 :                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
     631      443104 :                 lastentry = moveentry;
     632             :             }
     633             : 
     634             :             /* and fill the now empty spot */
     635       65620 :             tb->members++;
     636             : 
     637       65620 :             entry->SH_KEY = key;
     638             : #ifdef SH_STORE_HASH
     639       53138 :             SH_GET_HASH(tb, entry) = hash;
     640             : #endif
     641       65620 :             entry->status = SH_STATUS_IN_USE;
     642       65620 :             *found = false;
     643       65620 :             return entry;
     644             :         }
     645             : 
     646     2293958 :         curelem = SH_NEXT(tb, curelem, startelem);
     647     2293958 :         insertdist++;
     648             : 
     649             :         /*
     650             :          * To avoid negative consequences from overly imbalanced hashtables,
     651             :          * grow the hashtable if collisions lead to large runs. The most
     652             :          * likely cause of such imbalance is filling a (currently) small
     653             :          * table, from a currently big one, in hash-table order.  Don't grow
     654             :          * if the hashtable would be too empty, to prevent quick space
     655             :          * explosion for some weird edge cases.
     656             :          */
     657     2293958 :         if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
     658           0 :             ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
     659             :         {
     660           0 :             tb->grow_threshold = 0;
     661           0 :             goto restart;
     662             :         }
     663             :     }
     664             : }
     665             : 
     666             : /*
     667             :  * Lookup up entry in hash table.  Returns NULL if key not present.
     668             :  */
     669             : SH_SCOPE    SH_ELEMENT_TYPE *
     670      646354 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
     671             : {
     672      646354 :     uint32      hash = SH_HASH_KEY(tb, key);
     673      646354 :     const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
     674      646354 :     uint32      curelem = startelem;
     675             : 
     676             :     while (true)
     677      921958 :     {
     678     1568312 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     679             : 
     680     1568312 :         if (entry->status == SH_STATUS_EMPTY)
     681             :         {
     682      487690 :             return NULL;
     683             :         }
     684             : 
     685             :         Assert(entry->status == SH_STATUS_IN_USE);
     686             : 
     687     1080622 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     688      158664 :             return entry;
     689             : 
     690             :         /*
     691             :          * TODO: we could stop search based on distance. If the current
     692             :          * buckets's distance-from-optimal is smaller than what we've skipped
     693             :          * already, the entry doesn't exist. Probably only do so if
     694             :          * SH_STORE_HASH is defined, to avoid re-computing hashes?
     695             :          */
     696             : 
     697      921958 :         curelem = SH_NEXT(tb, curelem, startelem);
     698             :     }
     699             : }
     700             : 
     701             : /*
     702             :  * Delete entry from hash table.  Returns whether to-be-deleted key was
     703             :  * present.
     704             :  */
     705             : SH_SCOPE bool
     706       84180 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
     707             : {
     708       84180 :     uint32      hash = SH_HASH_KEY(tb, key);
     709       84180 :     uint32      startelem = SH_INITIAL_BUCKET(tb, hash);
     710       84180 :     uint32      curelem = startelem;
     711             : 
     712             :     while (true)
     713        6536 :     {
     714       90716 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     715             : 
     716       90716 :         if (entry->status == SH_STATUS_EMPTY)
     717       73020 :             return false;
     718             : 
     719       35392 :         if (entry->status == SH_STATUS_IN_USE &&
     720       17696 :             SH_COMPARE_KEYS(tb, hash, key, entry))
     721             :         {
     722       11160 :             SH_ELEMENT_TYPE *lastentry = entry;
     723             : 
     724       11160 :             tb->members--;
     725             : 
     726             :             /*
     727             :              * Backward shift following elements till either an empty element
     728             :              * or an element at its optimal position is encountered.
     729             :              *
     730             :              * While that sounds expensive, the average chain length is short,
     731             :              * and deletions would otherwise require tombstones.
     732             :              */
     733             :             while (true)
     734          80 :             {
     735             :                 SH_ELEMENT_TYPE *curentry;
     736             :                 uint32      curhash;
     737             :                 uint32      curoptimal;
     738             : 
     739       11240 :                 curelem = SH_NEXT(tb, curelem, startelem);
     740       11240 :                 curentry = &tb->data[curelem];
     741             : 
     742       11240 :                 if (curentry->status != SH_STATUS_IN_USE)
     743             :                 {
     744       11096 :                     lastentry->status = SH_STATUS_EMPTY;
     745       11096 :                     break;
     746             :                 }
     747             : 
     748         144 :                 curhash = SH_ENTRY_HASH(tb, curentry);
     749         144 :                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     750             : 
     751             :                 /* current is at optimal position, done */
     752         144 :                 if (curoptimal == curelem)
     753             :                 {
     754          64 :                     lastentry->status = SH_STATUS_EMPTY;
     755          64 :                     break;
     756             :                 }
     757             : 
     758             :                 /* shift */
     759          80 :                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
     760             : 
     761          80 :                 lastentry = curentry;
     762             :             }
     763             : 
     764       11160 :             return true;
     765             :         }
     766             : 
     767             :         /* TODO: return false; if distance too big */
     768             : 
     769        6536 :         curelem = SH_NEXT(tb, curelem, startelem);
     770             :     }
     771             : }
     772             : 
     773             : /*
     774             :  * Initialize iterator.
     775             :  */
     776             : SH_SCOPE void
     777       41836 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     778             : {
     779             :     int         i;
     780       41836 :     uint64      startelem = PG_UINT64_MAX;
     781             : 
     782             :     /*
     783             :      * Search for the first empty element. As deletions during iterations are
     784             :      * supported, we want to start/end at an element that cannot be affected
     785             :      * by elements being shifted.
     786             :      */
     787       46528 :     for (i = 0; i < tb->size; i++)
     788             :     {
     789       46528 :         SH_ELEMENT_TYPE *entry = &tb->data[i];
     790             : 
     791       46528 :         if (entry->status != SH_STATUS_IN_USE)
     792             :         {
     793       41836 :             startelem = i;
     794       41836 :             break;
     795             :         }
     796             :     }
     797             : 
     798             :     Assert(startelem < SH_MAX_SIZE);
     799             : 
     800             :     /*
     801             :      * Iterate backwards, that allows the current element to be deleted, even
     802             :      * if there are backward shifts
     803             :      */
     804       41836 :     iter->cur = startelem;
     805       41836 :     iter->end = iter->cur;
     806       41836 :     iter->done = false;
     807       41836 : }
     808             : 
     809             : /*
     810             :  * Initialize iterator to a specific bucket. That's really only useful for
     811             :  * cases where callers are partially iterating over the hashspace, and that
     812             :  * iteration deletes and inserts elements based on visited entries. Doing that
     813             :  * repeatedly could lead to an unbalanced keyspace when always starting at the
     814             :  * same position.
     815             :  */
     816             : SH_SCOPE void
     817          16 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
     818             : {
     819             :     /*
     820             :      * Iterate backwards, that allows the current element to be deleted, even
     821             :      * if there are backward shifts.
     822             :      */
     823          16 :     iter->cur = at & tb->sizemask;    /* ensure at is within a valid range */
     824          16 :     iter->end = iter->cur;
     825          16 :     iter->done = false;
     826          16 : }
     827             : 
     828             : /*
     829             :  * Iterate over all entries in the hash-table. Return the next occupied entry,
     830             :  * or NULL if done.
     831             :  *
     832             :  * During iteration the current entry in the hash table may be deleted,
     833             :  * without leading to elements being skipped or returned twice.  Additionally
     834             :  * the rest of the table may be modified (i.e. there can be insertions or
     835             :  * deletions), but if so, there's neither a guarantee that all nodes are
     836             :  * visited at least once, nor a guarantee that a node is visited at most once.
     837             :  */
     838             : SH_SCOPE    SH_ELEMENT_TYPE *
     839      401060 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     840             : {
     841     2781366 :     while (!iter->done)
     842             :     {
     843             :         SH_ELEMENT_TYPE *elem;
     844             : 
     845     2338540 :         elem = &tb->data[iter->cur];
     846             : 
     847             :         /* next element in backward direction */
     848     2338540 :         iter->cur = (iter->cur - 1) & tb->sizemask;
     849             : 
     850     2338540 :         if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
     851       41766 :             iter->done = true;
     852     2338540 :         if (elem->status == SH_STATUS_IN_USE)
     853             :         {
     854      359294 :             return elem;
     855             :         }
     856             :     }
     857             : 
     858       41766 :     return NULL;
     859             : }
     860             : 
     861             : /*
     862             :  * Report some statistics about the state of the hashtable. For
     863             :  * debugging/profiling purposes only.
     864             :  */
     865             : SH_SCOPE void
     866           0 : SH_STAT(SH_TYPE * tb)
     867             : {
     868           0 :     uint32      max_chain_length = 0;
     869           0 :     uint32      total_chain_length = 0;
     870             :     double      avg_chain_length;
     871             :     double      fillfactor;
     872             :     uint32      i;
     873             : 
     874           0 :     uint32     *collisions = palloc0(tb->size * sizeof(uint32));
     875           0 :     uint32      total_collisions = 0;
     876           0 :     uint32      max_collisions = 0;
     877             :     double      avg_collisions;
     878             : 
     879           0 :     for (i = 0; i < tb->size; i++)
     880             :     {
     881             :         uint32      hash;
     882             :         uint32      optimal;
     883             :         uint32      dist;
     884             :         SH_ELEMENT_TYPE *elem;
     885             : 
     886           0 :         elem = &tb->data[i];
     887             : 
     888           0 :         if (elem->status != SH_STATUS_IN_USE)
     889           0 :             continue;
     890             : 
     891           0 :         hash = SH_ENTRY_HASH(tb, elem);
     892           0 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     893           0 :         dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
     894             : 
     895           0 :         if (dist > max_chain_length)
     896           0 :             max_chain_length = dist;
     897           0 :         total_chain_length += dist;
     898             : 
     899           0 :         collisions[optimal]++;
     900             :     }
     901             : 
     902           0 :     for (i = 0; i < tb->size; i++)
     903             :     {
     904           0 :         uint32      curcoll = collisions[i];
     905             : 
     906           0 :         if (curcoll == 0)
     907           0 :             continue;
     908             : 
     909             :         /* single contained element is not a collision */
     910           0 :         curcoll--;
     911           0 :         total_collisions += curcoll;
     912           0 :         if (curcoll > max_collisions)
     913           0 :             max_collisions = curcoll;
     914             :     }
     915             : 
     916           0 :     if (tb->members > 0)
     917             :     {
     918           0 :         fillfactor = tb->members / ((double) tb->size);
     919           0 :         avg_chain_length = ((double) total_chain_length) / tb->members;
     920           0 :         avg_collisions = ((double) total_collisions) / tb->members;
     921             :     }
     922             :     else
     923             :     {
     924           0 :         fillfactor = 0;
     925           0 :         avg_chain_length = 0;
     926           0 :         avg_collisions = 0;
     927             :     }
     928             : 
     929           0 :     elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
     930             :          tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
     931             :          total_collisions, max_collisions, avg_collisions);
     932           0 : }
     933             : 
     934             : #endif                          /* SH_DEFINE */
     935             : 
     936             : 
     937             : /* undefine external parameters, so next hash table can be defined */
     938             : #undef SH_PREFIX
     939             : #undef SH_KEY_TYPE
     940             : #undef SH_KEY
     941             : #undef SH_ELEMENT_TYPE
     942             : #undef SH_HASH_KEY
     943             : #undef SH_SCOPE
     944             : #undef SH_DECLARE
     945             : #undef SH_DEFINE
     946             : #undef SH_GET_HASH
     947             : #undef SH_STORE_HASH
     948             : #undef SH_USE_NONDEFAULT_ALLOCATOR
     949             : #undef SH_EQUAL
     950             : 
     951             : /* undefine locally declared macros */
     952             : #undef SH_MAKE_PREFIX
     953             : #undef SH_MAKE_NAME
     954             : #undef SH_MAKE_NAME_
     955             : #undef SH_FILLFACTOR
     956             : #undef SH_MAX_FILLFACTOR
     957             : #undef SH_GROW_MAX_DIB
     958             : #undef SH_GROW_MAX_MOVE
     959             : #undef SH_GROW_MIN_FILLFACTOR
     960             : #undef SH_MAX_SIZE
     961             : 
     962             : /* types */
     963             : #undef SH_TYPE
     964             : #undef SH_STATUS
     965             : #undef SH_STATUS_EMPTY
     966             : #undef SH_STATUS_IN_USE
     967             : #undef SH_ITERATOR
     968             : 
     969             : /* external function names */
     970             : #undef SH_CREATE
     971             : #undef SH_DESTROY
     972             : #undef SH_RESET
     973             : #undef SH_INSERT
     974             : #undef SH_DELETE
     975             : #undef SH_LOOKUP
     976             : #undef SH_GROW
     977             : #undef SH_START_ITERATE
     978             : #undef SH_START_ITERATE_AT
     979             : #undef SH_ITERATE
     980             : #undef SH_ALLOCATE
     981             : #undef SH_FREE
     982             : #undef SH_STAT
     983             : 
     984             : /* internal function names */
     985             : #undef SH_COMPUTE_PARAMETERS
     986             : #undef SH_COMPARE_KEYS
     987             : #undef SH_INITIAL_BUCKET
     988             : #undef SH_NEXT
     989             : #undef SH_PREV
     990             : #undef SH_DISTANCE_FROM_OPTIMAL
     991             : #undef SH_ENTRY_HASH

Generated by: LCOV version 1.13