LCOV - code coverage report
Current view: top level - src/backend/lib - dshash.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 83.9 % 273 229
Test Date: 2026-03-04 23:14:33 Functions: 93.8 % 32 30
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * dshash.c
       4              :  *    Concurrent hash tables backed by dynamic shared memory areas.
       5              :  *
       6              :  * This is an open hashing hash table, with a linked list at each table
       7              :  * entry.  It supports dynamic resizing, as required to prevent the linked
       8              :  * lists from growing too long on average.  Currently, only growing is
       9              :  * supported: the hash table never becomes smaller.
      10              :  *
      11              :  * To deal with concurrency, it has a fixed size set of partitions, each of
      12              :  * which is independently locked.  Each bucket maps to a partition; so insert,
      13              :  * find and iterate operations normally only acquire one lock.  Therefore,
      14              :  * good concurrency is achieved whenever such operations don't collide at the
      15              :  * lock partition level.  However, when a resize operation begins, all
      16              :  * partition locks must be acquired simultaneously for a brief period.  This
      17              :  * is only expected to happen a small number of times until a stable size is
      18              :  * found, since growth is geometric.
      19              :  *
      20              :  * Future versions may support iterators and incremental resizing; for now
      21              :  * the implementation is minimalist.
      22              :  *
      23              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      24              :  * Portions Copyright (c) 1994, Regents of the University of California
      25              :  *
      26              :  * IDENTIFICATION
      27              :  *    src/backend/lib/dshash.c
      28              :  *
      29              :  *-------------------------------------------------------------------------
      30              :  */
      31              : 
      32              : #include "postgres.h"
      33              : 
      34              : #include <limits.h>
      35              : 
      36              : #include "common/hashfn.h"
      37              : #include "lib/dshash.h"
      38              : #include "storage/lwlock.h"
      39              : #include "utils/dsa.h"
      40              : 
      41              : /*
      42              :  * An item in the hash table.  This wraps the user's entry object in an
      43              :  * envelop that holds a pointer back to the bucket and a pointer to the next
      44              :  * item in the bucket.
      45              :  */
      46              : struct dshash_table_item
      47              : {
      48              :     /* The next item in the same bucket. */
      49              :     dsa_pointer next;
      50              :     /* The hashed key, to avoid having to recompute it. */
      51              :     dshash_hash hash;
      52              :     /* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
      53              : };
      54              : 
      55              : /*
      56              :  * The number of partitions for locking purposes.  This is set to match
      57              :  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
      58              :  * the buffer pool must be good enough for any other purpose.  This could
      59              :  * become a runtime parameter in future.
      60              :  */
      61              : #define DSHASH_NUM_PARTITIONS_LOG2 7
      62              : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
      63              : 
      64              : /* A magic value used to identify our hash tables. */
      65              : #define DSHASH_MAGIC 0x75ff6a20
      66              : 
      67              : /*
      68              :  * Tracking information for each lock partition.  Initially, each partition
      69              :  * corresponds to one bucket, but each time the hash table grows, the buckets
      70              :  * covered by each partition split so the number of buckets covered doubles.
      71              :  *
      72              :  * We might want to add padding here so that each partition is on a different
      73              :  * cache line, but doing so would bloat this structure considerably.
      74              :  */
      75              : typedef struct dshash_partition
      76              : {
      77              :     LWLock      lock;           /* Protects all buckets in this partition. */
      78              :     size_t      count;          /* # of items in this partition's buckets */
      79              : } dshash_partition;
      80              : 
      81              : /*
      82              :  * The head object for a hash table.  This will be stored in dynamic shared
      83              :  * memory.
      84              :  */
      85              : typedef struct dshash_table_control
      86              : {
      87              :     dshash_table_handle handle;
      88              :     uint32      magic;
      89              :     dshash_partition partitions[DSHASH_NUM_PARTITIONS];
      90              :     int         lwlock_tranche_id;
      91              : 
      92              :     /*
      93              :      * The following members are written to only when ALL partitions locks are
      94              :      * held.  They can be read when any one partition lock is held.
      95              :      */
      96              : 
      97              :     /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
      98              :     size_t      size_log2;      /* log2(number of buckets) */
      99              :     dsa_pointer buckets;        /* current bucket array */
     100              : } dshash_table_control;
     101              : 
     102              : /*
     103              :  * Per-backend state for a dynamic hash table.
     104              :  */
     105              : struct dshash_table
     106              : {
     107              :     dsa_area   *area;           /* Backing dynamic shared memory area. */
     108              :     dshash_parameters params;   /* Parameters. */
     109              :     void       *arg;            /* User-supplied data pointer. */
     110              :     dshash_table_control *control;  /* Control object in DSM. */
     111              :     dsa_pointer *buckets;       /* Current bucket pointers in DSM. */
     112              :     size_t      size_log2;      /* log2(number of buckets) */
     113              : };
     114              : 
     115              : /* Given a pointer to an item, find the entry (user data) it holds. */
     116              : #define ENTRY_FROM_ITEM(item) \
     117              :     ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
     118              : 
     119              : /* Given a pointer to an entry, find the item that holds it. */
     120              : #define ITEM_FROM_ENTRY(entry)                                          \
     121              :     ((dshash_table_item *)((char *)(entry) -                            \
     122              :                              MAXALIGN(sizeof(dshash_table_item))))
     123              : 
     124              : /* How many resize operations (bucket splits) have there been? */
     125              : #define NUM_SPLITS(size_log2)                   \
     126              :     (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
     127              : 
     128              : /* How many buckets are there in a given size? */
     129              : #define NUM_BUCKETS(size_log2)      \
     130              :     (((size_t) 1) << (size_log2))
     131              : 
     132              : /* How many buckets are there in each partition at a given size? */
     133              : #define BUCKETS_PER_PARTITION(size_log2)        \
     134              :     (((size_t) 1) << NUM_SPLITS(size_log2))
     135              : 
     136              : /* Max entries before we need to grow.  Half + quarter = 75% load factor. */
     137              : #define MAX_COUNT_PER_PARTITION(hash_table)             \
     138              :     (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
     139              :      BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
     140              : 
     141              : /* Choose partition based on the highest order bits of the hash. */
     142              : #define PARTITION_FOR_HASH(hash)                                        \
     143              :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
     144              : 
     145              : /*
     146              :  * Find the bucket index for a given hash and table size.  Each time the table
     147              :  * doubles in size, the appropriate bucket for a given hash value doubles and
     148              :  * possibly adds one, depending on the newly revealed bit, so that all buckets
     149              :  * are split.
     150              :  */
     151              : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)     \
     152              :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
     153              : 
     154              : /* The index of the first bucket in a given partition. */
     155              : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)    \
     156              :     ((partition) << NUM_SPLITS(size_log2))
     157              : 
     158              : /* Choose partition based on bucket index. */
     159              : #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)               \
     160              :     ((bucket_idx) >> NUM_SPLITS(size_log2))
     161              : 
     162              : /* The head of the active bucket for a given hash value (lvalue). */
     163              : #define BUCKET_FOR_HASH(hash_table, hash)                               \
     164              :     (hash_table->buckets[                                                \
     165              :         BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,                            \
     166              :                                        hash_table->size_log2)])
     167              : 
     168              : static void delete_item(dshash_table *hash_table,
     169              :                         dshash_table_item *item);
     170              : static void resize(dshash_table *hash_table, size_t new_size_log2);
     171              : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
     172              : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
     173              :                                                 const void *key,
     174              :                                                 dsa_pointer item_pointer);
     175              : static void insert_item_into_bucket(dshash_table *hash_table,
     176              :                                     dsa_pointer item_pointer,
     177              :                                     dshash_table_item *item,
     178              :                                     dsa_pointer *bucket);
     179              : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
     180              :                                              const void *key,
     181              :                                              dsa_pointer *bucket);
     182              : static bool delete_key_from_bucket(dshash_table *hash_table,
     183              :                                    const void *key,
     184              :                                    dsa_pointer *bucket_head);
     185              : static bool delete_item_from_bucket(dshash_table *hash_table,
     186              :                                     dshash_table_item *item,
     187              :                                     dsa_pointer *bucket_head);
     188              : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
     189              : static inline bool equal_keys(dshash_table *hash_table,
     190              :                               const void *a, const void *b);
     191              : static inline void copy_key(dshash_table *hash_table, void *dest,
     192              :                             const void *src);
     193              : 
     194              : #define PARTITION_LOCK(hash_table, i)           \
     195              :     (&(hash_table)->control->partitions[(i)].lock)
     196              : 
     197              : #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
     198              :     Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
     199              :            DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
     200              : 
     201              : /*
     202              :  * Create a new hash table backed by the given dynamic shared area, with the
     203              :  * given parameters.  The returned object is allocated in backend-local memory
     204              :  * using the current MemoryContext.  'arg' will be passed through to the
     205              :  * compare, hash, and copy functions.
     206              :  */
     207              : dshash_table *
     208         1399 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
     209              : {
     210              :     dshash_table *hash_table;
     211              :     dsa_pointer control;
     212              : 
     213              :     /* Allocate the backend-local object representing the hash table. */
     214         1399 :     hash_table = palloc_object(dshash_table);
     215              : 
     216              :     /* Allocate the control object in shared memory. */
     217         1399 :     control = dsa_allocate(area, sizeof(dshash_table_control));
     218              : 
     219              :     /* Set up the local and shared hash table structs. */
     220         1399 :     hash_table->area = area;
     221         1399 :     hash_table->params = *params;
     222         1399 :     hash_table->arg = arg;
     223         1399 :     hash_table->control = dsa_get_address(area, control);
     224         1399 :     hash_table->control->handle = control;
     225         1399 :     hash_table->control->magic = DSHASH_MAGIC;
     226         1399 :     hash_table->control->lwlock_tranche_id = params->tranche_id;
     227              : 
     228              :     /* Set up the array of lock partitions. */
     229              :     {
     230         1399 :         dshash_partition *partitions = hash_table->control->partitions;
     231         1399 :         int         tranche_id = hash_table->control->lwlock_tranche_id;
     232              :         int         i;
     233              : 
     234       180471 :         for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     235              :         {
     236       179072 :             LWLockInitialize(&partitions[i].lock, tranche_id);
     237       179072 :             partitions[i].count = 0;
     238              :         }
     239              :     }
     240              : 
     241              :     /*
     242              :      * Set up the initial array of buckets.  Our initial size is the same as
     243              :      * the number of partitions.
     244              :      */
     245         1399 :     hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
     246         2798 :     hash_table->control->buckets =
     247         1399 :         dsa_allocate_extended(area,
     248              :                               sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
     249              :                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
     250         1399 :     if (!DsaPointerIsValid(hash_table->control->buckets))
     251              :     {
     252            0 :         dsa_free(area, control);
     253            0 :         ereport(ERROR,
     254              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     255              :                  errmsg("out of memory"),
     256              :                  errdetail("Failed on DSA request of size %zu.",
     257              :                            sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
     258              :     }
     259         2798 :     hash_table->buckets = dsa_get_address(area,
     260         1399 :                                           hash_table->control->buckets);
     261         1399 :     hash_table->size_log2 = hash_table->control->size_log2;
     262              : 
     263         1399 :     return hash_table;
     264              : }
     265              : 
     266              : /*
     267              :  * Attach to an existing hash table using a handle.  The returned object is
     268              :  * allocated in backend-local memory using the current MemoryContext.  'arg'
     269              :  * will be passed through to the compare and hash functions.
     270              :  */
     271              : dshash_table *
     272        26559 : dshash_attach(dsa_area *area, const dshash_parameters *params,
     273              :               dshash_table_handle handle, void *arg)
     274              : {
     275              :     dshash_table *hash_table;
     276              :     dsa_pointer control;
     277              : 
     278              :     /* Allocate the backend-local object representing the hash table. */
     279        26559 :     hash_table = palloc_object(dshash_table);
     280              : 
     281              :     /* Find the control object in shared memory. */
     282        26559 :     control = handle;
     283              : 
     284              :     /* Set up the local hash table struct. */
     285        26559 :     hash_table->area = area;
     286        26559 :     hash_table->params = *params;
     287        26559 :     hash_table->arg = arg;
     288        26559 :     hash_table->control = dsa_get_address(area, control);
     289              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     290              : 
     291              :     /*
     292              :      * These will later be set to the correct values by
     293              :      * ensure_valid_bucket_pointers(), at which time we'll be holding a
     294              :      * partition lock for interlocking against concurrent resizing.
     295              :      */
     296        26559 :     hash_table->buckets = NULL;
     297        26559 :     hash_table->size_log2 = 0;
     298              : 
     299        26559 :     return hash_table;
     300              : }
     301              : 
     302              : /*
     303              :  * Detach from a hash table.  This frees backend-local resources associated
     304              :  * with the hash table, but the hash table will continue to exist until it is
     305              :  * either explicitly destroyed (by a backend that is still attached to it), or
     306              :  * the area that backs it is returned to the operating system.
     307              :  */
     308              : void
     309        27537 : dshash_detach(dshash_table *hash_table)
     310              : {
     311              :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     312              : 
     313              :     /* The hash table may have been destroyed.  Just free local memory. */
     314        27537 :     pfree(hash_table);
     315        27537 : }
     316              : 
     317              : /*
     318              :  * Destroy a hash table, returning all memory to the area.  The caller must be
     319              :  * certain that no other backend will attempt to access the hash table before
     320              :  * calling this function.  Other backend must explicitly call dshash_detach to
     321              :  * free up backend-local memory associated with the hash table.  The backend
     322              :  * that calls dshash_destroy must not call dshash_detach.
     323              :  */
     324              : void
     325            0 : dshash_destroy(dshash_table *hash_table)
     326              : {
     327              :     size_t      size;
     328              :     size_t      i;
     329              : 
     330              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     331            0 :     ensure_valid_bucket_pointers(hash_table);
     332              : 
     333              :     /* Free all the entries. */
     334            0 :     size = NUM_BUCKETS(hash_table->size_log2);
     335            0 :     for (i = 0; i < size; ++i)
     336              :     {
     337            0 :         dsa_pointer item_pointer = hash_table->buckets[i];
     338              : 
     339            0 :         while (DsaPointerIsValid(item_pointer))
     340              :         {
     341              :             dshash_table_item *item;
     342              :             dsa_pointer next_item_pointer;
     343              : 
     344            0 :             item = dsa_get_address(hash_table->area, item_pointer);
     345            0 :             next_item_pointer = item->next;
     346            0 :             dsa_free(hash_table->area, item_pointer);
     347            0 :             item_pointer = next_item_pointer;
     348              :         }
     349              :     }
     350              : 
     351              :     /*
     352              :      * Vandalize the control block to help catch programming errors where
     353              :      * other backends access the memory formerly occupied by this hash table.
     354              :      */
     355            0 :     hash_table->control->magic = 0;
     356              : 
     357              :     /* Free the active table and control object. */
     358            0 :     dsa_free(hash_table->area, hash_table->control->buckets);
     359            0 :     dsa_free(hash_table->area, hash_table->control->handle);
     360              : 
     361            0 :     pfree(hash_table);
     362            0 : }
     363              : 
     364              : /*
     365              :  * Get a handle that can be used by other processes to attach to this hash
     366              :  * table.
     367              :  */
     368              : dshash_table_handle
     369         1399 : dshash_get_hash_table_handle(dshash_table *hash_table)
     370              : {
     371              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     372              : 
     373         1399 :     return hash_table->control->handle;
     374              : }
     375              : 
     376              : /*
     377              :  * Look up an entry, given a key.  Returns a pointer to an entry if one can be
     378              :  * found with the given key.  Returns NULL if the key is not found.  If a
     379              :  * non-NULL value is returned, the entry is locked and must be released by
     380              :  * calling dshash_release_lock.  If an error is raised before
     381              :  * dshash_release_lock is called, the lock will be released automatically, but
     382              :  * the caller must take care to ensure that the entry is not left corrupted.
     383              :  * The lock mode is either shared or exclusive depending on 'exclusive'.
     384              :  *
     385              :  * The caller must not hold a lock already.
     386              :  *
     387              :  * Note that the lock held is in fact an LWLock, so interrupts will be held on
     388              :  * return from this function, and not resumed until dshash_release_lock is
     389              :  * called.  It is a very good idea for the caller to release the lock quickly.
     390              :  */
     391              : void *
     392      1249947 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
     393              : {
     394              :     dshash_hash hash;
     395              :     size_t      partition;
     396              :     dshash_table_item *item;
     397              : 
     398      1249947 :     hash = hash_key(hash_table, key);
     399      1249947 :     partition = PARTITION_FOR_HASH(hash);
     400              : 
     401              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     402              :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     403              : 
     404      1249947 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition),
     405      1249947 :                   exclusive ? LW_EXCLUSIVE : LW_SHARED);
     406      1249947 :     ensure_valid_bucket_pointers(hash_table);
     407              : 
     408              :     /* Search the active bucket. */
     409      1249947 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     410              : 
     411      1249947 :     if (!item)
     412              :     {
     413              :         /* Not found. */
     414       239711 :         LWLockRelease(PARTITION_LOCK(hash_table, partition));
     415       239711 :         return NULL;
     416              :     }
     417              :     else
     418              :     {
     419              :         /* The caller will free the lock by calling dshash_release_lock. */
     420      1010236 :         return ENTRY_FROM_ITEM(item);
     421              :     }
     422              : }
     423              : 
     424              : /*
     425              :  * Returns a pointer to an exclusively locked item which must be released with
     426              :  * dshash_release_lock.  If the key is found in the hash table, 'found' is set
     427              :  * to true and a pointer to the existing entry is returned.  If the key is not
     428              :  * found, 'found' is set to false, and a pointer to a newly created entry is
     429              :  * returned.
     430              :  *
     431              :  * Notes above dshash_find() regarding locking and error handling equally
     432              :  * apply here.
     433              :  */
     434              : void *
     435       341082 : dshash_find_or_insert(dshash_table *hash_table,
     436              :                       const void *key,
     437              :                       bool *found)
     438              : {
     439              :     dshash_hash hash;
     440              :     size_t      partition_index;
     441              :     dshash_partition *partition;
     442              :     dshash_table_item *item;
     443              : 
     444       341082 :     hash = hash_key(hash_table, key);
     445       341082 :     partition_index = PARTITION_FOR_HASH(hash);
     446       341082 :     partition = &hash_table->control->partitions[partition_index];
     447              : 
     448              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     449              :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     450              : 
     451       344413 : restart:
     452       344413 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
     453              :                   LW_EXCLUSIVE);
     454       344413 :     ensure_valid_bucket_pointers(hash_table);
     455              : 
     456              :     /* Search the active bucket. */
     457       344413 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     458              : 
     459       344413 :     if (item)
     460          225 :         *found = true;
     461              :     else
     462              :     {
     463       344188 :         *found = false;
     464              : 
     465              :         /* Check if we are getting too full. */
     466       344188 :         if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
     467              :         {
     468              :             /*
     469              :              * The load factor (= keys / buckets) for all buckets protected by
     470              :              * this partition is > 0.75.  Presumably the same applies
     471              :              * generally across the whole hash table (though we don't attempt
     472              :              * to track that directly to avoid contention on some kind of
     473              :              * central counter; we just assume that this partition is
     474              :              * representative).  This is a good time to resize.
     475              :              *
     476              :              * Give up our existing lock first, because resizing needs to
     477              :              * reacquire all the locks in the right order to avoid deadlocks.
     478              :              */
     479         3331 :             LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     480         3331 :             resize(hash_table, hash_table->size_log2 + 1);
     481              : 
     482         3331 :             goto restart;
     483              :         }
     484              : 
     485              :         /* Finally we can try to insert the new item. */
     486       340857 :         item = insert_into_bucket(hash_table, key,
     487       340857 :                                   &BUCKET_FOR_HASH(hash_table, hash));
     488       340857 :         item->hash = hash;
     489              :         /* Adjust per-lock-partition counter for load factor knowledge. */
     490       340857 :         ++partition->count;
     491              :     }
     492              : 
     493              :     /* The caller must release the lock with dshash_release_lock. */
     494       341082 :     return ENTRY_FROM_ITEM(item);
     495              : }
     496              : 
     497              : /*
     498              :  * Remove an entry by key.  Returns true if the key was found and the
     499              :  * corresponding entry was removed.
     500              :  *
     501              :  * To delete an entry that you already have a pointer to, see
     502              :  * dshash_delete_entry.
     503              :  */
     504              : bool
     505          242 : dshash_delete_key(dshash_table *hash_table, const void *key)
     506              : {
     507              :     dshash_hash hash;
     508              :     size_t      partition;
     509              :     bool        found;
     510              : 
     511              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     512              :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     513              : 
     514          242 :     hash = hash_key(hash_table, key);
     515          242 :     partition = PARTITION_FOR_HASH(hash);
     516              : 
     517          242 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
     518          242 :     ensure_valid_bucket_pointers(hash_table);
     519              : 
     520          242 :     if (delete_key_from_bucket(hash_table, key,
     521          242 :                                &BUCKET_FOR_HASH(hash_table, hash)))
     522              :     {
     523              :         Assert(hash_table->control->partitions[partition].count > 0);
     524          145 :         found = true;
     525          145 :         --hash_table->control->partitions[partition].count;
     526              :     }
     527              :     else
     528           97 :         found = false;
     529              : 
     530          242 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     531              : 
     532          242 :     return found;
     533              : }
     534              : 
     535              : /*
     536              :  * Remove an entry.  The entry must already be exclusively locked, and must
     537              :  * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
     538              :  * function releases the lock just like dshash_release_lock.
     539              :  *
     540              :  * To delete an entry by key, see dshash_delete_key.
     541              :  */
     542              : void
     543        54559 : dshash_delete_entry(dshash_table *hash_table, void *entry)
     544              : {
     545        54559 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     546        54559 :     size_t      partition = PARTITION_FOR_HASH(item->hash);
     547              : 
     548              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     549              :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     550              :                                 LW_EXCLUSIVE));
     551              : 
     552        54559 :     delete_item(hash_table, item);
     553        54559 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     554        54559 : }
     555              : 
     556              : /*
     557              :  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
     558              :  */
     559              : void
     560      1296759 : dshash_release_lock(dshash_table *hash_table, void *entry)
     561              : {
     562      1296759 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     563      1296759 :     size_t      partition_index = PARTITION_FOR_HASH(item->hash);
     564              : 
     565              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     566              : 
     567      1296759 :     LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     568      1296759 : }
     569              : 
     570              : /*
     571              :  * A compare function that forwards to memcmp.
     572              :  */
     573              : int
     574          627 : dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
     575              : {
     576          627 :     return memcmp(a, b, size);
     577              : }
     578              : 
     579              : /*
     580              :  * A hash function that forwards to tag_hash.
     581              :  */
     582              : dshash_hash
     583         1006 : dshash_memhash(const void *v, size_t size, void *arg)
     584              : {
     585         1006 :     return tag_hash(v, size);
     586              : }
     587              : 
     588              : /*
     589              :  * A copy function that forwards to memcpy.
     590              :  */
     591              : void
     592       340838 : dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
     593              : {
     594       340838 :     (void) memcpy(dest, src, size);
     595       340838 : }
     596              : 
     597              : /*
     598              :  * A compare function that forwards to strcmp.
     599              :  */
     600              : int
     601          119 : dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
     602              : {
     603              :     Assert(strlen((const char *) a) < size);
     604              :     Assert(strlen((const char *) b) < size);
     605              : 
     606          119 :     return strcmp((const char *) a, (const char *) b);
     607              : }
     608              : 
     609              : /*
     610              :  * A hash function that forwards to string_hash.
     611              :  */
     612              : dshash_hash
     613          138 : dshash_strhash(const void *v, size_t size, void *arg)
     614              : {
     615              :     Assert(strlen((const char *) v) < size);
     616              : 
     617          138 :     return string_hash((const char *) v, size);
     618              : }
     619              : 
     620              : /*
     621              :  * A copy function that forwards to strcpy.
     622              :  */
     623              : void
     624           19 : dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
     625              : {
     626              :     Assert(strlen((const char *) src) < size);
     627              : 
     628           19 :     (void) strcpy((char *) dest, (const char *) src);
     629           19 : }
     630              : 
     631              : /*
     632              :  * Sequentially scan through dshash table and return all the elements one by
     633              :  * one, return NULL when all elements have been returned.
     634              :  *
     635              :  * dshash_seq_term needs to be called when a scan finished.  The caller may
     636              :  * delete returned elements midst of a scan by using dshash_delete_current()
     637              :  * if exclusive = true.
     638              :  */
     639              : void
     640         1036 : dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
     641              :                 bool exclusive)
     642              : {
     643         1036 :     status->hash_table = hash_table;
     644         1036 :     status->curbucket = 0;
     645         1036 :     status->nbuckets = 0;
     646         1036 :     status->curitem = NULL;
     647         1036 :     status->pnextitem = InvalidDsaPointer;
     648         1036 :     status->curpartition = -1;
     649         1036 :     status->exclusive = exclusive;
     650         1036 : }
     651              : 
     652              : /*
     653              :  * Returns the next element.
     654              :  *
     655              :  * Returned elements are locked and the caller may not release the lock. It is
     656              :  * released by future calls to dshash_seq_next() or dshash_seq_term().
     657              :  */
     658              : void *
     659       263463 : dshash_seq_next(dshash_seq_status *status)
     660              : {
     661              :     dsa_pointer next_item_pointer;
     662              : 
     663              :     /*
     664              :      * Not yet holding any partition locks. Need to determine the size of the
     665              :      * hash table, it could have been resized since we were looking last.
     666              :      * Since we iterate in partition order, we can start by unconditionally
     667              :      * lock partition 0.
     668              :      *
     669              :      * Once we hold the lock, no resizing can happen until the scan ends. So
     670              :      * we don't need to repeatedly call ensure_valid_bucket_pointers().
     671              :      */
     672       263463 :     if (status->curpartition == -1)
     673              :     {
     674              :         Assert(status->curbucket == 0);
     675              :         ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);
     676              : 
     677         1036 :         status->curpartition = 0;
     678              : 
     679         1036 :         LWLockAcquire(PARTITION_LOCK(status->hash_table,
     680              :                                      status->curpartition),
     681         1036 :                       status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     682              : 
     683         1036 :         ensure_valid_bucket_pointers(status->hash_table);
     684              : 
     685         1036 :         status->nbuckets =
     686         1036 :             NUM_BUCKETS(status->hash_table->control->size_log2);
     687         1036 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     688              :     }
     689              :     else
     690       262427 :         next_item_pointer = status->pnextitem;
     691              : 
     692              :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
     693              :                                                status->curpartition),
     694              :                                 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
     695              : 
     696              :     /* Move to the next bucket if we finished the current bucket */
     697      1682203 :     while (!DsaPointerIsValid(next_item_pointer))
     698              :     {
     699              :         int         next_partition;
     700              : 
     701      1419776 :         if (++status->curbucket >= status->nbuckets)
     702              :         {
     703              :             /* all buckets have been scanned. finish. */
     704         1036 :             return NULL;
     705              :         }
     706              : 
     707              :         /* Check if move to the next partition */
     708      1418740 :         next_partition =
     709      1418740 :             PARTITION_FOR_BUCKET_INDEX(status->curbucket,
     710              :                                        status->hash_table->size_log2);
     711              : 
     712      1418740 :         if (status->curpartition != next_partition)
     713              :         {
     714              :             /*
     715              :              * Move to the next partition. Lock the next partition then
     716              :              * release the current, not in the reverse order to avoid
     717              :              * concurrent resizing.  Avoid dead lock by taking lock in the
     718              :              * same order with resize().
     719              :              */
     720       131572 :             LWLockAcquire(PARTITION_LOCK(status->hash_table,
     721              :                                          next_partition),
     722       131572 :                           status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     723       131572 :             LWLockRelease(PARTITION_LOCK(status->hash_table,
     724              :                                          status->curpartition));
     725       131572 :             status->curpartition = next_partition;
     726              :         }
     727              : 
     728      1418740 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     729              :     }
     730              : 
     731       262427 :     status->curitem =
     732       262427 :         dsa_get_address(status->hash_table->area, next_item_pointer);
     733              : 
     734              :     /*
     735              :      * The caller may delete the item. Store the next item in case of
     736              :      * deletion.
     737              :      */
     738       262427 :     status->pnextitem = status->curitem->next;
     739              : 
     740       262427 :     return ENTRY_FROM_ITEM(status->curitem);
     741              : }
     742              : 
     743              : /*
     744              :  * Terminates the seqscan and release all locks.
     745              :  *
     746              :  * Needs to be called after finishing or when exiting a seqscan.
     747              :  */
     748              : void
     749         1036 : dshash_seq_term(dshash_seq_status *status)
     750              : {
     751         1036 :     if (status->curpartition >= 0)
     752         1036 :         LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
     753         1036 : }
     754              : 
     755              : /*
     756              :  * Remove the current entry of the seq scan.
     757              :  */
     758              : void
     759         4921 : dshash_delete_current(dshash_seq_status *status)
     760              : {
     761         4921 :     dshash_table *hash_table = status->hash_table;
     762         4921 :     dshash_table_item *item = status->curitem;
     763              :     size_t      partition PG_USED_FOR_ASSERTS_ONLY;
     764              : 
     765         4921 :     partition = PARTITION_FOR_HASH(item->hash);
     766              : 
     767              :     Assert(status->exclusive);
     768              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     769              :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     770              :                                 LW_EXCLUSIVE));
     771              : 
     772         4921 :     delete_item(hash_table, item);
     773         4921 : }
     774              : 
     775              : /*
     776              :  * Print debugging information about the internal state of the hash table to
     777              :  * stderr.  The caller must hold no partition locks.
     778              :  */
     779              : void
     780            0 : dshash_dump(dshash_table *hash_table)
     781              : {
     782              :     size_t      i;
     783              :     size_t      j;
     784              : 
     785              :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     786              :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     787              : 
     788            0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     789              :     {
     790              :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     791            0 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
     792              :     }
     793              : 
     794            0 :     ensure_valid_bucket_pointers(hash_table);
     795              : 
     796            0 :     fprintf(stderr,
     797            0 :             "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
     798            0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     799              :     {
     800            0 :         dshash_partition *partition = &hash_table->control->partitions[i];
     801            0 :         size_t      begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
     802            0 :         size_t      end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
     803              : 
     804            0 :         fprintf(stderr, "  partition %zu\n", i);
     805            0 :         fprintf(stderr,
     806              :                 "    active buckets (key count = %zu)\n", partition->count);
     807              : 
     808            0 :         for (j = begin; j < end; ++j)
     809              :         {
     810            0 :             size_t      count = 0;
     811            0 :             dsa_pointer bucket = hash_table->buckets[j];
     812              : 
     813            0 :             while (DsaPointerIsValid(bucket))
     814              :             {
     815              :                 dshash_table_item *item;
     816              : 
     817            0 :                 item = dsa_get_address(hash_table->area, bucket);
     818              : 
     819            0 :                 bucket = item->next;
     820            0 :                 ++count;
     821              :             }
     822            0 :             fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
     823              :         }
     824              :     }
     825              : 
     826            0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     827            0 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     828            0 : }
     829              : 
     830              : /*
     831              :  * Delete a locked item to which we have a pointer.
     832              :  */
     833              : static void
     834        59480 : delete_item(dshash_table *hash_table, dshash_table_item *item)
     835              : {
     836        59480 :     size_t      hash = item->hash;
     837        59480 :     size_t      partition = PARTITION_FOR_HASH(hash);
     838              : 
     839              :     Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
     840              : 
     841        59480 :     if (delete_item_from_bucket(hash_table, item,
     842        59480 :                                 &BUCKET_FOR_HASH(hash_table, hash)))
     843              :     {
     844              :         Assert(hash_table->control->partitions[partition].count > 0);
     845        59480 :         --hash_table->control->partitions[partition].count;
     846              :     }
     847              :     else
     848              :     {
     849              :         Assert(false);
     850              :     }
     851        59480 : }
     852              : 
     853              : /*
     854              :  * Grow the hash table if necessary to the requested number of buckets.  The
     855              :  * requested size must be double some previously observed size.
     856              :  *
     857              :  * Must be called without any partition lock held.
     858              :  */
     859              : static void
     860         3331 : resize(dshash_table *hash_table, size_t new_size_log2)
     861              : {
     862              :     dsa_pointer old_buckets;
     863              :     dsa_pointer new_buckets_shared;
     864              :     dsa_pointer *new_buckets;
     865              :     size_t      size;
     866         3331 :     size_t      new_size = ((size_t) 1) << new_size_log2;
     867              :     size_t      i;
     868              : 
     869              :     /*
     870              :      * Acquire the locks for all lock partitions.  This is expensive, but we
     871              :      * shouldn't have to do it many times.
     872              :      */
     873       429699 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     874              :     {
     875              :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     876              : 
     877       426368 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
     878       426368 :         if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
     879              :         {
     880              :             /*
     881              :              * Another backend has already increased the size; we can avoid
     882              :              * obtaining all the locks and return early.
     883              :              */
     884            0 :             LWLockRelease(PARTITION_LOCK(hash_table, 0));
     885            0 :             return;
     886              :         }
     887              :     }
     888              : 
     889              :     Assert(new_size_log2 == hash_table->control->size_log2 + 1);
     890              : 
     891              :     /* Allocate the space for the new table. */
     892              :     new_buckets_shared =
     893         3331 :         dsa_allocate_extended(hash_table->area,
     894              :                               sizeof(dsa_pointer) * new_size,
     895              :                               DSA_ALLOC_HUGE | DSA_ALLOC_ZERO);
     896         3331 :     new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
     897              : 
     898              :     /*
     899              :      * We've allocated the new bucket array; all that remains to do now is to
     900              :      * reinsert all items, which amounts to adjusting all the pointers.
     901              :      */
     902         3331 :     size = ((size_t) 1) << hash_table->control->size_log2;
     903      1519235 :     for (i = 0; i < size; ++i)
     904              :     {
     905      1515904 :         dsa_pointer item_pointer = hash_table->buckets[i];
     906              : 
     907      1629641 :         while (DsaPointerIsValid(item_pointer))
     908              :         {
     909              :             dshash_table_item *item;
     910              :             dsa_pointer next_item_pointer;
     911              : 
     912       113737 :             item = dsa_get_address(hash_table->area, item_pointer);
     913       113737 :             next_item_pointer = item->next;
     914       113737 :             insert_item_into_bucket(hash_table, item_pointer, item,
     915       113737 :                                     &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
     916              :                                                                                 new_size_log2)]);
     917       113737 :             item_pointer = next_item_pointer;
     918              :         }
     919              :     }
     920              : 
     921              :     /* Swap the hash table into place and free the old one. */
     922         3331 :     old_buckets = hash_table->control->buckets;
     923         3331 :     hash_table->control->buckets = new_buckets_shared;
     924         3331 :     hash_table->control->size_log2 = new_size_log2;
     925         3331 :     hash_table->buckets = new_buckets;
     926         3331 :     dsa_free(hash_table->area, old_buckets);
     927              : 
     928              :     /* Release all the locks. */
     929       429699 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     930       426368 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     931              : }
     932              : 
     933              : /*
     934              :  * Make sure that our backend-local bucket pointers are up to date.  The
     935              :  * caller must have locked one lock partition, which prevents resize() from
     936              :  * running concurrently.
     937              :  */
     938              : static inline void
     939      1595638 : ensure_valid_bucket_pointers(dshash_table *hash_table)
     940              : {
     941      1595638 :     if (hash_table->size_log2 != hash_table->control->size_log2)
     942              :     {
     943        54460 :         hash_table->buckets = dsa_get_address(hash_table->area,
     944        27230 :                                               hash_table->control->buckets);
     945        27230 :         hash_table->size_log2 = hash_table->control->size_log2;
     946              :     }
     947      1595638 : }
     948              : 
     949              : /*
     950              :  * Scan a locked bucket for a match, using the provided compare function.
     951              :  */
     952              : static inline dshash_table_item *
     953      1594360 : find_in_bucket(dshash_table *hash_table, const void *key,
     954              :                dsa_pointer item_pointer)
     955              : {
     956      1817135 :     while (DsaPointerIsValid(item_pointer))
     957              :     {
     958              :         dshash_table_item *item;
     959              : 
     960      1233236 :         item = dsa_get_address(hash_table->area, item_pointer);
     961      1233236 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     962      1010461 :             return item;
     963       222775 :         item_pointer = item->next;
     964              :     }
     965       583899 :     return NULL;
     966              : }
     967              : 
     968              : /*
     969              :  * Insert an already-allocated item into a bucket.
     970              :  */
     971              : static void
     972       454594 : insert_item_into_bucket(dshash_table *hash_table,
     973              :                         dsa_pointer item_pointer,
     974              :                         dshash_table_item *item,
     975              :                         dsa_pointer *bucket)
     976              : {
     977              :     Assert(item == dsa_get_address(hash_table->area, item_pointer));
     978              : 
     979       454594 :     item->next = *bucket;
     980       454594 :     *bucket = item_pointer;
     981       454594 : }
     982              : 
     983              : /*
     984              :  * Allocate space for an entry with the given key and insert it into the
     985              :  * provided bucket.
     986              :  */
     987              : static dshash_table_item *
     988       340857 : insert_into_bucket(dshash_table *hash_table,
     989              :                    const void *key,
     990              :                    dsa_pointer *bucket)
     991              : {
     992              :     dsa_pointer item_pointer;
     993              :     dshash_table_item *item;
     994              : 
     995       340857 :     item_pointer = dsa_allocate(hash_table->area,
     996              :                                 hash_table->params.entry_size +
     997              :                                 MAXALIGN(sizeof(dshash_table_item)));
     998       340857 :     item = dsa_get_address(hash_table->area, item_pointer);
     999       340857 :     copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
    1000       340857 :     insert_item_into_bucket(hash_table, item_pointer, item, bucket);
    1001       340857 :     return item;
    1002              : }
    1003              : 
    1004              : /*
    1005              :  * Search a bucket for a matching key and delete it.
    1006              :  */
    1007              : static bool
    1008          242 : delete_key_from_bucket(dshash_table *hash_table,
    1009              :                        const void *key,
    1010              :                        dsa_pointer *bucket_head)
    1011              : {
    1012          242 :     while (DsaPointerIsValid(*bucket_head))
    1013              :     {
    1014              :         dshash_table_item *item;
    1015              : 
    1016          145 :         item = dsa_get_address(hash_table->area, *bucket_head);
    1017              : 
    1018          145 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
    1019              :         {
    1020              :             dsa_pointer next;
    1021              : 
    1022          145 :             next = item->next;
    1023          145 :             dsa_free(hash_table->area, *bucket_head);
    1024          145 :             *bucket_head = next;
    1025              : 
    1026          145 :             return true;
    1027              :         }
    1028            0 :         bucket_head = &item->next;
    1029              :     }
    1030           97 :     return false;
    1031              : }
    1032              : 
    1033              : /*
    1034              :  * Delete the specified item from the bucket.
    1035              :  */
    1036              : static bool
    1037        59480 : delete_item_from_bucket(dshash_table *hash_table,
    1038              :                         dshash_table_item *item,
    1039              :                         dsa_pointer *bucket_head)
    1040              : {
    1041        59980 :     while (DsaPointerIsValid(*bucket_head))
    1042              :     {
    1043              :         dshash_table_item *bucket_item;
    1044              : 
    1045        59980 :         bucket_item = dsa_get_address(hash_table->area, *bucket_head);
    1046              : 
    1047        59980 :         if (bucket_item == item)
    1048              :         {
    1049              :             dsa_pointer next;
    1050              : 
    1051        59480 :             next = item->next;
    1052        59480 :             dsa_free(hash_table->area, *bucket_head);
    1053        59480 :             *bucket_head = next;
    1054        59480 :             return true;
    1055              :         }
    1056          500 :         bucket_head = &bucket_item->next;
    1057              :     }
    1058            0 :     return false;
    1059              : }
    1060              : 
    1061              : /*
    1062              :  * Compute the hash value for a key.
    1063              :  */
    1064              : static inline dshash_hash
    1065      1591271 : hash_key(dshash_table *hash_table, const void *key)
    1066              : {
    1067      1591271 :     return hash_table->params.hash_function(key,
    1068              :                                             hash_table->params.key_size,
    1069              :                                             hash_table->arg);
    1070              : }
    1071              : 
    1072              : /*
    1073              :  * Check whether two keys compare equal.
    1074              :  */
    1075              : static inline bool
    1076      1233381 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
    1077              : {
    1078      1233381 :     return hash_table->params.compare_function(a, b,
    1079              :                                                hash_table->params.key_size,
    1080      1233381 :                                                hash_table->arg) == 0;
    1081              : }
    1082              : 
    1083              : /*
    1084              :  * Copy a key.
    1085              :  */
    1086              : static inline void
    1087       340857 : copy_key(dshash_table *hash_table, void *dest, const void *src)
    1088              : {
    1089       340857 :     hash_table->params.copy_function(dest, src,
    1090              :                                      hash_table->params.key_size,
    1091              :                                      hash_table->arg);
    1092       340857 : }
        

Generated by: LCOV version 2.0-1