LCOV - code coverage report
Current view: top level - src/backend/lib - dshash.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 229 273 83.9 %
Date: 2025-11-26 11:17:37 Functions: 30 32 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dshash.c
       4             :  *    Concurrent hash tables backed by dynamic shared memory areas.
       5             :  *
       6             :  * This is an open hashing hash table, with a linked list at each table
       7             :  * entry.  It supports dynamic resizing, as required to prevent the linked
       8             :  * lists from growing too long on average.  Currently, only growing is
       9             :  * supported: the hash table never becomes smaller.
      10             :  *
      11             :  * To deal with concurrency, it has a fixed size set of partitions, each of
      12             :  * which is independently locked.  Each bucket maps to a partition; so insert,
      13             :  * find and iterate operations normally only acquire one lock.  Therefore,
      14             :  * good concurrency is achieved whenever such operations don't collide at the
      15             :  * lock partition level.  However, when a resize operation begins, all
      16             :  * partition locks must be acquired simultaneously for a brief period.  This
      17             :  * is only expected to happen a small number of times until a stable size is
      18             :  * found, since growth is geometric.
      19             :  *
      20             :  * Future versions may support iterators and incremental resizing; for now
      21             :  * the implementation is minimalist.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/lib/dshash.c
      28             :  *
      29             :  *-------------------------------------------------------------------------
      30             :  */
      31             : 
      32             : #include "postgres.h"
      33             : 
      34             : #include <limits.h>
      35             : 
      36             : #include "common/hashfn.h"
      37             : #include "lib/dshash.h"
      38             : #include "storage/lwlock.h"
      39             : #include "utils/dsa.h"
      40             : 
      41             : /*
      42             :  * An item in the hash table.  This wraps the user's entry object in an
      43             :  * envelop that holds a pointer back to the bucket and a pointer to the next
      44             :  * item in the bucket.
      45             :  */
      46             : struct dshash_table_item
      47             : {
      48             :     /* The next item in the same bucket. */
      49             :     dsa_pointer next;
      50             :     /* The hashed key, to avoid having to recompute it. */
      51             :     dshash_hash hash;
      52             :     /* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
      53             : };
      54             : 
      55             : /*
      56             :  * The number of partitions for locking purposes.  This is set to match
      57             :  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
      58             :  * the buffer pool must be good enough for any other purpose.  This could
      59             :  * become a runtime parameter in future.
      60             :  */
      61             : #define DSHASH_NUM_PARTITIONS_LOG2 7
      62             : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
      63             : 
      64             : /* A magic value used to identify our hash tables. */
      65             : #define DSHASH_MAGIC 0x75ff6a20
      66             : 
      67             : /*
      68             :  * Tracking information for each lock partition.  Initially, each partition
      69             :  * corresponds to one bucket, but each time the hash table grows, the buckets
      70             :  * covered by each partition split so the number of buckets covered doubles.
      71             :  *
      72             :  * We might want to add padding here so that each partition is on a different
      73             :  * cache line, but doing so would bloat this structure considerably.
      74             :  */
      75             : typedef struct dshash_partition
      76             : {
      77             :     LWLock      lock;           /* Protects all buckets in this partition. */
      78             :     size_t      count;          /* # of items in this partition's buckets */
      79             : } dshash_partition;
      80             : 
      81             : /*
      82             :  * The head object for a hash table.  This will be stored in dynamic shared
      83             :  * memory.
      84             :  */
      85             : typedef struct dshash_table_control
      86             : {
      87             :     dshash_table_handle handle;
      88             :     uint32      magic;
      89             :     dshash_partition partitions[DSHASH_NUM_PARTITIONS];
      90             :     int         lwlock_tranche_id;
      91             : 
      92             :     /*
      93             :      * The following members are written to only when ALL partitions locks are
      94             :      * held.  They can be read when any one partition lock is held.
      95             :      */
      96             : 
      97             :     /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
      98             :     size_t      size_log2;      /* log2(number of buckets) */
      99             :     dsa_pointer buckets;        /* current bucket array */
     100             : } dshash_table_control;
     101             : 
     102             : /*
     103             :  * Per-backend state for a dynamic hash table.
     104             :  */
     105             : struct dshash_table
     106             : {
     107             :     dsa_area   *area;           /* Backing dynamic shared memory area. */
     108             :     dshash_parameters params;   /* Parameters. */
     109             :     void       *arg;            /* User-supplied data pointer. */
     110             :     dshash_table_control *control;  /* Control object in DSM. */
     111             :     dsa_pointer *buckets;       /* Current bucket pointers in DSM. */
     112             :     size_t      size_log2;      /* log2(number of buckets) */
     113             : };
     114             : 
     115             : /* Given a pointer to an item, find the entry (user data) it holds. */
     116             : #define ENTRY_FROM_ITEM(item) \
     117             :     ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
     118             : 
     119             : /* Given a pointer to an entry, find the item that holds it. */
     120             : #define ITEM_FROM_ENTRY(entry)                                          \
     121             :     ((dshash_table_item *)((char *)(entry) -                            \
     122             :                              MAXALIGN(sizeof(dshash_table_item))))
     123             : 
     124             : /* How many resize operations (bucket splits) have there been? */
     125             : #define NUM_SPLITS(size_log2)                   \
     126             :     (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
     127             : 
     128             : /* How many buckets are there in a given size? */
     129             : #define NUM_BUCKETS(size_log2)      \
     130             :     (((size_t) 1) << (size_log2))
     131             : 
     132             : /* How many buckets are there in each partition at a given size? */
     133             : #define BUCKETS_PER_PARTITION(size_log2)        \
     134             :     (((size_t) 1) << NUM_SPLITS(size_log2))
     135             : 
     136             : /* Max entries before we need to grow.  Half + quarter = 75% load factor. */
     137             : #define MAX_COUNT_PER_PARTITION(hash_table)             \
     138             :     (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
     139             :      BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
     140             : 
     141             : /* Choose partition based on the highest order bits of the hash. */
     142             : #define PARTITION_FOR_HASH(hash)                                        \
     143             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
     144             : 
     145             : /*
     146             :  * Find the bucket index for a given hash and table size.  Each time the table
     147             :  * doubles in size, the appropriate bucket for a given hash value doubles and
     148             :  * possibly adds one, depending on the newly revealed bit, so that all buckets
     149             :  * are split.
     150             :  */
     151             : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)     \
     152             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
     153             : 
     154             : /* The index of the first bucket in a given partition. */
     155             : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)    \
     156             :     ((partition) << NUM_SPLITS(size_log2))
     157             : 
     158             : /* Choose partition based on bucket index. */
     159             : #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)               \
     160             :     ((bucket_idx) >> NUM_SPLITS(size_log2))
     161             : 
     162             : /* The head of the active bucket for a given hash value (lvalue). */
     163             : #define BUCKET_FOR_HASH(hash_table, hash)                               \
     164             :     (hash_table->buckets[                                                \
     165             :         BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,                            \
     166             :                                        hash_table->size_log2)])
     167             : 
     168             : static void delete_item(dshash_table *hash_table,
     169             :                         dshash_table_item *item);
     170             : static void resize(dshash_table *hash_table, size_t new_size_log2);
     171             : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
     172             : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
     173             :                                                 const void *key,
     174             :                                                 dsa_pointer item_pointer);
     175             : static void insert_item_into_bucket(dshash_table *hash_table,
     176             :                                     dsa_pointer item_pointer,
     177             :                                     dshash_table_item *item,
     178             :                                     dsa_pointer *bucket);
     179             : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
     180             :                                              const void *key,
     181             :                                              dsa_pointer *bucket);
     182             : static bool delete_key_from_bucket(dshash_table *hash_table,
     183             :                                    const void *key,
     184             :                                    dsa_pointer *bucket_head);
     185             : static bool delete_item_from_bucket(dshash_table *hash_table,
     186             :                                     dshash_table_item *item,
     187             :                                     dsa_pointer *bucket_head);
     188             : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
     189             : static inline bool equal_keys(dshash_table *hash_table,
     190             :                               const void *a, const void *b);
     191             : static inline void copy_key(dshash_table *hash_table, void *dest,
     192             :                             const void *src);
     193             : 
     194             : #define PARTITION_LOCK(hash_table, i)           \
     195             :     (&(hash_table)->control->partitions[(i)].lock)
     196             : 
     197             : #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
     198             :     Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
     199             :            DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
     200             : 
     201             : /*
     202             :  * Create a new hash table backed by the given dynamic shared area, with the
     203             :  * given parameters.  The returned object is allocated in backend-local memory
     204             :  * using the current MemoryContext.  'arg' will be passed through to the
     205             :  * compare, hash, and copy functions.
     206             :  */
     207             : dshash_table *
     208        2622 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
     209             : {
     210             :     dshash_table *hash_table;
     211             :     dsa_pointer control;
     212             : 
     213             :     /* Allocate the backend-local object representing the hash table. */
     214        2622 :     hash_table = palloc(sizeof(dshash_table));
     215             : 
     216             :     /* Allocate the control object in shared memory. */
     217        2622 :     control = dsa_allocate(area, sizeof(dshash_table_control));
     218             : 
     219             :     /* Set up the local and shared hash table structs. */
     220        2622 :     hash_table->area = area;
     221        2622 :     hash_table->params = *params;
     222        2622 :     hash_table->arg = arg;
     223        2622 :     hash_table->control = dsa_get_address(area, control);
     224        2622 :     hash_table->control->handle = control;
     225        2622 :     hash_table->control->magic = DSHASH_MAGIC;
     226        2622 :     hash_table->control->lwlock_tranche_id = params->tranche_id;
     227             : 
     228             :     /* Set up the array of lock partitions. */
     229             :     {
     230        2622 :         dshash_partition *partitions = hash_table->control->partitions;
     231        2622 :         int         tranche_id = hash_table->control->lwlock_tranche_id;
     232             :         int         i;
     233             : 
     234      338238 :         for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     235             :         {
     236      335616 :             LWLockInitialize(&partitions[i].lock, tranche_id);
     237      335616 :             partitions[i].count = 0;
     238             :         }
     239             :     }
     240             : 
     241             :     /*
     242             :      * Set up the initial array of buckets.  Our initial size is the same as
     243             :      * the number of partitions.
     244             :      */
     245        2622 :     hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
     246        5244 :     hash_table->control->buckets =
     247        2622 :         dsa_allocate_extended(area,
     248             :                               sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
     249             :                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
     250        2622 :     if (!DsaPointerIsValid(hash_table->control->buckets))
     251             :     {
     252           0 :         dsa_free(area, control);
     253           0 :         ereport(ERROR,
     254             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     255             :                  errmsg("out of memory"),
     256             :                  errdetail("Failed on DSA request of size %zu.",
     257             :                            sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
     258             :     }
     259        5244 :     hash_table->buckets = dsa_get_address(area,
     260        2622 :                                           hash_table->control->buckets);
     261        2622 :     hash_table->size_log2 = hash_table->control->size_log2;
     262             : 
     263        2622 :     return hash_table;
     264             : }
     265             : 
     266             : /*
     267             :  * Attach to an existing hash table using a handle.  The returned object is
     268             :  * allocated in backend-local memory using the current MemoryContext.  'arg'
     269             :  * will be passed through to the compare and hash functions.
     270             :  */
     271             : dshash_table *
     272       50600 : dshash_attach(dsa_area *area, const dshash_parameters *params,
     273             :               dshash_table_handle handle, void *arg)
     274             : {
     275             :     dshash_table *hash_table;
     276             :     dsa_pointer control;
     277             : 
     278             :     /* Allocate the backend-local object representing the hash table. */
     279       50600 :     hash_table = palloc(sizeof(dshash_table));
     280             : 
     281             :     /* Find the control object in shared memory. */
     282       50600 :     control = handle;
     283             : 
     284             :     /* Set up the local hash table struct. */
     285       50600 :     hash_table->area = area;
     286       50600 :     hash_table->params = *params;
     287       50600 :     hash_table->arg = arg;
     288       50600 :     hash_table->control = dsa_get_address(area, control);
     289             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     290             : 
     291             :     /*
     292             :      * These will later be set to the correct values by
     293             :      * ensure_valid_bucket_pointers(), at which time we'll be holding a
     294             :      * partition lock for interlocking against concurrent resizing.
     295             :      */
     296       50600 :     hash_table->buckets = NULL;
     297       50600 :     hash_table->size_log2 = 0;
     298             : 
     299       50600 :     return hash_table;
     300             : }
     301             : 
     302             : /*
     303             :  * Detach from a hash table.  This frees backend-local resources associated
     304             :  * with the hash table, but the hash table will continue to exist until it is
     305             :  * either explicitly destroyed (by a backend that is still attached to it), or
     306             :  * the area that backs it is returned to the operating system.
     307             :  */
     308             : void
     309       52656 : dshash_detach(dshash_table *hash_table)
     310             : {
     311             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     312             : 
     313             :     /* The hash table may have been destroyed.  Just free local memory. */
     314       52656 :     pfree(hash_table);
     315       52656 : }
     316             : 
     317             : /*
     318             :  * Destroy a hash table, returning all memory to the area.  The caller must be
     319             :  * certain that no other backend will attempt to access the hash table before
     320             :  * calling this function.  Other backend must explicitly call dshash_detach to
     321             :  * free up backend-local memory associated with the hash table.  The backend
     322             :  * that calls dshash_destroy must not call dshash_detach.
     323             :  */
     324             : void
     325           0 : dshash_destroy(dshash_table *hash_table)
     326             : {
     327             :     size_t      size;
     328             :     size_t      i;
     329             : 
     330             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     331           0 :     ensure_valid_bucket_pointers(hash_table);
     332             : 
     333             :     /* Free all the entries. */
     334           0 :     size = NUM_BUCKETS(hash_table->size_log2);
     335           0 :     for (i = 0; i < size; ++i)
     336             :     {
     337           0 :         dsa_pointer item_pointer = hash_table->buckets[i];
     338             : 
     339           0 :         while (DsaPointerIsValid(item_pointer))
     340             :         {
     341             :             dshash_table_item *item;
     342             :             dsa_pointer next_item_pointer;
     343             : 
     344           0 :             item = dsa_get_address(hash_table->area, item_pointer);
     345           0 :             next_item_pointer = item->next;
     346           0 :             dsa_free(hash_table->area, item_pointer);
     347           0 :             item_pointer = next_item_pointer;
     348             :         }
     349             :     }
     350             : 
     351             :     /*
     352             :      * Vandalize the control block to help catch programming errors where
     353             :      * other backends access the memory formerly occupied by this hash table.
     354             :      */
     355           0 :     hash_table->control->magic = 0;
     356             : 
     357             :     /* Free the active table and control object. */
     358           0 :     dsa_free(hash_table->area, hash_table->control->buckets);
     359           0 :     dsa_free(hash_table->area, hash_table->control->handle);
     360             : 
     361           0 :     pfree(hash_table);
     362           0 : }
     363             : 
     364             : /*
     365             :  * Get a handle that can be used by other processes to attach to this hash
     366             :  * table.
     367             :  */
     368             : dshash_table_handle
     369        2622 : dshash_get_hash_table_handle(dshash_table *hash_table)
     370             : {
     371             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     372             : 
     373        2622 :     return hash_table->control->handle;
     374             : }
     375             : 
     376             : /*
     377             :  * Look up an entry, given a key.  Returns a pointer to an entry if one can be
     378             :  * found with the given key.  Returns NULL if the key is not found.  If a
     379             :  * non-NULL value is returned, the entry is locked and must be released by
     380             :  * calling dshash_release_lock.  If an error is raised before
     381             :  * dshash_release_lock is called, the lock will be released automatically, but
     382             :  * the caller must take care to ensure that the entry is not left corrupted.
     383             :  * The lock mode is either shared or exclusive depending on 'exclusive'.
     384             :  *
     385             :  * The caller must not hold a lock already.
     386             :  *
     387             :  * Note that the lock held is in fact an LWLock, so interrupts will be held on
     388             :  * return from this function, and not resumed until dshash_release_lock is
     389             :  * called.  It is a very good idea for the caller to release the lock quickly.
     390             :  */
     391             : void *
     392     2431186 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
     393             : {
     394             :     dshash_hash hash;
     395             :     size_t      partition;
     396             :     dshash_table_item *item;
     397             : 
     398     2431186 :     hash = hash_key(hash_table, key);
     399     2431186 :     partition = PARTITION_FOR_HASH(hash);
     400             : 
     401             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     402             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     403             : 
     404     2431186 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition),
     405     2431186 :                   exclusive ? LW_EXCLUSIVE : LW_SHARED);
     406     2431186 :     ensure_valid_bucket_pointers(hash_table);
     407             : 
     408             :     /* Search the active bucket. */
     409     2431186 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     410             : 
     411     2431186 :     if (!item)
     412             :     {
     413             :         /* Not found. */
     414      453098 :         LWLockRelease(PARTITION_LOCK(hash_table, partition));
     415      453098 :         return NULL;
     416             :     }
     417             :     else
     418             :     {
     419             :         /* The caller will free the lock by calling dshash_release_lock. */
     420     1978088 :         return ENTRY_FROM_ITEM(item);
     421             :     }
     422             : }
     423             : 
     424             : /*
     425             :  * Returns a pointer to an exclusively locked item which must be released with
     426             :  * dshash_release_lock.  If the key is found in the hash table, 'found' is set
     427             :  * to true and a pointer to the existing entry is returned.  If the key is not
     428             :  * found, 'found' is set to false, and a pointer to a newly created entry is
     429             :  * returned.
     430             :  *
     431             :  * Notes above dshash_find() regarding locking and error handling equally
     432             :  * apply here.
     433             :  */
     434             : void *
     435      647194 : dshash_find_or_insert(dshash_table *hash_table,
     436             :                       const void *key,
     437             :                       bool *found)
     438             : {
     439             :     dshash_hash hash;
     440             :     size_t      partition_index;
     441             :     dshash_partition *partition;
     442             :     dshash_table_item *item;
     443             : 
     444      647194 :     hash = hash_key(hash_table, key);
     445      647194 :     partition_index = PARTITION_FOR_HASH(hash);
     446      647194 :     partition = &hash_table->control->partitions[partition_index];
     447             : 
     448             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     449             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     450             : 
     451      653462 : restart:
     452      653462 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
     453             :                   LW_EXCLUSIVE);
     454      653462 :     ensure_valid_bucket_pointers(hash_table);
     455             : 
     456             :     /* Search the active bucket. */
     457      653462 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     458             : 
     459      653462 :     if (item)
     460         204 :         *found = true;
     461             :     else
     462             :     {
     463      653258 :         *found = false;
     464             : 
     465             :         /* Check if we are getting too full. */
     466      653258 :         if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
     467             :         {
     468             :             /*
     469             :              * The load factor (= keys / buckets) for all buckets protected by
     470             :              * this partition is > 0.75.  Presumably the same applies
     471             :              * generally across the whole hash table (though we don't attempt
     472             :              * to track that directly to avoid contention on some kind of
     473             :              * central counter; we just assume that this partition is
     474             :              * representative).  This is a good time to resize.
     475             :              *
     476             :              * Give up our existing lock first, because resizing needs to
     477             :              * reacquire all the locks in the right order to avoid deadlocks.
     478             :              */
     479        6268 :             LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     480        6268 :             resize(hash_table, hash_table->size_log2 + 1);
     481             : 
     482        6268 :             goto restart;
     483             :         }
     484             : 
     485             :         /* Finally we can try to insert the new item. */
     486      646990 :         item = insert_into_bucket(hash_table, key,
     487      646990 :                                   &BUCKET_FOR_HASH(hash_table, hash));
     488      646990 :         item->hash = hash;
     489             :         /* Adjust per-lock-partition counter for load factor knowledge. */
     490      646990 :         ++partition->count;
     491             :     }
     492             : 
     493             :     /* The caller must release the lock with dshash_release_lock. */
     494      647194 :     return ENTRY_FROM_ITEM(item);
     495             : }
     496             : 
     497             : /*
     498             :  * Remove an entry by key.  Returns true if the key was found and the
     499             :  * corresponding entry was removed.
     500             :  *
     501             :  * To delete an entry that you already have a pointer to, see
     502             :  * dshash_delete_entry.
     503             :  */
     504             : bool
     505         446 : dshash_delete_key(dshash_table *hash_table, const void *key)
     506             : {
     507             :     dshash_hash hash;
     508             :     size_t      partition;
     509             :     bool        found;
     510             : 
     511             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     512             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     513             : 
     514         446 :     hash = hash_key(hash_table, key);
     515         446 :     partition = PARTITION_FOR_HASH(hash);
     516             : 
     517         446 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
     518         446 :     ensure_valid_bucket_pointers(hash_table);
     519             : 
     520         446 :     if (delete_key_from_bucket(hash_table, key,
     521         446 :                                &BUCKET_FOR_HASH(hash_table, hash)))
     522             :     {
     523             :         Assert(hash_table->control->partitions[partition].count > 0);
     524         276 :         found = true;
     525         276 :         --hash_table->control->partitions[partition].count;
     526             :     }
     527             :     else
     528         170 :         found = false;
     529             : 
     530         446 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     531             : 
     532         446 :     return found;
     533             : }
     534             : 
     535             : /*
     536             :  * Remove an entry.  The entry must already be exclusively locked, and must
     537             :  * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
     538             :  * function releases the lock just like dshash_release_lock.
     539             :  *
     540             :  * To delete an entry by key, see dshash_delete_key.
     541             :  */
     542             : void
     543      104060 : dshash_delete_entry(dshash_table *hash_table, void *entry)
     544             : {
     545      104060 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     546      104060 :     size_t      partition = PARTITION_FOR_HASH(item->hash);
     547             : 
     548             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     549             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     550             :                                 LW_EXCLUSIVE));
     551             : 
     552      104060 :     delete_item(hash_table, item);
     553      104060 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     554      104060 : }
     555             : 
     556             : /*
     557             :  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
     558             :  */
     559             : void
     560     2521222 : dshash_release_lock(dshash_table *hash_table, void *entry)
     561             : {
     562     2521222 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     563     2521222 :     size_t      partition_index = PARTITION_FOR_HASH(item->hash);
     564             : 
     565             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     566             : 
     567     2521222 :     LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     568     2521222 : }
     569             : 
     570             : /*
     571             :  * A compare function that forwards to memcmp.
     572             :  */
     573             : int
     574         808 : dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
     575             : {
     576         808 :     return memcmp(a, b, size);
     577             : }
     578             : 
     579             : /*
     580             :  * A hash function that forwards to tag_hash.
     581             :  */
     582             : dshash_hash
     583        1684 : dshash_memhash(const void *v, size_t size, void *arg)
     584             : {
     585        1684 :     return tag_hash(v, size);
     586             : }
     587             : 
     588             : /*
     589             :  * A copy function that forwards to memcpy.
     590             :  */
     591             : void
     592      646964 : dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
     593             : {
     594      646964 :     (void) memcpy(dest, src, size);
     595      646964 : }
     596             : 
     597             : /*
     598             :  * A compare function that forwards to strcmp.
     599             :  */
     600             : int
     601          74 : dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
     602             : {
     603             :     Assert(strlen((const char *) a) < size);
     604             :     Assert(strlen((const char *) b) < size);
     605             : 
     606          74 :     return strcmp((const char *) a, (const char *) b);
     607             : }
     608             : 
     609             : /*
     610             :  * A hash function that forwards to string_hash.
     611             :  */
     612             : dshash_hash
     613         100 : dshash_strhash(const void *v, size_t size, void *arg)
     614             : {
     615             :     Assert(strlen((const char *) v) < size);
     616             : 
     617         100 :     return string_hash((const char *) v, size);
     618             : }
     619             : 
     620             : /*
     621             :  * A copy function that forwards to strcpy.
     622             :  */
     623             : void
     624          26 : dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
     625             : {
     626             :     Assert(strlen((const char *) src) < size);
     627             : 
     628          26 :     (void) strcpy((char *) dest, (const char *) src);
     629          26 : }
     630             : 
     631             : /*
     632             :  * Sequentially scan through dshash table and return all the elements one by
     633             :  * one, return NULL when all elements have been returned.
     634             :  *
     635             :  * dshash_seq_term needs to be called when a scan finished.  The caller may
     636             :  * delete returned elements midst of a scan by using dshash_delete_current()
     637             :  * if exclusive = true.
     638             :  */
     639             : void
     640        1940 : dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
     641             :                 bool exclusive)
     642             : {
     643        1940 :     status->hash_table = hash_table;
     644        1940 :     status->curbucket = 0;
     645        1940 :     status->nbuckets = 0;
     646        1940 :     status->curitem = NULL;
     647        1940 :     status->pnextitem = InvalidDsaPointer;
     648        1940 :     status->curpartition = -1;
     649        1940 :     status->exclusive = exclusive;
     650        1940 : }
     651             : 
     652             : /*
     653             :  * Returns the next element.
     654             :  *
     655             :  * Returned elements are locked and the caller may not release the lock. It is
     656             :  * released by future calls to dshash_seq_next() or dshash_seq_term().
     657             :  */
     658             : void *
     659      501830 : dshash_seq_next(dshash_seq_status *status)
     660             : {
     661             :     dsa_pointer next_item_pointer;
     662             : 
     663             :     /*
     664             :      * Not yet holding any partition locks. Need to determine the size of the
     665             :      * hash table, it could have been resized since we were looking last.
     666             :      * Since we iterate in partition order, we can start by unconditionally
     667             :      * lock partition 0.
     668             :      *
     669             :      * Once we hold the lock, no resizing can happen until the scan ends. So
     670             :      * we don't need to repeatedly call ensure_valid_bucket_pointers().
     671             :      */
     672      501830 :     if (status->curpartition == -1)
     673             :     {
     674             :         Assert(status->curbucket == 0);
     675             :         ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);
     676             : 
     677        1940 :         status->curpartition = 0;
     678             : 
     679        1940 :         LWLockAcquire(PARTITION_LOCK(status->hash_table,
     680             :                                      status->curpartition),
     681        1940 :                       status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     682             : 
     683        1940 :         ensure_valid_bucket_pointers(status->hash_table);
     684             : 
     685        1940 :         status->nbuckets =
     686        1940 :             NUM_BUCKETS(status->hash_table->control->size_log2);
     687        1940 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     688             :     }
     689             :     else
     690      499890 :         next_item_pointer = status->pnextitem;
     691             : 
     692             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
     693             :                                                status->curpartition),
     694             :                                 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
     695             : 
     696             :     /* Move to the next bucket if we finished the current bucket */
     697     3190194 :     while (!DsaPointerIsValid(next_item_pointer))
     698             :     {
     699             :         int         next_partition;
     700             : 
     701     2690304 :         if (++status->curbucket >= status->nbuckets)
     702             :         {
     703             :             /* all buckets have been scanned. finish. */
     704        1940 :             return NULL;
     705             :         }
     706             : 
     707             :         /* Check if move to the next partition */
     708     2688364 :         next_partition =
     709     2688364 :             PARTITION_FOR_BUCKET_INDEX(status->curbucket,
     710             :                                        status->hash_table->size_log2);
     711             : 
     712     2688364 :         if (status->curpartition != next_partition)
     713             :         {
     714             :             /*
     715             :              * Move to the next partition. Lock the next partition then
     716             :              * release the current, not in the reverse order to avoid
     717             :              * concurrent resizing.  Avoid dead lock by taking lock in the
     718             :              * same order with resize().
     719             :              */
     720      246380 :             LWLockAcquire(PARTITION_LOCK(status->hash_table,
     721             :                                          next_partition),
     722      246380 :                           status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     723      246380 :             LWLockRelease(PARTITION_LOCK(status->hash_table,
     724             :                                          status->curpartition));
     725      246380 :             status->curpartition = next_partition;
     726             :         }
     727             : 
     728     2688364 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     729             :     }
     730             : 
     731      499890 :     status->curitem =
     732      499890 :         dsa_get_address(status->hash_table->area, next_item_pointer);
     733             : 
     734             :     /*
     735             :      * The caller may delete the item. Store the next item in case of
     736             :      * deletion.
     737             :      */
     738      499890 :     status->pnextitem = status->curitem->next;
     739             : 
     740      499890 :     return ENTRY_FROM_ITEM(status->curitem);
     741             : }
     742             : 
     743             : /*
     744             :  * Terminates the seqscan and release all locks.
     745             :  *
     746             :  * Needs to be called after finishing or when exiting a seqscan.
     747             :  */
     748             : void
     749        1940 : dshash_seq_term(dshash_seq_status *status)
     750             : {
     751        1940 :     if (status->curpartition >= 0)
     752        1940 :         LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
     753        1940 : }
     754             : 
     755             : /*
     756             :  * Remove the current entry of the seq scan.
     757             :  */
     758             : void
     759        8794 : dshash_delete_current(dshash_seq_status *status)
     760             : {
     761        8794 :     dshash_table *hash_table = status->hash_table;
     762        8794 :     dshash_table_item *item = status->curitem;
     763             :     size_t      partition PG_USED_FOR_ASSERTS_ONLY;
     764             : 
     765        8794 :     partition = PARTITION_FOR_HASH(item->hash);
     766             : 
     767             :     Assert(status->exclusive);
     768             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     769             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     770             :                                 LW_EXCLUSIVE));
     771             : 
     772        8794 :     delete_item(hash_table, item);
     773        8794 : }
     774             : 
     775             : /*
     776             :  * Print debugging information about the internal state of the hash table to
     777             :  * stderr.  The caller must hold no partition locks.
     778             :  */
     779             : void
     780           0 : dshash_dump(dshash_table *hash_table)
     781             : {
     782             :     size_t      i;
     783             :     size_t      j;
     784             : 
     785             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     786             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     787             : 
     788           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     789             :     {
     790             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     791           0 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
     792             :     }
     793             : 
     794           0 :     ensure_valid_bucket_pointers(hash_table);
     795             : 
     796           0 :     fprintf(stderr,
     797           0 :             "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
     798           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     799             :     {
     800           0 :         dshash_partition *partition = &hash_table->control->partitions[i];
     801           0 :         size_t      begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
     802           0 :         size_t      end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
     803             : 
     804           0 :         fprintf(stderr, "  partition %zu\n", i);
     805           0 :         fprintf(stderr,
     806             :                 "    active buckets (key count = %zu)\n", partition->count);
     807             : 
     808           0 :         for (j = begin; j < end; ++j)
     809             :         {
     810           0 :             size_t      count = 0;
     811           0 :             dsa_pointer bucket = hash_table->buckets[j];
     812             : 
     813           0 :             while (DsaPointerIsValid(bucket))
     814             :             {
     815             :                 dshash_table_item *item;
     816             : 
     817           0 :                 item = dsa_get_address(hash_table->area, bucket);
     818             : 
     819           0 :                 bucket = item->next;
     820           0 :                 ++count;
     821             :             }
     822           0 :             fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
     823             :         }
     824             :     }
     825             : 
     826           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     827           0 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     828           0 : }
     829             : 
     830             : /*
     831             :  * Delete a locked item to which we have a pointer.
     832             :  */
     833             : static void
     834      112854 : delete_item(dshash_table *hash_table, dshash_table_item *item)
     835             : {
     836      112854 :     size_t      hash = item->hash;
     837      112854 :     size_t      partition = PARTITION_FOR_HASH(hash);
     838             : 
     839             :     Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
     840             : 
     841      112854 :     if (delete_item_from_bucket(hash_table, item,
     842      112854 :                                 &BUCKET_FOR_HASH(hash_table, hash)))
     843             :     {
     844             :         Assert(hash_table->control->partitions[partition].count > 0);
     845      112854 :         --hash_table->control->partitions[partition].count;
     846             :     }
     847             :     else
     848             :     {
     849             :         Assert(false);
     850             :     }
     851      112854 : }
     852             : 
     853             : /*
     854             :  * Grow the hash table if necessary to the requested number of buckets.  The
     855             :  * requested size must be double some previously observed size.
     856             :  *
     857             :  * Must be called without any partition lock held.
     858             :  */
     859             : static void
     860        6268 : resize(dshash_table *hash_table, size_t new_size_log2)
     861             : {
     862             :     dsa_pointer old_buckets;
     863             :     dsa_pointer new_buckets_shared;
     864             :     dsa_pointer *new_buckets;
     865             :     size_t      size;
     866        6268 :     size_t      new_size = ((size_t) 1) << new_size_log2;
     867             :     size_t      i;
     868             : 
     869             :     /*
     870             :      * Acquire the locks for all lock partitions.  This is expensive, but we
     871             :      * shouldn't have to do it many times.
     872             :      */
     873      808572 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     874             :     {
     875             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     876             : 
     877      802304 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
     878      802304 :         if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
     879             :         {
     880             :             /*
     881             :              * Another backend has already increased the size; we can avoid
     882             :              * obtaining all the locks and return early.
     883             :              */
     884           0 :             LWLockRelease(PARTITION_LOCK(hash_table, 0));
     885           0 :             return;
     886             :         }
     887             :     }
     888             : 
     889             :     Assert(new_size_log2 == hash_table->control->size_log2 + 1);
     890             : 
     891             :     /* Allocate the space for the new table. */
     892             :     new_buckets_shared =
     893        6268 :         dsa_allocate_extended(hash_table->area,
     894             :                               sizeof(dsa_pointer) * new_size,
     895             :                               DSA_ALLOC_HUGE | DSA_ALLOC_ZERO);
     896        6268 :     new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
     897             : 
     898             :     /*
     899             :      * We've allocated the new bucket array; all that remains to do now is to
     900             :      * reinsert all items, which amounts to adjusting all the pointers.
     901             :      */
     902        6268 :     size = ((size_t) 1) << hash_table->control->size_log2;
     903     2885756 :     for (i = 0; i < size; ++i)
     904             :     {
     905     2879488 :         dsa_pointer item_pointer = hash_table->buckets[i];
     906             : 
     907     3100214 :         while (DsaPointerIsValid(item_pointer))
     908             :         {
     909             :             dshash_table_item *item;
     910             :             dsa_pointer next_item_pointer;
     911             : 
     912      220726 :             item = dsa_get_address(hash_table->area, item_pointer);
     913      220726 :             next_item_pointer = item->next;
     914      220726 :             insert_item_into_bucket(hash_table, item_pointer, item,
     915      220726 :                                     &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
     916             :                                                                                 new_size_log2)]);
     917      220726 :             item_pointer = next_item_pointer;
     918             :         }
     919             :     }
     920             : 
     921             :     /* Swap the hash table into place and free the old one. */
     922        6268 :     old_buckets = hash_table->control->buckets;
     923        6268 :     hash_table->control->buckets = new_buckets_shared;
     924        6268 :     hash_table->control->size_log2 = new_size_log2;
     925        6268 :     hash_table->buckets = new_buckets;
     926        6268 :     dsa_free(hash_table->area, old_buckets);
     927             : 
     928             :     /* Release all the locks. */
     929      808572 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     930      802304 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     931             : }
     932             : 
     933             : /*
     934             :  * Make sure that our backend-local bucket pointers are up to date.  The
     935             :  * caller must have locked one lock partition, which prevents resize() from
     936             :  * running concurrently.
     937             :  */
     938             : static inline void
     939     3087034 : ensure_valid_bucket_pointers(dshash_table *hash_table)
     940             : {
     941     3087034 :     if (hash_table->size_log2 != hash_table->control->size_log2)
     942             :     {
     943      103900 :         hash_table->buckets = dsa_get_address(hash_table->area,
     944       51950 :                                               hash_table->control->buckets);
     945       51950 :         hash_table->size_log2 = hash_table->control->size_log2;
     946             :     }
     947     3087034 : }
     948             : 
     949             : /*
     950             :  * Scan a locked bucket for a match, using the provided compare function.
     951             :  */
     952             : static inline dshash_table_item *
     953     3084648 : find_in_bucket(dshash_table *hash_table, const void *key,
     954             :                dsa_pointer item_pointer)
     955             : {
     956     3515956 :     while (DsaPointerIsValid(item_pointer))
     957             :     {
     958             :         dshash_table_item *item;
     959             : 
     960     2409600 :         item = dsa_get_address(hash_table->area, item_pointer);
     961     2409600 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     962     1978292 :             return item;
     963      431308 :         item_pointer = item->next;
     964             :     }
     965     1106356 :     return NULL;
     966             : }
     967             : 
     968             : /*
     969             :  * Insert an already-allocated item into a bucket.
     970             :  */
     971             : static void
     972      867716 : insert_item_into_bucket(dshash_table *hash_table,
     973             :                         dsa_pointer item_pointer,
     974             :                         dshash_table_item *item,
     975             :                         dsa_pointer *bucket)
     976             : {
     977             :     Assert(item == dsa_get_address(hash_table->area, item_pointer));
     978             : 
     979      867716 :     item->next = *bucket;
     980      867716 :     *bucket = item_pointer;
     981      867716 : }
     982             : 
     983             : /*
     984             :  * Allocate space for an entry with the given key and insert it into the
     985             :  * provided bucket.
     986             :  */
     987             : static dshash_table_item *
     988      646990 : insert_into_bucket(dshash_table *hash_table,
     989             :                    const void *key,
     990             :                    dsa_pointer *bucket)
     991             : {
     992             :     dsa_pointer item_pointer;
     993             :     dshash_table_item *item;
     994             : 
     995      646990 :     item_pointer = dsa_allocate(hash_table->area,
     996             :                                 hash_table->params.entry_size +
     997             :                                 MAXALIGN(sizeof(dshash_table_item)));
     998      646990 :     item = dsa_get_address(hash_table->area, item_pointer);
     999      646990 :     copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
    1000      646990 :     insert_item_into_bucket(hash_table, item_pointer, item, bucket);
    1001      646990 :     return item;
    1002             : }
    1003             : 
    1004             : /*
    1005             :  * Search a bucket for a matching key and delete it.
    1006             :  */
    1007             : static bool
    1008         446 : delete_key_from_bucket(dshash_table *hash_table,
    1009             :                        const void *key,
    1010             :                        dsa_pointer *bucket_head)
    1011             : {
    1012         446 :     while (DsaPointerIsValid(*bucket_head))
    1013             :     {
    1014             :         dshash_table_item *item;
    1015             : 
    1016         276 :         item = dsa_get_address(hash_table->area, *bucket_head);
    1017             : 
    1018         276 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
    1019             :         {
    1020             :             dsa_pointer next;
    1021             : 
    1022         276 :             next = item->next;
    1023         276 :             dsa_free(hash_table->area, *bucket_head);
    1024         276 :             *bucket_head = next;
    1025             : 
    1026         276 :             return true;
    1027             :         }
    1028           0 :         bucket_head = &item->next;
    1029             :     }
    1030         170 :     return false;
    1031             : }
    1032             : 
    1033             : /*
    1034             :  * Delete the specified item from the bucket.
    1035             :  */
    1036             : static bool
    1037      112854 : delete_item_from_bucket(dshash_table *hash_table,
    1038             :                         dshash_table_item *item,
    1039             :                         dsa_pointer *bucket_head)
    1040             : {
    1041      113762 :     while (DsaPointerIsValid(*bucket_head))
    1042             :     {
    1043             :         dshash_table_item *bucket_item;
    1044             : 
    1045      113762 :         bucket_item = dsa_get_address(hash_table->area, *bucket_head);
    1046             : 
    1047      113762 :         if (bucket_item == item)
    1048             :         {
    1049             :             dsa_pointer next;
    1050             : 
    1051      112854 :             next = item->next;
    1052      112854 :             dsa_free(hash_table->area, *bucket_head);
    1053      112854 :             *bucket_head = next;
    1054      112854 :             return true;
    1055             :         }
    1056         908 :         bucket_head = &bucket_item->next;
    1057             :     }
    1058           0 :     return false;
    1059             : }
    1060             : 
    1061             : /*
    1062             :  * Compute the hash value for a key.
    1063             :  */
    1064             : static inline dshash_hash
    1065     3078826 : hash_key(dshash_table *hash_table, const void *key)
    1066             : {
    1067     3078826 :     return hash_table->params.hash_function(key,
    1068             :                                             hash_table->params.key_size,
    1069             :                                             hash_table->arg);
    1070             : }
    1071             : 
    1072             : /*
    1073             :  * Check whether two keys compare equal.
    1074             :  */
    1075             : static inline bool
    1076     2409876 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
    1077             : {
    1078     2409876 :     return hash_table->params.compare_function(a, b,
    1079             :                                                hash_table->params.key_size,
    1080     2409876 :                                                hash_table->arg) == 0;
    1081             : }
    1082             : 
    1083             : /*
    1084             :  * Copy a key.
    1085             :  */
    1086             : static inline void
    1087      646990 : copy_key(dshash_table *hash_table, void *dest, const void *src)
    1088             : {
    1089      646990 :     hash_table->params.copy_function(dest, src,
    1090             :                                      hash_table->params.key_size,
    1091             :                                      hash_table->arg);
    1092      646990 : }

Generated by: LCOV version 1.16