LCOV - code coverage report
Current view: top level - src/backend/lib - dshash.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 229 273 83.9 %
Date: 2025-01-18 04:15:08 Functions: 30 32 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dshash.c
       4             :  *    Concurrent hash tables backed by dynamic shared memory areas.
       5             :  *
       6             :  * This is an open hashing hash table, with a linked list at each table
       7             :  * entry.  It supports dynamic resizing, as required to prevent the linked
       8             :  * lists from growing too long on average.  Currently, only growing is
       9             :  * supported: the hash table never becomes smaller.
      10             :  *
      11             :  * To deal with concurrency, it has a fixed size set of partitions, each of
      12             :  * which is independently locked.  Each bucket maps to a partition; so insert,
      13             :  * find and iterate operations normally only acquire one lock.  Therefore,
      14             :  * good concurrency is achieved whenever such operations don't collide at the
      15             :  * lock partition level.  However, when a resize operation begins, all
      16             :  * partition locks must be acquired simultaneously for a brief period.  This
      17             :  * is only expected to happen a small number of times until a stable size is
      18             :  * found, since growth is geometric.
      19             :  *
      20             :  * Future versions may support iterators and incremental resizing; for now
      21             :  * the implementation is minimalist.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/lib/dshash.c
      28             :  *
      29             :  *-------------------------------------------------------------------------
      30             :  */
      31             : 
      32             : #include "postgres.h"
      33             : 
      34             : #include "common/hashfn.h"
      35             : #include "lib/dshash.h"
      36             : #include "storage/lwlock.h"
      37             : #include "utils/dsa.h"
      38             : 
      39             : /*
      40             :  * An item in the hash table.  This wraps the user's entry object in an
      41             :  * envelop that holds a pointer back to the bucket and a pointer to the next
      42             :  * item in the bucket.
      43             :  */
      44             : struct dshash_table_item
      45             : {
      46             :     /* The next item in the same bucket. */
      47             :     dsa_pointer next;
      48             :     /* The hashed key, to avoid having to recompute it. */
      49             :     dshash_hash hash;
      50             :     /* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
      51             : };
      52             : 
      53             : /*
      54             :  * The number of partitions for locking purposes.  This is set to match
      55             :  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
      56             :  * the buffer pool must be good enough for any other purpose.  This could
      57             :  * become a runtime parameter in future.
      58             :  */
      59             : #define DSHASH_NUM_PARTITIONS_LOG2 7
      60             : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
      61             : 
      62             : /* A magic value used to identify our hash tables. */
      63             : #define DSHASH_MAGIC 0x75ff6a20
      64             : 
      65             : /*
      66             :  * Tracking information for each lock partition.  Initially, each partition
      67             :  * corresponds to one bucket, but each time the hash table grows, the buckets
      68             :  * covered by each partition split so the number of buckets covered doubles.
      69             :  *
      70             :  * We might want to add padding here so that each partition is on a different
      71             :  * cache line, but doing so would bloat this structure considerably.
      72             :  */
      73             : typedef struct dshash_partition
      74             : {
      75             :     LWLock      lock;           /* Protects all buckets in this partition. */
      76             :     size_t      count;          /* # of items in this partition's buckets */
      77             : } dshash_partition;
      78             : 
      79             : /*
      80             :  * The head object for a hash table.  This will be stored in dynamic shared
      81             :  * memory.
      82             :  */
      83             : typedef struct dshash_table_control
      84             : {
      85             :     dshash_table_handle handle;
      86             :     uint32      magic;
      87             :     dshash_partition partitions[DSHASH_NUM_PARTITIONS];
      88             :     int         lwlock_tranche_id;
      89             : 
      90             :     /*
      91             :      * The following members are written to only when ALL partitions locks are
      92             :      * held.  They can be read when any one partition lock is held.
      93             :      */
      94             : 
      95             :     /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
      96             :     size_t      size_log2;      /* log2(number of buckets) */
      97             :     dsa_pointer buckets;        /* current bucket array */
      98             : } dshash_table_control;
      99             : 
     100             : /*
     101             :  * Per-backend state for a dynamic hash table.
     102             :  */
     103             : struct dshash_table
     104             : {
     105             :     dsa_area   *area;           /* Backing dynamic shared memory area. */
     106             :     dshash_parameters params;   /* Parameters. */
     107             :     void       *arg;            /* User-supplied data pointer. */
     108             :     dshash_table_control *control;  /* Control object in DSM. */
     109             :     dsa_pointer *buckets;       /* Current bucket pointers in DSM. */
     110             :     size_t      size_log2;      /* log2(number of buckets) */
     111             : };
     112             : 
     113             : /* Given a pointer to an item, find the entry (user data) it holds. */
     114             : #define ENTRY_FROM_ITEM(item) \
     115             :     ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
     116             : 
     117             : /* Given a pointer to an entry, find the item that holds it. */
     118             : #define ITEM_FROM_ENTRY(entry)                                          \
     119             :     ((dshash_table_item *)((char *)(entry) -                            \
     120             :                              MAXALIGN(sizeof(dshash_table_item))))
     121             : 
     122             : /* How many resize operations (bucket splits) have there been? */
     123             : #define NUM_SPLITS(size_log2)                   \
     124             :     (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
     125             : 
     126             : /* How many buckets are there in a given size? */
     127             : #define NUM_BUCKETS(size_log2)      \
     128             :     (((size_t) 1) << (size_log2))
     129             : 
     130             : /* How many buckets are there in each partition at a given size? */
     131             : #define BUCKETS_PER_PARTITION(size_log2)        \
     132             :     (((size_t) 1) << NUM_SPLITS(size_log2))
     133             : 
     134             : /* Max entries before we need to grow.  Half + quarter = 75% load factor. */
     135             : #define MAX_COUNT_PER_PARTITION(hash_table)             \
     136             :     (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
     137             :      BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
     138             : 
     139             : /* Choose partition based on the highest order bits of the hash. */
     140             : #define PARTITION_FOR_HASH(hash)                                        \
     141             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
     142             : 
     143             : /*
     144             :  * Find the bucket index for a given hash and table size.  Each time the table
     145             :  * doubles in size, the appropriate bucket for a given hash value doubles and
     146             :  * possibly adds one, depending on the newly revealed bit, so that all buckets
     147             :  * are split.
     148             :  */
     149             : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)     \
     150             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
     151             : 
     152             : /* The index of the first bucket in a given partition. */
     153             : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)    \
     154             :     ((partition) << NUM_SPLITS(size_log2))
     155             : 
     156             : /* Choose partition based on bucket index. */
     157             : #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)               \
     158             :     ((bucket_idx) >> NUM_SPLITS(size_log2))
     159             : 
     160             : /* The head of the active bucket for a given hash value (lvalue). */
     161             : #define BUCKET_FOR_HASH(hash_table, hash)                               \
     162             :     (hash_table->buckets[                                                \
     163             :         BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,                            \
     164             :                                        hash_table->size_log2)])
     165             : 
     166             : static void delete_item(dshash_table *hash_table,
     167             :                         dshash_table_item *item);
     168             : static void resize(dshash_table *hash_table, size_t new_size_log2);
     169             : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
     170             : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
     171             :                                                 const void *key,
     172             :                                                 dsa_pointer item_pointer);
     173             : static void insert_item_into_bucket(dshash_table *hash_table,
     174             :                                     dsa_pointer item_pointer,
     175             :                                     dshash_table_item *item,
     176             :                                     dsa_pointer *bucket);
     177             : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
     178             :                                              const void *key,
     179             :                                              dsa_pointer *bucket);
     180             : static bool delete_key_from_bucket(dshash_table *hash_table,
     181             :                                    const void *key,
     182             :                                    dsa_pointer *bucket_head);
     183             : static bool delete_item_from_bucket(dshash_table *hash_table,
     184             :                                     dshash_table_item *item,
     185             :                                     dsa_pointer *bucket_head);
     186             : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
     187             : static inline bool equal_keys(dshash_table *hash_table,
     188             :                               const void *a, const void *b);
     189             : static inline void copy_key(dshash_table *hash_table, void *dest,
     190             :                             const void *src);
     191             : 
     192             : #define PARTITION_LOCK(hash_table, i)           \
     193             :     (&(hash_table)->control->partitions[(i)].lock)
     194             : 
     195             : #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
     196             :     Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
     197             :            DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
     198             : 
     199             : /*
     200             :  * Create a new hash table backed by the given dynamic shared area, with the
     201             :  * given parameters.  The returned object is allocated in backend-local memory
     202             :  * using the current MemoryContext.  'arg' will be passed through to the
     203             :  * compare, hash, and copy functions.
     204             :  */
     205             : dshash_table *
     206        2310 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
     207             : {
     208             :     dshash_table *hash_table;
     209             :     dsa_pointer control;
     210             : 
     211             :     /* Allocate the backend-local object representing the hash table. */
     212        2310 :     hash_table = palloc(sizeof(dshash_table));
     213             : 
     214             :     /* Allocate the control object in shared memory. */
     215        2310 :     control = dsa_allocate(area, sizeof(dshash_table_control));
     216             : 
     217             :     /* Set up the local and shared hash table structs. */
     218        2310 :     hash_table->area = area;
     219        2310 :     hash_table->params = *params;
     220        2310 :     hash_table->arg = arg;
     221        2310 :     hash_table->control = dsa_get_address(area, control);
     222        2310 :     hash_table->control->handle = control;
     223        2310 :     hash_table->control->magic = DSHASH_MAGIC;
     224        2310 :     hash_table->control->lwlock_tranche_id = params->tranche_id;
     225             : 
     226             :     /* Set up the array of lock partitions. */
     227             :     {
     228        2310 :         dshash_partition *partitions = hash_table->control->partitions;
     229        2310 :         int         tranche_id = hash_table->control->lwlock_tranche_id;
     230             :         int         i;
     231             : 
     232      297990 :         for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     233             :         {
     234      295680 :             LWLockInitialize(&partitions[i].lock, tranche_id);
     235      295680 :             partitions[i].count = 0;
     236             :         }
     237             :     }
     238             : 
     239             :     /*
     240             :      * Set up the initial array of buckets.  Our initial size is the same as
     241             :      * the number of partitions.
     242             :      */
     243        2310 :     hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
     244        4620 :     hash_table->control->buckets =
     245        2310 :         dsa_allocate_extended(area,
     246             :                               sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
     247             :                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
     248        2310 :     if (!DsaPointerIsValid(hash_table->control->buckets))
     249             :     {
     250           0 :         dsa_free(area, control);
     251           0 :         ereport(ERROR,
     252             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     253             :                  errmsg("out of memory"),
     254             :                  errdetail("Failed on DSA request of size %zu.",
     255             :                            sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
     256             :     }
     257        4620 :     hash_table->buckets = dsa_get_address(area,
     258        2310 :                                           hash_table->control->buckets);
     259        2310 :     hash_table->size_log2 = hash_table->control->size_log2;
     260             : 
     261        2310 :     return hash_table;
     262             : }
     263             : 
     264             : /*
     265             :  * Attach to an existing hash table using a handle.  The returned object is
     266             :  * allocated in backend-local memory using the current MemoryContext.  'arg'
     267             :  * will be passed through to the compare and hash functions.
     268             :  */
     269             : dshash_table *
     270       40326 : dshash_attach(dsa_area *area, const dshash_parameters *params,
     271             :               dshash_table_handle handle, void *arg)
     272             : {
     273             :     dshash_table *hash_table;
     274             :     dsa_pointer control;
     275             : 
     276             :     /* Allocate the backend-local object representing the hash table. */
     277       40326 :     hash_table = palloc(sizeof(dshash_table));
     278             : 
     279             :     /* Find the control object in shared memory. */
     280       40326 :     control = handle;
     281             : 
     282             :     /* Set up the local hash table struct. */
     283       40326 :     hash_table->area = area;
     284       40326 :     hash_table->params = *params;
     285       40326 :     hash_table->arg = arg;
     286       40326 :     hash_table->control = dsa_get_address(area, control);
     287             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     288             : 
     289             :     /*
     290             :      * These will later be set to the correct values by
     291             :      * ensure_valid_bucket_pointers(), at which time we'll be holding a
     292             :      * partition lock for interlocking against concurrent resizing.
     293             :      */
     294       40326 :     hash_table->buckets = NULL;
     295       40326 :     hash_table->size_log2 = 0;
     296             : 
     297       40326 :     return hash_table;
     298             : }
     299             : 
     300             : /*
     301             :  * Detach from a hash table.  This frees backend-local resources associated
     302             :  * with the hash table, but the hash table will continue to exist until it is
     303             :  * either explicitly destroyed (by a backend that is still attached to it), or
     304             :  * the area that backs it is returned to the operating system.
     305             :  */
     306             : void
     307       42276 : dshash_detach(dshash_table *hash_table)
     308             : {
     309             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     310             : 
     311             :     /* The hash table may have been destroyed.  Just free local memory. */
     312       42276 :     pfree(hash_table);
     313       42276 : }
     314             : 
     315             : /*
     316             :  * Destroy a hash table, returning all memory to the area.  The caller must be
     317             :  * certain that no other backend will attempt to access the hash table before
     318             :  * calling this function.  Other backend must explicitly call dshash_detach to
     319             :  * free up backend-local memory associated with the hash table.  The backend
     320             :  * that calls dshash_destroy must not call dshash_detach.
     321             :  */
     322             : void
     323           0 : dshash_destroy(dshash_table *hash_table)
     324             : {
     325             :     size_t      size;
     326             :     size_t      i;
     327             : 
     328             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     329           0 :     ensure_valid_bucket_pointers(hash_table);
     330             : 
     331             :     /* Free all the entries. */
     332           0 :     size = NUM_BUCKETS(hash_table->size_log2);
     333           0 :     for (i = 0; i < size; ++i)
     334             :     {
     335           0 :         dsa_pointer item_pointer = hash_table->buckets[i];
     336             : 
     337           0 :         while (DsaPointerIsValid(item_pointer))
     338             :         {
     339             :             dshash_table_item *item;
     340             :             dsa_pointer next_item_pointer;
     341             : 
     342           0 :             item = dsa_get_address(hash_table->area, item_pointer);
     343           0 :             next_item_pointer = item->next;
     344           0 :             dsa_free(hash_table->area, item_pointer);
     345           0 :             item_pointer = next_item_pointer;
     346             :         }
     347             :     }
     348             : 
     349             :     /*
     350             :      * Vandalize the control block to help catch programming errors where
     351             :      * other backends access the memory formerly occupied by this hash table.
     352             :      */
     353           0 :     hash_table->control->magic = 0;
     354             : 
     355             :     /* Free the active table and control object. */
     356           0 :     dsa_free(hash_table->area, hash_table->control->buckets);
     357           0 :     dsa_free(hash_table->area, hash_table->control->handle);
     358             : 
     359           0 :     pfree(hash_table);
     360           0 : }
     361             : 
     362             : /*
     363             :  * Get a handle that can be used by other processes to attach to this hash
     364             :  * table.
     365             :  */
     366             : dshash_table_handle
     367        2310 : dshash_get_hash_table_handle(dshash_table *hash_table)
     368             : {
     369             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     370             : 
     371        2310 :     return hash_table->control->handle;
     372             : }
     373             : 
     374             : /*
     375             :  * Look up an entry, given a key.  Returns a pointer to an entry if one can be
     376             :  * found with the given key.  Returns NULL if the key is not found.  If a
     377             :  * non-NULL value is returned, the entry is locked and must be released by
     378             :  * calling dshash_release_lock.  If an error is raised before
     379             :  * dshash_release_lock is called, the lock will be released automatically, but
     380             :  * the caller must take care to ensure that the entry is not left corrupted.
     381             :  * The lock mode is either shared or exclusive depending on 'exclusive'.
     382             :  *
     383             :  * The caller must not hold a lock already.
     384             :  *
     385             :  * Note that the lock held is in fact an LWLock, so interrupts will be held on
     386             :  * return from this function, and not resumed until dshash_release_lock is
     387             :  * called.  It is a very good idea for the caller to release the lock quickly.
     388             :  */
     389             : void *
     390     1900342 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
     391             : {
     392             :     dshash_hash hash;
     393             :     size_t      partition;
     394             :     dshash_table_item *item;
     395             : 
     396     1900342 :     hash = hash_key(hash_table, key);
     397     1900342 :     partition = PARTITION_FOR_HASH(hash);
     398             : 
     399             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     400             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     401             : 
     402     1900342 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition),
     403     1900342 :                   exclusive ? LW_EXCLUSIVE : LW_SHARED);
     404     1900342 :     ensure_valid_bucket_pointers(hash_table);
     405             : 
     406             :     /* Search the active bucket. */
     407     1900342 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     408             : 
     409     1900342 :     if (!item)
     410             :     {
     411             :         /* Not found. */
     412      400658 :         LWLockRelease(PARTITION_LOCK(hash_table, partition));
     413      400658 :         return NULL;
     414             :     }
     415             :     else
     416             :     {
     417             :         /* The caller will free the lock by calling dshash_release_lock. */
     418     1499684 :         return ENTRY_FROM_ITEM(item);
     419             :     }
     420             : }
     421             : 
     422             : /*
     423             :  * Returns a pointer to an exclusively locked item which must be released with
     424             :  * dshash_release_lock.  If the key is found in the hash table, 'found' is set
     425             :  * to true and a pointer to the existing entry is returned.  If the key is not
     426             :  * found, 'found' is set to false, and a pointer to a newly created entry is
     427             :  * returned.
     428             :  *
     429             :  * Notes above dshash_find() regarding locking and error handling equally
     430             :  * apply here.
     431             :  */
     432             : void *
     433      535084 : dshash_find_or_insert(dshash_table *hash_table,
     434             :                       const void *key,
     435             :                       bool *found)
     436             : {
     437             :     dshash_hash hash;
     438             :     size_t      partition_index;
     439             :     dshash_partition *partition;
     440             :     dshash_table_item *item;
     441             : 
     442      535084 :     hash = hash_key(hash_table, key);
     443      535084 :     partition_index = PARTITION_FOR_HASH(hash);
     444      535084 :     partition = &hash_table->control->partitions[partition_index];
     445             : 
     446             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     447             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     448             : 
     449      540322 : restart:
     450      540322 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
     451             :                   LW_EXCLUSIVE);
     452      540322 :     ensure_valid_bucket_pointers(hash_table);
     453             : 
     454             :     /* Search the active bucket. */
     455      540322 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     456             : 
     457      540322 :     if (item)
     458          60 :         *found = true;
     459             :     else
     460             :     {
     461      540262 :         *found = false;
     462             : 
     463             :         /* Check if we are getting too full. */
     464      540262 :         if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
     465             :         {
     466             :             /*
     467             :              * The load factor (= keys / buckets) for all buckets protected by
     468             :              * this partition is > 0.75.  Presumably the same applies
     469             :              * generally across the whole hash table (though we don't attempt
     470             :              * to track that directly to avoid contention on some kind of
     471             :              * central counter; we just assume that this partition is
     472             :              * representative).  This is a good time to resize.
     473             :              *
     474             :              * Give up our existing lock first, because resizing needs to
     475             :              * reacquire all the locks in the right order to avoid deadlocks.
     476             :              */
     477        5238 :             LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     478        5238 :             resize(hash_table, hash_table->size_log2 + 1);
     479             : 
     480        5238 :             goto restart;
     481             :         }
     482             : 
     483             :         /* Finally we can try to insert the new item. */
     484      535024 :         item = insert_into_bucket(hash_table, key,
     485      535024 :                                   &BUCKET_FOR_HASH(hash_table, hash));
     486      535024 :         item->hash = hash;
     487             :         /* Adjust per-lock-partition counter for load factor knowledge. */
     488      535024 :         ++partition->count;
     489             :     }
     490             : 
     491             :     /* The caller must release the lock with dshash_release_lock. */
     492      535084 :     return ENTRY_FROM_ITEM(item);
     493             : }
     494             : 
     495             : /*
     496             :  * Remove an entry by key.  Returns true if the key was found and the
     497             :  * corresponding entry was removed.
     498             :  *
     499             :  * To delete an entry that you already have a pointer to, see
     500             :  * dshash_delete_entry.
     501             :  */
     502             : bool
     503         282 : dshash_delete_key(dshash_table *hash_table, const void *key)
     504             : {
     505             :     dshash_hash hash;
     506             :     size_t      partition;
     507             :     bool        found;
     508             : 
     509             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     510             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     511             : 
     512         282 :     hash = hash_key(hash_table, key);
     513         282 :     partition = PARTITION_FOR_HASH(hash);
     514             : 
     515         282 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
     516         282 :     ensure_valid_bucket_pointers(hash_table);
     517             : 
     518         282 :     if (delete_key_from_bucket(hash_table, key,
     519         282 :                                &BUCKET_FOR_HASH(hash_table, hash)))
     520             :     {
     521             :         Assert(hash_table->control->partitions[partition].count > 0);
     522         212 :         found = true;
     523         212 :         --hash_table->control->partitions[partition].count;
     524             :     }
     525             :     else
     526          70 :         found = false;
     527             : 
     528         282 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     529             : 
     530         282 :     return found;
     531             : }
     532             : 
     533             : /*
     534             :  * Remove an entry.  The entry must already be exclusively locked, and must
     535             :  * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
     536             :  * function releases the lock just like dshash_release_lock.
     537             :  *
     538             :  * To delete an entry by key, see dshash_delete_key.
     539             :  */
     540             : void
     541       92756 : dshash_delete_entry(dshash_table *hash_table, void *entry)
     542             : {
     543       92756 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     544       92756 :     size_t      partition = PARTITION_FOR_HASH(item->hash);
     545             : 
     546             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     547             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     548             :                                 LW_EXCLUSIVE));
     549             : 
     550       92756 :     delete_item(hash_table, item);
     551       92756 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     552       92756 : }
     553             : 
     554             : /*
     555             :  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
     556             :  */
     557             : void
     558     1942012 : dshash_release_lock(dshash_table *hash_table, void *entry)
     559             : {
     560     1942012 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     561     1942012 :     size_t      partition_index = PARTITION_FOR_HASH(item->hash);
     562             : 
     563             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     564             : 
     565     1942012 :     LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     566     1942012 : }
     567             : 
     568             : /*
     569             :  * A compare function that forwards to memcmp.
     570             :  */
     571             : int
     572         372 : dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
     573             : {
     574         372 :     return memcmp(a, b, size);
     575             : }
     576             : 
     577             : /*
     578             :  * A hash function that forwards to tag_hash.
     579             :  */
     580             : dshash_hash
     581        1058 : dshash_memhash(const void *v, size_t size, void *arg)
     582             : {
     583        1058 :     return tag_hash(v, size);
     584             : }
     585             : 
     586             : /*
     587             :  * A copy function that forwards to memcpy.
     588             :  */
     589             : void
     590      535006 : dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
     591             : {
     592      535006 :     (void) memcpy(dest, src, size);
     593      535006 : }
     594             : 
     595             : /*
     596             :  * A compare function that forwards to strcmp.
     597             :  */
     598             : int
     599          22 : dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
     600             : {
     601             :     Assert(strlen((const char *) a) < size);
     602             :     Assert(strlen((const char *) b) < size);
     603             : 
     604          22 :     return strcmp((const char *) a, (const char *) b);
     605             : }
     606             : 
     607             : /*
     608             :  * A hash function that forwards to string_hash.
     609             :  */
     610             : dshash_hash
     611          40 : dshash_strhash(const void *v, size_t size, void *arg)
     612             : {
     613             :     Assert(strlen((const char *) v) < size);
     614             : 
     615          40 :     return string_hash((const char *) v, size);
     616             : }
     617             : 
     618             : /*
     619             :  * A copy function that forwards to strcpy.
     620             :  */
     621             : void
     622          18 : dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
     623             : {
     624             :     Assert(strlen((const char *) src) < size);
     625             : 
     626          18 :     (void) strcpy((char *) dest, (const char *) src);
     627          18 : }
     628             : 
     629             : /*
     630             :  * Sequentially scan through dshash table and return all the elements one by
     631             :  * one, return NULL when all elements have been returned.
     632             :  *
     633             :  * dshash_seq_term needs to be called when a scan finished.  The caller may
     634             :  * delete returned elements midst of a scan by using dshash_delete_current()
     635             :  * if exclusive = true.
     636             :  */
     637             : void
     638        1678 : dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
     639             :                 bool exclusive)
     640             : {
     641        1678 :     status->hash_table = hash_table;
     642        1678 :     status->curbucket = 0;
     643        1678 :     status->nbuckets = 0;
     644        1678 :     status->curitem = NULL;
     645        1678 :     status->pnextitem = InvalidDsaPointer;
     646        1678 :     status->curpartition = -1;
     647        1678 :     status->exclusive = exclusive;
     648        1678 : }
     649             : 
     650             : /*
     651             :  * Returns the next element.
     652             :  *
     653             :  * Returned elements are locked and the caller may not release the lock. It is
     654             :  * released by future calls to dshash_seq_next() or dshash_seq_term().
     655             :  */
     656             : void *
     657      403744 : dshash_seq_next(dshash_seq_status *status)
     658             : {
     659             :     dsa_pointer next_item_pointer;
     660             : 
     661             :     /*
     662             :      * Not yet holding any partition locks. Need to determine the size of the
     663             :      * hash table, it could have been resized since we were looking last.
     664             :      * Since we iterate in partition order, we can start by unconditionally
     665             :      * lock partition 0.
     666             :      *
     667             :      * Once we hold the lock, no resizing can happen until the scan ends. So
     668             :      * we don't need to repeatedly call ensure_valid_bucket_pointers().
     669             :      */
     670      403744 :     if (status->curpartition == -1)
     671             :     {
     672             :         Assert(status->curbucket == 0);
     673             :         ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);
     674             : 
     675        1678 :         status->curpartition = 0;
     676             : 
     677        1678 :         LWLockAcquire(PARTITION_LOCK(status->hash_table,
     678             :                                      status->curpartition),
     679        1678 :                       status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     680             : 
     681        1678 :         ensure_valid_bucket_pointers(status->hash_table);
     682             : 
     683        1678 :         status->nbuckets =
     684        1678 :             NUM_BUCKETS(status->hash_table->control->size_log2);
     685        1678 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     686             :     }
     687             :     else
     688      402066 :         next_item_pointer = status->pnextitem;
     689             : 
     690             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
     691             :                                                status->curpartition),
     692             :                                 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
     693             : 
     694             :     /* Move to the next bucket if we finished the current bucket */
     695     2599570 :     while (!DsaPointerIsValid(next_item_pointer))
     696             :     {
     697             :         int         next_partition;
     698             : 
     699     2197504 :         if (++status->curbucket >= status->nbuckets)
     700             :         {
     701             :             /* all buckets have been scanned. finish. */
     702        1678 :             return NULL;
     703             :         }
     704             : 
     705             :         /* Check if move to the next partition */
     706     2195826 :         next_partition =
     707     2195826 :             PARTITION_FOR_BUCKET_INDEX(status->curbucket,
     708             :                                        status->hash_table->size_log2);
     709             : 
     710     2195826 :         if (status->curpartition != next_partition)
     711             :         {
     712             :             /*
     713             :              * Move to the next partition. Lock the next partition then
     714             :              * release the current, not in the reverse order to avoid
     715             :              * concurrent resizing.  Avoid dead lock by taking lock in the
     716             :              * same order with resize().
     717             :              */
     718      213106 :             LWLockAcquire(PARTITION_LOCK(status->hash_table,
     719             :                                          next_partition),
     720      213106 :                           status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     721      213106 :             LWLockRelease(PARTITION_LOCK(status->hash_table,
     722             :                                          status->curpartition));
     723      213106 :             status->curpartition = next_partition;
     724             :         }
     725             : 
     726     2195826 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     727             :     }
     728             : 
     729      402066 :     status->curitem =
     730      402066 :         dsa_get_address(status->hash_table->area, next_item_pointer);
     731             : 
     732             :     /*
     733             :      * The caller may delete the item. Store the next item in case of
     734             :      * deletion.
     735             :      */
     736      402066 :     status->pnextitem = status->curitem->next;
     737             : 
     738      402066 :     return ENTRY_FROM_ITEM(status->curitem);
     739             : }
     740             : 
     741             : /*
     742             :  * Terminates the seqscan and release all locks.
     743             :  *
     744             :  * Needs to be called after finishing or when exiting a seqscan.
     745             :  */
     746             : void
     747        1678 : dshash_seq_term(dshash_seq_status *status)
     748             : {
     749        1678 :     if (status->curpartition >= 0)
     750        1678 :         LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
     751        1678 : }
     752             : 
     753             : /*
     754             :  * Remove the current entry of the seq scan.
     755             :  */
     756             : void
     757        1934 : dshash_delete_current(dshash_seq_status *status)
     758             : {
     759        1934 :     dshash_table *hash_table = status->hash_table;
     760        1934 :     dshash_table_item *item = status->curitem;
     761             :     size_t      partition PG_USED_FOR_ASSERTS_ONLY;
     762             : 
     763        1934 :     partition = PARTITION_FOR_HASH(item->hash);
     764             : 
     765             :     Assert(status->exclusive);
     766             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     767             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     768             :                                 LW_EXCLUSIVE));
     769             : 
     770        1934 :     delete_item(hash_table, item);
     771        1934 : }
     772             : 
     773             : /*
     774             :  * Print debugging information about the internal state of the hash table to
     775             :  * stderr.  The caller must hold no partition locks.
     776             :  */
     777             : void
     778           0 : dshash_dump(dshash_table *hash_table)
     779             : {
     780             :     size_t      i;
     781             :     size_t      j;
     782             : 
     783             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     784             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     785             : 
     786           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     787             :     {
     788             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     789           0 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
     790             :     }
     791             : 
     792           0 :     ensure_valid_bucket_pointers(hash_table);
     793             : 
     794           0 :     fprintf(stderr,
     795           0 :             "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
     796           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     797             :     {
     798           0 :         dshash_partition *partition = &hash_table->control->partitions[i];
     799           0 :         size_t      begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
     800           0 :         size_t      end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
     801             : 
     802           0 :         fprintf(stderr, "  partition %zu\n", i);
     803           0 :         fprintf(stderr,
     804             :                 "    active buckets (key count = %zu)\n", partition->count);
     805             : 
     806           0 :         for (j = begin; j < end; ++j)
     807             :         {
     808           0 :             size_t      count = 0;
     809           0 :             dsa_pointer bucket = hash_table->buckets[j];
     810             : 
     811           0 :             while (DsaPointerIsValid(bucket))
     812             :             {
     813             :                 dshash_table_item *item;
     814             : 
     815           0 :                 item = dsa_get_address(hash_table->area, bucket);
     816             : 
     817           0 :                 bucket = item->next;
     818           0 :                 ++count;
     819             :             }
     820           0 :             fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
     821             :         }
     822             :     }
     823             : 
     824           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     825           0 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     826           0 : }
     827             : 
     828             : /*
     829             :  * Delete a locked item to which we have a pointer.
     830             :  */
     831             : static void
     832       94690 : delete_item(dshash_table *hash_table, dshash_table_item *item)
     833             : {
     834       94690 :     size_t      hash = item->hash;
     835       94690 :     size_t      partition = PARTITION_FOR_HASH(hash);
     836             : 
     837             :     Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
     838             : 
     839       94690 :     if (delete_item_from_bucket(hash_table, item,
     840       94690 :                                 &BUCKET_FOR_HASH(hash_table, hash)))
     841             :     {
     842             :         Assert(hash_table->control->partitions[partition].count > 0);
     843       94690 :         --hash_table->control->partitions[partition].count;
     844             :     }
     845             :     else
     846             :     {
     847             :         Assert(false);
     848             :     }
     849       94690 : }
     850             : 
     851             : /*
     852             :  * Grow the hash table if necessary to the requested number of buckets.  The
     853             :  * requested size must be double some previously observed size.
     854             :  *
     855             :  * Must be called without any partition lock held.
     856             :  */
     857             : static void
     858        5238 : resize(dshash_table *hash_table, size_t new_size_log2)
     859             : {
     860             :     dsa_pointer old_buckets;
     861             :     dsa_pointer new_buckets_shared;
     862             :     dsa_pointer *new_buckets;
     863             :     size_t      size;
     864        5238 :     size_t      new_size = ((size_t) 1) << new_size_log2;
     865             :     size_t      i;
     866             : 
     867             :     /*
     868             :      * Acquire the locks for all lock partitions.  This is expensive, but we
     869             :      * shouldn't have to do it many times.
     870             :      */
     871      675702 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     872             :     {
     873             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     874             : 
     875      670464 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
     876      670464 :         if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
     877             :         {
     878             :             /*
     879             :              * Another backend has already increased the size; we can avoid
     880             :              * obtaining all the locks and return early.
     881             :              */
     882           0 :             LWLockRelease(PARTITION_LOCK(hash_table, 0));
     883           0 :             return;
     884             :         }
     885             :     }
     886             : 
     887             :     Assert(new_size_log2 == hash_table->control->size_log2 + 1);
     888             : 
     889             :     /* Allocate the space for the new table. */
     890             :     new_buckets_shared =
     891        5238 :         dsa_allocate_extended(hash_table->area,
     892             :                               sizeof(dsa_pointer) * new_size,
     893             :                               DSA_ALLOC_HUGE | DSA_ALLOC_ZERO);
     894        5238 :     new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
     895             : 
     896             :     /*
     897             :      * We've allocated the new bucket array; all that remains to do now is to
     898             :      * reinsert all items, which amounts to adjusting all the pointers.
     899             :      */
     900        5238 :     size = ((size_t) 1) << hash_table->control->size_log2;
     901     2380406 :     for (i = 0; i < size; ++i)
     902             :     {
     903     2375168 :         dsa_pointer item_pointer = hash_table->buckets[i];
     904             : 
     905     2563284 :         while (DsaPointerIsValid(item_pointer))
     906             :         {
     907             :             dshash_table_item *item;
     908             :             dsa_pointer next_item_pointer;
     909             : 
     910      188116 :             item = dsa_get_address(hash_table->area, item_pointer);
     911      188116 :             next_item_pointer = item->next;
     912      188116 :             insert_item_into_bucket(hash_table, item_pointer, item,
     913      188116 :                                     &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
     914             :                                                                                 new_size_log2)]);
     915      188116 :             item_pointer = next_item_pointer;
     916             :         }
     917             :     }
     918             : 
     919             :     /* Swap the hash table into place and free the old one. */
     920        5238 :     old_buckets = hash_table->control->buckets;
     921        5238 :     hash_table->control->buckets = new_buckets_shared;
     922        5238 :     hash_table->control->size_log2 = new_size_log2;
     923        5238 :     hash_table->buckets = new_buckets;
     924        5238 :     dsa_free(hash_table->area, old_buckets);
     925             : 
     926             :     /* Release all the locks. */
     927      675702 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     928      670464 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     929             : }
     930             : 
     931             : /*
     932             :  * Make sure that our backend-local bucket pointers are up to date.  The
     933             :  * caller must have locked one lock partition, which prevents resize() from
     934             :  * running concurrently.
     935             :  */
     936             : static inline void
     937     2442624 : ensure_valid_bucket_pointers(dshash_table *hash_table)
     938             : {
     939     2442624 :     if (hash_table->size_log2 != hash_table->control->size_log2)
     940             :     {
     941       80968 :         hash_table->buckets = dsa_get_address(hash_table->area,
     942       40484 :                                               hash_table->control->buckets);
     943       40484 :         hash_table->size_log2 = hash_table->control->size_log2;
     944             :     }
     945     2442624 : }
     946             : 
     947             : /*
     948             :  * Scan a locked bucket for a match, using the provided compare function.
     949             :  */
     950             : static inline dshash_table_item *
     951     2440664 : find_in_bucket(dshash_table *hash_table, const void *key,
     952             :                dsa_pointer item_pointer)
     953             : {
     954     2766478 :     while (DsaPointerIsValid(item_pointer))
     955             :     {
     956             :         dshash_table_item *item;
     957             : 
     958     1825558 :         item = dsa_get_address(hash_table->area, item_pointer);
     959     1825558 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     960     1499744 :             return item;
     961      325814 :         item_pointer = item->next;
     962             :     }
     963      940920 :     return NULL;
     964             : }
     965             : 
     966             : /*
     967             :  * Insert an already-allocated item into a bucket.
     968             :  */
     969             : static void
     970      723140 : insert_item_into_bucket(dshash_table *hash_table,
     971             :                         dsa_pointer item_pointer,
     972             :                         dshash_table_item *item,
     973             :                         dsa_pointer *bucket)
     974             : {
     975             :     Assert(item == dsa_get_address(hash_table->area, item_pointer));
     976             : 
     977      723140 :     item->next = *bucket;
     978      723140 :     *bucket = item_pointer;
     979      723140 : }
     980             : 
     981             : /*
     982             :  * Allocate space for an entry with the given key and insert it into the
     983             :  * provided bucket.
     984             :  */
     985             : static dshash_table_item *
     986      535024 : insert_into_bucket(dshash_table *hash_table,
     987             :                    const void *key,
     988             :                    dsa_pointer *bucket)
     989             : {
     990             :     dsa_pointer item_pointer;
     991             :     dshash_table_item *item;
     992             : 
     993      535024 :     item_pointer = dsa_allocate(hash_table->area,
     994             :                                 hash_table->params.entry_size +
     995             :                                 MAXALIGN(sizeof(dshash_table_item)));
     996      535024 :     item = dsa_get_address(hash_table->area, item_pointer);
     997      535024 :     copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
     998      535024 :     insert_item_into_bucket(hash_table, item_pointer, item, bucket);
     999      535024 :     return item;
    1000             : }
    1001             : 
    1002             : /*
    1003             :  * Search a bucket for a matching key and delete it.
    1004             :  */
    1005             : static bool
    1006         282 : delete_key_from_bucket(dshash_table *hash_table,
    1007             :                        const void *key,
    1008             :                        dsa_pointer *bucket_head)
    1009             : {
    1010         282 :     while (DsaPointerIsValid(*bucket_head))
    1011             :     {
    1012             :         dshash_table_item *item;
    1013             : 
    1014         212 :         item = dsa_get_address(hash_table->area, *bucket_head);
    1015             : 
    1016         212 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
    1017             :         {
    1018             :             dsa_pointer next;
    1019             : 
    1020         212 :             next = item->next;
    1021         212 :             dsa_free(hash_table->area, *bucket_head);
    1022         212 :             *bucket_head = next;
    1023             : 
    1024         212 :             return true;
    1025             :         }
    1026           0 :         bucket_head = &item->next;
    1027             :     }
    1028          70 :     return false;
    1029             : }
    1030             : 
    1031             : /*
    1032             :  * Delete the specified item from the bucket.
    1033             :  */
    1034             : static bool
    1035       94690 : delete_item_from_bucket(dshash_table *hash_table,
    1036             :                         dshash_table_item *item,
    1037             :                         dsa_pointer *bucket_head)
    1038             : {
    1039       95200 :     while (DsaPointerIsValid(*bucket_head))
    1040             :     {
    1041             :         dshash_table_item *bucket_item;
    1042             : 
    1043       95200 :         bucket_item = dsa_get_address(hash_table->area, *bucket_head);
    1044             : 
    1045       95200 :         if (bucket_item == item)
    1046             :         {
    1047             :             dsa_pointer next;
    1048             : 
    1049       94690 :             next = item->next;
    1050       94690 :             dsa_free(hash_table->area, *bucket_head);
    1051       94690 :             *bucket_head = next;
    1052       94690 :             return true;
    1053             :         }
    1054         510 :         bucket_head = &bucket_item->next;
    1055             :     }
    1056           0 :     return false;
    1057             : }
    1058             : 
    1059             : /*
    1060             :  * Compute the hash value for a key.
    1061             :  */
    1062             : static inline dshash_hash
    1063     2435708 : hash_key(dshash_table *hash_table, const void *key)
    1064             : {
    1065     2435708 :     return hash_table->params.hash_function(key,
    1066             :                                             hash_table->params.key_size,
    1067             :                                             hash_table->arg);
    1068             : }
    1069             : 
    1070             : /*
    1071             :  * Check whether two keys compare equal.
    1072             :  */
    1073             : static inline bool
    1074     1825770 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
    1075             : {
    1076     1825770 :     return hash_table->params.compare_function(a, b,
    1077             :                                                hash_table->params.key_size,
    1078     1825770 :                                                hash_table->arg) == 0;
    1079             : }
    1080             : 
    1081             : /*
    1082             :  * Copy a key.
    1083             :  */
    1084             : static inline void
    1085      535024 : copy_key(dshash_table *hash_table, void *dest, const void *src)
    1086             : {
    1087      535024 :     hash_table->params.copy_function(dest, src,
    1088             :                                      hash_table->params.key_size,
    1089             :                                      hash_table->arg);
    1090      535024 : }

Generated by: LCOV version 1.14