LCOV - code coverage report
Current view: top level - src/backend/lib - dshash.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 216 260 83.1 %
Date: 2024-02-22 00:11:39 Functions: 25 27 92.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dshash.c
       4             :  *    Concurrent hash tables backed by dynamic shared memory areas.
       5             :  *
       6             :  * This is an open hashing hash table, with a linked list at each table
       7             :  * entry.  It supports dynamic resizing, as required to prevent the linked
       8             :  * lists from growing too long on average.  Currently, only growing is
       9             :  * supported: the hash table never becomes smaller.
      10             :  *
      11             :  * To deal with concurrency, it has a fixed size set of partitions, each of
      12             :  * which is independently locked.  Each bucket maps to a partition; so insert,
      13             :  * find and iterate operations normally only acquire one lock.  Therefore,
      14             :  * good concurrency is achieved whenever such operations don't collide at the
      15             :  * lock partition level.  However, when a resize operation begins, all
      16             :  * partition locks must be acquired simultaneously for a brief period.  This
      17             :  * is only expected to happen a small number of times until a stable size is
      18             :  * found, since growth is geometric.
      19             :  *
      20             :  * Future versions may support iterators and incremental resizing; for now
      21             :  * the implementation is minimalist.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/lib/dshash.c
      28             :  *
      29             :  *-------------------------------------------------------------------------
      30             :  */
      31             : 
      32             : #include "postgres.h"
      33             : 
      34             : #include "common/hashfn.h"
      35             : #include "lib/dshash.h"
      36             : #include "storage/ipc.h"
      37             : #include "storage/lwlock.h"
      38             : #include "utils/dsa.h"
      39             : #include "utils/memutils.h"
      40             : 
      41             : /*
      42             :  * An item in the hash table.  This wraps the user's entry object in an
      43             :  * envelop that holds a pointer back to the bucket and a pointer to the next
      44             :  * item in the bucket.
      45             :  */
      46             : struct dshash_table_item
      47             : {
      48             :     /* The next item in the same bucket. */
      49             :     dsa_pointer next;
      50             :     /* The hashed key, to avoid having to recompute it. */
      51             :     dshash_hash hash;
      52             :     /* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
      53             : };
      54             : 
      55             : /*
      56             :  * The number of partitions for locking purposes.  This is set to match
      57             :  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
      58             :  * the buffer pool must be good enough for any other purpose.  This could
      59             :  * become a runtime parameter in future.
      60             :  */
      61             : #define DSHASH_NUM_PARTITIONS_LOG2 7
      62             : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
      63             : 
      64             : /* A magic value used to identify our hash tables. */
      65             : #define DSHASH_MAGIC 0x75ff6a20
      66             : 
      67             : /*
      68             :  * Tracking information for each lock partition.  Initially, each partition
      69             :  * corresponds to one bucket, but each time the hash table grows, the buckets
      70             :  * covered by each partition split so the number of buckets covered doubles.
      71             :  *
      72             :  * We might want to add padding here so that each partition is on a different
      73             :  * cache line, but doing so would bloat this structure considerably.
      74             :  */
      75             : typedef struct dshash_partition
      76             : {
      77             :     LWLock      lock;           /* Protects all buckets in this partition. */
      78             :     size_t      count;          /* # of items in this partition's buckets */
      79             : } dshash_partition;
      80             : 
      81             : /*
      82             :  * The head object for a hash table.  This will be stored in dynamic shared
      83             :  * memory.
      84             :  */
      85             : typedef struct dshash_table_control
      86             : {
      87             :     dshash_table_handle handle;
      88             :     uint32      magic;
      89             :     dshash_partition partitions[DSHASH_NUM_PARTITIONS];
      90             :     int         lwlock_tranche_id;
      91             : 
      92             :     /*
      93             :      * The following members are written to only when ALL partitions locks are
      94             :      * held.  They can be read when any one partition lock is held.
      95             :      */
      96             : 
      97             :     /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
      98             :     size_t      size_log2;      /* log2(number of buckets) */
      99             :     dsa_pointer buckets;        /* current bucket array */
     100             : } dshash_table_control;
     101             : 
     102             : /*
     103             :  * Per-backend state for a dynamic hash table.
     104             :  */
     105             : struct dshash_table
     106             : {
     107             :     dsa_area   *area;           /* Backing dynamic shared memory area. */
     108             :     dshash_parameters params;   /* Parameters. */
     109             :     void       *arg;            /* User-supplied data pointer. */
     110             :     dshash_table_control *control;  /* Control object in DSM. */
     111             :     dsa_pointer *buckets;       /* Current bucket pointers in DSM. */
     112             :     size_t      size_log2;      /* log2(number of buckets) */
     113             : };
     114             : 
     115             : /* Given a pointer to an item, find the entry (user data) it holds. */
     116             : #define ENTRY_FROM_ITEM(item) \
     117             :     ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
     118             : 
     119             : /* Given a pointer to an entry, find the item that holds it. */
     120             : #define ITEM_FROM_ENTRY(entry)                                          \
     121             :     ((dshash_table_item *)((char *)(entry) -                            \
     122             :                              MAXALIGN(sizeof(dshash_table_item))))
     123             : 
     124             : /* How many resize operations (bucket splits) have there been? */
     125             : #define NUM_SPLITS(size_log2)                   \
     126             :     (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
     127             : 
     128             : /* How many buckets are there in a given size? */
     129             : #define NUM_BUCKETS(size_log2)      \
     130             :     (((size_t) 1) << (size_log2))
     131             : 
     132             : /* How many buckets are there in each partition at a given size? */
     133             : #define BUCKETS_PER_PARTITION(size_log2)        \
     134             :     (((size_t) 1) << NUM_SPLITS(size_log2))
     135             : 
     136             : /* Max entries before we need to grow.  Half + quarter = 75% load factor. */
     137             : #define MAX_COUNT_PER_PARTITION(hash_table)             \
     138             :     (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
     139             :      BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
     140             : 
     141             : /* Choose partition based on the highest order bits of the hash. */
     142             : #define PARTITION_FOR_HASH(hash)                                        \
     143             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
     144             : 
     145             : /*
     146             :  * Find the bucket index for a given hash and table size.  Each time the table
     147             :  * doubles in size, the appropriate bucket for a given hash value doubles and
     148             :  * possibly adds one, depending on the newly revealed bit, so that all buckets
     149             :  * are split.
     150             :  */
     151             : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)     \
     152             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
     153             : 
     154             : /* The index of the first bucket in a given partition. */
     155             : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)    \
     156             :     ((partition) << NUM_SPLITS(size_log2))
     157             : 
     158             : /* Choose partition based on bucket index. */
     159             : #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)               \
     160             :     ((bucket_idx) >> NUM_SPLITS(size_log2))
     161             : 
     162             : /* The head of the active bucket for a given hash value (lvalue). */
     163             : #define BUCKET_FOR_HASH(hash_table, hash)                               \
     164             :     (hash_table->buckets[                                                \
     165             :         BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,                            \
     166             :                                        hash_table->size_log2)])
     167             : 
     168             : static void delete_item(dshash_table *hash_table,
     169             :                         dshash_table_item *item);
     170             : static void resize(dshash_table *hash_table, size_t new_size_log2);
     171             : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
     172             : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
     173             :                                                 const void *key,
     174             :                                                 dsa_pointer item_pointer);
     175             : static void insert_item_into_bucket(dshash_table *hash_table,
     176             :                                     dsa_pointer item_pointer,
     177             :                                     dshash_table_item *item,
     178             :                                     dsa_pointer *bucket);
     179             : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
     180             :                                              const void *key,
     181             :                                              dsa_pointer *bucket);
     182             : static bool delete_key_from_bucket(dshash_table *hash_table,
     183             :                                    const void *key,
     184             :                                    dsa_pointer *bucket_head);
     185             : static bool delete_item_from_bucket(dshash_table *hash_table,
     186             :                                     dshash_table_item *item,
     187             :                                     dsa_pointer *bucket_head);
     188             : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
     189             : static inline bool equal_keys(dshash_table *hash_table,
     190             :                               const void *a, const void *b);
     191             : 
     192             : #define PARTITION_LOCK(hash_table, i)           \
     193             :     (&(hash_table)->control->partitions[(i)].lock)
     194             : 
     195             : #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
     196             :     Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
     197             :            DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
     198             : 
     199             : /*
     200             :  * Create a new hash table backed by the given dynamic shared area, with the
     201             :  * given parameters.  The returned object is allocated in backend-local memory
     202             :  * using the current MemoryContext.  'arg' will be passed through to the
     203             :  * compare and hash functions.
     204             :  */
     205             : dshash_table *
     206        1946 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
     207             : {
     208             :     dshash_table *hash_table;
     209             :     dsa_pointer control;
     210             : 
     211             :     /* Allocate the backend-local object representing the hash table. */
     212        1946 :     hash_table = palloc(sizeof(dshash_table));
     213             : 
     214             :     /* Allocate the control object in shared memory. */
     215        1946 :     control = dsa_allocate(area, sizeof(dshash_table_control));
     216             : 
     217             :     /* Set up the local and shared hash table structs. */
     218        1946 :     hash_table->area = area;
     219        1946 :     hash_table->params = *params;
     220        1946 :     hash_table->arg = arg;
     221        1946 :     hash_table->control = dsa_get_address(area, control);
     222        1946 :     hash_table->control->handle = control;
     223        1946 :     hash_table->control->magic = DSHASH_MAGIC;
     224        1946 :     hash_table->control->lwlock_tranche_id = params->tranche_id;
     225             : 
     226             :     /* Set up the array of lock partitions. */
     227             :     {
     228        1946 :         dshash_partition *partitions = hash_table->control->partitions;
     229        1946 :         int         tranche_id = hash_table->control->lwlock_tranche_id;
     230             :         int         i;
     231             : 
     232      251034 :         for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     233             :         {
     234      249088 :             LWLockInitialize(&partitions[i].lock, tranche_id);
     235      249088 :             partitions[i].count = 0;
     236             :         }
     237             :     }
     238             : 
     239             :     /*
     240             :      * Set up the initial array of buckets.  Our initial size is the same as
     241             :      * the number of partitions.
     242             :      */
     243        1946 :     hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
     244        3892 :     hash_table->control->buckets =
     245        1946 :         dsa_allocate_extended(area,
     246             :                               sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
     247             :                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
     248        1946 :     if (!DsaPointerIsValid(hash_table->control->buckets))
     249             :     {
     250           0 :         dsa_free(area, control);
     251           0 :         ereport(ERROR,
     252             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     253             :                  errmsg("out of memory"),
     254             :                  errdetail("Failed on DSA request of size %zu.",
     255             :                            sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
     256             :     }
     257        3892 :     hash_table->buckets = dsa_get_address(area,
     258        1946 :                                           hash_table->control->buckets);
     259        1946 :     hash_table->size_log2 = hash_table->control->size_log2;
     260             : 
     261        1946 :     return hash_table;
     262             : }
     263             : 
     264             : /*
     265             :  * Attach to an existing hash table using a handle.  The returned object is
     266             :  * allocated in backend-local memory using the current MemoryContext.  'arg'
     267             :  * will be passed through to the compare and hash functions.
     268             :  */
     269             : dshash_table *
     270       34180 : dshash_attach(dsa_area *area, const dshash_parameters *params,
     271             :               dshash_table_handle handle, void *arg)
     272             : {
     273             :     dshash_table *hash_table;
     274             :     dsa_pointer control;
     275             : 
     276             :     /* Allocate the backend-local object representing the hash table. */
     277       34180 :     hash_table = palloc(sizeof(dshash_table));
     278             : 
     279             :     /* Find the control object in shared memory. */
     280       34180 :     control = handle;
     281             : 
     282             :     /* Set up the local hash table struct. */
     283       34180 :     hash_table->area = area;
     284       34180 :     hash_table->params = *params;
     285       34180 :     hash_table->arg = arg;
     286       34180 :     hash_table->control = dsa_get_address(area, control);
     287             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     288             : 
     289             :     /*
     290             :      * These will later be set to the correct values by
     291             :      * ensure_valid_bucket_pointers(), at which time we'll be holding a
     292             :      * partition lock for interlocking against concurrent resizing.
     293             :      */
     294       34180 :     hash_table->buckets = NULL;
     295       34180 :     hash_table->size_log2 = 0;
     296             : 
     297       34180 :     return hash_table;
     298             : }
     299             : 
     300             : /*
     301             :  * Detach from a hash table.  This frees backend-local resources associated
     302             :  * with the hash table, but the hash table will continue to exist until it is
     303             :  * either explicitly destroyed (by a backend that is still attached to it), or
     304             :  * the area that backs it is returned to the operating system.
     305             :  */
     306             : void
     307       35834 : dshash_detach(dshash_table *hash_table)
     308             : {
     309             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     310             : 
     311             :     /* The hash table may have been destroyed.  Just free local memory. */
     312       35834 :     pfree(hash_table);
     313       35834 : }
     314             : 
     315             : /*
     316             :  * Destroy a hash table, returning all memory to the area.  The caller must be
     317             :  * certain that no other backend will attempt to access the hash table before
     318             :  * calling this function.  Other backend must explicitly call dshash_detach to
     319             :  * free up backend-local memory associated with the hash table.  The backend
     320             :  * that calls dshash_destroy must not call dshash_detach.
     321             :  */
     322             : void
     323           0 : dshash_destroy(dshash_table *hash_table)
     324             : {
     325             :     size_t      size;
     326             :     size_t      i;
     327             : 
     328             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     329           0 :     ensure_valid_bucket_pointers(hash_table);
     330             : 
     331             :     /* Free all the entries. */
     332           0 :     size = NUM_BUCKETS(hash_table->size_log2);
     333           0 :     for (i = 0; i < size; ++i)
     334             :     {
     335           0 :         dsa_pointer item_pointer = hash_table->buckets[i];
     336             : 
     337           0 :         while (DsaPointerIsValid(item_pointer))
     338             :         {
     339             :             dshash_table_item *item;
     340             :             dsa_pointer next_item_pointer;
     341             : 
     342           0 :             item = dsa_get_address(hash_table->area, item_pointer);
     343           0 :             next_item_pointer = item->next;
     344           0 :             dsa_free(hash_table->area, item_pointer);
     345           0 :             item_pointer = next_item_pointer;
     346             :         }
     347             :     }
     348             : 
     349             :     /*
     350             :      * Vandalize the control block to help catch programming errors where
     351             :      * other backends access the memory formerly occupied by this hash table.
     352             :      */
     353           0 :     hash_table->control->magic = 0;
     354             : 
     355             :     /* Free the active table and control object. */
     356           0 :     dsa_free(hash_table->area, hash_table->control->buckets);
     357           0 :     dsa_free(hash_table->area, hash_table->control->handle);
     358             : 
     359           0 :     pfree(hash_table);
     360           0 : }
     361             : 
     362             : /*
     363             :  * Get a handle that can be used by other processes to attach to this hash
     364             :  * table.
     365             :  */
     366             : dshash_table_handle
     367        1946 : dshash_get_hash_table_handle(dshash_table *hash_table)
     368             : {
     369             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     370             : 
     371        1946 :     return hash_table->control->handle;
     372             : }
     373             : 
     374             : /*
     375             :  * Look up an entry, given a key.  Returns a pointer to an entry if one can be
     376             :  * found with the given key.  Returns NULL if the key is not found.  If a
     377             :  * non-NULL value is returned, the entry is locked and must be released by
     378             :  * calling dshash_release_lock.  If an error is raised before
     379             :  * dshash_release_lock is called, the lock will be released automatically, but
     380             :  * the caller must take care to ensure that the entry is not left corrupted.
     381             :  * The lock mode is either shared or exclusive depending on 'exclusive'.
     382             :  *
     383             :  * The caller must not hold a lock already.
     384             :  *
     385             :  * Note that the lock held is in fact an LWLock, so interrupts will be held on
     386             :  * return from this function, and not resumed until dshash_release_lock is
     387             :  * called.  It is a very good idea for the caller to release the lock quickly.
     388             :  */
     389             : void *
     390     1310120 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
     391             : {
     392             :     dshash_hash hash;
     393             :     size_t      partition;
     394             :     dshash_table_item *item;
     395             : 
     396     1310120 :     hash = hash_key(hash_table, key);
     397     1310120 :     partition = PARTITION_FOR_HASH(hash);
     398             : 
     399             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     400             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     401             : 
     402     1310120 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition),
     403     1310120 :                   exclusive ? LW_EXCLUSIVE : LW_SHARED);
     404     1310120 :     ensure_valid_bucket_pointers(hash_table);
     405             : 
     406             :     /* Search the active bucket. */
     407     1310120 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     408             : 
     409     1310120 :     if (!item)
     410             :     {
     411             :         /* Not found. */
     412      313706 :         LWLockRelease(PARTITION_LOCK(hash_table, partition));
     413      313706 :         return NULL;
     414             :     }
     415             :     else
     416             :     {
     417             :         /* The caller will free the lock by calling dshash_release_lock. */
     418      996414 :         return ENTRY_FROM_ITEM(item);
     419             :     }
     420             : }
     421             : 
     422             : /*
     423             :  * Returns a pointer to an exclusively locked item which must be released with
     424             :  * dshash_release_lock.  If the key is found in the hash table, 'found' is set
     425             :  * to true and a pointer to the existing entry is returned.  If the key is not
     426             :  * found, 'found' is set to false, and a pointer to a newly created entry is
     427             :  * returned.
     428             :  *
     429             :  * Notes above dshash_find() regarding locking and error handling equally
     430             :  * apply here.
     431             :  */
     432             : void *
     433      448290 : dshash_find_or_insert(dshash_table *hash_table,
     434             :                       const void *key,
     435             :                       bool *found)
     436             : {
     437             :     dshash_hash hash;
     438             :     size_t      partition_index;
     439             :     dshash_partition *partition;
     440             :     dshash_table_item *item;
     441             : 
     442      448290 :     hash = hash_key(hash_table, key);
     443      448290 :     partition_index = PARTITION_FOR_HASH(hash);
     444      448290 :     partition = &hash_table->control->partitions[partition_index];
     445             : 
     446             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     447             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     448             : 
     449      452538 : restart:
     450      452538 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
     451             :                   LW_EXCLUSIVE);
     452      452538 :     ensure_valid_bucket_pointers(hash_table);
     453             : 
     454             :     /* Search the active bucket. */
     455      452538 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     456             : 
     457      452538 :     if (item)
     458          38 :         *found = true;
     459             :     else
     460             :     {
     461      452500 :         *found = false;
     462             : 
     463             :         /* Check if we are getting too full. */
     464      452500 :         if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
     465             :         {
     466             :             /*
     467             :              * The load factor (= keys / buckets) for all buckets protected by
     468             :              * this partition is > 0.75.  Presumably the same applies
     469             :              * generally across the whole hash table (though we don't attempt
     470             :              * to track that directly to avoid contention on some kind of
     471             :              * central counter; we just assume that this partition is
     472             :              * representative).  This is a good time to resize.
     473             :              *
     474             :              * Give up our existing lock first, because resizing needs to
     475             :              * reacquire all the locks in the right order to avoid deadlocks.
     476             :              */
     477        4248 :             LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     478        4248 :             resize(hash_table, hash_table->size_log2 + 1);
     479             : 
     480        4248 :             goto restart;
     481             :         }
     482             : 
     483             :         /* Finally we can try to insert the new item. */
     484      448252 :         item = insert_into_bucket(hash_table, key,
     485      448252 :                                   &BUCKET_FOR_HASH(hash_table, hash));
     486      448252 :         item->hash = hash;
     487             :         /* Adjust per-lock-partition counter for load factor knowledge. */
     488      448252 :         ++partition->count;
     489             :     }
     490             : 
     491             :     /* The caller must release the lock with dshash_release_lock. */
     492      448290 :     return ENTRY_FROM_ITEM(item);
     493             : }
     494             : 
     495             : /*
     496             :  * Remove an entry by key.  Returns true if the key was found and the
     497             :  * corresponding entry was removed.
     498             :  *
     499             :  * To delete an entry that you already have a pointer to, see
     500             :  * dshash_delete_entry.
     501             :  */
     502             : bool
     503         252 : dshash_delete_key(dshash_table *hash_table, const void *key)
     504             : {
     505             :     dshash_hash hash;
     506             :     size_t      partition;
     507             :     bool        found;
     508             : 
     509             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     510             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     511             : 
     512         252 :     hash = hash_key(hash_table, key);
     513         252 :     partition = PARTITION_FOR_HASH(hash);
     514             : 
     515         252 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
     516         252 :     ensure_valid_bucket_pointers(hash_table);
     517             : 
     518         252 :     if (delete_key_from_bucket(hash_table, key,
     519         252 :                                &BUCKET_FOR_HASH(hash_table, hash)))
     520             :     {
     521             :         Assert(hash_table->control->partitions[partition].count > 0);
     522         184 :         found = true;
     523         184 :         --hash_table->control->partitions[partition].count;
     524             :     }
     525             :     else
     526          68 :         found = false;
     527             : 
     528         252 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     529             : 
     530         252 :     return found;
     531             : }
     532             : 
     533             : /*
     534             :  * Remove an entry.  The entry must already be exclusively locked, and must
     535             :  * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
     536             :  * function releases the lock just like dshash_release_lock.
     537             :  *
     538             :  * To delete an entry by key, see dshash_delete_key.
     539             :  */
     540             : void
     541       58672 : dshash_delete_entry(dshash_table *hash_table, void *entry)
     542             : {
     543       58672 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     544       58672 :     size_t      partition = PARTITION_FOR_HASH(item->hash);
     545             : 
     546             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     547             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     548             :                                 LW_EXCLUSIVE));
     549             : 
     550       58672 :     delete_item(hash_table, item);
     551       58672 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     552       58672 : }
     553             : 
     554             : /*
     555             :  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
     556             :  */
     557             : void
     558     1386032 : dshash_release_lock(dshash_table *hash_table, void *entry)
     559             : {
     560     1386032 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     561     1386032 :     size_t      partition_index = PARTITION_FOR_HASH(item->hash);
     562             : 
     563             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     564             : 
     565     1386032 :     LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     566     1386032 : }
     567             : 
     568             : /*
     569             :  * A compare function that forwards to memcmp.
     570             :  */
     571             : int
     572         322 : dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
     573             : {
     574         322 :     return memcmp(a, b, size);
     575             : }
     576             : 
     577             : /*
     578             :  * A hash function that forwards to tag_hash.
     579             :  */
     580             : dshash_hash
     581         876 : dshash_memhash(const void *v, size_t size, void *arg)
     582             : {
     583         876 :     return tag_hash(v, size);
     584             : }
     585             : 
     586             : /*
     587             :  * Sequentially scan through dshash table and return all the elements one by
     588             :  * one, return NULL when all elements have been returned.
     589             :  *
     590             :  * dshash_seq_term needs to be called when a scan finished.  The caller may
     591             :  * delete returned elements midst of a scan by using dshash_delete_current()
     592             :  * if exclusive = true.
     593             :  */
     594             : void
     595        1418 : dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
     596             :                 bool exclusive)
     597             : {
     598        1418 :     status->hash_table = hash_table;
     599        1418 :     status->curbucket = 0;
     600        1418 :     status->nbuckets = 0;
     601        1418 :     status->curitem = NULL;
     602        1418 :     status->pnextitem = InvalidDsaPointer;
     603        1418 :     status->curpartition = -1;
     604        1418 :     status->exclusive = exclusive;
     605        1418 : }
     606             : 
     607             : /*
     608             :  * Returns the next element.
     609             :  *
     610             :  * Returned elements are locked and the caller may not release the lock. It is
     611             :  * released by future calls to dshash_seq_next() or dshash_seq_term().
     612             :  */
     613             : void *
     614      359428 : dshash_seq_next(dshash_seq_status *status)
     615             : {
     616             :     dsa_pointer next_item_pointer;
     617             : 
     618             :     /*
     619             :      * Not yet holding any partition locks. Need to determine the size of the
     620             :      * hash table, it could have been resized since we were looking last.
     621             :      * Since we iterate in partition order, we can start by unconditionally
     622             :      * lock partition 0.
     623             :      *
     624             :      * Once we hold the lock, no resizing can happen until the scan ends. So
     625             :      * we don't need to repeatedly call ensure_valid_bucket_pointers().
     626             :      */
     627      359428 :     if (status->curpartition == -1)
     628             :     {
     629             :         Assert(status->curbucket == 0);
     630             :         ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);
     631             : 
     632        1418 :         status->curpartition = 0;
     633             : 
     634        1418 :         LWLockAcquire(PARTITION_LOCK(status->hash_table,
     635             :                                      status->curpartition),
     636        1418 :                       status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     637             : 
     638        1418 :         ensure_valid_bucket_pointers(status->hash_table);
     639             : 
     640        1418 :         status->nbuckets =
     641        1418 :             NUM_BUCKETS(status->hash_table->control->size_log2);
     642        1418 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     643             :     }
     644             :     else
     645      358010 :         next_item_pointer = status->pnextitem;
     646             : 
     647             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
     648             :                                                status->curpartition),
     649             :                                 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
     650             : 
     651             :     /* Move to the next bucket if we finished the current bucket */
     652     1918842 :     while (!DsaPointerIsValid(next_item_pointer))
     653             :     {
     654             :         int         next_partition;
     655             : 
     656     1560832 :         if (++status->curbucket >= status->nbuckets)
     657             :         {
     658             :             /* all buckets have been scanned. finish. */
     659        1418 :             return NULL;
     660             :         }
     661             : 
     662             :         /* Check if move to the next partition */
     663     1559414 :         next_partition =
     664     1559414 :             PARTITION_FOR_BUCKET_INDEX(status->curbucket,
     665             :                                        status->hash_table->size_log2);
     666             : 
     667     1559414 :         if (status->curpartition != next_partition)
     668             :         {
     669             :             /*
     670             :              * Move to the next partition. Lock the next partition then
     671             :              * release the current, not in the reverse order to avoid
     672             :              * concurrent resizing.  Avoid dead lock by taking lock in the
     673             :              * same order with resize().
     674             :              */
     675      180086 :             LWLockAcquire(PARTITION_LOCK(status->hash_table,
     676             :                                          next_partition),
     677      180086 :                           status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     678      180086 :             LWLockRelease(PARTITION_LOCK(status->hash_table,
     679             :                                          status->curpartition));
     680      180086 :             status->curpartition = next_partition;
     681             :         }
     682             : 
     683     1559414 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     684             :     }
     685             : 
     686      358010 :     status->curitem =
     687      358010 :         dsa_get_address(status->hash_table->area, next_item_pointer);
     688             : 
     689             :     /*
     690             :      * The caller may delete the item. Store the next item in case of
     691             :      * deletion.
     692             :      */
     693      358010 :     status->pnextitem = status->curitem->next;
     694             : 
     695      358010 :     return ENTRY_FROM_ITEM(status->curitem);
     696             : }
     697             : 
     698             : /*
     699             :  * Terminates the seqscan and release all locks.
     700             :  *
     701             :  * Needs to be called after finishing or when exiting a seqscan.
     702             :  */
     703             : void
     704        1418 : dshash_seq_term(dshash_seq_status *status)
     705             : {
     706        1418 :     if (status->curpartition >= 0)
     707        1418 :         LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
     708        1418 : }
     709             : 
     710             : /*
     711             :  * Remove the current entry of the seq scan.
     712             :  */
     713             : void
     714        3902 : dshash_delete_current(dshash_seq_status *status)
     715             : {
     716        3902 :     dshash_table *hash_table = status->hash_table;
     717        3902 :     dshash_table_item *item = status->curitem;
     718             :     size_t      partition PG_USED_FOR_ASSERTS_ONLY;
     719             : 
     720        3902 :     partition = PARTITION_FOR_HASH(item->hash);
     721             : 
     722             :     Assert(status->exclusive);
     723             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     724             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     725             :                                 LW_EXCLUSIVE));
     726             : 
     727        3902 :     delete_item(hash_table, item);
     728        3902 : }
     729             : 
     730             : /*
     731             :  * Print debugging information about the internal state of the hash table to
     732             :  * stderr.  The caller must hold no partition locks.
     733             :  */
     734             : void
     735           0 : dshash_dump(dshash_table *hash_table)
     736             : {
     737             :     size_t      i;
     738             :     size_t      j;
     739             : 
     740             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     741             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     742             : 
     743           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     744             :     {
     745             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     746           0 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
     747             :     }
     748             : 
     749           0 :     ensure_valid_bucket_pointers(hash_table);
     750             : 
     751           0 :     fprintf(stderr,
     752           0 :             "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
     753           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     754             :     {
     755           0 :         dshash_partition *partition = &hash_table->control->partitions[i];
     756           0 :         size_t      begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
     757           0 :         size_t      end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
     758             : 
     759           0 :         fprintf(stderr, "  partition %zu\n", i);
     760           0 :         fprintf(stderr,
     761             :                 "    active buckets (key count = %zu)\n", partition->count);
     762             : 
     763           0 :         for (j = begin; j < end; ++j)
     764             :         {
     765           0 :             size_t      count = 0;
     766           0 :             dsa_pointer bucket = hash_table->buckets[j];
     767             : 
     768           0 :             while (DsaPointerIsValid(bucket))
     769             :             {
     770             :                 dshash_table_item *item;
     771             : 
     772           0 :                 item = dsa_get_address(hash_table->area, bucket);
     773             : 
     774           0 :                 bucket = item->next;
     775           0 :                 ++count;
     776             :             }
     777           0 :             fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
     778             :         }
     779             :     }
     780             : 
     781           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     782           0 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     783           0 : }
     784             : 
     785             : /*
     786             :  * Delete a locked item to which we have a pointer.
     787             :  */
     788             : static void
     789       62574 : delete_item(dshash_table *hash_table, dshash_table_item *item)
     790             : {
     791       62574 :     size_t      hash = item->hash;
     792       62574 :     size_t      partition = PARTITION_FOR_HASH(hash);
     793             : 
     794             :     Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
     795             : 
     796       62574 :     if (delete_item_from_bucket(hash_table, item,
     797       62574 :                                 &BUCKET_FOR_HASH(hash_table, hash)))
     798             :     {
     799             :         Assert(hash_table->control->partitions[partition].count > 0);
     800       62574 :         --hash_table->control->partitions[partition].count;
     801             :     }
     802             :     else
     803             :     {
     804             :         Assert(false);
     805             :     }
     806       62574 : }
     807             : 
     808             : /*
     809             :  * Grow the hash table if necessary to the requested number of buckets.  The
     810             :  * requested size must be double some previously observed size.
     811             :  *
     812             :  * Must be called without any partition lock held.
     813             :  */
     814             : static void
     815        4248 : resize(dshash_table *hash_table, size_t new_size_log2)
     816             : {
     817             :     dsa_pointer old_buckets;
     818             :     dsa_pointer new_buckets_shared;
     819             :     dsa_pointer *new_buckets;
     820             :     size_t      size;
     821        4248 :     size_t      new_size = ((size_t) 1) << new_size_log2;
     822             :     size_t      i;
     823             : 
     824             :     /*
     825             :      * Acquire the locks for all lock partitions.  This is expensive, but we
     826             :      * shouldn't have to do it many times.
     827             :      */
     828      547992 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     829             :     {
     830             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     831             : 
     832      543744 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
     833      543744 :         if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
     834             :         {
     835             :             /*
     836             :              * Another backend has already increased the size; we can avoid
     837             :              * obtaining all the locks and return early.
     838             :              */
     839           0 :             LWLockRelease(PARTITION_LOCK(hash_table, 0));
     840           0 :             return;
     841             :         }
     842             :     }
     843             : 
     844             :     Assert(new_size_log2 == hash_table->control->size_log2 + 1);
     845             : 
     846             :     /* Allocate the space for the new table. */
     847        4248 :     new_buckets_shared = dsa_allocate0(hash_table->area,
     848             :                                        sizeof(dsa_pointer) * new_size);
     849        4248 :     new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
     850             : 
     851             :     /*
     852             :      * We've allocated the new bucket array; all that remains to do now is to
     853             :      * reinsert all items, which amounts to adjusting all the pointers.
     854             :      */
     855        4248 :     size = ((size_t) 1) << hash_table->control->size_log2;
     856     1635224 :     for (i = 0; i < size; ++i)
     857             :     {
     858     1630976 :         dsa_pointer item_pointer = hash_table->buckets[i];
     859             : 
     860     1843334 :         while (DsaPointerIsValid(item_pointer))
     861             :         {
     862             :             dshash_table_item *item;
     863             :             dsa_pointer next_item_pointer;
     864             : 
     865      212358 :             item = dsa_get_address(hash_table->area, item_pointer);
     866      212358 :             next_item_pointer = item->next;
     867      212358 :             insert_item_into_bucket(hash_table, item_pointer, item,
     868      212358 :                                     &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
     869             :                                                                                 new_size_log2)]);
     870      212358 :             item_pointer = next_item_pointer;
     871             :         }
     872             :     }
     873             : 
     874             :     /* Swap the hash table into place and free the old one. */
     875        4248 :     old_buckets = hash_table->control->buckets;
     876        4248 :     hash_table->control->buckets = new_buckets_shared;
     877        4248 :     hash_table->control->size_log2 = new_size_log2;
     878        4248 :     hash_table->buckets = new_buckets;
     879        4248 :     dsa_free(hash_table->area, old_buckets);
     880             : 
     881             :     /* Release all the locks. */
     882      547992 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     883      543744 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     884             : }
     885             : 
     886             : /*
     887             :  * Make sure that our backend-local bucket pointers are up to date.  The
     888             :  * caller must have locked one lock partition, which prevents resize() from
     889             :  * running concurrently.
     890             :  */
     891             : static inline void
     892     1764328 : ensure_valid_bucket_pointers(dshash_table *hash_table)
     893             : {
     894     1764328 :     if (hash_table->size_log2 != hash_table->control->size_log2)
     895             :     {
     896       64244 :         hash_table->buckets = dsa_get_address(hash_table->area,
     897       32122 :                                               hash_table->control->buckets);
     898       32122 :         hash_table->size_log2 = hash_table->control->size_log2;
     899             :     }
     900     1764328 : }
     901             : 
     902             : /*
     903             :  * Scan a locked bucket for a match, using the provided compare function.
     904             :  */
     905             : static inline dshash_table_item *
     906     1762658 : find_in_bucket(dshash_table *hash_table, const void *key,
     907             :                dsa_pointer item_pointer)
     908             : {
     909     2022400 :     while (DsaPointerIsValid(item_pointer))
     910             :     {
     911             :         dshash_table_item *item;
     912             : 
     913     1256194 :         item = dsa_get_address(hash_table->area, item_pointer);
     914     1256194 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     915      996452 :             return item;
     916      259742 :         item_pointer = item->next;
     917             :     }
     918      766206 :     return NULL;
     919             : }
     920             : 
     921             : /*
     922             :  * Insert an already-allocated item into a bucket.
     923             :  */
     924             : static void
     925      660610 : insert_item_into_bucket(dshash_table *hash_table,
     926             :                         dsa_pointer item_pointer,
     927             :                         dshash_table_item *item,
     928             :                         dsa_pointer *bucket)
     929             : {
     930             :     Assert(item == dsa_get_address(hash_table->area, item_pointer));
     931             : 
     932      660610 :     item->next = *bucket;
     933      660610 :     *bucket = item_pointer;
     934      660610 : }
     935             : 
     936             : /*
     937             :  * Allocate space for an entry with the given key and insert it into the
     938             :  * provided bucket.
     939             :  */
     940             : static dshash_table_item *
     941      448252 : insert_into_bucket(dshash_table *hash_table,
     942             :                    const void *key,
     943             :                    dsa_pointer *bucket)
     944             : {
     945             :     dsa_pointer item_pointer;
     946             :     dshash_table_item *item;
     947             : 
     948      448252 :     item_pointer = dsa_allocate(hash_table->area,
     949             :                                 hash_table->params.entry_size +
     950             :                                 MAXALIGN(sizeof(dshash_table_item)));
     951      448252 :     item = dsa_get_address(hash_table->area, item_pointer);
     952      448252 :     memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size);
     953      448252 :     insert_item_into_bucket(hash_table, item_pointer, item, bucket);
     954      448252 :     return item;
     955             : }
     956             : 
     957             : /*
     958             :  * Search a bucket for a matching key and delete it.
     959             :  */
     960             : static bool
     961         252 : delete_key_from_bucket(dshash_table *hash_table,
     962             :                        const void *key,
     963             :                        dsa_pointer *bucket_head)
     964             : {
     965         252 :     while (DsaPointerIsValid(*bucket_head))
     966             :     {
     967             :         dshash_table_item *item;
     968             : 
     969         184 :         item = dsa_get_address(hash_table->area, *bucket_head);
     970             : 
     971         184 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     972             :         {
     973             :             dsa_pointer next;
     974             : 
     975         184 :             next = item->next;
     976         184 :             dsa_free(hash_table->area, *bucket_head);
     977         184 :             *bucket_head = next;
     978             : 
     979         184 :             return true;
     980             :         }
     981           0 :         bucket_head = &item->next;
     982             :     }
     983          68 :     return false;
     984             : }
     985             : 
     986             : /*
     987             :  * Delete the specified item from the bucket.
     988             :  */
     989             : static bool
     990       62574 : delete_item_from_bucket(dshash_table *hash_table,
     991             :                         dshash_table_item *item,
     992             :                         dsa_pointer *bucket_head)
     993             : {
     994       63110 :     while (DsaPointerIsValid(*bucket_head))
     995             :     {
     996             :         dshash_table_item *bucket_item;
     997             : 
     998       63110 :         bucket_item = dsa_get_address(hash_table->area, *bucket_head);
     999             : 
    1000       63110 :         if (bucket_item == item)
    1001             :         {
    1002             :             dsa_pointer next;
    1003             : 
    1004       62574 :             next = item->next;
    1005       62574 :             dsa_free(hash_table->area, *bucket_head);
    1006       62574 :             *bucket_head = next;
    1007       62574 :             return true;
    1008             :         }
    1009         536 :         bucket_head = &bucket_item->next;
    1010             :     }
    1011           0 :     return false;
    1012             : }
    1013             : 
    1014             : /*
    1015             :  * Compute the hash value for a key.
    1016             :  */
    1017             : static inline dshash_hash
    1018     1758662 : hash_key(dshash_table *hash_table, const void *key)
    1019             : {
    1020     1758662 :     return hash_table->params.hash_function(key,
    1021             :                                             hash_table->params.key_size,
    1022             :                                             hash_table->arg);
    1023             : }
    1024             : 
    1025             : /*
    1026             :  * Check whether two keys compare equal.
    1027             :  */
    1028             : static inline bool
    1029     1256378 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
    1030             : {
    1031     1256378 :     return hash_table->params.compare_function(a, b,
    1032             :                                                hash_table->params.key_size,
    1033     1256378 :                                                hash_table->arg) == 0;
    1034             : }

Generated by: LCOV version 1.14