LCOV - code coverage report
Current view: top level - src/backend/lib - dshash.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 229 273 83.9 %
Date: 2024-07-18 16:11:39 Functions: 30 32 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dshash.c
       4             :  *    Concurrent hash tables backed by dynamic shared memory areas.
       5             :  *
       6             :  * This is an open hashing hash table, with a linked list at each table
       7             :  * entry.  It supports dynamic resizing, as required to prevent the linked
       8             :  * lists from growing too long on average.  Currently, only growing is
       9             :  * supported: the hash table never becomes smaller.
      10             :  *
      11             :  * To deal with concurrency, it has a fixed size set of partitions, each of
      12             :  * which is independently locked.  Each bucket maps to a partition; so insert,
      13             :  * find and iterate operations normally only acquire one lock.  Therefore,
      14             :  * good concurrency is achieved whenever such operations don't collide at the
      15             :  * lock partition level.  However, when a resize operation begins, all
      16             :  * partition locks must be acquired simultaneously for a brief period.  This
      17             :  * is only expected to happen a small number of times until a stable size is
      18             :  * found, since growth is geometric.
      19             :  *
      20             :  * Future versions may support iterators and incremental resizing; for now
      21             :  * the implementation is minimalist.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/lib/dshash.c
      28             :  *
      29             :  *-------------------------------------------------------------------------
      30             :  */
      31             : 
      32             : #include "postgres.h"
      33             : 
      34             : #include "common/hashfn.h"
      35             : #include "lib/dshash.h"
      36             : #include "storage/lwlock.h"
      37             : #include "utils/dsa.h"
      38             : 
      39             : /*
      40             :  * An item in the hash table.  This wraps the user's entry object in an
      41             :  * envelop that holds a pointer back to the bucket and a pointer to the next
      42             :  * item in the bucket.
      43             :  */
      44             : struct dshash_table_item
      45             : {
      46             :     /* The next item in the same bucket. */
      47             :     dsa_pointer next;
      48             :     /* The hashed key, to avoid having to recompute it. */
      49             :     dshash_hash hash;
      50             :     /* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
      51             : };
      52             : 
      53             : /*
      54             :  * The number of partitions for locking purposes.  This is set to match
      55             :  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
      56             :  * the buffer pool must be good enough for any other purpose.  This could
      57             :  * become a runtime parameter in future.
      58             :  */
      59             : #define DSHASH_NUM_PARTITIONS_LOG2 7
      60             : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
      61             : 
      62             : /* A magic value used to identify our hash tables. */
      63             : #define DSHASH_MAGIC 0x75ff6a20
      64             : 
      65             : /*
      66             :  * Tracking information for each lock partition.  Initially, each partition
      67             :  * corresponds to one bucket, but each time the hash table grows, the buckets
      68             :  * covered by each partition split so the number of buckets covered doubles.
      69             :  *
      70             :  * We might want to add padding here so that each partition is on a different
      71             :  * cache line, but doing so would bloat this structure considerably.
      72             :  */
      73             : typedef struct dshash_partition
      74             : {
      75             :     LWLock      lock;           /* Protects all buckets in this partition. */
      76             :     size_t      count;          /* # of items in this partition's buckets */
      77             : } dshash_partition;
      78             : 
      79             : /*
      80             :  * The head object for a hash table.  This will be stored in dynamic shared
      81             :  * memory.
      82             :  */
      83             : typedef struct dshash_table_control
      84             : {
      85             :     dshash_table_handle handle;
      86             :     uint32      magic;
      87             :     dshash_partition partitions[DSHASH_NUM_PARTITIONS];
      88             :     int         lwlock_tranche_id;
      89             : 
      90             :     /*
      91             :      * The following members are written to only when ALL partitions locks are
      92             :      * held.  They can be read when any one partition lock is held.
      93             :      */
      94             : 
      95             :     /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
      96             :     size_t      size_log2;      /* log2(number of buckets) */
      97             :     dsa_pointer buckets;        /* current bucket array */
      98             : } dshash_table_control;
      99             : 
     100             : /*
     101             :  * Per-backend state for a dynamic hash table.
     102             :  */
     103             : struct dshash_table
     104             : {
     105             :     dsa_area   *area;           /* Backing dynamic shared memory area. */
     106             :     dshash_parameters params;   /* Parameters. */
     107             :     void       *arg;            /* User-supplied data pointer. */
     108             :     dshash_table_control *control;  /* Control object in DSM. */
     109             :     dsa_pointer *buckets;       /* Current bucket pointers in DSM. */
     110             :     size_t      size_log2;      /* log2(number of buckets) */
     111             : };
     112             : 
     113             : /* Given a pointer to an item, find the entry (user data) it holds. */
     114             : #define ENTRY_FROM_ITEM(item) \
     115             :     ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
     116             : 
     117             : /* Given a pointer to an entry, find the item that holds it. */
     118             : #define ITEM_FROM_ENTRY(entry)                                          \
     119             :     ((dshash_table_item *)((char *)(entry) -                            \
     120             :                              MAXALIGN(sizeof(dshash_table_item))))
     121             : 
     122             : /* How many resize operations (bucket splits) have there been? */
     123             : #define NUM_SPLITS(size_log2)                   \
     124             :     (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
     125             : 
     126             : /* How many buckets are there in a given size? */
     127             : #define NUM_BUCKETS(size_log2)      \
     128             :     (((size_t) 1) << (size_log2))
     129             : 
     130             : /* How many buckets are there in each partition at a given size? */
     131             : #define BUCKETS_PER_PARTITION(size_log2)        \
     132             :     (((size_t) 1) << NUM_SPLITS(size_log2))
     133             : 
     134             : /* Max entries before we need to grow.  Half + quarter = 75% load factor. */
     135             : #define MAX_COUNT_PER_PARTITION(hash_table)             \
     136             :     (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
     137             :      BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
     138             : 
     139             : /* Choose partition based on the highest order bits of the hash. */
     140             : #define PARTITION_FOR_HASH(hash)                                        \
     141             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
     142             : 
     143             : /*
     144             :  * Find the bucket index for a given hash and table size.  Each time the table
     145             :  * doubles in size, the appropriate bucket for a given hash value doubles and
     146             :  * possibly adds one, depending on the newly revealed bit, so that all buckets
     147             :  * are split.
     148             :  */
     149             : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)     \
     150             :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
     151             : 
     152             : /* The index of the first bucket in a given partition. */
     153             : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)    \
     154             :     ((partition) << NUM_SPLITS(size_log2))
     155             : 
     156             : /* Choose partition based on bucket index. */
     157             : #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)               \
     158             :     ((bucket_idx) >> NUM_SPLITS(size_log2))
     159             : 
     160             : /* The head of the active bucket for a given hash value (lvalue). */
     161             : #define BUCKET_FOR_HASH(hash_table, hash)                               \
     162             :     (hash_table->buckets[                                                \
     163             :         BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,                            \
     164             :                                        hash_table->size_log2)])
     165             : 
     166             : static void delete_item(dshash_table *hash_table,
     167             :                         dshash_table_item *item);
     168             : static void resize(dshash_table *hash_table, size_t new_size_log2);
     169             : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
     170             : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
     171             :                                                 const void *key,
     172             :                                                 dsa_pointer item_pointer);
     173             : static void insert_item_into_bucket(dshash_table *hash_table,
     174             :                                     dsa_pointer item_pointer,
     175             :                                     dshash_table_item *item,
     176             :                                     dsa_pointer *bucket);
     177             : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
     178             :                                              const void *key,
     179             :                                              dsa_pointer *bucket);
     180             : static bool delete_key_from_bucket(dshash_table *hash_table,
     181             :                                    const void *key,
     182             :                                    dsa_pointer *bucket_head);
     183             : static bool delete_item_from_bucket(dshash_table *hash_table,
     184             :                                     dshash_table_item *item,
     185             :                                     dsa_pointer *bucket_head);
     186             : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
     187             : static inline bool equal_keys(dshash_table *hash_table,
     188             :                               const void *a, const void *b);
     189             : static inline void copy_key(dshash_table *hash_table, void *dest,
     190             :                             const void *src);
     191             : 
     192             : #define PARTITION_LOCK(hash_table, i)           \
     193             :     (&(hash_table)->control->partitions[(i)].lock)
     194             : 
     195             : #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
     196             :     Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
     197             :            DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
     198             : 
     199             : /*
     200             :  * Create a new hash table backed by the given dynamic shared area, with the
     201             :  * given parameters.  The returned object is allocated in backend-local memory
     202             :  * using the current MemoryContext.  'arg' will be passed through to the
     203             :  * compare, hash, and copy functions.
     204             :  */
     205             : dshash_table *
     206        2108 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
     207             : {
     208             :     dshash_table *hash_table;
     209             :     dsa_pointer control;
     210             : 
     211             :     /* Allocate the backend-local object representing the hash table. */
     212        2108 :     hash_table = palloc(sizeof(dshash_table));
     213             : 
     214             :     /* Allocate the control object in shared memory. */
     215        2108 :     control = dsa_allocate(area, sizeof(dshash_table_control));
     216             : 
     217             :     /* Set up the local and shared hash table structs. */
     218        2108 :     hash_table->area = area;
     219        2108 :     hash_table->params = *params;
     220        2108 :     hash_table->arg = arg;
     221        2108 :     hash_table->control = dsa_get_address(area, control);
     222        2108 :     hash_table->control->handle = control;
     223        2108 :     hash_table->control->magic = DSHASH_MAGIC;
     224        2108 :     hash_table->control->lwlock_tranche_id = params->tranche_id;
     225             : 
     226             :     /* Set up the array of lock partitions. */
     227             :     {
     228        2108 :         dshash_partition *partitions = hash_table->control->partitions;
     229        2108 :         int         tranche_id = hash_table->control->lwlock_tranche_id;
     230             :         int         i;
     231             : 
     232      271932 :         for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     233             :         {
     234      269824 :             LWLockInitialize(&partitions[i].lock, tranche_id);
     235      269824 :             partitions[i].count = 0;
     236             :         }
     237             :     }
     238             : 
     239             :     /*
     240             :      * Set up the initial array of buckets.  Our initial size is the same as
     241             :      * the number of partitions.
     242             :      */
     243        2108 :     hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
     244        4216 :     hash_table->control->buckets =
     245        2108 :         dsa_allocate_extended(area,
     246             :                               sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
     247             :                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
     248        2108 :     if (!DsaPointerIsValid(hash_table->control->buckets))
     249             :     {
     250           0 :         dsa_free(area, control);
     251           0 :         ereport(ERROR,
     252             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     253             :                  errmsg("out of memory"),
     254             :                  errdetail("Failed on DSA request of size %zu.",
     255             :                            sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
     256             :     }
     257        4216 :     hash_table->buckets = dsa_get_address(area,
     258        2108 :                                           hash_table->control->buckets);
     259        2108 :     hash_table->size_log2 = hash_table->control->size_log2;
     260             : 
     261        2108 :     return hash_table;
     262             : }
     263             : 
     264             : /*
     265             :  * Attach to an existing hash table using a handle.  The returned object is
     266             :  * allocated in backend-local memory using the current MemoryContext.  'arg'
     267             :  * will be passed through to the compare and hash functions.
     268             :  */
     269             : dshash_table *
     270       35514 : dshash_attach(dsa_area *area, const dshash_parameters *params,
     271             :               dshash_table_handle handle, void *arg)
     272             : {
     273             :     dshash_table *hash_table;
     274             :     dsa_pointer control;
     275             : 
     276             :     /* Allocate the backend-local object representing the hash table. */
     277       35514 :     hash_table = palloc(sizeof(dshash_table));
     278             : 
     279             :     /* Find the control object in shared memory. */
     280       35514 :     control = handle;
     281             : 
     282             :     /* Set up the local hash table struct. */
     283       35514 :     hash_table->area = area;
     284       35514 :     hash_table->params = *params;
     285       35514 :     hash_table->arg = arg;
     286       35514 :     hash_table->control = dsa_get_address(area, control);
     287             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     288             : 
     289             :     /*
     290             :      * These will later be set to the correct values by
     291             :      * ensure_valid_bucket_pointers(), at which time we'll be holding a
     292             :      * partition lock for interlocking against concurrent resizing.
     293             :      */
     294       35514 :     hash_table->buckets = NULL;
     295       35514 :     hash_table->size_log2 = 0;
     296             : 
     297       35514 :     return hash_table;
     298             : }
     299             : 
     300             : /*
     301             :  * Detach from a hash table.  This frees backend-local resources associated
     302             :  * with the hash table, but the hash table will continue to exist until it is
     303             :  * either explicitly destroyed (by a backend that is still attached to it), or
     304             :  * the area that backs it is returned to the operating system.
     305             :  */
     306             : void
     307       37324 : dshash_detach(dshash_table *hash_table)
     308             : {
     309             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     310             : 
     311             :     /* The hash table may have been destroyed.  Just free local memory. */
     312       37324 :     pfree(hash_table);
     313       37324 : }
     314             : 
     315             : /*
     316             :  * Destroy a hash table, returning all memory to the area.  The caller must be
     317             :  * certain that no other backend will attempt to access the hash table before
     318             :  * calling this function.  Other backend must explicitly call dshash_detach to
     319             :  * free up backend-local memory associated with the hash table.  The backend
     320             :  * that calls dshash_destroy must not call dshash_detach.
     321             :  */
     322             : void
     323           0 : dshash_destroy(dshash_table *hash_table)
     324             : {
     325             :     size_t      size;
     326             :     size_t      i;
     327             : 
     328             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     329           0 :     ensure_valid_bucket_pointers(hash_table);
     330             : 
     331             :     /* Free all the entries. */
     332           0 :     size = NUM_BUCKETS(hash_table->size_log2);
     333           0 :     for (i = 0; i < size; ++i)
     334             :     {
     335           0 :         dsa_pointer item_pointer = hash_table->buckets[i];
     336             : 
     337           0 :         while (DsaPointerIsValid(item_pointer))
     338             :         {
     339             :             dshash_table_item *item;
     340             :             dsa_pointer next_item_pointer;
     341             : 
     342           0 :             item = dsa_get_address(hash_table->area, item_pointer);
     343           0 :             next_item_pointer = item->next;
     344           0 :             dsa_free(hash_table->area, item_pointer);
     345           0 :             item_pointer = next_item_pointer;
     346             :         }
     347             :     }
     348             : 
     349             :     /*
     350             :      * Vandalize the control block to help catch programming errors where
     351             :      * other backends access the memory formerly occupied by this hash table.
     352             :      */
     353           0 :     hash_table->control->magic = 0;
     354             : 
     355             :     /* Free the active table and control object. */
     356           0 :     dsa_free(hash_table->area, hash_table->control->buckets);
     357           0 :     dsa_free(hash_table->area, hash_table->control->handle);
     358             : 
     359           0 :     pfree(hash_table);
     360           0 : }
     361             : 
     362             : /*
     363             :  * Get a handle that can be used by other processes to attach to this hash
     364             :  * table.
     365             :  */
     366             : dshash_table_handle
     367        2108 : dshash_get_hash_table_handle(dshash_table *hash_table)
     368             : {
     369             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     370             : 
     371        2108 :     return hash_table->control->handle;
     372             : }
     373             : 
     374             : /*
     375             :  * Look up an entry, given a key.  Returns a pointer to an entry if one can be
     376             :  * found with the given key.  Returns NULL if the key is not found.  If a
     377             :  * non-NULL value is returned, the entry is locked and must be released by
     378             :  * calling dshash_release_lock.  If an error is raised before
     379             :  * dshash_release_lock is called, the lock will be released automatically, but
     380             :  * the caller must take care to ensure that the entry is not left corrupted.
     381             :  * The lock mode is either shared or exclusive depending on 'exclusive'.
     382             :  *
     383             :  * The caller must not hold a lock already.
     384             :  *
     385             :  * Note that the lock held is in fact an LWLock, so interrupts will be held on
     386             :  * return from this function, and not resumed until dshash_release_lock is
     387             :  * called.  It is a very good idea for the caller to release the lock quickly.
     388             :  */
     389             : void *
     390     1369678 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
     391             : {
     392             :     dshash_hash hash;
     393             :     size_t      partition;
     394             :     dshash_table_item *item;
     395             : 
     396     1369678 :     hash = hash_key(hash_table, key);
     397     1369678 :     partition = PARTITION_FOR_HASH(hash);
     398             : 
     399             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     400             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     401             : 
     402     1369678 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition),
     403     1369678 :                   exclusive ? LW_EXCLUSIVE : LW_SHARED);
     404     1369678 :     ensure_valid_bucket_pointers(hash_table);
     405             : 
     406             :     /* Search the active bucket. */
     407     1369678 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     408             : 
     409     1369678 :     if (!item)
     410             :     {
     411             :         /* Not found. */
     412      334106 :         LWLockRelease(PARTITION_LOCK(hash_table, partition));
     413      334106 :         return NULL;
     414             :     }
     415             :     else
     416             :     {
     417             :         /* The caller will free the lock by calling dshash_release_lock. */
     418     1035572 :         return ENTRY_FROM_ITEM(item);
     419             :     }
     420             : }
     421             : 
     422             : /*
     423             :  * Returns a pointer to an exclusively locked item which must be released with
     424             :  * dshash_release_lock.  If the key is found in the hash table, 'found' is set
     425             :  * to true and a pointer to the existing entry is returned.  If the key is not
     426             :  * found, 'found' is set to false, and a pointer to a newly created entry is
     427             :  * returned.
     428             :  *
     429             :  * Notes above dshash_find() regarding locking and error handling equally
     430             :  * apply here.
     431             :  */
     432             : void *
     433      473512 : dshash_find_or_insert(dshash_table *hash_table,
     434             :                       const void *key,
     435             :                       bool *found)
     436             : {
     437             :     dshash_hash hash;
     438             :     size_t      partition_index;
     439             :     dshash_partition *partition;
     440             :     dshash_table_item *item;
     441             : 
     442      473512 :     hash = hash_key(hash_table, key);
     443      473512 :     partition_index = PARTITION_FOR_HASH(hash);
     444      473512 :     partition = &hash_table->control->partitions[partition_index];
     445             : 
     446             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     447             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     448             : 
     449      477706 : restart:
     450      477706 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
     451             :                   LW_EXCLUSIVE);
     452      477706 :     ensure_valid_bucket_pointers(hash_table);
     453             : 
     454             :     /* Search the active bucket. */
     455      477706 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     456             : 
     457      477706 :     if (item)
     458          38 :         *found = true;
     459             :     else
     460             :     {
     461      477668 :         *found = false;
     462             : 
     463             :         /* Check if we are getting too full. */
     464      477668 :         if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
     465             :         {
     466             :             /*
     467             :              * The load factor (= keys / buckets) for all buckets protected by
     468             :              * this partition is > 0.75.  Presumably the same applies
     469             :              * generally across the whole hash table (though we don't attempt
     470             :              * to track that directly to avoid contention on some kind of
     471             :              * central counter; we just assume that this partition is
     472             :              * representative).  This is a good time to resize.
     473             :              *
     474             :              * Give up our existing lock first, because resizing needs to
     475             :              * reacquire all the locks in the right order to avoid deadlocks.
     476             :              */
     477        4194 :             LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     478        4194 :             resize(hash_table, hash_table->size_log2 + 1);
     479             : 
     480        4194 :             goto restart;
     481             :         }
     482             : 
     483             :         /* Finally we can try to insert the new item. */
     484      473474 :         item = insert_into_bucket(hash_table, key,
     485      473474 :                                   &BUCKET_FOR_HASH(hash_table, hash));
     486      473474 :         item->hash = hash;
     487             :         /* Adjust per-lock-partition counter for load factor knowledge. */
     488      473474 :         ++partition->count;
     489             :     }
     490             : 
     491             :     /* The caller must release the lock with dshash_release_lock. */
     492      473512 :     return ENTRY_FROM_ITEM(item);
     493             : }
     494             : 
     495             : /*
     496             :  * Remove an entry by key.  Returns true if the key was found and the
     497             :  * corresponding entry was removed.
     498             :  *
     499             :  * To delete an entry that you already have a pointer to, see
     500             :  * dshash_delete_entry.
     501             :  */
     502             : bool
     503         256 : dshash_delete_key(dshash_table *hash_table, const void *key)
     504             : {
     505             :     dshash_hash hash;
     506             :     size_t      partition;
     507             :     bool        found;
     508             : 
     509             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     510             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     511             : 
     512         256 :     hash = hash_key(hash_table, key);
     513         256 :     partition = PARTITION_FOR_HASH(hash);
     514             : 
     515         256 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
     516         256 :     ensure_valid_bucket_pointers(hash_table);
     517             : 
     518         256 :     if (delete_key_from_bucket(hash_table, key,
     519         256 :                                &BUCKET_FOR_HASH(hash_table, hash)))
     520             :     {
     521             :         Assert(hash_table->control->partitions[partition].count > 0);
     522         188 :         found = true;
     523         188 :         --hash_table->control->partitions[partition].count;
     524             :     }
     525             :     else
     526          68 :         found = false;
     527             : 
     528         256 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     529             : 
     530         256 :     return found;
     531             : }
     532             : 
     533             : /*
     534             :  * Remove an entry.  The entry must already be exclusively locked, and must
     535             :  * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
     536             :  * function releases the lock just like dshash_release_lock.
     537             :  *
     538             :  * To delete an entry by key, see dshash_delete_key.
     539             :  */
     540             : void
     541       62354 : dshash_delete_entry(dshash_table *hash_table, void *entry)
     542             : {
     543       62354 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     544       62354 :     size_t      partition = PARTITION_FOR_HASH(item->hash);
     545             : 
     546             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     547             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     548             :                                 LW_EXCLUSIVE));
     549             : 
     550       62354 :     delete_item(hash_table, item);
     551       62354 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     552       62354 : }
     553             : 
     554             : /*
     555             :  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
     556             :  */
     557             : void
     558     1446730 : dshash_release_lock(dshash_table *hash_table, void *entry)
     559             : {
     560     1446730 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     561     1446730 :     size_t      partition_index = PARTITION_FOR_HASH(item->hash);
     562             : 
     563             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     564             : 
     565     1446730 :     LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     566     1446730 : }
     567             : 
     568             : /*
     569             :  * A compare function that forwards to memcmp.
     570             :  */
     571             : int
     572         332 : dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
     573             : {
     574         332 :     return memcmp(a, b, size);
     575             : }
     576             : 
     577             : /*
     578             :  * A hash function that forwards to tag_hash.
     579             :  */
     580             : dshash_hash
     581         978 : dshash_memhash(const void *v, size_t size, void *arg)
     582             : {
     583         978 :     return tag_hash(v, size);
     584             : }
     585             : 
     586             : /*
     587             :  * A copy function that forwards to memcpy.
     588             :  */
     589             : void
     590      473468 : dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
     591             : {
     592      473468 :     (void) memcpy(dest, src, size);
     593      473468 : }
     594             : 
     595             : /*
     596             :  * A compare function that forwards to strcmp.
     597             :  */
     598             : int
     599           6 : dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
     600             : {
     601             :     Assert(strlen((const char *) a) < size);
     602             :     Assert(strlen((const char *) b) < size);
     603             : 
     604           6 :     return strcmp((const char *) a, (const char *) b);
     605             : }
     606             : 
     607             : /*
     608             :  * A hash function that forwards to string_hash.
     609             :  */
     610             : dshash_hash
     611          12 : dshash_strhash(const void *v, size_t size, void *arg)
     612             : {
     613             :     Assert(strlen((const char *) v) < size);
     614             : 
     615          12 :     return string_hash((const char *) v, size);
     616             : }
     617             : 
     618             : /*
     619             :  * A copy function that forwards to strcpy.
     620             :  */
     621             : void
     622           6 : dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
     623             : {
     624             :     Assert(strlen((const char *) src) < size);
     625             : 
     626           6 :     (void) strcpy((char *) dest, (const char *) src);
     627           6 : }
     628             : 
     629             : /*
     630             :  * Sequentially scan through dshash table and return all the elements one by
     631             :  * one, return NULL when all elements have been returned.
     632             :  *
     633             :  * dshash_seq_term needs to be called when a scan finished.  The caller may
     634             :  * delete returned elements midst of a scan by using dshash_delete_current()
     635             :  * if exclusive = true.
     636             :  */
     637             : void
     638        1546 : dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
     639             :                 bool exclusive)
     640             : {
     641        1546 :     status->hash_table = hash_table;
     642        1546 :     status->curbucket = 0;
     643        1546 :     status->nbuckets = 0;
     644        1546 :     status->curitem = NULL;
     645        1546 :     status->pnextitem = InvalidDsaPointer;
     646        1546 :     status->curpartition = -1;
     647        1546 :     status->exclusive = exclusive;
     648        1546 : }
     649             : 
     650             : /*
     651             :  * Returns the next element.
     652             :  *
     653             :  * Returned elements are locked and the caller may not release the lock. It is
     654             :  * released by future calls to dshash_seq_next() or dshash_seq_term().
     655             :  */
     656             : void *
     657      379654 : dshash_seq_next(dshash_seq_status *status)
     658             : {
     659             :     dsa_pointer next_item_pointer;
     660             : 
     661             :     /*
     662             :      * Not yet holding any partition locks. Need to determine the size of the
     663             :      * hash table, it could have been resized since we were looking last.
     664             :      * Since we iterate in partition order, we can start by unconditionally
     665             :      * lock partition 0.
     666             :      *
     667             :      * Once we hold the lock, no resizing can happen until the scan ends. So
     668             :      * we don't need to repeatedly call ensure_valid_bucket_pointers().
     669             :      */
     670      379654 :     if (status->curpartition == -1)
     671             :     {
     672             :         Assert(status->curbucket == 0);
     673             :         ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);
     674             : 
     675        1546 :         status->curpartition = 0;
     676             : 
     677        1546 :         LWLockAcquire(PARTITION_LOCK(status->hash_table,
     678             :                                      status->curpartition),
     679        1546 :                       status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     680             : 
     681        1546 :         ensure_valid_bucket_pointers(status->hash_table);
     682             : 
     683        1546 :         status->nbuckets =
     684        1546 :             NUM_BUCKETS(status->hash_table->control->size_log2);
     685        1546 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     686             :     }
     687             :     else
     688      378108 :         next_item_pointer = status->pnextitem;
     689             : 
     690             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
     691             :                                                status->curpartition),
     692             :                                 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
     693             : 
     694             :     /* Move to the next bucket if we finished the current bucket */
     695     1821948 :     while (!DsaPointerIsValid(next_item_pointer))
     696             :     {
     697             :         int         next_partition;
     698             : 
     699     1443840 :         if (++status->curbucket >= status->nbuckets)
     700             :         {
     701             :             /* all buckets have been scanned. finish. */
     702        1546 :             return NULL;
     703             :         }
     704             : 
     705             :         /* Check if move to the next partition */
     706     1442294 :         next_partition =
     707     1442294 :             PARTITION_FOR_BUCKET_INDEX(status->curbucket,
     708             :                                        status->hash_table->size_log2);
     709             : 
     710     1442294 :         if (status->curpartition != next_partition)
     711             :         {
     712             :             /*
     713             :              * Move to the next partition. Lock the next partition then
     714             :              * release the current, not in the reverse order to avoid
     715             :              * concurrent resizing.  Avoid dead lock by taking lock in the
     716             :              * same order with resize().
     717             :              */
     718      196342 :             LWLockAcquire(PARTITION_LOCK(status->hash_table,
     719             :                                          next_partition),
     720      196342 :                           status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     721      196342 :             LWLockRelease(PARTITION_LOCK(status->hash_table,
     722             :                                          status->curpartition));
     723      196342 :             status->curpartition = next_partition;
     724             :         }
     725             : 
     726     1442294 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     727             :     }
     728             : 
     729      378108 :     status->curitem =
     730      378108 :         dsa_get_address(status->hash_table->area, next_item_pointer);
     731             : 
     732             :     /*
     733             :      * The caller may delete the item. Store the next item in case of
     734             :      * deletion.
     735             :      */
     736      378108 :     status->pnextitem = status->curitem->next;
     737             : 
     738      378108 :     return ENTRY_FROM_ITEM(status->curitem);
     739             : }
     740             : 
     741             : /*
     742             :  * Terminates the seqscan and release all locks.
     743             :  *
     744             :  * Needs to be called after finishing or when exiting a seqscan.
     745             :  */
     746             : void
     747        1546 : dshash_seq_term(dshash_seq_status *status)
     748             : {
     749        1546 :     if (status->curpartition >= 0)
     750        1546 :         LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
     751        1546 : }
     752             : 
     753             : /*
     754             :  * Remove the current entry of the seq scan.
     755             :  */
     756             : void
     757        4088 : dshash_delete_current(dshash_seq_status *status)
     758             : {
     759        4088 :     dshash_table *hash_table = status->hash_table;
     760        4088 :     dshash_table_item *item = status->curitem;
     761             :     size_t      partition PG_USED_FOR_ASSERTS_ONLY;
     762             : 
     763        4088 :     partition = PARTITION_FOR_HASH(item->hash);
     764             : 
     765             :     Assert(status->exclusive);
     766             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     767             :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     768             :                                 LW_EXCLUSIVE));
     769             : 
     770        4088 :     delete_item(hash_table, item);
     771        4088 : }
     772             : 
     773             : /*
     774             :  * Print debugging information about the internal state of the hash table to
     775             :  * stderr.  The caller must hold no partition locks.
     776             :  */
     777             : void
     778           0 : dshash_dump(dshash_table *hash_table)
     779             : {
     780             :     size_t      i;
     781             :     size_t      j;
     782             : 
     783             :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     784             :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     785             : 
     786           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     787             :     {
     788             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     789           0 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
     790             :     }
     791             : 
     792           0 :     ensure_valid_bucket_pointers(hash_table);
     793             : 
     794           0 :     fprintf(stderr,
     795           0 :             "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
     796           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     797             :     {
     798           0 :         dshash_partition *partition = &hash_table->control->partitions[i];
     799           0 :         size_t      begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
     800           0 :         size_t      end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
     801             : 
     802           0 :         fprintf(stderr, "  partition %zu\n", i);
     803           0 :         fprintf(stderr,
     804             :                 "    active buckets (key count = %zu)\n", partition->count);
     805             : 
     806           0 :         for (j = begin; j < end; ++j)
     807             :         {
     808           0 :             size_t      count = 0;
     809           0 :             dsa_pointer bucket = hash_table->buckets[j];
     810             : 
     811           0 :             while (DsaPointerIsValid(bucket))
     812             :             {
     813             :                 dshash_table_item *item;
     814             : 
     815           0 :                 item = dsa_get_address(hash_table->area, bucket);
     816             : 
     817           0 :                 bucket = item->next;
     818           0 :                 ++count;
     819             :             }
     820           0 :             fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
     821             :         }
     822             :     }
     823             : 
     824           0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     825           0 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     826           0 : }
     827             : 
     828             : /*
     829             :  * Delete a locked item to which we have a pointer.
     830             :  */
     831             : static void
     832       66442 : delete_item(dshash_table *hash_table, dshash_table_item *item)
     833             : {
     834       66442 :     size_t      hash = item->hash;
     835       66442 :     size_t      partition = PARTITION_FOR_HASH(hash);
     836             : 
     837             :     Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
     838             : 
     839       66442 :     if (delete_item_from_bucket(hash_table, item,
     840       66442 :                                 &BUCKET_FOR_HASH(hash_table, hash)))
     841             :     {
     842             :         Assert(hash_table->control->partitions[partition].count > 0);
     843       66442 :         --hash_table->control->partitions[partition].count;
     844             :     }
     845             :     else
     846             :     {
     847             :         Assert(false);
     848             :     }
     849       66442 : }
     850             : 
     851             : /*
     852             :  * Grow the hash table if necessary to the requested number of buckets.  The
     853             :  * requested size must be double some previously observed size.
     854             :  *
     855             :  * Must be called without any partition lock held.
     856             :  */
     857             : static void
     858        4194 : resize(dshash_table *hash_table, size_t new_size_log2)
     859             : {
     860             :     dsa_pointer old_buckets;
     861             :     dsa_pointer new_buckets_shared;
     862             :     dsa_pointer *new_buckets;
     863             :     size_t      size;
     864        4194 :     size_t      new_size = ((size_t) 1) << new_size_log2;
     865             :     size_t      i;
     866             : 
     867             :     /*
     868             :      * Acquire the locks for all lock partitions.  This is expensive, but we
     869             :      * shouldn't have to do it many times.
     870             :      */
     871      541026 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     872             :     {
     873             :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     874             : 
     875      536832 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
     876      536832 :         if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
     877             :         {
     878             :             /*
     879             :              * Another backend has already increased the size; we can avoid
     880             :              * obtaining all the locks and return early.
     881             :              */
     882           0 :             LWLockRelease(PARTITION_LOCK(hash_table, 0));
     883           0 :             return;
     884             :         }
     885             :     }
     886             : 
     887             :     Assert(new_size_log2 == hash_table->control->size_log2 + 1);
     888             : 
     889             :     /* Allocate the space for the new table. */
     890        4194 :     new_buckets_shared = dsa_allocate0(hash_table->area,
     891             :                                        sizeof(dsa_pointer) * new_size);
     892        4194 :     new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
     893             : 
     894             :     /*
     895             :      * We've allocated the new bucket array; all that remains to do now is to
     896             :      * reinsert all items, which amounts to adjusting all the pointers.
     897             :      */
     898        4194 :     size = ((size_t) 1) << hash_table->control->size_log2;
     899     1373538 :     for (i = 0; i < size; ++i)
     900             :     {
     901     1369344 :         dsa_pointer item_pointer = hash_table->buckets[i];
     902             : 
     903     1494370 :         while (DsaPointerIsValid(item_pointer))
     904             :         {
     905             :             dshash_table_item *item;
     906             :             dsa_pointer next_item_pointer;
     907             : 
     908      125026 :             item = dsa_get_address(hash_table->area, item_pointer);
     909      125026 :             next_item_pointer = item->next;
     910      125026 :             insert_item_into_bucket(hash_table, item_pointer, item,
     911      125026 :                                     &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
     912             :                                                                                 new_size_log2)]);
     913      125026 :             item_pointer = next_item_pointer;
     914             :         }
     915             :     }
     916             : 
     917             :     /* Swap the hash table into place and free the old one. */
     918        4194 :     old_buckets = hash_table->control->buckets;
     919        4194 :     hash_table->control->buckets = new_buckets_shared;
     920        4194 :     hash_table->control->size_log2 = new_size_log2;
     921        4194 :     hash_table->buckets = new_buckets;
     922        4194 :     dsa_free(hash_table->area, old_buckets);
     923             : 
     924             :     /* Release all the locks. */
     925      541026 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     926      536832 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     927             : }
     928             : 
     929             : /*
     930             :  * Make sure that our backend-local bucket pointers are up to date.  The
     931             :  * caller must have locked one lock partition, which prevents resize() from
     932             :  * running concurrently.
     933             :  */
     934             : static inline void
     935     1849186 : ensure_valid_bucket_pointers(dshash_table *hash_table)
     936             : {
     937     1849186 :     if (hash_table->size_log2 != hash_table->control->size_log2)
     938             :     {
     939       65372 :         hash_table->buckets = dsa_get_address(hash_table->area,
     940       32686 :                                               hash_table->control->buckets);
     941       32686 :         hash_table->size_log2 = hash_table->control->size_log2;
     942             :     }
     943     1849186 : }
     944             : 
     945             : /*
     946             :  * Scan a locked bucket for a match, using the provided compare function.
     947             :  */
     948             : static inline dshash_table_item *
     949     1847384 : find_in_bucket(dshash_table *hash_table, const void *key,
     950             :                dsa_pointer item_pointer)
     951             : {
     952     2117608 :     while (DsaPointerIsValid(item_pointer))
     953             :     {
     954             :         dshash_table_item *item;
     955             : 
     956     1305834 :         item = dsa_get_address(hash_table->area, item_pointer);
     957     1305834 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     958     1035610 :             return item;
     959      270224 :         item_pointer = item->next;
     960             :     }
     961      811774 :     return NULL;
     962             : }
     963             : 
     964             : /*
     965             :  * Insert an already-allocated item into a bucket.
     966             :  */
     967             : static void
     968      598500 : insert_item_into_bucket(dshash_table *hash_table,
     969             :                         dsa_pointer item_pointer,
     970             :                         dshash_table_item *item,
     971             :                         dsa_pointer *bucket)
     972             : {
     973             :     Assert(item == dsa_get_address(hash_table->area, item_pointer));
     974             : 
     975      598500 :     item->next = *bucket;
     976      598500 :     *bucket = item_pointer;
     977      598500 : }
     978             : 
     979             : /*
     980             :  * Allocate space for an entry with the given key and insert it into the
     981             :  * provided bucket.
     982             :  */
     983             : static dshash_table_item *
     984      473474 : insert_into_bucket(dshash_table *hash_table,
     985             :                    const void *key,
     986             :                    dsa_pointer *bucket)
     987             : {
     988             :     dsa_pointer item_pointer;
     989             :     dshash_table_item *item;
     990             : 
     991      473474 :     item_pointer = dsa_allocate(hash_table->area,
     992             :                                 hash_table->params.entry_size +
     993             :                                 MAXALIGN(sizeof(dshash_table_item)));
     994      473474 :     item = dsa_get_address(hash_table->area, item_pointer);
     995      473474 :     copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
     996      473474 :     insert_item_into_bucket(hash_table, item_pointer, item, bucket);
     997      473474 :     return item;
     998             : }
     999             : 
    1000             : /*
    1001             :  * Search a bucket for a matching key and delete it.
    1002             :  */
    1003             : static bool
    1004         256 : delete_key_from_bucket(dshash_table *hash_table,
    1005             :                        const void *key,
    1006             :                        dsa_pointer *bucket_head)
    1007             : {
    1008         256 :     while (DsaPointerIsValid(*bucket_head))
    1009             :     {
    1010             :         dshash_table_item *item;
    1011             : 
    1012         188 :         item = dsa_get_address(hash_table->area, *bucket_head);
    1013             : 
    1014         188 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
    1015             :         {
    1016             :             dsa_pointer next;
    1017             : 
    1018         188 :             next = item->next;
    1019         188 :             dsa_free(hash_table->area, *bucket_head);
    1020         188 :             *bucket_head = next;
    1021             : 
    1022         188 :             return true;
    1023             :         }
    1024           0 :         bucket_head = &item->next;
    1025             :     }
    1026          68 :     return false;
    1027             : }
    1028             : 
    1029             : /*
    1030             :  * Delete the specified item from the bucket.
    1031             :  */
    1032             : static bool
    1033       66442 : delete_item_from_bucket(dshash_table *hash_table,
    1034             :                         dshash_table_item *item,
    1035             :                         dsa_pointer *bucket_head)
    1036             : {
    1037       67060 :     while (DsaPointerIsValid(*bucket_head))
    1038             :     {
    1039             :         dshash_table_item *bucket_item;
    1040             : 
    1041       67060 :         bucket_item = dsa_get_address(hash_table->area, *bucket_head);
    1042             : 
    1043       67060 :         if (bucket_item == item)
    1044             :         {
    1045             :             dsa_pointer next;
    1046             : 
    1047       66442 :             next = item->next;
    1048       66442 :             dsa_free(hash_table->area, *bucket_head);
    1049       66442 :             *bucket_head = next;
    1050       66442 :             return true;
    1051             :         }
    1052         618 :         bucket_head = &bucket_item->next;
    1053             :     }
    1054           0 :     return false;
    1055             : }
    1056             : 
    1057             : /*
    1058             :  * Compute the hash value for a key.
    1059             :  */
    1060             : static inline dshash_hash
    1061     1843446 : hash_key(dshash_table *hash_table, const void *key)
    1062             : {
    1063     1843446 :     return hash_table->params.hash_function(key,
    1064             :                                             hash_table->params.key_size,
    1065             :                                             hash_table->arg);
    1066             : }
    1067             : 
    1068             : /*
    1069             :  * Check whether two keys compare equal.
    1070             :  */
    1071             : static inline bool
    1072     1306022 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
    1073             : {
    1074     1306022 :     return hash_table->params.compare_function(a, b,
    1075             :                                                hash_table->params.key_size,
    1076     1306022 :                                                hash_table->arg) == 0;
    1077             : }
    1078             : 
    1079             : /*
    1080             :  * Copy a key.
    1081             :  */
    1082             : static inline void
    1083      473474 : copy_key(dshash_table *hash_table, void *dest, const void *src)
    1084             : {
    1085      473474 :     hash_table->params.copy_function(dest, src,
    1086             :                                      hash_table->params.key_size,
    1087             :                                      hash_table->arg);
    1088      473474 : }

Generated by: LCOV version 1.14