LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 310 324 95.7 %
Date: 2024-11-21 08:14:44 Functions: 34 34 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_shmem.c
       4             :  *    Storage of stats entries in shared memory
       5             :  *
       6             :  * Copyright (c) 2001-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/activity/pgstat_shmem.c
      10             :  * -------------------------------------------------------------------------
      11             :  */
      12             : 
      13             : #include "postgres.h"
      14             : 
      15             : #include "pgstat.h"
      16             : #include "storage/shmem.h"
      17             : #include "utils/memutils.h"
      18             : #include "utils/pgstat_internal.h"
      19             : 
      20             : 
      21             : #define PGSTAT_ENTRY_REF_HASH_SIZE  128
      22             : 
      23             : /* hash table entry for finding the PgStat_EntryRef for a key */
      24             : typedef struct PgStat_EntryRefHashEntry
      25             : {
      26             :     PgStat_HashKey key;         /* hash key */
      27             :     char        status;         /* for simplehash use */
      28             :     PgStat_EntryRef *entry_ref;
      29             : } PgStat_EntryRefHashEntry;
      30             : 
      31             : 
      32             : /* for references to shared statistics entries */
      33             : #define SH_PREFIX pgstat_entry_ref_hash
      34             : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
      35             : #define SH_KEY_TYPE PgStat_HashKey
      36             : #define SH_KEY key
      37             : #define SH_HASH_KEY(tb, key) \
      38             :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
      39             : #define SH_EQUAL(tb, a, b) \
      40             :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
      41             : #define SH_SCOPE static inline
      42             : #define SH_DEFINE
      43             : #define SH_DECLARE
      44             : #include "lib/simplehash.h"
      45             : 
      46             : 
      47             : static void pgstat_drop_database_and_contents(Oid dboid);
      48             : 
      49             : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
      50             : 
      51             : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
      52             : static bool pgstat_need_entry_refs_gc(void);
      53             : static void pgstat_gc_entry_refs(void);
      54             : static void pgstat_release_all_entry_refs(bool discard_pending);
      55             : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
      56             : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
      57             : 
      58             : static void pgstat_setup_memcxt(void);
      59             : 
      60             : 
      61             : /* parameter for the shared hash */
      62             : static const dshash_parameters dsh_params = {
      63             :     sizeof(PgStat_HashKey),
      64             :     sizeof(PgStatShared_HashEntry),
      65             :     pgstat_cmp_hash_key,
      66             :     pgstat_hash_hash_key,
      67             :     dshash_memcpy,
      68             :     LWTRANCHE_PGSTATS_HASH
      69             : };
      70             : 
      71             : 
      72             : /*
      73             :  * Backend local references to shared stats entries. If there are pending
      74             :  * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
      75             :  * list.
      76             :  *
      77             :  * When a stats entry is dropped each backend needs to release its reference
      78             :  * to it before the memory can be released. To trigger that
      79             :  * pgStatLocal.shmem->gc_request_count is incremented - which each backend
      80             :  * compares to their copy of pgStatSharedRefAge on a regular basis.
      81             :  */
      82             : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
      83             : static int  pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
      84             : 
      85             : /*
      86             :  * Memory contexts containing the pgStatEntryRefHash table and the
      87             :  * pgStatSharedRef entries respectively. Kept separate to make it easier to
      88             :  * track / attribute memory usage.
      89             :  */
      90             : static MemoryContext pgStatSharedRefContext = NULL;
      91             : static MemoryContext pgStatEntryRefHashContext = NULL;
      92             : 
      93             : 
      94             : /* ------------------------------------------------------------
      95             :  * Public functions called from postmaster follow
      96             :  * ------------------------------------------------------------
      97             :  */
      98             : 
      99             : /*
     100             :  * The size of the shared memory allocation for stats stored in the shared
     101             :  * stats hash table. This allocation will be done as part of the main shared
     102             :  * memory, rather than dynamic shared memory, allowing it to be initialized in
     103             :  * postmaster.
     104             :  */
     105             : static Size
     106       11142 : pgstat_dsa_init_size(void)
     107             : {
     108             :     Size        sz;
     109             : 
     110             :     /*
     111             :      * The dshash header / initial buckets array needs to fit into "plain"
     112             :      * shared memory, but it's beneficial to not need dsm segments
     113             :      * immediately. A size of 256kB seems to work well and is not
     114             :      * disproportional compared to other constant sized shared memory
     115             :      * allocations. NB: To avoid DSMs further, the user can configure
     116             :      * min_dynamic_shared_memory.
     117             :      */
     118       11142 :     sz = 256 * 1024;
     119             :     Assert(dsa_minimum_size() <= sz);
     120       11142 :     return MAXALIGN(sz);
     121             : }
     122             : 
     123             : /*
     124             :  * Compute shared memory space needed for cumulative statistics
     125             :  */
     126             : Size
     127        5436 : StatsShmemSize(void)
     128             : {
     129             :     Size        sz;
     130             : 
     131        5436 :     sz = MAXALIGN(sizeof(PgStat_ShmemControl));
     132        5436 :     sz = add_size(sz, pgstat_dsa_init_size());
     133             : 
     134             :     /* Add shared memory for all the custom fixed-numbered statistics */
     135      706680 :     for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
     136             :     {
     137      701244 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     138             : 
     139      701244 :         if (!kind_info)
     140      701196 :             continue;
     141          48 :         if (!kind_info->fixed_amount)
     142          24 :             continue;
     143             : 
     144             :         Assert(kind_info->shared_size != 0);
     145             : 
     146          24 :         sz += MAXALIGN(kind_info->shared_size);
     147             :     }
     148             : 
     149        5436 :     return sz;
     150             : }
     151             : 
     152             : /*
     153             :  * Initialize cumulative statistics system during startup
     154             :  */
     155             : void
     156        1902 : StatsShmemInit(void)
     157             : {
     158             :     bool        found;
     159             :     Size        sz;
     160             : 
     161        1902 :     sz = StatsShmemSize();
     162        1902 :     pgStatLocal.shmem = (PgStat_ShmemControl *)
     163        1902 :         ShmemInitStruct("Shared Memory Stats", sz, &found);
     164             : 
     165        1902 :     if (!IsUnderPostmaster)
     166             :     {
     167             :         dsa_area   *dsa;
     168             :         dshash_table *dsh;
     169        1902 :         PgStat_ShmemControl *ctl = pgStatLocal.shmem;
     170        1902 :         char       *p = (char *) ctl;
     171             : 
     172             :         Assert(!found);
     173             : 
     174             :         /* the allocation of pgStatLocal.shmem itself */
     175        1902 :         p += MAXALIGN(sizeof(PgStat_ShmemControl));
     176             : 
     177             :         /*
     178             :          * Create a small dsa allocation in plain shared memory. This is
     179             :          * required because postmaster cannot use dsm segments. It also
     180             :          * provides a small efficiency win.
     181             :          */
     182        1902 :         ctl->raw_dsa_area = p;
     183        1902 :         p += MAXALIGN(pgstat_dsa_init_size());
     184        1902 :         dsa = dsa_create_in_place(ctl->raw_dsa_area,
     185             :                                   pgstat_dsa_init_size(),
     186             :                                   LWTRANCHE_PGSTATS_DSA, 0);
     187        1902 :         dsa_pin(dsa);
     188             : 
     189             :         /*
     190             :          * To ensure dshash is created in "plain" shared memory, temporarily
     191             :          * limit size of dsa to the initial size of the dsa.
     192             :          */
     193        1902 :         dsa_set_size_limit(dsa, pgstat_dsa_init_size());
     194             : 
     195             :         /*
     196             :          * With the limit in place, create the dshash table. XXX: It'd be nice
     197             :          * if there were dshash_create_in_place().
     198             :          */
     199        1902 :         dsh = dshash_create(dsa, &dsh_params, NULL);
     200        1902 :         ctl->hash_handle = dshash_get_hash_table_handle(dsh);
     201             : 
     202             :         /* lift limit set above */
     203        1902 :         dsa_set_size_limit(dsa, -1);
     204             : 
     205             :         /*
     206             :          * Postmaster will never access these again, thus free the local
     207             :          * dsa/dshash references.
     208             :          */
     209        1902 :         dshash_detach(dsh);
     210        1902 :         dsa_detach(dsa);
     211             : 
     212        1902 :         pg_atomic_init_u64(&ctl->gc_request_count, 1);
     213             : 
     214             :         /* initialize fixed-numbered stats */
     215      488814 :         for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     216             :         {
     217      486912 :             const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     218             :             char       *ptr;
     219             : 
     220      486912 :             if (!kind_info || !kind_info->fixed_amount)
     221      475492 :                 continue;
     222             : 
     223       11420 :             if (pgstat_is_kind_builtin(kind))
     224       11412 :                 ptr = ((char *) ctl) + kind_info->shared_ctl_off;
     225             :             else
     226             :             {
     227           8 :                 int         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
     228             : 
     229             :                 Assert(kind_info->shared_size != 0);
     230           8 :                 ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
     231           8 :                 ptr = ctl->custom_data[idx];
     232             :             }
     233             : 
     234       11420 :             kind_info->init_shmem_cb(ptr);
     235             :         }
     236             :     }
     237             :     else
     238             :     {
     239             :         Assert(found);
     240             :     }
     241        1902 : }
     242             : 
     243             : void
     244       35584 : pgstat_attach_shmem(void)
     245             : {
     246             :     MemoryContext oldcontext;
     247             : 
     248             :     Assert(pgStatLocal.dsa == NULL);
     249             : 
     250             :     /* stats shared memory persists for the backend lifetime */
     251       35584 :     oldcontext = MemoryContextSwitchTo(TopMemoryContext);
     252             : 
     253       35584 :     pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
     254             :                                           NULL);
     255       35584 :     dsa_pin_mapping(pgStatLocal.dsa);
     256             : 
     257       71168 :     pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
     258       35584 :                                             pgStatLocal.shmem->hash_handle, 0);
     259             : 
     260       35584 :     MemoryContextSwitchTo(oldcontext);
     261       35584 : }
     262             : 
     263             : void
     264       35584 : pgstat_detach_shmem(void)
     265             : {
     266             :     Assert(pgStatLocal.dsa);
     267             : 
     268             :     /* we shouldn't leave references to shared stats */
     269       35584 :     pgstat_release_all_entry_refs(false);
     270             : 
     271       35584 :     dshash_detach(pgStatLocal.shared_hash);
     272       35584 :     pgStatLocal.shared_hash = NULL;
     273             : 
     274       35584 :     dsa_detach(pgStatLocal.dsa);
     275             : 
     276             :     /*
     277             :      * dsa_detach() does not decrement the DSA reference count as no segment
     278             :      * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
     279             :      * be registered.  Hence, release it manually now.
     280             :      */
     281       35584 :     dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
     282             : 
     283       35584 :     pgStatLocal.dsa = NULL;
     284       35584 : }
     285             : 
     286             : 
     287             : /* ------------------------------------------------------------
     288             :  * Maintenance of shared memory stats entries
     289             :  * ------------------------------------------------------------
     290             :  */
     291             : 
     292             : PgStatShared_Common *
     293      498176 : pgstat_init_entry(PgStat_Kind kind,
     294             :                   PgStatShared_HashEntry *shhashent)
     295             : {
     296             :     /* Create new stats entry. */
     297             :     dsa_pointer chunk;
     298             :     PgStatShared_Common *shheader;
     299             : 
     300             :     /*
     301             :      * Initialize refcount to 1, marking it as valid / not dropped. The entry
     302             :      * can't be freed before the initialization because it can't be found as
     303             :      * long as we hold the dshash partition lock. Caller needs to increase
     304             :      * further if a longer lived reference is needed.
     305             :      */
     306      498176 :     pg_atomic_init_u32(&shhashent->refcount, 1);
     307             : 
     308             :     /*
     309             :      * Initialize "generation" to 0, as freshly created.
     310             :      */
     311      498176 :     pg_atomic_init_u32(&shhashent->generation, 0);
     312      498176 :     shhashent->dropped = false;
     313             : 
     314      498176 :     chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
     315      498176 :     shheader = dsa_get_address(pgStatLocal.dsa, chunk);
     316      498176 :     shheader->magic = 0xdeadbeef;
     317             : 
     318             :     /* Link the new entry from the hash entry. */
     319      498176 :     shhashent->body = chunk;
     320             : 
     321      498176 :     LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
     322             : 
     323      498176 :     return shheader;
     324             : }
     325             : 
     326             : static PgStatShared_Common *
     327          60 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
     328             : {
     329             :     PgStatShared_Common *shheader;
     330             : 
     331          60 :     shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     332             : 
     333             :     /* mark as not dropped anymore */
     334          60 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     335             : 
     336             :     /*
     337             :      * Increment "generation", to let any backend with local references know
     338             :      * that what they point to is outdated.
     339             :      */
     340          60 :     pg_atomic_fetch_add_u32(&shhashent->generation, 1);
     341          60 :     shhashent->dropped = false;
     342             : 
     343             :     /* reinitialize content */
     344             :     Assert(shheader->magic == 0xdeadbeef);
     345          60 :     memset(pgstat_get_entry_data(kind, shheader), 0,
     346             :            pgstat_get_entry_len(kind));
     347             : 
     348          60 :     return shheader;
     349             : }
     350             : 
     351             : static void
     352     3900278 : pgstat_setup_shared_refs(void)
     353             : {
     354     3900278 :     if (likely(pgStatEntryRefHash != NULL))
     355     3869208 :         return;
     356             : 
     357       31070 :     pgStatEntryRefHash =
     358       31070 :         pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
     359             :                                      PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
     360       31070 :     pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     361             :     Assert(pgStatSharedRefAge != 0);
     362             : }
     363             : 
     364             : /*
     365             :  * Helper function for pgstat_get_entry_ref().
     366             :  */
     367             : static void
     368     1519658 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
     369             :                          PgStatShared_HashEntry *shhashent,
     370             :                          PgStatShared_Common *shheader)
     371             : {
     372             :     Assert(shheader->magic == 0xdeadbeef);
     373             :     Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
     374             : 
     375     1519658 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     376             : 
     377     1519658 :     dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     378             : 
     379     1519658 :     entry_ref->shared_stats = shheader;
     380     1519658 :     entry_ref->shared_entry = shhashent;
     381     1519658 :     entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
     382     1519658 : }
     383             : 
     384             : /*
     385             :  * Helper function for pgstat_get_entry_ref().
     386             :  */
     387             : static bool
     388     3900278 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
     389             : {
     390             :     bool        found;
     391             :     PgStat_EntryRefHashEntry *cache_entry;
     392             : 
     393             :     /*
     394             :      * We immediately insert a cache entry, because it avoids 1) multiple
     395             :      * hashtable lookups in case of a cache miss 2) having to deal with
     396             :      * out-of-memory errors after incrementing PgStatShared_HashEntry->refcount.
     397             :      */
     398             : 
     399     3900278 :     cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
     400             : 
     401     3900278 :     if (!found || !cache_entry->entry_ref)
     402     1670784 :     {
     403             :         PgStat_EntryRef *entry_ref;
     404             : 
     405     1670784 :         cache_entry->entry_ref = entry_ref =
     406     1670784 :             MemoryContextAlloc(pgStatSharedRefContext,
     407             :                                sizeof(PgStat_EntryRef));
     408     1670784 :         entry_ref->shared_stats = NULL;
     409     1670784 :         entry_ref->shared_entry = NULL;
     410     1670784 :         entry_ref->pending = NULL;
     411             : 
     412     1670784 :         found = false;
     413             :     }
     414     2229494 :     else if (cache_entry->entry_ref->shared_stats == NULL)
     415             :     {
     416             :         Assert(cache_entry->entry_ref->pending == NULL);
     417           0 :         found = false;
     418             :     }
     419             :     else
     420             :     {
     421             :         PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
     422             : 
     423     2229494 :         entry_ref = cache_entry->entry_ref;
     424             :         Assert(entry_ref->shared_entry != NULL);
     425             :         Assert(entry_ref->shared_stats != NULL);
     426             : 
     427             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     428             :         /* should have at least our reference */
     429             :         Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
     430             :     }
     431             : 
     432     3900278 :     *entry_ref_p = cache_entry->entry_ref;
     433     3900278 :     return found;
     434             : }
     435             : 
     436             : /*
     437             :  * Get a shared stats reference. If create is true, the shared stats object is
     438             :  * created if it does not exist.
     439             :  *
     440             :  * When create is true, and created_entry is non-NULL, it'll be set to true
     441             :  * if the entry is newly created, false otherwise.
     442             :  */
     443             : PgStat_EntryRef *
     444     3900278 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
     445             :                      bool *created_entry)
     446             : {
     447             :     PgStat_HashKey key;
     448             :     PgStatShared_HashEntry *shhashent;
     449     3900278 :     PgStatShared_Common *shheader = NULL;
     450             :     PgStat_EntryRef *entry_ref;
     451             : 
     452             :     /* clear padding */
     453     3900278 :     memset(&key, 0, sizeof(struct PgStat_HashKey));
     454             : 
     455     3900278 :     key.kind = kind;
     456     3900278 :     key.dboid = dboid;
     457     3900278 :     key.objid = objid;
     458             : 
     459             :     /*
     460             :      * passing in created_entry only makes sense if we could possibly create
     461             :      * an entry.
     462             :      */
     463             :     Assert(create || created_entry == NULL);
     464             :     pgstat_assert_is_up();
     465             :     Assert(pgStatLocal.shared_hash != NULL);
     466             :     Assert(!pgStatLocal.shmem->is_shutdown);
     467             : 
     468     3900278 :     pgstat_setup_memcxt();
     469     3900278 :     pgstat_setup_shared_refs();
     470             : 
     471     3900278 :     if (created_entry != NULL)
     472         214 :         *created_entry = false;
     473             : 
     474             :     /*
     475             :      * Check if other backends dropped stats that could not be deleted because
     476             :      * somebody held references to it. If so, check this backend's references.
     477             :      * This is not expected to happen often. The location of the check is a
     478             :      * bit random, but this is a relatively frequently called path, so better
     479             :      * than most.
     480             :      */
     481     3900278 :     if (pgstat_need_entry_refs_gc())
     482       11224 :         pgstat_gc_entry_refs();
     483             : 
     484             :     /*
     485             :      * First check the lookup cache hashtable in local memory. If we find a
     486             :      * match here we can avoid taking locks / causing contention.
     487             :      */
     488     3900278 :     if (pgstat_get_entry_ref_cached(key, &entry_ref))
     489     2229494 :         return entry_ref;
     490             : 
     491             :     Assert(entry_ref != NULL);
     492             : 
     493             :     /*
     494             :      * Do a lookup in the hash table first - it's quite likely that the entry
     495             :      * already exists, and that way we only need a shared lock.
     496             :      */
     497     1670784 :     shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
     498             : 
     499     1670784 :     if (create && !shhashent)
     500             :     {
     501             :         bool        shfound;
     502             : 
     503             :         /*
     504             :          * It's possible that somebody created the entry since the above
     505             :          * lookup. If so, fall through to the same path we would have taken
     506             :          * had it already been created before the dshash_find() call.
     507             :          */
     508      179178 :         shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
     509      179178 :         if (!shfound)
     510             :         {
     511      179172 :             shheader = pgstat_init_entry(kind, shhashent);
     512      179172 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     513             : 
     514      179172 :             if (created_entry != NULL)
     515          96 :                 *created_entry = true;
     516             : 
     517      179172 :             return entry_ref;
     518             :         }
     519             :     }
     520             : 
     521     1491612 :     if (!shhashent)
     522             :     {
     523             :         /*
     524             :          * If we're not creating, delete the reference again. In all
     525             :          * likelihood it's just a stats lookup - no point wasting memory for a
     526             :          * shared ref to nothing...
     527             :          */
     528      151052 :         pgstat_release_entry_ref(key, entry_ref, false);
     529             : 
     530      151052 :         return NULL;
     531             :     }
     532             :     else
     533             :     {
     534             :         /*
     535             :          * Can get here either because dshash_find() found a match, or if
     536             :          * dshash_find_or_insert() found a concurrently inserted entry.
     537             :          */
     538             : 
     539     1340560 :         if (shhashent->dropped && create)
     540             :         {
     541             :             /*
     542             :              * There are legitimate cases where the old stats entry might not
     543             :              * yet have been dropped by the time it's reused. The most obvious
     544             :              * case is replication slot stats, where a new slot can be
     545             :              * created with the same index just after dropping. But oid
     546             :              * wraparound can lead to other cases as well. We just reset the
     547             :              * stats to their plain state, while incrementing its "generation"
     548             :              * in the shared entry for any remaining local references.
     549             :              */
     550          60 :             shheader = pgstat_reinit_entry(kind, shhashent);
     551          60 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     552             : 
     553          60 :             if (created_entry != NULL)
     554           0 :                 *created_entry = true;
     555             : 
     556          60 :             return entry_ref;
     557             :         }
     558     1340500 :         else if (shhashent->dropped)
     559             :         {
     560          74 :             dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     561          74 :             pgstat_release_entry_ref(key, entry_ref, false);
     562             : 
     563          74 :             return NULL;
     564             :         }
     565             :         else
     566             :         {
     567     1340426 :             shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     568     1340426 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     569             : 
     570     1340426 :             return entry_ref;
     571             :         }
     572             :     }
     573             : }
     574             : /*
                     :  * Release one backend-local reference to a shared stats entry.
                     :  *
                     :  * key is the key under which entry_ref is registered in pgStatEntryRefHash;
                     :  * that hash entry is deleted and entry_ref itself is pfree()d before
                     :  * returning. An ERROR is raised if the key is not found in the hash.
                     :  *
                     :  * If the reference still carries pending data, that data is deleted when
                     :  * discard_pending is true; otherwise an ERROR is raised, before the shared
                     :  * refcount has been touched.
                     :  *
                     :  * If the reference points at shared stats, the shared refcount is
                     :  * decremented. When this was the last reference, the shared hash entry is
                     :  * looked up again and freed, unless its generation no longer matches the
                     :  * one stored in entry_ref, in which case only the dshash lock is released.
                     :  */
     575             : static void
     576     1670784 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
     577             :                          bool discard_pending)
     578             : {
     579     1670784 :     if (entry_ref && entry_ref->pending)
     580             :     {
     581       61598 :         if (discard_pending)
     582       61598 :             pgstat_delete_pending_entry(entry_ref);
     583             :         else
     584           0 :             elog(ERROR, "releasing ref with pending data");
     585             :     }
     586             :
     587     1670784 :     if (entry_ref && entry_ref->shared_stats)
     588             :     {
     589             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     590             :         Assert(entry_ref->pending == NULL);
     591             :
     592             :         /*
     593             :          * This can't race with another backend looking up the stats entry and
     594             :          * increasing the refcount because it is not "legal" to create
     595             :          * additional references to dropped entries.
     596             :          */
     597     1519658 :         if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
     598             :         {
     599             :             PgStatShared_HashEntry *shent;
     600             :
     601             :             /*
     602             :              * We're the last referrer to this entry, try to drop the shared
     603             :              * entry.
     604             :              */
     605             :
     606             :             /* only dropped entries can reach a 0 refcount */
     607             :             Assert(entry_ref->shared_entry->dropped);
     608             :
     609        9286 :             shent = dshash_find(pgStatLocal.shared_hash,
     610        9286 :                                 &entry_ref->shared_entry->key,
     611             :                                 true);
     612        9286 :             if (!shent)
     613           0 :                 elog(ERROR, "could not find just referenced shared stats entry");
     614             :
     615             :             /*
     616             :              * This entry may have been reinitialized while trying to release
     617             :              * it, so double-check that it has not been reused while holding a
     618             :              * lock on its shared entry.
     619             :              */
     620        9286 :             if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
     621        9286 :                 entry_ref->generation)
     622             :             {
     623             :                 /* Same "generation", so we're OK with the removal */
     624             :                 Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
     625             :                 Assert(entry_ref->shared_entry == shent);
     626        9280 :                 pgstat_free_entry(shent, NULL);
     627             :             }
     628             :             else
     629             :             {
     630             :                 /*
     631             :                  * Shared stats entry has been reinitialized, so do not drop
     632             :                  * its shared entry, only release its lock.
     633             :                  */
     634           6 :                 dshash_release_lock(pgStatLocal.shared_hash, shent);
     635             :             }
     636             :         }
     637             :     }
     638             :
     639     1670784 :     if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
     640           0 :         elog(ERROR, "entry ref vanished before deletion");
     641             :
     642     1670784 :     if (entry_ref)
     643     1670784 :         pfree(entry_ref);
     644     1670784 : }
     645             : /*
                     :  * Acquire the LWLock embedded in the shared stats of entry_ref in
                     :  * LW_EXCLUSIVE mode.
                     :  *
                     :  * With nowait the lock is attempted with LWLockConditionalAcquire() and
                     :  * the result of that call is returned. Otherwise this goes through
                     :  * LWLockAcquire() and always returns true.
                     :  */
     646             : bool
     647     1810044 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
     648             : {
     649     1810044 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     650             :
     651     1810044 :     if (nowait)
     652      607526 :         return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
     653             :
     654     1202518 :     LWLockAcquire(lock, LW_EXCLUSIVE);
     655     1202518 :     return true;
     656             : }
     657             : 
     658             : /*
     659             :  * LW_SHARED counterpart of pgstat_lock_entry(). Kept as a separate function
     660             :  * as most callers will need to lock exclusively.
                     :  *
                     :  * With nowait the lock is attempted with LWLockConditionalAcquire() and
                     :  * the result of that call is returned. Otherwise this goes through
                     :  * LWLockAcquire() and always returns true.
     661             :  */
     662             : bool
     663      198724 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
     664             : {
     665      198724 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     666             :
     667      198724 :     if (nowait)
     668           0 :         return LWLockConditionalAcquire(lock, LW_SHARED);
     669             :
     670      198724 :     LWLockAcquire(lock, LW_SHARED);
     671      198724 :     return true;
     672             : }
     673             : /*
                     :  * Release the LWLock embedded in the shared stats of entry_ref, i.e. the
                     :  * lock taken by pgstat_lock_entry() or pgstat_lock_entry_shared().
                     :  */
     674             : void
     675     2008768 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
     676             : {
     677     2008768 :     LWLockRelease(&entry_ref->shared_stats->lock);
     678     2008768 : }
     679             : 
     680             : /*
     681             :  * Helper function to fetch and lock shared stats.
                     :  *
                     :  * Looks up the entry reference for (kind, dboid, objid) with
                     :  * pgstat_get_entry_ref() and locks it exclusively via pgstat_lock_entry().
                     :  * Returns the locked reference, or NULL if the lock could not be acquired,
                     :  * which pgstat_lock_entry() can only report when nowait is true.
                     :  *
                     :  * NOTE(review): the result of pgstat_get_entry_ref() is used without a
                     :  * NULL check; presumably the "true" argument requests creation of a
                     :  * missing entry so that NULL cannot come back - confirm.
     682             :  */
     683             : PgStat_EntryRef *
     684      124096 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
     685             :                             bool nowait)
     686             : {
     687             :     PgStat_EntryRef *entry_ref;
     688             :
     689             :     /* find the shared stats entry for the given kind and object */
     690      124096 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
     691             :
     692             :     /* lock the shared entry to protect the content, skip if failed */
     693      124096 :     if (!pgstat_lock_entry(entry_ref, nowait))
     694           0 :         return NULL;
     695             :
     696      124096 :     return entry_ref;
     697             : }
     698             : /*
                     :  * Request garbage collection of cached entry references by atomically
                     :  * incrementing the gc_request_count counter in shared memory. The counter
                     :  * is compared against pgStatSharedRefAge in pgstat_need_entry_refs_gc().
                     :  */
     699             : void
     700        3666 : pgstat_request_entry_refs_gc(void)
     701             : {
     702        3666 :     pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
     703        3666 : }
     704             : /*
                     :  * Report whether this backend should garbage collect its cached entry
                     :  * references.
                     :  *
                     :  * Returns false if pgStatEntryRefHash does not exist. Otherwise returns
                     :  * true when the shared gc_request_count differs from pgStatSharedRefAge,
                     :  * the counter value this backend recorded most recently.
                     :  */
     705             : static bool
     706     3900278 : pgstat_need_entry_refs_gc(void)
     707             : {
     708             :     uint64      curage;
     709             :
     710     3900278 :     if (!pgStatEntryRefHash)
     711           0 :         return false;
     712             :
     713             :     /* should have been initialized when creating pgStatEntryRefHash */
     714             :     Assert(pgStatSharedRefAge != 0);
     715             :
     716     3900278 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     717             :
     718     3900278 :     return pgStatSharedRefAge != curage;
     719             : }
     720             : /*
                     :  * Release cached references to shared entries that have been dropped.
                     :  *
                     :  * Walks pgStatEntryRefHash and releases every reference whose shared entry
                     :  * is marked dropped, except references that still have pending data.
                     :  * Afterwards pgStatSharedRefAge is set to the gc_request_count value that
                     :  * was read before the walk, so requests arriving during the walk are not
                     :  * marked as handled.
                     :  */
     721             : static void
     722       11224 : pgstat_gc_entry_refs(void)
     723             : {
     724             :     pgstat_entry_ref_hash_iterator i;
     725             :     PgStat_EntryRefHashEntry *ent;
     726             :     uint64      curage;
     727             :
     728       11224 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     729             :     Assert(curage != 0);
     730             :
     731             :     /*
     732             :      * Some entries have been dropped. Invalidate cached pointers to them.
     733             :      */
     734       11224 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     735      846944 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
     736             :     {
     737      835720 :         PgStat_EntryRef *entry_ref = ent->entry_ref;
     738             :
     739             :         Assert(!entry_ref->shared_stats ||
     740             :                entry_ref->shared_stats->magic == 0xdeadbeef);
     741             :
     742      835720 :         if (!entry_ref->shared_entry->dropped)
     743      589914 :             continue;
     744             :
     745             :         /* cannot gc shared ref that has pending data */
     746      245806 :         if (entry_ref->pending != NULL)
     747      236116 :             continue;
     748             :
     749        9690 :         pgstat_release_entry_ref(ent->key, entry_ref, false);
     750             :     }
     751             :
     752       11224 :     pgStatSharedRefAge = curage;
     753       11224 : }
     754             : /*
                     :  * Release every local entry reference accepted by the match callback.
                     :  *
                     :  * A NULL match releases all references. match_data is handed to the
                     :  * callback unchanged and discard_pending is forwarded to
                     :  * pgstat_release_entry_ref(). Does nothing if pgStatEntryRefHash is NULL.
                     :  */
     755             : static void
     756       31116 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
     757             :                                    Datum match_data)
     758             : {
     759             :     pgstat_entry_ref_hash_iterator i;
     760             :     PgStat_EntryRefHashEntry *ent;
     761             :
     762       31116 :     if (pgStatEntryRefHash == NULL)
     763          12 :         return;
     764             :
     765       31104 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     766             :
     767     1479752 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
     768             :            != NULL)
     769             :     {
     770             :         Assert(ent->entry_ref != NULL);
     771             :
     772     1448648 :         if (match && !match(ent, match_data))
     773        1072 :             continue;
     774             :
     775     1447576 :         pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
     776             :     }
     777             : }
     778             : 
     779             : /*
     780             :  * Release all local references to shared stats entries.
     781             :  *
     782             :  * When a process exits it cannot do so while still holding references onto
     783             :  * stats entries, otherwise the shared stats entries could never be freed.
                     :  *
                     :  * discard_pending is forwarded to pgstat_release_entry_ref() for every
                     :  * reference. Once all references are released the local hash table itself
                     :  * is destroyed and pgStatEntryRefHash is reset to NULL. Does nothing if
                     :  * pgStatEntryRefHash is already NULL.
     784             :  */
     785             : static void
     786       35584 : pgstat_release_all_entry_refs(bool discard_pending)
     787             : {
     788       35584 :     if (pgStatEntryRefHash == NULL)
     789        4514 :         return;
     790             :
     791       31070 :     pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
     792             :     Assert(pgStatEntryRefHash->members == 0);
     793       31070 :     pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
     794       31070 :     pgStatEntryRefHash = NULL;
     795             : }
     796             : /*
                     :  * Match callback for pgstat_release_matching_entry_refs(): returns true if
                     :  * the local hash entry belongs to the database whose OID is carried in
                     :  * match_data.
                     :  */
     797             : static bool
     798        1072 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
     799             : {
     800        1072 :     Oid         dboid = DatumGetObjectId(match_data);
     801             :
     802        1072 :     return ent->key.dboid == dboid;
     803             : }
     804             : /*
                     :  * Release all local references to stats entries of database dboid,
                     :  * discarding any pending data attached to them.
                     :  */
     805             : static void
     806          46 : pgstat_release_db_entry_refs(Oid dboid)
     807             : {
     808          46 :     pgstat_release_matching_entry_refs( /* discard pending = */ true,
     809             :                                        match_db,
     810             :                                        ObjectIdGetDatum(dboid));
     811          46 : }
     812             : 
     813             : 
     814             : /* ------------------------------------------------------------
     815             :  * Dropping and resetting of stats entries
     816             :  * ------------------------------------------------------------
     817             :  */
     818             : /*
                     :  * Remove shent from the shared hash table and free its stats body in DSA.
                     :  *
                     :  * With a NULL hstat the entry is removed with dshash_delete_entry();
                     :  * otherwise hstat identifies an ongoing sequential scan and the scan's
                     :  * current entry is removed with dshash_delete_current(). shent must not be
                     :  * accessed after this call.
                     :  */
     819             : static void
     820       64504 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
     821             : {
     822             :     dsa_pointer pdsa;
     823             :
     824             :     /*
     825             :      * Fetch dsa pointer before deleting entry - that way we can free the
     826             :      * memory after releasing the lock.
     827             :      */
     828       64504 :     pdsa = shent->body;
     829             :
     830       64504 :     if (!hstat)
     831       62570 :         dshash_delete_entry(pgStatLocal.shared_hash, shent);
     832             :     else
     833        1934 :         dshash_delete_current(hstat);
     834             :
     835       64504 :     dsa_free(pgStatLocal.dsa, pdsa);
     836       64504 : }
     837             : 
     838             : /*
     839             :  * Helper for both pgstat_drop_database_and_contents() and
     840             :  * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
     841             :  * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
     842             :  * case the entry needs to be already locked.
                     :  *
                     :  * Marks the entry as dropped and gives up the reference that stood for the
                     :  * entry not being dropped. Returns true if that was the last reference and
                     :  * the entry has been freed. Returns false if other references remain; in
                     :  * that case the dshash lock is released here only when hstat is NULL
                     :  * (with a sequential scan the lock is presumably managed by the scan
                     :  * itself - confirm). Raises an ERROR if the entry was already marked as
                     :  * dropped.
     843             :  */
     844             : static bool
     845       64566 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
     846             :                            dshash_seq_status *hstat)
     847             : {
     848             :     Assert(shent->body != InvalidDsaPointer);
     849             :
     850             :     /* should already have released local reference */
     851       64566 :     if (pgStatEntryRefHash)
     852             :         Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
     853             :
     854             :     /*
     855             :      * Signal that the entry is dropped - this will eventually cause other
     856             :      * backends to release their references.
     857             :      */
     858       64566 :     if (shent->dropped)
     859           0 :         elog(ERROR,
     860             :              "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%llu refcount=%u",
     861             :              pgstat_get_kind_info(shent->key.kind)->name,
     862             :              shent->key.dboid,
     863             :              (unsigned long long) shent->key.objid,
     864             :              pg_atomic_read_u32(&shent->refcount));
     865       64566 :     shent->dropped = true;
     866             :
     867             :     /* release refcount marking entry as not dropped */
     868       64566 :     if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
     869             :     {
     870       55224 :         pgstat_free_entry(shent, hstat);
     871       55224 :         return true;
     872             :     }
     873             :     else
     874             :     {
     875        9342 :         if (!hstat)
     876        9342 :             dshash_release_lock(pgStatLocal.shared_hash, shent);
     877        9342 :         return false;
     878             :     }
     879             : }
     880             : 
     881             : /*
     882             :  * Drop stats for the database and all the objects inside that database.
                     :  *
                     :  * First releases this backend's own references to entries of the database,
                     :  * then scans the whole shared hash table and drops every entry with a
                     :  * matching dboid that is not yet marked as dropped. If at least one entry
                     :  * could not be freed, a garbage collection request is issued.
     883             :  */
     884             : static void
     885          46 : pgstat_drop_database_and_contents(Oid dboid)
     886             : {
     887             :     dshash_seq_status hstat;
     888             :     PgStatShared_HashEntry *p;
     889          46 :     uint64      not_freed_count = 0;
     890             :
     891             :     Assert(OidIsValid(dboid));
     892             :
     893             :     Assert(pgStatLocal.shared_hash != NULL);
     894             :
     895             :     /*
     896             :      * This backend might very well be the only backend holding a reference to
     897             :      * about-to-be-dropped entries. Ensure that we're not preventing it from
     898             :      * being cleaned up till later.
     899             :      *
     900             :      * Doing this separately from the dshash iteration below avoids having to
     901             :      * do so while holding a partition lock on the shared hashtable.
     902             :      */
     903          46 :     pgstat_release_db_entry_refs(dboid);
     904             :
     905             :     /* some of the dshash entries are to be removed, take exclusive lock. */
     906          46 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     907       12106 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     908             :     {
     909       12060 :         if (p->dropped)
     910           2 :             continue;
     911             :
     912       12058 :         if (p->key.dboid != dboid)
     913       10236 :             continue;
     914             :
     915        1822 :         if (!pgstat_drop_entry_internal(p, &hstat))
     916             :         {
     917             :             /*
     918             :              * Even statistics for a dropped database might currently be
     919             :              * accessed (consider e.g. database stats for pg_stat_database).
     920             :              */
     921           0 :             not_freed_count++;
     922             :         }
     923             :     }
     924          46 :     dshash_seq_term(&hstat);
     925             :
     926             :     /*
     927             :      * If some of the stats data could not be freed, signal the reference
     928             :      * holders to run garbage collection of their cached entry references.
     929             :      */
     930          46 :     if (not_freed_count > 0)
     931           0 :         pgstat_request_entry_refs_gc();
     932          46 : }
     933             : 
     934             : /*
     935             :  * Drop a single stats entry.
     936             :  *
     937             :  * This routine returns false if the stats entry of the dropped object could
     938             :  * not be freed, true otherwise.
     939             :  *
     940             :  * The callers of this function should call pgstat_request_entry_refs_gc()
     941             :  * if the stats entry could not be freed, to ensure that this entry's memory
     942             :  * can be reclaimed later by a different backend calling
     943             :  * pgstat_gc_entry_refs().
                     :  *
                     :  * True is also returned when no shared entry exists for the given key. A
                     :  * local reference held by this backend is released first, discarding its
                     :  * pending data. When an entry of kind PGSTAT_KIND_DATABASE is dropped, the
                     :  * entries of that database are dropped too; the return value still only
                     :  * reflects the database entry itself.
     944             :  */
     945             : bool
     946       96664 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
     947             : {
     948             :     PgStat_HashKey key;
     949             :     PgStatShared_HashEntry *shent;
     950       96664 :     bool        freed = true;
     951             :
     952             :     /* clear padding */
     953       96664 :     memset(&key, 0, sizeof(struct PgStat_HashKey));
     954             :
     955       96664 :     key.kind = kind;
     956       96664 :     key.dboid = dboid;
     957       96664 :     key.objid = objid;
     958             :
     959             :     /* delete local reference */
     960       96664 :     if (pgStatEntryRefHash)
     961             :     {
     962             :         PgStat_EntryRefHashEntry *lohashent =
     963       79198 :             pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
     964             :
     965       79198 :         if (lohashent)
     966       62392 :             pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
     967             :                                      true);
     968             :     }
     969             :
     970             :     /* mark entry in shared hashtable as deleted, drop if possible */
     971       96664 :     shent = dshash_find(pgStatLocal.shared_hash, &key, true);
     972       96664 :     if (shent)
     973             :     {
     974       62632 :         freed = pgstat_drop_entry_internal(shent, NULL);
     975             :
     976             :         /*
     977             :          * Database stats contain other stats. Drop those as well when
     978             :          * dropping the database. XXX: Perhaps this should be done in a
     979             :          * slightly more principled way? But not obvious what that'd look
     980             :          * like, and so far this is the only case...
     981             :          */
     982       62632 :         if (key.kind == PGSTAT_KIND_DATABASE)
     983          46 :             pgstat_drop_database_and_contents(key.dboid);
     984             :     }
     985             :
     986       96664 :     return freed;
     987             : }
     988             : /*
                     :  * Drop every entry of the shared hash table that is not already marked as
                     :  * dropped. If at least one entry could not be freed because it is still
                     :  * referenced, a garbage collection request is issued.
                     :  */
     989             : void
     990         458 : pgstat_drop_all_entries(void)
     991             : {
     992             :     dshash_seq_status hstat;
     993             :     PgStatShared_HashEntry *ps;
     994         458 :     uint64      not_freed_count = 0;
     995             :
     996         458 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     997         570 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
     998             :     {
     999         112 :         if (ps->dropped)
    1000           0 :             continue;
    1001             :
    1002         112 :         if (!pgstat_drop_entry_internal(ps, &hstat))
    1003           0 :             not_freed_count++;
    1004             :     }
    1005         458 :     dshash_seq_term(&hstat);
    1006             :
    1007         458 :     if (not_freed_count > 0)
    1008           0 :         pgstat_request_entry_refs_gc();
    1009         458 : }
    1010             : /*
                     :  * Zero the kind-specific data area of one shared stats entry and, if the
                     :  * kind provides a reset_timestamp_cb callback, invoke it with ts.
                     :  *
                     :  * No locking is done here; both callers in this file hold the lock of the
                     :  * entry in LW_EXCLUSIVE mode around the call.
                     :  */
    1011             : static void
    1012       17128 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
    1013             :                            TimestampTz ts)
    1014             : {
    1015       17128 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1016             :
    1017       17128 :     memset(pgstat_get_entry_data(kind, header), 0,
    1018             :            pgstat_get_entry_len(kind));
    1019             :
    1020       17128 :     if (kind_info->reset_timestamp_cb)
    1021         352 :         kind_info->reset_timestamp_cb(header, ts);
    1022       17128 : }
    1023             : 
    1024             : /*
    1025             :  * Reset one variable-numbered stats entry.
                     :  *
                     :  * Does nothing if no entry reference can be obtained for (kind, dboid,
                     :  * objid) or if the shared entry is marked as dropped. Otherwise the entry
                     :  * is locked exclusively while its contents are reset, with ts handed on to
                     :  * shared_stat_reset_contents().
    1026             :  */
    1027             : void
    1028         324 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
    1029             : {
    1030             :     PgStat_EntryRef *entry_ref;
    1031             :
    1032             :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
    1033             :
    1034         324 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1035         324 :     if (!entry_ref || entry_ref->shared_entry->dropped)
    1036           2 :         return;
    1037             :
    1038         322 :     (void) pgstat_lock_entry(entry_ref, false);
    1039         322 :     shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
    1040         322 :     pgstat_unlock_entry(entry_ref);
    1041             : }
    1042             : 
    1043             : /*
    1044             :  * Scan through the shared hashtable of stats, resetting statistics if
    1045             :  * approved by the provided do_reset() function.
                     :  *
                     :  * Entries marked as dropped are skipped without consulting do_reset().
                     :  * match_data is handed to do_reset() unchanged. Each approved entry has
                     :  * its own lock taken in LW_EXCLUSIVE mode while its contents are reset,
                     :  * with ts handed on to shared_stat_reset_contents().
    1046             :  */
    1047             : void
    1048          34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
    1049             :                               Datum match_data, TimestampTz ts)
    1050             : {
    1051             :     dshash_seq_status hstat;
    1052             :     PgStatShared_HashEntry *p;
    1053             :
    1054             :     /* dshash entry is not modified, take shared lock */
    1055          34 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1056       25058 :     while ((p = dshash_seq_next(&hstat)) != NULL)
    1057             :     {
    1058             :         PgStatShared_Common *header;
    1059             :
    1060       25024 :         if (p->dropped)
    1061           2 :             continue;
    1062             :
    1063       25022 :         if (!do_reset(p, match_data))
    1064        8216 :             continue;
    1065             :
    1066       16806 :         header = dsa_get_address(pgStatLocal.dsa, p->body);
    1067             :
    1068       16806 :         LWLockAcquire(&header->lock, LW_EXCLUSIVE);
    1069             :
    1070       16806 :         shared_stat_reset_contents(p->key.kind, header, ts);
    1071             :
    1072       16806 :         LWLockRelease(&header->lock);
    1073             :     }
    1074          34 :     dshash_seq_term(&hstat);
    1075          34 : }
    1076             : /*
                     :  * do_reset callback for pgstat_reset_matching_entries(): returns true if
                     :  * the kind of the shared entry equals the kind carried in match_data as an
                     :  * int32 Datum.
                     :  */
    1077             : static bool
    1078        2936 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
    1079             : {
    1080        2936 :     return p->key.kind == DatumGetInt32(match_data);
    1081             : }
    1082             : /*
                     :  * Reset all shared stats entries of the given kind, handing ts on to
                     :  * pgstat_reset_matching_entries().
                     :  */
    1083             : void
    1084           8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
    1085             : {
    1086           8 :     pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
    1087           8 : }
    1088             : /*
                     :  * Create, if they do not exist yet, the two memory contexts
                     :  * pgStatSharedRefContext ("PgStat Shared Ref") and
                     :  * pgStatEntryRefHashContext ("PgStat Shared Ref Hash"), both as children of
                     :  * TopMemoryContext. Contexts that already exist are left untouched, so the
                     :  * function can be called repeatedly.
                     :  */
    1089             : static void
    1090     3900278 : pgstat_setup_memcxt(void)
    1091             : {
    1092     3900278 :     if (unlikely(!pgStatSharedRefContext))
    1093       31070 :         pgStatSharedRefContext =
    1094       31070 :             AllocSetContextCreate(TopMemoryContext,
    1095             :                                   "PgStat Shared Ref",
    1096             :                                   ALLOCSET_SMALL_SIZES);
    1097     3900278 :     if (unlikely(!pgStatEntryRefHashContext))
    1098       31070 :         pgStatEntryRefHashContext =
    1099       31070 :             AllocSetContextCreate(TopMemoryContext,
    1100             :                                   "PgStat Shared Ref Hash",
    1101             :                                   ALLOCSET_SMALL_SIZES);
    1102     3900278 : }

Generated by: LCOV version 1.14