LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 287 301 95.3 %
Date: 2023-06-06 09:15:10 Functions: 34 34 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_shmem.c
       4             :  *    Storage of stats entries in shared memory
       5             :  *
       6             :  * Copyright (c) 2001-2023, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/activity/pgstat_shmem.c
      10             :  * -------------------------------------------------------------------------
      11             :  */
      12             : 
      13             : #include "postgres.h"
      14             : 
      15             : #include "pgstat.h"
      16             : #include "storage/shmem.h"
      17             : #include "utils/memutils.h"
      18             : #include "utils/pgstat_internal.h"
      19             : 
      20             : 
      21             : #define PGSTAT_ENTRY_REF_HASH_SIZE  128
      22             : 
      23             : /* element of the backend-local hash table used to find the PgStat_EntryRef for a key */
      24             : typedef struct PgStat_EntryRefHashEntry
      25             : {
      26             :     PgStat_HashKey key;         /* hash key */
      27             :     char        status;         /* for simplehash use */
      28             :     PgStat_EntryRef *entry_ref; /* backend-local reference; lookups also test it for NULL */
      29             : } PgStat_EntryRefHashEntry;
      30             : 
      31             : 
      32             : /* instantiate lib/simplehash.h for the backend-local table of references to shared statistics entries */
      33             : #define SH_PREFIX pgstat_entry_ref_hash /* generated names start with pgstat_entry_ref_hash_ */
      34             : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
      35             : #define SH_KEY_TYPE PgStat_HashKey
      36             : #define SH_KEY key /* member of PgStat_EntryRefHashEntry holding the key */
      37             : #define SH_HASH_KEY(tb, key) \
      38             :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL) /* same hash function as dsh_params */
      39             : #define SH_EQUAL(tb, a, b) \
      40             :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0 /* same compare function as dsh_params */
      41             : #define SH_SCOPE static inline
      42             : #define SH_DEFINE
      43             : #define SH_DECLARE
      44             : #include "lib/simplehash.h"
      45             : 
      46             : /* prototypes for file-local (static) helpers that are used before their definitions */
      47             : static void pgstat_drop_database_and_contents(Oid dboid);
      48             : 
      49             : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
      50             : 
      51             : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
      52             : static bool pgstat_need_entry_refs_gc(void);
      53             : static void pgstat_gc_entry_refs(void);
      54             : static void pgstat_release_all_entry_refs(bool discard_pending);
      55             : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data); /* predicate type taken by pgstat_release_matching_entry_refs() */
      56             : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
      57             : 
      58             : static void pgstat_setup_memcxt(void);
      59             : 
      60             : 
      61             : /* parameters for the shared hash; passed to both dshash_create() and dshash_attach() below */
      62             : static const dshash_parameters dsh_params = {
      63             :     sizeof(PgStat_HashKey), /* size of a key */
      64             :     sizeof(PgStatShared_HashEntry), /* size of a hash entry */
      65             :     pgstat_cmp_hash_key, /* key comparison, also used by SH_EQUAL above */
      66             :     pgstat_hash_hash_key, /* key hashing, also used by SH_HASH_KEY above */
      67             :     LWTRANCHE_PGSTATS_HASH /* presumably the LWLock tranche for the table's locks - confirm against dshash.h */
      68             : };
      69             : 
      70             : 
      71             : /*
      72             :  * Backend local references to shared stats entries. If there are pending
      73             :  * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
      74             :  * list.
      75             :  *
      76             :  * When a stats entry is dropped each backend needs to release its reference
      77             :  * to it before the memory can be released. To trigger that
      78             :  * pgStatLocal.shmem->gc_request_count is incremented - which each backend
      79             :  * compares to its own copy, pgStatSharedRefAge, on a regular basis.
      80             :  */
      81             : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL; /* created lazily by pgstat_setup_shared_refs() */
      82             : static int  pgStatSharedRefAge = 0; /* gc_request_count value last copied by this backend; NOTE(review): int, but assigned from a u64 read - confirm truncation is acceptable */
      83             : 
      84             : /*
      85             :  * Memory contexts containing the PgStat_EntryRef entries and the
      86             :  * pgStatEntryRefHash table respectively. Kept separate to make it easier to
      87             :  * track / attribute memory usage.
      88             :  */
      89             : static MemoryContext pgStatSharedRefContext = NULL; /* PgStat_EntryRef structs are allocated here */
      90             : static MemoryContext pgStatEntryRefHashContext = NULL; /* pgStatEntryRefHash is created here */
      91             : 
      92             : 
      93             : /* ------------------------------------------------------------
      94             :  * Public functions called from postmaster follow
      95             :  * ------------------------------------------------------------
      96             :  */
      97             : 
      98             : /*
      99             :  * The size of the shared memory allocation for stats stored in the shared
     100             :  * stats hash table. This allocation will be done as part of the main shared
     101             :  * memory, rather than dynamic shared memory, allowing it to be initialized in
     102             :  * postmaster. Returns the MAXALIGN'd size in bytes (a fixed 256kB).
     103             :  */
     104             : static Size
     105       20000 : pgstat_dsa_init_size(void)
     106             : {
     107             :     Size        sz;
     108             : 
     109             :     /*
     110             :      * The dshash header / initial buckets array needs to fit into "plain"
     111             :      * shared memory, but it's beneficial to not need dsm segments
     112             :      * immediately. A size of 256kB seems to work well and is not
     113             :      * disproportionate compared to other constant sized shared memory
     114             :      * allocations. NB: To avoid DSMs further, the user can configure
     115             :      * min_dynamic_shared_memory.
     116             :      */
     117       20000 :     sz = 256 * 1024;
     118             :     Assert(dsa_minimum_size() <= sz); /* the fixed size must not be below dsa's minimum */
     119       20000 :     return MAXALIGN(sz);
     120             : }
     121             : 
     122             : /*
     123             :  * Compute shared memory space needed for cumulative statistics: the MAXALIGN'd PgStat_ShmemControl plus the in-place dsa area
     124             :  */
     125             : Size
     126        9092 : StatsShmemSize(void)
     127             : {
     128             :     Size        sz;
     129             : 
     130        9092 :     sz = MAXALIGN(sizeof(PgStat_ShmemControl)); /* control struct comes first */
     131        9092 :     sz = add_size(sz, pgstat_dsa_init_size()); /* followed by the raw dsa area, see StatsShmemInit() */
     132             : 
     133        9092 :     return sz;
     134             : }
     135             : 
     136             : /*
     137             :  * Initialize cumulative statistics system during startup: obtain the "Shared Memory Stats" struct and, when not under postmaster, set up the dsa area, the dshash table and the locks in it
     138             :  */
     139             : void
     140        3636 : StatsShmemInit(void)
     141             : {
     142             :     bool        found;
     143             :     Size        sz;
     144             : 
     145        3636 :     sz = StatsShmemSize();
     146        3636 :     pgStatLocal.shmem = (PgStat_ShmemControl *)
     147        3636 :         ShmemInitStruct("Shared Memory Stats", sz, &found);
     148             : 
     149        3636 :     if (!IsUnderPostmaster) /* first-time initialization; the struct must not exist yet (Assert below) */
     150             :     {
     151             :         dsa_area   *dsa;
     152             :         dshash_table *dsh;
     153        3636 :         PgStat_ShmemControl *ctl = pgStatLocal.shmem;
     154        3636 :         char       *p = (char *) ctl;
     155             : 
     156             :         Assert(!found);
     157             : 
     158             :         /* the allocation of pgStatLocal.shmem itself */
     159        3636 :         p += MAXALIGN(sizeof(PgStat_ShmemControl));
     160             : 
     161             :         /*
     162             :          * Create a small dsa allocation in plain shared memory. This is
     163             :          * required because postmaster cannot use dsm segments. It also
     164             :          * provides a small efficiency win.
     165             :          */
     166        3636 :         ctl->raw_dsa_area = p;
     167        3636 :         p += MAXALIGN(pgstat_dsa_init_size()); /* step past the dsa area; p is not read again in this function */
     168        3636 :         dsa = dsa_create_in_place(ctl->raw_dsa_area,
     169             :                                   pgstat_dsa_init_size(),
     170             :                                   LWTRANCHE_PGSTATS_DSA, 0);
     171        3636 :         dsa_pin(dsa);
     172             : 
     173             :         /*
     174             :          * To ensure dshash is created in "plain" shared memory, temporarily
     175             :          * limit size of dsa to the initial size of the dsa.
     176             :          */
     177        3636 :         dsa_set_size_limit(dsa, pgstat_dsa_init_size());
     178             : 
     179             :         /*
     180             :          * With the limit in place, create the dshash table. XXX: It'd be nice
     181             :          * if there were dshash_create_in_place().
     182             :          */
     183        3636 :         dsh = dshash_create(dsa, &dsh_params, 0);
     184        3636 :         ctl->hash_handle = dshash_get_hash_table_handle(dsh); /* handle later used by pgstat_attach_shmem() */
     185             : 
     186             :         /* lift limit set above */
     187        3636 :         dsa_set_size_limit(dsa, -1);
     188             : 
     189             :         /*
     190             :          * Postmaster will never access these again, thus free the local
     191             :          * dsa/dshash references.
     192             :          */
     193        3636 :         dshash_detach(dsh);
     194        3636 :         dsa_detach(dsa);
     195             : 
     196        3636 :         pg_atomic_init_u64(&ctl->gc_request_count, 1); /* starts at 1; pgstat_setup_shared_refs() asserts its copy is non-zero */
     197             : 
     198             : 
     199             :         /* initialize the locks of the fixed-numbered stats */
     200        3636 :         LWLockInitialize(&ctl->archiver.lock, LWTRANCHE_PGSTATS_DATA);
     201        3636 :         LWLockInitialize(&ctl->bgwriter.lock, LWTRANCHE_PGSTATS_DATA);
     202        3636 :         LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA);
     203        3636 :         LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA);
     204        3636 :         LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA);
     205             : 
     206       54540 :         for (int i = 0; i < BACKEND_NUM_TYPES; i++) /* one IO stats lock per backend type */
     207       50904 :             LWLockInitialize(&ctl->io.locks[i],
     208             :                              LWTRANCHE_PGSTATS_DATA);
     209             :     }
     210             :     else
     211             :     {
     212             :         Assert(found); /* under postmaster the struct must already exist */
     213             :     }
     214        3636 : }
     215             : /* Attach this process to the in-place stats dsa area and to the shared stats hash table; must not already be attached. */
     216             : void
     217       30036 : pgstat_attach_shmem(void)
     218             : {
     219             :     MemoryContext oldcontext;
     220             : 
     221             :     Assert(pgStatLocal.dsa == NULL);
     222             : 
     223             :     /* stats shared memory persists for the backend lifetime */
     224       30036 :     oldcontext = MemoryContextSwitchTo(TopMemoryContext);
     225             : 
     226       30036 :     pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
     227             :                                           NULL);
     228       30036 :     dsa_pin_mapping(pgStatLocal.dsa);
     229             : 
     230       60072 :     pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
     231       30036 :                                             pgStatLocal.shmem->hash_handle, 0); /* handle stored by StatsShmemInit() */
     232             : 
     233       30036 :     MemoryContextSwitchTo(oldcontext); /* restore the caller's memory context */
     234       30036 : }
     235             : /* Release all entry references, then detach from the shared stats hash table and the dsa area and clear the local pointers. */
     236             : void
     237       30036 : pgstat_detach_shmem(void)
     238             : {
     239             :     Assert(pgStatLocal.dsa);
     240             : 
     241             :     /* we shouldn't leave references to shared stats */
     242       30036 :     pgstat_release_all_entry_refs(false); /* false: do not discard pending data */
     243             : 
     244       30036 :     dshash_detach(pgStatLocal.shared_hash);
     245       30036 :     pgStatLocal.shared_hash = NULL;
     246             : 
     247       30036 :     dsa_detach(pgStatLocal.dsa);
     248       30036 :     pgStatLocal.dsa = NULL; /* lets pgstat_attach_shmem()'s Assert pass on a later attach */
     249       30036 : }
     250             : 
     251             : 
     252             : /* ------------------------------------------------------------
     253             :  * Maintenance of shared memory stats entries
     254             :  * ------------------------------------------------------------
     255             :  */
     256             : /* Initialize the freshly inserted hash entry shhashent for the given kind, allocate its stats body in the dsa area, and return the body's common header. */
     257             : PgStatShared_Common *
     258      596678 : pgstat_init_entry(PgStat_Kind kind,
     259             :                   PgStatShared_HashEntry *shhashent)
     260             : {
     261             :     /* Create new stats entry. */
     262             :     dsa_pointer chunk;
     263             :     PgStatShared_Common *shheader;
     264             : 
     265             :     /*
     266             :      * Initialize refcount to 1, marking it as valid / not dropped. The entry
     267             :      * can't be freed before the initialization because it can't be found as
     268             :      * long as we hold the dshash partition lock. Caller needs to increase
     269             :      * further if a longer lived reference is needed.
     270             :      */
     271      596678 :     pg_atomic_init_u32(&shhashent->refcount, 1);
     272      596678 :     shhashent->dropped = false;
     273             : 
     274      596678 :     chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size); /* body size depends on the stats kind */
     275      596678 :     shheader = dsa_get_address(pgStatLocal.dsa, chunk);
     276      596678 :     shheader->magic = 0xdeadbeef; /* marker checked by Asserts elsewhere in this file */
     277             : 
     278             :     /* Link the new entry from the hash entry. */
     279      596678 :     shhashent->body = chunk;
     280             : 
     281      596678 :     LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
     282             : 
     283      596678 :     return shheader;
     284             : }
     285             : /* Reuse an existing hash entry that was marked dropped: add a reference, clear the dropped flag, zero the stats data, and return the existing body's header. */
     286             : static PgStatShared_Common *
     287          54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
     288             : {
     289             :     PgStatShared_Common *shheader;
     290             : 
     291          54 :     shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body); /* the body chunk is kept, not reallocated */
     292             : 
     293             :     /* mark as not dropped anymore */
     294          54 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     295          54 :     shhashent->dropped = false;
     296             : 
     297             :     /* reinitialize content */
     298             :     Assert(shheader->magic == 0xdeadbeef);
     299          54 :     memset(pgstat_get_entry_data(kind, shheader), 0,
     300             :            pgstat_get_entry_len(kind)); /* zeroes the kind's data region located via pgstat_get_entry_data() */
     301             : 
     302          54 :     return shheader;
     303             : }
     304             : /* Create the backend-local entry reference hash table on first use and record the current gc_request_count; does nothing if the table already exists. */
     305             : static void
     306     3626574 : pgstat_setup_shared_refs(void)
     307             : {
     308     3626574 :     if (likely(pgStatEntryRefHash != NULL))
     309     3598230 :         return;
     310             : 
     311       28344 :     pgStatEntryRefHash =
     312       28344 :         pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
     313             :                                      PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
     314       28344 :     pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count); /* NOTE(review): u64 value stored into an int - confirm the narrowing is intended */
     315             :     Assert(pgStatSharedRefAge != 0); /* the counter is initialized to 1 in StatsShmemInit() */
     316             : }
     317             : 
     318             : /*
     319             :  * Helper function for pgstat_get_entry_ref(): adds a reference to shhashent, releases its dshash lock, and stores both shared pointers in entry_ref.
     320             :  */
     321             : static void
     322     1305714 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
     323             :                          PgStatShared_HashEntry *shhashent,
     324             :                          PgStatShared_Common *shheader)
     325             : {
     326             :     Assert(shheader->magic == 0xdeadbeef);
     327             :     Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
     328             : 
     329     1305714 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1); /* this backend's reference; dropped again in pgstat_release_entry_ref() */
     330             : 
     331     1305714 :     dshash_release_lock(pgStatLocal.shared_hash, shhashent); /* the reference is taken before the lock is released */
     332             : 
     333     1305714 :     entry_ref->shared_stats = shheader;
     334     1305714 :     entry_ref->shared_entry = shhashent;
     335     1305714 : }
     336             : 
     337             : /*
     338             :  * Helper function for pgstat_get_entry_ref(): looks up (inserting if needed) the local cache entry for key and stores its PgStat_EntryRef in *entry_ref_p.
     339             :  */
     340             : static bool
     341     3626574 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
     342             : {
     343             :     bool        found;
     344             :     PgStat_EntryRefHashEntry *cache_entry;
     345             : 
     346             :     /*
     347             :      * We immediately insert a cache entry, because it avoids 1) multiple
     348             :      * hashtable lookups in case of a cache miss 2) having to deal with
     349             :      * out-of-memory errors after incrementing the shared entry's refcount.
     350             :      */
     351             : 
     352     3626574 :     cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
     353             : 
     354     3626574 :     if (!found || !cache_entry->entry_ref) /* !found is tested first, so entry_ref of a just-inserted element is not read */
     355     1660790 :     {
     356             :         PgStat_EntryRef *entry_ref;
     357             : 
     358     1660790 :         cache_entry->entry_ref = entry_ref =
     359     1660790 :             MemoryContextAlloc(pgStatSharedRefContext,
     360             :                                sizeof(PgStat_EntryRef));
     361     1660790 :         entry_ref->shared_stats = NULL; /* the caller fills the shared pointers via pgstat_acquire_entry_ref() */
     362     1660790 :         entry_ref->shared_entry = NULL;
     363     1660790 :         entry_ref->pending = NULL;
     364             : 
     365     1660790 :         found = false;
     366             :     }
     367     1965784 :     else if (cache_entry->entry_ref->shared_stats == NULL) /* cached ref exists but is not linked to a shared entry */
     368             :     {
     369             :         Assert(cache_entry->entry_ref->pending == NULL);
     370           0 :         found = false;
     371             :     }
     372             :     else
     373             :     {
     374             :         PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
     375             : 
     376     1965784 :         entry_ref = cache_entry->entry_ref;
     377             :         Assert(entry_ref->shared_entry != NULL);
     378             :         Assert(entry_ref->shared_stats != NULL);
     379             : 
     380             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     381             :         /* should have at least our reference */
     382             :         Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
     383             :     }
     384             : 
     385     3626574 :     *entry_ref_p = cache_entry->entry_ref; /* set in every case, so never NULL here */
     386     3626574 :     return found; /* true only if the cached ref already points at shared stats */
     387             : }
     388             : 
     389             : /*
     390             :  * Get a shared stats reference. If create is true, the shared stats object is
     391             :  * created if it does not exist.
     392             :  *
     393             :  * When create is true, and created_entry is non-NULL, it'll be set to true
     394             :  * if the entry is newly created, false otherwise. Returns NULL when create is false and the entry does not exist or is marked dropped.
     395             :  */
     396             : PgStat_EntryRef *
     397     3626574 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
     398             :                      bool *created_entry)
     399             : {
     400     3626574 :     PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
     401             :     PgStatShared_HashEntry *shhashent;
     402     3626574 :     PgStatShared_Common *shheader = NULL;
     403             :     PgStat_EntryRef *entry_ref;
     404             : 
     405             :     /*
     406             :      * passing in created_entry only makes sense if we possibly could create
     407             :      * entry.
     408             :      */
     409             :     Assert(create || created_entry == NULL);
     410             :     pgstat_assert_is_up();
     411             :     Assert(pgStatLocal.shared_hash != NULL);
     412             :     Assert(!pgStatLocal.shmem->is_shutdown);
     413             : 
     414     3626574 :     pgstat_setup_memcxt();
     415     3626574 :     pgstat_setup_shared_refs(); /* makes sure pgStatEntryRefHash exists */
     416             : 
     417     3626574 :     if (created_entry != NULL)
     418         208 :         *created_entry = false; /* default; set to true below only where an entry is (re)initialized */
     419             : 
     420             :     /*
     421             :      * Check if other backends dropped stats that could not be deleted because
     422             :      * somebody held references to it. If so, check this backend's references.
     423             :      * This is not expected to happen often. The location of the check is a
     424             :      * bit random, but this is a relatively frequently called path, so better
     425             :      * than most.
     426             :      */
     427     3626574 :     if (pgstat_need_entry_refs_gc())
     428       11094 :         pgstat_gc_entry_refs();
     429             : 
     430             :     /*
     431             :      * First check the lookup cache hashtable in local memory. If we find a
     432             :      * match here we can avoid taking locks / causing contention.
     433             :      */
     434     3626574 :     if (pgstat_get_entry_ref_cached(key, &entry_ref))
     435     1965784 :         return entry_ref;
     436             : 
     437             :     Assert(entry_ref != NULL); /* on a miss the cache still hands back a (not yet linked) entry_ref */
     438             : 
     439             :     /*
     440             :      * Do a lookup in the hash table first - it's quite likely that the entry
     441             :      * already exists, and that way we only need a shared lock.
     442             :      */
     443     1660790 :     shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
     444             : 
     445     1660790 :     if (create && !shhashent)
     446             :     {
     447             :         bool        shfound;
     448             : 
     449             :         /*
     450             :          * It's possible that somebody created the entry since the above
     451             :          * lookup. If so, fall through to the same path as if it had
     452             :          * already been created before the dshash_find() call.
     453             :          */
     454      293866 :         shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
     455      293866 :         if (!shfound)
     456             :         {
     457      293866 :             shheader = pgstat_init_entry(kind, shhashent);
     458      293866 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader); /* also releases the dshash lock */
     459             : 
     460      293866 :             if (created_entry != NULL)
     461          90 :                 *created_entry = true;
     462             : 
     463      293866 :             return entry_ref;
     464             :         }
     465             :     }
     466             : 
     467     1366924 :     if (!shhashent)
     468             :     {
     469             :         /*
     470             :          * If we're not creating, delete the reference again. In all
     471             :          * likelihood it's just a stats lookup - no point wasting memory for a
     472             :          * shared ref to nothing...
     473             :          */
     474      355002 :         pgstat_release_entry_ref(key, entry_ref, false); /* removes the cache entry inserted above and frees entry_ref */
     475             : 
     476      355002 :         return NULL;
     477             :     }
     478             :     else
     479             :     {
     480             :         /*
     481             :          * Can get here either because dshash_find() found a match, or if
     482             :          * dshash_find_or_insert() found a concurrently inserted entry.
     483             :          */
     484             : 
     485     1011922 :         if (shhashent->dropped && create)
     486             :         {
     487             :             /*
     488             :              * There are legitimate cases where the old stats entry might not
     489             :              * yet have been dropped by the time it's reused. The most obvious
     490             :              * case are replication slot stats, where a new slot can be
     491             :              * created with the same index just after dropping. But oid
     492             :              * wraparound can lead to other cases as well. We just reset the
     493             :              * stats to their plain state.
     494             :              */
     495          54 :             shheader = pgstat_reinit_entry(kind, shhashent);
     496          54 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     497             : 
     498          54 :             if (created_entry != NULL)
     499           0 :                 *created_entry = true; /* a reinitialized entry is reported as newly created */
     500             : 
     501          54 :             return entry_ref;
     502             :         }
     503     1011868 :         else if (shhashent->dropped)
     504             :         {
     505          74 :             dshash_release_lock(pgStatLocal.shared_hash, shhashent); /* no reference is taken on a dropped entry */
     506          74 :             pgstat_release_entry_ref(key, entry_ref, false);
     507             : 
     508          74 :             return NULL;
     509             :         }
     510             :         else
     511             :         {
     512     1011794 :             shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body); /* existing, live entry */
     513     1011794 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     514             : 
     515     1011794 :             return entry_ref;
     516             :         }
     517             :     }
     518             : }
     519             : /* Drop this backend's reference for key: handle pending data, decrement the shared refcount (freeing the shared entry if it reaches zero), remove the local cache entry and free entry_ref. */
     520             : static void
     521     1660790 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
     522             :                          bool discard_pending)
     523             : {
     524     1660790 :     if (entry_ref && entry_ref->pending)
     525             :     {
     526       55434 :         if (discard_pending)
     527       55434 :             pgstat_delete_pending_entry(entry_ref);
     528             :         else
     529           0 :             elog(ERROR, "releasing ref with pending data"); /* pending data may only be dropped when discard_pending is set */
     530             :     }
     531             : 
     532     1660790 :     if (entry_ref && entry_ref->shared_stats)
     533             :     {
     534             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     535             :         Assert(entry_ref->pending == NULL);
     536             : 
     537             :         /*
     538             :          * This can't race with another backend looking up the stats entry and
     539             :          * increasing the refcount because it is not "legal" to create
     540             :          * additional references to dropped entries.
     541             :          */
     542     1305714 :         if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1) /* previous value 1 means the count is now 0 */
     543             :         {
     544             :             PgStatShared_HashEntry *shent;
     545             : 
     546             :             /*
     547             :              * We're the last referrer to this entry, try to drop the shared
     548             :              * entry.
     549             :              */
     550             : 
     551             :             /* only dropped entries can reach a 0 refcount */
     552             :             Assert(entry_ref->shared_entry->dropped);
     553             : 
     554        9226 :             shent = dshash_find(pgStatLocal.shared_hash,
     555        9226 :                                 &entry_ref->shared_entry->key,
     556             :                                 true); /* presumably true requests an exclusive lock - confirm against dshash.h */
     557        9226 :             if (!shent)
     558           0 :                 elog(ERROR, "could not find just referenced shared stats entry");
     559             : 
     560             :             Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
     561             :             Assert(entry_ref->shared_entry == shent);
     562             : 
     563        9226 :             pgstat_free_entry(shent, NULL);
     564             :         }
     565             :     }
     566             : 
     567     1660790 :     if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key)) /* the local cache entry must exist, even when entry_ref is NULL */
     568           0 :         elog(ERROR, "entry ref vanished before deletion");
     569             : 
     570     1660790 :     if (entry_ref)
     571     1660790 :         pfree(entry_ref);
     572     1660790 : }
     573             : /* Take the entry's lock in LW_EXCLUSIVE mode. With nowait, returns the result of LWLockConditionalAcquire(); otherwise calls LWLockAcquire() and returns true. */
     574             : bool
     575     1641442 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
     576             : {
     577     1641442 :     LWLock     *lock = &entry_ref->shared_stats->lock; /* entry_ref and its shared_stats must be non-NULL */
     578             : 
     579     1641442 :     if (nowait)
     580      537940 :         return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
     581             : 
     582     1103502 :     LWLockAcquire(lock, LW_EXCLUSIVE);
     583     1103502 :     return true;
     584             : }
     585             : 
     586             : /*
     587             :  * Like pgstat_lock_entry(), but takes the lock in LW_SHARED mode. Separate
     588             :  * from pgstat_lock_entry() as most callers will need to lock exclusively.
     589             :  */
     590             : bool
     591       12448 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
     592             : {
     593       12448 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     594             : 
     595       12448 :     if (nowait)
     596           0 :         return LWLockConditionalAcquire(lock, LW_SHARED); /* result of the conditional acquire is returned as-is */
     597             : 
     598       12448 :     LWLockAcquire(lock, LW_SHARED);
     599       12448 :     return true;
     600             : }
     601             : /* Release the LWLock embedded in entry_ref->shared_stats (no lock mode is passed to the release call). */
     602             : void
     603     1653876 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
     604             : {
     605     1653876 :     LWLockRelease(&entry_ref->shared_stats->lock);
     606     1653876 : }
     607             : 
     608             : /*
     609             :  * Fetch the entry ref for (kind, dboid, objoid) and lock it exclusively.
     610             :  */
     611             : PgStat_EntryRef *
     612      131118 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
     613             :                             bool nowait)
     614             : {
     615             :     PgStat_EntryRef *entry_ref;
     616             : 
     617             :     /* look up the entry ref; the 'true' argument presumably means create-if-missing (TODO confirm) */
     618      131118 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL);
     619             : 
     620             :     /* lock the shared entry exclusively; return NULL if the lock was not obtained */
     621      131118 :     if (!pgstat_lock_entry(entry_ref, nowait))
     622           0 :         return NULL;
     623             : 
     624      131118 :     return entry_ref;
     625             : }
     626             : /* Bump the shared gc_request_count; pgstat_need_entry_refs_gc() compares it with the locally stored age. */
     627             : void
     628        3552 : pgstat_request_entry_refs_gc(void)
     629             : {
     630        3552 :     pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
     631        3552 : }
     632             : /* True when the shared gc_request_count differs from pgStatSharedRefAge; false if there is no local ref hash. */
     633             : static bool
     634     3626574 : pgstat_need_entry_refs_gc(void)
     635             : {
     636             :     uint64      curage;
     637             : 
     638     3626574 :     if (!pgStatEntryRefHash)
     639           0 :         return false;
     640             : 
     641             :     /* should have been initialized when creating pgStatEntryRefHash */
     642             :     Assert(pgStatSharedRefAge != 0);
     643             : 
     644     3626574 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     645             : 
     646     3626574 :     return pgStatSharedRefAge != curage;
     647             : }
     648             : /* Release local refs to dropped entries that have no pending data, then store the GC age that was read. */
     649             : static void
     650       11094 : pgstat_gc_entry_refs(void)
     651             : {
     652             :     pgstat_entry_ref_hash_iterator i;
     653             :     PgStat_EntryRefHashEntry *ent;
     654             :     uint64      curage;
     655             : /* read the counter before scanning: a request made mid-scan leaves age != count, so GC is needed again */
     656       11094 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     657             :     Assert(curage != 0);
     658             : 
     659             :     /*
     660             :      * Some entries have been dropped. Invalidate cached pointers to them.
     661             :      */
     662       11094 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     663      881476 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
     664             :     {
     665      870382 :         PgStat_EntryRef *entry_ref = ent->entry_ref;
     666             : 
     667             :         Assert(!entry_ref->shared_stats ||
     668             :                entry_ref->shared_stats->magic == 0xdeadbeef);
     669             : /* entries that are not dropped keep their local reference */
     670      870382 :         if (!entry_ref->shared_entry->dropped)
     671      570958 :             continue;
     672             : 
     673             :         /* cannot gc shared ref that has pending data */
     674      299424 :         if (entry_ref->pending != NULL)
     675      291704 :             continue;
     676             : /* dropped and nothing pending: release the ref (discard_pending = false) */
     677        7720 :         pgstat_release_entry_ref(ent->key, entry_ref, false);
     678             :     }
     679             : 
     680       11094 :     pgStatSharedRefAge = curage;
     681       11094 : }
     682             : /* Release every local entry ref accepted by match(ent, match_data); a NULL match accepts all entries. */
     683             : static void
     684       28374 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
     685             :                                    Datum match_data)
     686             : {
     687             :     pgstat_entry_ref_hash_iterator i;
     688             :     PgStat_EntryRefHashEntry *ent;
     689             : /* nothing to release if the local reference hash does not exist */
     690       28374 :     if (pgStatEntryRefHash == NULL)
     691          10 :         return;
     692             : 
     693       28364 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     694             : 
     695     1270902 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
     696             :            != NULL)
     697             :     {
     698             :         Assert(ent->entry_ref != NULL);
     699             : 
     700     1242538 :         if (match && !match(ent, match_data))
     701         594 :             continue;
     702             : /* discard_pending is handed through unchanged to pgstat_release_entry_ref() */
     703     1241944 :         pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
     704             :     }
     705             : }
     706             : 
     707             : /*
     708             :  * Release all local references to shared stats entries, then destroy the hash.
     709             :  *
     710             :  * When a process exits it cannot do so while still holding references to
     711             :  * stats entries, otherwise the shared stats entries could never be freed.
     712             :  */
     713             : static void
     714       30036 : pgstat_release_all_entry_refs(bool discard_pending)
     715             : {
     716       30036 :     if (pgStatEntryRefHash == NULL)
     717        1692 :         return;
     718             : /* a NULL match callback releases every entry; the hash is expected to be empty afterwards */
     719       28344 :     pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
     720             :     Assert(pgStatEntryRefHash->members == 0);
     721       28344 :     pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
     722       28344 :     pgStatEntryRefHash = NULL;
     723             : }
     724             : /* Match callback: accept local refs whose key.dboid equals the database OID carried in match_data. */
     725             : static bool
     726         594 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
     727             : {
     728         594 :     Oid         dboid = DatumGetObjectId(match_data);
     729             : 
     730         594 :     return ent->key.dboid == dboid;
     731             : }
     732             : /* Release, discarding pending data, all local entry refs whose key belongs to database dboid. */
     733             : static void
     734          30 : pgstat_release_db_entry_refs(Oid dboid)
     735             : {
     736          30 :     pgstat_release_matching_entry_refs( /* discard pending = */ true,
     737             :                                        match_db,
     738             :                                        ObjectIdGetDatum(dboid));
     739          30 : }
     740             : 
     741             : 
     742             : /* ------------------------------------------------------------
     743             :  * Dropping and resetting of stats entries
     744             :  * ------------------------------------------------------------
     745             :  */
     746             : /* Delete shent from the shared hash (through hstat when one is given) and free its DSA body. */
     747             : static void
     748       58308 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
     749             : {
     750             :     dsa_pointer pdsa;
     751             : 
     752             :     /*
     753             :      * Fetch dsa pointer before deleting entry - that way we can free the
     754             :      * memory after releasing the lock.
     755             :      */
     756       58308 :     pdsa = shent->body;
     757             : /* no scan state: delete the entry directly; otherwise delete the scan's current entry */
     758       58308 :     if (!hstat)
     759       56188 :         dshash_delete_entry(pgStatLocal.shared_hash, shent);
     760             :     else
     761        2120 :         dshash_delete_current(hstat);
     762             : 
     763       58308 :     dsa_free(pgStatLocal.dsa, pdsa);
     764       58308 : }
     765             : 
     766             : /*
     767             :  * Mark shent dropped; shared helper of pgstat_drop_database_and_contents()
     768             :  * and pgstat_drop_entry(). The entry must already be locked. Returns true if
     769             :  * it was freed right away (via dshash_delete_current() when hstat is given,
     770             :  * else dshash_delete_entry()), false if other references remain.
     771             :  */
     772             : static bool
     773       58366 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
     774             :                            dshash_seq_status *hstat)
     775             : {
     776             :     Assert(shent->body != InvalidDsaPointer);
     777             : 
     778             :     /* should already have released local reference */
     779       58366 :     if (pgStatEntryRefHash)
     780             :         Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
     781             : 
     782             :     /*
     783             :      * Signal that the entry is dropped - this will eventually cause other
     784             :      * backends to release their references.
     785             :      */
     786       58366 :     if (shent->dropped)
     787           0 :         elog(ERROR, "can only drop stats once");
     788       58366 :     shent->dropped = true;
     789             : 
     790             :     /* release refcount marking entry as not dropped; free the entry if that was the last reference */
     791       58366 :     if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
     792             :     {
     793       49082 :         pgstat_free_entry(shent, hstat);
     794       49082 :         return true;
     795             :     }
     796             :     else
     797             :     {
     798        9284 :         if (!hstat) /* lock released here only without a scan; presumably the scan manages its own - TODO confirm */
     799        9284 :             dshash_release_lock(pgStatLocal.shared_hash, shent);
     800        9284 :         return false;
     801             :     }
     802             : }
     803             : 
     804             : /*
     805             :  * Drop stats for the database and all the objects inside that database.
     806             :  */
     807             : static void
     808          30 : pgstat_drop_database_and_contents(Oid dboid)
     809             : {
     810             :     dshash_seq_status hstat;
     811             :     PgStatShared_HashEntry *p;
     812          30 :     uint64      not_freed_count = 0;
     813             : 
     814             :     Assert(OidIsValid(dboid));
     815             : 
     816             :     Assert(pgStatLocal.shared_hash != NULL);
     817             : 
     818             :     /*
     819             :      * This backend might very well be the only backend holding a reference to
     820             :      * about-to-be-dropped entries. Ensure that we're not preventing it from
     821             :      * being cleaned up till later.
     822             :      *
     823             :      * Doing this separately from the dshash iteration below avoids having to
     824             :      * do so while holding a partition lock on the shared hashtable.
     825             :      */
     826          30 :     pgstat_release_db_entry_refs(dboid);
     827             : 
     828             :     /* some of the dshash entries are to be removed, take exclusive lock. */
     829          30 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     830       10098 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     831             :     {
     832       10068 :         if (p->dropped)
     833           4 :             continue;
     834             : 
     835       10064 :         if (p->key.dboid != dboid)
     836        8056 :             continue;
     837             : 
     838        2008 :         if (!pgstat_drop_entry_internal(p, &hstat))
     839             :         {
     840             :             /*
     841             :              * Even statistics for a dropped database might currently be
     842             :              * accessed (consider e.g. database stats for pg_stat_database).
     843             :              */
     844           0 :             not_freed_count++;
     845             :         }
     846             :     }
     847          30 :     dshash_seq_term(&hstat);
     848             : 
     849             :     /*
     850             :      * If some of the stats data could not be freed, signal the reference
     851             :      * holders to run garbage collection of their cached entry references.
     852             :      */
     853          30 :     if (not_freed_count > 0)
     854           0 :         pgstat_request_entry_refs_gc();
     855          30 : }
     856             : /* Drop one stats entry. Returns true if it was freed or not found, false if it is still referenced. */
     857             : bool
     858       83698 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
     859             : {
     860       83698 :     PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
     861             :     PgStatShared_HashEntry *shent;
     862       83698 :     bool        freed = true;
     863             : 
     864             :     /* delete local reference, discarding any pending data */
     865       83698 :     if (pgStatEntryRefHash)
     866             :     {
     867             :         PgStat_EntryRefHashEntry *lohashent =
     868       68414 :             pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
     869             : 
     870       68414 :         if (lohashent)
     871       56050 :             pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
     872             :                                      true);
     873             :     }
     874             : 
     875             :     /* mark entry in shared hashtable as deleted, drop if possible */
     876       83698 :     shent = dshash_find(pgStatLocal.shared_hash, &key, true);
     877       83698 :     if (shent)
     878             :     {
     879       56246 :         freed = pgstat_drop_entry_internal(shent, NULL);
     880             : 
     881             :         /*
     882             :          * Database stats contain other stats. Drop those as well when
     883             :          * dropping the database. XXX: Perhaps this should be done in a
     884             :          * slightly more principled way? But not obvious what that'd look
     885             :          * like, and so far this is the only case...
     886             :          */
     887       56246 :         if (key.kind == PGSTAT_KIND_DATABASE)
     888          30 :             pgstat_drop_database_and_contents(key.dboid);
     889             :     }
     890             : 
     891       83698 :     return freed;
     892             : }
     893             : /* Drop every not-yet-dropped shared stats entry; request entry-ref GC if any could not be freed. */
     894             : void
     895         882 : pgstat_drop_all_entries(void)
     896             : {
     897             :     dshash_seq_status hstat;
     898             :     PgStatShared_HashEntry *ps;
     899         882 :     uint64      not_freed_count = 0;
     900             : /* third argument true, as in the exclusive scan done by pgstat_drop_database_and_contents() */
     901         882 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     902         994 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
     903             :     {
     904         112 :         if (ps->dropped)
     905           0 :             continue;
     906             : 
     907         112 :         if (!pgstat_drop_entry_internal(ps, &hstat))
     908           0 :             not_freed_count++;
     909             :     }
     910         882 :     dshash_seq_term(&hstat);
     911             : 
     912         882 :     if (not_freed_count > 0)
     913           0 :         pgstat_request_entry_refs_gc();
     914         882 : }
     915             : /* Zero one entry's kind-specific data and pass ts to the kind's reset_timestamp_cb, if it has one. */
     916             : static void
     917       15944 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
     918             :                            TimestampTz ts)
     919             : {
     920       15944 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     921             : /* both callers in this file hold the entry's lock exclusively around this call */
     922       15944 :     memset(pgstat_get_entry_data(kind, header), 0,
     923             :            pgstat_get_entry_len(kind));
     924             : 
     925       15944 :     if (kind_info->reset_timestamp_cb)
     926         282 :         kind_info->reset_timestamp_cb(header, ts);
     927       15944 : }
     928             : 
     929             : /*
     930             :  * Reset one variable-numbered stats entry; no-op if it is missing or dropped.
     931             :  */
     932             : void
     933         248 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
     934             : {
     935             :     PgStat_EntryRef *entry_ref;
     936             : 
     937             :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
     938             : 
     939         248 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
     940         248 :     if (!entry_ref || entry_ref->shared_entry->dropped)
     941           2 :         return;
     942             : /* nowait is false, so pgstat_lock_entry() always returns true here and the result can be ignored */
     943         246 :     (void) pgstat_lock_entry(entry_ref, false);
     944         246 :     shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
     945         246 :     pgstat_unlock_entry(entry_ref);
     946             : }
     947             : 
     948             : /*
     949             :  * Scan through the shared hashtable of stats, resetting (under each entry's
     950             :  * exclusive lock) those approved by do_reset(entry, match_data).
     951             :  */
     952             : void
     953          34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
     954             :                               Datum match_data, TimestampTz ts)
     955             : {
     956             :     dshash_seq_status hstat;
     957             :     PgStatShared_HashEntry *p;
     958             : 
     959             :     /* dshash entry is not modified, take shared lock */
     960          34 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
     961       23976 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     962             :     {
     963             :         PgStatShared_Common *header;
     964             : /* dropped entries are skipped, not reset */
     965       23942 :         if (p->dropped)
     966           2 :             continue;
     967             : 
     968       23940 :         if (!do_reset(p, match_data))
     969        8242 :             continue;
     970             : /* resolve the entry's DSA body pointer to a local address */
     971       15698 :         header = dsa_get_address(pgStatLocal.dsa, p->body);
     972             : 
     973       15698 :         LWLockAcquire(&header->lock, LW_EXCLUSIVE);
     974             : 
     975       15698 :         shared_stat_reset_contents(p->key.kind, header, ts);
     976             : 
     977       15698 :         LWLockRelease(&header->lock);
     978             :     }
     979          34 :     dshash_seq_term(&hstat);
     980          34 : }
     981             : /* do_reset callback: accept entries whose key.kind equals the int32 carried in match_data. */
     982             : static bool
     983        2902 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
     984             : {
     985        2902 :     return p->key.kind == DatumGetInt32(match_data);
     986             : }
     987             : /* Reset all non-dropped shared stats entries of the given kind, passing ts along to the reset. */
     988             : void
     989           8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
     990             : {
     991           8 :     pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
     992           8 : }
     993             : /* Create, if not yet present, the two memory contexts (children of TopMemoryContext) named below. */
     994             : static void
     995     3626574 : pgstat_setup_memcxt(void)
     996             : {
     997     3626574 :     if (unlikely(!pgStatSharedRefContext))
     998       28344 :         pgStatSharedRefContext =
     999       28344 :             AllocSetContextCreate(TopMemoryContext,
    1000             :                                   "PgStat Shared Ref",
    1001             :                                   ALLOCSET_SMALL_SIZES);
    1002     3626574 :     if (unlikely(!pgStatEntryRefHashContext))
    1003       28344 :         pgStatEntryRefHashContext =
    1004       28344 :             AllocSetContextCreate(TopMemoryContext,
    1005             :                                   "PgStat Shared Ref Hash",
    1006             :                                   ALLOCSET_SMALL_SIZES);
    1007     3626574 : }

Generated by: LCOV version 1.14