LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 287 301 95.3 %
Date: 2024-02-28 06:10:54 Functions: 34 34 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_shmem.c
       4             :  *    Storage of stats entries in shared memory
       5             :  *
       6             :  * Copyright (c) 2001-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/activity/pgstat_shmem.c
      10             :  * -------------------------------------------------------------------------
      11             :  */
      12             : 
      13             : #include "postgres.h"
      14             : 
      15             : #include "pgstat.h"
      16             : #include "storage/shmem.h"
      17             : #include "utils/memutils.h"
      18             : #include "utils/pgstat_internal.h"
      19             : 
      20             : 
      21             : #define PGSTAT_ENTRY_REF_HASH_SIZE  128
      22             : 
      23             : /* hash table entry for finding the PgStat_EntryRef for a key */
      24             : typedef struct PgStat_EntryRefHashEntry
      25             : {
      26             :     PgStat_HashKey key;         /* hash key */
      27             :     char        status;         /* for simplehash use */
      28             :     PgStat_EntryRef *entry_ref;
      29             : } PgStat_EntryRefHashEntry;
      30             : 
      31             : 
      32             : /* for references to shared statistics entries */
      33             : #define SH_PREFIX pgstat_entry_ref_hash
      34             : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
      35             : #define SH_KEY_TYPE PgStat_HashKey
      36             : #define SH_KEY key
      37             : #define SH_HASH_KEY(tb, key) \
      38             :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
      39             : #define SH_EQUAL(tb, a, b) \
      40             :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
      41             : #define SH_SCOPE static inline
      42             : #define SH_DEFINE
      43             : #define SH_DECLARE
      44             : #include "lib/simplehash.h"
      45             : 
      46             : 
      47             : static void pgstat_drop_database_and_contents(Oid dboid);
      48             : 
      49             : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
      50             : 
      51             : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
      52             : static bool pgstat_need_entry_refs_gc(void);
      53             : static void pgstat_gc_entry_refs(void);
      54             : static void pgstat_release_all_entry_refs(bool discard_pending);
      55             : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
      56             : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
      57             : 
      58             : static void pgstat_setup_memcxt(void);
      59             : 
      60             : 
      61             : /* parameter for the shared hash */
      62             : static const dshash_parameters dsh_params = {
      63             :     sizeof(PgStat_HashKey),
      64             :     sizeof(PgStatShared_HashEntry),
      65             :     pgstat_cmp_hash_key,
      66             :     pgstat_hash_hash_key,
      67             :     dshash_memcpy,
      68             :     LWTRANCHE_PGSTATS_HASH
      69             : };
      70             : 
      71             : 
      72             : /*
      73             :  * Backend local references to shared stats entries. If there are pending
      74             :  * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
      75             :  * list.
      76             :  *
      77             :  * When a stats entry is dropped each backend needs to release its reference
      78             :  * to it before the memory can be released. To trigger that
      79             :  * pgStatLocal.shmem->gc_request_count is incremented - which each backend
      80             :  * compares to their copy of pgStatSharedRefAge on a regular basis.
      81             :  */
      82             : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
      83             : static int  pgStatSharedRefAge = 0; /* cache age of pgStatShmLookupCache */
      84             : 
      85             : /*
      86             :  * Memory contexts for the PgStat_EntryRef entries (pgStatSharedRefContext)
      87             :  * and for the pgStatEntryRefHash table (pgStatEntryRefHashContext). Kept
      88             :  * separate to make it easier to track / attribute memory usage.
      89             :  */
      90             : static MemoryContext pgStatSharedRefContext = NULL;
      91             : static MemoryContext pgStatEntryRefHashContext = NULL;
      92             : 
      93             : 
      94             : /* ------------------------------------------------------------
      95             :  * Public functions called from postmaster follow
      96             :  * ------------------------------------------------------------
      97             :  */
      98             : 
      99             : /*
     100             :  * The size of the shared memory allocation for stats stored in the shared
     101             :  * stats hash table. This allocation will be done as part of the main shared
     102             :  * memory, rather than dynamic shared memory, allowing it to be initialized in
     103             :  * postmaster.
     104             :  */
     105             : static Size
     106        9632 : pgstat_dsa_init_size(void)
     107             : {
     108             :     Size        sz;
     109             : 
     110             :     /*
     111             :      * The dshash header / initial buckets array needs to fit into "plain"
     112             :      * shared memory, but it's beneficial to not need dsm segments
     113             :      * immediately. A size of 256kB seems to work well and is not
     114             :      * disproportional compared to other constant sized shared memory
     115             :      * allocations. NB: To avoid DSMs further, the user can configure
     116             :      * min_dynamic_shared_memory.
     117             :      */
     118        9632 :     sz = 256 * 1024;
     119             :     Assert(dsa_minimum_size() <= sz);
     120        9632 :     return MAXALIGN(sz);
     121             : }
     122             : 
     123             : /*
     124             :  * Compute shared memory space needed for cumulative statistics
     125             :  */
     126             : Size
     127        4718 : StatsShmemSize(void)
     128             : {
     129             :     Size        sz;
     130             : 
     131        4718 :     sz = MAXALIGN(sizeof(PgStat_ShmemControl));
     132        4718 :     sz = add_size(sz, pgstat_dsa_init_size());
     133             : 
     134        4718 :     return sz;
     135             : }
     136             : 
     137             : /*
     138             :  * Initialize cumulative statistics system during startup
     139             :  */
     140             : void
     141        1638 : StatsShmemInit(void)
     142             : {
     143             :     bool        found;
     144             :     Size        sz;
     145             : 
     146        1638 :     sz = StatsShmemSize();
     147        1638 :     pgStatLocal.shmem = (PgStat_ShmemControl *)
     148        1638 :         ShmemInitStruct("Shared Memory Stats", sz, &found);
     149             : 
     150        1638 :     if (!IsUnderPostmaster)
     151             :     {
     152             :         dsa_area   *dsa;
     153             :         dshash_table *dsh;
     154        1638 :         PgStat_ShmemControl *ctl = pgStatLocal.shmem;
     155        1638 :         char       *p = (char *) ctl;
     156             : 
     157             :         Assert(!found);
     158             : 
     159             :         /* the allocation of pgStatLocal.shmem itself */
     160        1638 :         p += MAXALIGN(sizeof(PgStat_ShmemControl));
     161             : 
     162             :         /*
     163             :          * Create a small dsa allocation in plain shared memory. This is
     164             :          * required because postmaster cannot use dsm segments. It also
     165             :          * provides a small efficiency win.
     166             :          */
     167        1638 :         ctl->raw_dsa_area = p;
     168        1638 :         p += MAXALIGN(pgstat_dsa_init_size());
     169        1638 :         dsa = dsa_create_in_place(ctl->raw_dsa_area,
     170             :                                   pgstat_dsa_init_size(),
     171             :                                   LWTRANCHE_PGSTATS_DSA, 0);
     172        1638 :         dsa_pin(dsa);
     173             : 
     174             :         /*
     175             :          * To ensure dshash is created in "plain" shared memory, temporarily
     176             :          * limit size of dsa to the initial size of the dsa.
     177             :          */
     178        1638 :         dsa_set_size_limit(dsa, pgstat_dsa_init_size());
     179             : 
     180             :         /*
     181             :          * With the limit in place, create the dshash table. XXX: It'd be nice
     182             :          * if there were dshash_create_in_place().
     183             :          */
     184        1638 :         dsh = dshash_create(dsa, &dsh_params, NULL);
     185        1638 :         ctl->hash_handle = dshash_get_hash_table_handle(dsh);
     186             : 
     187             :         /* lift limit set above */
     188        1638 :         dsa_set_size_limit(dsa, -1);
     189             : 
     190             :         /*
     191             :          * Postmaster will never access these again, thus free the local
     192             :          * dsa/dshash references.
     193             :          */
     194        1638 :         dshash_detach(dsh);
     195        1638 :         dsa_detach(dsa);
     196             : 
     197        1638 :         pg_atomic_init_u64(&ctl->gc_request_count, 1);
     198             : 
     199             : 
     200             :         /* initialize fixed-numbered stats */
     201        1638 :         LWLockInitialize(&ctl->archiver.lock, LWTRANCHE_PGSTATS_DATA);
     202        1638 :         LWLockInitialize(&ctl->bgwriter.lock, LWTRANCHE_PGSTATS_DATA);
     203        1638 :         LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA);
     204        1638 :         LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA);
     205        1638 :         LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA);
     206             : 
     207       27846 :         for (int i = 0; i < BACKEND_NUM_TYPES; i++)
     208       26208 :             LWLockInitialize(&ctl->io.locks[i],
     209             :                              LWTRANCHE_PGSTATS_DATA);
     210             :     }
     211             :     else
     212             :     {
     213             :         Assert(found);
     214             :     }
     215        1638 : }
     216             : 
     217             : void
     218       28950 : pgstat_attach_shmem(void)
     219             : {
     220             :     MemoryContext oldcontext;
     221             : 
     222             :     Assert(pgStatLocal.dsa == NULL);
     223             : 
     224             :     /* stats shared memory persists for the backend lifetime */
     225       28950 :     oldcontext = MemoryContextSwitchTo(TopMemoryContext);
     226             : 
     227       28950 :     pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
     228             :                                           NULL);
     229       28950 :     dsa_pin_mapping(pgStatLocal.dsa);
     230             : 
     231       57900 :     pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
     232       28950 :                                             pgStatLocal.shmem->hash_handle, 0);
     233             : 
     234       28950 :     MemoryContextSwitchTo(oldcontext);
     235       28950 : }
     236             : 
     237             : void
     238       28950 : pgstat_detach_shmem(void)
     239             : {
     240             :     Assert(pgStatLocal.dsa);
     241             : 
     242             :     /* we shouldn't leave references to shared stats */
     243       28950 :     pgstat_release_all_entry_refs(false);
     244             : 
     245       28950 :     dshash_detach(pgStatLocal.shared_hash);
     246       28950 :     pgStatLocal.shared_hash = NULL;
     247             : 
     248       28950 :     dsa_detach(pgStatLocal.dsa);
     249       28950 :     pgStatLocal.dsa = NULL;
     250       28950 : }
     251             : 
     252             : 
     253             : /* ------------------------------------------------------------
     254             :  * Maintenance of shared memory stats entries
     255             :  * ------------------------------------------------------------
     256             :  */
     257             : 
     258             : PgStatShared_Common *
     259      448368 : pgstat_init_entry(PgStat_Kind kind,
     260             :                   PgStatShared_HashEntry *shhashent)
     261             : {
     262             :     /* Create new stats entry. */
     263             :     dsa_pointer chunk;
     264             :     PgStatShared_Common *shheader;
     265             : 
     266             :     /*
     267             :      * Initialize refcount to 1, marking it as valid / not dropped. The entry
     268             :      * can't be freed before the initialization because it can't be found as
     269             :      * long as we hold the dshash partition lock. Caller needs to increase
     270             :      * further if a longer lived reference is needed.
     271             :      */
     272      448368 :     pg_atomic_init_u32(&shhashent->refcount, 1);
     273      448368 :     shhashent->dropped = false;
     274             : 
     275      448368 :     chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
     276      448368 :     shheader = dsa_get_address(pgStatLocal.dsa, chunk);
     277      448368 :     shheader->magic = 0xdeadbeef;
     278             : 
     279             :     /* Link the new entry from the hash entry. */
     280      448368 :     shhashent->body = chunk;
     281             : 
     282      448368 :     LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
     283             : 
     284      448368 :     return shheader;
     285             : }
     286             : 
     287             : static PgStatShared_Common *
     288          54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
     289             : {
     290             :     PgStatShared_Common *shheader;
     291             : 
     292          54 :     shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     293             : 
     294             :     /* mark as not dropped anymore */
     295          54 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     296          54 :     shhashent->dropped = false;
     297             : 
     298             :     /* reinitialize content */
     299             :     Assert(shheader->magic == 0xdeadbeef);
     300          54 :     memset(pgstat_get_entry_data(kind, shheader), 0,
     301             :            pgstat_get_entry_len(kind));
     302             : 
     303          54 :     return shheader;
     304             : }
     305             : 
     306             : static void
     307     2790262 : pgstat_setup_shared_refs(void)
     308             : {
     309     2790262 :     if (likely(pgStatEntryRefHash != NULL))
     310     2765228 :         return;
     311             : 
     312       25034 :     pgStatEntryRefHash =
     313       25034 :         pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
     314             :                                      PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
     315       25034 :     pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     316             :     Assert(pgStatSharedRefAge != 0);
     317             : }
     318             : 
     319             : /*
     320             :  * Helper function for pgstat_get_entry_ref().
     321             :  */
     322             : static void
     323     1090358 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
     324             :                          PgStatShared_HashEntry *shhashent,
     325             :                          PgStatShared_Common *shheader)
     326             : {
     327             :     Assert(shheader->magic == 0xdeadbeef);
     328             :     Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
     329             : 
     330     1090358 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     331             : 
     332     1090358 :     dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     333             : 
     334     1090358 :     entry_ref->shared_stats = shheader;
     335     1090358 :     entry_ref->shared_entry = shhashent;
     336     1090358 : }
     337             : 
     338             : /*
     339             :  * Helper function for pgstat_get_entry_ref().
     340             :  */
     341             : static bool
     342     2790262 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
     343             : {
     344             :     bool        found;
     345             :     PgStat_EntryRefHashEntry *cache_entry;
     346             : 
     347             :     /*
     348             :      * We immediately insert a cache entry, because it avoids 1) multiple
     349             :      * hashtable lookups in case of a cache miss 2) having to deal with
     350             :      * out-of-memory errors after incrementing PgStatShared_HashEntry->refcount.
     351             :      */
     352             : 
     353     2790262 :     cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
     354             : 
     355     2790262 :     if (!found || !cache_entry->entry_ref)
     356     1217780 :     {
     357             :         PgStat_EntryRef *entry_ref;
     358             : 
     359     1217780 :         cache_entry->entry_ref = entry_ref =
     360     1217780 :             MemoryContextAlloc(pgStatSharedRefContext,
     361             :                                sizeof(PgStat_EntryRef));
     362     1217780 :         entry_ref->shared_stats = NULL;
     363     1217780 :         entry_ref->shared_entry = NULL;
     364     1217780 :         entry_ref->pending = NULL;
     365             : 
     366     1217780 :         found = false;
     367             :     }
     368     1572482 :     else if (cache_entry->entry_ref->shared_stats == NULL)
     369             :     {
     370             :         Assert(cache_entry->entry_ref->pending == NULL);
     371           0 :         found = false;
     372             :     }
     373             :     else
     374             :     {
     375             :         PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
     376             : 
     377     1572482 :         entry_ref = cache_entry->entry_ref;
     378             :         Assert(entry_ref->shared_entry != NULL);
     379             :         Assert(entry_ref->shared_stats != NULL);
     380             : 
     381             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     382             :         /* should have at least our reference */
     383             :         Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
     384             :     }
     385             : 
     386     2790262 :     *entry_ref_p = cache_entry->entry_ref;
     387     2790262 :     return found;
     388             : }
     389             : 
     390             : /*
     391             :  * Get a shared stats reference. If create is true, the shared stats object is
     392             :  * created if it does not exist.
     393             :  *
     394             :  * When create is true, and created_entry is non-NULL, it'll be set to true
     395             :  * if the entry is newly created, false otherwise.
     396             :  */
     397             : PgStat_EntryRef *
     398     2790262 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
     399             :                      bool *created_entry)
     400             : {
     401     2790262 :     PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
     402             :     PgStatShared_HashEntry *shhashent;
     403     2790262 :     PgStatShared_Common *shheader = NULL;
     404             :     PgStat_EntryRef *entry_ref;
     405             : 
     406             :     /*
     407             :      * Passing in created_entry only makes sense if we could possibly create
     408             :      * an entry.
     409             :      */
     410             :     Assert(create || created_entry == NULL);
     411             :     pgstat_assert_is_up();
     412             :     Assert(pgStatLocal.shared_hash != NULL);
     413             :     Assert(!pgStatLocal.shmem->is_shutdown);
     414             : 
     415     2790262 :     pgstat_setup_memcxt();
     416     2790262 :     pgstat_setup_shared_refs();
     417             : 
     418     2790262 :     if (created_entry != NULL)
     419         214 :         *created_entry = false;
     420             : 
     421             :     /*
     422             :      * Check if other backends dropped stats that could not be deleted because
     423             :      * somebody held references to it. If so, check this backend's references.
     424             :      * This is not expected to happen often. The location of the check is a
     425             :      * bit random, but this is a relatively frequently called path, so better
     426             :      * than most.
     427             :      */
     428     2790262 :     if (pgstat_need_entry_refs_gc())
     429       11040 :         pgstat_gc_entry_refs();
     430             : 
     431             :     /*
     432             :      * First check the lookup cache hashtable in local memory. If we find a
     433             :      * match here we can avoid taking locks / causing contention.
     434             :      */
     435     2790262 :     if (pgstat_get_entry_ref_cached(key, &entry_ref))
     436     1572482 :         return entry_ref;
     437             : 
     438             :     Assert(entry_ref != NULL);
     439             : 
     440             :     /*
     441             :      * Do a lookup in the hash table first - it's quite likely that the entry
     442             :      * already exists, and that way we only need a shared lock.
     443             :      */
     444     1217780 :     shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
     445             : 
     446     1217780 :     if (create && !shhashent)
     447             :     {
     448             :         bool        shfound;
     449             : 
     450             :         /*
     451             :          * It's possible that somebody created the entry since the above
     452             :          * lookup. If so, fall through to the same path we'd have taken if it
     453             :          * had already been created before the dshash_find() call.
     454             :          */
     455      157648 :         shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
     456      157648 :         if (!shfound)
     457             :         {
     458      157644 :             shheader = pgstat_init_entry(kind, shhashent);
     459      157644 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     460             : 
     461      157644 :             if (created_entry != NULL)
     462          96 :                 *created_entry = true;
     463             : 
     464      157644 :             return entry_ref;
     465             :         }
     466             :     }
     467             : 
     468     1060136 :     if (!shhashent)
     469             :     {
     470             :         /*
     471             :          * If we're not creating, delete the reference again. In all
     472             :          * likelihood it's just a stats lookup - no point wasting memory for a
     473             :          * shared ref to nothing...
     474             :          */
     475      127348 :         pgstat_release_entry_ref(key, entry_ref, false);
     476             : 
     477      127348 :         return NULL;
     478             :     }
     479             :     else
     480             :     {
     481             :         /*
     482             :          * Can get here either because dshash_find() found a match, or if
     483             :          * dshash_find_or_insert() found a concurrently inserted entry.
     484             :          */
     485             : 
     486      932788 :         if (shhashent->dropped && create)
     487             :         {
     488             :             /*
     489             :              * There are legitimate cases where the old stats entry might not
     490             :              * yet have been dropped by the time it's reused. The most obvious
     491             :              * case is replication slot stats, where a new slot can be
     492             :              * created with the same index just after dropping. But oid
     493             :              * wraparound can lead to other cases as well. We just reset the
     494             :              * stats to their plain state.
     495             :              */
     496          54 :             shheader = pgstat_reinit_entry(kind, shhashent);
     497          54 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     498             : 
     499          54 :             if (created_entry != NULL)
     500           0 :                 *created_entry = true;
     501             : 
     502          54 :             return entry_ref;
     503             :         }
     504      932734 :         else if (shhashent->dropped)
     505             :         {
     506          74 :             dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     507          74 :             pgstat_release_entry_ref(key, entry_ref, false);
     508             : 
     509          74 :             return NULL;
     510             :         }
     511             :         else
     512             :         {
     513      932660 :             shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     514      932660 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     515             : 
     516      932660 :             return entry_ref;
     517             :         }
     518             :     }
     519             : }
     520             : 
     521             : static void
     522     1217780 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
     523             :                          bool discard_pending)
     524             : {
     525     1217780 :     if (entry_ref && entry_ref->pending)
     526             :     {
     527       57752 :         if (discard_pending)
     528       57752 :             pgstat_delete_pending_entry(entry_ref);
     529             :         else
     530           0 :             elog(ERROR, "releasing ref with pending data");
     531             :     }
     532             : 
     533     1217780 :     if (entry_ref && entry_ref->shared_stats)
     534             :     {
     535             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     536             :         Assert(entry_ref->pending == NULL);
     537             : 
     538             :         /*
     539             :          * This can't race with another backend looking up the stats entry and
     540             :          * increasing the refcount because it is not "legal" to create
     541             :          * additional references to dropped entries.
     542             :          */
     543     1090358 :         if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
     544             :         {
     545             :             PgStatShared_HashEntry *shent;
     546             : 
     547             :             /*
     548             :              * We're the last referrer to this entry, try to drop the shared
     549             :              * entry.
     550             :              */
     551             : 
     552             :             /* only dropped entries can reach a 0 refcount */
     553             :             Assert(entry_ref->shared_entry->dropped);
     554             : 
     555        9214 :             shent = dshash_find(pgStatLocal.shared_hash,
     556        9214 :                                 &entry_ref->shared_entry->key,
     557             :                                 true);
     558        9214 :             if (!shent)
     559           0 :                 elog(ERROR, "could not find just referenced shared stats entry");
     560             : 
     561             :             Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
     562             :             Assert(entry_ref->shared_entry == shent);
     563             : 
     564        9214 :             pgstat_free_entry(shent, NULL);
     565             :         }
     566             :     }
     567             : 
     568     1217780 :     if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
     569           0 :         elog(ERROR, "entry ref vanished before deletion");
     570             : 
     571     1217780 :     if (entry_ref)
     572     1217780 :         pfree(entry_ref);
     573     1217780 : }
     574             : 
     575             : bool
     576     1313198 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
     577             : {
     578     1313198 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     579             : 
     580     1313198 :     if (nowait)
     581      485162 :         return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
     582             : 
     583      828036 :     LWLockAcquire(lock, LW_EXCLUSIVE);
     584      828036 :     return true;
     585             : }
     586             : 
     587             : /*
     588             :  * Separate from pgstat_lock_entry() as most callers will need to lock
     589             :  * exclusively.
     590             :  */
     591             : bool
     592       11364 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
     593             : {
     594       11364 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     595             : 
     596       11364 :     if (nowait)
     597           0 :         return LWLockConditionalAcquire(lock, LW_SHARED);
     598             : 
     599       11364 :     LWLockAcquire(lock, LW_SHARED);
     600       11364 :     return true;
     601             : }
     602             : 
     603             : void
     604     1324562 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
     605             : {
     606     1324562 :     LWLockRelease(&entry_ref->shared_stats->lock);
     607     1324562 : }
     608             : 
     609             : /*
     610             :  * Helper function to fetch and lock shared stats.
     611             :  */
     612             : PgStat_EntryRef *
     613       40300 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
     614             :                             bool nowait)
     615             : {
     616             :     PgStat_EntryRef *entry_ref;
     617             : 
     618             :     /* find shared table stats entry corresponding to the local entry */
     619       40300 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL);
     620             : 
     621             :     /* lock the shared entry to protect the content, skip if failed */
     622       40300 :     if (!pgstat_lock_entry(entry_ref, nowait))
     623           0 :         return NULL;
     624             : 
     625       40300 :     return entry_ref;
     626             : }
     627             : 
     628             : void
     629        3534 : pgstat_request_entry_refs_gc(void)
     630             : {
     631        3534 :     pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
     632        3534 : }
     633             : 
     634             : static bool
     635     2790262 : pgstat_need_entry_refs_gc(void)
     636             : {
     637             :     uint64      curage;
     638             : 
     639     2790262 :     if (!pgStatEntryRefHash)
     640           0 :         return false;
     641             : 
     642             :     /* should have been initialized when creating pgStatEntryRefHash */
     643             :     Assert(pgStatSharedRefAge != 0);
     644             : 
     645     2790262 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     646             : 
     647     2790262 :     return pgStatSharedRefAge != curage;
     648             : }
     649             : 
     650             : static void
     651       11040 : pgstat_gc_entry_refs(void)
     652             : {
     653             :     pgstat_entry_ref_hash_iterator i;
     654             :     PgStat_EntryRefHashEntry *ent;
     655             :     uint64      curage;
     656             : 
     657       11040 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     658             :     Assert(curage != 0);
     659             : 
     660             :     /*
     661             :      * Some entries have been dropped. Invalidate cache pointer to them.
     662             :      */
     663       11040 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     664      828114 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
     665             :     {
     666      817074 :         PgStat_EntryRef *entry_ref = ent->entry_ref;
     667             : 
     668             :         Assert(!entry_ref->shared_stats ||
     669             :                entry_ref->shared_stats->magic == 0xdeadbeef);
     670             : 
     671      817074 :         if (!entry_ref->shared_entry->dropped)
     672      574588 :             continue;
     673             : 
     674             :         /* cannot gc shared ref that has pending data */
     675      242486 :         if (entry_ref->pending != NULL)
     676      233176 :             continue;
     677             : 
     678        9310 :         pgstat_release_entry_ref(ent->key, entry_ref, false);
     679             :     }
     680             : 
     681       11040 :     pgStatSharedRefAge = curage;
     682       11040 : }
     683             : 
     684             : static void
     685       25072 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
     686             :                                    Datum match_data)
     687             : {
     688             :     pgstat_entry_ref_hash_iterator i;
     689             :     PgStat_EntryRefHashEntry *ent;
     690             : 
     691       25072 :     if (pgStatEntryRefHash == NULL)
     692          10 :         return;
     693             : 
     694       25062 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     695             : 
     696     1048510 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
     697             :            != NULL)
     698             :     {
     699             :         Assert(ent->entry_ref != NULL);
     700             : 
     701     1023448 :         if (match && !match(ent, match_data))
     702         914 :             continue;
     703             : 
     704     1022534 :         pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
     705             :     }
     706             : }
     707             : 
     708             : /*
     709             :  * Release all local references to shared stats entries.
     710             :  *
     711             :  * When a process exits it cannot do so while still holding references onto
     712             :  * stats entries, otherwise the shared stats entries could never be freed.
     713             :  */
     714             : static void
     715       28950 : pgstat_release_all_entry_refs(bool discard_pending)
     716             : {
     717       28950 :     if (pgStatEntryRefHash == NULL)
     718        3916 :         return;
     719             : 
     720       25034 :     pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
     721             :     Assert(pgStatEntryRefHash->members == 0);
     722       25034 :     pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
     723       25034 :     pgStatEntryRefHash = NULL;
     724             : }
     725             : 
     726             : static bool
     727         914 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
     728             : {
     729         914 :     Oid         dboid = DatumGetObjectId(match_data);
     730             : 
     731         914 :     return ent->key.dboid == dboid;
     732             : }
     733             : 
     734             : static void
     735          38 : pgstat_release_db_entry_refs(Oid dboid)
     736             : {
     737          38 :     pgstat_release_matching_entry_refs( /* discard pending = */ true,
     738             :                                        match_db,
     739             :                                        ObjectIdGetDatum(dboid));
     740          38 : }
     741             : 
     742             : 
     743             : /* ------------------------------------------------------------
     744             :  * Dropping and resetting of stats entries
     745             :  * ------------------------------------------------------------
     746             :  */
     747             : 
     748             : static void
     749       62600 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
     750             : {
     751             :     dsa_pointer pdsa;
     752             : 
     753             :     /*
     754             :      * Fetch dsa pointer before deleting entry - that way we can free the
     755             :      * memory after releasing the lock.
     756             :      */
     757       62600 :     pdsa = shent->body;
     758             : 
     759       62600 :     if (!hstat)
     760       58698 :         dshash_delete_entry(pgStatLocal.shared_hash, shent);
     761             :     else
     762        3902 :         dshash_delete_current(hstat);
     763             : 
     764       62600 :     dsa_free(pgStatLocal.dsa, pdsa);
     765       62600 : }
     766             : 
     767             : /*
     768             :  * Helper for both pgstat_drop_database_and_contents() and
     769             :  * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
     770             :  * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
     771             :  * case the entry needs to be already locked.
     772             :  */
     773             : static bool
     774       62656 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
     775             :                            dshash_seq_status *hstat)
     776             : {
     777             :     Assert(shent->body != InvalidDsaPointer);
     778             : 
     779             :     /* should already have released local reference */
     780       62656 :     if (pgStatEntryRefHash)
     781             :         Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
     782             : 
     783             :     /*
     784             :      * Signal that the entry is dropped - this will eventually cause other
     785             :      * backends to release their references.
     786             :      */
     787       62656 :     if (shent->dropped)
     788           0 :         elog(ERROR, "can only drop stats once");
     789       62656 :     shent->dropped = true;
     790             : 
     791             :     /* release refcount marking entry as not dropped */
     792       62656 :     if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
     793             :     {
     794       53386 :         pgstat_free_entry(shent, hstat);
     795       53386 :         return true;
     796             :     }
     797             :     else
     798             :     {
     799        9270 :         if (!hstat)
     800        9270 :             dshash_release_lock(pgStatLocal.shared_hash, shent);
     801        9270 :         return false;
     802             :     }
     803             : }
     804             : 
     805             : /*
     806             :  * Drop stats for the database and all the objects inside that database.
     807             :  */
     808             : static void
     809          38 : pgstat_drop_database_and_contents(Oid dboid)
     810             : {
     811             :     dshash_seq_status hstat;
     812             :     PgStatShared_HashEntry *p;
     813          38 :     uint64      not_freed_count = 0;
     814             : 
     815             :     Assert(OidIsValid(dboid));
     816             : 
     817             :     Assert(pgStatLocal.shared_hash != NULL);
     818             : 
     819             :     /*
     820             :      * This backend might very well be the only backend holding a reference to
     821             :      * about-to-be-dropped entries. Ensure that we're not preventing it from
     822             :      * being cleaned up till later.
     823             :      *
     824             :      * Doing this separately from the dshash iteration below avoids having to
     825             :      * do so while holding a partition lock on the shared hashtable.
     826             :      */
     827          38 :     pgstat_release_db_entry_refs(dboid);
     828             : 
     829             :     /* some of the dshash entries are to be removed, take exclusive lock. */
     830          38 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     831       13608 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     832             :     {
     833       13570 :         if (p->dropped)
     834           2 :             continue;
     835             : 
     836       13568 :         if (p->key.dboid != dboid)
     837        9778 :             continue;
     838             : 
     839        3790 :         if (!pgstat_drop_entry_internal(p, &hstat))
     840             :         {
     841             :             /*
     842             :              * Even statistics for a dropped database might currently be
     843             :              * accessed (consider e.g. database stats for pg_stat_database).
     844             :              */
     845           0 :             not_freed_count++;
     846             :         }
     847             :     }
     848          38 :     dshash_seq_term(&hstat);
     849             : 
     850             :     /*
     851             :      * If some of the stats data could not be freed, signal the reference
     852             :      * holders to run garbage collection of their cached pgStatShmLookupCache.
     853             :      */
     854          38 :     if (not_freed_count > 0)
     855           0 :         pgstat_request_entry_refs_gc();
     856          38 : }
     857             : 
     858             : bool
     859       87698 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
     860             : {
     861       87698 :     PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
     862             :     PgStatShared_HashEntry *shent;
     863       87698 :     bool        freed = true;
     864             : 
     865             :     /* delete local reference */
     866       87698 :     if (pgStatEntryRefHash)
     867             :     {
     868             :         PgStat_EntryRefHashEntry *lohashent =
     869       71518 :             pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
     870             : 
     871       71518 :         if (lohashent)
     872       58514 :             pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
     873             :                                      true);
     874             :     }
     875             : 
     876             :     /* mark entry in shared hashtable as deleted, drop if possible */
     877       87698 :     shent = dshash_find(pgStatLocal.shared_hash, &key, true);
     878       87698 :     if (shent)
     879             :     {
     880       58754 :         freed = pgstat_drop_entry_internal(shent, NULL);
     881             : 
     882             :         /*
     883             :          * Database stats contain other stats. Drop those as well when
     884             :          * dropping the database. XXX: Perhaps this should be done in a
     885             :          * slightly more principled way? But not obvious what that'd look
     886             :          * like, and so far this is the only case...
     887             :          */
     888       58754 :         if (key.kind == PGSTAT_KIND_DATABASE)
     889          38 :             pgstat_drop_database_and_contents(key.dboid);
     890             :     }
     891             : 
     892       87698 :     return freed;
     893             : }
     894             : 
     895             : void
     896         386 : pgstat_drop_all_entries(void)
     897             : {
     898             :     dshash_seq_status hstat;
     899             :     PgStatShared_HashEntry *ps;
     900         386 :     uint64      not_freed_count = 0;
     901             : 
     902         386 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     903         498 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
     904             :     {
     905         112 :         if (ps->dropped)
     906           0 :             continue;
     907             : 
     908         112 :         if (!pgstat_drop_entry_internal(ps, &hstat))
     909           0 :             not_freed_count++;
     910             :     }
     911         386 :     dshash_seq_term(&hstat);
     912             : 
     913         386 :     if (not_freed_count > 0)
     914           0 :         pgstat_request_entry_refs_gc();
     915         386 : }
     916             : 
     917             : static void
     918       17028 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
     919             :                            TimestampTz ts)
     920             : {
     921       17028 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     922             : 
     923       17028 :     memset(pgstat_get_entry_data(kind, header), 0,
     924             :            pgstat_get_entry_len(kind));
     925             : 
     926       17028 :     if (kind_info->reset_timestamp_cb)
     927         334 :         kind_info->reset_timestamp_cb(header, ts);
     928       17028 : }
     929             : 
     930             : /*
     931             :  * Reset one variable-numbered stats entry.
     932             :  */
     933             : void
     934         306 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
     935             : {
     936             :     PgStat_EntryRef *entry_ref;
     937             : 
     938             :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
     939             : 
     940         306 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
     941         306 :     if (!entry_ref || entry_ref->shared_entry->dropped)
     942           2 :         return;
     943             : 
     944         304 :     (void) pgstat_lock_entry(entry_ref, false);
     945         304 :     shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
     946         304 :     pgstat_unlock_entry(entry_ref);
     947             : }
     948             : 
     949             : /*
     950             :  * Scan through the shared hashtable of stats, resetting statistics if
     951             :  * approved by the provided do_reset() function.
     952             :  */
     953             : void
     954          34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
     955             :                               Datum match_data, TimestampTz ts)
     956             : {
     957             :     dshash_seq_status hstat;
     958             :     PgStatShared_HashEntry *p;
     959             : 
     960             :     /* dshash entry is not modified, take shared lock */
     961          34 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
     962       24872 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     963             :     {
     964             :         PgStatShared_Common *header;
     965             : 
     966       24838 :         if (p->dropped)
     967           2 :             continue;
     968             : 
     969       24836 :         if (!do_reset(p, match_data))
     970        8112 :             continue;
     971             : 
     972       16724 :         header = dsa_get_address(pgStatLocal.dsa, p->body);
     973             : 
     974       16724 :         LWLockAcquire(&header->lock, LW_EXCLUSIVE);
     975             : 
     976       16724 :         shared_stat_reset_contents(p->key.kind, header, ts);
     977             : 
     978       16724 :         LWLockRelease(&header->lock);
     979             :     }
     980          34 :     dshash_seq_term(&hstat);
     981          34 : }
     982             : 
     983             : static bool
     984        2904 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
     985             : {
     986        2904 :     return p->key.kind == DatumGetInt32(match_data);
     987             : }
     988             : 
     989             : void
     990           8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
     991             : {
     992           8 :     pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
     993           8 : }
     994             : 
     995             : static void
     996     2790262 : pgstat_setup_memcxt(void)
     997             : {
     998     2790262 :     if (unlikely(!pgStatSharedRefContext))
     999       25034 :         pgStatSharedRefContext =
    1000       25034 :             AllocSetContextCreate(TopMemoryContext,
    1001             :                                   "PgStat Shared Ref",
    1002             :                                   ALLOCSET_SMALL_SIZES);
    1003     2790262 :     if (unlikely(!pgStatEntryRefHashContext))
    1004       25034 :         pgStatEntryRefHashContext =
    1005       25034 :             AllocSetContextCreate(TopMemoryContext,
    1006             :                                   "PgStat Shared Ref Hash",
    1007             :                                   ALLOCSET_SMALL_SIZES);
    1008     2790262 : }

Generated by: LCOV version 1.14