LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 311 326 95.4 %
Date: 2025-01-18 04:15:08 Functions: 34 34 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_shmem.c
       4             :  *    Storage of stats entries in shared memory
       5             :  *
       6             :  * Copyright (c) 2001-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/activity/pgstat_shmem.c
      10             :  * -------------------------------------------------------------------------
      11             :  */
      12             : 
      13             : #include "postgres.h"
      14             : 
      15             : #include "pgstat.h"
      16             : #include "storage/shmem.h"
      17             : #include "utils/memutils.h"
      18             : #include "utils/pgstat_internal.h"
      19             : 
      20             : 
      21             : #define PGSTAT_ENTRY_REF_HASH_SIZE  128
      22             : 
      23             : /* hash table entry for finding the PgStat_EntryRef for a key */
      24             : typedef struct PgStat_EntryRefHashEntry
      25             : {
      26             :     PgStat_HashKey key;         /* hash key */
      27             :     char        status;         /* for simplehash use */
      28             :     PgStat_EntryRef *entry_ref;
      29             : } PgStat_EntryRefHashEntry;
      30             : 
      31             : 
      32             : /* for references to shared statistics entries */
      33             : #define SH_PREFIX pgstat_entry_ref_hash
      34             : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
      35             : #define SH_KEY_TYPE PgStat_HashKey
      36             : #define SH_KEY key
      37             : #define SH_HASH_KEY(tb, key) \
      38             :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
      39             : #define SH_EQUAL(tb, a, b) \
      40             :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
      41             : #define SH_SCOPE static inline
      42             : #define SH_DEFINE
      43             : #define SH_DECLARE
      44             : #include "lib/simplehash.h"
      45             : 
      46             : 
      47             : static void pgstat_drop_database_and_contents(Oid dboid);
      48             : 
      49             : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
      50             : 
      51             : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
      52             : static bool pgstat_need_entry_refs_gc(void);
      53             : static void pgstat_gc_entry_refs(void);
      54             : static void pgstat_release_all_entry_refs(bool discard_pending);
      55             : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
      56             : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
      57             : 
      58             : static void pgstat_setup_memcxt(void);
      59             : 
      60             : 
      61             : /* parameter for the shared hash */
      62             : static const dshash_parameters dsh_params = {
      63             :     sizeof(PgStat_HashKey),
      64             :     sizeof(PgStatShared_HashEntry),
      65             :     pgstat_cmp_hash_key,
      66             :     pgstat_hash_hash_key,
      67             :     dshash_memcpy,
      68             :     LWTRANCHE_PGSTATS_HASH
      69             : };
      70             : 
      71             : 
      72             : /*
      73             :  * Backend local references to shared stats entries. If there are pending
      74             :  * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
      75             :  * list.
      76             :  *
      77             :  * When a stats entry is dropped each backend needs to release its reference
      78             :  * to it before the memory can be released. To trigger that
      79             :  * pgStatLocal.shmem->gc_request_count is incremented - which each backend
      80             :  * compares to its local copy, pgStatSharedRefAge, on a regular basis.
      81             :  */
      82             : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
      83             : static int  pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
      84             : 
      85             : /*
      86             :  * Memory contexts containing the PgStat_EntryRef entries and the
      87             :  * pgStatEntryRefHash table respectively. Kept separate to make it easier to
      88             :  * track / attribute memory usage.
      89             :  */
      90             : static MemoryContext pgStatSharedRefContext = NULL;
      91             : static MemoryContext pgStatEntryRefHashContext = NULL;
      92             : 
      93             : 
      94             : /* ------------------------------------------------------------
      95             :  * Public functions called from postmaster follow
      96             :  * ------------------------------------------------------------
      97             :  */
      98             : 
      99             : /*
     100             :  * The size of the shared memory allocation for stats stored in the shared
     101             :  * stats hash table. This allocation will be done as part of the main shared
     102             :  * memory, rather than dynamic shared memory, allowing it to be initialized in
     103             :  * postmaster.
     104             :  */
     105             : static Size
     106       11238 : pgstat_dsa_init_size(void)
     107             : {
     108             :     Size        sz;
     109             : 
     110             :     /*
     111             :      * The dshash header / initial buckets array needs to fit into "plain"
     112             :      * shared memory, but it's beneficial to not need dsm segments
     113             :      * immediately. A size of 256kB seems to work well and is not
     114             :      * disproportional compared to other constant sized shared memory
     115             :      * allocations. NB: To avoid DSMs further, the user can configure
     116             :      * min_dynamic_shared_memory.
     117             :      */
     118       11238 :     sz = 256 * 1024;
     119             :     Assert(dsa_minimum_size() <= sz);
     120       11238 :     return MAXALIGN(sz);
     121             : }
     122             : 
     123             : /*
     124             :  * Compute shared memory space needed for cumulative statistics
     125             :  */
     126             : Size
     127        5484 : StatsShmemSize(void)
     128             : {
     129             :     Size        sz;
     130             : 
     131        5484 :     sz = MAXALIGN(sizeof(PgStat_ShmemControl));
     132        5484 :     sz = add_size(sz, pgstat_dsa_init_size());
     133             : 
     134             :     /* Add shared memory for all the custom fixed-numbered statistics */
     135      712920 :     for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
     136             :     {
     137      707436 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     138             : 
     139      707436 :         if (!kind_info)
     140      707388 :             continue;
     141          48 :         if (!kind_info->fixed_amount)
     142          24 :             continue;
     143             : 
     144             :         Assert(kind_info->shared_size != 0);
     145             : 
     146          24 :         sz += MAXALIGN(kind_info->shared_size);
     147             :     }
     148             : 
     149        5484 :     return sz;
     150             : }
     151             : 
     152             : /*
     153             :  * Initialize cumulative statistics system during startup
     154             :  */
     155             : void
     156        1918 : StatsShmemInit(void)
     157             : {
     158             :     bool        found;
     159             :     Size        sz;
     160             : 
     161        1918 :     sz = StatsShmemSize();
     162        1918 :     pgStatLocal.shmem = (PgStat_ShmemControl *)
     163        1918 :         ShmemInitStruct("Shared Memory Stats", sz, &found);
     164             : 
     165        1918 :     if (!IsUnderPostmaster)
     166             :     {
     167             :         dsa_area   *dsa;
     168             :         dshash_table *dsh;
     169        1918 :         PgStat_ShmemControl *ctl = pgStatLocal.shmem;
     170        1918 :         char       *p = (char *) ctl;
     171             : 
     172             :         Assert(!found);
     173             : 
     174             :         /* the allocation of pgStatLocal.shmem itself */
     175        1918 :         p += MAXALIGN(sizeof(PgStat_ShmemControl));
     176             : 
     177             :         /*
     178             :          * Create a small dsa allocation in plain shared memory. This is
     179             :          * required because postmaster cannot use dsm segments. It also
     180             :          * provides a small efficiency win.
     181             :          */
     182        1918 :         ctl->raw_dsa_area = p;
     183        1918 :         p += MAXALIGN(pgstat_dsa_init_size());
     184        1918 :         dsa = dsa_create_in_place(ctl->raw_dsa_area,
     185             :                                   pgstat_dsa_init_size(),
     186             :                                   LWTRANCHE_PGSTATS_DSA, 0);
     187        1918 :         dsa_pin(dsa);
     188             : 
     189             :         /*
     190             :          * To ensure dshash is created in "plain" shared memory, temporarily
     191             :          * limit size of dsa to the initial size of the dsa.
     192             :          */
     193        1918 :         dsa_set_size_limit(dsa, pgstat_dsa_init_size());
     194             : 
     195             :         /*
     196             :          * With the limit in place, create the dshash table. XXX: It'd be nice
     197             :          * if there were dshash_create_in_place().
     198             :          */
     199        1918 :         dsh = dshash_create(dsa, &dsh_params, NULL);
     200        1918 :         ctl->hash_handle = dshash_get_hash_table_handle(dsh);
     201             : 
     202             :         /* lift limit set above */
     203        1918 :         dsa_set_size_limit(dsa, -1);
     204             : 
     205             :         /*
     206             :          * Postmaster will never access these again, thus free the local
     207             :          * dsa/dshash references.
     208             :          */
     209        1918 :         dshash_detach(dsh);
     210        1918 :         dsa_detach(dsa);
     211             : 
     212        1918 :         pg_atomic_init_u64(&ctl->gc_request_count, 1);
     213             : 
     214             :         /* initialize fixed-numbered stats */
     215      492926 :         for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     216             :         {
     217      491008 :             const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     218             :             char       *ptr;
     219             : 
     220      491008 :             if (!kind_info || !kind_info->fixed_amount)
     221      479492 :                 continue;
     222             : 
     223       11516 :             if (pgstat_is_kind_builtin(kind))
     224       11508 :                 ptr = ((char *) ctl) + kind_info->shared_ctl_off;
     225             :             else
     226             :             {
     227           8 :                 int         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
     228             : 
     229             :                 Assert(kind_info->shared_size != 0);
     230           8 :                 ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
     231           8 :                 ptr = ctl->custom_data[idx];
     232             :             }
     233             : 
     234       11516 :             kind_info->init_shmem_cb(ptr);
     235             :         }
     236             :     }
     237             :     else
     238             :     {
     239             :         Assert(found);
     240             :     }
     241        1918 : }
     242             : 
     243             : void
     244       34724 : pgstat_attach_shmem(void)
     245             : {
     246             :     MemoryContext oldcontext;
     247             : 
     248             :     Assert(pgStatLocal.dsa == NULL);
     249             : 
     250             :     /* stats shared memory persists for the backend lifetime */
     251       34724 :     oldcontext = MemoryContextSwitchTo(TopMemoryContext);
     252             : 
     253       34724 :     pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
     254             :                                           NULL);
     255       34724 :     dsa_pin_mapping(pgStatLocal.dsa);
     256             : 
     257       69448 :     pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
     258       34724 :                                             pgStatLocal.shmem->hash_handle, 0);
     259             : 
     260       34724 :     MemoryContextSwitchTo(oldcontext);
     261       34724 : }
     262             : 
     263             : void
     264       34724 : pgstat_detach_shmem(void)
     265             : {
     266             :     Assert(pgStatLocal.dsa);
     267             : 
     268             :     /* we shouldn't leave references to shared stats */
     269       34724 :     pgstat_release_all_entry_refs(false);
     270             : 
     271       34724 :     dshash_detach(pgStatLocal.shared_hash);
     272       34724 :     pgStatLocal.shared_hash = NULL;
     273             : 
     274       34724 :     dsa_detach(pgStatLocal.dsa);
     275             : 
     276             :     /*
     277             :      * dsa_detach() does not decrement the DSA reference count as no segment
     278             :      * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
     279             :      * be registered.  Hence, release it manually now.
     280             :      */
     281       34724 :     dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
     282             : 
     283       34724 :     pgStatLocal.dsa = NULL;
     284       34724 : }
     285             : 
     286             : 
     287             : /* ------------------------------------------------------------
     288             :  * Maintenance of shared memory stats entries
     289             :  * ------------------------------------------------------------
     290             :  */
     291             : 
     292             : PgStatShared_Common *
     293      534644 : pgstat_init_entry(PgStat_Kind kind,
     294             :                   PgStatShared_HashEntry *shhashent)
     295             : {
     296             :     /* Create new stats entry. */
     297             :     dsa_pointer chunk;
     298             :     PgStatShared_Common *shheader;
     299             : 
     300             :     /*
     301             :      * Initialize refcount to 1, marking it as valid / not dropped. The entry
     302             :      * can't be freed before the initialization because it can't be found as
     303             :      * long as we hold the dshash partition lock. Caller needs to increase
     304             :      * further if a longer lived reference is needed.
     305             :      */
     306      534644 :     pg_atomic_init_u32(&shhashent->refcount, 1);
     307             : 
     308             :     /*
     309             :      * Initialize "generation" to 0, as freshly created.
     310             :      */
     311      534644 :     pg_atomic_init_u32(&shhashent->generation, 0);
     312      534644 :     shhashent->dropped = false;
     313             : 
     314      534644 :     chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
     315      534644 :     shheader = dsa_get_address(pgStatLocal.dsa, chunk);
     316      534644 :     shheader->magic = 0xdeadbeef;
     317             : 
     318             :     /* Link the new entry from the hash entry. */
     319      534644 :     shhashent->body = chunk;
     320             : 
     321      534644 :     LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
     322             : 
     323      534644 :     return shheader;
     324             : }
     325             : 
     326             : static PgStatShared_Common *
     327           4 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
     328             : {
     329             :     PgStatShared_Common *shheader;
     330             : 
     331           4 :     shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     332             : 
     333             :     /* mark as not dropped anymore */
     334           4 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     335             : 
     336             :     /*
     337             :      * Increment "generation", to let any backend with local references know
     338             :      * that what they point to is outdated.
     339             :      */
     340           4 :     pg_atomic_fetch_add_u32(&shhashent->generation, 1);
     341           4 :     shhashent->dropped = false;
     342             : 
     343             :     /* reinitialize content */
     344             :     Assert(shheader->magic == 0xdeadbeef);
     345           4 :     memset(pgstat_get_entry_data(kind, shheader), 0,
     346             :            pgstat_get_entry_len(kind));
     347             : 
     348           4 :     return shheader;
     349             : }
     350             : 
     351             : static void
     352   105877718 : pgstat_setup_shared_refs(void)
     353             : {
     354   105877718 :     if (likely(pgStatEntryRefHash != NULL))
     355   105847510 :         return;
     356             : 
     357       30208 :     pgStatEntryRefHash =
     358       30208 :         pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
     359             :                                      PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
     360       30208 :     pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     361             :     Assert(pgStatSharedRefAge != 0);
     362             : }
     363             : 
     364             : /*
     365             :  * Helper function for pgstat_get_entry_ref().
     366             :  */
     367             : static void
     368     1610066 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
     369             :                          PgStatShared_HashEntry *shhashent,
     370             :                          PgStatShared_Common *shheader)
     371             : {
     372             :     Assert(shheader->magic == 0xdeadbeef);
     373             :     Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
     374             : 
     375     1610066 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     376             : 
     377     1610066 :     dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     378             : 
     379     1610066 :     entry_ref->shared_stats = shheader;
     380     1610066 :     entry_ref->shared_entry = shhashent;
     381     1610066 :     entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
     382     1610066 : }
     383             : 
     384             : /*
     385             :  * Helper function for pgstat_get_entry_ref().
     386             :  */
     387             : static bool
     388   105877718 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
     389             : {
     390             :     bool        found;
     391             :     PgStat_EntryRefHashEntry *cache_entry;
     392             : 
     393             :     /*
     394             :      * We immediately insert a cache entry, because it avoids 1) multiple
     395             :      * hashtable lookups in case of a cache miss 2) having to deal with
     396             :      * out-of-memory errors after incrementing PgStatShared_HashEntry->refcount.
     397             :      */
     398             : 
     399   105877718 :     cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
     400             : 
     401   105877718 :     if (!found || !cache_entry->entry_ref)
     402     1760416 :     {
     403             :         PgStat_EntryRef *entry_ref;
     404             : 
     405     1760416 :         cache_entry->entry_ref = entry_ref =
     406     1760416 :             MemoryContextAlloc(pgStatSharedRefContext,
     407             :                                sizeof(PgStat_EntryRef));
     408     1760416 :         entry_ref->shared_stats = NULL;
     409     1760416 :         entry_ref->shared_entry = NULL;
     410     1760416 :         entry_ref->pending = NULL;
     411             : 
     412     1760416 :         found = false;
     413             :     }
     414   104117302 :     else if (cache_entry->entry_ref->shared_stats == NULL)
     415             :     {
     416             :         Assert(cache_entry->entry_ref->pending == NULL);
     417           0 :         found = false;
     418             :     }
     419             :     else
     420             :     {
     421             :         PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
     422             : 
     423   104117302 :         entry_ref = cache_entry->entry_ref;
     424             :         Assert(entry_ref->shared_entry != NULL);
     425             :         Assert(entry_ref->shared_stats != NULL);
     426             : 
     427             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     428             :         /* should have at least our reference */
     429             :         Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
     430             :     }
     431             : 
     432   105877718 :     *entry_ref_p = cache_entry->entry_ref;
     433   105877718 :     return found;
     434             : }
     435             : 
     436             : /*
     437             :  * Get a shared stats reference. If create is true, the shared stats object is
     438             :  * created if it does not exist.
     439             :  *
     440             :  * When create is true, and created_entry is non-NULL, it'll be set to true
     441             :  * if the entry is newly created, false otherwise.
     442             :  */
     443             : PgStat_EntryRef *
     444   105877718 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
     445             :                      bool *created_entry)
     446             : {
     447             :     PgStat_HashKey key;
     448             :     PgStatShared_HashEntry *shhashent;
     449   105877718 :     PgStatShared_Common *shheader = NULL;
     450             :     PgStat_EntryRef *entry_ref;
     451             : 
     452             :     /* clear padding */
     453   105877718 :     memset(&key, 0, sizeof(struct PgStat_HashKey));
     454             : 
     455   105877718 :     key.kind = kind;
     456   105877718 :     key.dboid = dboid;
     457   105877718 :     key.objid = objid;
     458             : 
     459             :     /*
     460             :      * passing in created_entry only makes sense if we could possibly create
     461             :      * an entry.
     462             :      */
     463             :     Assert(create || created_entry == NULL);
     464             :     pgstat_assert_is_up();
     465             :     Assert(pgStatLocal.shared_hash != NULL);
     466             :     Assert(!pgStatLocal.shmem->is_shutdown);
     467             : 
     468   105877718 :     pgstat_setup_memcxt();
     469   105877718 :     pgstat_setup_shared_refs();
     470             : 
     471   105877718 :     if (created_entry != NULL)
     472         214 :         *created_entry = false;
     473             : 
     474             :     /*
     475             :      * Check if other backends dropped stats that could not be deleted because
     476             :      * somebody held references to it. If so, check this backend's references.
     477             :      * This is not expected to happen often. The location of the check is a
     478             :      * bit random, but this is a relatively frequently called path, so better
     479             :      * than most.
     480             :      */
     481   105877718 :     if (pgstat_need_entry_refs_gc())
     482       11668 :         pgstat_gc_entry_refs();
     483             : 
     484             :     /*
     485             :      * First check the lookup cache hashtable in local memory. If we find a
     486             :      * match here we can avoid taking locks / causing contention.
     487             :      */
     488   105877718 :     if (pgstat_get_entry_ref_cached(key, &entry_ref))
     489   104117302 :         return entry_ref;
     490             : 
     491             :     Assert(entry_ref != NULL);
     492             : 
     493             :     /*
     494             :      * Do a lookup in the hash table first - it's quite likely that the entry
     495             :      * already exists, and that way we only need a shared lock.
     496             :      */
     497     1760416 :     shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
     498             : 
     499     1760416 :     if (create && !shhashent)
     500             :     {
     501             :         bool        shfound;
     502             : 
     503             :         /*
     504             :          * It's possible that somebody created the entry since the above
     505             :          * lookup. If so, fall through to the same path we would have taken
     506             :          * if it had already been created before the dshash_find() call.
     507             :          */
     508      210830 :         shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
     509      210830 :         if (!shfound)
     510             :         {
     511      210830 :             shheader = pgstat_init_entry(kind, shhashent);
     512      210830 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     513             : 
     514      210830 :             if (created_entry != NULL)
     515          96 :                 *created_entry = true;
     516             : 
     517      210830 :             return entry_ref;
     518             :         }
     519             :     }
     520             : 
     521     1549586 :     if (!shhashent)
     522             :     {
     523             :         /*
     524             :          * If we're not creating, delete the reference again. In all
     525             :          * likelihood it's just a stats lookup - no point wasting memory for a
     526             :          * shared ref to nothing...
     527             :          */
     528      150276 :         pgstat_release_entry_ref(key, entry_ref, false);
     529             : 
     530      150276 :         return NULL;
     531             :     }
     532             :     else
     533             :     {
     534             :         /*
     535             :          * Can get here either because dshash_find() found a match, or if
     536             :          * dshash_find_or_insert() found a concurrently inserted entry.
     537             :          */
     538             : 
     539     1399310 :         if (shhashent->dropped && create)
     540             :         {
     541             :             /*
     542             :              * There are legitimate cases where the old stats entry might not
     543             :              * yet have been dropped by the time it's reused. The most obvious
     544             :              * case is replication slot stats, where a new slot can be
     545             :              * created with the same index just after dropping. But oid
     546             :              * wraparound can lead to other cases as well. We just reset the
     547             :              * stats to their plain state, while incrementing its "generation"
     548             :              * in the shared entry for any remaining local references.
     549             :              */
     550           4 :             shheader = pgstat_reinit_entry(kind, shhashent);
     551           4 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     552             : 
     553           4 :             if (created_entry != NULL)
     554           0 :                 *created_entry = true;
     555             : 
     556           4 :             return entry_ref;
     557             :         }
     558     1399306 :         else if (shhashent->dropped)
     559             :         {
     560          74 :             dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     561          74 :             pgstat_release_entry_ref(key, entry_ref, false);
     562             : 
     563          74 :             return NULL;
     564             :         }
     565             :         else
     566             :         {
     567     1399232 :             shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     568     1399232 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     569             : 
     570     1399232 :             return entry_ref;
     571             :         }
     572             :     }
     573             : }
     574             : 
                       /*
                        * Release a backend-local reference to a shared stats entry.
                        *
                        * key is the local hash key under which entry_ref is registered in
                        * pgStatEntryRefHash.  If entry_ref still has pending data, that data is
                        * deleted when discard_pending is true; otherwise an ERROR is raised.
                        *
                        * The refcount of the shared entry is then decremented.  If this was the
                        * last reference, the shared hash entry is looked up again and freed,
                        * unless its generation no longer matches the one stored in entry_ref, in
                        * which case only the lock obtained by the lookup is released.
                        *
                        * Finally the local hash entry is removed (ERROR if it is missing) and
                        * entry_ref itself is released with pfree().
                        */
     575             : static void
     576     1760416 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
     577             :                          bool discard_pending)
     578             : {
     579     1760416 :     if (entry_ref && entry_ref->pending)
     580             :     {
     581       62240 :         if (discard_pending)
     582       62240 :             pgstat_delete_pending_entry(entry_ref);
     583             :         else
     584           0 :             elog(ERROR, "releasing ref with pending data");
     585             :     }
     586             : 
     587     1760416 :     if (entry_ref && entry_ref->shared_stats)
     588             :     {
     589             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     590             :         Assert(entry_ref->pending == NULL);
     591             : 
     592             :         /*
     593             :          * This can't race with another backend looking up the stats entry and
     594             :          * increasing the refcount because it is not "legal" to create
     595             :          * additional references to dropped entries.
                                *
                                * A result of 1 is the value before the decrement, i.e. the
                                * refcount is now 0 (see the assertion further down).
     596             :          */
     597     1610066 :         if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
     598             :         {
     599             :             PgStatShared_HashEntry *shent;
     600             : 
     601             :             /*
     602             :              * We're the last referrer to this entry, try to drop the shared
     603             :              * entry.
     604             :              */
     605             : 
     606             :             /* only dropped entries can reach a 0 refcount */
     607             :             Assert(entry_ref->shared_entry->dropped);
     608             : 
     609        9352 :             shent = dshash_find(pgStatLocal.shared_hash,
     610        9352 :                                 &entry_ref->shared_entry->key,
     611             :                                 true);
     612        9352 :             if (!shent)
     613           0 :                 elog(ERROR, "could not find just referenced shared stats entry");
     614             : 
     615             :             /*
     616             :              * This entry may have been reinitialized while trying to release
     617             :              * it, so double-check that it has not been reused while holding a
     618             :              * lock on its shared entry.
     619             :              */
     620        9352 :             if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
     621        9352 :                 entry_ref->generation)
     622             :             {
     623             :                 /* Same "generation", so we're OK with the removal */
     624             :                 Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
     625             :                 Assert(entry_ref->shared_entry == shent);
     626        9352 :                 pgstat_free_entry(shent, NULL);
     627             :             }
     628             :             else
     629             :             {
     630             :                 /*
     631             :                  * Shared stats entry has been reinitialized, so do not drop
     632             :                  * its shared entry, only release its lock.
     633             :                  */
     634           0 :                 dshash_release_lock(pgStatLocal.shared_hash, shent);
     635             :             }
     636             :         }
     637             :     }
     638             : 
                           /* drop the local hash entry before freeing the reference itself */
     639     1760416 :     if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
     640           0 :         elog(ERROR, "entry ref vanished before deletion");
     641             : 
     642     1760416 :     if (entry_ref)
     643     1760416 :         pfree(entry_ref);
     644     1760416 : }
     645             : 
                       /*
                        * Acquire the LWLock embedded in the shared stats of entry_ref in exclusive
                        * mode.
                        *
                        * With nowait set, only a conditional acquisition is attempted and its
                        * result is returned.  Otherwise the lock is acquired unconditionally and
                        * true is returned.
                        */
     646             : bool
     647     1927698 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
     648             : {
     649     1927698 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     650             : 
     651     1927698 :     if (nowait)
     652      592050 :         return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
     653             : 
     654     1335648 :     LWLockAcquire(lock, LW_EXCLUSIVE);
     655     1335648 :     return true;
     656             : }
     657             : 
     658             : /*
                        * Acquire the LWLock embedded in the shared stats of entry_ref in shared
                        * mode.
                        *
     659             :  * Separate from pgstat_lock_entry() as most callers will need to lock
     660             :  * exclusively.
                        *
                        * With nowait set, only a conditional acquisition is attempted and its
                        * result is returned.  Otherwise the lock is acquired unconditionally and
                        * true is returned.
     661             :  */
     662             : bool
     663      354756 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
     664             : {
     665      354756 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     666             : 
     667      354756 :     if (nowait)
     668           0 :         return LWLockConditionalAcquire(lock, LW_SHARED);
     669             : 
     670      354756 :     LWLockAcquire(lock, LW_SHARED);
     671      354756 :     return true;
     672             : }
     673             : 
                       /*
                        * Release the LWLock embedded in the shared stats of entry_ref, as taken
                        * by pgstat_lock_entry() or pgstat_lock_entry_shared().
                        */
     674             : void
     675     2282454 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
     676             : {
     677     2282454 :     LWLockRelease(&entry_ref->shared_stats->lock);
     678     2282454 : }
     679             : 
     680             : /*
     681             :  * Helper function to fetch and lock shared stats.
                        *
                        * Looks up the entry reference for (kind, dboid, objid) through
                        * pgstat_get_entry_ref() and then takes the entry lock in exclusive mode
                        * through pgstat_lock_entry().
                        *
                        * Returns the locked entry reference, or NULL if the lock could not be
                        * acquired, which pgstat_lock_entry() only reports when nowait is set.
                        *
                        * NOTE(review): the lookup result is handed to pgstat_lock_entry() without
                        * a NULL check; presumably passing true as the fourth lookup argument
                        * guarantees a non-NULL result - confirm against pgstat_get_entry_ref().
     682             :  */
     683             : PgStat_EntryRef *
     684      142894 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
     685             :                             bool nowait)
     686             : {
     687             :     PgStat_EntryRef *entry_ref;
     688             : 
     689             :     /* find shared table stats entry corresponding to the local entry */
     690      142894 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
     691             : 
     692             :     /* lock the shared entry to protect the content, skip if failed */
     693      142894 :     if (!pgstat_lock_entry(entry_ref, nowait))
     694           0 :         return NULL;
     695             : 
     696      142894 :     return entry_ref;
     697             : }
     698             : 
                       /*
                        * Request garbage collection of cached entry references by incrementing
                        * the shared gc_request_count.  pgstat_need_entry_refs_gc() compares that
                        * counter against the backend-local pgStatSharedRefAge.
                        */
     699             : void
     700        3688 : pgstat_request_entry_refs_gc(void)
     701             : {
     702        3688 :     pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
     703        3688 : }
     704             : 
                       /*
                        * Report whether this backend should garbage collect its entry references.
                        *
                        * Returns false if the local entry reference hash does not exist.  Otherwise
                        * returns true when the shared gc_request_count differs from the value
                        * remembered in pgStatSharedRefAge.
                        */
     705             : static bool
     706   105877718 : pgstat_need_entry_refs_gc(void)
     707             : {
     708             :     uint64      curage;
     709             : 
     710   105877718 :     if (!pgStatEntryRefHash)
     711           0 :         return false;
     712             : 
     713             :     /* should have been initialized when creating pgStatEntryRefHash */
     714             :     Assert(pgStatSharedRefAge != 0);
     715             : 
     716   105877718 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     717             : 
     718   105877718 :     return pgStatSharedRefAge != curage;
     719             : }
     720             : 
                       /*
                        * Garbage collect the backend-local entry references.
                        *
                        * Walks pgStatEntryRefHash and releases every reference whose shared entry
                        * is marked dropped or whose generation differs from the one stored in the
                        * reference.  References that still have pending data are kept.
                        *
                        * The gc_request_count value read before the walk is stored in
                        * pgStatSharedRefAge once the walk is done.
                        */
     721             : static void
     722       11668 : pgstat_gc_entry_refs(void)
     723             : {
     724             :     pgstat_entry_ref_hash_iterator i;
     725             :     PgStat_EntryRefHashEntry *ent;
     726             :     uint64      curage;
     727             : 
     728       11668 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     729             :     Assert(curage != 0);
     730             : 
     731             :     /*
     732             :      * Some entries have been dropped or reinitialized.  Invalidate cache
     733             :      * pointer to them.
     734             :      */
     735       11668 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     736      870674 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
     737             :     {
     738      859006 :         PgStat_EntryRef *entry_ref = ent->entry_ref;
     739             : 
     740             :         Assert(!entry_ref->shared_stats ||
     741             :                entry_ref->shared_stats->magic == 0xdeadbeef);
     742             : 
     743             :         /*
     744             :          * "generation" checks for the case of entries being reinitialized,
     745             :          * and "dropped" for the case where they have been dropped.  Entries
                                * that are neither are still valid and are left alone.
     746             :          */
     747      859006 :         if (!entry_ref->shared_entry->dropped &&
     748      621702 :             pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
     749      621702 :             entry_ref->generation)
     750      621698 :             continue;
     751             : 
     752             :         /* cannot gc shared ref that has pending data */
     753      237308 :         if (entry_ref->pending != NULL)
     754      228226 :             continue;
     755             : 
     756        9082 :         pgstat_release_entry_ref(ent->key, entry_ref, false);
     757             :     }
     758             : 
     759       11668 :     pgStatSharedRefAge = curage;
     760       11668 : }
     761             : 
                       /*
                        * Release the local entry references selected by a match callback.
                        *
                        * Every entry of pgStatEntryRefHash for which match(ent, match_data)
                        * returns true is released with pgstat_release_entry_ref(); a NULL match
                        * selects all entries.  discard_pending is passed on unchanged.
                        *
                        * Does nothing if the local hash has not been created.
                        */
     762             : static void
     763       30254 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
     764             :                                    Datum match_data)
     765             : {
     766             :     pgstat_entry_ref_hash_iterator i;
     767             :     PgStat_EntryRefHashEntry *ent;
     768             : 
     769       30254 :     if (pgStatEntryRefHash == NULL)
     770          12 :         return;
     771             : 
     772       30242 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     773             : 
     774     1539754 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
     775             :            != NULL)
     776             :     {
     777             :         Assert(ent->entry_ref != NULL);
     778             : 
                               /* skip entries rejected by the callback, if one was given */
     779     1509512 :         if (match && !match(ent, match_data))
     780        1106 :             continue;
     781             : 
     782     1508406 :         pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
     783             :     }
     784             : }
     785             : 
     786             : /*
     787             :  * Release all local references to shared stats entries.
     788             :  *
     789             :  * When a process exits it cannot do so while still holding references onto
     790             :  * stats entries, otherwise the shared stats entries could never be freed.
                        *
                        * discard_pending is passed on to the release of each reference.  Once all
                        * references are gone, the local hash table is destroyed and
                        * pgStatEntryRefHash is reset to NULL.  Does nothing if the hash table was
                        * never created.
     791             :  */
     792             : static void
     793       34724 : pgstat_release_all_entry_refs(bool discard_pending)
     794             : {
     795       34724 :     if (pgStatEntryRefHash == NULL)
     796        4516 :         return;
     797             : 
     798       30208 :     pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
     799             :     Assert(pgStatEntryRefHash->members == 0);
     800       30208 :     pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
     801       30208 :     pgStatEntryRefHash = NULL;
     802             : }
     803             : 
                       /*
                        * Match callback for pgstat_release_matching_entry_refs(): returns true if
                        * the key of the entry belongs to the database whose OID is carried in
                        * match_data.
                        */
     804             : static bool
     805        1106 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
     806             : {
     807        1106 :     Oid         dboid = DatumGetObjectId(match_data);
     808             : 
     809        1106 :     return ent->key.dboid == dboid;
     810             : }
     811             : 
                       /*
                        * Release all local references to stats entries of database dboid,
                        * discarding any pending data attached to them.
                        */
     812             : static void
     813          46 : pgstat_release_db_entry_refs(Oid dboid)
     814             : {
     815          46 :     pgstat_release_matching_entry_refs( /* discard pending = */ true,
     816             :                                        match_db,
     817             :                                        ObjectIdGetDatum(dboid));
     818          46 : }
     819             : 
     820             : 
     821             : /* ------------------------------------------------------------
     822             :  * Dropping and resetting of stats entries
     823             :  * ------------------------------------------------------------
     824             :  */
     825             : 
                       /*
                        * Remove a shared hash entry and free the dsa memory of its body.
                        *
                        * If hstat is NULL the entry is removed with dshash_delete_entry();
                        * otherwise the entry is the current one of the sequential scan hstat and
                        * is removed with dshash_delete_current().
                        */
     826             : static void
     827       94756 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
     828             : {
     829             :     dsa_pointer pdsa;
     830             : 
     831             :     /*
     832             :      * Fetch dsa pointer before deleting entry - that way we can free the
     833             :      * memory after releasing the lock.
     834             :      */
     835       94756 :     pdsa = shent->body;
     836             : 
     837       94756 :     if (!hstat)
     838       92822 :         dshash_delete_entry(pgStatLocal.shared_hash, shent);
     839             :     else
     840        1934 :         dshash_delete_current(hstat);
     841             : 
     842       94756 :     dsa_free(pgStatLocal.dsa, pdsa);
     843       94756 : }
     844             : 
     845             : /*
     846             :  * Helper for both pgstat_drop_database_and_contents() and
     847             :  * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
     848             :  * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
     849             :  * case the entry needs to be already locked.
                        *
                        * The entry is marked as dropped and the refcount is decremented by one.
                        * Returns true if that brought the refcount to zero and the entry was
                        * freed.  Returns false if other references remain; in that case the entry
                        * lock is released here when hstat is NULL.
                        *
                        * Raises an ERROR if the entry is already marked as dropped.
     850             :  */
     851             : static bool
     852       94762 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
     853             :                            dshash_seq_status *hstat)
     854             : {
     855             :     Assert(shent->body != InvalidDsaPointer);
     856             : 
     857             :     /* should already have released local reference */
     858       94762 :     if (pgStatEntryRefHash)
     859             :         Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
     860             : 
     861             :     /*
     862             :      * Signal that the entry is dropped - this will eventually cause other
     863             :      * backends to release their references.
     864             :      */
     865       94762 :     if (shent->dropped)
     866           0 :         elog(ERROR,
     867             :              "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%llu refcount=%u",
     868             :              pgstat_get_kind_info(shent->key.kind)->name,
     869             :              shent->key.dboid,
     870             :              (unsigned long long) shent->key.objid,
     871             :              pg_atomic_read_u32(&shent->refcount));
     872       94762 :     shent->dropped = true;
     873             : 
     874             :     /* release refcount marking entry as not dropped */
     875       94762 :     if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
     876             :     {
     877       85404 :         pgstat_free_entry(shent, hstat);
     878       85404 :         return true;
     879             :     }
     880             :     else
     881             :     {
     882        9358 :         if (!hstat)
     883        9358 :             dshash_release_lock(pgStatLocal.shared_hash, shent);
     884        9358 :         return false;
     885             :     }
     886             : }
     887             : 
     888             : /*
     889             :  * Drop stats for the database and all the objects inside that database.
                        *
                        * Local references to entries of the database are released first.  The
                        * shared hash is then scanned, and every entry whose key carries dboid and
                        * that is not already marked dropped is dropped.  If at least one entry
                        * could not be freed, garbage collection of entry references is requested.
     890             :  */
     891             : static void
     892          46 : pgstat_drop_database_and_contents(Oid dboid)
     893             : {
     894             :     dshash_seq_status hstat;
     895             :     PgStatShared_HashEntry *p;
     896          46 :     uint64      not_freed_count = 0;
     897             : 
     898             :     Assert(OidIsValid(dboid));
     899             : 
     900             :     Assert(pgStatLocal.shared_hash != NULL);
     901             : 
     902             :     /*
     903             :      * This backend might very well be the only backend holding a reference to
     904             :      * about-to-be-dropped entries. Ensure that we're not preventing it from
     905             :      * being cleaned up till later.
     906             :      *
     907             :      * Doing this separately from the dshash iteration below avoids having to
     908             :      * do so while holding a partition lock on the shared hashtable.
     909             :      */
     910          46 :     pgstat_release_db_entry_refs(dboid);
     911             : 
     912             :     /* some of the dshash entries are to be removed, take exclusive lock. */
     913          46 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     914       12180 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     915             :     {
                               /* entries already marked as dropped need no further action here */
     916       12134 :         if (p->dropped)
     917           2 :             continue;
     918             : 
     919       12132 :         if (p->key.dboid != dboid)
     920       10310 :             continue;
     921             : 
     922        1822 :         if (!pgstat_drop_entry_internal(p, &hstat))
     923             :         {
     924             :             /*
     925             :              * Even statistics for a dropped database might currently be
     926             :              * accessed (consider e.g. database stats for pg_stat_database).
     927             :              */
     928           0 :             not_freed_count++;
     929             :         }
     930             :     }
     931          46 :     dshash_seq_term(&hstat);
     932             : 
     933             :     /*
     934             :      * If some of the stats data could not be freed, signal the reference
     935             :      * holders to run garbage collection of their cached entry references.
     936             :      */
     937          46 :     if (not_freed_count > 0)
     938           0 :         pgstat_request_entry_refs_gc();
     939          46 : }
     940             : 
     941             : /*
     942             :  * Drop a single stats entry.
     943             :  *
     944             :  * This routine returns false if the stats entry of the dropped object could
     945             :  * not be freed, true otherwise.
                        * True is also returned when no shared entry exists for the given key.
     946             :  *
     947             :  * The callers of this function should call pgstat_request_entry_refs_gc()
     948             :  * if the stats entry could not be freed, to ensure that this entry's memory
     949             :  * can be reclaimed later by a different backend calling
     950             :  * pgstat_gc_entry_refs().
                        *
                        * Any local reference to the entry is released first, discarding its
                        * pending data.  When the dropped entry is of kind PGSTAT_KIND_DATABASE,
                        * the entries of that database are dropped as well; the return value still
                        * only reflects the database entry itself.
     951             :  */
     952             : bool
     953      132300 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
     954             : {
     955             :     PgStat_HashKey key;
     956             :     PgStatShared_HashEntry *shent;
     957      132300 :     bool        freed = true;
     958             : 
     959             :     /* clear padding */
     960      132300 :     memset(&key, 0, sizeof(struct PgStat_HashKey));
     961             : 
     962      132300 :     key.kind = kind;
     963      132300 :     key.dboid = dboid;
     964      132300 :     key.objid = objid;
     965             : 
     966             :     /* delete local reference */
     967      132300 :     if (pgStatEntryRefHash)
     968             :     {
     969             :         PgStat_EntryRefHashEntry *lohashent =
     970      110158 :             pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
     971             : 
     972      110158 :         if (lohashent)
     973       92578 :             pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
     974             :                                      true);
     975             :     }
     976             : 
     977             :     /* mark entry in shared hashtable as deleted, drop if possible */
     978      132300 :     shent = dshash_find(pgStatLocal.shared_hash, &key, true);
     979      132300 :     if (shent)
     980             :     {
     981       92828 :         freed = pgstat_drop_entry_internal(shent, NULL);
     982             : 
     983             :         /*
     984             :          * Database stats contain other stats. Drop those as well when
     985             :          * dropping the database. XXX: Perhaps this should be done in a
     986             :          * slightly more principled way? But not obvious what that'd look
     987             :          * like, and so far this is the only case...
     988             :          */
     989       92828 :         if (key.kind == PGSTAT_KIND_DATABASE)
     990          46 :             pgstat_drop_database_and_contents(key.dboid);
     991             :     }
     992             : 
     993      132300 :     return freed;
     994             : }
     995             : 
                       /*
                        * Drop every entry of the shared stats hash that is not already marked as
                        * dropped.  If at least one entry could not be freed, garbage collection
                        * of entry references is requested.
                        */
     996             : void
     997         458 : pgstat_drop_all_entries(void)
     998             : {
     999             :     dshash_seq_status hstat;
    1000             :     PgStatShared_HashEntry *ps;
    1001         458 :     uint64      not_freed_count = 0;
    1002             : 
    1003         458 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
    1004         570 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
    1005             :     {
    1006         112 :         if (ps->dropped)
    1007           0 :             continue;
    1008             : 
                               /* count entries that remain referenced elsewhere */
    1009         112 :         if (!pgstat_drop_entry_internal(ps, &hstat))
    1010           0 :             not_freed_count++;
    1011             :     }
    1012         458 :     dshash_seq_term(&hstat);
    1013             : 
    1014         458 :     if (not_freed_count > 0)
    1015           0 :         pgstat_request_entry_refs_gc();
    1016         458 : }
    1017             : 
                       /*
                        * Reset the contents of one shared stats entry.
                        *
                        * The data area of the entry, as located and sized for the given kind, is
                        * zeroed.  If the kind provides a reset_timestamp_cb, it is then called
                        * with the header and ts.
                        *
                        * The callers in this file hold the LWLock of the entry in exclusive mode
                        * around this call.
                        */
    1018             : static void
    1019       16984 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
    1020             :                            TimestampTz ts)
    1021             : {
    1022       16984 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1023             : 
    1024       16984 :     memset(pgstat_get_entry_data(kind, header), 0,
    1025             :            pgstat_get_entry_len(kind));
    1026             : 
    1027       16984 :     if (kind_info->reset_timestamp_cb)
    1028         368 :         kind_info->reset_timestamp_cb(header, ts);
    1029       16984 : }
    1030             : 
    1031             : /*
    1032             :  * Reset one variable-numbered stats entry.
                        *
                        * Does nothing if no entry reference is found for (kind, dboid, objid) or
                        * if the shared entry is marked as dropped.  Otherwise the entry is locked
                        * exclusively, its contents are reset with ts, and the lock is released.
    1033             :  */
    1034             : void
    1035         340 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
    1036             : {
    1037             :     PgStat_EntryRef *entry_ref;
    1038             : 
    1039             :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
    1040             : 
    1041         340 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1042         340 :     if (!entry_ref || entry_ref->shared_entry->dropped)
    1043           2 :         return;
    1044             : 
                           /* with nowait = false the lock call always returns true */
    1045         338 :     (void) pgstat_lock_entry(entry_ref, false);
    1046         338 :     shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
    1047         338 :     pgstat_unlock_entry(entry_ref);
    1048             : }
    1049             : 
    1050             : /*
    1051             :  * Scan through the shared hashtable of stats, resetting statistics if
    1052             :  * approved by the provided do_reset() function.
                        *
                        * Entries marked as dropped are skipped without consulting do_reset().
                        * match_data is handed to do_reset() unchanged.  Each selected entry is
                        * reset with ts while its LWLock is held in exclusive mode.
    1053             :  */
    1054             : void
    1055          34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
    1056             :                               Datum match_data, TimestampTz ts)
    1057             : {
    1058             :     dshash_seq_status hstat;
    1059             :     PgStatShared_HashEntry *p;
    1060             : 
    1061             :     /* dshash entry is not modified, take shared lock */
    1062          34 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1063       25046 :     while ((p = dshash_seq_next(&hstat)) != NULL)
    1064             :     {
    1065             :         PgStatShared_Common *header;
    1066             : 
    1067       25012 :         if (p->dropped)
    1068           2 :             continue;
    1069             : 
    1070       25010 :         if (!do_reset(p, match_data))
    1071        8364 :             continue;
    1072             : 
    1073       16646 :         header = dsa_get_address(pgStatLocal.dsa, p->body);
    1074             : 
    1075       16646 :         LWLockAcquire(&header->lock, LW_EXCLUSIVE);
    1076             : 
    1077       16646 :         shared_stat_reset_contents(p->key.kind, header, ts);
    1078             : 
    1079       16646 :         LWLockRelease(&header->lock);
    1080             :     }
    1081          34 :     dshash_seq_term(&hstat);
    1082          34 : }
    1083             : 
                       /*
                        * do_reset callback for pgstat_reset_matching_entries(): returns true if
                        * the kind of the entry equals the kind carried as an int32 in match_data.
                        */
    1084             : static bool
    1085        2960 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
    1086             : {
    1087        2960 :     return p->key.kind == DatumGetInt32(match_data);
    1088             : }
    1089             : 
                       /*
                        * Reset all shared stats entries of the given kind, using ts as the reset
                        * timestamp.
                        */
    1090             : void
    1091           8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
    1092             : {
    1093           8 :     pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
    1094           8 : }
    1095             : 
                       /*
                        * Create the memory contexts pgStatSharedRefContext and
                        * pgStatEntryRefHashContext as children of TopMemoryContext if they do not
                        * exist yet.  Calls made after both exist change nothing.
                        */
    1096             : static void
    1097   105877718 : pgstat_setup_memcxt(void)
    1098             : {
    1099   105877718 :     if (unlikely(!pgStatSharedRefContext))
    1100       30208 :         pgStatSharedRefContext =
    1101       30208 :             AllocSetContextCreate(TopMemoryContext,
    1102             :                                   "PgStat Shared Ref",
    1103             :                                   ALLOCSET_SMALL_SIZES);
    1104   105877718 :     if (unlikely(!pgStatEntryRefHashContext))
    1105       30208 :         pgStatEntryRefHashContext =
    1106       30208 :             AllocSetContextCreate(TopMemoryContext,
    1107             :                                   "PgStat Shared Ref Hash",
    1108             :                                   ALLOCSET_SMALL_SIZES);
    1109   105877718 : }

Generated by: LCOV version 1.14