LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat_shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 292 312 93.6 %
Date: 2024-10-10 04:14:55 Functions: 34 34 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -------------------------------------------------------------------------
       2             :  *
       3             :  * pgstat_shmem.c
       4             :  *    Storage of stats entries in shared memory
       5             :  *
       6             :  * Copyright (c) 2001-2024, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/utils/activity/pgstat_shmem.c
      10             :  * -------------------------------------------------------------------------
      11             :  */
      12             : 
      13             : #include "postgres.h"
      14             : 
      15             : #include "pgstat.h"
      16             : #include "storage/shmem.h"
      17             : #include "utils/memutils.h"
      18             : #include "utils/pgstat_internal.h"
      19             : 
      20             : 
      21             : #define PGSTAT_ENTRY_REF_HASH_SIZE  128
      22             : 
      23             : /* hash table entry for finding the PgStat_EntryRef for a key */
      24             : typedef struct PgStat_EntryRefHashEntry
      25             : {
      26             :     PgStat_HashKey key;         /* hash key */
      27             :     char        status;         /* for simplehash use */
      28             :     PgStat_EntryRef *entry_ref;
      29             : } PgStat_EntryRefHashEntry;
      30             : 
      31             : 
      32             : /* for references to shared statistics entries */
      33             : #define SH_PREFIX pgstat_entry_ref_hash
      34             : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
      35             : #define SH_KEY_TYPE PgStat_HashKey
      36             : #define SH_KEY key
      37             : #define SH_HASH_KEY(tb, key) \
      38             :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
      39             : #define SH_EQUAL(tb, a, b) \
      40             :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
      41             : #define SH_SCOPE static inline
      42             : #define SH_DEFINE
      43             : #define SH_DECLARE
      44             : #include "lib/simplehash.h"
      45             : 
      46             : 
      47             : static void pgstat_drop_database_and_contents(Oid dboid);
      48             : 
      49             : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
      50             : 
      51             : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
      52             : static bool pgstat_need_entry_refs_gc(void);
      53             : static void pgstat_gc_entry_refs(void);
      54             : static void pgstat_release_all_entry_refs(bool discard_pending);
      55             : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
      56             : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
      57             : 
      58             : static void pgstat_setup_memcxt(void);
      59             : 
      60             : 
      61             : /* parameter for the shared hash */
      62             : static const dshash_parameters dsh_params = {
      63             :     sizeof(PgStat_HashKey),
      64             :     sizeof(PgStatShared_HashEntry),
      65             :     pgstat_cmp_hash_key,
      66             :     pgstat_hash_hash_key,
      67             :     dshash_memcpy,
      68             :     LWTRANCHE_PGSTATS_HASH
      69             : };
      70             : 
      71             : 
      72             : /*
      73             :  * Backend local references to shared stats entries. If there are pending
      74             :  * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
      75             :  * list.
      76             :  *
      77             :  * When a stats entry is dropped each backend needs to release its reference
      78             :  * to it before the memory can be released. To trigger that
      79             :  * pgStatLocal.shmem->gc_request_count is incremented - which each backend
      80             :  * compares to their copy of pgStatSharedRefAge on a regular basis.
      81             :  */
      82             : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
      83             : static int  pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
      84             : 
      85             : /*
       86             :  * Memory contexts containing the PgStat_EntryRef entries and the
       87             :  * pgStatEntryRefHash table respectively. Kept separate to make it easier to
      88             :  * track / attribute memory usage.
      89             :  */
      90             : static MemoryContext pgStatSharedRefContext = NULL;
      91             : static MemoryContext pgStatEntryRefHashContext = NULL;
      92             : 
      93             : 
      94             : /* ------------------------------------------------------------
      95             :  * Public functions called from postmaster follow
      96             :  * ------------------------------------------------------------
      97             :  */
      98             : 
      99             : /*
     100             :  * The size of the shared memory allocation for stats stored in the shared
     101             :  * stats hash table. This allocation will be done as part of the main shared
     102             :  * memory, rather than dynamic shared memory, allowing it to be initialized in
     103             :  * postmaster.
     104             :  */
     105             : static Size
     106       10724 : pgstat_dsa_init_size(void)
     107             : {
     108             :     Size        sz;
     109             : 
     110             :     /*
     111             :      * The dshash header / initial buckets array needs to fit into "plain"
     112             :      * shared memory, but it's beneficial to not need dsm segments
      113             :      * immediately. A size of 256kB seems to work well and is not
     114             :      * disproportional compared to other constant sized shared memory
     115             :      * allocations. NB: To avoid DSMs further, the user can configure
     116             :      * min_dynamic_shared_memory.
     117             :      */
     118       10724 :     sz = 256 * 1024;
     119             :     Assert(dsa_minimum_size() <= sz);
     120       10724 :     return MAXALIGN(sz);
     121             : }
     122             : 
     123             : /*
     124             :  * Compute shared memory space needed for cumulative statistics
     125             :  */
     126             : Size
     127        5240 : StatsShmemSize(void)
     128             : {
     129             :     Size        sz;
     130             : 
     131        5240 :     sz = MAXALIGN(sizeof(PgStat_ShmemControl));
     132        5240 :     sz = add_size(sz, pgstat_dsa_init_size());
     133             : 
     134             :     /* Add shared memory for all the custom fixed-numbered statistics */
     135      681200 :     for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
     136             :     {
     137      675960 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     138             : 
     139      675960 :         if (!kind_info)
     140      675960 :             continue;
     141           0 :         if (!kind_info->fixed_amount)
     142           0 :             continue;
     143             : 
     144             :         Assert(kind_info->shared_size != 0);
     145             : 
     146           0 :         sz += MAXALIGN(kind_info->shared_size);
     147             :     }
     148             : 
     149        5240 :     return sz;
     150             : }
     151             : 
     152             : /*
     153             :  * Initialize cumulative statistics system during startup
     154             :  */
     155             : void
     156        1828 : StatsShmemInit(void)
     157             : {
     158             :     bool        found;
     159             :     Size        sz;
     160             : 
     161        1828 :     sz = StatsShmemSize();
     162        1828 :     pgStatLocal.shmem = (PgStat_ShmemControl *)
     163        1828 :         ShmemInitStruct("Shared Memory Stats", sz, &found);
     164             : 
     165        1828 :     if (!IsUnderPostmaster)
     166             :     {
     167             :         dsa_area   *dsa;
     168             :         dshash_table *dsh;
     169        1828 :         PgStat_ShmemControl *ctl = pgStatLocal.shmem;
     170        1828 :         char       *p = (char *) ctl;
     171             : 
     172             :         Assert(!found);
     173             : 
     174             :         /* the allocation of pgStatLocal.shmem itself */
     175        1828 :         p += MAXALIGN(sizeof(PgStat_ShmemControl));
     176             : 
     177             :         /*
     178             :          * Create a small dsa allocation in plain shared memory. This is
     179             :          * required because postmaster cannot use dsm segments. It also
     180             :          * provides a small efficiency win.
     181             :          */
     182        1828 :         ctl->raw_dsa_area = p;
     183        1828 :         p += MAXALIGN(pgstat_dsa_init_size());
     184        1828 :         dsa = dsa_create_in_place(ctl->raw_dsa_area,
     185             :                                   pgstat_dsa_init_size(),
     186             :                                   LWTRANCHE_PGSTATS_DSA, 0);
     187        1828 :         dsa_pin(dsa);
     188             : 
     189             :         /*
     190             :          * To ensure dshash is created in "plain" shared memory, temporarily
     191             :          * limit size of dsa to the initial size of the dsa.
     192             :          */
     193        1828 :         dsa_set_size_limit(dsa, pgstat_dsa_init_size());
     194             : 
     195             :         /*
     196             :          * With the limit in place, create the dshash table. XXX: It'd be nice
     197             :          * if there were dshash_create_in_place().
     198             :          */
     199        1828 :         dsh = dshash_create(dsa, &dsh_params, NULL);
     200        1828 :         ctl->hash_handle = dshash_get_hash_table_handle(dsh);
     201             : 
     202             :         /* lift limit set above */
     203        1828 :         dsa_set_size_limit(dsa, -1);
     204             : 
     205             :         /*
     206             :          * Postmaster will never access these again, thus free the local
     207             :          * dsa/dshash references.
     208             :          */
     209        1828 :         dshash_detach(dsh);
     210        1828 :         dsa_detach(dsa);
     211             : 
     212        1828 :         pg_atomic_init_u64(&ctl->gc_request_count, 1);
     213             : 
     214             :         /* initialize fixed-numbered stats */
     215      469796 :         for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     216             :         {
     217      467968 :             const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     218             :             char       *ptr;
     219             : 
     220      467968 :             if (!kind_info || !kind_info->fixed_amount)
     221      457000 :                 continue;
     222             : 
     223       10968 :             if (pgstat_is_kind_builtin(kind))
     224       10968 :                 ptr = ((char *) ctl) + kind_info->shared_ctl_off;
     225             :             else
     226             :             {
     227           0 :                 int         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
     228             : 
     229             :                 Assert(kind_info->shared_size != 0);
     230           0 :                 ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
     231           0 :                 ptr = ctl->custom_data[idx];
     232             :             }
     233             : 
     234       10968 :             kind_info->init_shmem_cb(ptr);
     235             :         }
     236             :     }
     237             :     else
     238             :     {
     239             :         Assert(found);
     240             :     }
     241        1828 : }
     242             : 
     243             : void
     244       33572 : pgstat_attach_shmem(void)
     245             : {
     246             :     MemoryContext oldcontext;
     247             : 
     248             :     Assert(pgStatLocal.dsa == NULL);
     249             : 
     250             :     /* stats shared memory persists for the backend lifetime */
     251       33572 :     oldcontext = MemoryContextSwitchTo(TopMemoryContext);
     252             : 
     253       33572 :     pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
     254             :                                           NULL);
     255       33572 :     dsa_pin_mapping(pgStatLocal.dsa);
     256             : 
     257       67144 :     pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
     258       33572 :                                             pgStatLocal.shmem->hash_handle, 0);
     259             : 
     260       33572 :     MemoryContextSwitchTo(oldcontext);
     261       33572 : }
     262             : 
     263             : void
     264       33572 : pgstat_detach_shmem(void)
     265             : {
     266             :     Assert(pgStatLocal.dsa);
     267             : 
     268             :     /* we shouldn't leave references to shared stats */
     269       33572 :     pgstat_release_all_entry_refs(false);
     270             : 
     271       33572 :     dshash_detach(pgStatLocal.shared_hash);
     272       33572 :     pgStatLocal.shared_hash = NULL;
     273             : 
     274       33572 :     dsa_detach(pgStatLocal.dsa);
     275             : 
     276             :     /*
     277             :      * dsa_detach() does not decrement the DSA reference count as no segment
     278             :      * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
     279             :      * be registered.  Hence, release it manually now.
     280             :      */
     281       33572 :     dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
     282             : 
     283       33572 :     pgStatLocal.dsa = NULL;
     284       33572 : }
     285             : 
     286             : 
     287             : /* ------------------------------------------------------------
     288             :  * Maintenance of shared memory stats entries
     289             :  * ------------------------------------------------------------
     290             :  */
     291             : 
     292             : PgStatShared_Common *
     293      481990 : pgstat_init_entry(PgStat_Kind kind,
     294             :                   PgStatShared_HashEntry *shhashent)
     295             : {
     296             :     /* Create new stats entry. */
     297             :     dsa_pointer chunk;
     298             :     PgStatShared_Common *shheader;
     299             : 
     300             :     /*
     301             :      * Initialize refcount to 1, marking it as valid / not dropped. The entry
     302             :      * can't be freed before the initialization because it can't be found as
     303             :      * long as we hold the dshash partition lock. Caller needs to increase
     304             :      * further if a longer lived reference is needed.
     305             :      */
     306      481990 :     pg_atomic_init_u32(&shhashent->refcount, 1);
     307      481990 :     shhashent->dropped = false;
     308             : 
     309      481990 :     chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
     310      481990 :     shheader = dsa_get_address(pgStatLocal.dsa, chunk);
     311      481990 :     shheader->magic = 0xdeadbeef;
     312             : 
     313             :     /* Link the new entry from the hash entry. */
     314      481990 :     shhashent->body = chunk;
     315             : 
     316      481990 :     LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
     317             : 
     318      481990 :     return shheader;
     319             : }
     320             : 
     321             : static PgStatShared_Common *
     322          54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
     323             : {
     324             :     PgStatShared_Common *shheader;
     325             : 
     326          54 :     shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     327             : 
     328             :     /* mark as not dropped anymore */
     329          54 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     330          54 :     shhashent->dropped = false;
     331             : 
     332             :     /* reinitialize content */
     333             :     Assert(shheader->magic == 0xdeadbeef);
     334          54 :     memset(pgstat_get_entry_data(kind, shheader), 0,
     335             :            pgstat_get_entry_len(kind));
     336             : 
     337          54 :     return shheader;
     338             : }
     339             : 
     340             : static void
     341     3792410 : pgstat_setup_shared_refs(void)
     342             : {
     343     3792410 :     if (likely(pgStatEntryRefHash != NULL))
     344     3763210 :         return;
     345             : 
     346       29200 :     pgStatEntryRefHash =
     347       29200 :         pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
     348             :                                      PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
     349       29200 :     pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     350             :     Assert(pgStatSharedRefAge != 0);
     351             : }
     352             : 
     353             : /*
     354             :  * Helper function for pgstat_get_entry_ref().
     355             :  */
     356             : static void
     357     1544664 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
     358             :                          PgStatShared_HashEntry *shhashent,
     359             :                          PgStatShared_Common *shheader)
     360             : {
     361             :     Assert(shheader->magic == 0xdeadbeef);
     362             :     Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
     363             : 
     364     1544664 :     pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
     365             : 
     366     1544664 :     dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     367             : 
     368     1544664 :     entry_ref->shared_stats = shheader;
     369     1544664 :     entry_ref->shared_entry = shhashent;
     370     1544664 : }
     371             : 
     372             : /*
     373             :  * Helper function for pgstat_get_entry_ref().
     374             :  */
     375             : static bool
     376     3792410 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
     377             : {
     378             :     bool        found;
     379             :     PgStat_EntryRefHashEntry *cache_entry;
     380             : 
     381             :     /*
     382             :      * We immediately insert a cache entry, because it avoids 1) multiple
     383             :      * hashtable lookups in case of a cache miss 2) having to deal with
      384             :      * out-of-memory errors after incrementing PgStatShared_HashEntry->refcount.
     385             :      */
     386             : 
     387     3792410 :     cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
     388             : 
     389     3792410 :     if (!found || !cache_entry->entry_ref)
     390     1686078 :     {
     391             :         PgStat_EntryRef *entry_ref;
     392             : 
     393     1686078 :         cache_entry->entry_ref = entry_ref =
     394     1686078 :             MemoryContextAlloc(pgStatSharedRefContext,
     395             :                                sizeof(PgStat_EntryRef));
     396     1686078 :         entry_ref->shared_stats = NULL;
     397     1686078 :         entry_ref->shared_entry = NULL;
     398     1686078 :         entry_ref->pending = NULL;
     399             : 
     400     1686078 :         found = false;
     401             :     }
     402     2106332 :     else if (cache_entry->entry_ref->shared_stats == NULL)
     403             :     {
     404             :         Assert(cache_entry->entry_ref->pending == NULL);
     405           0 :         found = false;
     406             :     }
     407             :     else
     408             :     {
     409             :         PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
     410             : 
     411     2106332 :         entry_ref = cache_entry->entry_ref;
     412             :         Assert(entry_ref->shared_entry != NULL);
     413             :         Assert(entry_ref->shared_stats != NULL);
     414             : 
     415             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     416             :         /* should have at least our reference */
     417             :         Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
     418             :     }
     419             : 
     420     3792410 :     *entry_ref_p = cache_entry->entry_ref;
     421     3792410 :     return found;
     422             : }
     423             : 
     424             : /*
     425             :  * Get a shared stats reference. If create is true, the shared stats object is
     426             :  * created if it does not exist.
     427             :  *
     428             :  * When create is true, and created_entry is non-NULL, it'll be set to true
     429             :  * if the entry is newly created, false otherwise.
     430             :  */
     431             : PgStat_EntryRef *
     432     3792410 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
     433             :                      bool *created_entry)
     434             : {
     435     3792410 :     PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objid = objid};
     436             :     PgStatShared_HashEntry *shhashent;
     437     3792410 :     PgStatShared_Common *shheader = NULL;
     438             :     PgStat_EntryRef *entry_ref;
     439             : 
     440             :     /*
      441             :      * passing in created_entry only makes sense if we could possibly create
      442             :      * an entry.
     443             :      */
     444             :     Assert(create || created_entry == NULL);
     445             :     pgstat_assert_is_up();
     446             :     Assert(pgStatLocal.shared_hash != NULL);
     447             :     Assert(!pgStatLocal.shmem->is_shutdown);
     448             : 
     449     3792410 :     pgstat_setup_memcxt();
     450     3792410 :     pgstat_setup_shared_refs();
     451             : 
     452     3792410 :     if (created_entry != NULL)
     453         214 :         *created_entry = false;
     454             : 
     455             :     /*
     456             :      * Check if other backends dropped stats that could not be deleted because
     457             :      * somebody held references to it. If so, check this backend's references.
     458             :      * This is not expected to happen often. The location of the check is a
     459             :      * bit random, but this is a relatively frequently called path, so better
     460             :      * than most.
     461             :      */
     462     3792410 :     if (pgstat_need_entry_refs_gc())
     463       11148 :         pgstat_gc_entry_refs();
     464             : 
     465             :     /*
     466             :      * First check the lookup cache hashtable in local memory. If we find a
     467             :      * match here we can avoid taking locks / causing contention.
     468             :      */
     469     3792410 :     if (pgstat_get_entry_ref_cached(key, &entry_ref))
     470     2106332 :         return entry_ref;
     471             : 
     472             :     Assert(entry_ref != NULL);
     473             : 
     474             :     /*
     475             :      * Do a lookup in the hash table first - it's quite likely that the entry
     476             :      * already exists, and that way we only need a shared lock.
     477             :      */
     478     1686078 :     shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
     479             : 
     480     1686078 :     if (create && !shhashent)
     481             :     {
     482             :         bool        shfound;
     483             : 
     484             :         /*
     485             :          * It's possible that somebody created the entry since the above
      486             :          * lookup. If so, fall through to the same path we would have taken
      487             :          * had it already been created before the dshash_find() call.
     488             :          */
     489      173788 :         shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
     490      173788 :         if (!shfound)
     491             :         {
     492      173786 :             shheader = pgstat_init_entry(kind, shhashent);
     493      173786 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     494             : 
     495      173786 :             if (created_entry != NULL)
     496          96 :                 *created_entry = true;
     497             : 
     498      173786 :             return entry_ref;
     499             :         }
     500             :     }
     501             : 
     502     1512292 :     if (!shhashent)
     503             :     {
     504             :         /*
     505             :          * If we're not creating, delete the reference again. In all
     506             :          * likelihood it's just a stats lookup - no point wasting memory for a
     507             :          * shared ref to nothing...
     508             :          */
     509      141340 :         pgstat_release_entry_ref(key, entry_ref, false);
     510             : 
     511      141340 :         return NULL;
     512             :     }
     513             :     else
     514             :     {
     515             :         /*
     516             :          * Can get here either because dshash_find() found a match, or if
     517             :          * dshash_find_or_insert() found a concurrently inserted entry.
     518             :          */
     519             : 
     520     1370952 :         if (shhashent->dropped && create)
     521             :         {
     522             :             /*
     523             :              * There are legitimate cases where the old stats entry might not
     524             :              * yet have been dropped by the time it's reused. The most obvious
      525             :              * case is replication slot stats, where a new slot can be
     526             :              * created with the same index just after dropping. But oid
     527             :              * wraparound can lead to other cases as well. We just reset the
     528             :              * stats to their plain state.
     529             :              */
     530          54 :             shheader = pgstat_reinit_entry(kind, shhashent);
     531          54 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     532             : 
     533          54 :             if (created_entry != NULL)
     534           0 :                 *created_entry = true;
     535             : 
     536          54 :             return entry_ref;
     537             :         }
     538     1370898 :         else if (shhashent->dropped)
     539             :         {
     540          74 :             dshash_release_lock(pgStatLocal.shared_hash, shhashent);
     541          74 :             pgstat_release_entry_ref(key, entry_ref, false);
     542             : 
     543          74 :             return NULL;
     544             :         }
     545             :         else
     546             :         {
     547     1370824 :             shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
     548     1370824 :             pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
     549             : 
     550     1370824 :             return entry_ref;
     551             :         }
     552             :     }
     553             : }
     554             : 
     555             : static void
     556     1686078 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
     557             :                          bool discard_pending)
     558             : {
     559     1686078 :     if (entry_ref && entry_ref->pending)
     560             :     {
     561       60420 :         if (discard_pending)
     562       60420 :             pgstat_delete_pending_entry(entry_ref);
     563             :         else
     564           0 :             elog(ERROR, "releasing ref with pending data");
     565             :     }
     566             : 
     567     1686078 :     if (entry_ref && entry_ref->shared_stats)
     568             :     {
     569             :         Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
     570             :         Assert(entry_ref->pending == NULL);
     571             : 
     572             :         /*
     573             :          * This can't race with another backend looking up the stats entry and
     574             :          * increasing the refcount because it is not "legal" to create
     575             :          * additional references to dropped entries.
     576             :          */
     577     1544664 :         if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
     578             :         {
     579             :             PgStatShared_HashEntry *shent;
     580             : 
     581             :             /*
     582             :              * We're the last referrer to this entry, try to drop the shared
     583             :              * entry.
     584             :              */
     585             : 
     586             :             /* only dropped entries can reach a 0 refcount */
     587             :             Assert(entry_ref->shared_entry->dropped);
     588             : 
     589        9222 :             shent = dshash_find(pgStatLocal.shared_hash,
     590        9222 :                                 &entry_ref->shared_entry->key,
     591             :                                 true);
     592        9222 :             if (!shent)
     593           0 :                 elog(ERROR, "could not find just referenced shared stats entry");
     594             : 
     595             :             Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
     596             :             Assert(entry_ref->shared_entry == shent);
     597             : 
     598        9222 :             pgstat_free_entry(shent, NULL);
     599             :         }
     600             :     }
     601             : 
     602     1686078 :     if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
     603           0 :         elog(ERROR, "entry ref vanished before deletion");
     604             : 
     605     1686078 :     if (entry_ref)
     606     1686078 :         pfree(entry_ref);
     607     1686078 : }
     608             : /*
                     :  * Acquire, in exclusive mode, the LWLock embedded in the shared stats
                     :  * that entry_ref points to.
                     :  *
                     :  * If nowait is true only a conditional acquisition is attempted and its
                     :  * result is returned, so the caller must check the return value before
                     :  * touching the stats. Otherwise the lock is acquired unconditionally and
                     :  * true is returned.
                     :  *
                     :  * entry_ref->shared_stats is dereferenced without a check, so the
                     :  * reference must have shared stats attached. The lock is released again
                     :  * with pgstat_unlock_entry().
                     :  */
     609             : bool
     610     1687996 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
     611             : {
     612     1687996 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     613             : 
     614     1687996 :     if (nowait)
     615      534328 :         return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
     616             : 
     617     1153668 :     LWLockAcquire(lock, LW_EXCLUSIVE);
     618     1153668 :     return true;
     619             : }
     620             : 
     621             : /*
     622             :  * Separate from pgstat_lock_entry() as most callers will need to lock
     623             :  * exclusively.
                     :  *
                     :  * Acquires the LWLock of the shared stats referenced by entry_ref in
                     :  * LW_SHARED mode. With nowait only a conditional acquisition is attempted
                     :  * and its result is returned; otherwise the lock is acquired
                     :  * unconditionally and true is returned. Release the lock with
                     :  * pgstat_unlock_entry().
     624             :  */
     625             : bool
     626      355196 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
     627             : {
     628      355196 :     LWLock     *lock = &entry_ref->shared_stats->lock;
     629             : 
     630      355196 :     if (nowait)
     631           0 :         return LWLockConditionalAcquire(lock, LW_SHARED);
     632             : 
     633      355196 :     LWLockAcquire(lock, LW_SHARED);
     634      355196 :     return true;
     635             : }
     636             : /*
                     :  * Release the LWLock of the shared stats referenced by entry_ref. This is
                     :  * the same lock that pgstat_lock_entry() and pgstat_lock_entry_shared()
                     :  * acquire, so it undoes either of them.
                     :  */
     637             : void
     638     2043190 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
     639             : {
     640     2043190 :     LWLockRelease(&entry_ref->shared_stats->lock);
     641     2043190 : }
     642             : 
     643             : /*
     644             :  * Helper function to fetch and lock shared stats.
                     :  *
                     :  * Looks up the entry reference for (kind, dboid, objid) through
                     :  * pgstat_get_entry_ref() and takes the entry's lock exclusively via
                     :  * pgstat_lock_entry().
                     :  *
                     :  * Returns the locked reference, or NULL if the lock could not be
                     :  * obtained, which pgstat_lock_entry() only reports when nowait is true.
                     :  * On success the caller is responsible for pgstat_unlock_entry().
                     :  *
                     :  * NOTE(review): the lookup result is not checked for NULL before it is
                     :  * locked; presumably passing true as the fourth argument makes
                     :  * pgstat_get_entry_ref() always return a reference - confirm against its
                     :  * definition.
     645             :  */
     646             : PgStat_EntryRef *
     647      140382 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
     648             :                             bool nowait)
     649             : {
     650             :     PgStat_EntryRef *entry_ref;
     651             : 
     652             :     /* look up the reference to the shared stats entry for this key */
     653      140382 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
     654             : 
     655             :     /* lock the shared entry to protect the content, skip if failed */
     656      140382 :     if (!pgstat_lock_entry(entry_ref, nowait))
     657           0 :         return NULL;
     658             : 
     659      140382 :     return entry_ref;
     660             : }
     661             : /*
                     :  * Request garbage collection of entry references.
                     :  *
                     :  * Atomically increments the gc_request_count counter kept in the shared
                     :  * stats control area. pgstat_need_entry_refs_gc() compares that counter
                     :  * with the locally remembered pgStatSharedRefAge to decide whether a
                     :  * collection pass is due.
                     :  */
     662             : void
     663        3646 : pgstat_request_entry_refs_gc(void)
     664             : {
     665        3646 :     pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
     666        3646 : }
     667             : /*
                     :  * Report whether this backend should run pgstat_gc_entry_refs().
                     :  *
                     :  * Returns false when no local entry reference hash exists. Otherwise
                     :  * returns true if the shared gc_request_count differs from
                     :  * pgStatSharedRefAge, the value recorded by the last collection pass.
                     :  */
     668             : static bool
     669     3792410 : pgstat_need_entry_refs_gc(void)
     670             : {
     671             :     uint64      curage;
     672             : 
     673     3792410 :     if (!pgStatEntryRefHash)
     674           0 :         return false;
     675             : 
     676             :     /* should have been initialized when creating pgStatEntryRefHash */
     677             :     Assert(pgStatSharedRefAge != 0);
     678             : 
     679     3792410 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     680             : 
     681     3792410 :     return pgStatSharedRefAge != curage;
     682             : }
     683             : /*
                     :  * Release local references to shared stats entries that were dropped.
                     :  *
                     :  * Walks the local entry reference hash and releases every reference whose
                     :  * shared entry is marked dropped and that has no pending data attached.
                     :  * References with pending data are left in place.
                     :  *
                     :  * The request counter is read before the scan and stored into
                     :  * pgStatSharedRefAge afterwards, so a request that arrives while scanning
                     :  * leaves the two values different and causes another pass later.
                     :  */
     684             : static void
     685       11148 : pgstat_gc_entry_refs(void)
     686             : {
     687             :     pgstat_entry_ref_hash_iterator i;
     688             :     PgStat_EntryRefHashEntry *ent;
     689             :     uint64      curage;
     690             : 
     691       11148 :     curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
     692             :     Assert(curage != 0);
     693             : 
     694             :     /*
     695             :      * Some entries have been dropped. Invalidate cache pointer to them.
     696             :      */
     697       11148 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     698      868398 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
     699             :     {
     700      857250 :         PgStat_EntryRef *entry_ref = ent->entry_ref;
     701             : 
     702             :         Assert(!entry_ref->shared_stats ||
     703             :                entry_ref->shared_stats->magic == 0xdeadbeef);
     704             : 
     705      857250 :         if (!entry_ref->shared_entry->dropped)
     706      580434 :             continue;
     707             : 
     708             :         /* cannot gc shared ref that has pending data */
     709      276816 :         if (entry_ref->pending != NULL)
     710      268012 :             continue;
     711             : 
     712        8804 :         pgstat_release_entry_ref(ent->key, entry_ref, false);
     713             :     }
     714             : 
     715       11148 :     pgStatSharedRefAge = curage;
     716       11148 : }
     717             : /*
                     :  * Release the local entry references selected by a callback.
                     :  *
                     :  * Iterates over the local entry reference hash and calls
                     :  * pgstat_release_entry_ref() for every entry for which match() returns
                     :  * true; a NULL match selects all entries. match_data is handed to the
                     :  * callback unchanged. Does nothing if the hash does not exist.
                     :  *
                     :  * discard_pending is forwarded to pgstat_release_entry_ref(): for a
                     :  * reference with pending data, true deletes that data and false raises
                     :  * an ERROR.
                     :  *
                     :  * Releasing a reference deletes its element from the hash that is being
                     :  * iterated.
                     :  */
     718             : static void
     719       29244 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
     720             :                                    Datum match_data)
     721             : {
     722             :     pgstat_entry_ref_hash_iterator i;
     723             :     PgStat_EntryRefHashEntry *ent;
     724             : 
     725       29244 :     if (pgStatEntryRefHash == NULL)
     726          12 :         return;
     727             : 
     728       29232 :     pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
     729             : 
     730     1504918 :     while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
     731             :            != NULL)
     732             :     {
     733             :         Assert(ent->entry_ref != NULL);
     734             : 
     735     1475686 :         if (match && !match(ent, match_data))
     736        1020 :             continue;
     737             : 
     738     1474666 :         pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
     739             :     }
     740             : }
     741             : 
     742             : /*
     743             :  * Release all local references to shared stats entries.
     744             :  *
     745             :  * When a process exits it cannot do so while still holding references onto
     746             :  * stats entries, otherwise the shared stats entries could never be freed.
                     :  *
                     :  * discard_pending is passed on to pgstat_release_matching_entry_refs().
                     :  * Once every reference is released the local hash is destroyed and
                     :  * pgStatEntryRefHash is reset to NULL. Does nothing if the hash does not
                     :  * exist.
     747             :  */
     748             : static void
     749       33572 : pgstat_release_all_entry_refs(bool discard_pending)
     750             : {
     751       33572 :     if (pgStatEntryRefHash == NULL)
     752        4372 :         return;
     753             : 
     754       29200 :     pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
     755             :     Assert(pgStatEntryRefHash->members == 0);
     756       29200 :     pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
     757       29200 :     pgStatEntryRefHash = NULL;
     758             : }
     759             : /*
                     :  * Match callback for pgstat_release_matching_entry_refs(): match_data
                     :  * carries a database OID, and the entry matches when the dboid in its key
                     :  * is equal to it.
                     :  */
     760             : static bool
     761        1020 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
     762             : {
     763        1020 :     Oid         dboid = DatumGetObjectId(match_data);
     764             : 
     765        1020 :     return ent->key.dboid == dboid;
     766             : }
     767             : /*
                     :  * Release every local entry reference whose key belongs to database
                     :  * dboid, discarding any pending data attached to those references.
                     :  */
     768             : static void
     769          44 : pgstat_release_db_entry_refs(Oid dboid)
     770             : {
     771          44 :     pgstat_release_matching_entry_refs( /* discard pending = */ true,
     772             :                                        match_db,
     773             :                                        ObjectIdGetDatum(dboid));
     774          44 : }
     775             : 
     776             : 
     777             : /* ------------------------------------------------------------
     778             :  * Dropping and resetting of stats entries
     779             :  * ------------------------------------------------------------
     780             :  */
     781             : /*
                     :  * Remove shent from the shared hash table and free the DSA memory its
                     :  * body pointer refers to.
                     :  *
                     :  * If hstat is NULL the entry is removed with dshash_delete_entry();
                     :  * otherwise the entry is the current one of the sequential scan hstat
                     :  * and is removed with dshash_delete_current().
                     :  */
     782             : static void
     783       63200 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
     784             : {
     785             :     dsa_pointer pdsa;
     786             : 
     787             :     /*
     788             :      * Fetch dsa pointer before deleting entry - that way we can free the
     789             :      * memory after releasing the lock.
     790             :      */
     791       63200 :     pdsa = shent->body;
     792             : 
     793       63200 :     if (!hstat)
     794       61366 :         dshash_delete_entry(pgStatLocal.shared_hash, shent);
     795             :     else
     796        1834 :         dshash_delete_current(hstat);
     797             : 
     798       63200 :     dsa_free(pgStatLocal.dsa, pdsa);
     799       63200 : }
     800             : 
     801             : /*
     802             :  * Helper for pgstat_drop_database_and_contents(), pgstat_drop_entry() and
     803             :  * pgstat_drop_all_entries(). If hstat is non-null delete the shared entry
     804             :  * using dshash_delete_current(), otherwise use dshash_delete_entry(). In
     805             :  * either case the entry needs to be already locked.
                     :  *
                     :  * Marks the entry as dropped and gives up the reference count that was
                     :  * held on behalf of the entry not being dropped. Raises an ERROR if the
                     :  * entry is already marked as dropped.
                     :  *
                     :  * Returns true if that was the last reference and the entry has been
                     :  * freed. Returns false if other references remain; in that case, when
                     :  * hstat is NULL, the dshash lock on the entry is released here.
     806             :  */
     807             : static bool
     808       63256 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
     809             :                            dshash_seq_status *hstat)
     810             : {
     811             :     Assert(shent->body != InvalidDsaPointer);
     812             : 
     813             :     /* should already have released local reference */
     814       63256 :     if (pgStatEntryRefHash)
     815             :         Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
     816             : 
     817             :     /*
     818             :      * Signal that the entry is dropped - this will eventually cause other
     819             :      * backends to release their references.
     820             :      */
     821       63256 :     if (shent->dropped)
     822           0 :         elog(ERROR,
     823             :              "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%llu refcount=%u",
     824             :              pgstat_get_kind_info(shent->key.kind)->name,
     825             :              shent->key.dboid,
     826             :              (unsigned long long) shent->key.objid,
     827             :              pg_atomic_read_u32(&shent->refcount));
     828       63256 :     shent->dropped = true;
     829             : 
     830             :     /* release refcount marking entry as not dropped */
     831       63256 :     if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
     832             :     {
     833       53978 :         pgstat_free_entry(shent, hstat);
     834       53978 :         return true;
     835             :     }
     836             :     else
     837             :     {
     838        9278 :         if (!hstat)
     839        9278 :             dshash_release_lock(pgStatLocal.shared_hash, shent);
     840        9278 :         return false;
     841             :     }
     842             : }
     843             : 
     844             : /*
     845             :  * Drop stats for the database and all the objects inside that database.
                     :  *
                     :  * First releases this backend's own references to entries of the
                     :  * database, then scans the whole shared hash table with an exclusive
                     :  * lock and drops every not yet dropped entry whose key has this dboid.
                     :  * If any of them could not be freed, a garbage collection request is
                     :  * issued.
     846             :  */
     847             : static void
     848          44 : pgstat_drop_database_and_contents(Oid dboid)
     849             : {
     850             :     dshash_seq_status hstat;
     851             :     PgStatShared_HashEntry *p;
     852          44 :     uint64      not_freed_count = 0;
     853             : 
     854             :     Assert(OidIsValid(dboid));
     855             : 
     856             :     Assert(pgStatLocal.shared_hash != NULL);
     857             : 
     858             :     /*
     859             :      * This backend might very well be the only backend holding a reference to
     860             :      * about-to-be-dropped entries. Ensure that we're not preventing it from
     861             :      * being cleaned up till later.
     862             :      *
     863             :      * Doing this separately from the dshash iteration below avoids having to
     864             :      * do so while holding a partition lock on the shared hashtable.
     865             :      */
     866          44 :     pgstat_release_db_entry_refs(dboid);
     867             : 
     868             :     /* some of the dshash entries are to be removed, take exclusive lock. */
     869          44 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     870       11316 :     while ((p = dshash_seq_next(&hstat)) != NULL)
     871             :     {
     872       11272 :         if (p->dropped)
     873           2 :             continue;
     874             : 
     875       11270 :         if (p->key.dboid != dboid)
     876        9548 :             continue;
     877             : 
     878        1722 :         if (!pgstat_drop_entry_internal(p, &hstat))
     879             :         {
     880             :             /*
     881             :              * Even statistics for a dropped database might currently be
     882             :              * accessed (consider e.g. database stats for pg_stat_database).
     883             :              */
     884           0 :             not_freed_count++;
     885             :         }
     886             :     }
     887          44 :     dshash_seq_term(&hstat);
     888             : 
     889             :     /*
     890             :      * If some of the stats data could not be freed, signal the reference
     891             :      * holders to run garbage collection of their local entry references.
     892             :      */
     893          44 :     if (not_freed_count > 0)
     894           0 :         pgstat_request_entry_refs_gc();
     895          44 : }
     896             : 
     897             : /*
     898             :  * Drop a single stats entry.
     899             :  *
     900             :  * This routine returns false if the stats entry of the dropped object could
     901             :  * not be freed, true otherwise.
                     :  * True is also returned when no shared entry exists for the key.
     902             :  *
     903             :  * The callers of this function should call pgstat_request_entry_refs_gc()
     904             :  * if the stats entry could not be freed, to ensure that this entry's memory
     905             :  * can be reclaimed later by a different backend calling
     906             :  * pgstat_gc_entry_refs().
                     :  *
                     :  * This backend's own reference to the entry, if any, is released first
                     :  * and its pending data is discarded. For PGSTAT_KIND_DATABASE the entries
                     :  * of all objects in that database are dropped as well; the return value
                     :  * only reflects the database entry itself.
     907             :  */
     908             : bool
     909       91804 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
     910             : {
     911       91804 :     PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objid = objid};
     912             :     PgStatShared_HashEntry *shent;
     913       91804 :     bool        freed = true;
     914             : 
     915             :     /* delete local reference */
     916       91804 :     if (pgStatEntryRefHash)
     917             :     {
     918             :         PgStat_EntryRefHashEntry *lohashent =
     919       74778 :             pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
     920             : 
     921       74778 :         if (lohashent)
     922       61194 :             pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
     923             :                                      true);
     924             :     }
     925             : 
     926             :     /* mark entry in shared hashtable as deleted, drop if possible */
     927       91804 :     shent = dshash_find(pgStatLocal.shared_hash, &key, true);
     928       91804 :     if (shent)
     929             :     {
     930       61422 :         freed = pgstat_drop_entry_internal(shent, NULL);
     931             : 
     932             :         /*
     933             :          * Database stats contain other stats. Drop those as well when
     934             :          * dropping the database. XXX: Perhaps this should be done in a
     935             :          * slightly more principled way? But not obvious what that'd look
     936             :          * like, and so far this is the only case...
     937             :          */
     938       61422 :         if (key.kind == PGSTAT_KIND_DATABASE)
     939          44 :             pgstat_drop_database_and_contents(key.dboid);
     940             :     }
     941             : 
     942       91804 :     return freed;
     943             : }
     944             : /*
                     :  * Drop every stats entry in the shared hash table.
                     :  *
                     :  * Scans the table with an exclusive lock and drops each entry that is not
                     :  * already marked as dropped. If at least one entry could not be freed
                     :  * because it is still referenced, a garbage collection request is issued
                     :  * via pgstat_request_entry_refs_gc().
                     :  */
     945             : void
     946         442 : pgstat_drop_all_entries(void)
     947             : {
     948             :     dshash_seq_status hstat;
     949             :     PgStatShared_HashEntry *ps;
     950         442 :     uint64      not_freed_count = 0;
     951             : 
     952         442 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
     953         554 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
     954             :     {
     955         112 :         if (ps->dropped)
     956           0 :             continue;
     957             : 
     958         112 :         if (!pgstat_drop_entry_internal(ps, &hstat))
     959           0 :             not_freed_count++;
     960             :     }
     961         442 :     dshash_seq_term(&hstat);
     962             : 
     963         442 :     if (not_freed_count > 0)
     964           0 :         pgstat_request_entry_refs_gc();
     965         442 : }
     966             : /*
                     :  * Reset the contents of one shared stats entry.
                     :  *
                     :  * Zeroes the data area of the entry, as located and sized by
                     :  * pgstat_get_entry_data() and pgstat_get_entry_len() for the given kind.
                     :  * If the kind defines a reset_timestamp_cb, it is then called with the
                     :  * header and ts.
                     :  *
                     :  * No lock is taken here; both callers in this file hold the entry's
                     :  * lock exclusively around the call.
                     :  */
     967             : static void
     968       16448 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
     969             :                            TimestampTz ts)
     970             : {
     971       16448 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     972             : 
     973       16448 :     memset(pgstat_get_entry_data(kind, header), 0,
     974             :            pgstat_get_entry_len(kind));
     975             : 
     976       16448 :     if (kind_info->reset_timestamp_cb)
     977         344 :         kind_info->reset_timestamp_cb(header, ts);
     978       16448 : }
     979             : 
     980             : /*
     981             :  * Reset one variable-numbered stats entry.
                     :  *
                     :  * Does nothing if no entry reference is returned for (kind, dboid, objid)
                     :  * or if the shared entry is already marked as dropped. Otherwise the
                     :  * entry is locked exclusively, its contents are reset with ts passed to
                     :  * the kind's reset timestamp callback, and the lock is released.
                     :  *
                     :  * The result of pgstat_lock_entry() is ignored because it always returns
                     :  * true when called with nowait = false.
     982             :  */
     983             : void
     984         316 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
     985             : {
     986             :     PgStat_EntryRef *entry_ref;
     987             : 
     988             :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
     989             : 
     990         316 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
     991         316 :     if (!entry_ref || entry_ref->shared_entry->dropped)
     992           2 :         return;
     993             : 
     994         314 :     (void) pgstat_lock_entry(entry_ref, false);
     995         314 :     shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
     996         314 :     pgstat_unlock_entry(entry_ref);
     997             : }
     998             : 
     999             : /*
    1000             :  * Scan through the shared hashtable of stats, resetting statistics if
    1001             :  * approved by the provided do_reset() function.
                     :  *
                     :  * do_reset() is called with each entry that is not marked as dropped,
                     :  * together with match_data; entries for which it returns true are reset
                     :  * while holding their own lock exclusively. ts is passed on to
                     :  * shared_stat_reset_contents(). do_reset must not be NULL, as it is
                     :  * called unconditionally.
    1002             :  */
    1003             : void
    1004          34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
    1005             :                               Datum match_data, TimestampTz ts)
    1006             : {
    1007             :     dshash_seq_status hstat;
    1008             :     PgStatShared_HashEntry *p;
    1009             : 
    1010             :     /* dshash entry is not modified, take shared lock */
    1011          34 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1012       24386 :     while ((p = dshash_seq_next(&hstat)) != NULL)
    1013             :     {
    1014             :         PgStatShared_Common *header;
    1015             : 
    1016       24352 :         if (p->dropped)
    1017           2 :             continue;
    1018             : 
    1019       24350 :         if (!do_reset(p, match_data))
    1020        8216 :             continue;
    1021             : 
    1022       16134 :         header = dsa_get_address(pgStatLocal.dsa, p->body);
    1023             : 
    1024       16134 :         LWLockAcquire(&header->lock, LW_EXCLUSIVE);
    1025             : 
    1026       16134 :         shared_stat_reset_contents(p->key.kind, header, ts);
    1027             : 
    1028       16134 :         LWLockRelease(&header->lock);
    1029             :     }
    1030          34 :     dshash_seq_term(&hstat);
    1031          34 : }
    1032             : /*
                     :  * do_reset callback for pgstat_reset_matching_entries(): match_data
                     :  * carries a stats kind as an int32 Datum, and the entry matches when the
                     :  * kind in its key is equal to it.
                     :  */
    1033             : static bool
    1034        2936 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
    1035             : {
    1036        2936 :     return p->key.kind == DatumGetInt32(match_data);
    1037             : }
    1038             : /*
                     :  * Reset all shared stats entries of the given kind, using ts as the
                     :  * reset timestamp. Implemented as pgstat_reset_matching_entries() with
                     :  * match_kind() as the filter.
                     :  */
    1039             : void
    1040           8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
    1041             : {
    1042           8 :     pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
    1043           8 : }
    1044             : /*
                     :  * Create, on first use, the two memory contexts "PgStat Shared Ref" and
                     :  * "PgStat Shared Ref Hash" as children of TopMemoryContext. Each one is
                     :  * only created if its pointer is still unset, so repeated calls are
                     :  * cheap no-ops.
                     :  *
                     :  * NOTE(review): presumably the first context holds the entry references
                     :  * and the second the local reference hash - confirm against the
                     :  * allocation sites.
                     :  */
    1045             : static void
    1046     3792410 : pgstat_setup_memcxt(void)
    1047             : {
    1048     3792410 :     if (unlikely(!pgStatSharedRefContext))
    1049       29200 :         pgStatSharedRefContext =
    1050       29200 :             AllocSetContextCreate(TopMemoryContext,
    1051             :                                   "PgStat Shared Ref",
    1052             :                                   ALLOCSET_SMALL_SIZES);
    1053     3792410 :     if (unlikely(!pgStatEntryRefHashContext))
    1054       29200 :         pgStatEntryRefHashContext =
    1055       29200 :             AllocSetContextCreate(TopMemoryContext,
    1056             :                                   "PgStat Shared Ref Hash",
    1057             :                                   ALLOCSET_SMALL_SIZES);
    1058     3792410 : }

Generated by: LCOV version 1.14