LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 86.6 % 529 458
Test Date: 2026-05-05 12:17:12 Functions: 100.0 % 34 34
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* ----------
       2              :  * pgstat.c
       3              :  *    Infrastructure for the cumulative statistics system.
       4              :  *
       5              :  * The cumulative statistics system accumulates statistics for different kinds
       6              :  * of objects. Some kinds of statistics are collected for a fixed number of
       7              :  * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
       8              :  * statistics are collected for a varying number of objects
       9              :  * (e.g. relations). See PgStat_KindInfo for a list of currently handled
      10              :  * statistics.
      11              :  *
      12              :  * Statistics are loaded from the filesystem during startup (by the startup
      13              :  * process), unless preceded by a crash, in which case all stats are
      14              :  * discarded. They are written out by the checkpointer process just before
      15              :  * shutting down (if the stats kind allows it), except when shutting down in
      16              :  * immediate mode.
      17              :  *
      18              :  * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
      19              :  *
      20              :  * Statistics for variable-numbered objects are stored in dynamic shared
      21              :  * memory and can be found via a dshash hashtable. The statistics counters are
      22              :  * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
      23              :  * separately allocated (PgStatShared_HashEntry->body). The separate
      24              :  * allocation allows different kinds of statistics to be stored in the same
      25              :  * hashtable without wasting space in PgStatShared_HashEntry.
      26              :  *
      27              :  * Variable-numbered stats are addressed by PgStat_HashKey while running.  It
      28              :  * is not possible to have statistics for an object that cannot be addressed
      29              :  * that way at runtime. A wider identifier can be used when serializing to
      30              :  * disk (used for replication slot stats).
      31              :  *
      32              :  * To avoid contention on the shared hashtable, each backend has a
      33              :  * backend-local hashtable (pgStatEntryRefHash) in front of the shared
      34              :  * hashtable, containing references (PgStat_EntryRef) to shared hashtable
      35              :  * entries. The shared hashtable only needs to be accessed when no prior
      36              :  * reference is found in the local hashtable. Besides pointing to the
      37              :  * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
      38              :  * contains a pointer to the shared statistics data, as a process-local
      39              :  * address, to reduce access costs.
      40              :  *
      41              :  * The names for structs stored in shared memory are prefixed with
      42              :  * PgStatShared instead of PgStat. Each stats entry in shared memory is
      43              :  * protected by a dedicated lwlock.
      44              :  *
      45              :  * Most stats updates are first accumulated locally in each process as pending
      46              :  * entries, then later flushed to shared memory (just after commit, or by
      47              :  * idle-timeout). This practically eliminates contention on individual stats
      48              :  * entries. For most kinds of variable-numbered stats, pending data is stored
      49              :  * in PgStat_EntryRef->pending. All entries with pending data are in the
      50              :  * pgStatPending list. Pending statistics updates are flushed out by
      51              :  * pgstat_report_stat().
      52              :  *
      53              :  * It is possible for external modules to define custom statistics kinds,
      54              :  * that can use the same properties as any built-in stats kinds.  Each custom
      55              :  * stats kind needs to assign a unique ID to ensure that it does not overlap
      56              :  * with other extensions.  In order to reserve a unique stats kind ID, refer
      57              :  * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
      58              :  *
      59              :  * The behavior of different kinds of statistics is determined by the kind's
      60              :  * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
      61              :  * defined, and pgstat_kind_custom_infos for custom kinds registered at
      62              :  * startup by pgstat_register_kind().  See PgStat_KindInfo for details.
      63              :  *
      64              :  * The consistency of read accesses to statistics can be configured using the
      65              :  * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
      66              :  * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
      67              :  * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
      68              :  * pgStatLocal.snapshot.
      69              :  *
      70              :  * To keep things manageable, stats handling is split across several
      71              :  * files. Infrastructure pieces are in:
      72              :  * - pgstat.c - this file, to tie it all together
      73              :  * - pgstat_shmem.c - nearly everything dealing with shared memory, including
      74              :  *   the maintenance of hashtable entries
      75              :  * - pgstat_xact.c - transactional integration, including the transactional
      76              :  *   creation and dropping of stats entries
      77              :  *
      78              :  * Each statistics kind is handled in a dedicated file:
      79              :  * - pgstat_archiver.c
      80              :  * - pgstat_backend.c
      81              :  * - pgstat_bgwriter.c
      82              :  * - pgstat_checkpointer.c
      83              :  * - pgstat_database.c
      84              :  * - pgstat_function.c
      85              :  * - pgstat_io.c
      86              :  * - pgstat_lock.c
      87              :  * - pgstat_relation.c
      88              :  * - pgstat_replslot.c
      89              :  * - pgstat_slru.c
      90              :  * - pgstat_subscription.c
      91              :  * - pgstat_wal.c
      92              :  *
      93              :  * Whenever possible infrastructure files should not contain code related to
      94              :  * specific kinds of stats.
      95              :  *
      96              :  *
      97              :  * Copyright (c) 2001-2026, PostgreSQL Global Development Group
      98              :  *
      99              :  * IDENTIFICATION
     100              :  *    src/backend/utils/activity/pgstat.c
     101              :  * ----------
     102              :  */
     103              : #include "postgres.h"
     104              : 
     105              : #include <unistd.h>
     106              : 
     107              : #include "access/xact.h"
     108              : #include "lib/dshash.h"
     109              : #include "pgstat.h"
     110              : #include "storage/fd.h"
     111              : #include "storage/ipc.h"
     112              : #include "storage/lwlock.h"
     113              : #include "utils/guc_hooks.h"
     114              : #include "utils/memutils.h"
     115              : #include "utils/pgstat_internal.h"
     116              : #include "utils/timestamp.h"
     117              : 
     118              : 
     119              : /* ----------
     120              :  * Timer definitions.
     121              :  *
     122              :  * In milliseconds.
     123              :  * ----------
     124              :  */
     125              : 
     126              : /* minimum interval between non-forced stats flushes */
     127              : #define PGSTAT_MIN_INTERVAL         1000
     128              : /* how long until flushing pending stats updates is allowed to block */
     129              : #define PGSTAT_MAX_INTERVAL         60000
     130              : /* when to call pgstat_report_stat() again, even when idle */
     131              : #define PGSTAT_IDLE_INTERVAL        10000
     132              : 
     133              : /* ----------
     134              :  * Initial size hints for the hash tables used in statistics.
     135              :  * ----------
     136              :  */
     137              : 
     138              : #define PGSTAT_SNAPSHOT_HASH_SIZE   512
     139              : 
     140              : /* ---------
     141              :  * Identifiers in stats file.
     142              :  * ---------
     143              :  */
     144              : #define PGSTAT_FILE_ENTRY_END   'E' /* end of file */
     145              : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
     146              : #define PGSTAT_FILE_ENTRY_NAME  'N' /* stats entry identified by name */
     147              : #define PGSTAT_FILE_ENTRY_HASH  'S' /* stats entry identified by
     148              :                                      * PgStat_HashKey */
     149              : 
     150              : /* hash table entry for statistics snapshots */
     151              : typedef struct PgStat_SnapshotEntry
     152              : {
     153              :     PgStat_HashKey key;
     154              :     char        status;         /* for simplehash use */
     155              :     void       *data;           /* the stats data itself */
     156              : } PgStat_SnapshotEntry;
     157              : 
     158              : 
     159              : /* ----------
     160              :  * Backend-local Hash Table Definitions
     161              :  * ----------
     162              :  */
     163              : 
     164              : /* for stats snapshot entries */
     165              : #define SH_PREFIX pgstat_snapshot
     166              : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
     167              : #define SH_KEY_TYPE PgStat_HashKey
     168              : #define SH_KEY key
     169              : #define SH_HASH_KEY(tb, key) \
     170              :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
     171              : #define SH_EQUAL(tb, a, b) \
     172              :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
     173              : #define SH_SCOPE static inline
     174              : #define SH_DEFINE
     175              : #define SH_DECLARE
     176              : #include "lib/simplehash.h"
     177              : 
     178              : 
     179              : /* ----------
     180              :  * Local function forward declarations
     181              :  * ----------
     182              :  */
     183              : 
     184              : static void pgstat_write_statsfile(void);
     185              : static void pgstat_read_statsfile(void);
     186              : 
     187              : static void pgstat_init_snapshot_fixed(void);
     188              : 
     189              : static void pgstat_reset_after_failure(void);
     190              : 
     191              : static bool pgstat_flush_pending_entries(bool nowait);
     192              : 
     193              : static void pgstat_prep_snapshot(void);
     194              : static void pgstat_build_snapshot(void);
     195              : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
     196              : 
     197              : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
     198              : 
     199              : 
     200              : /* ----------
     201              :  * GUC parameters
     202              :  * ----------
     203              :  */
     204              : 
     205              : bool        pgstat_track_counts = false;
     206              : int         pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
     207              : 
     208              : 
     209              : /* ----------
     210              :  * state shared with pgstat_*.c
     211              :  * ----------
     212              :  */
     213              : 
     214              : PgStat_LocalState pgStatLocal;
     215              : 
     216              : /*
     217              :  * Track pending reports for fixed-numbered stats, used by
     218              :  * pgstat_report_stat().
     219              :  */
     220              : bool        pgstat_report_fixed = false;
     221              : 
     222              : /* ----------
     223              :  * Local data
     224              :  *
     225              :  * NB: There should be only variables related to stats infrastructure here,
     226              :  * not for specific kinds of stats.
     227              :  * ----------
     228              :  */
     229              : 
     230              : /*
     231              :  * Memory contexts containing the pgStatEntryRefHash table, the
     232              :  * pgStatSharedRef entries, and pending data respectively. Mostly to make it
     233              :  * easier to track / attribute memory usage.
     234              :  */
     235              : 
     236              : static MemoryContext pgStatPendingContext = NULL;
     237              : 
     238              : /*
     239              :  * Backend local list of PgStat_EntryRef with unflushed pending stats.
     240              :  *
     241              :  * Newly pending entries should only ever be added to the end of the list,
     242              :  * otherwise pgstat_flush_pending_entries() might not see them immediately.
     243              :  */
     244              : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
     245              : 
     246              : 
     247              : /*
     248              :  * Force the next stats flush to happen regardless of
     249              :  * PGSTAT_MIN_INTERVAL. Useful in test scripts.
     250              :  */
     251              : static bool pgStatForceNextFlush = false;
     252              : 
     253              : /*
     254              :  * Force-clear existing snapshot before next use when stats_fetch_consistency
     255              :  * is changed.
     256              :  */
     257              : static bool force_stats_snapshot_clear = false;
     258              : 
     259              : 
     260              : /*
     261              :  * For assertions that check pgstat is not used before initialization / after
     262              :  * shutdown.
     263              :  */
     264              : #ifdef USE_ASSERT_CHECKING
     265              : static bool pgstat_is_initialized = false;
     266              : static bool pgstat_is_shutdown = false;
     267              : #endif
     268              : 
     269              : 
     270              : /*
     271              :  * The different kinds of built-in statistics.
     272              :  *
     273              :  * If reasonably possible, handling specific to one kind of stats should go
     274              :  * through this abstraction, rather than making more of pgstat.c aware.
     275              :  *
     276              :  * See comments for struct PgStat_KindInfo for details about the individual
     277              :  * fields.
     278              :  *
     279              :  * XXX: It'd be nicer to define this outside of this file. But there doesn't
     280              :  * seem to be a great way of doing that, given the split across multiple
     281              :  * files.
     282              :  */
     283              : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
     284              : 
     285              :     /* stats kinds for variable-numbered objects */
     286              : 
     287              :     [PGSTAT_KIND_DATABASE] = {
     288              :         .name = "database",
     289              : 
     290              :         .fixed_amount = false,
     291              :         .write_to_file = true,
     292              :         /* so pg_stat_database entries can be seen in all databases */
     293              :         .accessed_across_databases = true,
     294              : 
     295              :         .shared_size = sizeof(PgStatShared_Database),
     296              :         .shared_data_off = offsetof(PgStatShared_Database, stats),
     297              :         .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
     298              :         .pending_size = sizeof(PgStat_StatDBEntry),
     299              : 
     300              :         .flush_pending_cb = pgstat_database_flush_cb,
     301              :         .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
     302              :     },
     303              : 
     304              :     [PGSTAT_KIND_RELATION] = {
     305              :         .name = "relation",
     306              : 
     307              :         .fixed_amount = false,
     308              :         .write_to_file = true,
     309              : 
     310              :         .shared_size = sizeof(PgStatShared_Relation),
     311              :         .shared_data_off = offsetof(PgStatShared_Relation, stats),
     312              :         .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
     313              :         .pending_size = sizeof(PgStat_TableStatus),
     314              : 
     315              :         .flush_pending_cb = pgstat_relation_flush_cb,
     316              :         .delete_pending_cb = pgstat_relation_delete_pending_cb,
     317              :         .reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
     318              :     },
     319              : 
     320              :     [PGSTAT_KIND_FUNCTION] = {
     321              :         .name = "function",
     322              : 
     323              :         .fixed_amount = false,
     324              :         .write_to_file = true,
     325              : 
     326              :         .shared_size = sizeof(PgStatShared_Function),
     327              :         .shared_data_off = offsetof(PgStatShared_Function, stats),
     328              :         .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
     329              :         .pending_size = sizeof(PgStat_FunctionCounts),
     330              : 
     331              :         .flush_pending_cb = pgstat_function_flush_cb,
     332              :         .reset_timestamp_cb = pgstat_function_reset_timestamp_cb,
     333              :     },
     334              : 
     335              :     [PGSTAT_KIND_REPLSLOT] = {
     336              :         .name = "replslot",
     337              : 
     338              :         .fixed_amount = false,
     339              :         .write_to_file = true,
     340              : 
     341              :         .accessed_across_databases = true,
     342              : 
     343              :         .shared_size = sizeof(PgStatShared_ReplSlot),
     344              :         .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
     345              :         .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
     346              : 
     347              :         .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
     348              :         .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
     349              :         .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
     350              :     },
     351              : 
     352              :     [PGSTAT_KIND_SUBSCRIPTION] = {
     353              :         .name = "subscription",
     354              : 
     355              :         .fixed_amount = false,
     356              :         .write_to_file = true,
     357              :         /* so pg_stat_subscription_stats entries can be seen in all databases */
     358              :         .accessed_across_databases = true,
     359              : 
     360              :         .shared_size = sizeof(PgStatShared_Subscription),
     361              :         .shared_data_off = offsetof(PgStatShared_Subscription, stats),
     362              :         .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
     363              :         .pending_size = sizeof(PgStat_BackendSubEntry),
     364              : 
     365              :         .flush_pending_cb = pgstat_subscription_flush_cb,
     366              :         .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
     367              :     },
     368              : 
     369              :     [PGSTAT_KIND_BACKEND] = {
     370              :         .name = "backend",
     371              : 
     372              :         .fixed_amount = false,
     373              :         .write_to_file = false,
     374              : 
     375              :         .accessed_across_databases = true,
     376              : 
     377              :         .shared_size = sizeof(PgStatShared_Backend),
     378              :         .shared_data_off = offsetof(PgStatShared_Backend, stats),
     379              :         .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
     380              : 
     381              :         .flush_static_cb = pgstat_backend_flush_cb,
     382              :         .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
     383              :     },
     384              : 
     385              :     /* stats for fixed-numbered (mostly 1) objects */
     386              : 
     387              :     [PGSTAT_KIND_ARCHIVER] = {
     388              :         .name = "archiver",
     389              : 
     390              :         .fixed_amount = true,
     391              :         .write_to_file = true,
     392              : 
     393              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
     394              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
     395              :         .shared_data_off = offsetof(PgStatShared_Archiver, stats),
     396              :         .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
     397              : 
     398              :         .init_shmem_cb = pgstat_archiver_init_shmem_cb,
     399              :         .reset_all_cb = pgstat_archiver_reset_all_cb,
     400              :         .snapshot_cb = pgstat_archiver_snapshot_cb,
     401              :     },
     402              : 
     403              :     [PGSTAT_KIND_BGWRITER] = {
     404              :         .name = "bgwriter",
     405              : 
     406              :         .fixed_amount = true,
     407              :         .write_to_file = true,
     408              : 
     409              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
     410              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
     411              :         .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
     412              :         .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
     413              : 
     414              :         .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
     415              :         .reset_all_cb = pgstat_bgwriter_reset_all_cb,
     416              :         .snapshot_cb = pgstat_bgwriter_snapshot_cb,
     417              :     },
     418              : 
     419              :     [PGSTAT_KIND_CHECKPOINTER] = {
     420              :         .name = "checkpointer",
     421              : 
     422              :         .fixed_amount = true,
     423              :         .write_to_file = true,
     424              : 
     425              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
     426              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
     427              :         .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
     428              :         .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
     429              : 
     430              :         .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
     431              :         .reset_all_cb = pgstat_checkpointer_reset_all_cb,
     432              :         .snapshot_cb = pgstat_checkpointer_snapshot_cb,
     433              :     },
     434              : 
     435              :     [PGSTAT_KIND_IO] = {
     436              :         .name = "io",
     437              : 
     438              :         .fixed_amount = true,
     439              :         .write_to_file = true,
     440              : 
     441              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
     442              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
     443              :         .shared_data_off = offsetof(PgStatShared_IO, stats),
     444              :         .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
     445              : 
     446              :         .flush_static_cb = pgstat_io_flush_cb,
     447              :         .init_shmem_cb = pgstat_io_init_shmem_cb,
     448              :         .reset_all_cb = pgstat_io_reset_all_cb,
     449              :         .snapshot_cb = pgstat_io_snapshot_cb,
     450              :     },
     451              : 
     452              :     [PGSTAT_KIND_LOCK] = {
     453              :         .name = "lock",
     454              : 
     455              :         .fixed_amount = true,
     456              :         .write_to_file = true,
     457              : 
     458              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, lock),
     459              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, lock),
     460              :         .shared_data_off = offsetof(PgStatShared_Lock, stats),
     461              :         .shared_data_len = sizeof(((PgStatShared_Lock *) 0)->stats),
     462              : 
     463              :         .flush_static_cb = pgstat_lock_flush_cb,
     464              :         .init_shmem_cb = pgstat_lock_init_shmem_cb,
     465              :         .reset_all_cb = pgstat_lock_reset_all_cb,
     466              :         .snapshot_cb = pgstat_lock_snapshot_cb,
     467              :     },
     468              : 
     469              :     [PGSTAT_KIND_SLRU] = {
     470              :         .name = "slru",
     471              : 
     472              :         .fixed_amount = true,
     473              :         .write_to_file = true,
     474              : 
     475              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
     476              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
     477              :         .shared_data_off = offsetof(PgStatShared_SLRU, stats),
     478              :         .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
     479              : 
     480              :         .flush_static_cb = pgstat_slru_flush_cb,
     481              :         .init_shmem_cb = pgstat_slru_init_shmem_cb,
     482              :         .reset_all_cb = pgstat_slru_reset_all_cb,
     483              :         .snapshot_cb = pgstat_slru_snapshot_cb,
     484              :     },
     485              : 
     486              :     [PGSTAT_KIND_WAL] = {
     487              :         .name = "wal",
     488              : 
     489              :         .fixed_amount = true,
     490              :         .write_to_file = true,
     491              : 
     492              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
     493              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
     494              :         .shared_data_off = offsetof(PgStatShared_Wal, stats),
     495              :         .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
     496              : 
     497              :         .init_backend_cb = pgstat_wal_init_backend_cb,
     498              :         .flush_static_cb = pgstat_wal_flush_cb,
     499              :         .init_shmem_cb = pgstat_wal_init_shmem_cb,
     500              :         .reset_all_cb = pgstat_wal_reset_all_cb,
     501              :         .snapshot_cb = pgstat_wal_snapshot_cb,
     502              :     },
     503              : };
     504              : 
     505              : /*
     506              :  * Information about custom statistics kinds.
     507              :  *
     508              :  * These are saved in a different array than the built-in kinds for the sake
     509              :  * of clarity in the initializations.
     510              :  *
     511              :  * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
     512              :  */
     513              : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
     514              : 
     515              : /* ------------------------------------------------------------
     516              :  * Functions managing the state of the stats system for all backends.
     517              :  * ------------------------------------------------------------
     518              :  */
     519              : 
     520              : /*
     521              :  * Read on-disk stats into memory at server start.
     522              :  *
     523              :  * Should only be called by the startup process or in single user mode.
     524              :  */
     525              : void
     526          894 : pgstat_restore_stats(void)
     527              : {
     528          894 :     pgstat_read_statsfile();
     529          894 : }
     530              : 
     531              : /*
     532              :  * Remove the stats file.  This is currently used only if WAL recovery is
     533              :  * needed after a crash.
     534              :  *
     535              :  * Should only be called by the startup process or in single user mode.
                      :  *
                      :  * The outcome of the unlink() is only logged: a missing file and a
                      :  * successful removal are reported at DEBUG2, any other failure at LOG.
                      :  * Afterwards every stats kind that provides a finish callback is invoked
                      :  * with STATS_DISCARD, and the stats contents are reset via
                      :  * pgstat_reset_after_failure().
     536              :  */
     537              : void
     538          190 : pgstat_discard_stats(void)
     539              : {
     540              :     int         ret;
     541              : 
     542              :     /* NB: this needs to be done even in single user mode */
     543              : 
     544              :     /* First, clean up the main pgstats file */
     545          190 :     ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
     546          190 :     if (ret != 0)
     547              :     {
     548          189 :         if (errno == ENOENT)
     549          189 :             elog(DEBUG2,
     550              :                  "didn't need to unlink permanent stats file \"%s\" - didn't exist",
     551              :                  PGSTAT_STAT_PERMANENT_FILENAME);
     552              :         else
     553            0 :             ereport(LOG,
     554              :                     (errcode_for_file_access(),
     555              :                      errmsg("could not unlink permanent statistics file \"%s\": %m",
     556              :                             PGSTAT_STAT_PERMANENT_FILENAME)));
     557              :     }
     558              :     else
     559              :     {
     560            1 :         ereport(DEBUG2,
     561              :                 (errcode_for_file_access(),
     562              :                  errmsg_internal("unlinked permanent statistics file \"%s\"",
     563              :                                  PGSTAT_STAT_PERMANENT_FILENAME)));
     564              :     }
     565              : 
     566              :     /* Run the finish callback of every stats kind that has one */
     567         6270 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     568              :     {
     569         6080 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     570              : 
     571         6080 :         if (kind_info && kind_info->finish)
     572            1 :             kind_info->finish(STATS_DISCARD);
     573              :     }
     574              : 
     575              :     /*
     576              :      * Reset stats contents. This will set reset timestamps of fixed-numbered
     577              :      * stats to the current time (no variable stats exist).
     578              :      */
     579          190 :     pgstat_reset_after_failure();
     580          190 : }
     581              : 
     582              : /*
     583              :  * pgstat_before_server_shutdown() needs to be called by exactly one process
     584              :  * during regular server shutdowns. Otherwise all stats will be lost.
     585              :  *
     586              :  * We currently only write out stats for proc_exit(0). We might want to change
     587              :  * that at some point... But right now pgstat_discard_stats() would be called
     588              :  * during the start after a disorderly shutdown, anyway.
                      :  *
                      :  * 'code' is compared against 0 to decide whether to write the stats file
                      :  * (presumably it is the proc_exit() code - confirm against the caller).
                      :  * 'arg' is not used.
     589              :  */
     590              : void
     591          776 : pgstat_before_server_shutdown(int code, Datum arg)
     592              : {
     593              :     Assert(pgStatLocal.shmem != NULL);
     594              :     Assert(!pgStatLocal.shmem->is_shutdown);
     595              : 
     596              :     /*
     597              :      * Stats should only be reported after pgstat_initialize() and before
     598              :      * pgstat_shutdown_hook(). This is a convenient point to catch most
     599              :      * violations of this rule.
     600              :      */
     601              :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
     602              : 
     603              :     /* flush out our own pending changes before writing out */
     604          776 :     pgstat_report_stat(true);
     605              : 
     606              :     /*
     607              :      * Only write out file during normal shutdown. Don't even signal that
     608              :      * we've shut down during irregular shutdowns, because the shutdown
     609              :      * sequence isn't coordinated to ensure this backend shuts down last.
     610              :      */
     611          776 :     if (code == 0)
     612              :     {
     613          771 :         pgStatLocal.shmem->is_shutdown = true;
     614          771 :         pgstat_write_statsfile();
     615              :     }
     616          776 : }
     617              : 
     618              : 
     619              : /* ------------------------------------------------------------
     620              :  * Backend initialization / shutdown functions
     621              :  * ------------------------------------------------------------
     622              :  */
     623              : 
     624              : /*
     625              :  * Shut down a single backend's statistics reporting at process exit.
     626              :  *
     627              :  * Flush out any remaining statistics counts.  Without this, operations
     628              :  * triggered during backend exit (such as temp table deletions) won't be
     629              :  * counted.
                      :  *
                      :  * Registered by pgstat_initialize() through before_shmem_exit(). Neither
                      :  * 'code' nor 'arg' is used.
     630              :  */
     631              : static void
     632        24528 : pgstat_shutdown_hook(int code, Datum arg)
     633              : {
     634              :     Assert(!pgstat_is_shutdown);
     635              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
     636              : 
     637              :     /*
     638              :      * If we got as far as discovering our own database ID, we can flush out
     639              :      * what we did so far.  Otherwise, we'd be reporting an invalid database
     640              :      * ID, so forget it.  (This means that accesses to pg_database during
     641              :      * failed backend starts might never get counted.)
     642              :      */
     643        24528 :     if (OidIsValid(MyDatabaseId))
     644        18451 :         pgstat_report_disconnect(MyDatabaseId);
     645              : 
     646        24528 :     pgstat_report_stat(true);
     647              : 
     648              :     /* the forced flush above should not have left any pending changes */
     649              :     Assert(dlist_is_empty(&pgStatPending));
     650        24528 :     dlist_init(&pgStatPending);
     651              : 
     652              :     /*
                      :      * Drop the backend stats entry. If pgstat_drop_entry() returns false,
                      :      * request a garbage collection of entry references instead.
                      :      */
     653        24528 :     if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
     654            0 :         pgstat_request_entry_refs_gc();
     655              : 
     656        24528 :     pgstat_detach_shmem();
     657              : 
     658              : #ifdef USE_ASSERT_CHECKING
     659              :     pgstat_is_shutdown = true;
     660              : #endif
     661        24528 : }
     662              : 
     663              : /*
     664              :  * Initialize pgstats state, and set up our on-proc-exit hook. Called from
     665              :  * BaseInit().
     666              :  *
     667              :  * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
                      :  *
                      :  * Steps, in order: attach to the stats shared memory, allocate the snapshot
                      :  * buffers for custom fixed-numbered kinds, run each kind's init_backend_cb
                      :  * (if any), and register pgstat_shutdown_hook().
     668              :  */
     669              : void
     670        24528 : pgstat_initialize(void)
     671              : {
     672              :     Assert(!pgstat_is_initialized);
     673              : 
     674        24528 :     pgstat_attach_shmem();
     675              : 
     676        24528 :     pgstat_init_snapshot_fixed();
     677              : 
     678              :     /* Backend initialization callbacks, for the kinds that define one */
     679       809424 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     680              :     {
     681       784896 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     682              : 
     683       784896 :         if (kind_info == NULL || kind_info->init_backend_cb == NULL)
     684       760368 :             continue;
     685              : 
     686        24528 :         kind_info->init_backend_cb();
     687              :     }
     688              : 
     689              :     /* Set up a process-exit hook to clean up */
     690        24528 :     before_shmem_exit(pgstat_shutdown_hook, 0);
     691              : 
     692              : #ifdef USE_ASSERT_CHECKING
     693              :     pgstat_is_initialized = true;
     694              : #endif
     695        24528 : }
     696              : 
     697              : 
     698              : /* ------------------------------------------------------------
     699              :  * Public functions used by backends follow
     700              :  * ------------------------------------------------------------
     701              :  */
     702              : 
     703              : /*
     704              :  * Must be called by processes that perform DML: tcop/postgres.c, logical
     705              :  * receiver processes, SPI worker, etc. to flush pending statistics updates to
     706              :  * shared memory.
     707              :  *
     708              :  * Unless called with 'force', pending stats updates are flushed at most once
     709              :  * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
     710              :  * block on lock acquisition, except if stats updates have been pending for
     711              :  * longer than PGSTAT_MAX_INTERVAL (60000ms).
     712              :  *
     713              :  * Whenever pending stats updates remain at the end of pgstat_report_stat() a
     714              :  * suggested idle timeout is returned. Currently this is always
     715              :  * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
     716              :  * a timeout after which to call pgstat_report_stat(true), but are not
     717              :  * required to do so. If nothing remains pending, 0 is returned.
     718              :  *
     719              :  * Note that this is called only when not within a transaction, so it is fair
     720              :  * to use transaction stop time as an approximation of current time.
     721              :  */
     722              : long
     723       394480 : pgstat_report_stat(bool force)
     724              : {
     725              :     static TimestampTz pending_since = 0;	/* since when stats are pending; 0 if none */
     726              :     static TimestampTz last_flush = 0;	/* time of the last flush attempt; 0 if none */
     727              :     bool        partial_flush;
     728              :     TimestampTz now;
     729              :     bool        nowait;
     730              : 
     731              :     pgstat_assert_is_up();
     732              :     Assert(!IsTransactionOrTransactionBlock());
     733              : 
     734              :     /* "absorb" the forced flush even if there's nothing to flush */
     735       394480 :     if (pgStatForceNextFlush)
     736              :     {
     737          339 :         force = true;
     738          339 :         pgStatForceNextFlush = false;
     739              :     }
     740              : 
     741              :     /* Don't expend a clock check if nothing to do */
     742       394480 :     if (dlist_is_empty(&pgStatPending) &&
     743        10752 :         !pgstat_report_fixed)
     744              :     {
     745         7734 :         return 0;
     746              :     }
     747              : 
     748              :     /*
     749              :      * There should never be stats to report once stats are shut down. Can't
     750              :      * assert that before the checks above, as there is an unconditional
     751              :      * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
     752              :      * the process that ran pgstat_before_server_shutdown() will still call.
     753              :      */
     754              :     Assert(!pgStatLocal.shmem->is_shutdown);
     755              : 
     756       386746 :     if (force)
     757              :     {
     758              :         /*
     759              :          * Stats reports are forced either when it's been too long since stats
     760              :          * have been reported or in processes that force stats reporting to
     761              :          * happen at specific points (including shutdown). In the former case
     762              :          * the transaction stop time might be quite old, in the latter it
     763              :          * would never get cleared.
     764              :          */
     765        24305 :         now = GetCurrentTimestamp();
     766              :     }
     767              :     else
     768              :     {
     769       362441 :         now = GetCurrentTransactionStopTimestamp();
     770              : 
     771       693418 :         if (pending_since > 0 &&
     772       330977 :             TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
     773              :         {
     774              :             /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
     775            0 :             force = true;
     776              :         }
     777       362441 :         else if (last_flush > 0 &&
     778       347695 :                  !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
     779              :         {
     780              :             /*
                      :              * Don't flush too frequently; just remember since when stats
                      :              * have been pending and let the caller retry later.
                      :              */
     781       345692 :             if (pending_since == 0)
     782        16533 :                 pending_since = now;
     783              : 
     784       345692 :             return PGSTAT_IDLE_INTERVAL;
     785              :         }
     786              :     }
     787              : 
     788        41054 :     pgstat_update_dbstats(now);
     789              : 
     790              :     /* don't wait for lock acquisition when !force */
     791        41054 :     nowait = !force;
     792              : 
     793        41054 :     partial_flush = false;
     794              : 
     795              :     /* flush of variable-numbered stats tracked in pending entries list */
     796        41054 :     partial_flush |= pgstat_flush_pending_entries(nowait);
     797              : 
     798              :     /* flush of other stats kinds, via each kind's flush_static_cb */
     799        41054 :     if (pgstat_report_fixed)
     800              :     {
     801      1314456 :         for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     802              :         {
     803      1274624 :             const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     804              : 
     805      1274624 :             if (!kind_info)
     806       756638 :                 continue;
     807       517986 :             if (!kind_info->flush_static_cb)
     808       318826 :                 continue;
     809              : 
     810       199160 :             partial_flush |= kind_info->flush_static_cb(nowait);
     811              :         }
     812              :     }
     813              : 
     814        41054 :     last_flush = now;
     815              : 
     816              :     /*
     817              :      * If some of the pending stats could not be flushed due to lock
     818              :      * contention, let the caller know when to retry.
     819              :      */
     820        41054 :     if (partial_flush)
     821              :     {
     822              :         /* force should have prevented us from getting here */
     823              :         Assert(!force);
     824              : 
     825              :         /* remember since when stats have been pending */
     826            8 :         if (pending_since == 0)
     827            7 :             pending_since = now;
     828              : 
     829            8 :         return PGSTAT_IDLE_INTERVAL;
     830              :     }
     831              : 
     832        41046 :     pending_since = 0;
     833        41046 :     pgstat_report_fixed = false;
     834              : 
     835        41046 :     return 0;
     836              : }
     837              : 
     838              : /*
     839              :  * Force locally pending stats to be flushed during the next
     840              :  * pgstat_report_stat() call. This is useful for writing tests.
                      :  *
                      :  * Only sets pgStatForceNextFlush; pgstat_report_stat() clears the flag again
                      :  * when it consumes it.
     841              :  */
     842              : void
     843          339 : pgstat_force_next_flush(void)
     844              : {
     845          339 :     pgStatForceNextFlush = true;
     846          339 : }
     847              : 
     848              : /*
     849              :  * Only for use by pgstat_reset_counters()
                      :  *
                      :  * Returns true for entries whose key belongs to our own database. Note that
                      :  * 'match_data' is ignored; the comparison is made against MyDatabaseId
                      :  * directly.
     850              :  */
     851              : static bool
     852        16199 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
     853              : {
     854        16199 :     return entry->key.dboid == MyDatabaseId;
     855              : }
     856              : 
     857              : /*
     858              :  * Reset counters for our database.
     859              :  *
     860              :  * Permission checking for this function is managed through the normal
     861              :  * GRANT system.
                      :  *
                      :  * All entries accepted by match_db_entries() are reset, using the current
                      :  * time as the reset timestamp.
     862              :  */
     863              : void
     864           15 : pgstat_reset_counters(void)
     865              : {
     866           15 :     TimestampTz ts = GetCurrentTimestamp();
     867              : 
     868           15 :     pgstat_reset_matching_entries(match_db_entries,
     869              :                                   ObjectIdGetDatum(MyDatabaseId),
     870              :                                   ts);
     871           15 : }
     872              : 
     873              : /*
     874              :  * Reset a single variable-numbered entry.
     875              :  *
     876              :  * If the stats kind is within a database (i.e. its kind info does not have
     877              :  * accessed_across_databases set), also reset the database's
                      :  * stat_reset_timestamp. Both resets use the same timestamp.
     878              :  *
     879              :  * Permission checking for this function is managed through the normal
     880              :  * GRANT system.
     881              :  */
     882              : void
     883           43 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
     884              : {
     885           43 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     886           43 :     TimestampTz ts = GetCurrentTimestamp();
     887              : 
     888              :     /* not needed atm, and doesn't make sense with the current signature */
     889              :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
     890              : 
     891              :     /* reset the "single counter" */
     892           43 :     pgstat_reset_entry(kind, dboid, objid, ts);
     893              : 
     894           43 :     if (!kind_info->accessed_across_databases)
     895           26 :         pgstat_reset_database_timestamp(dboid, ts);
     896           43 : }
     897              : 
     898              : /*
     899              :  * Reset stats for all entries of a kind.
     900              :  *
                      :  * Fixed-numbered kinds are reset through their reset_all_cb callback, all
                      :  * other kinds through pgstat_reset_entries_of_kind(). The current time is
                      :  * used as the reset timestamp.
                      :  *
     901              :  * Permission checking for this function is managed through the normal
     902              :  * GRANT system.
     903              :  */
     904              : void
     905           46 : pgstat_reset_of_kind(PgStat_Kind kind)
     906              : {
     907           46 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     908           46 :     TimestampTz ts = GetCurrentTimestamp();
     909              : 
     910           46 :     if (kind_info->fixed_amount)
     911           42 :         kind_info->reset_all_cb(ts);
     912              :     else
     913            4 :         pgstat_reset_entries_of_kind(kind, ts);
     914           46 : }
     915              : 
     916              : 
     917              : /* ------------------------------------------------------------
     918              :  * Fetching of stats
     919              :  * ------------------------------------------------------------
     920              :  */
     921              : 
     922              : /*
     923              :  * Discard any data collected in the current transaction.  Any subsequent
     924              :  * request will cause new snapshots to be read.
     925              :  *
     926              :  * This is also invoked during transaction commit or abort to discard
     927              :  * the no-longer-wanted snapshot.  Updates of stats_fetch_consistency can
     928              :  * cause this routine to be called.
                      :  *
                      :  * Concretely: the validity flags of the fixed-numbered snapshots are
                      :  * zeroed, the snapshot hash pointer and mode are reset, the snapshot memory
                      :  * context (if any) is deleted, and force_stats_snapshot_clear is cleared.
     929              :  */
     930              : void
     931       654349 : pgstat_clear_snapshot(void)
     932              : {
     933              :     pgstat_assert_is_up();
     934              : 
     935       654349 :     memset(&pgStatLocal.snapshot.fixed_valid, 0,
     936              :            sizeof(pgStatLocal.snapshot.fixed_valid));
     937       654349 :     memset(&pgStatLocal.snapshot.custom_valid, 0,
     938              :            sizeof(pgStatLocal.snapshot.custom_valid));
     939       654349 :     pgStatLocal.snapshot.stats = NULL;
     940       654349 :     pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
     941              : 
     942              :     /* Release memory, if any was allocated */
     943       654349 :     if (pgStatLocal.snapshot.context)
     944              :     {
     945          810 :         MemoryContextDelete(pgStatLocal.snapshot.context);
     946              : 
     947              :         /* Forget the context, so that pgstat_prep_snapshot() recreates it */
     948          810 :         pgStatLocal.snapshot.context = NULL;
     949              :     }
     950              : 
     951              :     /*
     952              :      * Historically the backend_status.c facilities lived in this file, and
     953              :      * were reset with the same function. For now keep it that way, and
     954              :      * forward the reset request.
     955              :      */
     956       654349 :     pgstat_clear_backend_activity_snapshot();
     957              : 
     958              :     /* Reset this flag, as it may be possible that a cleanup was forced. */
     959       654349 :     force_stats_snapshot_clear = false;
     960       654349 : }
     961              : /*
                      :  * Fetch a copy of the stats data of one variable-numbered stats entry,
                      :  * identified by kind / dboid / objid.
                      :  *
                      :  * Returns NULL if no entry is found or the entry is marked as dropped.
                      :  *
                      :  * With PGSTAT_FETCH_CONSISTENCY_NONE the copy is palloc()ed and, if
                      :  * 'may_free' is not NULL, *may_free is set to true. In the other modes the
                      :  * copy is allocated in pgStatLocal.snapshot.context and remembered in
                      :  * pgStatLocal.snapshot.stats, so that later fetches of the same key return
                      :  * the remembered data; *may_free is then left at false.
                      :  */
     962              : void *
     963       298582 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *may_free)
     964              : {
     965       298582 :     PgStat_HashKey key = {0};
     966              :     PgStat_EntryRef *entry_ref;
     967              :     void       *stats_data;
     968       298582 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     969              : 
     970              :     /* should be called from backends */
     971              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
     972              :     Assert(!kind_info->fixed_amount);
     973              : 
     974              :     /*
     975              :      * Initialize *may_free to false.  We'll change it to true later if we end
     976              :      * up allocating the result in the caller's context and not caching it.
     977              :      */
     978       298582 :     if (may_free)
     979       286437 :         *may_free = false;
     980              : 
     981       298582 :     pgstat_prep_snapshot();
     982              : 
     983       298582 :     key.kind = kind;
     984       298582 :     key.dboid = dboid;
     985       298582 :     key.objid = objid;
     986              : 
     987              :     /* if we need to build a full snapshot, do so */
     988       298582 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
     989          299 :         pgstat_build_snapshot();
     990              : 
     991              :     /* if caching is desired, look up in cache */
     992       298582 :     if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
     993              :     {
     994         7277 :         PgStat_SnapshotEntry *entry = NULL;
     995              : 
     996         7277 :         entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
     997              : 
     998         7277 :         if (entry)
     999          612 :             return entry->data;
    1000              : 
    1001              :         /*
    1002              :          * If we built a full snapshot and the key is not in
    1003              :          * pgStatLocal.snapshot.stats, there are no matching stats.
    1004              :          */
    1005         6665 :         if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1006           16 :             return NULL;
    1007              :     }
    1008              : 
    1009       297954 :     pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
    1010              : 
    1011       297954 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1012              : 
    1013       297954 :     if (entry_ref == NULL || entry_ref->shared_entry->dropped)
    1014              :     {
    1015              :         /*
                      :          * Create an empty (data == NULL) entry when using
                      :          * PGSTAT_FETCH_CONSISTENCY_CACHE, so that the miss itself is cached.
                      :          */
    1016         7394 :         if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
    1017              :         {
    1018         1353 :             PgStat_SnapshotEntry *entry = NULL;
    1019              :             bool        found;
    1020              : 
    1021         1353 :             entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
    1022              :             Assert(!found);
    1023         1353 :             entry->data = NULL;
    1024              :         }
    1025         7394 :         return NULL;
    1026              :     }
    1027              : 
    1028              :     /*
    1029              :      * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
    1030              :      * otherwise we could quickly end up with a fair bit of memory used due to
    1031              :      * repeated accesses.
    1032              :      */
    1033       290560 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    1034              :     {
    1035       285264 :         stats_data = palloc(kind_info->shared_data_len);
    1036              : 
    1037              :         /*
    1038              :          * Since we allocated the result in the caller's context and aren't
    1039              :          * caching it, the caller can safely pfree() it.
    1040              :          */
    1041       285264 :         if (may_free)
    1042       281711 :             *may_free = true;
    1043              :     }
    1044              :     else
    1045         5296 :         stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
    1046         5296 :                                         kind_info->shared_data_len);
    1047              : 
    1048       290560 :     (void) pgstat_lock_entry_shared(entry_ref, false);	/* copy the stats while holding the entry lock */
    1049       581120 :     memcpy(stats_data,
    1050       290560 :            pgstat_get_entry_data(kind, entry_ref->shared_stats),
    1051       290560 :            kind_info->shared_data_len);
    1052       290560 :     pgstat_unlock_entry(entry_ref);
    1053              : 
    1054       290560 :     if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
    1055              :     {
    1056         5296 :         PgStat_SnapshotEntry *entry = NULL;
    1057              :         bool        found;
    1058              : 
    1059         5296 :         entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
    1060         5296 :         entry->data = stats_data;	/* NOTE(review): 'found' is not checked here, unlike above */
    1061              :     }
    1062              : 
    1063       290560 :     return stats_data;
    1064              : }
    1065              : 
    1066              : /*
    1067              :  * If a stats snapshot has been taken, return the timestamp at which that was
    1068              :  * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
    1069              :  * false and 0 is returned.
                      :  *
                      :  * A pending forced snapshot clear is carried out first, so a snapshot that
                      :  * is due to be discarded is not reported.
    1070              :  */
    1071              : TimestampTz
    1072           40 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
    1073              : {
    1074           40 :     if (force_stats_snapshot_clear)
    1075           12 :         pgstat_clear_snapshot();
    1076              : 
    1077           40 :     if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1078              :     {
    1079           16 :         *have_snapshot = true;
    1080           16 :         return pgStatLocal.snapshot.snapshot_timestamp;
    1081              :     }
    1082              : 
    1083           24 :     *have_snapshot = false;
    1084              : 
    1085           24 :     return 0;
    1086              : }
    1087              : /*
                      :  * Report whether stats exist for the given kind / dboid / objid.
                      :  *
                      :  * Fixed-numbered kinds always yield true. For all other kinds the result is
                      :  * whether pgstat_get_entry_ref() returns an entry reference.
                      :  *
                      :  * NOTE(review): unlike pgstat_fetch_entry(), the entry's 'dropped' flag is
                      :  * not examined here - confirm that this is intended.
                      :  */
    1088              : bool
    1089           95 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
    1090              : {
    1091              :     /* fixed-numbered stats always exist */
    1092           95 :     if (pgstat_get_kind_info(kind)->fixed_amount)
    1093            8 :         return true;
    1094              : 
    1095           87 :     return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
    1096              : }
    1097              : 
    1098              : /*
    1099              :  * Ensure snapshot for fixed-numbered 'kind' exists.
    1100              :  *
    1101              :  * Typically used by the pgstat_fetch_* functions for a kind of stats, before
    1102              :  * massaging the data into the desired format.
                      :  *
                      :  * With PGSTAT_FETCH_CONSISTENCY_SNAPSHOT a full snapshot is built;
                      :  * otherwise only the snapshot for 'kind' is built. A pending forced
                      :  * snapshot clear is carried out first.
    1103              :  */
    1104              : void
    1105          283 : pgstat_snapshot_fixed(PgStat_Kind kind)
    1106              : {
    1107              :     Assert(pgstat_is_kind_valid(kind));
    1108              :     Assert(pgstat_get_kind_info(kind)->fixed_amount);
    1109              : 
    1110          283 :     if (force_stats_snapshot_clear)
    1111            0 :         pgstat_clear_snapshot();
    1112              : 
    1113          283 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1114           12 :         pgstat_build_snapshot();
    1115              :     else
    1116          271 :         pgstat_build_snapshot_fixed(kind);
    1117              : 
    1118          283 :     if (pgstat_is_kind_builtin(kind))	/* sanity check: the snapshot must now be marked valid */
    1119              :         Assert(pgStatLocal.snapshot.fixed_valid[kind]);
    1120            5 :     else if (pgstat_is_kind_custom(kind))
    1121              :         Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
    1122          283 : }
    1123              : /*
                      :  * Allocate the snapshot buffers for custom fixed-numbered stats kinds.
                      :  *
                      :  * For every registered custom kind with fixed_amount set, a buffer of
                      :  * shared_data_len bytes is allocated in TopMemoryContext and stored in
                      :  * pgStatLocal.snapshot.custom_data[]. Called from pgstat_initialize().
                      :  */
    1124              : static void
    1125        24528 : pgstat_init_snapshot_fixed(void)
    1126              : {
    1127              :     /*
    1128              :      * Initialize fixed-numbered statistics data in snapshots, only for custom
    1129              :      * stats kinds.
    1130              :      */
    1131       245280 :     for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
    1132              :     {
    1133       220752 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1134              : 
    1135       220752 :         if (!kind_info || !kind_info->fixed_amount)
    1136       220702 :             continue;
    1137              : 
    1138           50 :         pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
    1139           50 :             MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
    1140              :     }
    1141        24528 : }
    1142              : /*
                      :  * Prepare the backend-local snapshot state for use.
                      :  *
                      :  * Carries out a pending forced snapshot clear. Then, unless
                      :  * stats_fetch_consistency is PGSTAT_FETCH_CONSISTENCY_NONE or the snapshot
                      :  * hash table already exists, creates the "PgStat Snapshot" memory context
                      :  * (if missing) and the snapshot hash table within it.
                      :  */
    1143              : static void
    1144       298612 : pgstat_prep_snapshot(void)
    1145              : {
    1146       298612 :     if (force_stats_snapshot_clear)
    1147           12 :         pgstat_clear_snapshot();
    1148              : 
    1149       298612 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
    1150         7307 :         pgStatLocal.snapshot.stats != NULL)
    1151       297802 :         return;
    1152              : 
    1153          810 :     if (!pgStatLocal.snapshot.context)
    1154          810 :         pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
    1155              :                                                              "PgStat Snapshot",
    1156              :                                                              ALLOCSET_SMALL_SIZES);
    1157              : 
    1158          810 :     pgStatLocal.snapshot.stats =
    1159          810 :         pgstat_snapshot_create(pgStatLocal.snapshot.context,
    1160              :                                PGSTAT_SNAPSHOT_HASH_SIZE,
    1161              :                                NULL);
    1162              : }
    1163              : 
    1164              : static void
    1165          311 : pgstat_build_snapshot(void)
    1166              : {
    1167              :     dshash_seq_status hstat;
    1168              :     PgStatShared_HashEntry *p;
    1169              : 
    1170              :     /* should only be called when we need a snapshot */
    1171              :     Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
    1172              : 
    1173              :     /* snapshot already built */
    1174          311 :     if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1175          281 :         return;
    1176              : 
    1177           30 :     pgstat_prep_snapshot();
    1178              : 
    1179              :     Assert(pgStatLocal.snapshot.stats->members == 0);
    1180              : 
    1181           30 :     pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
    1182              : 
    1183              :     /*
    1184              :      * Snapshot all variable stats.
    1185              :      */
    1186           30 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1187        36633 :     while ((p = dshash_seq_next(&hstat)) != NULL)
    1188              :     {
    1189        36603 :         PgStat_Kind kind = p->key.kind;
    1190        36603 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1191              :         bool        found;
    1192              :         PgStat_SnapshotEntry *entry;
    1193              :         PgStatShared_Common *stats_data;
    1194              : 
    1195              :         /*
    1196              :          * Check if the stats object should be included in the snapshot.
    1197              :          * Unless the stats kind can be accessed from all databases (e.g.,
    1198              :          * database stats themselves), we only include stats for the current
    1199              :          * database or objects not associated with a database (e.g. shared
    1200              :          * relations).
    1201              :          */
    1202        36603 :         if (p->key.dboid != MyDatabaseId &&
    1203         9857 :             p->key.dboid != InvalidOid &&
    1204         8162 :             !kind_info->accessed_across_databases)
    1205         8174 :             continue;
    1206              : 
    1207        28531 :         if (p->dropped)
    1208          102 :             continue;
    1209              : 
    1210              :         Assert(pg_atomic_read_u32(&p->refcount) > 0);
    1211              : 
    1212        28429 :         stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
    1213              :         Assert(stats_data);
    1214              : 
    1215        28429 :         entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
    1216              :         Assert(!found);
    1217              : 
    1218        28429 :         entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
    1219              :                                          pgstat_get_entry_len(kind));
    1220              : 
    1221              :         /*
    1222              :          * Acquire the LWLock directly instead of using
    1223              :          * pgstat_lock_entry_shared() which requires a reference.
    1224              :          */
    1225        28429 :         LWLockAcquire(&stats_data->lock, LW_SHARED);
    1226        28429 :         memcpy(entry->data,
    1227        28429 :                pgstat_get_entry_data(kind, stats_data),
    1228              :                pgstat_get_entry_len(kind));
    1229        28429 :         LWLockRelease(&stats_data->lock);
    1230              :     }
    1231           30 :     dshash_seq_term(&hstat);
    1232              : 
    1233              :     /*
    1234              :      * Build snapshot of all fixed-numbered stats.
    1235              :      */
    1236          990 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1237              :     {
    1238          960 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1239              : 
    1240          960 :         if (!kind_info)
    1241          570 :             continue;
    1242          390 :         if (!kind_info->fixed_amount)
    1243              :         {
    1244              :             Assert(kind_info->snapshot_cb == NULL);
    1245          180 :             continue;
    1246              :         }
    1247              : 
    1248          210 :         pgstat_build_snapshot_fixed(kind);
    1249              :     }
    1250              : 
    1251           30 :     pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
    1252              : }
    1253              : 
    1254              : static void
    1255         5879 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
    1256              : {
    1257         5879 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1258              :     int         idx;
    1259              :     bool       *valid;
    1260              : 
    1261              :     /* Position in fixed_valid or custom_valid */
    1262         5879 :     if (pgstat_is_kind_builtin(kind))
    1263              :     {
    1264         5873 :         idx = kind;
    1265         5873 :         valid = pgStatLocal.snapshot.fixed_valid;
    1266              :     }
    1267              :     else
    1268              :     {
    1269            6 :         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1270            6 :         valid = pgStatLocal.snapshot.custom_valid;
    1271              :     }
    1272              : 
    1273              :     Assert(kind_info->fixed_amount);
    1274              :     Assert(kind_info->snapshot_cb != NULL);
    1275              : 
    1276         5879 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    1277              :     {
    1278              :         /* rebuild every time */
    1279         5413 :         valid[idx] = false;
    1280              :     }
    1281          466 :     else if (valid[idx])
    1282              :     {
    1283              :         /* in snapshot mode we shouldn't get called again */
    1284              :         Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
    1285            6 :         return;
    1286              :     }
    1287              : 
    1288              :     Assert(!valid[idx]);
    1289              : 
    1290         5873 :     kind_info->snapshot_cb();
    1291              : 
    1292              :     Assert(!valid[idx]);
    1293         5873 :     valid[idx] = true;
    1294              : }
    1295              : 
    1296              : 
    1297              : /* ------------------------------------------------------------
    1298              :  * Backend-local pending stats infrastructure
    1299              :  * ------------------------------------------------------------
    1300              :  */
    1301              : 
    1302              : /*
    1303              :  * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
    1304              :  * stats if not already done.
    1305              :  *
    1306              :  * If created_entry is non-NULL, it'll be set to true if the entry is newly
    1307              :  * created, false otherwise.
    1308              :  */
    1309              : PgStat_EntryRef *
    1310      2401673 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
    1311              : {
    1312              :     PgStat_EntryRef *entry_ref;
    1313              : 
    1314              :     /* need to be able to flush out */
    1315              :     Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
    1316              : 
    1317      2401673 :     if (unlikely(!pgStatPendingContext))
    1318              :     {
    1319        20058 :         pgStatPendingContext =
    1320        20058 :             AllocSetContextCreate(TopMemoryContext,
    1321              :                                   "PgStat Pending",
    1322              :                                   ALLOCSET_SMALL_SIZES);
    1323              :     }
    1324              : 
    1325      2401673 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
    1326              :                                      true, created_entry);
    1327              : 
    1328      2401673 :     if (entry_ref->pending == NULL)
    1329              :     {
    1330      1226112 :         size_t      entrysize = pgstat_get_kind_info(kind)->pending_size;
    1331              : 
    1332              :         Assert(entrysize != (size_t) -1);
    1333              : 
    1334      1226112 :         entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
    1335      1226112 :         dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
    1336              :     }
    1337              : 
    1338      2401673 :     return entry_ref;
    1339              : }
    1340              : 
    1341              : /*
    1342              :  * Return an existing stats entry, or NULL.
    1343              :  *
    1344              :  * This should only be used by helper functions for pgstatfuncs.c - outside of
    1345              :  * that it shouldn't be needed.
    1346              :  */
    1347              : PgStat_EntryRef *
    1348           56 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
    1349              : {
    1350              :     PgStat_EntryRef *entry_ref;
    1351              : 
    1352           56 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1353              : 
    1354           56 :     if (entry_ref == NULL || entry_ref->pending == NULL)
    1355           20 :         return NULL;
    1356              : 
    1357           36 :     return entry_ref;
    1358              : }
    1359              : 
    1360              : void
    1361      1226112 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
    1362              : {
    1363      1226112 :     PgStat_Kind kind = entry_ref->shared_entry->key.kind;
    1364      1226112 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1365      1226112 :     void       *pending_data = entry_ref->pending;
    1366              : 
    1367              :     Assert(pending_data != NULL);
    1368              :     /* fixed-numbered stats should be handled explicitly, not as pending entries */
    1369              :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
    1370              : 
    1371      1226112 :     if (kind_info->delete_pending_cb)
    1372      1159635 :         kind_info->delete_pending_cb(entry_ref);
    1373              : 
    1374      1226112 :     pfree(pending_data);
    1375      1226112 :     entry_ref->pending = NULL;
    1376              : 
    1377      1226112 :     dlist_delete(&entry_ref->pending_node);
    1378      1226112 : }
    1379              : 
    1380              : /*
    1381              :  * Flush out pending variable-numbered stats.
    1382              :  */
    1383              : static bool
    1384        41054 : pgstat_flush_pending_entries(bool nowait)
    1385              : {
    1386        41054 :     bool        have_pending = false;
    1387        41054 :     dlist_node *cur = NULL;
    1388              : 
    1389              :     /*
    1390              :      * Need to be a bit careful iterating over the list of pending entries.
    1391              :      * Processing a pending entry may queue further pending entries to the end
    1392              :      * of the list that we want to process, so a simple iteration won't do.
    1393              :      * Further complicating matters is that we want to delete the current
    1394              :      * entry in each iteration from the list if we flushed successfully.
    1395              :      *
    1396              :      * So we just keep track of the next pointer in each loop iteration.
    1397              :      */
    1398        41054 :     if (!dlist_is_empty(&pgStatPending))
    1399        38255 :         cur = dlist_head_node(&pgStatPending);
    1400              : 
    1401      1221887 :     while (cur)
    1402              :     {
    1403      1180833 :         PgStat_EntryRef *entry_ref =
    1404              :             dlist_container(PgStat_EntryRef, pending_node, cur);
    1405      1180833 :         PgStat_HashKey key = entry_ref->shared_entry->key;
    1406      1180833 :         PgStat_Kind kind = key.kind;
    1407      1180833 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1408              :         bool        did_flush;
    1409              :         dlist_node *next;
    1410              : 
    1411              :         Assert(!kind_info->fixed_amount);
    1412              :         Assert(kind_info->flush_pending_cb != NULL);
    1413              : 
    1414              :         /* flush the stats, if possible */
    1415      1180833 :         did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
    1416              : 
    1417              :         Assert(did_flush || nowait);
    1418              : 
    1419              :         /* determine next entry, before deleting the pending entry */
    1420      1180833 :         if (dlist_has_next(&pgStatPending, cur))
    1421      1142578 :             next = dlist_next_node(&pgStatPending, cur);
    1422              :         else
    1423        38255 :             next = NULL;
    1424              : 
    1425              :         /* if successfully flushed, remove entry */
    1426      1180833 :         if (did_flush)
    1427      1180821 :             pgstat_delete_pending_entry(entry_ref);
    1428              :         else
    1429           12 :             have_pending = true;
    1430              : 
    1431      1180833 :         cur = next;
    1432              :     }
    1433              : 
    1434              :     Assert(dlist_is_empty(&pgStatPending) == !have_pending);
    1435              : 
    1436        41054 :     return have_pending;
    1437              : }
    1438              : 
    1439              : 
    1440              : /* ------------------------------------------------------------
    1441              :  * Helper / infrastructure functions
    1442              :  * ------------------------------------------------------------
    1443              :  */
    1444              : 
    1445              : PgStat_Kind
    1446           99 : pgstat_get_kind_from_str(char *kind_str)
    1447              : {
    1448          307 :     for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
    1449              :     {
    1450          303 :         if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
    1451           95 :             return kind;
    1452              :     }
    1453              : 
    1454              :     /* Check the custom set of cumulative stats */
    1455            4 :     if (pgstat_kind_custom_infos)
    1456              :     {
    1457            0 :         for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
    1458              :         {
    1459            0 :             uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1460              : 
    1461            0 :             if (pgstat_kind_custom_infos[idx] &&
    1462            0 :                 pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
    1463            0 :                 return kind;
    1464              :         }
    1465              :     }
    1466              : 
    1467            4 :     ereport(ERROR,
    1468              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1469              :              errmsg("invalid statistics kind: \"%s\"", kind_str)));
    1470              :     return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
    1471              : }
    1472              : 
    1473              : static inline bool
    1474       497724 : pgstat_is_kind_valid(PgStat_Kind kind)
    1475              : {
    1476       497724 :     return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
    1477              : }
    1478              : 
    1479              : const PgStat_KindInfo *
    1480      8760856 : pgstat_get_kind_info(PgStat_Kind kind)
    1481              : {
    1482      8760856 :     if (pgstat_is_kind_builtin(kind))
    1483      7228191 :         return &pgstat_kind_builtin_infos[kind];
    1484              : 
    1485      1532665 :     if (pgstat_is_kind_custom(kind))
    1486              :     {
    1487       848135 :         uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1488              : 
    1489       848135 :         if (pgstat_kind_custom_infos == NULL ||
    1490         1847 :             pgstat_kind_custom_infos[idx] == NULL)
    1491       847667 :             return NULL;
    1492          468 :         return pgstat_kind_custom_infos[idx];
    1493              :     }
    1494              : 
    1495       684530 :     return NULL;
    1496              : }
    1497              : 
    1498              : /*
    1499              :  * Register a new stats kind.
    1500              :  *
    1501              :  * PgStat_Kinds must be globally unique across all extensions. Refer
    1502              :  * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
    1503              :  * unique ID for your extension, to avoid conflicts with other extension
    1504              :  * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
    1505              :  * needlessly reserving a new ID.
    1506              :  */
    1507              : void
    1508            6 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
    1509              : {
    1510            6 :     uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1511              : 
    1512            6 :     if (kind_info->name == NULL || strlen(kind_info->name) == 0)
    1513            0 :         ereport(ERROR,
    1514              :                 (errmsg("custom cumulative statistics name is invalid"),
    1515              :                  errhint("Provide a non-empty name for the custom cumulative statistics.")));
    1516              : 
    1517            6 :     if (!pgstat_is_kind_custom(kind))
    1518            0 :         ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
    1519              :                         errhint("Provide a custom cumulative statistics ID between %u and %u.",
    1520              :                                 PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
    1521              : 
    1522            6 :     if (!process_shared_preload_libraries_in_progress)
    1523            0 :         ereport(ERROR,
    1524              :                 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1525              :                  errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
    1526              : 
    1527              :     /*
    1528              :      * Check some data for fixed-numbered stats.
    1529              :      */
    1530            6 :     if (kind_info->fixed_amount)
    1531              :     {
    1532            3 :         if (kind_info->shared_size == 0)
    1533            0 :             ereport(ERROR,
    1534              :                     (errmsg("custom cumulative statistics property is invalid"),
    1535              :                      errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
    1536            3 :         if (kind_info->track_entry_count)
    1537            0 :             ereport(ERROR,
    1538              :                     (errmsg("custom cumulative statistics property is invalid"),
    1539              :                      errhint("Custom cumulative statistics cannot use entry count tracking for fixed-numbered objects.")));
    1540              :     }
    1541              : 
    1542              :     /*
    1543              :      * If pgstat_kind_custom_infos is not available yet, allocate it.
    1544              :      */
    1545            6 :     if (pgstat_kind_custom_infos == NULL)
    1546              :     {
    1547            3 :         pgstat_kind_custom_infos = (const PgStat_KindInfo **)
    1548            3 :             MemoryContextAllocZero(TopMemoryContext,
    1549              :                                    sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
    1550              :     }
    1551              : 
    1552            6 :     if (pgstat_kind_custom_infos[idx] != NULL &&
    1553            0 :         pgstat_kind_custom_infos[idx]->name != NULL)
    1554            0 :         ereport(ERROR,
    1555              :                 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1556              :                  errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
    1557              :                            pgstat_kind_custom_infos[idx]->name)));
    1558              : 
    1559              :     /* check for existing custom stats with the same name */
    1560           60 :     for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
    1561              :     {
    1562           54 :         uint32      existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
    1563              : 
    1564           54 :         if (pgstat_kind_custom_infos[existing_idx] == NULL)
    1565           51 :             continue;
    1566            3 :         if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
    1567            0 :             ereport(ERROR,
    1568              :                     (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1569              :                      errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
    1570              :     }
    1571              : 
    1572              :     /* Register it */
    1573            6 :     pgstat_kind_custom_infos[idx] = kind_info;
    1574            6 :     ereport(LOG,
    1575              :             (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
    1576              :                     kind_info->name, kind)));
    1577            6 : }
    1578              : 
    1579              : /*
    1580              :  * Stats should only be reported after pgstat_initialize() and before
    1581              :  * pgstat_shutdown(). This check is put in a few central places to catch
    1582              :  * violations of this rule more easily.
    1583              :  */
    1584              : #ifdef USE_ASSERT_CHECKING
    1585              : void
    1586              : pgstat_assert_is_up(void)
    1587              : {
    1588              :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
    1589              : }
    1590              : #endif
    1591              : 
    1592              : 
    1593              : /* ------------------------------------------------------------
    1594              :  * reading and writing of on-disk stats file
    1595              :  * ------------------------------------------------------------
    1596              :  */
    1597              : 
    1598              : /* helper for pgstat_write_statsfile() */
    1599              : void
    1600       497050 : pgstat_write_chunk(FILE *fpout, void *ptr, size_t len)
    1601              : {
    1602              :     int         rc;
    1603              : 
    1604       497050 :     rc = fwrite(ptr, len, 1, fpout);
    1605              : 
    1606              :     /* We check for errors with ferror() when done writing the stats. */
    1607              :     (void) rc;
    1608       497050 : }
    1609              : 
    1610              : /*
    1611              :  * This function is called in the last process that is accessing the shared
    1612              :  * stats so locking is not required.
    1613              :  */
    1614              : static void
    1615          771 : pgstat_write_statsfile(void)
    1616              : {
    1617              :     FILE       *fpout;
    1618              :     int32       format_id;
    1619          771 :     const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
    1620          771 :     const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    1621              :     dshash_seq_status hstat;
    1622              :     PgStatShared_HashEntry *ps;
    1623              : 
    1624              :     pgstat_assert_is_up();
    1625              : 
    1626              :     /* should be called only by the checkpointer or single user mode */
    1627              :     Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
    1628              : 
    1629              :     /* we're shutting down, so it's ok to just override this */
    1630          771 :     pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
    1631              : 
    1632          771 :     elog(DEBUG2, "writing stats file \"%s\"", statfile);
    1633              : 
    1634              :     /*
    1635              :      * Open the statistics temp file to write out the current values.
    1636              :      */
    1637          771 :     fpout = AllocateFile(tmpfile, PG_BINARY_W);
    1638          771 :     if (fpout == NULL)
    1639              :     {
    1640            0 :         ereport(LOG,
    1641              :                 (errcode_for_file_access(),
    1642              :                  errmsg("could not open temporary statistics file \"%s\": %m",
    1643              :                         tmpfile)));
    1644            0 :         return;
    1645              :     }
    1646              : 
    1647              :     /*
    1648              :      * Write the file header --- currently just a format ID.
    1649              :      */
    1650          771 :     format_id = PGSTAT_FILE_FORMAT_ID;
    1651          771 :     pgstat_write_chunk_s(fpout, &format_id);
    1652              : 
    1653              :     /* Write various stats structs for fixed number of objects */
    1654        25443 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1655              :     {
    1656              :         char       *ptr;
    1657        24672 :         const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
    1658              : 
    1659        24672 :         if (!info || !info->fixed_amount)
    1660        19274 :             continue;
    1661              : 
    1662         5398 :         if (pgstat_is_kind_builtin(kind))
    1663              :             Assert(info->snapshot_ctl_off != 0);
    1664              : 
    1665              :         /* skip if no need to write to file */
    1666         5398 :         if (!info->write_to_file)
    1667            0 :             continue;
    1668              : 
    1669         5398 :         pgstat_build_snapshot_fixed(kind);
    1670         5398 :         if (pgstat_is_kind_builtin(kind))
    1671         5397 :             ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
    1672              :         else
    1673            1 :             ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
    1674              : 
    1675         5398 :         fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
    1676         5398 :         pgstat_write_chunk_s(fpout, &kind);
    1677         5398 :         pgstat_write_chunk(fpout, ptr, info->shared_data_len);
    1678              :     }
    1679              : 
    1680              :     /*
    1681              :      * Walk through the stats entries
    1682              :      */
    1683          771 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1684       243583 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
    1685              :     {
    1686              :         PgStatShared_Common *shstats;
    1687       242812 :         const PgStat_KindInfo *kind_info = NULL;
    1688              : 
    1689       242812 :         CHECK_FOR_INTERRUPTS();
    1690              : 
    1691              :         /*
    1692              :          * We should not see any "dropped" entries when writing the stats
    1693              :          * file, as all backends and auxiliary processes should have cleaned
    1694              :          * up their references before they terminated.
    1695              :          *
    1696              :          * However, since we are already shutting down, it is not worth
    1697              :          * crashing the server over any potential cleanup issues, so we simply
    1698              :          * skip such entries if encountered.
    1699              :          */
    1700              :         Assert(!ps->dropped);
    1701       242812 :         if (ps->dropped)
    1702            0 :             continue;
    1703              : 
    1704              :         /*
    1705              :          * This discards data related to custom stats kinds that are unknown
    1706              :          * to this process.
    1707              :          */
    1708       242812 :         if (!pgstat_is_kind_valid(ps->key.kind))
    1709              :         {
    1710            0 :             elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
    1711              :                  ps->key.kind, ps->key.dboid,
    1712              :                  ps->key.objid);
    1713            0 :             continue;
    1714              :         }
    1715              : 
    1716       242812 :         shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
    1717              : 
    1718       242812 :         kind_info = pgstat_get_kind_info(ps->key.kind);
    1719              : 
    1720              :         /* if not dropped the valid-entry refcount should exist */
    1721              :         Assert(pg_atomic_read_u32(&ps->refcount) > 0);
    1722              : 
    1723              :         /* skip if no need to write to file */
    1724       242812 :         if (!kind_info->write_to_file)
    1725          131 :             continue;
    1726              : 
    1727       242681 :         if (!kind_info->to_serialized_name)
    1728              :         {
    1729              :             /* normal stats entry, identified by PgStat_HashKey */
    1730       242570 :             fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
    1731       242570 :             pgstat_write_chunk_s(fpout, &ps->key);
    1732              :         }
    1733              :         else
    1734              :         {
    1735              :             /* stats entry identified by name on disk (e.g. slots) */
    1736              :             NameData    name;
    1737              : 
    1738          111 :             kind_info->to_serialized_name(&ps->key, shstats, &name);
    1739              : 
    1740          111 :             fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
    1741          111 :             pgstat_write_chunk_s(fpout, &ps->key.kind);
    1742          111 :             pgstat_write_chunk_s(fpout, &name);
    1743              :         }
    1744              : 
    1745              :         /* Write except the header part of the entry */
    1746       242681 :         pgstat_write_chunk(fpout,
    1747              :                            pgstat_get_entry_data(ps->key.kind, shstats),
    1748              :                            pgstat_get_entry_len(ps->key.kind));
    1749              : 
    1750              :         /* Write more data for the entry, if required */
    1751       242681 :         if (kind_info->to_serialized_data)
    1752            2 :             kind_info->to_serialized_data(&ps->key, shstats, fpout);
    1753              :     }
    1754          771 :     dshash_seq_term(&hstat);
    1755              : 
    1756              :     /*
    1757              :      * No more output to be done. Close the temp file and replace the old
    1758              :      * pgstat.stat with it.  The ferror() check replaces testing for error
    1759              :      * after each individual fputc or fwrite (in pgstat_write_chunk()) above.
    1760              :      */
    1761          771 :     fputc(PGSTAT_FILE_ENTRY_END, fpout);
    1762              : 
    1763          771 :     if (ferror(fpout))
    1764              :     {
    1765            0 :         ereport(LOG,
    1766              :                 (errcode_for_file_access(),
    1767              :                  errmsg("could not write temporary statistics file \"%s\": %m",
    1768              :                         tmpfile)));
    1769            0 :         FreeFile(fpout);
    1770            0 :         unlink(tmpfile);
    1771              :     }
    1772          771 :     else if (FreeFile(fpout) < 0)
    1773              :     {
    1774            0 :         ereport(LOG,
    1775              :                 (errcode_for_file_access(),
    1776              :                  errmsg("could not close temporary statistics file \"%s\": %m",
    1777              :                         tmpfile)));
    1778            0 :         unlink(tmpfile);
    1779              :     }
    1780          771 :     else if (durable_rename(tmpfile, statfile, LOG) < 0)
    1781              :     {
    1782              :         /* durable_rename already emitted log message */
    1783            0 :         unlink(tmpfile);
    1784              :     }
    1785              : 
    1786              :     /* Finish callbacks, if required */
    1787        25443 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1788              :     {
    1789        24672 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1790              : 
    1791        24672 :         if (kind_info && kind_info->finish)
    1792            1 :             kind_info->finish(STATS_WRITE);
    1793              :     }
    1794              : }
    1795              : 
    1796              : /* helper for pgstat_read_statsfile(): reads len bytes from fpin into ptr, returning true only if all len bytes were read */
    1797              : bool
    1798       510747 : pgstat_read_chunk(FILE *fpin, void *ptr, size_t len)
    1799              : {
    1800       510747 :     return fread(ptr, 1, len, fpin) == len;
    1801              : }
    1802              : 
    1803              : /*
    1804              :  * Reads the existing permanent statistics file into memory, resetting all stats if it cannot be opened or is corrupted.
    1805              :  *
    1806              :  * This function is called in the only process that is accessing the shared
    1807              :  * stats so locking is not required.
    1808              :  */
    1809              : static void
    1810          894 : pgstat_read_statsfile(void)
    1811              : {
    1812              :     FILE       *fpin;
    1813              :     int32       format_id;
    1814              :     bool        found;
    1815          894 :     const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    1816          894 :     PgStat_ShmemControl *shmem = pgStatLocal.shmem;
    1817              : 
    1818              :     /* shouldn't be called from postmaster */
    1819              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
    1820              : 
    1821          894 :     elog(DEBUG2, "reading stats file \"%s\"", statfile);
    1822              : 
    1823              :     /*
    1824              :      * Try to open the stats file. If it doesn't exist, the backends simply
    1825              :      * return zero for anything and statistics simply start from scratch
    1826              :      * with empty counters.
    1827              :      *
    1828              :      * ENOENT is a possibility if stats collection was previously disabled or
    1829              :      * has not yet written the stats file for the first time.  Any other
    1830              :      * failure condition is suspicious.
    1831              :      */
    1832          894 :     if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
    1833              :     {
    1834           58 :         if (errno != ENOENT)
    1835            0 :             ereport(LOG,
    1836              :                     (errcode_for_file_access(),
    1837              :                      errmsg("could not open statistics file \"%s\": %m",
    1838              :                             statfile)));
    1839           58 :         pgstat_reset_after_failure();
    1840           58 :         return;
    1841              :     }
    1842              : 
    1843              :     /*
    1844              :      * Verify it's of the expected format.
    1845              :      */
    1846          836 :     if (!pgstat_read_chunk_s(fpin, &format_id))
    1847              :     {
    1848            0 :         elog(WARNING, "could not read format ID");
    1849            0 :         goto error;
    1850              :     }
    1851              : 
    1852          836 :     if (format_id != PGSTAT_FILE_FORMAT_ID)
    1853              :     {
    1854            1 :         elog(WARNING, "found incorrect format ID %d (expected %d)",
    1855              :              format_id, PGSTAT_FILE_FORMAT_ID);
    1856            1 :         goto error;
    1857              :     }
    1858              : 
    1859              :     /*
    1860              :      * We found an existing statistics file. Read it and put all the stats
    1861              :      * data into place, one type-tagged entry per loop iteration.
    1862              :      */
    1863              :     for (;;)
    1864       254912 :     {
    1865       255747 :         int         t = fgetc(fpin);
    1866              : 
    1867       255747 :         switch (t)
    1868              :         {
    1869         5846 :             case PGSTAT_FILE_ENTRY_FIXED:
    1870              :                 {
    1871              :                     PgStat_Kind kind;
    1872              :                     const PgStat_KindInfo *info;
    1873              :                     char       *ptr;
    1874              : 
    1875              :                     /* entry for fixed-numbered stats */
    1876         5846 :                     if (!pgstat_read_chunk_s(fpin, &kind))
    1877              :                     {
    1878            0 :                         elog(WARNING, "could not read stats kind for entry of type %c", t);
    1879            0 :                         goto error;
    1880              :                     }
    1881              : 
    1882         5846 :                     if (!pgstat_is_kind_valid(kind))
    1883              :                     {
    1884            0 :                         elog(WARNING, "invalid stats kind %u for entry of type %c",
    1885              :                              kind, t);
    1886            0 :                         goto error;
    1887              :                     }
    1888              : 
    1889         5846 :                     info = pgstat_get_kind_info(kind);
    1890         5846 :                     if (!info)
    1891              :                     {
    1892            0 :                         elog(WARNING, "could not find information of kind %u for entry of type %c",
    1893              :                              kind, t);
    1894            0 :                         goto error;
    1895              :                     }
    1896              : 
    1897         5846 :                     if (!info->fixed_amount)
    1898              :                     {
    1899            0 :                         elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
    1900              :                              kind, t);
    1901            0 :                         goto error;
    1902              :                     }
    1903              : 
    1904              :                     /* Load back stats into shared memory; built-in and custom kinds are located differently */
    1905         5846 :                     if (pgstat_is_kind_builtin(kind))
    1906         5845 :                         ptr = ((char *) shmem) + info->shared_ctl_off +
    1907         5845 :                             info->shared_data_off;
    1908              :                     else
    1909              :                     {
    1910            1 :                         int         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1911              : 
    1912            1 :                         ptr = ((char *) shmem->custom_data[idx]) +
    1913            1 :                             info->shared_data_off;
    1914              :                     }
    1915              : 
    1916         5846 :                     if (!pgstat_read_chunk(fpin, ptr, info->shared_data_len))
    1917              :                     {
    1918            0 :                         elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
    1919              :                              kind, t, info->shared_data_len);
    1920            0 :                         goto error;
    1921              :                     }
    1922              : 
    1923         5846 :                     break;
    1924              :                 }
    1925       249066 :             case PGSTAT_FILE_ENTRY_HASH:
    1926              :             case PGSTAT_FILE_ENTRY_NAME:
    1927              :                 {
    1928              :                     PgStat_HashKey key;
    1929              :                     PgStatShared_HashEntry *p;
    1930              :                     PgStatShared_Common *header;
    1931       249066 :                     const PgStat_KindInfo *kind_info = NULL;
    1932              : 
    1933       249066 :                     CHECK_FOR_INTERRUPTS();
    1934              : 
    1935       249066 :                     if (t == PGSTAT_FILE_ENTRY_HASH)
    1936              :                     {
    1937              :                         /* normal stats entry, identified by PgStat_HashKey */
    1938       248988 :                         if (!pgstat_read_chunk_s(fpin, &key))
    1939              :                         {
    1940            0 :                             elog(WARNING, "could not read key for entry of type %c", t);
    1941            0 :                             goto error;
    1942              :                         }
    1943              : 
    1944       248988 :                         if (!pgstat_is_kind_valid(key.kind))
    1945              :                         {
    1946            0 :                             elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
    1947              :                                  key.kind, key.dboid,
    1948              :                                  key.objid, t);
    1949            0 :                             goto error;
    1950              :                         }
    1951              : 
    1952       248988 :                         kind_info = pgstat_get_kind_info(key.kind);
    1953       248988 :                         if (!kind_info)
    1954              :                         {
    1955            0 :                             elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
    1956              :                                  key.kind, key.dboid,
    1957              :                                  key.objid, t);
    1958            0 :                             goto error;
    1959              :                         }
    1960              :                     }
    1961              :                     else
    1962              :                     {
    1963              :                         /* stats entry identified by name on disk (e.g. slots) */
    1964              :                         PgStat_Kind kind;
    1965              :                         NameData    name;
    1966              : 
    1967           78 :                         if (!pgstat_read_chunk_s(fpin, &kind))
    1968              :                         {
    1969            0 :                             elog(WARNING, "could not read stats kind for entry of type %c", t);
    1970            0 :                             goto error;
    1971              :                         }
    1972           78 :                         if (!pgstat_read_chunk_s(fpin, &name))
    1973              :                         {
    1974            0 :                             elog(WARNING, "could not read name of stats kind %u for entry of type %c",
    1975              :                                  kind, t);
    1976            0 :                             goto error;
    1977              :                         }
    1978           78 :                         if (!pgstat_is_kind_valid(kind))
    1979              :                         {
    1980            0 :                             elog(WARNING, "invalid stats kind %u for entry of type %c",
    1981              :                                  kind, t);
    1982            0 :                             goto error;
    1983              :                         }
    1984              : 
    1985           78 :                         kind_info = pgstat_get_kind_info(kind);
    1986           78 :                         if (!kind_info)
    1987              :                         {
    1988            0 :                             elog(WARNING, "could not find information of kind %u for entry of type %c",
    1989              :                                  kind, t);
    1990            0 :                             goto error;
    1991              :                         }
    1992              : 
    1993           78 :                         if (!kind_info->from_serialized_name)
    1994              :                         {
    1995            0 :                             elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
    1996              :                                  kind, t);
    1997            0 :                             goto error;
    1998              :                         }
    1999              : 
    2000           78 :                         if (!kind_info->from_serialized_name(&name, &key))
    2001              :                         {
    2002              :                             /* skip over data for entry we don't care about; NOTE(review): only pgstat_get_entry_len() bytes are skipped, not any auxiliary serialized data -- confirm */
    2003            1 :                             if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
    2004              :                             {
    2005            0 :                                 elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
    2006              :                                      NameStr(name), kind, t);
    2007            0 :                                 goto error;
    2008              :                             }
    2009              : 
    2010            1 :                             continue;
    2011              :                         }
    2012              : 
    2013              :                         Assert(key.kind == kind);
    2014              :                     }
    2015              : 
    2016              :                     /*
    2017              :                      * This intentionally doesn't use pgstat_get_entry_ref() -
    2018              :                      * putting all stats into checkpointer's
    2019              :                      * pgStatEntryRefHash would be wasted effort and memory.
    2020              :                      */
    2021       249065 :                     p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
    2022              : 
    2023              :                     /* don't allow duplicate entries */
    2024       249065 :                     if (found)
    2025              :                     {
    2026            0 :                         dshash_release_lock(pgStatLocal.shared_hash, p);
    2027            0 :                         elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
    2028              :                              key.kind, key.dboid,
    2029              :                              key.objid, t);
    2030            0 :                         goto error;
    2031              :                     }
    2032              : 
    2033       249065 :                     header = pgstat_init_entry(key.kind, p);
    2034       249065 :                     dshash_release_lock(pgStatLocal.shared_hash, p);
    2035       249065 :                     if (header == NULL)
    2036              :                     {
    2037              :                         /*
    2038              :                          * It would be tempting to switch this ERROR to a
    2039              :                          * WARNING, but it would mean that all the statistics
    2040              :                          * are discarded when the environment fails on OOM.
    2041              :                          */
    2042            0 :                         elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
    2043              :                              key.kind, key.dboid,
    2044              :                              key.objid, t);
    2045              :                     }
    2046              : 
    2047       249065 :                     if (!pgstat_read_chunk(fpin,
    2048              :                                            pgstat_get_entry_data(key.kind, header),
    2049              :                                            pgstat_get_entry_len(key.kind)))
    2050              :                     {
    2051            0 :                         elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
    2052              :                              key.kind, key.dboid,
    2053              :                              key.objid, t);
    2054            0 :                         goto error;
    2055              :                     }
    2056              : 
    2057              :                     /* read more data for the entry, if required */
    2058       249065 :                     if (kind_info->from_serialized_data)
    2059              :                     {
    2060            2 :                         if (!kind_info->from_serialized_data(&key, header, fpin))
    2061              :                         {
    2062            0 :                             elog(WARNING, "could not read auxiliary data for entry %u/%u/%" PRIu64 " of type %c",
    2063              :                                  key.kind, key.dboid,
    2064              :                                  key.objid, t);
    2065            0 :                             goto error;
    2066              :                         }
    2067              :                     }
    2068              : 
    2069       249065 :                     break;
    2070              :                 }
    2071          835 :             case PGSTAT_FILE_ENTRY_END:
    2072              : 
    2073              :                 /*
    2074              :                  * check that PGSTAT_FILE_ENTRY_END actually signals end of
    2075              :                  * file
    2076              :                  */
    2077          835 :                 if (fgetc(fpin) != EOF)
    2078              :                 {
    2079            1 :                     elog(WARNING, "could not read end-of-file");
    2080            1 :                     goto error;
    2081              :                 }
    2082              : 
    2083          834 :                 goto done;
    2084              : 
    2085            0 :             default:
    2086            0 :                 elog(WARNING, "could not read entry of type %c", t);
    2087            0 :                 goto error;
    2088              :         }
    2089              :     }
    2090              : 
    2091          836 : done:
    2092              :     /* First, clean up the main stats file (the error path also ends up here) */
    2093          836 :     FreeFile(fpin);
    2094              : 
    2095          836 :     elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
    2096          836 :     unlink(statfile);
    2097              : 
    2098              :     /* Finish callbacks, if required */
    2099        27588 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    2100              :     {
    2101        26752 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    2102              : 
    2103        26752 :         if (kind_info && kind_info->finish)
    2104            2 :             kind_info->finish(STATS_READ);
    2105              :     }
    2106              : 
    2107          836 :     return;
    2108              : 
    2109            2 : error:
    2110            2 :     ereport(LOG,
    2111              :             (errmsg("corrupted statistics file \"%s\"", statfile)));
    2112              : 
    2113            2 :     pgstat_reset_after_failure();
    2114              : 
    2115            2 :     goto done;
    2116              : }
    2117              : 
    2118              : /*
    2119              :  * Helper to reset / drop stats after a crash or after restoring stats from
    2120              :  * disk failed, potentially after already loading parts.
    2121              :  */
    2122              : static void
    2123          250 : pgstat_reset_after_failure(void)
    2124              : {
    2125          250 :     TimestampTz ts = GetCurrentTimestamp();
    2126              : 
    2127              :     /* reset fixed-numbered stats, passing the same timestamp to each kind's reset_all_cb */
    2128         8250 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    2129              :     {
    2130         8000 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    2131              : 
    2132         8000 :         if (!kind_info || !kind_info->fixed_amount)
    2133         6249 :             continue;
    2134              : 
    2135         1751 :         kind_info->reset_all_cb(ts);
    2136              :     }
    2137              : 
    2138              :     /* and drop variable-numbered ones */
    2139          250 :     pgstat_drop_all_entries();
    2140          250 : }
    2141              : 
    2142              : /*
    2143              :  * GUC assign_hook for stats_fetch_consistency ('extra' is unused).
    2144              :  */
    2145              : void
    2146         3453 : assign_stats_fetch_consistency(int newval, void *extra)
    2147              : {
    2148              :     /*
    2149              :      * Changing this value in a transaction may cause snapshot state
    2150              :      * inconsistencies, so force a clear of the current snapshot on the next
    2151              :      * snapshot build attempt.  Nothing is flagged when the value is unchanged.
    2152              :      */
    2153         3453 :     if (pgstat_fetch_consistency != newval)
    2154         2056 :         force_stats_snapshot_clear = true;
    2155         3453 : }
        

Generated by: LCOV version 2.0-1