LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 86.5 % 525 454
Test Date: 2026-04-07 14:16:30 Functions: 100.0 % 34 34
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* ----------
       2              :  * pgstat.c
       3              :  *    Infrastructure for the cumulative statistics system.
       4              :  *
       5              :  * The cumulative statistics system accumulates statistics for different kinds
       6              :  * of objects. Some kinds of statistics are collected for a fixed number of
       7              :  * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
       8              :  * statistics are collected for a varying number of objects
       9              :  * (e.g. relations). See PgStat_KindInfo for a list of currently handled
      10              :  * statistics.
      11              :  *
      12              :  * Statistics are loaded from the filesystem during startup (by the startup
      13              :  * process), unless preceded by a crash, in which case all stats are
      14              :  * discarded. They are written out by the checkpointer process just before
      15              :  * shutting down (if the stats kind allows it), except when shutting down in
      16              :  * immediate mode.
      17              :  *
      18              :  * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
      19              :  *
      20              :  * Statistics for variable-numbered objects are stored in dynamic shared
      21              :  * memory and can be found via a dshash hashtable. The statistics counters are
      22              :  * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
      23              :  * separately allocated (PgStatShared_HashEntry->body). The separate
      24              :  * allocation allows different kinds of statistics to be stored in the same
      25              :  * hashtable without wasting space in PgStatShared_HashEntry.
      26              :  *
      27              :  * Variable-numbered stats are addressed by PgStat_HashKey while running.  It
      28              :  * is not possible to have statistics for an object that cannot be addressed
      29              :  * that way at runtime. A wider identifier can be used when serializing to
      30              :  * disk (used for replication slot stats).
      31              :  *
      32              :  * To avoid contention on the shared hashtable, each backend has a
      33              :  * backend-local hashtable (pgStatEntryRefHash) in front of the shared
      34              :  * hashtable, containing references (PgStat_EntryRef) to shared hashtable
      35              :  * entries. The shared hashtable only needs to be accessed when no prior
      36              :  * reference is found in the local hashtable. Besides pointing to the
      37              :  * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
      38              :  * contains a pointer to the shared statistics data, as a process-local
      39              :  * address, to reduce access costs.
      40              :  *
      41              :  * The names for structs stored in shared memory are prefixed with
      42              :  * PgStatShared instead of PgStat. Each stats entry in shared memory is
      43              :  * protected by a dedicated lwlock.
      44              :  *
      45              :  * Most stats updates are first accumulated locally in each process as pending
      46              :  * entries, then later flushed to shared memory (just after commit, or by
      47              :  * idle-timeout). This practically eliminates contention on individual stats
      48              :  * entries. For most kinds of variable-numbered stats, pending data is stored
      49              :  * in PgStat_EntryRef->pending. All entries with pending data are in the
      50              :  * pgStatPending list. Pending statistics updates are flushed out by
      51              :  * pgstat_report_stat().
      52              :  *
      53              :  * It is possible for external modules to define custom statistics kinds,
      54              :  * that can use the same properties as any built-in stats kinds.  Each custom
      55              :  * stats kind needs to assign a unique ID to ensure that it does not overlap
      56              :  * with other extensions.  In order to reserve a unique stats kind ID, refer
      57              :  * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
      58              :  *
      59              :  * The behavior of different kinds of statistics is determined by the kind's
      60              :  * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
      61              :  * defined, and pgstat_kind_custom_infos for custom kinds registered at
      62              :  * startup by pgstat_register_kind().  See PgStat_KindInfo for details.
      63              :  *
      64              :  * The consistency of read accesses to statistics can be configured using the
      65              :  * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
      66              :  * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
      67              :  * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT, statistics are stored in
      68              :  * pgStatLocal.snapshot.
      69              :  *
      70              :  * To keep things manageable, stats handling is split across several
      71              :  * files. Infrastructure pieces are in:
      72              :  * - pgstat.c - this file, to tie it all together
      73              :  * - pgstat_shmem.c - nearly everything dealing with shared memory, including
      74              :  *   the maintenance of hashtable entries
      75              :  * - pgstat_xact.c - transactional integration, including the transactional
      76              :  *   creation and dropping of stats entries
      77              :  *
      78              :  * Each statistics kind is handled in a dedicated file:
      79              :  * - pgstat_archiver.c
      80              :  * - pgstat_backend.c
      81              :  * - pgstat_bgwriter.c
      82              :  * - pgstat_checkpointer.c
      83              :  * - pgstat_database.c
      84              :  * - pgstat_function.c
      85              :  * - pgstat_io.c
      86              :  * - pgstat_lock.c
      87              :  * - pgstat_relation.c
      88              :  * - pgstat_replslot.c
      89              :  * - pgstat_slru.c
      90              :  * - pgstat_subscription.c
      91              :  * - pgstat_wal.c
      92              :  *
      93              :  * Whenever possible infrastructure files should not contain code related to
      94              :  * specific kinds of stats.
      95              :  *
      96              :  *
      97              :  * Copyright (c) 2001-2026, PostgreSQL Global Development Group
      98              :  *
      99              :  * IDENTIFICATION
     100              :  *    src/backend/utils/activity/pgstat.c
     101              :  * ----------
     102              :  */
     103              : #include "postgres.h"
     104              : 
     105              : #include <unistd.h>
     106              : 
     107              : #include "access/xact.h"
     108              : #include "lib/dshash.h"
     109              : #include "pgstat.h"
     110              : #include "storage/fd.h"
     111              : #include "storage/ipc.h"
     112              : #include "storage/lwlock.h"
     113              : #include "utils/guc_hooks.h"
     114              : #include "utils/memutils.h"
     115              : #include "utils/pgstat_internal.h"
     116              : #include "utils/timestamp.h"
     117              : 
     118              : 
     119              : /* ----------
     120              :  * Timer definitions.
     121              :  *
     122              :  * In milliseconds.
     123              :  * ----------
     124              :  */
     125              : 
     126              : /* minimum interval between non-forced stats flushes */
     127              : #define PGSTAT_MIN_INTERVAL         1000
     128              : /* how long to wait before blocking to flush pending stats updates */
     129              : #define PGSTAT_MAX_INTERVAL         60000
     130              : /* when to call pgstat_report_stat() again, even when idle */
     131              : #define PGSTAT_IDLE_INTERVAL        10000
     132              : 
     133              : /* ----------
     134              :  * Initial size hints for the hash tables used in statistics.
     135              :  * ----------
     136              :  */
     137              : 
     138              : #define PGSTAT_SNAPSHOT_HASH_SIZE   512
     139              : 
     140              : /* ---------
     141              :  * Identifiers in stats file.
     142              :  * ---------
     143              :  */
     144              : #define PGSTAT_FILE_ENTRY_END   'E' /* end of file */
     145              : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
     146              : #define PGSTAT_FILE_ENTRY_NAME  'N' /* stats entry identified by name */
     147              : #define PGSTAT_FILE_ENTRY_HASH  'S' /* stats entry identified by
     148              :                                      * PgStat_HashKey */
     149              : 
     150              : /* hash table entry for statistics snapshots */
     151              : typedef struct PgStat_SnapshotEntry
     152              : {
     153              :     PgStat_HashKey key;         /* lookup key (SH_KEY of pgstat_snapshot) */
     154              :     char        status;         /* for simplehash use */
     155              :     void       *data;           /* the stats data itself */
     156              : } PgStat_SnapshotEntry;
     157              : 
     158              : 
     159              : /* ----------
     160              :  * Backend-local Hash Table Definitions
     161              :  * ----------
     162              :  */
     163              : 
     164              : /* simplehash instantiation (pgstat_snapshot_*) for stats snapshot entries */
     165              : #define SH_PREFIX pgstat_snapshot
     166              : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
     167              : #define SH_KEY_TYPE PgStat_HashKey
     168              : #define SH_KEY key
     169              : #define SH_HASH_KEY(tb, key) \
     170              :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
     171              : #define SH_EQUAL(tb, a, b) \
     172              :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
     173              : #define SH_SCOPE static inline
     174              : #define SH_DEFINE
     175              : #define SH_DECLARE
     176              : #include "lib/simplehash.h"
     177              : 
     178              : 
     179              : /* ----------
     180              :  * Local function forward declarations
     181              :  * ----------
     182              :  */
     183              : 
     184              : static void pgstat_write_statsfile(void);
     185              : static void pgstat_read_statsfile(void);
     186              : 
     187              : static void pgstat_init_snapshot_fixed(void);
     188              : 
     189              : static void pgstat_reset_after_failure(void);
     190              : 
     191              : static bool pgstat_flush_pending_entries(bool nowait);
     192              : 
     193              : static void pgstat_prep_snapshot(void);
     194              : static void pgstat_build_snapshot(void);
     195              : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
     196              : 
     197              : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
     198              : 
     199              : 
     200              : /* ----------
     201              :  * GUC parameters
     202              :  * ----------
     203              :  */
     204              : 
     205              : bool        pgstat_track_counts = false;
     206              : int         pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
     207              : 
     208              : 
     209              : /* ----------
     210              :  * state shared with pgstat_*.c
     211              :  * ----------
     212              :  */
     213              : 
     214              : PgStat_LocalState pgStatLocal;
     215              : 
     216              : /*
     217              :  * Track pending reports for fixed-numbered stats, used by
     218              :  * pgstat_report_stat().
     219              :  */
     220              : bool        pgstat_report_fixed = false;
     221              : 
     222              : /* ----------
     223              :  * Local data
     224              :  *
     225              :  * NB: There should be only variables related to stats infrastructure here,
     226              :  * not for specific kinds of stats.
     227              :  * ----------
     228              :  */
     229              : 
     230              : /*
     231              :  * Memory contexts containing the pgStatEntryRefHash table, the
     232              :  * pgStatSharedRef entries, and pending data respectively. Mostly to make it
     233              :  * easier to track / attribute memory usage.
     234              :  */
     235              : 
     236              : static MemoryContext pgStatPendingContext = NULL;
     237              : 
     238              : /*
     239              :  * Backend local list of PgStat_EntryRef with unflushed pending stats.
     240              :  *
     241              :  * Newly pending entries should only ever be added to the end of the list,
     242              :  * otherwise pgstat_flush_pending_entries() might not see them immediately.
     243              :  */
     244              : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
     245              : 
     246              : 
     247              : /*
     248              :  * Force the next stats flush to happen regardless of
     249              :  * PGSTAT_MIN_INTERVAL. Useful in test scripts.
     250              :  */
     251              : static bool pgStatForceNextFlush = false;
     252              : 
     253              : /*
     254              :  * Force-clear existing snapshot before next use when stats_fetch_consistency
     255              :  * is changed.
     256              :  */
     257              : static bool force_stats_snapshot_clear = false;
     258              : 
     259              : 
     260              : /*
     261              :  * For assertions that check pgstat is not used before initialization / after
     262              :  * shutdown.
     263              :  */
     264              : #ifdef USE_ASSERT_CHECKING
     265              : static bool pgstat_is_initialized = false;
     266              : static bool pgstat_is_shutdown = false;
     267              : #endif
     268              : 
     269              : 
     270              : /*
     271              :  * The different kinds of built-in statistics.
     272              :  *
     273              :  * If reasonably possible, handling specific to one kind of stats should go
     274              :  * through this abstraction, rather than making more of pgstat.c aware.
     275              :  *
     276              :  * See comments for struct PgStat_KindInfo for details about the individual
     277              :  * fields.
     278              :  *
     279              :  * XXX: It'd be nicer to define this outside of this file. But there doesn't
     280              :  * seem to be a great way of doing that, given the split across multiple
     281              :  * files.
     282              :  */
     283              : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
     284              : 
     285              :     /* stats kinds for variable-numbered objects (fixed_amount = false) */
     286              : 
     287              :     [PGSTAT_KIND_DATABASE] = {
     288              :         .name = "database",
     289              : 
     290              :         .fixed_amount = false,
     291              :         .write_to_file = true,
     292              :         /* so pg_stat_database entries can be seen in all databases */
     293              :         .accessed_across_databases = true,
     294              : 
     295              :         .shared_size = sizeof(PgStatShared_Database),
     296              :         .shared_data_off = offsetof(PgStatShared_Database, stats),
     297              :         .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
     298              :         .pending_size = sizeof(PgStat_StatDBEntry),
     299              : 
     300              :         .flush_pending_cb = pgstat_database_flush_cb,
     301              :         .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
     302              :     },
     303              : 
     304              :     [PGSTAT_KIND_RELATION] = {
     305              :         .name = "relation",
     306              : 
     307              :         .fixed_amount = false,
     308              :         .write_to_file = true,
     309              : 
     310              :         .shared_size = sizeof(PgStatShared_Relation),
     311              :         .shared_data_off = offsetof(PgStatShared_Relation, stats),
     312              :         .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
     313              :         .pending_size = sizeof(PgStat_TableStatus),
     314              : 
     315              :         .flush_pending_cb = pgstat_relation_flush_cb,
     316              :         .delete_pending_cb = pgstat_relation_delete_pending_cb,
     317              :         .reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
     318              :     },
     319              : 
     320              :     [PGSTAT_KIND_FUNCTION] = {
     321              :         .name = "function",
     322              : 
     323              :         .fixed_amount = false,
     324              :         .write_to_file = true,
     325              : 
     326              :         .shared_size = sizeof(PgStatShared_Function),
     327              :         .shared_data_off = offsetof(PgStatShared_Function, stats),
     328              :         .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
     329              :         .pending_size = sizeof(PgStat_FunctionCounts),
     330              : 
     331              :         .flush_pending_cb = pgstat_function_flush_cb,
     332              :         .reset_timestamp_cb = pgstat_function_reset_timestamp_cb,
     333              :     },
     334              : 
     335              :     [PGSTAT_KIND_REPLSLOT] = {
     336              :         .name = "replslot",
     337              : 
     338              :         .fixed_amount = false,
     339              :         .write_to_file = true,
     340              : 
     341              :         .accessed_across_databases = true,
     342              : 
     343              :         .shared_size = sizeof(PgStatShared_ReplSlot),
     344              :         .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
     345              :         .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
     346              :         /* uses name (de)serialization callbacks; cf. file header on replslot */
     347              :         .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
     348              :         .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
     349              :         .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
     350              :     },
     351              : 
     352              :     [PGSTAT_KIND_SUBSCRIPTION] = {
     353              :         .name = "subscription",
     354              : 
     355              :         .fixed_amount = false,
     356              :         .write_to_file = true,
     357              :         /* so pg_stat_subscription_stats entries can be seen in all databases */
     358              :         .accessed_across_databases = true,
     359              : 
     360              :         .shared_size = sizeof(PgStatShared_Subscription),
     361              :         .shared_data_off = offsetof(PgStatShared_Subscription, stats),
     362              :         .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
     363              :         .pending_size = sizeof(PgStat_BackendSubEntry),
     364              : 
     365              :         .flush_pending_cb = pgstat_subscription_flush_cb,
     366              :         .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
     367              :     },
     368              : 
     369              :     [PGSTAT_KIND_BACKEND] = {
     370              :         .name = "backend",
     371              : 
     372              :         .fixed_amount = false,
     373              :         .write_to_file = false, /* the only built-in kind not written to file */
     374              : 
     375              :         .accessed_across_databases = true,
     376              : 
     377              :         .shared_size = sizeof(PgStatShared_Backend),
     378              :         .shared_data_off = offsetof(PgStatShared_Backend, stats),
     379              :         .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
     380              : 
     381              :         .flush_static_cb = pgstat_backend_flush_cb,
     382              :         .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
     383              :     },
     384              : 
     385              :     /* stats kinds for fixed-numbered (mostly 1) objects (fixed_amount = true) */
     386              : 
     387              :     [PGSTAT_KIND_ARCHIVER] = {
     388              :         .name = "archiver",
     389              : 
     390              :         .fixed_amount = true,
     391              :         .write_to_file = true,
     392              : 
     393              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
     394              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
     395              :         .shared_data_off = offsetof(PgStatShared_Archiver, stats),
     396              :         .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
     397              : 
     398              :         .init_shmem_cb = pgstat_archiver_init_shmem_cb,
     399              :         .reset_all_cb = pgstat_archiver_reset_all_cb,
     400              :         .snapshot_cb = pgstat_archiver_snapshot_cb,
     401              :     },
     402              : 
     403              :     [PGSTAT_KIND_BGWRITER] = {
     404              :         .name = "bgwriter",
     405              : 
     406              :         .fixed_amount = true,
     407              :         .write_to_file = true,
     408              : 
     409              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
     410              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
     411              :         .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
     412              :         .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
     413              : 
     414              :         .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
     415              :         .reset_all_cb = pgstat_bgwriter_reset_all_cb,
     416              :         .snapshot_cb = pgstat_bgwriter_snapshot_cb,
     417              :     },
     418              : 
     419              :     [PGSTAT_KIND_CHECKPOINTER] = {
     420              :         .name = "checkpointer",
     421              : 
     422              :         .fixed_amount = true,
     423              :         .write_to_file = true,
     424              : 
     425              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
     426              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
     427              :         .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
     428              :         .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
     429              : 
     430              :         .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
     431              :         .reset_all_cb = pgstat_checkpointer_reset_all_cb,
     432              :         .snapshot_cb = pgstat_checkpointer_snapshot_cb,
     433              :     },
     434              : 
     435              :     [PGSTAT_KIND_IO] = {
     436              :         .name = "io",
     437              : 
     438              :         .fixed_amount = true,
     439              :         .write_to_file = true,
     440              : 
     441              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
     442              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
     443              :         .shared_data_off = offsetof(PgStatShared_IO, stats),
     444              :         .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
     445              : 
     446              :         .flush_static_cb = pgstat_io_flush_cb,
     447              :         .init_shmem_cb = pgstat_io_init_shmem_cb,
     448              :         .reset_all_cb = pgstat_io_reset_all_cb,
     449              :         .snapshot_cb = pgstat_io_snapshot_cb,
     450              :     },
     451              : 
     452              :     [PGSTAT_KIND_LOCK] = {
     453              :         .name = "lock",
     454              : 
     455              :         .fixed_amount = true,
     456              :         .write_to_file = true,
     457              : 
     458              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, lock),
     459              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, lock),
     460              :         .shared_data_off = offsetof(PgStatShared_Lock, stats),
     461              :         .shared_data_len = sizeof(((PgStatShared_Lock *) 0)->stats),
     462              : 
     463              :         .flush_static_cb = pgstat_lock_flush_cb,
     464              :         .init_shmem_cb = pgstat_lock_init_shmem_cb,
     465              :         .reset_all_cb = pgstat_lock_reset_all_cb,
     466              :         .snapshot_cb = pgstat_lock_snapshot_cb,
     467              :     },
     468              : 
     469              :     [PGSTAT_KIND_SLRU] = {
     470              :         .name = "slru",
     471              : 
     472              :         .fixed_amount = true,
     473              :         .write_to_file = true,
     474              : 
     475              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
     476              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
     477              :         .shared_data_off = offsetof(PgStatShared_SLRU, stats),
     478              :         .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
     479              : 
     480              :         .flush_static_cb = pgstat_slru_flush_cb,
     481              :         .init_shmem_cb = pgstat_slru_init_shmem_cb,
     482              :         .reset_all_cb = pgstat_slru_reset_all_cb,
     483              :         .snapshot_cb = pgstat_slru_snapshot_cb,
     484              :     },
     485              : 
     486              :     [PGSTAT_KIND_WAL] = {
     487              :         .name = "wal",
     488              : 
     489              :         .fixed_amount = true,
     490              :         .write_to_file = true,
     491              : 
     492              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
     493              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
     494              :         .shared_data_off = offsetof(PgStatShared_Wal, stats),
     495              :         .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
     496              : 
     497              :         .init_backend_cb = pgstat_wal_init_backend_cb,
     498              :         .flush_static_cb = pgstat_wal_flush_cb,
     499              :         .init_shmem_cb = pgstat_wal_init_shmem_cb,
     500              :         .reset_all_cb = pgstat_wal_reset_all_cb,
     501              :         .snapshot_cb = pgstat_wal_snapshot_cb,
     502              :     },
     503              : };
     504              : 
     505              : /*
     506              :  * Information about custom statistics kinds.
     507              :  *
     508              :  * These are kept in a separate array from the built-in kinds, for the
     509              :  * sake of clarity in the initializations.
     510              :  *
     511              :  * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
     512              :  */
     513              : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
     514              : 
     515              : /* ------------------------------------------------------------
     516              :  * Functions managing the state of the stats system for all backends.
     517              :  * ------------------------------------------------------------
     518              :  */
     519              : 
     520              : /*
     521              :  * Read on-disk stats into memory at server start, via pgstat_read_statsfile().
     522              :  *
     523              :  * Should only be called by the startup process or in single user mode.
     524              :  */
     525              : void
     526          884 : pgstat_restore_stats(void)
     527              : {
     528          884 :     pgstat_read_statsfile();
     529          884 : }
     530              : 
     531              : /*
     532              :  * Remove the stats file, run the finish callbacks with STATS_DISCARD and
     533              :  * reset stats.  Currently used only if WAL recovery is needed after a crash.
     534              :  *
     535              :  * Should only be called by the startup process or in single user mode.
     536              :  */
     537              : void
     538          186 : pgstat_discard_stats(void)
     539              : {
     540              :     int         ret;
     541              : 
     542              :     /* NB: this needs to be done even in single user mode */
     543              : 
     544              :     /* First, clean up the main pgstats file; unlink failures are only logged */
     545          186 :     ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
     546          186 :     if (ret != 0)
     547              :     {
     548          185 :         if (errno == ENOENT)
     549          185 :             elog(DEBUG2,
     550              :                  "didn't need to unlink permanent stats file \"%s\" - didn't exist",
     551              :                  PGSTAT_STAT_PERMANENT_FILENAME);
     552              :         else
     553            0 :             ereport(LOG,
     554              :                     (errcode_for_file_access(),
     555              :                      errmsg("could not unlink permanent statistics file \"%s\": %m",
     556              :                             PGSTAT_STAT_PERMANENT_FILENAME)));
     557              :     }
     558              :     else
     559              :     {
     560            1 :         ereport(DEBUG2,
     561              :                 (errcode_for_file_access(),
     562              :                  errmsg_internal("unlinked permanent statistics file \"%s\"",
     563              :                                  PGSTAT_STAT_PERMANENT_FILENAME)));
     564              :     }
     565              : 
     566              :     /* Run the finish callback of every known stats kind that defines one */
     567         6138 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     568              :     {
     569         5952 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     570              : 
     571         5952 :         if (kind_info && kind_info->finish)
     572            1 :             kind_info->finish(STATS_DISCARD);
     573              :     }
     574              : 
     575              :     /*
     576              :      * Reset stats contents. This will set reset timestamps of fixed-numbered
     577              :      * stats to the current time (no variable stats exist).
     578              :      */
     579          186 :     pgstat_reset_after_failure();
     580          186 : }
     581              : 
     582              : /*
     583              :  * pgstat_before_server_shutdown() needs to be called by exactly one process
     584              :  * during regular server shutdowns. Otherwise all stats will be lost.
     585              :  *
     586              :  * We currently only write out stats for proc_exit(0). We might want to change
     587              :  * that at some point... But right now pgstat_discard_stats() would be called
     588              :  * during the start after a disorderly shutdown, anyway.
                      :  *
                      :  * 'code' is compared against 0 to recognize a normal shutdown (presumably
                      :  * it is the proc_exit() code -- confirm against the caller that registers
                      :  * this function).  'arg' is not used.
                      :  *
                      :  * This process's own pending stats are always flushed; the shared
                      :  * is_shutdown flag is set and the stats file written only when code == 0.
     589              :  */
     590              : void
     591          766 : pgstat_before_server_shutdown(int code, Datum arg)
     592              : {
     593              :     Assert(pgStatLocal.shmem != NULL);
     594              :     Assert(!pgStatLocal.shmem->is_shutdown);
     595              : 
     596              :     /*
     597              :      * Stats should only be reported after pgstat_initialize() and before
     598              :      * pgstat_shutdown(). This is a convenient point to catch most violations
     599              :      * of this rule.
     600              :      */
     601              :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
     602              : 
     603              :     /* flush out our own pending changes before writing out */
     604          766 :     pgstat_report_stat(true);
     605              : 
     606              :     /*
     607              :      * Only write out file during normal shutdown. Don't even signal that
     608              :      * we've shutdown during irregular shutdowns, because the shutdown
     609              :      * sequence isn't coordinated to ensure this backend shuts down last.
     610              :      */
     611          766 :     if (code == 0)
     612              :     {
     613          757 :         pgStatLocal.shmem->is_shutdown = true;  /* set before the file is written */
     614          757 :         pgstat_write_statsfile();
     615              :     }
     616          766 : }
     617              : 
     618              : 
     619              : /* ------------------------------------------------------------
     620              :  * Backend initialization / shutdown functions
     621              :  * ------------------------------------------------------------
     622              :  */
     623              : 
     624              : /*
     625              :  * Shut down a single backend's statistics reporting at process exit.
     626              :  *
     627              :  * Flush out any remaining statistics counts.  Without this, operations
     628              :  * triggered during backend exit (such as temp table deletions) won't be
     629              :  * counted.
                      :  *
                      :  * Registered through before_shmem_exit() by pgstat_initialize().  Neither
                      :  * 'code' nor 'arg' is used.  After the final forced flush, the
                      :  * PGSTAT_KIND_BACKEND entry keyed by MyProcNumber is dropped and the
                      :  * stats shared memory is detached.
     630              :  */
     631              : static void
     632        24783 : pgstat_shutdown_hook(int code, Datum arg)
     633              : {
     634              :     Assert(!pgstat_is_shutdown);
     635              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
     636              : 
     637              :     /*
     638              :      * If we got as far as discovering our own database ID, we can flush out
     639              :      * what we did so far.  Otherwise, we'd be reporting an invalid database
     640              :      * ID, so forget it.  (This means that accesses to pg_database during
     641              :      * failed backend starts might never get counted.)
     642              :      */
     643        24783 :     if (OidIsValid(MyDatabaseId))
     644        18200 :         pgstat_report_disconnect(MyDatabaseId);
     645              : 
     646        24783 :     pgstat_report_stat(true);   /* forced flush */
     647              : 
     648              :     /*
                      :      * there shouldn't be any pending changes left; the list is
                      :      * re-initialized regardless of the assertion's outcome
                      :      */
     649              :     Assert(dlist_is_empty(&pgStatPending));
     650        24783 :     dlist_init(&pgStatPending);
     651              : 
     652              :     /*
                      :      * drop the backend stats entry; when pgstat_drop_entry() returns
                      :      * false, a garbage collection of entry references is requested
                      :      * (NOTE(review): presumably because other processes still hold
                      :      * references to the entry -- confirm)
                      :      */
     653        24783 :     if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
     654            0 :         pgstat_request_entry_refs_gc();
     655              : 
     656        24783 :     pgstat_detach_shmem();
     657              : 
     658              : #ifdef USE_ASSERT_CHECKING
     659              :     pgstat_is_shutdown = true;
     660              : #endif
     661        24783 : }
     662              : 
     663              : /*
     664              :  * Initialize pgstats state, and set up our on-proc-exit hook. Called from
     665              :  * BaseInit().
     666              :  *
                      :  * In order: attach to the stats shared memory, allocate the snapshot
                      :  * buffers for fixed-numbered custom kinds, run init_backend_cb for every
                      :  * registered kind that provides one, and register pgstat_shutdown_hook()
                      :  * via before_shmem_exit().  Must not be called twice in one process
                      :  * (asserted).
                      :  *
     667              :  * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
     668              :  */
     669              : void
     670        24783 : pgstat_initialize(void)
     671              : {
     672              :     Assert(!pgstat_is_initialized);
     673              : 
     674        24783 :     pgstat_attach_shmem();
     675              : 
     676        24783 :     pgstat_init_snapshot_fixed();
     677              : 
     678              :     /* Backend initialization callbacks */
     679       817839 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     680              :     {
     681       793056 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     682              : 
     683       793056 :         if (kind_info == NULL || kind_info->init_backend_cb == NULL)
     684       768273 :             continue;           /* unregistered kind, or no callback */
     685              : 
     686        24783 :         kind_info->init_backend_cb();
     687              :     }
     688              : 
     689              :     /* Set up a process-exit hook to clean up */
     690        24783 :     before_shmem_exit(pgstat_shutdown_hook, 0);
     691              : 
     692              : #ifdef USE_ASSERT_CHECKING
     693              :     pgstat_is_initialized = true;
     694              : #endif
     695        24783 : }
     696              : 
     697              : 
     698              : /* ------------------------------------------------------------
     699              :  * Public functions used by backends follow
     700              :  * ------------------------------------------------------------
     701              :  */
     702              : 
     703              : /*
     704              :  * Must be called by processes that perform DML: tcop/postgres.c, logical
     705              :  * receiver processes, SPI worker, etc. to flush pending statistics updates to
     706              :  * shared memory.
     707              :  *
     708              :  * Unless called with 'force', pending stats updates are flushed at most once
     709              :  * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
     710              :  * block on lock acquisition, except if stats updates have been pending for
     711              :  * longer than PGSTAT_MAX_INTERVAL (60000ms).
     712              :  *
     713              :  * Whenever pending stats updates remain at the end of pgstat_report_stat() a
     714              :  * suggested idle timeout is returned. Currently this is always
     715              :  * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
     716              :  * a timeout after which to call pgstat_report_stat(true), but are not
     717              :  * required to do so.
                      :  *
                      :  * Otherwise (nothing was pending, or everything was flushed) 0 is
                      :  * returned.
                      :  *
                      :  * A flush request recorded earlier via pgstat_force_next_flush() is
                      :  * consumed here and has the same effect as passing force = true.
     718              :  *
     719              :  * Note that this is called only when not within a transaction, so it is fair
     720              :  * to use transaction stop time as an approximation of current time.
     721              :  */
     722              : long
     723       393336 : pgstat_report_stat(bool force)
     724              : {
     725              :     static TimestampTz pending_since = 0;   /* when unflushed stats were first left pending; 0 if none */
     726              :     static TimestampTz last_flush = 0;  /* 'now' of the most recent flush pass; 0 if none yet */
     727              :     bool        partial_flush;
     728              :     TimestampTz now;
     729              :     bool        nowait;
     730              : 
     731              :     pgstat_assert_is_up();
     732              :     Assert(!IsTransactionOrTransactionBlock());
     733              : 
     734              :     /* "absorb" the forced flush even if there's nothing to flush */
     735       393336 :     if (pgStatForceNextFlush)
     736              :     {
     737          339 :         force = true;
     738          339 :         pgStatForceNextFlush = false;
     739              :     }
     740              : 
     741              :     /* Don't expend a clock check if nothing to do */
     742       393336 :     if (dlist_is_empty(&pgStatPending) &&
     743        11377 :         !pgstat_report_fixed)
     744              :     {
     745         8427 :         return 0;
     746              :     }
     747              : 
     748              :     /*
     749              :      * There should never be stats to report once stats are shut down. Can't
     750              :      * assert that before the checks above, as there is an unconditional
     751              :      * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
     752              :      * the process that ran pgstat_before_server_shutdown() will still call.
     753              :      */
     754              :     Assert(!pgStatLocal.shmem->is_shutdown);
     755              : 
     756       384909 :     if (force)
     757              :     {
     758              :         /*
     759              :          * Stats reports are forced either when it's been too long since stats
     760              :          * have been reported or in processes that force stats reporting to
     761              :          * happen at specific points (including shutdown). In the former case
     762              :          * the transaction stop time might be quite old, in the latter it
     763              :          * would never get cleared.
     764              :          */
     765        23970 :         now = GetCurrentTimestamp();
     766              :     }
     767              :     else
     768              :     {
     769       360939 :         now = GetCurrentTransactionStopTimestamp();
     770              : 
     771       690775 :         if (pending_since > 0 &&
     772       329836 :             TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
     773              :         {
     774              :             /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
     775            0 :             force = true;
     776              :         }
     777       360939 :         else if (last_flush > 0 &&
     778       346372 :                  !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
     779              :         {
     780              :             /*
                      :              * don't flush too frequently; remember since when stats have
                      :              * been waiting so that the PGSTAT_MAX_INTERVAL check above can
                      :              * eventually force a flush
                      :              */
     781       344375 :             if (pending_since == 0)
     782        16308 :                 pending_since = now;
     783              : 
     784       344375 :             return PGSTAT_IDLE_INTERVAL;
     785              :         }
     786              :     }
     787              : 
     788        40534 :     pgstat_update_dbstats(now);
     789              : 
     790              :     /* don't wait for lock acquisition when !force */
     791        40534 :     nowait = !force;
     792              : 
     793        40534 :     partial_flush = false;
     794              : 
     795              :     /* flush of variable-numbered stats tracked in pending entries list */
     796        40534 :     partial_flush |= pgstat_flush_pending_entries(nowait);
     797              : 
     798              :     /*
                      :      * flush of other stats kinds: every registered kind that provides a
                      :      * flush_static_cb gets called; a true result from any of them marks
                      :      * the flush as partial
                      :      */
     799        40534 :     if (pgstat_report_fixed)
     800              :     {
     801      1297758 :         for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     802              :         {
     803      1258432 :             const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     804              : 
     805      1258432 :             if (!kind_info)
     806       747024 :                 continue;
     807       511408 :             if (!kind_info->flush_static_cb)
     808       314778 :                 continue;
     809              : 
     810       196630 :             partial_flush |= kind_info->flush_static_cb(nowait);
     811              :         }
     812              :     }
     813              : 
     814        40534 :     last_flush = now;           /* updated even if the flush was only partial */
     815              : 
     816              :     /*
     817              :      * If some of the pending stats could not be flushed due to lock
     818              :      * contention, let the caller know when to retry.
     819              :      */
     820        40534 :     if (partial_flush)
     821              :     {
     822              :         /* force should have prevented us from getting here */
     823              :         Assert(!force);
     824              : 
     825              :         /* remember since when stats have been pending */
     826            2 :         if (pending_since == 0)
     827            2 :             pending_since = now;
     828              : 
     829            2 :         return PGSTAT_IDLE_INTERVAL;
     830              :     }
     831              : 
     832        40532 :     pending_since = 0;          /* everything flushed: nothing pending anymore */
     833        40532 :     pgstat_report_fixed = false;
     834              : 
     835        40532 :     return 0;
     836              : }
     837              : 
     838              : /*
     839              :  * Force locally pending stats to be flushed during the next
     840              :  * pgstat_report_stat() call. This is useful for writing tests.
                      :  *
                      :  * Only sets the pgStatForceNextFlush flag; pgstat_report_stat() turns it
                      :  * into force = true and clears it again.
     841              :  */
     842              : void
     843          339 : pgstat_force_next_flush(void)
     844              : {
     845          339 :     pgStatForceNextFlush = true;
     846          339 : }
     847              : 
     848              : /*
     849              :  * Only for use by pgstat_reset_counters()
                      :  *
                      :  * Match callback: returns true when the entry's key.dboid equals
                      :  * MyDatabaseId.  'match_data' is not consulted.
     850              :  */
     851              : static bool
     852        16453 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
     853              : {
     854        16453 :     return entry->key.dboid == MyDatabaseId;
     855              : }
     856              : 
     857              : /*
     858              :  * Reset counters for our database.
     859              :  *
                      :  * All entries accepted by match_db_entries() (i.e. whose key.dboid is
                      :  * MyDatabaseId) are reset through pgstat_reset_matching_entries(), using
                      :  * one timestamp taken up front.  MyDatabaseId is also handed over as the
                      :  * match data, although match_db_entries() does not read it.
                      :  *
     860              :  * Permission checking for this function is managed through the normal
     861              :  * GRANT system.
     862              :  */
     863              : void
     864           15 : pgstat_reset_counters(void)
     865              : {
     866           15 :     TimestampTz ts = GetCurrentTimestamp();
     867              : 
     868           15 :     pgstat_reset_matching_entries(match_db_entries,
     869              :                                   ObjectIdGetDatum(MyDatabaseId),
     870              :                                   ts);
     871           15 : }
     872              : 
     873              : /*
     874              :  * Reset a single variable-numbered entry.
     875              :  *
     876              :  * If the stats kind is within a database, also reset the database's
     877              :  * stat_reset_timestamp.
     878              :  *
                      :  * 'kind', 'dboid' and 'objid' identify the entry.  In the code, "within a
                      :  * database" means that the kind's accessed_across_databases flag is
                      :  * false.  The entry and the database timestamp are reset with the same
                      :  * timestamp.
                      :  *
                      :  * The kind info is dereferenced without a NULL check, so 'kind' must be a
                      :  * kind for which pgstat_get_kind_info() returns non-NULL; fixed-numbered
                      :  * kinds are rejected by an assertion.
                      :  *
     879              :  * Permission checking for this function is managed through the normal
     880              :  * GRANT system.
     881              :  */
     882              : void
     883           39 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
     884              : {
     885           39 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     886           39 :     TimestampTz ts = GetCurrentTimestamp();
     887              : 
     888              :     /* not needed atm, and doesn't make sense with the current signature */
     889              :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
     890              : 
     891              :     /* reset the "single counter" */
     892           39 :     pgstat_reset_entry(kind, dboid, objid, ts);
     893              : 
     894           39 :     if (!kind_info->accessed_across_databases)
     895           22 :         pgstat_reset_database_timestamp(dboid, ts);
     896           39 : }
     897              : 
     898              : /*
     899              :  * Reset stats for all entries of a kind.
     900              :  *
                      :  * Fixed-numbered kinds are reset through their reset_all_cb callback,
                      :  * all other kinds through pgstat_reset_entries_of_kind(); both receive the
                      :  * current timestamp.  The kind info is dereferenced without a NULL check,
                      :  * so 'kind' must be a kind for which pgstat_get_kind_info() returns
                      :  * non-NULL.
                      :  *
     901              :  * Permission checking for this function is managed through the normal
     902              :  * GRANT system.
     903              :  */
     904              : void
     905           45 : pgstat_reset_of_kind(PgStat_Kind kind)
     906              : {
     907           45 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     908           45 :     TimestampTz ts = GetCurrentTimestamp();
     909              : 
     910           45 :     if (kind_info->fixed_amount)
     911           41 :         kind_info->reset_all_cb(ts);
     912              :     else
     913            4 :         pgstat_reset_entries_of_kind(kind, ts);
     914           45 : }
     915              : 
     916              : 
     917              : /* ------------------------------------------------------------
     918              :  * Fetching of stats
     919              :  * ------------------------------------------------------------
     920              :  */
     921              : 
     922              : /*
     923              :  * Discard any data collected in the current transaction.  Any subsequent
     924              :  * request will cause new snapshots to be read.
     925              :  *
     926              :  * This is also invoked during transaction commit or abort to discard
     927              :  * the no-longer-wanted snapshot.  Updates of stats_fetch_consistency can
     928              :  * cause this routine to be called.
                      :  *
                      :  * Concretely: the fixed_valid/custom_valid flag arrays are zeroed, the
                      :  * snapshot hash table pointer and mode are reset, the snapshot memory
                      :  * context (if one exists) is deleted, the backend activity snapshot is
                      :  * cleared as well, and force_stats_snapshot_clear is reset.
     929              :  */
     930              : void
     931       630542 : pgstat_clear_snapshot(void)
     932              : {
     933              :     pgstat_assert_is_up();
     934              : 
     935       630542 :     memset(&pgStatLocal.snapshot.fixed_valid, 0,
     936              :            sizeof(pgStatLocal.snapshot.fixed_valid));
     937       630542 :     memset(&pgStatLocal.snapshot.custom_valid, 0,
     938              :            sizeof(pgStatLocal.snapshot.custom_valid));
     939       630542 :     pgStatLocal.snapshot.stats = NULL;
     940       630542 :     pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
     941              : 
     942              :     /*
                      :      * Release memory, if any was allocated.  The hash table pointer was
                      :      * already set to NULL above; pgstat_prep_snapshot() creates that
                      :      * table inside this context.
                      :      */
     943       630542 :     if (pgStatLocal.snapshot.context)
     944              :     {
     945          800 :         MemoryContextDelete(pgStatLocal.snapshot.context);
     946              : 
     947              :         /* Reset variables */
     948          800 :         pgStatLocal.snapshot.context = NULL;
     949              :     }
     950              : 
     951              :     /*
     952              :      * Historically the backend_status.c facilities lived in this file, and
     953              :      * were reset with the same function. For now keep it that way, and
     954              :      * forward the reset request.
     955              :      */
     956       630542 :     pgstat_clear_backend_activity_snapshot();
     957              : 
     958              :     /* Reset this flag, as it may be possible that a cleanup was forced. */
     959       630542 :     force_stats_snapshot_clear = false;
     960       630542 : }
     961              : /*
                      :  * Fetch a copy of the stats data of the variable-numbered entry identified
                      :  * by (kind, dboid, objid).
                      :  *
                      :  * Returns a pointer to a private copy of kind_info->shared_data_len bytes,
                      :  * or NULL if no entry exists or the entry is marked as dropped.
                      :  *
                      :  * Behavior depends on pgstat_fetch_consistency:
                      :  * - PGSTAT_FETCH_CONSISTENCY_NONE: the copy is palloc'd in the caller's
                      :  *   memory context; nothing is cached.
                      :  * - PGSTAT_FETCH_CONSISTENCY_CACHE: results, including "no such entry"
                      :  *   (stored as a NULL data pointer), are remembered in
                      :  *   pgStatLocal.snapshot.stats and served from there on later calls.  The
                      :  *   copy lives in the snapshot memory context.
                      :  * - PGSTAT_FETCH_CONSISTENCY_SNAPSHOT: a full snapshot is built first and
                      :  *   the answer comes exclusively from it.
                      :  *
                      :  * Must not be used for fixed-numbered kinds (asserted).
                      :  */
     962              : void *
     963       277327 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
     964              : {
     965       277327 :     PgStat_HashKey key = {0};
     966              :     PgStat_EntryRef *entry_ref;
     967              :     void       *stats_data;
     968       277327 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     969              : 
     970              :     /* should be called from backends */
     971              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
     972              :     Assert(!kind_info->fixed_amount);
     973              : 
     974       277327 :     pgstat_prep_snapshot();
     975              : 
     976       277327 :     key.kind = kind;
     977       277327 :     key.dboid = dboid;
     978       277327 :     key.objid = objid;
     979              : 
     980              :     /* if we need to build a full snapshot, do so */
     981       277327 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
     982          299 :         pgstat_build_snapshot();
     983              : 
     984              :     /* if caching is desired, look up in cache */
     985       277327 :     if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
     986              :     {
     987         7245 :         PgStat_SnapshotEntry *entry = NULL;
     988              : 
     989         7245 :         entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
     990              : 
     991         7245 :         if (entry)
     992          612 :             return entry->data; /* may be NULL for a cached "no such entry" */
     993              : 
     994              :         /*
     995              :          * If we built a full snapshot and the key is not in
     996              :          * pgStatLocal.snapshot.stats, there are no matching stats.
     997              :          */
     998         6633 :         if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
     999           16 :             return NULL;
    1000              :     }
    1001              : 
    1002       276699 :     pgStatLocal.snapshot.mode = pgstat_fetch_consistency;   /* SNAPSHOT mode has already returned above */
    1003              : 
    1004       276699 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1005              : 
    1006       276699 :     if (entry_ref == NULL || entry_ref->shared_entry->dropped)
    1007              :     {
    1008              :         /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
    1009         7503 :         if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
    1010              :         {
    1011         1363 :             PgStat_SnapshotEntry *entry = NULL;
    1012              :             bool        found;
    1013              : 
    1014         1363 :             entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
    1015              :             Assert(!found);
    1016         1363 :             entry->data = NULL;
    1017              :         }
    1018         7503 :         return NULL;
    1019              :     }
    1020              : 
    1021              :     /*
    1022              :      * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
    1023              :      * otherwise we could quickly end up with a fair bit of memory used due to
    1024              :      * repeated accesses.
    1025              :      */
    1026       269196 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    1027       263942 :         stats_data = palloc(kind_info->shared_data_len);
    1028              :     else
    1029         5254 :         stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
    1030         5254 :                                         kind_info->shared_data_len);
    1031              : 
    1032       269196 :     (void) pgstat_lock_entry_shared(entry_ref, false);  /* result deliberately ignored; NOTE(review): presumably cannot fail with nowait = false -- confirm */
    1033       538392 :     memcpy(stats_data,
    1034       269196 :            pgstat_get_entry_data(kind, entry_ref->shared_stats),
    1035       269196 :            kind_info->shared_data_len);
    1036       269196 :     pgstat_unlock_entry(entry_ref);
    1037              : 
    1038       269196 :     if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
    1039              :     {
    1040         5254 :         PgStat_SnapshotEntry *entry = NULL;
    1041              :         bool        found;      /* required by the insert call; not examined here */
    1042              : 
    1043         5254 :         entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
    1044         5254 :         entry->data = stats_data;
    1045              :     }
    1046              : 
    1047       269196 :     return stats_data;
    1048              : }
    1049              : 
    1050              : /*
    1051              :  * If a stats snapshot has been taken, return the timestamp at which that was
    1052              :  * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
    1053              :  * false.
                      :  *
                      :  * In the "otherwise" case the return value is 0.  A snapshot counts as
                      :  * taken only if pgStatLocal.snapshot.mode is
                      :  * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT.  A pending forced snapshot clear is
                      :  * carried out first, so a snapshot that is due to be discarded is not
                      :  * reported.
    1054              :  */
    1055              : TimestampTz
    1056           40 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
    1057              : {
    1058           40 :     if (force_stats_snapshot_clear)
    1059           12 :         pgstat_clear_snapshot();
    1060              : 
    1061           40 :     if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1062              :     {
    1063           16 :         *have_snapshot = true;
    1064           16 :         return pgStatLocal.snapshot.snapshot_timestamp;
    1065              :     }
    1066              : 
    1067           24 :     *have_snapshot = false;
    1068              : 
    1069           24 :     return 0;
    1070              : }
    1071              : /*
                      :  * Report whether stats exist for (kind, dboid, objid).
                      :  *
                      :  * Always true for fixed-numbered kinds.  For other kinds, true iff
                      :  * pgstat_get_entry_ref() returns a non-NULL reference.
                      :  *
                      :  * NOTE(review): unlike pgstat_fetch_entry(), the entry's 'dropped' flag is
                      :  * not checked here -- confirm that this is intended.
                      :  */
    1072              : bool
    1073           95 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
    1074              : {
    1075              :     /* fixed-numbered stats always exist */
    1076           95 :     if (pgstat_get_kind_info(kind)->fixed_amount)
    1077            8 :         return true;
    1078              : 
    1079           87 :     return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
    1080              : }
    1081              : 
    1082              : /*
    1083              :  * Ensure snapshot for fixed-numbered 'kind' exists.
    1084              :  *
                      :  * A pending forced snapshot clear is carried out first.  With
                      :  * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT the complete snapshot is built,
                      :  * otherwise only the fixed-numbered data of 'kind'.  The trailing
                      :  * if/else-if exists solely to assert that the matching validity flag
                      :  * (fixed_valid[] for builtin kinds, custom_valid[] for custom kinds) is
                      :  * set afterwards.
                      :  *
    1085              :  * Typically used by the pgstat_fetch_* functions for a kind of stats, before
    1086              :  * massaging the data into the desired format.
    1087              :  */
    1088              : void
    1089          280 : pgstat_snapshot_fixed(PgStat_Kind kind)
    1090              : {
    1091              :     Assert(pgstat_is_kind_valid(kind));
    1092              :     Assert(pgstat_get_kind_info(kind)->fixed_amount);
    1093              : 
    1094          280 :     if (force_stats_snapshot_clear)
    1095            0 :         pgstat_clear_snapshot();
    1096              : 
    1097          280 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1098           12 :         pgstat_build_snapshot();
    1099              :     else
    1100          268 :         pgstat_build_snapshot_fixed(kind);
    1101              : 
    1102          280 :     if (pgstat_is_kind_builtin(kind))
    1103              :         Assert(pgStatLocal.snapshot.fixed_valid[kind]);
    1104            5 :     else if (pgstat_is_kind_custom(kind))
    1105              :         Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
    1106          280 : }
    1107              : /*
                      :  * Allocate the per-process snapshot buffers for fixed-numbered custom
                      :  * stats kinds.  Called from pgstat_initialize().
                      :  *
                      :  * For every registered custom kind with fixed_amount set, a buffer of
                      :  * shared_data_len bytes is allocated in TopMemoryContext and stored in
                      :  * pgStatLocal.snapshot.custom_data[], indexed by
                      :  * kind - PGSTAT_KIND_CUSTOM_MIN.  The buffer contents are not initialized
                      :  * here.
                      :  */
    1108              : static void
    1109        24783 : pgstat_init_snapshot_fixed(void)
    1110              : {
    1111              :     /*
    1112              :      * Initialize fixed-numbered statistics data in snapshots, only for custom
    1113              :      * stats kinds.
    1114              :      */
    1115       247830 :     for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
    1116              :     {
    1117       223047 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1118              : 
    1119       223047 :         if (!kind_info || !kind_info->fixed_amount)
    1120       222996 :             continue;           /* unregistered or variable-numbered kind */
    1121              : 
    1122           51 :         pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
    1123           51 :             MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
    1124              :     }
    1125        24783 : }
    1126              : /*
                      :  * Make sure the snapshot hash table exists when stats are to be cached.
                      :  *
                      :  * A pending forced snapshot clear is carried out first.  Nothing else
                      :  * happens if pgstat_fetch_consistency is PGSTAT_FETCH_CONSISTENCY_NONE or
                      :  * the table already exists.  Otherwise the "PgStat Snapshot" memory
                      :  * context is created under TopMemoryContext (unless it already exists)
                      :  * and the hash table is created inside that context.
                      :  */
    1127              : static void
    1128       277357 : pgstat_prep_snapshot(void)
    1129              : {
    1130       277357 :     if (force_stats_snapshot_clear)
    1131           12 :         pgstat_clear_snapshot();
    1132              : 
    1133       277357 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
    1134         7275 :         pgStatLocal.snapshot.stats != NULL)
    1135       276557 :         return;
    1136              : 
    1137          800 :     if (!pgStatLocal.snapshot.context)
    1138          800 :         pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
    1139              :                                                              "PgStat Snapshot",
    1140              :                                                              ALLOCSET_SMALL_SIZES);
    1141              : 
    1142          800 :     pgStatLocal.snapshot.stats =
    1143          800 :         pgstat_snapshot_create(pgStatLocal.snapshot.context,
    1144              :                                PGSTAT_SNAPSHOT_HASH_SIZE,
    1145              :                                NULL);
    1146              : }
    1147              : 
    1148              : static void
    1149          311 : pgstat_build_snapshot(void)
    1150              : {
    1151              :     dshash_seq_status hstat;
    1152              :     PgStatShared_HashEntry *p;
    1153              : 
    1154              :     /* should only be called when we need a snapshot */
    1155              :     Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
    1156              : 
    1157              :     /* snapshot already built */
    1158          311 :     if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1159          281 :         return;
    1160              : 
    1161           30 :     pgstat_prep_snapshot();
    1162              : 
    1163              :     Assert(pgStatLocal.snapshot.stats->members == 0);
    1164              : 
    1165           30 :     pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
    1166              : 
    1167              :     /*
    1168              :      * Snapshot all variable stats.
    1169              :      */
    1170           30 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1171        37260 :     while ((p = dshash_seq_next(&hstat)) != NULL)
    1172              :     {
    1173        37230 :         PgStat_Kind kind = p->key.kind;
    1174        37230 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1175              :         bool        found;
    1176              :         PgStat_SnapshotEntry *entry;
    1177              :         PgStatShared_Common *stats_data;
    1178              : 
    1179              :         /*
    1180              :          * Check if the stats object should be included in the snapshot.
    1181              :          * Unless the stats kind can be accessed from all databases (e.g.,
    1182              :          * database stats themselves), we only include stats for the current
    1183              :          * database or objects not associated with a database (e.g. shared
    1184              :          * relations).
    1185              :          */
    1186        37230 :         if (p->key.dboid != MyDatabaseId &&
    1187         9871 :             p->key.dboid != InvalidOid &&
    1188         8162 :             !kind_info->accessed_across_databases)
    1189         8174 :             continue;
    1190              : 
    1191        29158 :         if (p->dropped)
    1192          102 :             continue;
    1193              : 
    1194              :         Assert(pg_atomic_read_u32(&p->refcount) > 0);
    1195              : 
    1196        29056 :         stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
    1197              :         Assert(stats_data);
    1198              : 
    1199        29056 :         entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
    1200              :         Assert(!found);
    1201              : 
    1202        29056 :         entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
    1203              :                                          pgstat_get_entry_len(kind));
    1204              : 
    1205              :         /*
    1206              :          * Acquire the LWLock directly instead of using
    1207              :          * pg_stat_lock_entry_shared() which requires a reference.
    1208              :          */
    1209        29056 :         LWLockAcquire(&stats_data->lock, LW_SHARED);
    1210        29056 :         memcpy(entry->data,
    1211        29056 :                pgstat_get_entry_data(kind, stats_data),
    1212              :                pgstat_get_entry_len(kind));
    1213        29056 :         LWLockRelease(&stats_data->lock);
    1214              :     }
    1215           30 :     dshash_seq_term(&hstat);
    1216              : 
    1217              :     /*
    1218              :      * Build snapshot of all fixed-numbered stats.
    1219              :      */
    1220          990 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1221              :     {
    1222          960 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1223              : 
    1224          960 :         if (!kind_info)
    1225          570 :             continue;
    1226          390 :         if (!kind_info->fixed_amount)
    1227              :         {
    1228              :             Assert(kind_info->snapshot_cb == NULL);
    1229          180 :             continue;
    1230              :         }
    1231              : 
    1232          210 :         pgstat_build_snapshot_fixed(kind);
    1233              :     }
    1234              : 
    1235           30 :     pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
    1236              : }
    1237              : 
    1238              : static void
    1239         5778 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
    1240              : {
    1241         5778 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1242              :     int         idx;
    1243              :     bool       *valid;
    1244              : 
    1245              :     /* Position in fixed_valid or custom_valid */
    1246         5778 :     if (pgstat_is_kind_builtin(kind))
    1247              :     {
    1248         5772 :         idx = kind;
    1249         5772 :         valid = pgStatLocal.snapshot.fixed_valid;
    1250              :     }
    1251              :     else
    1252              :     {
    1253            6 :         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1254            6 :         valid = pgStatLocal.snapshot.custom_valid;
    1255              :     }
    1256              : 
    1257              :     Assert(kind_info->fixed_amount);
    1258              :     Assert(kind_info->snapshot_cb != NULL);
    1259              : 
    1260         5778 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    1261              :     {
    1262              :         /* rebuild every time */
    1263         5315 :         valid[idx] = false;
    1264              :     }
    1265          463 :     else if (valid[idx])
    1266              :     {
    1267              :         /* in snapshot mode we shouldn't get called again */
    1268              :         Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
    1269            6 :         return;
    1270              :     }
    1271              : 
    1272              :     Assert(!valid[idx]);
    1273              : 
    1274         5772 :     kind_info->snapshot_cb();
    1275              : 
    1276              :     Assert(!valid[idx]);
    1277         5772 :     valid[idx] = true;
    1278              : }
    1279              : 
    1280              : 
    1281              : /* ------------------------------------------------------------
    1282              :  * Backend-local pending stats infrastructure
    1283              :  * ------------------------------------------------------------
    1284              :  */
    1285              : 
    1286              : /*
    1287              :  * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
    1288              :  * stats if not already done.
    1289              :  *
    1290              :  * If created_entry is non-NULL, it'll be set to true if the entry is newly
    1291              :  * created, false otherwise.
    1292              :  */
    1293              : PgStat_EntryRef *
    1294      2362323 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
    1295              : {
    1296              :     PgStat_EntryRef *entry_ref;
    1297              : 
    1298              :     /* need to be able to flush out */
    1299              :     Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
    1300              : 
    1301      2362323 :     if (unlikely(!pgStatPendingContext))
    1302              :     {
    1303        19783 :         pgStatPendingContext =
    1304        19783 :             AllocSetContextCreate(TopMemoryContext,
    1305              :                                   "PgStat Pending",
    1306              :                                   ALLOCSET_SMALL_SIZES);
    1307              :     }
    1308              : 
    1309      2362323 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
    1310              :                                      true, created_entry);
    1311              : 
    1312      2362323 :     if (entry_ref->pending == NULL)
    1313              :     {
    1314      1206032 :         size_t      entrysize = pgstat_get_kind_info(kind)->pending_size;
    1315              : 
    1316              :         Assert(entrysize != (size_t) -1);
    1317              : 
    1318      1206032 :         entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
    1319      1206032 :         dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
    1320              :     }
    1321              : 
    1322      2362323 :     return entry_ref;
    1323              : }
    1324              : 
    1325              : /*
    1326              :  * Return an existing stats entry, or NULL.
    1327              :  *
    1328              :  * This should only be used for helper function for pgstatfuncs.c - outside of
    1329              :  * that it shouldn't be needed.
    1330              :  */
    1331              : PgStat_EntryRef *
    1332           56 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
    1333              : {
    1334              :     PgStat_EntryRef *entry_ref;
    1335              : 
    1336           56 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1337              : 
    1338           56 :     if (entry_ref == NULL || entry_ref->pending == NULL)
    1339           20 :         return NULL;
    1340              : 
    1341           36 :     return entry_ref;
    1342              : }
    1343              : 
    1344              : void
    1345      1206032 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
    1346              : {
    1347      1206032 :     PgStat_Kind kind = entry_ref->shared_entry->key.kind;
    1348      1206032 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1349      1206032 :     void       *pending_data = entry_ref->pending;
    1350              : 
    1351              :     Assert(pending_data != NULL);
    1352              :     /* !fixed_amount stats should be handled explicitly */
    1353              :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
    1354              : 
    1355      1206032 :     if (kind_info->delete_pending_cb)
    1356      1140272 :         kind_info->delete_pending_cb(entry_ref);
    1357              : 
    1358      1206032 :     pfree(pending_data);
    1359      1206032 :     entry_ref->pending = NULL;
    1360              : 
    1361      1206032 :     dlist_delete(&entry_ref->pending_node);
    1362      1206032 : }
    1363              : 
    1364              : /*
    1365              :  * Flush out pending variable-numbered stats.
    1366              :  */
    1367              : static bool
    1368        40534 : pgstat_flush_pending_entries(bool nowait)
    1369              : {
    1370        40534 :     bool        have_pending = false;
    1371        40534 :     dlist_node *cur = NULL;
    1372              : 
    1373              :     /*
    1374              :      * Need to be a bit careful iterating over the list of pending entries.
    1375              :      * Processing a pending entry may queue further pending entries to the end
    1376              :      * of the list that we want to process, so a simple iteration won't do.
    1377              :      * Further complicating matters is that we want to delete the current
    1378              :      * entry in each iteration from the list if we flushed successfully.
    1379              :      *
    1380              :      * So we just keep track of the next pointer in each loop iteration.
    1381              :      */
    1382        40534 :     if (!dlist_is_empty(&pgStatPending))
    1383        37790 :         cur = dlist_head_node(&pgStatPending);
    1384              : 
    1385      1201745 :     while (cur)
    1386              :     {
    1387      1161211 :         PgStat_EntryRef *entry_ref =
    1388              :             dlist_container(PgStat_EntryRef, pending_node, cur);
    1389      1161211 :         PgStat_HashKey key = entry_ref->shared_entry->key;
    1390      1161211 :         PgStat_Kind kind = key.kind;
    1391      1161211 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1392              :         bool        did_flush;
    1393              :         dlist_node *next;
    1394              : 
    1395              :         Assert(!kind_info->fixed_amount);
    1396              :         Assert(kind_info->flush_pending_cb != NULL);
    1397              : 
    1398              :         /* flush the stats, if possible */
    1399      1161211 :         did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
    1400              : 
    1401              :         Assert(did_flush || nowait);
    1402              : 
    1403              :         /* determine next entry, before deleting the pending entry */
    1404      1161211 :         if (dlist_has_next(&pgStatPending, cur))
    1405      1123421 :             next = dlist_next_node(&pgStatPending, cur);
    1406              :         else
    1407        37790 :             next = NULL;
    1408              : 
    1409              :         /* if successfully flushed, remove entry */
    1410      1161211 :         if (did_flush)
    1411      1161209 :             pgstat_delete_pending_entry(entry_ref);
    1412              :         else
    1413            2 :             have_pending = true;
    1414              : 
    1415      1161211 :         cur = next;
    1416              :     }
    1417              : 
    1418              :     Assert(dlist_is_empty(&pgStatPending) == !have_pending);
    1419              : 
    1420        40534 :     return have_pending;
    1421              : }
    1422              : 
    1423              : 
    1424              : /* ------------------------------------------------------------
    1425              :  * Helper / infrastructure functions
    1426              :  * ------------------------------------------------------------
    1427              :  */
    1428              : 
    1429              : PgStat_Kind
    1430           99 : pgstat_get_kind_from_str(char *kind_str)
    1431              : {
    1432          306 :     for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
    1433              :     {
    1434          302 :         if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
    1435           95 :             return kind;
    1436              :     }
    1437              : 
    1438              :     /* Check the custom set of cumulative stats */
    1439            4 :     if (pgstat_kind_custom_infos)
    1440              :     {
    1441            0 :         for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
    1442              :         {
    1443            0 :             uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1444              : 
    1445            0 :             if (pgstat_kind_custom_infos[idx] &&
    1446            0 :                 pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
    1447            0 :                 return kind;
    1448              :         }
    1449              :     }
    1450              : 
    1451            4 :     ereport(ERROR,
    1452              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1453              :              errmsg("invalid statistics kind: \"%s\"", kind_str)));
    1454              :     return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
    1455              : }
    1456              : 
    1457              : static inline bool
    1458       492235 : pgstat_is_kind_valid(PgStat_Kind kind)
    1459              : {
    1460       492235 :     return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
    1461              : }
    1462              : 
    1463              : const PgStat_KindInfo *
    1464      8632909 : pgstat_get_kind_info(PgStat_Kind kind)
    1465              : {
    1466      8632909 :     if (pgstat_is_kind_builtin(kind))
    1467      7103984 :         return &pgstat_kind_builtin_infos[kind];
    1468              : 
    1469      1528925 :     if (pgstat_is_kind_custom(kind))
    1470              :     {
    1471       847505 :         uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1472              : 
    1473       847505 :         if (pgstat_kind_custom_infos == NULL ||
    1474         1865 :             pgstat_kind_custom_infos[idx] == NULL)
    1475       847033 :             return NULL;
    1476          472 :         return pgstat_kind_custom_infos[idx];
    1477              :     }
    1478              : 
    1479       681420 :     return NULL;
    1480              : }
    1481              : 
    1482              : /*
    1483              :  * Register a new stats kind.
    1484              :  *
    1485              :  * PgStat_Kinds must be globally unique across all extensions. Refer
    1486              :  * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
    1487              :  * unique ID for your extension, to avoid conflicts with other extension
    1488              :  * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
    1489              :  * needlessly reserving a new ID.
    1490              :  */
    1491              : void
    1492            6 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
    1493              : {
    1494            6 :     uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1495              : 
    1496            6 :     if (kind_info->name == NULL || strlen(kind_info->name) == 0)
    1497            0 :         ereport(ERROR,
    1498              :                 (errmsg("custom cumulative statistics name is invalid"),
    1499              :                  errhint("Provide a non-empty name for the custom cumulative statistics.")));
    1500              : 
    1501            6 :     if (!pgstat_is_kind_custom(kind))
    1502            0 :         ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
    1503              :                         errhint("Provide a custom cumulative statistics ID between %u and %u.",
    1504              :                                 PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
    1505              : 
    1506            6 :     if (!process_shared_preload_libraries_in_progress)
    1507            0 :         ereport(ERROR,
    1508              :                 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1509              :                  errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
    1510              : 
    1511              :     /*
    1512              :      * Check some data for fixed-numbered stats.
    1513              :      */
    1514            6 :     if (kind_info->fixed_amount)
    1515              :     {
    1516            3 :         if (kind_info->shared_size == 0)
    1517            0 :             ereport(ERROR,
    1518              :                     (errmsg("custom cumulative statistics property is invalid"),
    1519              :                      errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
    1520            3 :         if (kind_info->track_entry_count)
    1521            0 :             ereport(ERROR,
    1522              :                     (errmsg("custom cumulative statistics property is invalid"),
    1523              :                      errhint("Custom cumulative statistics cannot use entry count tracking for fixed-numbered objects.")));
    1524              :     }
    1525              : 
    1526              :     /*
    1527              :      * If pgstat_kind_custom_infos is not available yet, allocate it.
    1528              :      */
    1529            6 :     if (pgstat_kind_custom_infos == NULL)
    1530              :     {
    1531            3 :         pgstat_kind_custom_infos = (const PgStat_KindInfo **)
    1532            3 :             MemoryContextAllocZero(TopMemoryContext,
    1533              :                                    sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
    1534              :     }
    1535              : 
    1536            6 :     if (pgstat_kind_custom_infos[idx] != NULL &&
    1537            0 :         pgstat_kind_custom_infos[idx]->name != NULL)
    1538            0 :         ereport(ERROR,
    1539              :                 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1540              :                  errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
    1541              :                            pgstat_kind_custom_infos[idx]->name)));
    1542              : 
    1543              :     /* check for existing custom stats with the same name */
    1544           60 :     for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
    1545              :     {
    1546           54 :         uint32      existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
    1547              : 
    1548           54 :         if (pgstat_kind_custom_infos[existing_idx] == NULL)
    1549           51 :             continue;
    1550            3 :         if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
    1551            0 :             ereport(ERROR,
    1552              :                     (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1553              :                      errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
    1554              :     }
    1555              : 
    1556              :     /* Register it */
    1557            6 :     pgstat_kind_custom_infos[idx] = kind_info;
    1558            6 :     ereport(LOG,
    1559              :             (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
    1560              :                     kind_info->name, kind)));
    1561            6 : }
    1562              : 
    1563              : /*
    1564              :  * Stats should only be reported after pgstat_initialize() and before
    1565              :  * pgstat_shutdown(). This check is put in a few central places to catch
    1566              :  * violations of this rule more easily.
    1567              :  */
    1568              : #ifdef USE_ASSERT_CHECKING
    1569              : void
    1570              : pgstat_assert_is_up(void)
    1571              : {
    1572              :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
    1573              : }
    1574              : #endif
    1575              : 
    1576              : 
    1577              : /* ------------------------------------------------------------
    1578              :  * reading and writing of on-disk stats file
    1579              :  * ------------------------------------------------------------
    1580              :  */
    1581              : 
    1582              : /* helper for pgstat_write_statsfile() */
    1583              : void
    1584       491162 : pgstat_write_chunk(FILE *fpout, void *ptr, size_t len)
    1585              : {
    1586              :     int         rc;
    1587              : 
    1588       491162 :     rc = fwrite(ptr, len, 1, fpout);
    1589              : 
    1590              :     /* We check for errors with ferror() when done writing the stats. */
    1591              :     (void) rc;
    1592       491162 : }
    1593              : 
    1594              : /*
    1595              :  * This function is called in the last process that is accessing the shared
    1596              :  * stats so locking is not required.
    1597              :  */
    1598              : static void
    1599          757 : pgstat_write_statsfile(void)
    1600              : {
    1601              :     FILE       *fpout;
    1602              :     int32       format_id;
    1603          757 :     const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
    1604          757 :     const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    1605              :     dshash_seq_status hstat;
    1606              :     PgStatShared_HashEntry *ps;
    1607              : 
    1608              :     pgstat_assert_is_up();
    1609              : 
    1610              :     /* should be called only by the checkpointer or single user mode */
    1611              :     Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
    1612              : 
    1613              :     /* we're shutting down, so it's ok to just override this */
    1614          757 :     pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
    1615              : 
    1616          757 :     elog(DEBUG2, "writing stats file \"%s\"", statfile);
    1617              : 
    1618              :     /*
    1619              :      * Open the statistics temp file to write out the current values.
    1620              :      */
    1621          757 :     fpout = AllocateFile(tmpfile, PG_BINARY_W);
    1622          757 :     if (fpout == NULL)
    1623              :     {
    1624            0 :         ereport(LOG,
    1625              :                 (errcode_for_file_access(),
    1626              :                  errmsg("could not open temporary statistics file \"%s\": %m",
    1627              :                         tmpfile)));
    1628            0 :         return;
    1629              :     }
    1630              : 
    1631              :     /*
    1632              :      * Write the file header --- currently just a format ID.
    1633              :      */
    1634          757 :     format_id = PGSTAT_FILE_FORMAT_ID;
    1635          757 :     pgstat_write_chunk_s(fpout, &format_id);
    1636              : 
    1637              :     /* Write various stats structs for fixed number of objects */
    1638        24981 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1639              :     {
    1640              :         char       *ptr;
    1641        24224 :         const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
    1642              : 
    1643        24224 :         if (!info || !info->fixed_amount)
    1644        18924 :             continue;
    1645              : 
    1646         5300 :         if (pgstat_is_kind_builtin(kind))
    1647              :             Assert(info->snapshot_ctl_off != 0);
    1648              : 
    1649              :         /* skip if no need to write to file */
    1650         5300 :         if (!info->write_to_file)
    1651            0 :             continue;
    1652              : 
    1653         5300 :         pgstat_build_snapshot_fixed(kind);
    1654         5300 :         if (pgstat_is_kind_builtin(kind))
    1655         5299 :             ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
    1656              :         else
    1657            1 :             ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
    1658              : 
    1659         5300 :         fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
    1660         5300 :         pgstat_write_chunk_s(fpout, &kind);
    1661         5300 :         pgstat_write_chunk(fpout, ptr, info->shared_data_len);
    1662              :     }
    1663              : 
    1664              :     /*
    1665              :      * Walk through the stats entries
    1666              :      */
    1667          757 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1668       240730 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
    1669              :     {
    1670              :         PgStatShared_Common *shstats;
    1671       239973 :         const PgStat_KindInfo *kind_info = NULL;
    1672              : 
    1673       239973 :         CHECK_FOR_INTERRUPTS();
    1674              : 
    1675              :         /*
    1676              :          * We should not see any "dropped" entries when writing the stats
    1677              :          * file, as all backends and auxiliary processes should have cleaned
    1678              :          * up their references before they terminated.
    1679              :          *
    1680              :          * However, since we are already shutting down, it is not worth
    1681              :          * crashing the server over any potential cleanup issues, so we simply
    1682              :          * skip such entries if encountered.
    1683              :          */
    1684              :         Assert(!ps->dropped);
    1685       239973 :         if (ps->dropped)
    1686            0 :             continue;
    1687              : 
    1688              :         /*
    1689              :          * This discards data related to custom stats kinds that are unknown
    1690              :          * to this process.
    1691              :          */
    1692       239973 :         if (!pgstat_is_kind_valid(ps->key.kind))
    1693              :         {
    1694            0 :             elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
    1695              :                  ps->key.kind, ps->key.dboid,
    1696              :                  ps->key.objid);
    1697            0 :             continue;
    1698              :         }
    1699              : 
    1700       239973 :         shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
    1701              : 
    1702       239973 :         kind_info = pgstat_get_kind_info(ps->key.kind);
    1703              : 
    1704              :         /* if not dropped the valid-entry refcount should exist */
    1705              :         Assert(pg_atomic_read_u32(&ps->refcount) > 0);
    1706              : 
    1707              :         /* skip if no need to write to file */
    1708       239973 :         if (!kind_info->write_to_file)
    1709          131 :             continue;
    1710              : 
    1711       239842 :         if (!kind_info->to_serialized_name)
    1712              :         {
    1713              :             /* normal stats entry, identified by PgStat_HashKey */
    1714       239731 :             fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
    1715       239731 :             pgstat_write_chunk_s(fpout, &ps->key);
    1716              :         }
    1717              :         else
    1718              :         {
    1719              :             /* stats entry identified by name on disk (e.g. slots) */
    1720              :             NameData    name;
    1721              : 
    1722          111 :             kind_info->to_serialized_name(&ps->key, shstats, &name);
    1723              : 
    1724          111 :             fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
    1725          111 :             pgstat_write_chunk_s(fpout, &ps->key.kind);
    1726          111 :             pgstat_write_chunk_s(fpout, &name);
    1727              :         }
    1728              : 
    1729              :         /* Write the entry's data, excluding its header part */
    1730       239842 :         pgstat_write_chunk(fpout,
    1731              :                            pgstat_get_entry_data(ps->key.kind, shstats),
    1732              :                            pgstat_get_entry_len(ps->key.kind));
    1733              : 
    1734              :         /* Write more data for the entry, if required */
    1735       239842 :         if (kind_info->to_serialized_data)
    1736            2 :             kind_info->to_serialized_data(&ps->key, shstats, fpout);
    1737              :     }
    1738          757 :     dshash_seq_term(&hstat);
    1739              : 
    1740              :     /*
    1741              :      * No more output to be done. Close the temp file and replace the old
    1742              :      * pgstat.stat with it.  The ferror() check replaces testing for error
    1743              :      * after each individual fputc or fwrite (in pgstat_write_chunk()) above.
    1744              :      */
    1745          757 :     fputc(PGSTAT_FILE_ENTRY_END, fpout);
    1746              : 
    1747          757 :     if (ferror(fpout))
    1748              :     {
    1749            0 :         ereport(LOG,
    1750              :                 (errcode_for_file_access(),
    1751              :                  errmsg("could not write temporary statistics file \"%s\": %m",
    1752              :                         tmpfile)));
    1753            0 :         FreeFile(fpout);
    1754            0 :         unlink(tmpfile);
    1755              :     }
    1756          757 :     else if (FreeFile(fpout) < 0)
    1757              :     {
    1758            0 :         ereport(LOG,
    1759              :                 (errcode_for_file_access(),
    1760              :                  errmsg("could not close temporary statistics file \"%s\": %m",
    1761              :                         tmpfile)));
    1762            0 :         unlink(tmpfile);
    1763              :     }
    1764          757 :     else if (durable_rename(tmpfile, statfile, LOG) < 0)
    1765              :     {
    1766              :         /* durable_rename already emitted log message */
    1767            0 :         unlink(tmpfile);
    1768              :     }
    1769              : 
    1770              :     /* Finish callbacks, if required */
    1771        24981 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1772              :     {
    1773        24224 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1774              : 
    1775        24224 :         if (kind_info && kind_info->finish)
    1776            1 :             kind_info->finish(STATS_WRITE);
    1777              :     }
    1778              : }
    1779              : 
    1780              : /*
                      :  * Helper for pgstat_read_statsfile(): read exactly "len" bytes from "fpin"
                      :  * into "ptr".
                      :  *
                      :  * Returns true only when fread() delivered all "len" bytes; any short read
                      :  * yields false.
                      :  */
    1781              : bool
    1782       505437 : pgstat_read_chunk(FILE *fpin, void *ptr, size_t len)
    1783              : {
    1784       505437 :     return fread(ptr, 1, len, fpin) == len;
    1785              : }
    1786              : 
    1787              : /*
    1788              :  * Reads in existing statistics file into memory.
    1789              :  *
    1790              :  * This function is called in the only process that is accessing the shared
    1791              :  * stats so locking is not required.
                      :  *
                      :  * After the format ID, the file is consumed as a sequence of entries, each
                      :  * introduced by a one-byte type tag: fixed-numbered stats, entries keyed by
                      :  * PgStat_HashKey, entries keyed by name, and a final end marker.  Read or
                      :  * validation failures jump to "error", which logs the corruption and
                      :  * resets all stats via pgstat_reset_after_failure().
                      :  *
                      :  * On both the success and the error path the file is closed and unlinked,
                      :  * and every stats kind's "finish" callback (if any) is invoked with
                      :  * STATS_READ.  If the file cannot be opened at all, stats are reset and
                      :  * the function returns without running that cleanup.
    1792              :  */
    1793              : static void
    1794          884 : pgstat_read_statsfile(void)
    1795              : {
    1796              :     FILE       *fpin;
    1797              :     int32       format_id;
    1798              :     bool        found;
    1799          884 :     const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    1800          884 :     PgStat_ShmemControl *shmem = pgStatLocal.shmem;
    1801              : 
    1802              :     /* shouldn't be called from postmaster */
    1803              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
    1804              : 
    1805          884 :     elog(DEBUG2, "reading stats file \"%s\"", statfile);
    1806              : 
    1807              :     /*
    1808              :      * Try to open the stats file. If it doesn't exist, the backends simply
    1809              :      * return zero for anything and statistics simply start from scratch
    1810              :      * with empty counters.
    1811              :      *
    1812              :      * ENOENT is a possibility if stats collection was previously disabled or
    1813              :      * has not yet written the stats file for the first time.  Any other
    1814              :      * failure condition is suspicious.
    1815              :      */
    1816          884 :     if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
    1817              :     {
    1818           58 :         if (errno != ENOENT)
    1819            0 :             ereport(LOG,
    1820              :                     (errcode_for_file_access(),
    1821              :                      errmsg("could not open statistics file \"%s\": %m",
    1822              :                             statfile)));
    1823           58 :         pgstat_reset_after_failure();
    1824           58 :         return;
    1825              :     }
    1826              : 
    1827              :     /*
    1828              :      * Verify it's of the expected format.
    1829              :      */
    1830          826 :     if (!pgstat_read_chunk_s(fpin, &format_id))
    1831              :     {
    1832            0 :         elog(WARNING, "could not read format ID");
    1833            0 :         goto error;
    1834              :     }
    1835              : 
    1836          826 :     if (format_id != PGSTAT_FILE_FORMAT_ID)
    1837              :     {
    1838            1 :         elog(WARNING, "found incorrect format ID %d (expected %d)",
    1839              :              format_id, PGSTAT_FILE_FORMAT_ID);
    1840            1 :         goto error;
    1841              :     }
    1842              : 
    1843              :     /*
    1844              :      * We found an existing statistics file. Read it and put all the stats
    1845              :      * data into place.  Each iteration reads one type tag and dispatches on
                      :      * it; the loop is only left through "goto done" or "goto error".
    1846              :      */
    1847              :     for (;;)
    1848       252262 :     {
    1849       253087 :         int         t = fgetc(fpin);
    1850              : 
    1851       253087 :         switch (t)
    1852              :         {
    1853         5776 :             case PGSTAT_FILE_ENTRY_FIXED:
    1854              :                 {
    1855              :                     PgStat_Kind kind;
    1856              :                     const PgStat_KindInfo *info;
    1857              :                     char       *ptr;
    1858              : 
    1859              :                     /* entry for fixed-numbered stats */
    1860         5776 :                     if (!pgstat_read_chunk_s(fpin, &kind))
    1861              :                     {
    1862            0 :                         elog(WARNING, "could not read stats kind for entry of type %c", t);
    1863            0 :                         goto error;
    1864              :                     }
    1865              : 
    1866         5776 :                     if (!pgstat_is_kind_valid(kind))
    1867              :                     {
    1868            0 :                         elog(WARNING, "invalid stats kind %u for entry of type %c",
    1869              :                              kind, t);
    1870            0 :                         goto error;
    1871              :                     }
    1872              : 
    1873         5776 :                     info = pgstat_get_kind_info(kind);
    1874         5776 :                     if (!info)
    1875              :                     {
    1876            0 :                         elog(WARNING, "could not find information of kind %u for entry of type %c",
    1877              :                              kind, t);
    1878            0 :                         goto error;
    1879              :                     }
    1880              : 
    1881         5776 :                     if (!info->fixed_amount)
    1882              :                     {
    1883            0 :                         elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
    1884              :                              kind, t);
    1885            0 :                         goto error;
    1886              :                     }
    1887              : 
    1888              :                     /*
                      :                      * Load back stats into shared memory.  Built-in kinds
                      :                      * are located by offset from the shmem control struct;
                      :                      * other kinds use their slot in shmem->custom_data.
                      :                      */
    1889         5776 :                     if (pgstat_is_kind_builtin(kind))
    1890         5775 :                         ptr = ((char *) shmem) + info->shared_ctl_off +
    1891         5775 :                             info->shared_data_off;
    1892              :                     else
    1893              :                     {
    1894            1 :                         int         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1895              : 
    1896            1 :                         ptr = ((char *) shmem->custom_data[idx]) +
    1897            1 :                             info->shared_data_off;
    1898              :                     }
    1899              : 
    1900         5776 :                     if (!pgstat_read_chunk(fpin, ptr, info->shared_data_len))
    1901              :                     {
    1902            0 :                         elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
    1903              :                              kind, t, info->shared_data_len);
    1904            0 :                         goto error;
    1905              :                     }
    1906              : 
    1907         5776 :                     break;
    1908              :                 }
    1909       246486 :             case PGSTAT_FILE_ENTRY_HASH:
    1910              :             case PGSTAT_FILE_ENTRY_NAME:
    1911              :                 {
    1912              :                     PgStat_HashKey key;
    1913              :                     PgStatShared_HashEntry *p;
    1914              :                     PgStatShared_Common *header;
    1915       246486 :                     const PgStat_KindInfo *kind_info = NULL;
    1916              : 
    1917       246486 :                     CHECK_FOR_INTERRUPTS();
    1918              : 
    1919       246486 :                     if (t == PGSTAT_FILE_ENTRY_HASH)
    1920              :                     {
    1921              :                         /* normal stats entry, identified by PgStat_HashKey */
    1922       246408 :                         if (!pgstat_read_chunk_s(fpin, &key))
    1923              :                         {
    1924            0 :                             elog(WARNING, "could not read key for entry of type %c", t);
    1925            0 :                             goto error;
    1926              :                         }
    1927              : 
    1928       246408 :                         if (!pgstat_is_kind_valid(key.kind))
    1929              :                         {
    1930            0 :                             elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
    1931              :                                  key.kind, key.dboid,
    1932              :                                  key.objid, t);
    1933            0 :                             goto error;
    1934              :                         }
    1935              : 
    1936       246408 :                         kind_info = pgstat_get_kind_info(key.kind);
    1937       246408 :                         if (!kind_info)
    1938              :                         {
    1939            0 :                             elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
    1940              :                                  key.kind, key.dboid,
    1941              :                                  key.objid, t);
    1942            0 :                             goto error;
    1943              :                         }
    1944              :                     }
    1945              :                     else
    1946              :                     {
    1947              :                         /* stats entry identified by name on disk (e.g. slots) */
    1948              :                         PgStat_Kind kind;
    1949              :                         NameData    name;
    1950              : 
    1951           78 :                         if (!pgstat_read_chunk_s(fpin, &kind))
    1952              :                         {
    1953            0 :                             elog(WARNING, "could not read stats kind for entry of type %c", t);
    1954            0 :                             goto error;
    1955              :                         }
    1956           78 :                         if (!pgstat_read_chunk_s(fpin, &name))
    1957              :                         {
    1958            0 :                             elog(WARNING, "could not read name of stats kind %u for entry of type %c",
    1959              :                                  kind, t);
    1960            0 :                             goto error;
    1961              :                         }
    1962           78 :                         if (!pgstat_is_kind_valid(kind))
    1963              :                         {
    1964            0 :                             elog(WARNING, "invalid stats kind %u for entry of type %c",
    1965              :                                  kind, t);
    1966            0 :                             goto error;
    1967              :                         }
    1968              : 
    1969           78 :                         kind_info = pgstat_get_kind_info(kind);
    1970           78 :                         if (!kind_info)
    1971              :                         {
    1972            0 :                             elog(WARNING, "could not find information of kind %u for entry of type %c",
    1973              :                                  kind, t);
    1974            0 :                             goto error;
    1975              :                         }
    1976              : 
    1977           78 :                         if (!kind_info->from_serialized_name)
    1978              :                         {
    1979            0 :                             elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
    1980              :                                  kind, t);
    1981            0 :                             goto error;
    1982              :                         }
    1983              : 
    1984           78 :                         if (!kind_info->from_serialized_name(&name, &key))
    1985              :                         {
    1986              :                             /*
                      :                              * Skip over data for an entry we don't care about.
                      :                              *
                      :                              * NOTE(review): only pgstat_get_entry_len() bytes
                      :                              * are skipped.  If a kind using serialized names
                      :                              * also wrote auxiliary data (to_serialized_data),
                      :                              * that data is not skipped here -- confirm that
                      :                              * no such kind exists.
                      :                              */
    1987            1 :                             if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
    1988              :                             {
    1989            0 :                                 elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
    1990              :                                      NameStr(name), kind, t);
    1991            0 :                                 goto error;
    1992              :                             }
    1993              : 
    1994            1 :                             continue;   /* next iteration of the enclosing for (;;) */
    1995              :                         }
    1996              : 
    1997              :                         Assert(key.kind == kind);
    1998              :                     }
    1999              : 
    2000              :                     /*
    2001              :                      * This intentionally doesn't use pgstat_get_entry_ref() -
    2002              :                      * putting all stats into checkpointer's
    2003              :                      * pgStatEntryRefHash would be wasted effort and memory.
    2004              :                      */
    2005       246485 :                     p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
    2006              : 
    2007              :                     /* don't allow duplicate entries */
    2008       246485 :                     if (found)
    2009              :                     {
    2010            0 :                         dshash_release_lock(pgStatLocal.shared_hash, p);
    2011            0 :                         elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
    2012              :                              key.kind, key.dboid,
    2013              :                              key.objid, t);
    2014            0 :                         goto error;
    2015              :                     }
    2016              : 
    2017       246485 :                     header = pgstat_init_entry(key.kind, p);
    2018       246485 :                     dshash_release_lock(pgStatLocal.shared_hash, p);
    2019       246485 :                     if (header == NULL)
    2020              :                     {
    2021              :                         /*
    2022              :                          * It would be tempting to switch this ERROR to a
    2023              :                          * WARNING, but it would mean that all the statistics
    2024              :                          * are discarded when the environment fails on OOM.
    2025              :                          */
    2026            0 :                         elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
    2027              :                              key.kind, key.dboid,
    2028              :                              key.objid, t);
    2029              :                     }
    2030              : 
    2031       246485 :                     if (!pgstat_read_chunk(fpin,
    2032              :                                            pgstat_get_entry_data(key.kind, header),
    2033              :                                            pgstat_get_entry_len(key.kind)))
    2034              :                     {
    2035            0 :                         elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
    2036              :                              key.kind, key.dboid,
    2037              :                              key.objid, t);
    2038            0 :                         goto error;
    2039              :                     }
    2040              : 
    2041              :                     /* read more data for the entry, if required */
    2042       246485 :                     if (kind_info->from_serialized_data)
    2043              :                     {
    2044            2 :                         if (!kind_info->from_serialized_data(&key, header, fpin))
    2045              :                         {
    2046            0 :                             elog(WARNING, "could not read auxiliary data for entry %u/%u/%" PRIu64 " of type %c",
    2047              :                                  key.kind, key.dboid,
    2048              :                                  key.objid, t);
    2049            0 :                             goto error;
    2050              :                         }
    2051              :                     }
    2052              : 
    2053       246485 :                     break;
    2054              :                 }
    2055          825 :             case PGSTAT_FILE_ENTRY_END:
    2056              : 
    2057              :                 /*
    2058              :                  * check that PGSTAT_FILE_ENTRY_END actually signals end of
    2059              :                  * file
    2060              :                  */
    2061          825 :                 if (fgetc(fpin) != EOF)
    2062              :                 {
    2063            1 :                     elog(WARNING, "could not read end-of-file");
    2064            1 :                     goto error;
    2065              :                 }
    2066              : 
    2067          824 :                 goto done;
    2068              : 
    2069            0 :             default:    /* unknown tag; presumably also where a premature EOF from fgetc() lands */
    2070            0 :                 elog(WARNING, "could not read entry of type %c", t);
    2071            0 :                 goto error;
    2072              :         }
    2073              :     }
    2074              : 
    2075          826 : done:
    2076              :     /* First, cleanup the main stats file */
    2077          826 :     FreeFile(fpin);
    2078              : 
    2079          826 :     elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
    2080          826 :     unlink(statfile);
    2081              : 
    2082              :     /* Finish callbacks, if required */
    2083        27258 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    2084              :     {
    2085        26432 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    2086              : 
    2087        26432 :         if (kind_info && kind_info->finish)
    2088            2 :             kind_info->finish(STATS_READ);
    2089              :     }
    2090              : 
    2091          826 :     return;
    2092              : 
    2093            2 : error:
    2094            2 :     ereport(LOG,
    2095              :             (errmsg("corrupted statistics file \"%s\"", statfile)));
    2096              : 
    2097            2 :     pgstat_reset_after_failure();
    2098              : 
    2099            2 :     goto done;  /* reuse the close / unlink / finish-callback cleanup above */
    2100              : }
    2101              : 
    2102              : /*
    2103              :  * Helper to reset / drop stats after a crash or after restoring stats from
    2104              :  * disk failed, potentially after already loading parts.
                      :  *
                      :  * Every fixed-numbered kind has its reset_all_cb invoked with one shared
                      :  * timestamp taken at entry; all variable-numbered entries are then dropped
                      :  * via pgstat_drop_all_entries().
                      :  *
                      :  * NOTE(review): reset_all_cb is called without a NULL check, so every kind
                      :  * with fixed_amount set is presumably required to provide it -- confirm.
    2105              :  */
    2106              : static void
    2107          246 : pgstat_reset_after_failure(void)
    2108              : {
    2109          246 :     TimestampTz ts = GetCurrentTimestamp();
    2110              : 
    2111              :     /* reset fixed-numbered stats */
    2112         8118 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    2113              :     {
    2114         7872 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    2115              : 
    2116         7872 :         if (!kind_info || !kind_info->fixed_amount)
    2117         6149 :             continue;   /* no info for this kind, or not a fixed-numbered kind */
    2118              : 
    2119         1723 :         kind_info->reset_all_cb(ts);
    2120              :     }
    2121              : 
    2122              :     /* and drop variable-numbered ones */
    2123          246 :     pgstat_drop_all_entries();
    2124          246 : }
    2125              : 
    2126              : /*
    2127              :  * GUC assign_hook for stats_fetch_consistency.
                      :  *
                      :  * Sets force_stats_snapshot_clear, but only when "newval" differs from the
                      :  * current pgstat_fetch_consistency.  "extra" is unused.
    2128              :  */
    2129              : void
    2130         3352 : assign_stats_fetch_consistency(int newval, void *extra)
    2131              : {
    2132              :     /*
    2133              :      * Changing this value in a transaction may cause snapshot state
    2134              :      * inconsistencies, so force a clear of the current snapshot on the next
    2135              :      * snapshot build attempt.
    2136              :      */
    2137         3352 :     if (pgstat_fetch_consistency != newval)
    2138         1969 :         force_stats_snapshot_clear = true;
    2139         3352 : }
        

Generated by: LCOV version 2.0-1