LCOV - code coverage report
Current view: top level - src/backend/utils/activity - pgstat.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 86.5 % 525 454
Test Date: 2026-03-02 04:14:39 Functions: 100.0 % 34 34
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* ----------
       2              :  * pgstat.c
       3              :  *    Infrastructure for the cumulative statistics system.
       4              :  *
       5              :  * The cumulative statistics system accumulates statistics for different kinds
       6              :  * of objects. Some kinds of statistics are collected for a fixed number of
       7              :  * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
       8              :  * statistics are collected for a varying number of objects
       9              :  * (e.g. relations). See PgStat_KindInfo for a list of currently handled
      10              :  * statistics.
      11              :  *
      12              :  * Statistics are loaded from the filesystem during startup (by the startup
      13              :  * process), unless preceded by a crash, in which case all stats are
      14              :  * discarded. They are written out by the checkpointer process just before
      15              :  * shutting down (if the stats kind allows it), except when shutting down in
      16              :  * immediate mode.
      17              :  *
      18              :  * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
      19              :  *
      20              :  * Statistics for variable-numbered objects are stored in dynamic shared
      21              :  * memory and can be found via a dshash hashtable. The statistics counters are
      22              :  * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
      23              :  * separately allocated (PgStatShared_HashEntry->body). The separate
      24              :  * allocation allows different kinds of statistics to be stored in the same
      25              :  * hashtable without wasting space in PgStatShared_HashEntry.
      26              :  *
      27              :  * Variable-numbered stats are addressed by PgStat_HashKey while running.  It
      28              :  * is not possible to have statistics for an object that cannot be addressed
      29              :  * that way at runtime. A wider identifier can be used when serializing to
      30              :  * disk (used for replication slot stats).
      31              :  *
      32              :  * To avoid contention on the shared hashtable, each backend has a
      33              :  * backend-local hashtable (pgStatEntryRefHash) in front of the shared
      34              :  * hashtable, containing references (PgStat_EntryRef) to shared hashtable
      35              :  * entries. The shared hashtable only needs to be accessed when no prior
      36              :  * reference is found in the local hashtable. Besides pointing to the
      37              :  * shared hashtable entry (PgStatShared_HashEntry), PgStat_EntryRef also
      38              :  * contains a pointer to the shared statistics data, as a process-local
      39              :  * address, to reduce access costs.
      40              :  *
      41              :  * The names for structs stored in shared memory are prefixed with
      42              :  * PgStatShared instead of PgStat. Each stats entry in shared memory is
      43              :  * protected by a dedicated lwlock.
      44              :  *
      45              :  * Most stats updates are first accumulated locally in each process as pending
      46              :  * entries, then later flushed to shared memory (just after commit, or by
      47              :  * idle-timeout). This practically eliminates contention on individual stats
      48              :  * entries. For most kinds of variable-numbered stats, pending data is stored
      49              :  * in PgStat_EntryRef->pending. All entries with pending data are in the
      50              :  * pgStatPending list. Pending statistics updates are flushed out by
      51              :  * pgstat_report_stat().
      52              :  *
      53              :  * It is possible for external modules to define custom statistics kinds,
      54              :  * that can use the same properties as any built-in stats kinds.  Each custom
      55              :  * stats kind needs to assign a unique ID to ensure that it does not overlap
      56              :  * with other extensions.  In order to reserve a unique stats kind ID, refer
      57              :  * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
      58              :  *
      59              :  * The behavior of different kinds of statistics is determined by the kind's
      60              :  * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
      61              :  * defined, and pgstat_kind_custom_infos for custom kinds registered at
      62              :  * startup by pgstat_register_kind().  See PgStat_KindInfo for details.
      63              :  *
      64              :  * The consistency of read accesses to statistics can be configured using the
      65              :  * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
      66              :  * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
      67              :  * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT, statistics are stored in
      68              :  * pgStatLocal.snapshot.
      69              :  *
      70              :  * To keep things manageable, stats handling is split across several
      71              :  * files. Infrastructure pieces are in:
      72              :  * - pgstat.c - this file, to tie it all together
      73              :  * - pgstat_shmem.c - nearly everything dealing with shared memory, including
      74              :  *   the maintenance of hashtable entries
      75              :  * - pgstat_xact.c - transactional integration, including the transactional
      76              :  *   creation and dropping of stats entries
      77              :  *
      78              :  * Each statistics kind is handled in a dedicated file:
      79              :  * - pgstat_archiver.c
      80              :  * - pgstat_backend.c
      81              :  * - pgstat_bgwriter.c
      82              :  * - pgstat_checkpointer.c
      83              :  * - pgstat_database.c
      84              :  * - pgstat_function.c
      85              :  * - pgstat_io.c
      86              :  * - pgstat_relation.c
      87              :  * - pgstat_replslot.c
      88              :  * - pgstat_slru.c
      89              :  * - pgstat_subscription.c
      90              :  * - pgstat_wal.c
      91              :  *
      92              :  * Whenever possible infrastructure files should not contain code related to
      93              :  * specific kinds of stats.
      94              :  *
      95              :  *
      96              :  * Copyright (c) 2001-2026, PostgreSQL Global Development Group
      97              :  *
      98              :  * IDENTIFICATION
      99              :  *    src/backend/utils/activity/pgstat.c
     100              :  * ----------
     101              :  */
     102              : #include "postgres.h"
     103              : 
     104              : #include <unistd.h>
     105              : 
     106              : #include "access/xact.h"
     107              : #include "lib/dshash.h"
     108              : #include "pgstat.h"
     109              : #include "storage/fd.h"
     110              : #include "storage/ipc.h"
     111              : #include "storage/lwlock.h"
     112              : #include "utils/guc_hooks.h"
     113              : #include "utils/memutils.h"
     114              : #include "utils/pgstat_internal.h"
     115              : #include "utils/timestamp.h"
     116              : 
     117              : 
     118              : /* ----------
     119              :  * Timer definitions.
     120              :  *
     121              :  * In milliseconds.
     122              :  * ----------
     123              :  */
     124              : 
     125              : /* minimum interval between non-forced stats flushes */
     126              : #define PGSTAT_MIN_INTERVAL         1000
     127              : /* how long until flushing pending stats updates is allowed to block */
     128              : #define PGSTAT_MAX_INTERVAL         60000
     129              : /* when to call pgstat_report_stat() again, even when idle */
     130              : #define PGSTAT_IDLE_INTERVAL        10000
     131              : 
     132              : /* ----------
     133              :  * Initial size hints for the hash tables used in statistics.
     134              :  * ----------
     135              :  */
     136              : 
     137              : #define PGSTAT_SNAPSHOT_HASH_SIZE   512
     138              : 
     139              : /* ---------
     140              :  * Identifiers in stats file.
     141              :  * ---------
     142              :  */
     143              : #define PGSTAT_FILE_ENTRY_END   'E' /* end of file */
     144              : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
     145              : #define PGSTAT_FILE_ENTRY_NAME  'N' /* stats entry identified by name */
     146              : #define PGSTAT_FILE_ENTRY_HASH  'S' /* stats entry identified by
     147              :                                      * PgStat_HashKey */
     148              : 
     149              : /* hash table entry for statistics snapshots */
     150              : typedef struct PgStat_SnapshotEntry
     151              : {
     152              :     PgStat_HashKey key;
     153              :     char        status;         /* for simplehash use */
     154              :     void       *data;           /* the stats data itself */
     155              : } PgStat_SnapshotEntry;
     156              : 
     157              : 
     158              : /* ----------
     159              :  * Backend-local Hash Table Definitions
     160              :  * ----------
     161              :  */
     162              : 
     163              : /* for stats snapshot entries */
     164              : #define SH_PREFIX pgstat_snapshot
     165              : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
     166              : #define SH_KEY_TYPE PgStat_HashKey
     167              : #define SH_KEY key
     168              : #define SH_HASH_KEY(tb, key) \
     169              :     pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
     170              : #define SH_EQUAL(tb, a, b) \
     171              :     pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
     172              : #define SH_SCOPE static inline
     173              : #define SH_DEFINE
     174              : #define SH_DECLARE
     175              : #include "lib/simplehash.h"
     176              : 
     177              : 
     178              : /* ----------
     179              :  * Local function forward declarations
     180              :  * ----------
     181              :  */
     182              : 
     183              : static void pgstat_write_statsfile(void);
     184              : static void pgstat_read_statsfile(void);
     185              : 
     186              : static void pgstat_init_snapshot_fixed(void);
     187              : 
     188              : static void pgstat_reset_after_failure(void);
     189              : 
     190              : static bool pgstat_flush_pending_entries(bool nowait);
     191              : 
     192              : static void pgstat_prep_snapshot(void);
     193              : static void pgstat_build_snapshot(void);
     194              : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
     195              : 
     196              : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
     197              : 
     198              : 
     199              : /* ----------
     200              :  * GUC parameters
     201              :  * ----------
     202              :  */
     203              : 
     204              : bool        pgstat_track_counts = false;
     205              : int         pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
     206              : 
     207              : 
     208              : /* ----------
     209              :  * state shared with pgstat_*.c
     210              :  * ----------
     211              :  */
     212              : 
     213              : PgStat_LocalState pgStatLocal;
     214              : 
     215              : /*
     216              :  * Track pending reports for fixed-numbered stats, used by
     217              :  * pgstat_report_stat().
     218              :  */
     219              : bool        pgstat_report_fixed = false;
     220              : 
     221              : /* ----------
     222              :  * Local data
     223              :  *
     224              :  * NB: There should be only variables related to stats infrastructure here,
     225              :  * not for specific kinds of stats.
     226              :  * ----------
     227              :  */
     228              : 
     229              : /*
     230              :  * Memory contexts containing the pgStatEntryRefHash table, the
     231              :  * pgStatSharedRef entries, and pending data respectively. Mostly to make it
     232              :  * easier to track / attribute memory usage.
     233              :  */
     234              : 
     235              : static MemoryContext pgStatPendingContext = NULL;
     236              : 
     237              : /*
     238              :  * Backend local list of PgStat_EntryRef with unflushed pending stats.
     239              :  *
     240              :  * Newly pending entries should only ever be added to the end of the list,
     241              :  * otherwise pgstat_flush_pending_entries() might not see them immediately.
     242              :  */
     243              : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
     244              : 
     245              : 
     246              : /*
     247              :  * Force the next stats flush to happen regardless of
     248              :  * PGSTAT_MIN_INTERVAL. Useful in test scripts.
     249              :  */
     250              : static bool pgStatForceNextFlush = false;
     251              : 
     252              : /*
     253              :  * Force-clear existing snapshot before next use when stats_fetch_consistency
     254              :  * is changed.
     255              :  */
     256              : static bool force_stats_snapshot_clear = false;
     257              : 
     258              : 
     259              : /*
     260              :  * For assertions that check pgstat is not used before initialization / after
     261              :  * shutdown.
     262              :  */
     263              : #ifdef USE_ASSERT_CHECKING
     264              : static bool pgstat_is_initialized = false;
     265              : static bool pgstat_is_shutdown = false;
     266              : #endif
     267              : 
     268              : 
     269              : /*
     270              :  * The different kinds of built-in statistics.
     271              :  *
     272              :  * If reasonably possible, handling specific to one kind of stats should go
     273              :  * through this abstraction, rather than making more of pgstat.c aware.
     274              :  *
     275              :  * See comments for struct PgStat_KindInfo for details about the individual
     276              :  * fields.
     277              :  *
     278              :  * XXX: It'd be nicer to define this outside of this file. But there doesn't
     279              :  * seem to be a great way of doing that, given the split across multiple
     280              :  * files.
     281              :  */
     282              : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
     283              : 
     284              :     /* stats kinds for variable-numbered objects */
     285              : 
     286              :     [PGSTAT_KIND_DATABASE] = {
     287              :         .name = "database",
     288              : 
     289              :         .fixed_amount = false,
     290              :         .write_to_file = true,
     291              :         /* so pg_stat_database entries can be seen in all databases */
     292              :         .accessed_across_databases = true,
     293              : 
     294              :         .shared_size = sizeof(PgStatShared_Database),
     295              :         .shared_data_off = offsetof(PgStatShared_Database, stats),
     296              :         .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
     297              :         .pending_size = sizeof(PgStat_StatDBEntry),
     298              : 
     299              :         .flush_pending_cb = pgstat_database_flush_cb,
     300              :         .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
     301              :     },
     302              : 
     303              :     [PGSTAT_KIND_RELATION] = {
     304              :         .name = "relation",
     305              : 
     306              :         .fixed_amount = false,
     307              :         .write_to_file = true,
     308              : 
     309              :         .shared_size = sizeof(PgStatShared_Relation),
     310              :         .shared_data_off = offsetof(PgStatShared_Relation, stats),
     311              :         .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
     312              :         .pending_size = sizeof(PgStat_TableStatus),
     313              : 
     314              :         .flush_pending_cb = pgstat_relation_flush_cb,
     315              :         .delete_pending_cb = pgstat_relation_delete_pending_cb,
     316              :         .reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
     317              :     },
     318              : 
     319              :     [PGSTAT_KIND_FUNCTION] = {
     320              :         .name = "function",
     321              : 
     322              :         .fixed_amount = false,
     323              :         .write_to_file = true,
     324              : 
     325              :         .shared_size = sizeof(PgStatShared_Function),
     326              :         .shared_data_off = offsetof(PgStatShared_Function, stats),
     327              :         .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
     328              :         .pending_size = sizeof(PgStat_FunctionCounts),
     329              : 
     330              :         .flush_pending_cb = pgstat_function_flush_cb,
     331              :         .reset_timestamp_cb = pgstat_function_reset_timestamp_cb,
     332              :     },
     333              : 
     334              :     [PGSTAT_KIND_REPLSLOT] = {
     335              :         .name = "replslot",
     336              : 
     337              :         .fixed_amount = false,
     338              :         .write_to_file = true,
     339              : 
     340              :         .accessed_across_databases = true,
     341              : 
     342              :         .shared_size = sizeof(PgStatShared_ReplSlot),
     343              :         .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
     344              :         .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
     345              : 
     346              :         .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
     347              :         .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
     348              :         .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
     349              :     },
     350              : 
     351              :     [PGSTAT_KIND_SUBSCRIPTION] = {
     352              :         .name = "subscription",
     353              : 
     354              :         .fixed_amount = false,
     355              :         .write_to_file = true,
     356              :         /* so pg_stat_subscription_stats entries can be seen in all databases */
     357              :         .accessed_across_databases = true,
     358              : 
     359              :         .shared_size = sizeof(PgStatShared_Subscription),
     360              :         .shared_data_off = offsetof(PgStatShared_Subscription, stats),
     361              :         .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
     362              :         .pending_size = sizeof(PgStat_BackendSubEntry),
     363              : 
     364              :         .flush_pending_cb = pgstat_subscription_flush_cb,
     365              :         .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
     366              :     },
     367              : 
     368              :     [PGSTAT_KIND_BACKEND] = {
     369              :         .name = "backend",
     370              : 
     371              :         .fixed_amount = false,
     372              :         .write_to_file = false,
     373              : 
     374              :         .accessed_across_databases = true,
     375              : 
     376              :         .shared_size = sizeof(PgStatShared_Backend),
     377              :         .shared_data_off = offsetof(PgStatShared_Backend, stats),
     378              :         .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
     379              : 
     380              :         .flush_static_cb = pgstat_backend_flush_cb,
     381              :         .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
     382              :     },
     383              : 
     384              :     /* stats for fixed-numbered (mostly 1) objects */
     385              : 
     386              :     [PGSTAT_KIND_ARCHIVER] = {
     387              :         .name = "archiver",
     388              : 
     389              :         .fixed_amount = true,
     390              :         .write_to_file = true,
     391              : 
     392              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
     393              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
     394              :         .shared_data_off = offsetof(PgStatShared_Archiver, stats),
     395              :         .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
     396              : 
     397              :         .init_shmem_cb = pgstat_archiver_init_shmem_cb,
     398              :         .reset_all_cb = pgstat_archiver_reset_all_cb,
     399              :         .snapshot_cb = pgstat_archiver_snapshot_cb,
     400              :     },
     401              : 
     402              :     [PGSTAT_KIND_BGWRITER] = {
     403              :         .name = "bgwriter",
     404              : 
     405              :         .fixed_amount = true,
     406              :         .write_to_file = true,
     407              : 
     408              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
     409              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
     410              :         .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
     411              :         .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
     412              : 
     413              :         .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
     414              :         .reset_all_cb = pgstat_bgwriter_reset_all_cb,
     415              :         .snapshot_cb = pgstat_bgwriter_snapshot_cb,
     416              :     },
     417              : 
     418              :     [PGSTAT_KIND_CHECKPOINTER] = {
     419              :         .name = "checkpointer",
     420              : 
     421              :         .fixed_amount = true,
     422              :         .write_to_file = true,
     423              : 
     424              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
     425              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
     426              :         .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
     427              :         .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
     428              : 
     429              :         .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
     430              :         .reset_all_cb = pgstat_checkpointer_reset_all_cb,
     431              :         .snapshot_cb = pgstat_checkpointer_snapshot_cb,
     432              :     },
     433              : 
     434              :     [PGSTAT_KIND_IO] = {
     435              :         .name = "io",
     436              : 
     437              :         .fixed_amount = true,
     438              :         .write_to_file = true,
     439              : 
     440              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
     441              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
     442              :         .shared_data_off = offsetof(PgStatShared_IO, stats),
     443              :         .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
     444              : 
     445              :         .flush_static_cb = pgstat_io_flush_cb,
     446              :         .init_shmem_cb = pgstat_io_init_shmem_cb,
     447              :         .reset_all_cb = pgstat_io_reset_all_cb,
     448              :         .snapshot_cb = pgstat_io_snapshot_cb,
     449              :     },
     450              : 
     451              :     [PGSTAT_KIND_SLRU] = {
     452              :         .name = "slru",
     453              : 
     454              :         .fixed_amount = true,
     455              :         .write_to_file = true,
     456              : 
     457              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
     458              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
     459              :         .shared_data_off = offsetof(PgStatShared_SLRU, stats),
     460              :         .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
     461              : 
     462              :         .flush_static_cb = pgstat_slru_flush_cb,
     463              :         .init_shmem_cb = pgstat_slru_init_shmem_cb,
     464              :         .reset_all_cb = pgstat_slru_reset_all_cb,
     465              :         .snapshot_cb = pgstat_slru_snapshot_cb,
     466              :     },
     467              : 
     468              :     [PGSTAT_KIND_WAL] = {
     469              :         .name = "wal",
     470              : 
     471              :         .fixed_amount = true,
     472              :         .write_to_file = true,
     473              : 
     474              :         .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
     475              :         .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
     476              :         .shared_data_off = offsetof(PgStatShared_Wal, stats),
     477              :         .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
     478              : 
     479              :         .init_backend_cb = pgstat_wal_init_backend_cb,
     480              :         .flush_static_cb = pgstat_wal_flush_cb,
     481              :         .init_shmem_cb = pgstat_wal_init_shmem_cb,
     482              :         .reset_all_cb = pgstat_wal_reset_all_cb,
     483              :         .snapshot_cb = pgstat_wal_snapshot_cb,
     484              :     },
     485              : };
     486              : 
     487              : /*
     488              :  * Information about custom statistics kinds.
     489              :  *
     490              :  * These are kept in a different array than the built-in kinds, for the sake
     491              :  * of clarity in the initializations.
     492              :  *
     493              :  * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
     494              :  */
     495              : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
     496              : 
     497              : /* ------------------------------------------------------------
     498              :  * Functions managing the state of the stats system for all backends.
     499              :  * ------------------------------------------------------------
     500              :  */
     501              : 
     502              : /*
     503              :  * Read on-disk stats into memory at server start.
     504              :  *
     505              :  * Should only be called by the startup process or in single user mode.
     506              :  */
     507              : void
     508          818 : pgstat_restore_stats(void)
     509              : {
     510          818 :     pgstat_read_statsfile();
     511          818 : }
     512              : 
     513              : /*
     514              :  * Remove the stats file.  This is currently used only if WAL recovery is
     515              :  * needed after a crash.
     516              :  *
     517              :  * Should only be called by the startup process or in single user mode.
     518              :  */
     519              : void
     520          185 : pgstat_discard_stats(void)
     521              : {
     522              :     int         ret;
     523              : 
     524              :     /* NB: this needs to be done even in single user mode */
     525              : 
     526              :     /* First, cleanup the main pgstats file */
     527          185 :     ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
     528          185 :     if (ret != 0)
     529              :     {
     530          184 :         if (errno == ENOENT)
     531          184 :             elog(DEBUG2,
     532              :                  "didn't need to unlink permanent stats file \"%s\" - didn't exist",
     533              :                  PGSTAT_STAT_PERMANENT_FILENAME);
     534              :         else
     535            0 :             ereport(LOG,
     536              :                     (errcode_for_file_access(),
     537              :                      errmsg("could not unlink permanent statistics file \"%s\": %m",
     538              :                             PGSTAT_STAT_PERMANENT_FILENAME)));
     539              :     }
     540              :     else
     541              :     {
     542            1 :         ereport(DEBUG2,
     543              :                 (errcode_for_file_access(),
     544              :                  errmsg_internal("unlinked permanent statistics file \"%s\"",
     545              :                                  PGSTAT_STAT_PERMANENT_FILENAME)));
     546              :     }
     547              : 
     548              :     /* Finish callbacks, if required */
     549         6105 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     550              :     {
     551         5920 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     552              : 
     553         5920 :         if (kind_info && kind_info->finish)
     554            1 :             kind_info->finish(STATS_DISCARD);
     555              :     }
     556              : 
     557              :     /*
     558              :      * Reset stats contents. This will set reset timestamps of fixed-numbered
     559              :      * stats to the current time (no variable stats exist).
     560              :      */
     561          185 :     pgstat_reset_after_failure();
     562          185 : }
     563              : 
     564              : /*
     565              :  * pgstat_before_server_shutdown() needs to be called by exactly one process
     566              :  * during regular server shutdowns. Otherwise all stats will be lost.
     567              :  *
     568              :  * We currently only write out stats for proc_exit(0). We might want to change
     569              :  * that at some point... But right now pgstat_discard_stats() would be called
     570              :  * during the start after a disorderly shutdown, anyway.
     571              :  */
     572              : void
     573          702 : pgstat_before_server_shutdown(int code, Datum arg)
     574              : {
     575              :     Assert(pgStatLocal.shmem != NULL);
     576              :     Assert(!pgStatLocal.shmem->is_shutdown);
     577              : 
     578              :     /*
     579              :      * Stats should only be reported after pgstat_initialize() and before
     580              :      * pgstat_shutdown(). This is a convenient point to catch most violations
     581              :      * of this rule.
     582              :      */
     583              :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
     584              : 
     585              :     /* flush out our own pending changes before writing out */
     586          702 :     pgstat_report_stat(true);
     587              : 
     588              :     /*
     589              :      * Only write out file during normal shutdown. Don't even signal that
     590              :      * we've shut down during irregular shutdowns, because the shutdown
     591              :      * sequence isn't coordinated to ensure this backend shuts down last.
     592              :      */
     593          702 :     if (code == 0)
     594              :     {
     595          694 :         pgStatLocal.shmem->is_shutdown = true;
     596          694 :         pgstat_write_statsfile();
     597              :     }
     598          702 : }
     599              : 
     600              : 
     601              : /* ------------------------------------------------------------
     602              :  * Backend initialization / shutdown functions
     603              :  * ------------------------------------------------------------
     604              :  */
     605              : 
     606              : /*
     607              :  * Shut down a single backend's statistics reporting at process exit.
     608              :  *
     609              :  * Flush out any remaining statistics counts.  Without this, operations
     610              :  * triggered during backend exit (such as temp table deletions) won't be
     611              :  * counted.
     612              :  */
     613              : static void
     614        23358 : pgstat_shutdown_hook(int code, Datum arg)
     615              : {
     616              :     Assert(!pgstat_is_shutdown);
     617              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
     618              : 
     619              :     /*
     620              :      * If we got as far as discovering our own database ID, we can flush out
     621              :      * what we did so far.  Otherwise, we'd be reporting an invalid database
     622              :      * ID, so forget it.  (This means that accesses to pg_database during
     623              :      * failed backend starts might never get counted.)
     624              :      */
     625        23358 :     if (OidIsValid(MyDatabaseId))
     626        17304 :         pgstat_report_disconnect(MyDatabaseId);
     627              : 
     628        23358 :     pgstat_report_stat(true);
     629              : 
     630              :     /* there shouldn't be any pending changes left */
     631              :     Assert(dlist_is_empty(&pgStatPending));
     632        23358 :     dlist_init(&pgStatPending);
     633              : 
     634              :     /* drop the backend stats entry */
     635        23358 :     if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
     636            0 :         pgstat_request_entry_refs_gc();
     637              : 
     638        23358 :     pgstat_detach_shmem();
     639              : 
     640              : #ifdef USE_ASSERT_CHECKING
     641              :     pgstat_is_shutdown = true;
     642              : #endif
     643        23358 : }
     644              : 
     645              : /*
     646              :  * Initialize pgstats state, and set up our on-proc-exit hook. Called from
     647              :  * BaseInit().
     648              :  *
     649              :  * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
     650              :  */
     651              : void
     652        23358 : pgstat_initialize(void)
     653              : {
     654              :     Assert(!pgstat_is_initialized);
     655              : 
     656        23358 :     pgstat_attach_shmem();
     657              : 
     658        23358 :     pgstat_init_snapshot_fixed();
     659              : 
     660              :     /* Backend initialization callbacks */
     661       770814 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     662              :     {
     663       747456 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     664              : 
     665       747456 :         if (kind_info == NULL || kind_info->init_backend_cb == NULL)
     666       724098 :             continue;
     667              : 
     668        23358 :         kind_info->init_backend_cb();
     669              :     }
     670              : 
     671              :     /* Set up a process-exit hook to clean up */
     672        23358 :     before_shmem_exit(pgstat_shutdown_hook, 0);
     673              : 
     674              : #ifdef USE_ASSERT_CHECKING
     675              :     pgstat_is_initialized = true;
     676              : #endif
     677        23358 : }
     678              : 
     679              : 
     680              : /* ------------------------------------------------------------
     681              :  * Public functions used by backends follow
     682              :  * ------------------------------------------------------------
     683              :  */
     684              : 
     685              : /*
     686              :  * Must be called by processes that perform DML: tcop/postgres.c, logical
     687              :  * receiver processes, SPI worker, etc. to flush pending statistics updates to
     688              :  * shared memory.
     689              :  *
     690              :  * Unless called with 'force', pending stats updates are flushed once
     691              :  * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
     692              :  * block on lock acquisition, except if stats updates have been pending for
     693              :  * longer than PGSTAT_MAX_INTERVAL (60000ms).
     694              :  *
     695              :  * Whenever pending stats updates remain at the end of pgstat_report_stat() a
     696              :  * suggested idle timeout is returned. Currently this is always
     697              :  * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
     698              :  * a timeout after which to call pgstat_report_stat(true), but are not
     699              :  * required to do so.
     700              :  *
     701              :  * Note that this is called only when not within a transaction, so it is fair
     702              :  * to use transaction stop time as an approximation of current time.
     703              :  */
     704              : long
     705       320704 : pgstat_report_stat(bool force)
     706              : {
     707              :     static TimestampTz pending_since = 0;
     708              :     static TimestampTz last_flush = 0;
     709              :     bool        partial_flush;
     710              :     TimestampTz now;
     711              :     bool        nowait;
     712              : 
     713              :     pgstat_assert_is_up();
     714              :     Assert(!IsTransactionOrTransactionBlock());
     715              : 
     716              :     /* "absorb" the forced flush even if there's nothing to flush */
     717       320704 :     if (pgStatForceNextFlush)
     718              :     {
     719          287 :         force = true;
     720          287 :         pgStatForceNextFlush = false;
     721              :     }
     722              : 
     723              :     /* Don't expend a clock check if nothing to do */
     724       320704 :     if (dlist_is_empty(&pgStatPending) &&
     725        10293 :         !pgstat_report_fixed)
     726              :     {
     727         7522 :         return 0;
     728              :     }
     729              : 
     730              :     /*
     731              :      * There should never be stats to report once stats are shut down. Can't
     732              :      * assert that before the checks above, as there is an unconditional
     733              :      * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
     734              :      * the process that ran pgstat_before_server_shutdown() will still call.
     735              :      */
     736              :     Assert(!pgStatLocal.shmem->is_shutdown);
     737              : 
     738       313182 :     if (force)
     739              :     {
     740              :         /*
     741              :          * Stats reports are forced either when it's been too long since stats
     742              :          * have been reported or in processes that force stats reporting to
     743              :          * happen at specific points (including shutdown). In the former case
     744              :          * the transaction stop time might be quite old, in the latter it
     745              :          * would never get cleared.
     746              :          */
     747        22577 :         now = GetCurrentTimestamp();
     748              :     }
     749              :     else
     750              :     {
     751       290605 :         now = GetCurrentTransactionStopTimestamp();
     752              : 
     753       551772 :         if (pending_since > 0 &&
     754       261167 :             TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
     755              :         {
     756              :             /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
     757            0 :             force = true;
     758              :         }
     759       290605 :         else if (last_flush > 0 &&
     760       276703 :                  !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
     761              :         {
     762              :             /* don't flush too frequently */
     763       274966 :             if (pending_since == 0)
     764        15301 :                 pending_since = now;
     765              : 
     766       274966 :             return PGSTAT_IDLE_INTERVAL;
     767              :         }
     768              :     }
     769              : 
     770        38216 :     pgstat_update_dbstats(now);
     771              : 
     772              :     /* don't wait for lock acquisition when !force */
     773        38216 :     nowait = !force;
     774              : 
     775        38216 :     partial_flush = false;
     776              : 
     777              :     /* flush of variable-numbered stats tracked in pending entries list */
     778        38216 :     partial_flush |= pgstat_flush_pending_entries(nowait);
     779              : 
     780              :     /* flush of other stats kinds */
     781        38216 :     if (pgstat_report_fixed)
     782              :     {
     783      1224927 :         for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
     784              :         {
     785      1187808 :             const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     786              : 
     787      1187808 :             if (!kind_info)
     788       742218 :                 continue;
     789       445590 :             if (!kind_info->flush_static_cb)
     790       297114 :                 continue;
     791              : 
     792       148476 :             partial_flush |= kind_info->flush_static_cb(nowait);
     793              :         }
     794              :     }
     795              : 
     796        38216 :     last_flush = now;
     797              : 
     798              :     /*
     799              :      * If some of the pending stats could not be flushed due to lock
     800              :      * contention, let the caller know when to retry.
     801              :      */
     802        38216 :     if (partial_flush)
     803              :     {
     804              :         /* force should have prevented us from getting here */
     805              :         Assert(!force);
     806              : 
     807              :         /* remember since when stats have been pending */
     808            9 :         if (pending_since == 0)
     809            9 :             pending_since = now;
     810              : 
     811            9 :         return PGSTAT_IDLE_INTERVAL;
     812              :     }
     813              : 
     814        38207 :     pending_since = 0;
     815        38207 :     pgstat_report_fixed = false;
     816              : 
     817        38207 :     return 0;
     818              : }
     819              : 
     820              : /*
     821              :  * Force locally pending stats to be flushed during the next
     822              :  * pgstat_report_stat() call. This is useful for writing tests.
     823              :  */
     824              : void
     825          287 : pgstat_force_next_flush(void)
     826              : {
     827          287 :     pgStatForceNextFlush = true;
     828          287 : }
     829              : 
     830              : /*
     831              :  * Only for use by pgstat_reset_counters()
     832              :  */
     833              : static bool
     834        11666 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
     835              : {
     836        11666 :     return entry->key.dboid == MyDatabaseId;
     837              : }
     838              : 
     839              : /*
     840              :  * Reset counters for our database.
     841              :  *
     842              :  * Permission checking for this function is managed through the normal
     843              :  * GRANT system.
     844              :  */
     845              : void
     846           13 : pgstat_reset_counters(void)
     847              : {
     848           13 :     TimestampTz ts = GetCurrentTimestamp();
     849              : 
     850           13 :     pgstat_reset_matching_entries(match_db_entries,
     851              :                                   ObjectIdGetDatum(MyDatabaseId),
     852              :                                   ts);
     853           13 : }
     854              : 
     855              : /*
     856              :  * Reset a single variable-numbered entry.
     857              :  *
     858              :  * If the stats kind is within a database, also reset the database's
     859              :  * stat_reset_timestamp.
     860              :  *
     861              :  * Permission checking for this function is managed through the normal
     862              :  * GRANT system.
     863              :  */
     864              : void
     865           25 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
     866              : {
     867           25 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     868           25 :     TimestampTz ts = GetCurrentTimestamp();
     869              : 
     870              :     /* not needed atm, and doesn't make sense with the current signature */
     871              :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
     872              : 
     873              :     /* reset the "single counter" */
     874           25 :     pgstat_reset_entry(kind, dboid, objid, ts);
     875              : 
     876           25 :     if (!kind_info->accessed_across_databases)
     877           11 :         pgstat_reset_database_timestamp(dboid, ts);
     878           25 : }
     879              : 
     880              : /*
     881              :  * Reset stats for all entries of a kind.
     882              :  *
     883              :  * Permission checking for this function is managed through the normal
     884              :  * GRANT system.
     885              :  */
     886              : void
     887           30 : pgstat_reset_of_kind(PgStat_Kind kind)
     888              : {
     889           30 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     890           30 :     TimestampTz ts = GetCurrentTimestamp();
     891              : 
     892           30 :     if (kind_info->fixed_amount)
     893           26 :         kind_info->reset_all_cb(ts);
     894              :     else
     895            4 :         pgstat_reset_entries_of_kind(kind, ts);
     896           30 : }
     897              : 
     898              : 
     899              : /* ------------------------------------------------------------
     900              :  * Fetching of stats
     901              :  * ------------------------------------------------------------
     902              :  */
     903              : 
     904              : /*
     905              :  * Discard any data collected in the current transaction.  Any subsequent
     906              :  * request will cause new snapshots to be read.
     907              :  *
     908              :  * This is also invoked during transaction commit or abort to discard
     909              :  * the no-longer-wanted snapshot.  Updates of stats_fetch_consistency can
     910              :  * cause this routine to be called.
     911              :  */
     912              : void
     913       556553 : pgstat_clear_snapshot(void)
     914              : {
     915              :     pgstat_assert_is_up();
     916              : 
     917       556553 :     memset(&pgStatLocal.snapshot.fixed_valid, 0,
     918              :            sizeof(pgStatLocal.snapshot.fixed_valid));
     919       556553 :     memset(&pgStatLocal.snapshot.custom_valid, 0,
     920              :            sizeof(pgStatLocal.snapshot.custom_valid));
     921       556553 :     pgStatLocal.snapshot.stats = NULL;
     922       556553 :     pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
     923              : 
     924              :     /* Release memory, if any was allocated */
     925       556553 :     if (pgStatLocal.snapshot.context)
     926              :     {
     927          649 :         MemoryContextDelete(pgStatLocal.snapshot.context);
     928              : 
     929              :         /* Reset variables */
     930          649 :         pgStatLocal.snapshot.context = NULL;
     931              :     }
     932              : 
     933              :     /*
     934              :      * Historically the backend_status.c facilities lived in this file, and
     935              :      * were reset with the same function. For now keep it that way, and
     936              :      * forward the reset request.
     937              :      */
     938       556553 :     pgstat_clear_backend_activity_snapshot();
     939              : 
     940              :     /* Reset this flag, as it may be possible that a cleanup was forced. */
     941       556553 :     force_stats_snapshot_clear = false;
     942       556553 : }
     943              : 
     944              : void *
     945       313767 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
     946              : {
     947       313767 :     PgStat_HashKey key = {0};
     948              :     PgStat_EntryRef *entry_ref;
     949              :     void       *stats_data;
     950       313767 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
     951              : 
     952              :     /* should be called from backends */
     953              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
     954              :     Assert(!kind_info->fixed_amount);
     955              : 
     956       313767 :     pgstat_prep_snapshot();
     957              : 
     958       313767 :     key.kind = kind;
     959       313767 :     key.dboid = dboid;
     960       313767 :     key.objid = objid;
     961              : 
     962              :     /* if we need to build a full snapshot, do so */
     963       313767 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
     964          230 :         pgstat_build_snapshot();
     965              : 
     966              :     /* if caching is desired, look up in cache */
     967       313767 :     if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
     968              :     {
     969         5071 :         PgStat_SnapshotEntry *entry = NULL;
     970              : 
     971         5071 :         entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
     972              : 
     973         5071 :         if (entry)
     974          472 :             return entry->data;
     975              : 
     976              :         /*
     977              :          * If we built a full snapshot and the key is not in
     978              :          * pgStatLocal.snapshot.stats, there are no matching stats.
     979              :          */
     980         4599 :         if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
     981           14 :             return NULL;
     982              :     }
     983              : 
     984       313281 :     pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
     985              : 
     986       313281 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
     987              : 
     988       313281 :     if (entry_ref == NULL || entry_ref->shared_entry->dropped)
     989              :     {
     990              :         /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
     991         8020 :         if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
     992              :         {
     993          889 :             PgStat_SnapshotEntry *entry = NULL;
     994              :             bool        found;
     995              : 
     996          889 :             entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
     997              :             Assert(!found);
     998          889 :             entry->data = NULL;
     999              :         }
    1000         8020 :         return NULL;
    1001              :     }
    1002              : 
    1003              :     /*
    1004              :      * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
    1005              :      * otherwise we could quickly end up with a fair bit of memory used due to
    1006              :      * repeated accesses.
    1007              :      */
    1008       305261 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    1009       301565 :         stats_data = palloc(kind_info->shared_data_len);
    1010              :     else
    1011         3696 :         stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
    1012         3696 :                                         kind_info->shared_data_len);
    1013              : 
    1014       305261 :     (void) pgstat_lock_entry_shared(entry_ref, false);
    1015       610522 :     memcpy(stats_data,
    1016       305261 :            pgstat_get_entry_data(kind, entry_ref->shared_stats),
    1017       305261 :            kind_info->shared_data_len);
    1018       305261 :     pgstat_unlock_entry(entry_ref);
    1019              : 
    1020       305261 :     if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
    1021              :     {
    1022         3696 :         PgStat_SnapshotEntry *entry = NULL;
    1023              :         bool        found;
    1024              : 
    1025         3696 :         entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
    1026         3696 :         entry->data = stats_data;
    1027              :     }
    1028              : 
    1029       305261 :     return stats_data;
    1030              : }
    1031              : 
    1032              : /*
    1033              :  * If a stats snapshot has been taken, return the timestamp at which that was
    1034              :  * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
    1035              :  * false.
    1036              :  */
    1037              : TimestampTz
    1038           30 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
    1039              : {
    1040           30 :     if (force_stats_snapshot_clear)
    1041            9 :         pgstat_clear_snapshot();
    1042              : 
    1043           30 :     if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1044              :     {
    1045           12 :         *have_snapshot = true;
    1046           12 :         return pgStatLocal.snapshot.snapshot_timestamp;
    1047              :     }
    1048              : 
    1049           18 :     *have_snapshot = false;
    1050              : 
    1051           18 :     return 0;
    1052              : }
    1053              : 
    1054              : bool
    1055           80 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
    1056              : {
    1057              :     /* fixed-numbered stats always exist */
    1058           80 :     if (pgstat_get_kind_info(kind)->fixed_amount)
    1059            6 :         return true;
    1060              : 
    1061           74 :     return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
    1062              : }
    1063              : 
    1064              : /*
    1065              :  * Ensure snapshot for fixed-numbered 'kind' exists.
    1066              :  *
    1067              :  * Typically used by the pgstat_fetch_* functions for a kind of stats, before
    1068              :  * massaging the data into the desired format.
    1069              :  */
    1070              : void
    1071          222 : pgstat_snapshot_fixed(PgStat_Kind kind)
    1072              : {
    1073              :     Assert(pgstat_is_kind_valid(kind));
    1074              :     Assert(pgstat_get_kind_info(kind)->fixed_amount);
    1075              : 
    1076          222 :     if (force_stats_snapshot_clear)
    1077            0 :         pgstat_clear_snapshot();
    1078              : 
    1079          222 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1080           12 :         pgstat_build_snapshot();
    1081              :     else
    1082          210 :         pgstat_build_snapshot_fixed(kind);
    1083              : 
    1084          222 :     if (pgstat_is_kind_builtin(kind))
    1085              :         Assert(pgStatLocal.snapshot.fixed_valid[kind]);
    1086            3 :     else if (pgstat_is_kind_custom(kind))
    1087              :         Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
    1088          222 : }
    1089              : 
    1090              : static void
    1091        23358 : pgstat_init_snapshot_fixed(void)
    1092              : {
    1093              :     /*
    1094              :      * Initialize fixed-numbered statistics data in snapshots, only for custom
    1095              :      * stats kinds.
    1096              :      */
    1097       233580 :     for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
    1098              :     {
    1099       210222 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1100              : 
    1101       210222 :         if (!kind_info || !kind_info->fixed_amount)
    1102       210173 :             continue;
    1103              : 
    1104           49 :         pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
    1105           49 :             MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
    1106              :     }
    1107        23358 : }
    1108              : 
    1109              : static void
    1110       313793 : pgstat_prep_snapshot(void)
    1111              : {
    1112       313793 :     if (force_stats_snapshot_clear)
    1113            9 :         pgstat_clear_snapshot();
    1114              : 
    1115       313793 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
    1116         5097 :         pgStatLocal.snapshot.stats != NULL)
    1117       313144 :         return;
    1118              : 
    1119          649 :     if (!pgStatLocal.snapshot.context)
    1120          649 :         pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
    1121              :                                                              "PgStat Snapshot",
    1122              :                                                              ALLOCSET_SMALL_SIZES);
    1123              : 
    1124          649 :     pgStatLocal.snapshot.stats =
    1125          649 :         pgstat_snapshot_create(pgStatLocal.snapshot.context,
    1126              :                                PGSTAT_SNAPSHOT_HASH_SIZE,
    1127              :                                NULL);
    1128              : }
    1129              : /* Build the backend-local snapshot of variable- and fixed-numbered stats, unless already built. */
    1130              : static void
    1131          242 : pgstat_build_snapshot(void)
    1132              : {
    1133              :     dshash_seq_status hstat;
    1134              :     PgStatShared_HashEntry *p;
    1135              : 
    1136              :     /* should only be called when we need a snapshot */
    1137              :     Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
    1138              : 
    1139              :     /* snapshot already built, nothing to do */
    1140          242 :     if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    1141          216 :         return;
    1142              : 
    1143           26 :     pgstat_prep_snapshot();
    1144              : 
    1145              :     Assert(pgStatLocal.snapshot.stats->members == 0);
    1146              : 
    1147           26 :     pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
    1148              : 
    1149              :     /*
    1150              :      * Snapshot the variable-numbered stats by walking the shared hash table.
    1151              :      */
    1152           26 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1153        27477 :     while ((p = dshash_seq_next(&hstat)) != NULL)
    1154              :     {
    1155        27451 :         PgStat_Kind kind = p->key.kind;
    1156        27451 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1157              :         bool        found;
    1158              :         PgStat_SnapshotEntry *entry;
    1159              :         PgStatShared_Common *stats_data;
    1160              : 
    1161              :         /*
    1162              :          * Check if the stats object should be included in the snapshot.
    1163              :          * Unless the stats kind can be accessed from all databases (e.g.,
    1164              :          * database stats themselves), we only include stats for the current
    1165              :          * database or objects not associated with a database (e.g. shared
    1166              :          * relations).
    1167              :          */
    1168        27451 :         if (p->key.dboid != MyDatabaseId &&
    1169         8131 :             p->key.dboid != InvalidOid &&
    1170         6654 :             !kind_info->accessed_across_databases)
    1171         6678 :             continue;
    1172              : 
    1173        20875 :         if (p->dropped)
    1174          102 :             continue;
    1175              : 
    1176              :         Assert(pg_atomic_read_u32(&p->refcount) > 0);
    1177              : 
    1178        20773 :         stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
    1179              :         Assert(stats_data);
    1180              : 
    1181        20773 :         entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
    1182              :         Assert(!found);
    1183              : 
    1184        20773 :         entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
    1185              :                                          pgstat_get_entry_len(kind));
    1186              : 
    1187              :         /*
    1188              :          * Acquire the LWLock directly instead of using
    1189              :          * pg_stat_lock_entry_shared() which requires a reference.
    1190              :          */
    1191        20773 :         LWLockAcquire(&stats_data->lock, LW_SHARED);
    1192        20773 :         memcpy(entry->data,
    1193        20773 :                pgstat_get_entry_data(kind, stats_data),
    1194              :                pgstat_get_entry_len(kind));
    1195        20773 :         LWLockRelease(&stats_data->lock);
    1196              :     }
    1197           26 :     dshash_seq_term(&hstat);
    1198              : 
    1199              :     /*
    1200              :      * Build snapshot of all fixed-numbered stats.
    1201              :      */
    1202          858 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1203              :     {
    1204          832 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1205              : 
    1206          832 :         if (!kind_info)
    1207          520 :             continue;
    1208          312 :         if (!kind_info->fixed_amount)
    1209              :         {
    1210              :             Assert(kind_info->snapshot_cb == NULL);
    1211          156 :             continue;
    1212              :         }
    1213              : 
    1214          156 :         pgstat_build_snapshot_fixed(kind);
    1215              :     }
    1216              : 
    1217           26 :     pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
    1218              : }
    1219              : /* Snapshot one fixed-numbered stats kind by calling its snapshot_cb, then mark it valid. */
    1220              : static void
    1221         4531 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
    1222              : {
    1223         4531 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1224              :     int         idx;
    1225              :     bool       *valid;
    1226              : 
    1227              :     /* Position in fixed_valid or custom_valid */
    1228         4531 :     if (pgstat_is_kind_builtin(kind))
    1229              :     {
    1230         4527 :         idx = kind;
    1231         4527 :         valid = pgStatLocal.snapshot.fixed_valid;
    1232              :     }
    1233              :     else
    1234              :     {
    1235            4 :         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1236            4 :         valid = pgStatLocal.snapshot.custom_valid;
    1237              :     }
    1238              : 
    1239              :     Assert(kind_info->fixed_amount);
    1240              :     Assert(kind_info->snapshot_cb != NULL);
    1241              : 
    1242         4531 :     if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    1243              :     {
    1244              :         /* rebuild every time */
    1245         4180 :         valid[idx] = false;
    1246              :     }
    1247          351 :     else if (valid[idx])
    1248              :     {
    1249              :         /* already valid; in snapshot mode we shouldn't get called again */
    1250              :         Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
    1251            6 :         return;
    1252              :     }
    1253              : 
    1254              :     Assert(!valid[idx]);
    1255              : 
    1256         4525 :     kind_info->snapshot_cb();
    1257              : 
    1258              :     Assert(!valid[idx]);
    1259         4525 :     valid[idx] = true;
    1260              : }
    1261              : 
    1262              : 
    1263              : /* ------------------------------------------------------------
    1264              :  * Backend-local pending stats infrastructure
    1265              :  * ------------------------------------------------------------
    1266              :  */
    1267              : 
    1268              : /*
    1269              :  * Returns the PgStat_EntryRef for the object identified by kind/dboid/objid,
    1270              :  * preparing it to receive pending stats (zeroed memory in pgStatPendingContext) if not already done.
    1271              :  *
    1272              :  * If created_entry is non-NULL, it'll be set to true if the entry is newly
    1273              :  * created, false otherwise.
    1274              :  */
    1275              : PgStat_EntryRef *
    1276      2213621 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
    1277              : {
    1278              :     PgStat_EntryRef *entry_ref;
    1279              : 
    1280              :     /* need to be able to flush out */
    1281              :     Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
    1282              : 
    1283      2213621 :     if (unlikely(!pgStatPendingContext))
    1284              :     {
    1285        18751 :         pgStatPendingContext =
    1286        18751 :             AllocSetContextCreate(TopMemoryContext,
    1287              :                                   "PgStat Pending",
    1288              :                                   ALLOCSET_SMALL_SIZES);
    1289              :     }
    1290              : 
    1291      2213621 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
    1292              :                                      true, created_entry);
    1293              : 
    1294      2213621 :     if (entry_ref->pending == NULL)
    1295              :     {
    1296      1132197 :         size_t      entrysize = pgstat_get_kind_info(kind)->pending_size;
    1297              : 
    1298              :         Assert(entrysize != (size_t) -1);
    1299              : 
    1300      1132197 :         entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
    1301      1132197 :         dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
    1302              :     }
    1303              : 
    1304      2213621 :     return entry_ref;
    1305              : }
    1306              : 
    1307              : /*
    1308              :  * Return an existing stats entry that has pending data, or NULL otherwise.
    1309              :  *
    1310              :  * This should only be used by helper functions for pgstatfuncs.c - outside of
    1311              :  * that it shouldn't be needed.
    1312              :  */
    1313              : PgStat_EntryRef *
    1314           42 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
    1315              : {
    1316              :     PgStat_EntryRef *entry_ref;
    1317              : 
    1318           42 :     entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
    1319              : 
    1320           42 :     if (entry_ref == NULL || entry_ref->pending == NULL)
    1321           15 :         return NULL;
    1322              : 
    1323           27 :     return entry_ref;
    1324              : }
    1325              : /* Release an entry's pending data and unlink the entry from the pending list. */
    1326              : void
    1327      1132197 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
    1328              : {
    1329      1132197 :     PgStat_Kind kind = entry_ref->shared_entry->key.kind;
    1330      1132197 :     const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1331      1132197 :     void       *pending_data = entry_ref->pending;
    1332              : 
    1333              :     Assert(pending_data != NULL);
    1334              :     /* caller must pass a !fixed_amount (variable-numbered) stats kind */
    1335              :     Assert(!pgstat_get_kind_info(kind)->fixed_amount);
    1336              : 
    1337      1132197 :     if (kind_info->delete_pending_cb)
    1338      1070153 :         kind_info->delete_pending_cb(entry_ref);
    1339              : 
    1340      1132197 :     pfree(pending_data);
    1341      1132197 :     entry_ref->pending = NULL;
    1342              : 
    1343      1132197 :     dlist_delete(&entry_ref->pending_node);
    1344      1132197 : }
    1345              : 
    1346              : /*
    1347              :  * Flush out pending variable-numbered stats; returns true if some entries remain pending.
    1348              :  */
    1349              : static bool
    1350        38216 : pgstat_flush_pending_entries(bool nowait)
    1351              : {
    1352        38216 :     bool        have_pending = false;
    1353        38216 :     dlist_node *cur = NULL;
    1354              : 
    1355              :     /*
    1356              :      * Need to be a bit careful iterating over the list of pending entries.
    1357              :      * Processing a pending entry may queue further pending entries to the end
    1358              :      * of the list that we want to process, so a simple iteration won't do.
    1359              :      * Further complicating matters is that we want to delete the current
    1360              :      * entry in each iteration from the list if we flushed successfully.
    1361              :      *
    1362              :      * So we just keep track of the next pointer in each loop iteration.
    1363              :      */
    1364        38216 :     if (!dlist_is_empty(&pgStatPending))
    1365        35631 :         cur = dlist_head_node(&pgStatPending);
    1366              : 
    1367      1135498 :     while (cur)
    1368              :     {
    1369      1097282 :         PgStat_EntryRef *entry_ref =
    1370              :             dlist_container(PgStat_EntryRef, pending_node, cur);
    1371      1097282 :         PgStat_HashKey key = entry_ref->shared_entry->key;
    1372      1097282 :         PgStat_Kind kind = key.kind;
    1373      1097282 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1374              :         bool        did_flush;
    1375              :         dlist_node *next;
    1376              : 
    1377              :         Assert(!kind_info->fixed_amount);
    1378              :         Assert(kind_info->flush_pending_cb != NULL);
    1379              : 
    1380              :         /* flush the stats, if possible */
    1381      1097282 :         did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
    1382              : 
    1383              :         Assert(did_flush || nowait);
    1384              : 
    1385              :         /* determine next entry, before deleting the pending entry */
    1386      1097282 :         if (dlist_has_next(&pgStatPending, cur))
    1387      1061651 :             next = dlist_next_node(&pgStatPending, cur);
    1388              :         else
    1389        35631 :             next = NULL;
    1390              : 
    1391              :         /* if successfully flushed, remove entry */
    1392      1097282 :         if (did_flush)
    1393      1097274 :             pgstat_delete_pending_entry(entry_ref);
    1394              :         else
    1395            8 :             have_pending = true;
    1396              : 
    1397      1097282 :         cur = next;
    1398              :     }
    1399              : 
    1400              :     Assert(dlist_is_empty(&pgStatPending) == !have_pending);
    1401              : 
    1402        38216 :     return have_pending;
    1403              : }
    1404              : 
    1405              : 
    1406              : /* ------------------------------------------------------------
    1407              :  * Helper / infrastructure functions
    1408              :  * ------------------------------------------------------------
    1409              :  */
    1410              : /* Look up a stats kind by name, case-insensitively; raises an ERROR if no kind matches. */
    1411              : PgStat_Kind
    1412           83 : pgstat_get_kind_from_str(char *kind_str)
    1413              : {
    1414          247 :     for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
    1415              :     {
    1416          244 :         if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
    1417           80 :             return kind;
    1418              :     }
    1419              : 
    1420              :     /* Check the custom set of cumulative stats */
    1421            3 :     if (pgstat_kind_custom_infos)
    1422              :     {
    1423            0 :         for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
    1424              :         {
    1425            0 :             uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1426              : 
    1427            0 :             if (pgstat_kind_custom_infos[idx] &&
    1428            0 :                 pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
    1429            0 :                 return kind;
    1430              :         }
    1431              :     }
    1432              : 
    1433            3 :     ereport(ERROR,
    1434              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1435              :              errmsg("invalid statistics kind: \"%s\"", kind_str)));
    1436              :     return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
    1437              : }
    1438              : /* A kind is valid if it is either a builtin or a custom stats kind. */
    1439              : static inline bool
    1440       424380 : pgstat_is_kind_valid(PgStat_Kind kind)
    1441              : {
    1442       424380 :     return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
    1443              : }
    1444              : /* Return the PgStat_KindInfo for kind, or NULL if it is neither builtin nor a registered custom kind. */
    1445              : const PgStat_KindInfo *
    1446      8059512 : pgstat_get_kind_info(PgStat_Kind kind)
    1447              : {
    1448      8059512 :     if (pgstat_is_kind_builtin(kind))
    1449      6534878 :         return &pgstat_kind_builtin_infos[kind];
    1450              : 
    1451      1524634 :     if (pgstat_is_kind_custom(kind))
    1452              :     {
    1453       818082 :         uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1454              : 
    1455       818082 :         if (pgstat_kind_custom_infos == NULL ||
    1456         1845 :             pgstat_kind_custom_infos[idx] == NULL)
    1457       817616 :             return NULL;
    1458          466 :         return pgstat_kind_custom_infos[idx];
    1459              :     }
    1460              : 
    1461       706552 :     return NULL;
    1462              : }
    1463              : 
    1464              : /*
    1465              :  * Register a new custom stats kind. Raises an ERROR on an invalid name, ID or
    1466              :  * property, on an already-used ID or name, or outside shared_preload_libraries loading.
    1467              :  * PgStat_Kinds must be globally unique across all extensions. Refer
    1468              :  * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
    1469              :  * unique ID for your extension, to avoid conflicts with other extension
    1470              :  * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
    1471              :  * needlessly reserving a new ID.
    1472              :  */
    1473              : void
    1474            6 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
    1475              : {
    1476            6 :     uint32      idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1477              : 
    1478            6 :     if (kind_info->name == NULL || strlen(kind_info->name) == 0)
    1479            0 :         ereport(ERROR,
    1480              :                 (errmsg("custom cumulative statistics name is invalid"),
    1481              :                  errhint("Provide a non-empty name for the custom cumulative statistics.")));
    1482              : 
    1483            6 :     if (!pgstat_is_kind_custom(kind))
    1484            0 :         ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
    1485              :                         errhint("Provide a custom cumulative statistics ID between %u and %u.",
    1486              :                                 PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
    1487              : 
    1488            6 :     if (!process_shared_preload_libraries_in_progress)
    1489            0 :         ereport(ERROR,
    1490              :                 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1491              :                  errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
    1492              : 
    1493              :     /*
    1494              :      * Check some data for fixed-numbered stats.
    1495              :      */
    1496            6 :     if (kind_info->fixed_amount)
    1497              :     {
    1498            3 :         if (kind_info->shared_size == 0)
    1499            0 :             ereport(ERROR,
    1500              :                     (errmsg("custom cumulative statistics property is invalid"),
    1501              :                      errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
    1502            3 :         if (kind_info->track_entry_count)
    1503            0 :             ereport(ERROR,
    1504              :                     (errmsg("custom cumulative statistics property is invalid"),
    1505              :                      errhint("Custom cumulative statistics cannot use entry count tracking for fixed-numbered objects.")));
    1506              :     }
    1507              : 
    1508              :     /*
    1509              :      * If pgstat_kind_custom_infos is not available yet, allocate it.
    1510              :      */
    1511            6 :     if (pgstat_kind_custom_infos == NULL)
    1512              :     {
    1513            3 :         pgstat_kind_custom_infos = (const PgStat_KindInfo **)
    1514            3 :             MemoryContextAllocZero(TopMemoryContext,
    1515              :                                    sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
    1516              :     }
    1517              : 
    1518            6 :     if (pgstat_kind_custom_infos[idx] != NULL &&
    1519            0 :         pgstat_kind_custom_infos[idx]->name != NULL)
    1520            0 :         ereport(ERROR,
    1521              :                 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1522              :                  errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
    1523              :                            pgstat_kind_custom_infos[idx]->name)));
    1524              : 
    1525              :     /* check for existing custom stats with the same name (case-insensitive) */
    1526           60 :     for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
    1527              :     {
    1528           54 :         uint32      existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
    1529              : 
    1530           54 :         if (pgstat_kind_custom_infos[existing_idx] == NULL)
    1531           51 :             continue;
    1532            3 :         if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
    1533            0 :             ereport(ERROR,
    1534              :                     (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
    1535              :                      errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
    1536              :     }
    1537              : 
    1538              :     /* Register it */
    1539            6 :     pgstat_kind_custom_infos[idx] = kind_info;
    1540            6 :     ereport(LOG,
    1541              :             (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
    1542              :                     kind_info->name, kind)));
    1543            6 : }
    1544              : 
    1545              : /*
    1546              :  * Stats should only be reported after pgstat_initialize() and before
    1547              :  * pgstat_shutdown(). This check is put in a few central places to catch
    1548              :  * violations of this rule more easily. Only built with USE_ASSERT_CHECKING.
    1549              :  */
    1550              : #ifdef USE_ASSERT_CHECKING
    1551              : void
    1552              : pgstat_assert_is_up(void)
    1553              : {
    1554              :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
    1555              : }
    1556              : #endif
    1557              : 
    1558              : 
    1559              : /* ------------------------------------------------------------
    1560              :  * reading and writing of on-disk stats file
    1561              :  * ------------------------------------------------------------
    1562              :  */
    1563              : 
    1564              : /* helper for pgstat_write_statsfile(): write len bytes starting at ptr to fpout */
    1565              : void
    1566       421206 : pgstat_write_chunk(FILE *fpout, void *ptr, size_t len)
    1567              : {
    1568              :     int         rc;
    1569              : 
    1570       421206 :     rc = fwrite(ptr, len, 1, fpout);
    1571              : 
    1572              :     /* We check for errors with ferror() when done writing the stats. */
    1573              :     (void) rc;
    1574       421206 : }
    1575              : 
    1576              : /*
    1577              :  * Write out the permanent stats file. This function is called in the last
    1578              :  * process that is accessing the shared stats so locking is not required.
    1579              :  */
    1580              : static void
    1581          694 : pgstat_write_statsfile(void)
    1582              : {
    1583              :     FILE       *fpout;
    1584              :     int32       format_id;
    1585          694 :     const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
    1586          694 :     const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    1587              :     dshash_seq_status hstat;
    1588              :     PgStatShared_HashEntry *ps;
    1589              : 
    1590              :     pgstat_assert_is_up();
    1591              : 
    1592              :     /* should be called only by the checkpointer or single user mode */
    1593              :     Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
    1594              : 
    1595              :     /* we're shutting down, so it's ok to just override this */
    1596          694 :     pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
    1597              : 
    1598          694 :     elog(DEBUG2, "writing stats file \"%s\"", statfile);
    1599              : 
    1600              :     /*
    1601              :      * Open the statistics temp file to write out the current values.
    1602              :      */
    1603          694 :     fpout = AllocateFile(tmpfile, PG_BINARY_W);
    1604          694 :     if (fpout == NULL)
    1605              :     {
    1606            0 :         ereport(LOG,
    1607              :                 (errcode_for_file_access(),
    1608              :                  errmsg("could not open temporary statistics file \"%s\": %m",
    1609              :                         tmpfile)));
    1610            0 :         return;
    1611              :     }
    1612              : 
    1613              :     /*
    1614              :      * Write the file header --- currently just a format ID.
    1615              :      */
    1616          694 :     format_id = PGSTAT_FILE_FORMAT_ID;
    1617          694 :     pgstat_write_chunk_s(fpout, &format_id);
    1618              : 
    1619              :     /* Write various stats structs for fixed number of objects */
    1620        22902 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1621              :     {
    1622              :         char       *ptr;
    1623        22208 :         const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
    1624              : 
    1625        22208 :         if (!info || !info->fixed_amount)
    1626        18043 :             continue;
    1627              : 
    1628         4165 :         if (pgstat_is_kind_builtin(kind))
    1629              :             Assert(info->snapshot_ctl_off != 0);
    1630              : 
    1631              :         /* skip if no need to write to file */
    1632         4165 :         if (!info->write_to_file)
    1633            0 :             continue;
    1634              : 
    1635         4165 :         pgstat_build_snapshot_fixed(kind);
    1636         4165 :         if (pgstat_is_kind_builtin(kind))
    1637         4164 :             ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
    1638              :         else
    1639            1 :             ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
    1640              : 
    1641         4165 :         fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
    1642         4165 :         pgstat_write_chunk_s(fpout, &kind);
    1643         4165 :         pgstat_write_chunk(fpout, ptr, info->shared_data_len);
    1644              :     }
    1645              : 
    1646              :     /*
    1647              :      * Walk through the stats entries in the shared hash table
    1648              :      */
    1649          694 :     dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    1650       206846 :     while ((ps = dshash_seq_next(&hstat)) != NULL)
    1651              :     {
    1652              :         PgStatShared_Common *shstats;
    1653       206152 :         const PgStat_KindInfo *kind_info = NULL;
    1654              : 
    1655       206152 :         CHECK_FOR_INTERRUPTS();
    1656              : 
    1657              :         /*
    1658              :          * We should not see any "dropped" entries when writing the stats
    1659              :          * file, as all backends and auxiliary processes should have cleaned
    1660              :          * up their references before they terminated.
    1661              :          *
    1662              :          * However, since we are already shutting down, it is not worth
    1663              :          * crashing the server over any potential cleanup issues, so we simply
    1664              :          * skip such entries if encountered.
    1665              :          */
    1666              :         Assert(!ps->dropped);
    1667       206152 :         if (ps->dropped)
    1668            0 :             continue;
    1669              : 
    1670              :         /*
    1671              :          * This discards data related to custom stats kinds that are unknown
    1672              :          * to this process.
    1673              :          */
    1674       206152 :         if (!pgstat_is_kind_valid(ps->key.kind))
    1675              :         {
    1676            0 :             elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
    1677              :                  ps->key.kind, ps->key.dboid,
    1678              :                  ps->key.objid);
    1679            0 :             continue;
    1680              :         }
    1681              : 
    1682       206152 :         shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
    1683              : 
    1684       206152 :         kind_info = pgstat_get_kind_info(ps->key.kind);
    1685              : 
    1686              :         /* if not dropped the valid-entry refcount should exist */
    1687              :         Assert(pg_atomic_read_u32(&ps->refcount) > 0);
    1688              : 
    1689              :         /* skip if no need to write to file */
    1690       206152 :         if (!kind_info->write_to_file)
    1691          119 :             continue;
    1692              : 
    1693       206033 :         if (!kind_info->to_serialized_name)
    1694              :         {
    1695              :             /* normal stats entry, identified by PgStat_HashKey */
    1696       205927 :             fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
    1697       205927 :             pgstat_write_chunk_s(fpout, &ps->key);
    1698              :         }
    1699              :         else
    1700              :         {
    1701              :             /* stats entry identified by name on disk (e.g. slots) */
    1702              :             NameData    name;
    1703              : 
    1704          106 :             kind_info->to_serialized_name(&ps->key, shstats, &name);
    1705              : 
    1706          106 :             fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
    1707          106 :             pgstat_write_chunk_s(fpout, &ps->key.kind);
    1708          106 :             pgstat_write_chunk_s(fpout, &name);
    1709              :         }
    1710              : 
    1711              :         /* Write the entry's data, i.e. everything except its header part */
    1712       206033 :         pgstat_write_chunk(fpout,
    1713              :                            pgstat_get_entry_data(ps->key.kind, shstats),
    1714              :                            pgstat_get_entry_len(ps->key.kind));
    1715              : 
    1716              :         /* Write more data for the entry, if required */
    1717       206033 :         if (kind_info->to_serialized_data)
    1718            2 :             kind_info->to_serialized_data(&ps->key, shstats, fpout);
    1719              :     }
    1720          694 :     dshash_seq_term(&hstat);
    1721              : 
    1722              :     /*
    1723              :      * No more output to be done. Close the temp file and replace the old
    1724              :      * pgstat.stat with it.  The ferror() check replaces testing for error
    1725              :      * after each individual fputc or fwrite (in pgstat_write_chunk()) above.
    1726              :      */
    1727          694 :     fputc(PGSTAT_FILE_ENTRY_END, fpout);
    1728              : 
    1729          694 :     if (ferror(fpout))
    1730              :     {
    1731            0 :         ereport(LOG,
    1732              :                 (errcode_for_file_access(),
    1733              :                  errmsg("could not write temporary statistics file \"%s\": %m",
    1734              :                         tmpfile)));
    1735            0 :         FreeFile(fpout);
    1736            0 :         unlink(tmpfile);
    1737              :     }
    1738          694 :     else if (FreeFile(fpout) < 0)
    1739              :     {
    1740            0 :         ereport(LOG,
    1741              :                 (errcode_for_file_access(),
    1742              :                  errmsg("could not close temporary statistics file \"%s\": %m",
    1743              :                         tmpfile)));
    1744            0 :         unlink(tmpfile);
    1745              :     }
    1746          694 :     else if (durable_rename(tmpfile, statfile, LOG) < 0)
    1747              :     {
    1748              :         /* durable_rename already emitted log message */
    1749            0 :         unlink(tmpfile);
    1750              :     }
    1751              : 
    1752              :     /* Finish callbacks, if required */
    1753        22902 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    1754              :     {
    1755        22208 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    1756              : 
    1757        22208 :         if (kind_info && kind_info->finish)
    1758            1 :             kind_info->finish(STATS_WRITE);
    1759              :     }
    1760              : }
    1761              : 
    1762              : /* helper for pgstat_read_statsfile(): reads exactly len bytes from fpin into ptr, returns false on a short read */
    1763              : bool
    1764       437307 : pgstat_read_chunk(FILE *fpin, void *ptr, size_t len)
    1765              : {
    1766       437307 :     return fread(ptr, 1, len, fpin) == len;
    1767              : }
    1768              : 
    1769              : /*
    1770              :  * Reads the existing permanent statistics file into shared memory and then
    1771              :  * removes it.  If the file cannot be opened or is corrupted, all stats are reset.
    1772              :  * This function is called in the only process that is accessing the shared
    1773              :  * stats so locking is not required.
    1774              :  */
    1775              : static void
    1776          818 : pgstat_read_statsfile(void)
    1777              : {
    1778              :     FILE       *fpin;
    1779              :     int32       format_id;
    1780              :     bool        found;
    1781          818 :     const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    1782          818 :     PgStat_ShmemControl *shmem = pgStatLocal.shmem;
    1783              : 
    1784              :     /* shouldn't be called from postmaster */
    1785              :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
    1786              : 
    1787          818 :     elog(DEBUG2, "reading stats file \"%s\"", statfile);
    1788              : 
    1789              :     /*
    1790              :      * Try to open the stats file. If it doesn't exist, the backends simply
    1791              :      * return zero for anything and statistics simply start from scratch
    1792              :      * with empty counters.
    1793              :      *
    1794              :      * ENOENT is a possibility if stats collection was previously disabled or
    1795              :      * has not yet written the stats file for the first time.  Any other
    1796              :      * failure condition is suspicious.
    1797              :      */
    1798          818 :     if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
    1799              :     {
    1800           52 :         if (errno != ENOENT)
    1801            0 :             ereport(LOG,
    1802              :                     (errcode_for_file_access(),
    1803              :                      errmsg("could not open statistics file \"%s\": %m",
    1804              :                             statfile)));
    1805           52 :         pgstat_reset_after_failure();
    1806           52 :         return;
    1807              :     }
    1808              : 
    1809              :     /*
    1810              :      * Verify it's of the expected format.
    1811              :      */
    1812          766 :     if (!pgstat_read_chunk_s(fpin, &format_id))
    1813              :     {
    1814            0 :         elog(WARNING, "could not read format ID");
    1815            0 :         goto error;
    1816              :     }
    1817              : 
    1818          766 :     if (format_id != PGSTAT_FILE_FORMAT_ID)
    1819              :     {
    1820            1 :         elog(WARNING, "found incorrect format ID %d (expected %d)",
    1821              :              format_id, PGSTAT_FILE_FORMAT_ID);
    1822            1 :         goto error;
    1823              :     }
    1824              : 
    1825              :     /*
    1826              :      * We found an existing statistics file. Read it and put all the stats
    1827              :      * data into place.
    1828              :      */
    1829              :     for (;;)
    1830       218228 :     {
    1831       218993 :         int         t = fgetc(fpin);
    1832              : 
    1833       218993 :         switch (t)
    1834              :         {
    1835         4591 :             case PGSTAT_FILE_ENTRY_FIXED:
    1836              :                 {
    1837              :                     PgStat_Kind kind;
    1838              :                     const PgStat_KindInfo *info;
    1839              :                     char       *ptr;
    1840              : 
    1841              :                     /* entry for fixed-numbered stats */
    1842         4591 :                     if (!pgstat_read_chunk_s(fpin, &kind))
    1843              :                     {
    1844            0 :                         elog(WARNING, "could not read stats kind for entry of type %c", t);
    1845            0 :                         goto error;
    1846              :                     }
    1847              : 
    1848         4591 :                     if (!pgstat_is_kind_valid(kind))
    1849              :                     {
    1850            0 :                         elog(WARNING, "invalid stats kind %u for entry of type %c",
    1851              :                              kind, t);
    1852            0 :                         goto error;
    1853              :                     }
    1854              : 
    1855         4591 :                     info = pgstat_get_kind_info(kind);
    1856         4591 :                     if (!info)
    1857              :                     {
    1858            0 :                         elog(WARNING, "could not find information of kind %u for entry of type %c",
    1859              :                              kind, t);
    1860            0 :                         goto error;
    1861              :                     }
    1862              : 
    1863         4591 :                     if (!info->fixed_amount)
    1864              :                     {
    1865            0 :                         elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
    1866              :                              kind, t);
    1867            0 :                         goto error;
    1868              :                     }
    1869              : 
    1870              :                     /* Load back stats into shared memory */
    1871         4591 :                     if (pgstat_is_kind_builtin(kind))
    1872         4590 :                         ptr = ((char *) shmem) + info->shared_ctl_off +
    1873         4590 :                             info->shared_data_off;
    1874              :                     else
    1875              :                     {
    1876            1 :                         int         idx = kind - PGSTAT_KIND_CUSTOM_MIN;
    1877              : 
    1878            1 :                         ptr = ((char *) shmem->custom_data[idx]) +
    1879            1 :                             info->shared_data_off;
    1880              :                     }
    1881              : 
    1882         4591 :                     if (!pgstat_read_chunk(fpin, ptr, info->shared_data_len))
    1883              :                     {
    1884            0 :                         elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
    1885              :                              kind, t, info->shared_data_len);
    1886            0 :                         goto error;
    1887              :                     }
    1888              : 
    1889         4591 :                     break;
    1890              :                 }
    1891       213637 :             case PGSTAT_FILE_ENTRY_HASH:
    1892              :             case PGSTAT_FILE_ENTRY_NAME:
    1893              :                 {
    1894              :                     PgStat_HashKey key;
    1895              :                     PgStatShared_HashEntry *p;
    1896              :                     PgStatShared_Common *header;
    1897       213637 :                     const PgStat_KindInfo *kind_info = NULL;
    1898              : 
    1899       213637 :                     CHECK_FOR_INTERRUPTS();
    1900              : 
    1901       213637 :                     if (t == PGSTAT_FILE_ENTRY_HASH)
    1902              :                     {
    1903              :                         /* normal stats entry, identified by PgStat_HashKey */
    1904       213561 :                         if (!pgstat_read_chunk_s(fpin, &key))
    1905              :                         {
    1906            0 :                             elog(WARNING, "could not read key for entry of type %c", t);
    1907            0 :                             goto error;
    1908              :                         }
    1909              : 
    1910       213561 :                         if (!pgstat_is_kind_valid(key.kind))
    1911              :                         {
    1912            0 :                             elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
    1913              :                                  key.kind, key.dboid,
    1914              :                                  key.objid, t);
    1915            0 :                             goto error;
    1916              :                         }
    1917              : 
    1918       213561 :                         kind_info = pgstat_get_kind_info(key.kind);
    1919       213561 :                         if (!kind_info)
    1920              :                         {
    1921            0 :                             elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
    1922              :                                  key.kind, key.dboid,
    1923              :                                  key.objid, t);
    1924            0 :                             goto error;
    1925              :                         }
    1926              :                     }
    1927              :                     else
    1928              :                     {
    1929              :                         /* stats entry identified by name on disk (e.g. slots) */
    1930              :                         PgStat_Kind kind;
    1931              :                         NameData    name;
    1932              : 
    1933           76 :                         if (!pgstat_read_chunk_s(fpin, &kind))
    1934              :                         {
    1935            0 :                             elog(WARNING, "could not read stats kind for entry of type %c", t);
    1936            0 :                             goto error;
    1937              :                         }
    1938           76 :                         if (!pgstat_read_chunk_s(fpin, &name))
    1939              :                         {
    1940            0 :                             elog(WARNING, "could not read name of stats kind %u for entry of type %c",
    1941              :                                  kind, t);
    1942            0 :                             goto error;
    1943              :                         }
    1944           76 :                         if (!pgstat_is_kind_valid(kind))
    1945              :                         {
    1946            0 :                             elog(WARNING, "invalid stats kind %u for entry of type %c",
    1947              :                                  kind, t);
    1948            0 :                             goto error;
    1949              :                         }
    1950              : 
    1951           76 :                         kind_info = pgstat_get_kind_info(kind);
    1952           76 :                         if (!kind_info)
    1953              :                         {
    1954            0 :                             elog(WARNING, "could not find information of kind %u for entry of type %c",
    1955              :                                  kind, t);
    1956            0 :                             goto error;
    1957              :                         }
    1958              : 
    1959           76 :                         if (!kind_info->from_serialized_name)
    1960              :                         {
    1961            0 :                             elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
    1962              :                                  kind, t);
    1963            0 :                             goto error;
    1964              :                         }
    1965              : 
    1966           76 :                         if (!kind_info->from_serialized_name(&name, &key))
    1967              :                         {
    1968              :                             /* skip over data for entry we don't care about */
    1969            1 :                             if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
    1970              :                             {
    1971            0 :                                 elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
    1972              :                                      NameStr(name), kind, t);
    1973            0 :                                 goto error;
    1974              :                             }
    1975              : 
    1976            1 :                             continue;
    1977              :                         }
    1978              : 
    1979              :                         Assert(key.kind == kind);
    1980              :                     }
    1981              : 
    1982              :                     /*
    1983              :                      * This intentionally doesn't use pgstat_get_entry_ref() -
    1984              :                      * putting all stats into this process's
    1985              :                      * pgStatEntryRefHash would be wasted effort and memory.
    1986              :                      */
    1987       213636 :                     p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
    1988              : 
    1989              :                     /* don't allow duplicate entries */
    1990       213636 :                     if (found)
    1991              :                     {
    1992            0 :                         dshash_release_lock(pgStatLocal.shared_hash, p);
    1993            0 :                         elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
    1994              :                              key.kind, key.dboid,
    1995              :                              key.objid, t);
    1996            0 :                         goto error;
    1997              :                     }
    1998              : 
    1999       213636 :                     header = pgstat_init_entry(key.kind, p);
    2000       213636 :                     dshash_release_lock(pgStatLocal.shared_hash, p);
    2001       213636 :                     if (header == NULL)
    2002              :                     {
    2003              :                         /*
    2004              :                          * It would be tempting to switch this ERROR to a
    2005              :                          * WARNING, but it would mean that all the statistics
    2006              :                          * are discarded when the environment fails on OOM.
    2007              :                          */
    2008            0 :                         elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
    2009              :                              key.kind, key.dboid,
    2010              :                              key.objid, t);
    2011              :                     }
    2012              : 
    2013       213636 :                     if (!pgstat_read_chunk(fpin,
    2014              :                                            pgstat_get_entry_data(key.kind, header),
    2015              :                                            pgstat_get_entry_len(key.kind)))
    2016              :                     {
    2017            0 :                         elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
    2018              :                              key.kind, key.dboid,
    2019              :                              key.objid, t);
    2020            0 :                         goto error;
    2021              :                     }
    2022              : 
    2023              :                     /* read more data for the entry, if required */
    2024       213636 :                     if (kind_info->from_serialized_data)
    2025              :                     {
    2026            2 :                         if (!kind_info->from_serialized_data(&key, header, fpin))
    2027              :                         {
    2028            0 :                             elog(WARNING, "could not read auxiliary data for entry %u/%u/%" PRIu64 " of type %c",
    2029              :                                  key.kind, key.dboid,
    2030              :                                  key.objid, t);
    2031            0 :                             goto error;
    2032              :                         }
    2033              :                     }
    2034              : 
    2035       213636 :                     break;
    2036              :                 }
    2037          765 :             case PGSTAT_FILE_ENTRY_END:
    2038              : 
    2039              :                 /*
    2040              :                  * check that PGSTAT_FILE_ENTRY_END actually signals end of
    2041              :                  * file
    2042              :                  */
    2043          765 :                 if (fgetc(fpin) != EOF)
    2044              :                 {
    2045            1 :                     elog(WARNING, "could not read end-of-file");
    2046            1 :                     goto error;
    2047              :                 }
    2048              : 
    2049          764 :                 goto done;
    2050              : 
    2051            0 :             default:    /* unrecognized entry type; presumably also reached when fgetc() returns EOF */
    2052            0 :                 elog(WARNING, "could not read entry of type %c", t);
    2053            0 :                 goto error;
    2054              :         }
    2055              :     }
    2056              : 
    2057          766 : done:
    2058              :     /* First, cleanup the main stats file */
    2059          766 :     FreeFile(fpin);
    2060              : 
    2061          766 :     elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
    2062          766 :     unlink(statfile);
    2063              : 
    2064              :     /* Finish callbacks, if required */
    2065        25278 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    2066              :     {
    2067        24512 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    2068              : 
    2069        24512 :         if (kind_info && kind_info->finish)
    2070            2 :             kind_info->finish(STATS_READ);
    2071              :     }
    2072              : 
    2073          766 :     return;
    2074              : 
    2075            2 : error:
    2076            2 :     ereport(LOG,
    2077              :             (errmsg("corrupted statistics file \"%s\"", statfile)));
    2078              : 
    2079            2 :     pgstat_reset_after_failure();
    2080              : 
    2081            2 :     goto done;  /* still close and remove the file, and run the finish callbacks */
    2082              : }
    2083              : 
    2084              : /*
    2085              :  * Helper to reset / drop stats after a crash or after restoring stats from
    2086              :  * disk failed, potentially after already loading parts.
    2087              :  */
    2088              : static void
    2089          239 : pgstat_reset_after_failure(void)
    2090              : {
    2091          239 :     TimestampTz ts = GetCurrentTimestamp();
    2092              : 
    2093              :     /* reset fixed-numbered stats, passing the same timestamp to every reset_all_cb */
    2094         7887 :     for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
    2095              :     {
    2096         7648 :         const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    2097              : 
    2098         7648 :         if (!kind_info || !kind_info->fixed_amount)
    2099         6213 :             continue;   /* no kind info, or not a fixed-numbered kind */
    2100              : 
    2101         1435 :         kind_info->reset_all_cb(ts);
    2102              :     }
    2103              : 
    2104              :     /* and drop variable-numbered ones */
    2105          239 :     pgstat_drop_all_entries();
    2106          239 : }
    2107              : 
    2108              : /*
    2109              :  * GUC assign_hook for stats_fetch_consistency ("extra" is not used here).
    2110              :  */
    2111              : void
    2112         3575 : assign_stats_fetch_consistency(int newval, void *extra)
    2113              : {
    2114              :     /*
    2115              :      * Changing this value in a transaction may cause snapshot state
    2116              :      * inconsistencies, so force a clear of the current snapshot on the next
    2117              :      * snapshot build attempt.
    2118              :      */
    2119         3575 :     if (pgstat_fetch_consistency != newval)
    2120         2279 :         force_stats_snapshot_clear = true;
    2121         3575 : }
        

Generated by: LCOV version 2.0-1