LCOV - code coverage report
Current view: top level - src/backend/postmaster - pgstat.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 1476 1965 75.1 %
Date: 2021-12-09 03:08:47 Functions: 120 138 87.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* ----------
       2             :  * pgstat.c
       3             :  *
       4             :  *  All the statistics collector stuff hacked up in one big, ugly file.
       5             :  *
       6             :  *  TODO:   - Separate collector, postmaster and backend stuff
       7             :  *            into different files.
       8             :  *
       9             :  *          - Add some automatic call for pgstat vacuuming.
      10             :  *
      11             :  *          - Add a pgstat config column to pg_database, so this
      12             :  *            entire thing can be enabled/disabled on a per db basis.
      13             :  *
      14             :  *  Copyright (c) 2001-2021, PostgreSQL Global Development Group
      15             :  *
      16             :  *  src/backend/postmaster/pgstat.c
      17             :  * ----------
      18             :  */
      19             : #include "postgres.h"
      20             : 
      21             : #include <unistd.h>
      22             : #include <fcntl.h>
      23             : #include <sys/param.h>
      24             : #include <sys/time.h>
      25             : #include <sys/socket.h>
      26             : #include <netdb.h>
      27             : #include <netinet/in.h>
      28             : #include <arpa/inet.h>
      29             : #include <signal.h>
      30             : #include <time.h>
      31             : #ifdef HAVE_SYS_SELECT_H
      32             : #include <sys/select.h>
      33             : #endif
      34             : 
      35             : #include "access/heapam.h"
      36             : #include "access/htup_details.h"
      37             : #include "access/tableam.h"
      38             : #include "access/transam.h"
      39             : #include "access/twophase_rmgr.h"
      40             : #include "access/xact.h"
      41             : #include "catalog/catalog.h"
      42             : #include "catalog/pg_database.h"
      43             : #include "catalog/pg_proc.h"
      44             : #include "catalog/pg_subscription.h"
      45             : #include "common/ip.h"
      46             : #include "executor/instrument.h"
      47             : #include "libpq/libpq.h"
      48             : #include "libpq/pqsignal.h"
      49             : #include "mb/pg_wchar.h"
      50             : #include "miscadmin.h"
      51             : #include "pgstat.h"
      52             : #include "postmaster/autovacuum.h"
      53             : #include "postmaster/fork_process.h"
      54             : #include "postmaster/interrupt.h"
      55             : #include "postmaster/postmaster.h"
      56             : #include "replication/slot.h"
      57             : #include "replication/walsender.h"
      58             : #include "storage/backendid.h"
      59             : #include "storage/dsm.h"
      60             : #include "storage/fd.h"
      61             : #include "storage/ipc.h"
      62             : #include "storage/latch.h"
      63             : #include "storage/lmgr.h"
      64             : #include "storage/pg_shmem.h"
      65             : #include "storage/proc.h"
      66             : #include "storage/procsignal.h"
      67             : #include "utils/builtins.h"
      68             : #include "utils/guc.h"
      69             : #include "utils/memutils.h"
      70             : #include "utils/ps_status.h"
      71             : #include "utils/rel.h"
      72             : #include "utils/snapmgr.h"
      73             : #include "utils/timestamp.h"
      74             : 
      75             : /* ----------
      76             :  * Timer definitions.
      77             :  * ----------
      78             :  */
      79             : #define PGSTAT_STAT_INTERVAL    500 /* Minimum time between stats file
      80             :                                      * updates; in milliseconds. */
      81             : 
      82             : #define PGSTAT_RETRY_DELAY      10  /* How long to wait between checks for a
      83             :                                      * new file; in milliseconds. */
      84             : 
      85             : #define PGSTAT_MAX_WAIT_TIME    10000   /* Maximum time to wait for a stats
      86             :                                          * file update; in milliseconds. */
      87             : 
      88             : #define PGSTAT_INQ_INTERVAL     640 /* How often to ping the collector for a
      89             :                                      * new file; in milliseconds. */
      90             : 
      91             : #define PGSTAT_RESTART_INTERVAL 60  /* How often to attempt to restart a
      92             :                                      * failed statistics collector; in
      93             :                                      * seconds. */
      94             : 
      95             : #define PGSTAT_POLL_LOOP_COUNT  (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY) /* # of PGSTAT_RETRY_DELAY waits that fit in PGSTAT_MAX_WAIT_TIME */
      96             : #define PGSTAT_INQ_LOOP_COUNT   (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)  /* # of PGSTAT_RETRY_DELAY waits that fit in PGSTAT_INQ_INTERVAL */
      97             : 
      98             : /* Minimum receive buffer size for the collector's socket. */
      99             : #define PGSTAT_MIN_RCVBUF       (100 * 1024)
     100             : 
     101             : 
     102             : /* ----------
     103             :  * The initial size hints for the hash tables used in the collector.
     104             :  * ----------
     105             :  */
     106             : #define PGSTAT_DB_HASH_SIZE     16
     107             : #define PGSTAT_TAB_HASH_SIZE    512
     108             : #define PGSTAT_FUNCTION_HASH_SIZE   512
     109             : #define PGSTAT_SUBWORKER_HASH_SIZE  32
     110             : #define PGSTAT_REPLSLOT_HASH_SIZE   32
     111             : 
     112             : 
     113             : /* ----------
     114             :  * GUC parameters
     115             :  * ----------
     116             :  */
     117             : bool        pgstat_track_counts = false;
     118             : int         pgstat_track_functions = TRACK_FUNC_OFF;
     119             : 
     120             : /* ----------
     121             :  * Built from GUC parameter
     122             :  * ----------
     123             :  */
     124             : char       *pgstat_stat_directory = NULL;
     125             : char       *pgstat_stat_filename = NULL;
     126             : char       *pgstat_stat_tmpname = NULL;
     127             : 
     128             : /*
     129             :  * BgWriter and WAL global statistics counters.
     130             :  * Stored directly in a stats message structure so they can be sent
     131             :  * without needing to copy things around.  We assume these init to zeroes.
     132             :  */
     133             : PgStat_MsgBgWriter PendingBgWriterStats;
     134             : PgStat_MsgCheckpointer PendingCheckpointerStats;
     135             : PgStat_MsgWal WalStats;
     136             : 
     137             : /*
     138             :  * WAL usage counters saved from pgWALUsage at the previous call to
     139             :  * pgstat_send_wal(). This is used to calculate how much WAL usage
     140             :  * happens between pgstat_send_wal() calls, by subtracting
     141             :  * the previous counters from the current ones.
     142             :  */
     143             : static WalUsage prevWalUsage;
     144             : 
     145             : /*
     146             :  * List of SLRU names that we keep stats for.  There is no central registry of
     147             :  * SLRUs, so we use this fixed list instead.  The "other" entry is used for
     148             :  * all SLRUs without an explicit entry (e.g. SLRUs in extensions).
     149             :  */
     150             : static const char *const slru_names[] = {   /* SLRUStats[] entries are one-to-one with this list */
     151             :     "CommitTs",
     152             :     "MultiXactMember",
     153             :     "MultiXactOffset",
     154             :     "Notify",
     155             :     "Serial",
     156             :     "Subtrans",
     157             :     "Xact",
     158             :     "other"                       /* catch-all for SLRUs without an explicit entry; has to be last */
     159             : };
     160             : 
     161             : #define SLRU_NUM_ELEMENTS   lengthof(slru_names)
     162             : 
     163             : /*
     164             :  * SLRU statistics counts waiting to be sent to the collector.  These are
     165             :  * stored directly in stats message format so they can be sent without needing
     166             :  * to copy things around.  We assume this variable inits to zeroes.  Entries
     167             :  * are one-to-one with slru_names[].
     168             :  */
     169             : static PgStat_MsgSLRU SLRUStats[SLRU_NUM_ELEMENTS];
     170             : 
     171             : /* ----------
     172             :  * Local data
     173             :  * ----------
     174             :  */
     175             : NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
     176             : 
     177             : static struct sockaddr_storage pgStatAddr;
     178             : 
     179             : static time_t last_pgstat_start_time;
     180             : 
     181             : static bool pgStatRunningInCollector = false;
     182             : 
     183             : /*
     184             :  * Structures in which backends store per-table info that's waiting to be
     185             :  * sent to the collector.
     186             :  *
     187             :  * NOTE: once allocated, TabStatusArray structures are never moved or deleted
     188             :  * for the life of the backend.  Also, we zero out the t_id fields of the
     189             :  * contained PgStat_TableStatus structs whenever they are not actively in use.
     190             :  * This allows relcache pgstat_info pointers to be treated as long-lived data,
     191             :  * avoiding repeated searches in pgstat_initstats() when a relation is
     192             :  * repeatedly opened during a transaction.
     193             :  */
     194             : #define TABSTAT_QUANTUM     100 /* we alloc this many at a time */
     195             : 
     196             : typedef struct TabStatusArray   /* one chunk of per-table stats waiting to be sent to the collector */
     197             : {
     198             :     struct TabStatusArray *tsa_next;    /* link to the next chunk in the list, if any */
     199             :     int         tsa_used;       /* # of tsa_entries[] slots currently used */
     200             :     PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM];    /* per-table data; TABSTAT_QUANTUM slots per chunk */
     201             : } TabStatusArray;
     202             : 
     203             : static TabStatusArray *pgStatTabList = NULL;
     204             : 
     205             : /*
     206             :  * pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer
     207             :  */
     208             : typedef struct TabStatHashEntry /* pgStatTabHash entry: relation OID -> PgStat_TableStatus pointer */
     209             : {
     210             :     Oid         t_id;           /* relation OID this entry maps from */
     211             :     PgStat_TableStatus *tsa_entry;  /* the PgStat_TableStatus that t_id maps to */
     212             : } TabStatHashEntry;
     213             : 
     214             : /*
     215             :  * Hash table for O(1) t_id -> tsa_entry lookup
     216             :  */
     217             : static HTAB *pgStatTabHash = NULL;
     218             : 
     219             : /*
     220             :  * Backends store per-function info that's waiting to be sent to the collector
     221             :  * in this hash table (indexed by function OID).
     222             :  */
     223             : static HTAB *pgStatFunctions = NULL;
     224             : 
     225             : /*
     226             :  * Indicates if backend has some function stats that it hasn't yet
     227             :  * sent to the collector.
     228             :  */
     229             : static bool have_function_stats = false;
     230             : 
     231             : /*
     232             :  * Tuple insertion/deletion counts for an open transaction can't be propagated
     233             :  * into PgStat_TableStatus counters until we know if it is going to commit
     234             :  * or abort.  Hence, we keep these counts in per-subxact structs that live
     235             :  * in TopTransactionContext.  This data structure is designed on the assumption
     236             :  * that subxacts won't usually modify very many tables.
     237             :  */
     238             : typedef struct PgStat_SubXactStatus /* per-subxact holder for not-yet-propagated tuple counts */
     239             : {
     240             :     int         nest_level;     /* subtransaction nest level */
     241             :     struct PgStat_SubXactStatus *prev;  /* higher-level subxact's entry, if any */
     242             :     PgStat_TableXactStatus *first;  /* head of the PgStat_TableXactStatus list for this subxact */
     243             : } PgStat_SubXactStatus;
     244             : 
     245             : static PgStat_SubXactStatus *pgStatXactStack = NULL;
     246             : 
     247             : static int  pgStatXactCommit = 0;
     248             : static int  pgStatXactRollback = 0;
     249             : PgStat_Counter pgStatBlockReadTime = 0;
     250             : PgStat_Counter pgStatBlockWriteTime = 0;
     251             : static PgStat_Counter pgLastSessionReportTime = 0;
     252             : PgStat_Counter pgStatActiveTime = 0;
     253             : PgStat_Counter pgStatTransactionIdleTime = 0;
     254             : SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
     255             : 
     256             : /* Record that's written to 2PC state file when pgstat state is persisted */
     257             : typedef struct TwoPhasePgStatRecord
     258             : {                                   /* NOTE(review): written to the 2PC state file, so the field layout presumably must stay stable -- confirm */
     259             :     PgStat_Counter tuples_inserted; /* tuples inserted in xact */
     260             :     PgStat_Counter tuples_updated;  /* tuples updated in xact */
     261             :     PgStat_Counter tuples_deleted;  /* tuples deleted in xact */
     262             :     /* tuples inserted/updated/deleted prior to truncate/drop */
     263             :     PgStat_Counter inserted_pre_truncdrop;  /* inserted prior to truncate/drop */
     264             :     PgStat_Counter updated_pre_truncdrop;   /* updated prior to truncate/drop */
     265             :     PgStat_Counter deleted_pre_truncdrop;   /* deleted prior to truncate/drop */
     266             :     Oid         t_id;           /* table's OID */
     267             :     bool        t_shared;       /* is it a shared catalog? */
     268             :     bool        t_truncdropped; /* was the relation truncated/dropped? */
     269             : } TwoPhasePgStatRecord;
     270             : 
     271             : /*
     272             :  * Info about current "snapshot" of stats file
     273             :  */
     274             : static MemoryContext pgStatLocalContext = NULL;
     275             : static HTAB *pgStatDBHash = NULL;
     276             : 
     277             : /*
     278             :  * Cluster wide statistics, kept in the stats collector.
     279             :  * Contains statistics that are not collected per database
     280             :  * or per table.
     281             :  */
     282             : static PgStat_ArchiverStats archiverStats;
     283             : static PgStat_GlobalStats globalStats;
     284             : static PgStat_WalStats walStats;
     285             : static PgStat_SLRUStats slruStats[SLRU_NUM_ELEMENTS];
     286             : static HTAB *replSlotStatHash = NULL;
     287             : 
     288             : /*
     289             :  * List of OIDs of databases we need to write out.  If an entry is InvalidOid,
     290             :  * it means to write only the shared-catalog stats ("DB 0"); otherwise, we
     291             :  * will write both that DB's data and the shared stats.
     292             :  */
     293             : static List *pending_write_requests = NIL;
     294             : 
     295             : /*
     296             :  * Total time charged to functions so far in the current backend.
     297             :  * We use this to help separate "self" and "other" time charges.
     298             :  * (We assume this initializes to zero.)
     299             :  */
     300             : static instr_time total_func_time;
     301             : 
     302             : /*
     303             :  * For assertions that check pgstat is not used before initialization / after
     304             :  * shutdown.
     305             :  */
     306             : #ifdef USE_ASSERT_CHECKING
     307             : static bool pgstat_is_initialized = false;
     308             : static bool pgstat_is_shutdown = false;
     309             : #endif
     310             : 
     311             : 
     312             : /* ----------
     313             :  * Local function forward declarations
     314             :  * ----------
     315             :  */
     316             : #ifdef EXEC_BACKEND
     317             : static pid_t pgstat_forkexec(void);
     318             : #endif
     319             : 
     320             : NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
     321             : 
     322             : static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
     323             : static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
     324             :                                                  Oid tableoid, bool create);
     325             : static PgStat_StatSubWorkerEntry *pgstat_get_subworker_entry(PgStat_StatDBEntry *dbentry,
     326             :                                                              Oid subid, Oid subrelid,
     327             :                                                              bool create);
     328             : static void pgstat_write_statsfiles(bool permanent, bool allDbs);
     329             : static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent);
     330             : static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
     331             : static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash,
     332             :                                      HTAB *subworkerhash, bool permanent);
     333             : static void backend_read_statsfile(void);
     334             : 
     335             : static bool pgstat_write_statsfile_needed(void);
     336             : static bool pgstat_db_requested(Oid databaseid);
     337             : 
     338             : static PgStat_StatReplSlotEntry *pgstat_get_replslot_entry(NameData name, bool create_it);
     339             : static void pgstat_reset_replslot(PgStat_StatReplSlotEntry *slotstats, TimestampTz ts);
     340             : 
     341             : static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now);
     342             : static void pgstat_send_funcstats(void);
     343             : static void pgstat_send_slru(void);
     344             : static void pgstat_send_subscription_purge(PgStat_MsgSubscriptionPurge *msg);
     345             : static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
     346             : static bool pgstat_should_report_connstat(void);
     347             : static void pgstat_report_disconnect(Oid dboid);
     348             : 
     349             : static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
     350             : 
     351             : static void pgstat_setup_memcxt(void);
     352             : static void pgstat_assert_is_up(void);
     353             : 
     354             : static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
     355             : static void pgstat_send(void *msg, int len);
     356             : 
     357             : static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
     358             : static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
     359             : static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
     360             : static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
     361             : static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
     362             : static void pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len);
     363             : static void pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len);
     364             : static void pgstat_recv_resetslrucounter(PgStat_MsgResetslrucounter *msg, int len);
     365             : static void pgstat_recv_resetreplslotcounter(PgStat_MsgResetreplslotcounter *msg, int len);
     366             : static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
     367             : static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
     368             : static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
     369             : static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
     370             : static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
     371             : static void pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len);
     372             : static void pgstat_recv_wal(PgStat_MsgWal *msg, int len);
     373             : static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len);
     374             : static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
     375             : static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
     376             : static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
     377             : static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
     378             : static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
     379             : static void pgstat_recv_connect(PgStat_MsgConnect *msg, int len);
     380             : static void pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len);
     381             : static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
     382             : static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
     383             : static void pgstat_recv_subscription_purge(PgStat_MsgSubscriptionPurge *msg, int len);
     384             : static void pgstat_recv_subworker_error(PgStat_MsgSubWorkerError *msg, int len);
     385             : 
     386             : /* ------------------------------------------------------------
     387             :  * Public functions called from postmaster follow
     388             :  * ------------------------------------------------------------
     389             :  */
     390             : 
     391             : /* ----------
     392             :  * pgstat_init() -
     393             :  *
     394             :  *  Called from postmaster at startup. Create the resources required
     395             :  *  by the statistics collector process.  If unable to do so, do not
     396             :  *  fail --- better to let the postmaster start with stats collection
     397             :  *  disabled.
     398             :  * ----------
     399             :  */
     400             : void
     401         940 : pgstat_init(void)
     402             : {
     403             :     socklen_t   alen;
     404         940 :     struct addrinfo *addrs = NULL,
     405             :                *addr,
     406             :                 hints;
     407             :     int         ret;
     408             :     fd_set      rset;
     409             :     struct timeval tv;
     410             :     char        test_byte;
     411             :     int         sel_res;
     412         940 :     int         tries = 0;
     413             : 
     414             : #define TESTBYTEVAL ((char) 199)
     415             : 
     416             :     /*
     417             :      * This static assertion verifies that we didn't mess up the calculations
     418             :      * involved in selecting maximum payload sizes for our UDP messages.
     419             :      * Because the only consequence of overrunning PGSTAT_MAX_MSG_SIZE would
     420             :      * be silent performance loss from fragmentation, it seems worth having a
     421             :      * compile-time cross-check that we didn't.
     422             :      */
     423             :     StaticAssertStmt(sizeof(PgStat_Msg) <= PGSTAT_MAX_MSG_SIZE,
     424             :                      "maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");
     425             : 
     426             :     /*
     427             :      * Create the UDP socket for sending and receiving statistic messages
     428             :      */
     429         940 :     hints.ai_flags = AI_PASSIVE;
     430         940 :     hints.ai_family = AF_UNSPEC;
     431         940 :     hints.ai_socktype = SOCK_DGRAM;
     432         940 :     hints.ai_protocol = 0;
     433         940 :     hints.ai_addrlen = 0;
     434         940 :     hints.ai_addr = NULL;
     435         940 :     hints.ai_canonname = NULL;
     436         940 :     hints.ai_next = NULL;
     437         940 :     ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
     438         940 :     if (ret || !addrs)
     439             :     {
     440           0 :         ereport(LOG,
     441             :                 (errmsg("could not resolve \"localhost\": %s",
     442             :                         gai_strerror(ret))));
     443           0 :         goto startup_failed;
     444             :     }
     445             : 
     446             :     /*
     447             :      * On some platforms, pg_getaddrinfo_all() may return multiple addresses
     448             :      * only one of which will actually work (eg, both IPv6 and IPv4 addresses
     449             :      * when kernel will reject IPv6).  Worse, the failure may occur at the
     450             :      * bind() or perhaps even connect() stage.  So we must loop through the
     451             :      * results till we find a working combination. We will generate LOG
     452             :      * messages, but no error, for bogus combinations.
     453             :      */
     454         940 :     for (addr = addrs; addr; addr = addr->ai_next)
     455             :     {
     456             : #ifdef HAVE_UNIX_SOCKETS
     457             :         /* Ignore AF_UNIX sockets, if any are returned. */
     458         940 :         if (addr->ai_family == AF_UNIX)
     459           0 :             continue;
     460             : #endif
     461             : 
     462         940 :         if (++tries > 1)
     463           0 :             ereport(LOG,
     464             :                     (errmsg("trying another address for the statistics collector")));
     465             : 
     466             :         /*
     467             :          * Create the socket.
     468             :          */
     469         940 :         if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) == PGINVALID_SOCKET)
     470             :         {
     471           0 :             ereport(LOG,
     472             :                     (errcode_for_socket_access(),
     473             :                      errmsg("could not create socket for statistics collector: %m")));
     474           0 :             continue;
     475             :         }
     476             : 
     477             :         /*
     478             :          * Bind it to a kernel assigned port on localhost and get the assigned
     479             :          * port via getsockname().
     480             :          */
     481         940 :         if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
     482             :         {
     483           0 :             ereport(LOG,
     484             :                     (errcode_for_socket_access(),
     485             :                      errmsg("could not bind socket for statistics collector: %m")));
     486           0 :             closesocket(pgStatSock);
     487           0 :             pgStatSock = PGINVALID_SOCKET;
     488           0 :             continue;
     489             :         }
     490             : 
     491         940 :         alen = sizeof(pgStatAddr);
     492         940 :         if (getsockname(pgStatSock, (struct sockaddr *) &pgStatAddr, &alen) < 0)
     493             :         {
     494           0 :             ereport(LOG,
     495             :                     (errcode_for_socket_access(),
     496             :                      errmsg("could not get address of socket for statistics collector: %m")));
     497           0 :             closesocket(pgStatSock);
     498           0 :             pgStatSock = PGINVALID_SOCKET;
     499           0 :             continue;
     500             :         }
     501             : 
     502             :         /*
     503             :          * Connect the socket to its own address.  This saves a few cycles by
     504             :          * not having to respecify the target address on every send. This also
     505             :          * provides a kernel-level check that only packets from this same
     506             :          * address will be received.
     507             :          */
     508         940 :         if (connect(pgStatSock, (struct sockaddr *) &pgStatAddr, alen) < 0)
     509             :         {
     510           0 :             ereport(LOG,
     511             :                     (errcode_for_socket_access(),
     512             :                      errmsg("could not connect socket for statistics collector: %m")));
     513           0 :             closesocket(pgStatSock);
     514           0 :             pgStatSock = PGINVALID_SOCKET;
     515           0 :             continue;
     516             :         }
     517             : 
     518             :         /*
     519             :          * Try to send and receive a one-byte test message on the socket. This
     520             :          * is to catch situations where the socket can be created but will not
     521             :          * actually pass data (for instance, because kernel packet filtering
     522             :          * rules prevent it).
     523             :          */
     524         940 :         test_byte = TESTBYTEVAL;
     525             : 
     526         940 : retry1:
     527         940 :         if (send(pgStatSock, &test_byte, 1, 0) != 1)
     528             :         {
     529           0 :             if (errno == EINTR)
     530           0 :                 goto retry1;    /* if interrupted, just retry */
     531           0 :             ereport(LOG,
     532             :                     (errcode_for_socket_access(),
     533             :                      errmsg("could not send test message on socket for statistics collector: %m")));
     534           0 :             closesocket(pgStatSock);
     535           0 :             pgStatSock = PGINVALID_SOCKET;
     536           0 :             continue;
     537             :         }
     538             : 
     539             :         /*
     540             :          * There could possibly be a little delay before the message can be
     541             :          * received.  We arbitrarily allow up to half a second before deciding
     542             :          * it's broken.
     543             :          */
     544             :         for (;;)                /* need a loop to handle EINTR */
     545             :         {
     546         940 :             FD_ZERO(&rset);
     547         940 :             FD_SET(pgStatSock, &rset);
     548             : 
     549         940 :             tv.tv_sec = 0;
     550         940 :             tv.tv_usec = 500000;
     551         940 :             sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
     552         940 :             if (sel_res >= 0 || errno != EINTR)
     553             :                 break;
     554             :         }
     555         940 :         if (sel_res < 0)
     556             :         {
     557           0 :             ereport(LOG,
     558             :                     (errcode_for_socket_access(),
     559             :                      errmsg("select() failed in statistics collector: %m")));
     560           0 :             closesocket(pgStatSock);
     561           0 :             pgStatSock = PGINVALID_SOCKET;
     562           0 :             continue;
     563             :         }
     564         940 :         if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
     565             :         {
     566             :             /*
     567             :              * This is the case we actually think is likely, so take pains to
     568             :              * give a specific message for it.
     569             :              *
     570             :              * errno will not be set meaningfully here, so don't use it.
     571             :              */
     572           0 :             ereport(LOG,
     573             :                     (errcode(ERRCODE_CONNECTION_FAILURE),
     574             :                      errmsg("test message did not get through on socket for statistics collector")));
     575           0 :             closesocket(pgStatSock);
     576           0 :             pgStatSock = PGINVALID_SOCKET;
     577           0 :             continue;
     578             :         }
     579             : 
     580         940 :         test_byte++;            /* just make sure variable is changed */
     581             : 
     582         940 : retry2:
     583         940 :         if (recv(pgStatSock, &test_byte, 1, 0) != 1)
     584             :         {
     585           0 :             if (errno == EINTR)
     586           0 :                 goto retry2;    /* if interrupted, just retry */
     587           0 :             ereport(LOG,
     588             :                     (errcode_for_socket_access(),
     589             :                      errmsg("could not receive test message on socket for statistics collector: %m")));
     590           0 :             closesocket(pgStatSock);
     591           0 :             pgStatSock = PGINVALID_SOCKET;
     592           0 :             continue;
     593             :         }
     594             : 
     595         940 :         if (test_byte != TESTBYTEVAL)   /* strictly paranoia ... */
     596             :         {
     597           0 :             ereport(LOG,
     598             :                     (errcode(ERRCODE_INTERNAL_ERROR),
     599             :                      errmsg("incorrect test message transmission on socket for statistics collector")));
     600           0 :             closesocket(pgStatSock);
     601           0 :             pgStatSock = PGINVALID_SOCKET;
     602           0 :             continue;
     603             :         }
     604             : 
     605             :         /* If we get here, we have a working socket */
     606         940 :         break;
     607             :     }
     608             : 
     609             :     /* Did we find a working address? */
     610         940 :     if (!addr || pgStatSock == PGINVALID_SOCKET)
     611           0 :         goto startup_failed;
     612             : 
     613             :     /*
     614             :      * Set the socket to non-blocking IO.  This ensures that if the collector
     615             :      * falls behind, statistics messages will be discarded; backends won't
     616             :      * block waiting to send messages to the collector.
     617             :      */
     618         940 :     if (!pg_set_noblock(pgStatSock))
     619             :     {
     620           0 :         ereport(LOG,
     621             :                 (errcode_for_socket_access(),
     622             :                  errmsg("could not set statistics collector socket to nonblocking mode: %m")));
     623           0 :         goto startup_failed;
     624             :     }
     625             : 
     626             :     /*
     627             :      * Try to ensure that the socket's receive buffer is at least
     628             :      * PGSTAT_MIN_RCVBUF bytes, so that it won't easily overflow and lose
     629             :      * data.  Use of UDP protocol means that we are willing to lose data under
     630             :      * heavy load, but we don't want it to happen just because of ridiculously
     631             :      * small default buffer sizes (such as 8KB on older Windows versions).
     632             :      */
     633             :     {
     634             :         int         old_rcvbuf;
     635             :         int         new_rcvbuf;
     636         940 :         socklen_t   rcvbufsize = sizeof(old_rcvbuf);
     637             : 
     638         940 :         if (getsockopt(pgStatSock, SOL_SOCKET, SO_RCVBUF,
     639             :                        (char *) &old_rcvbuf, &rcvbufsize) < 0)
     640             :         {
     641           0 :             ereport(LOG,
     642             :                     (errmsg("%s(%s) failed: %m", "getsockopt", "SO_RCVBUF")));
     643             :             /* if we can't get existing size, always try to set it */
     644           0 :             old_rcvbuf = 0;
     645             :         }
     646             : 
     647         940 :         new_rcvbuf = PGSTAT_MIN_RCVBUF;
     648         940 :         if (old_rcvbuf < new_rcvbuf)
     649             :         {
     650           0 :             if (setsockopt(pgStatSock, SOL_SOCKET, SO_RCVBUF,
     651             :                            (char *) &new_rcvbuf, sizeof(new_rcvbuf)) < 0)
     652           0 :                 ereport(LOG,
     653             :                         (errmsg("%s(%s) failed: %m", "setsockopt", "SO_RCVBUF")));
     654             :         }
     655             :     }
     656             : 
     657         940 :     pg_freeaddrinfo_all(hints.ai_family, addrs);
     658             : 
     659             :     /* Now that we have a long-lived socket, tell fd.c about it. */
     660         940 :     ReserveExternalFD();
     661             : 
     662         940 :     return;
     663             : 
     664           0 : startup_failed:
     665           0 :     ereport(LOG,
     666             :             (errmsg("disabling statistics collector for lack of working socket")));
     667             : 
     668           0 :     if (addrs)
     669           0 :         pg_freeaddrinfo_all(hints.ai_family, addrs);
     670             : 
     671           0 :     if (pgStatSock != PGINVALID_SOCKET)
     672           0 :         closesocket(pgStatSock);
     673           0 :     pgStatSock = PGINVALID_SOCKET;
     674             : 
     675             :     /*
     676             :      * Adjust GUC variables to suppress useless activity, and for debugging
     677             :      * purposes (seeing track_counts off is a clue that we failed here). We
     678             :      * use PGC_S_OVERRIDE because there is no point in trying to turn it back
     679             :      * on from postgresql.conf without a restart.
     680             :      */
     681           0 :     SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
     682             : }
     683             : 
     684             : /*
     685             :  * subroutine for pgstat_reset_all
                     :  *
                     :  * Scans "directory" and calls unlink() on every entry whose name is
                     :  * "global." or "db_<number>." followed by exactly "tmp" or "stat", where
                     :  * <number> is an unsigned decimal that starts right after "db_".  All
                     :  * other entries are left untouched.
                     :  *
                     :  * NOTE(review): the result of AllocateDir() is not tested here;
                     :  * presumably ReadDir() copes with a failed open -- confirm in fd.c.
     686             :  */
     687             : static void
     688         528 : pgstat_reset_remove_files(const char *directory)
     689             : {
     690             :     DIR        *dir;
     691             :     struct dirent *entry;
     692             :     char        fname[MAXPGPATH * 2];
     693             : 
     694         528 :     dir = AllocateDir(directory);
     695        2084 :     while ((entry = ReadDir(dir, directory)) != NULL)
     696             :     {
     697             :         int         nchars;
     698             :         Oid         tmp_oid;    /* parsed only to validate the name; never read */
     699             : 
     700             :         /*
     701             :          * Skip directory entries that don't match the file names we write.
     702             :          * See get_dbstat_filename for the database-specific pattern.
                     :          *
                     :          * nchars ends up as the length of the recognized prefix, either
                     :          * "global." or "db_<number>.".  sscanf() stores into nchars only
                     :          * when it reaches %n, i.e. after matching the '.', so nchars stays
                     :          * 0 for any name that does not begin with "db_<number>.".
     703             :          */
     704        1556 :         if (strncmp(entry->d_name, "global.", 7) == 0)
     705         180 :             nchars = 7;
     706             :         else
     707             :         {
     708        1376 :             nchars = 0;
     709        1376 :             (void) sscanf(entry->d_name, "db_%u.%n",
     710             :                           &tmp_oid, &nchars);
     711        1376 :             if (nchars <= 0)
     712        1056 :                 continue;
     713             :             /* %u allows leading whitespace, so reject that */
     714         320 :             if (strchr("0123456789", entry->d_name[3]) == NULL)
     715           0 :                 continue;
     716             :         }
     717             : 
     718         500 :         if (strcmp(entry->d_name + nchars, "tmp") != 0 &&
     719         500 :             strcmp(entry->d_name + nchars, "stat") != 0)
     720           0 :             continue;           /* suffix is neither "tmp" nor "stat" */
     721             : 
     722         500 :         snprintf(fname, sizeof(fname), "%s/%s", directory,
     723         500 :                  entry->d_name);
     724         500 :         unlink(fname);          /* return value is not checked */
     725             :     }
     726         528 :     FreeDir(dir);
     727         528 : }
     728             : 
     729             : /*
     730             :  * pgstat_reset_all() -
     731             :  *
     732             :  * Remove the stats files.  This is currently used only if WAL
     733             :  * recovery is needed after a crash.
                     :  *
                     :  * Two directories are cleaned, in this order: the one named by
                     :  * pgstat_stat_directory and PGSTAT_STAT_PERMANENT_DIRECTORY.  The
                     :  * matching and removal of files is done by pgstat_reset_remove_files().
     734             :  */
     735             : void
     736         264 : pgstat_reset_all(void)
     737             : {
     738         264 :     pgstat_reset_remove_files(pgstat_stat_directory);
     739         264 :     pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY);
     740         264 : }
     741             : 
     742             : #ifdef EXEC_BACKEND
     743             : 
     744             : /*
     745             :  * pgstat_forkexec() -
     746             :  *
     747             :  * Format up the arglist for, then fork and exec, statistics collector process
                     :  *
                     :  * The argument vector is "postgres", "--forkcol", one NULL placeholder
                     :  * and the terminating NULL; the placeholder is included in the count
                     :  * passed as ac.  Returns whatever postmaster_forkexec() returns.
     748             :  */
     749             : static pid_t
     750             : pgstat_forkexec(void)
     751             : {
     752             :     char       *av[10];
     753             :     int         ac = 0;
     754             : 
     755             :     av[ac++] = "postgres";
     756             :     av[ac++] = "--forkcol";
     757             :     av[ac++] = NULL;            /* filled in by postmaster_forkexec */
     758             : 
     759             :     av[ac] = NULL;              /* terminator, not counted in ac */
     760             :     Assert(ac < lengthof(av));
     761             : 
     762             :     return postmaster_forkexec(ac, av);
     763             : }
     764             : #endif                          /* EXEC_BACKEND */
     765             : 
     766             : 
     767             : /*
     768             :  * pgstat_start() -
     769             :  *
     770             :  *  Called from postmaster at startup or after an existing collector
     771             :  *  died.  Attempt to fire up a fresh statistics collector.
     772             :  *
     773             :  *  Returns PID of child process, or 0 if fail.
     774             :  *
     775             :  *  Note: if fail, we will be called again from the postmaster main loop.
                     :  *
                     :  *  0 is returned in three cases: pgStatSock is invalid, the previous
                     :  *  start attempt was less than PGSTAT_RESTART_INTERVAL ago, or the fork
                     :  *  failed (only that last case is logged).
     776             :  */
     777             : int
     778         944 : pgstat_start(void)
     779             : {
     780             :     time_t      curtime;
     781             :     pid_t       pgStatPid;
     782             : 
     783             :     /*
     784             :      * Check that the socket is there, else pgstat_init failed and we can do
     785             :      * nothing useful.
     786             :      */
     787         944 :     if (pgStatSock == PGINVALID_SOCKET)
     788           0 :         return 0;
     789             : 
     790             :     /*
     791             :      * Do nothing if too soon since last collector start.  This is a safety
     792             :      * valve to protect against continuous respawn attempts if the collector
     793             :      * is dying immediately at launch.  Note that since we will be re-called
     794             :      * from the postmaster main loop, we will get another chance later.
                     :      *
                     :      * The elapsed time is compared as unsigned int, so a small negative
                     :      * difference (curtime earlier than the recorded start time) converts
                     :      * to a large value and does not hold back the restart.
     795             :      */
     796         944 :     curtime = time(NULL);
     797         944 :     if ((unsigned int) (curtime - last_pgstat_start_time) <
     798             :         (unsigned int) PGSTAT_RESTART_INTERVAL)
     799           0 :         return 0;
     800         944 :     last_pgstat_start_time = curtime;
     801             : 
     802             :     /*
     803             :      * Okay, fork off the collector.
                     :      *
                     :      * last_pgstat_start_time has already been updated above, so a failed
                     :      * fork is subject to the same restart interval as a successful one.
     804             :      */
     805             : #ifdef EXEC_BACKEND
     806             :     switch ((pgStatPid = pgstat_forkexec()))
     807             : #else
     808         944 :     switch ((pgStatPid = fork_process()))
     809             : #endif
     810             :     {
     811           0 :         case -1:
     812           0 :             ereport(LOG,
     813             :                     (errmsg("could not fork statistics collector: %m")));
     814           0 :             return 0;
     815             : 
     816             : #ifndef EXEC_BACKEND
     817         944 :         case 0:
     818             :             /* in postmaster child ... */
     819         944 :             InitPostmasterChild();
     820             : 
     821             :             /* Close the postmaster's sockets */
     822         944 :             ClosePostmasterPorts(false);
     823             : 
     824             :             /* Drop our connection to postmaster's shared memory, as well */
     825         944 :             dsm_detach_all();
     826         944 :             PGSharedMemoryDetach();
     827             : 
     828         944 :             PgstatCollectorMain(0, NULL);
     829             :             break;              /* presumably never reached -- confirm that PgstatCollectorMain() does not return */
     830             : #endif
     831             : 
     832         944 :         default:
     833         944 :             return (int) pgStatPid; /* in the parent: child's PID */
     834             :     }
     835             : 
     836             :     /* shouldn't get here */
     837             :     return 0;
     838             : }
     839             : 
     840             : void
     841          10 : allow_immediate_pgstat_restart(void)
     842             : {
     843          10 :     last_pgstat_start_time = 0; /* Forget when the collector was last started,
                     :                                  * so that the PGSTAT_RESTART_INTERVAL check
                     :                                  * in pgstat_start() does not hold back the
                     :                                  * next start attempt. */
     844          10 : }
     845             : 
     846             : /* ------------------------------------------------------------
     847             :  * Public functions used by backends follow
     848             :  *------------------------------------------------------------
     849             :  */
     850             : 
     851             : 
     852             : /* ----------
     853             :  * pgstat_report_stat() -
     854             :  *
     855             :  *  Must be called by processes that perform DML: tcop/postgres.c, logical
     856             :  *  receiver processes, SPI worker, etc. to send the so far collected
     857             :  *  per-table and function usage statistics to the collector.  Note that this
     858             :  *  is called only when not within a transaction, so it is fair to use
     859             :  *  transaction stop time as an approximation of current time.
     860             :  *
     861             :  *  "disconnect" is "true" only for the last call before the backend
     862             :  *  exits.  This makes sure that no data is lost and that interrupted
     863             :  *  sessions are reported correctly.
                     :  *
                     :  *  The function returns without doing anything if there is nothing to
                     :  *  report, or if fewer than PGSTAT_STAT_INTERVAL msec have passed since
                     :  *  the last report; neither shortcut is taken when "disconnect" is true.
                     :  *  Otherwise the pending per-table counts are handed to
                     :  *  pgstat_send_tabstat() and cleared, and then the function, WAL and
                     :  *  SLRU statistics are sent.
     864             :  * ----------
     865             :  */
     866             : void
     867      624684 : pgstat_report_stat(bool disconnect)
     868             : {
     869             :     /* we assume this inits to all zeroes: */
     870             :     static const PgStat_TableCounts all_zeroes;
     871             :     static TimestampTz last_report = 0; /* "now" of the last call that passed the interval check */
     872             : 
     873             :     TimestampTz now;
     874             :     PgStat_MsgTabstat regular_msg;
     875             :     PgStat_MsgTabstat shared_msg;
     876             :     TabStatusArray *tsa;
     877             :     int         i;
     878             : 
     879      624684 :     pgstat_assert_is_up();
     880             : 
     881             :     /*
     882             :      * Don't expend a clock check if nothing to do.
     883             :      *
     884             :      * To determine whether any WAL activity has occurred since last time, not
     885             :      * only the number of generated WAL records but also the numbers of WAL
     886             :      * writes and syncs need to be checked, because even a transaction that
     887             :      * generates no WAL records can write or sync WAL data when flushing the
     888             :      * data pages.
     889             :      */
     890      624684 :     if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
     891      348264 :         pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
     892         570 :         pgWalUsage.wal_records == prevWalUsage.wal_records &&
     893         570 :         WalStats.m_wal_write == 0 && WalStats.m_wal_sync == 0 &&
     894         570 :         !have_function_stats && !disconnect)
     895      595202 :         return;
     896             : 
     897             :     /*
     898             :      * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
     899             :      * msec since we last sent one, or the backend is about to exit.
     900             :      */
     901      624678 :     now = GetCurrentTransactionStopTimestamp();
     902      624678 :     if (!disconnect &&
     903      610406 :         !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
     904      595196 :         return;
     905             : 
     906       29482 :     last_report = now;
     907             : 
     908       29482 :     if (disconnect)
     909       14272 :         pgstat_report_disconnect(MyDatabaseId);
     910             : 
     911             :     /*
     912             :      * Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
     913             :      * entries it points to.  (Should we fail partway through the loop below,
     914             :      * it's okay to have removed the hashtable already --- the only
     915             :      * consequence is we'd get multiple entries for the same table in the
     916             :      * pgStatTabList, and that's safe.)
     917             :      */
     918       29482 :     if (pgStatTabHash)
     919       26198 :         hash_destroy(pgStatTabHash);
     920       29482 :     pgStatTabHash = NULL;
     921             : 
     922             :     /*
     923             :      * Scan through the TabStatusArray struct(s) to find tables that actually
     924             :      * have counts, and build messages to send.  We have to separate shared
     925             :      * relations from regular ones because the databaseid field in the message
     926             :      * header has to depend on that.
                     :      *
                     :      * A message is passed to pgstat_send_tabstat() as soon as it holds
                     :      * PGSTAT_NUM_TABENTRIES entries, and is then refilled from entry 0.
     927             :      */
     928       29482 :     regular_msg.m_databaseid = MyDatabaseId;
     929       29482 :     shared_msg.m_databaseid = InvalidOid;
     930       29482 :     regular_msg.m_nentries = 0;
     931       29482 :     shared_msg.m_nentries = 0;
     932             : 
     933       61034 :     for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
     934             :     {
     935     1034656 :         for (i = 0; i < tsa->tsa_used; i++)
     936             :         {
     937     1003104 :             PgStat_TableStatus *entry = &tsa->tsa_entries[i];
     938             :             PgStat_MsgTabstat *this_msg;
     939             :             PgStat_TableEntry *this_ent;
     940             : 
     941             :             /* Shouldn't have any pending transaction-dependent counts */
     942             :             Assert(entry->trans == NULL);
     943             : 
     944             :             /*
     945             :              * Ignore entries that didn't accumulate any actual counts, such
     946             :              * as indexes that were opened by the planner but not used.
     947             :              */
     948     1003104 :             if (memcmp(&entry->t_counts, &all_zeroes,
     949             :                        sizeof(PgStat_TableCounts)) == 0)
     950      249720 :                 continue;
     951             : 
     952             :             /*
     953             :              * OK, insert data into the appropriate message, and send if full.
     954             :              */
     955      753384 :             this_msg = entry->t_shared ? &shared_msg : &regular_msg;
     956      753384 :             this_ent = &this_msg->m_entry[this_msg->m_nentries];
     957      753384 :             this_ent->t_id = entry->t_id;
     958      753384 :             memcpy(&this_ent->t_counts, &entry->t_counts,
     959             :                    sizeof(PgStat_TableCounts));
     960      753384 :             if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
     961             :             {
     962       74528 :                 pgstat_send_tabstat(this_msg, now);
     963       74528 :                 this_msg->m_nentries = 0;
     964             :             }
     965             :         }
     966             :         /* zero out PgStat_TableStatus structs after use */
     967      130012 :         MemSet(tsa->tsa_entries, 0,
     968             :                tsa->tsa_used * sizeof(PgStat_TableStatus));
     969       31552 :         tsa->tsa_used = 0;
     970             :     }
     971             : 
     972             :     /*
     973             :      * Send partial messages.  Make sure that any pending xact commit/abort
     974             :      * and connection stats get counted, even if there are no table stats to
     975             :      * send.
                     :      *
                     :      * The regular message may therefore go out with zero table entries;
                     :      * the shared message is sent only when it has at least one entry.
     976             :      */
     977       29482 :     if (regular_msg.m_nentries > 0 ||
     978        6184 :         pgStatXactCommit > 0 || pgStatXactRollback > 0 || disconnect)
     979       29330 :         pgstat_send_tabstat(&regular_msg, now);
     980       29482 :     if (shared_msg.m_nentries > 0)
     981       16428 :         pgstat_send_tabstat(&shared_msg, now);
     982             : 
     983             :     /* Now, send function statistics */
     984       29482 :     pgstat_send_funcstats();
     985             : 
     986             :     /* Send WAL statistics */
     987       29482 :     pgstat_send_wal(true);
     988             : 
     989             :     /* Finally send SLRU statistics */
     990       29482 :     pgstat_send_slru();
     991             : }
     992             : 
     993             : /*
     994             :  * Subroutine for pgstat_report_stat: finish and send a tabstat message
                     :  *
                     :  * tsmsg must already have m_databaseid, m_nentries and the first
                     :  * m_nentries elements of m_entry filled in; this function fills in the
                     :  * remaining counters and the header, then passes the message to
                     :  * pgstat_send().  "now" is used to compute the session time.
                     :  *
                     :  * For a message with a valid database OID, the backend-wide xact,
                     :  * block I/O timing and activity-time accumulators are copied into the
                     :  * message and reset to zero.  For a message with an invalid OID those
                     :  * fields are sent as zero and the accumulators are left alone.
                     :  *
                     :  * If there is no socket the function returns before touching either the
                     :  * message or the accumulators.
     995             :  */
     996             : static void
     997      120286 : pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now)
     998             : {
     999             :     int         n;
    1000             :     int         len;
    1001             : 
    1002             :     /* It's unlikely we'd get here with no socket, but maybe not impossible */
    1003      120286 :     if (pgStatSock == PGINVALID_SOCKET)
    1004        2338 :         return;
    1005             : 
    1006             :     /*
    1007             :      * Report and reset accumulated xact commit/rollback and I/O timings
    1008             :      * whenever we send a normal tabstat message
    1009             :      */
    1010      117948 :     if (OidIsValid(tsmsg->m_databaseid))
    1011             :     {
    1012       95194 :         tsmsg->m_xact_commit = pgStatXactCommit;
    1013       95194 :         tsmsg->m_xact_rollback = pgStatXactRollback;
    1014       95194 :         tsmsg->m_block_read_time = pgStatBlockReadTime;
    1015       95194 :         tsmsg->m_block_write_time = pgStatBlockWriteTime;
    1016             : 
    1017       95194 :         if (pgstat_should_report_connstat())
    1018             :         {
    1019             :             long        secs;
    1020             :             int         usecs;
    1021             : 
    1022             :             /*
    1023             :              * pgLastSessionReportTime is initialized to MyStartTimestamp by
    1024             :              * pgstat_report_connect().
                     :              *
                     :              * m_session_time is the time elapsed since the previous
                     :              * report, expressed in microseconds.
    1025             :              */
    1026       84070 :             TimestampDifference(pgLastSessionReportTime, now, &secs, &usecs);
    1027       84070 :             pgLastSessionReportTime = now;
    1028       84070 :             tsmsg->m_session_time = (PgStat_Counter) secs * 1000000 + usecs;
    1029       84070 :             tsmsg->m_active_time = pgStatActiveTime;
    1030       84070 :             tsmsg->m_idle_in_xact_time = pgStatTransactionIdleTime;
    1031             :         }
    1032             :         else
    1033             :         {
    1034       11124 :             tsmsg->m_session_time = 0;
    1035       11124 :             tsmsg->m_active_time = 0;
    1036       11124 :             tsmsg->m_idle_in_xact_time = 0;
    1037             :         }
    1038       95194 :         pgStatXactCommit = 0;
    1039       95194 :         pgStatXactRollback = 0;
    1040       95194 :         pgStatBlockReadTime = 0;
    1041       95194 :         pgStatBlockWriteTime = 0;
    1042       95194 :         pgStatActiveTime = 0;   /* reset even when connection stats were not reported above */
    1043       95194 :         pgStatTransactionIdleTime = 0;
    1044             :     }
    1045             :     else
    1046             :     {
    1047       22754 :         tsmsg->m_xact_commit = 0;
    1048       22754 :         tsmsg->m_xact_rollback = 0;
    1049       22754 :         tsmsg->m_block_read_time = 0;
    1050       22754 :         tsmsg->m_block_write_time = 0;
    1051       22754 :         tsmsg->m_session_time = 0;
    1052       22754 :         tsmsg->m_active_time = 0;
    1053       22754 :         tsmsg->m_idle_in_xact_time = 0;
    1054             :     }
    1055             : 
    1056      117948 :     n = tsmsg->m_nentries;
    1057      117948 :     len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
    1058             :         n * sizeof(PgStat_TableEntry);  /* fixed part plus only the n entries in use */
    1059             : 
    1060      117948 :     pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
    1061      117948 :     pgstat_send(tsmsg, len);
    1062             : }
    1063             : 
    1064             : /*
    1065             :  * Subroutine for pgstat_report_stat: populate and send a function stat message
                     :  *
                     :  * Walks the pgStatFunctions hash table and copies every entry with
                     :  * non-zero counts into a PgStat_MsgFuncstat, passing the message to
                     :  * pgstat_send() each time it holds PGSTAT_NUM_FUNCENTRIES entries and
                     :  * once more at the end if entries remain.  Each reported entry's
                     :  * counts are zeroed afterwards, and have_function_stats is cleared.
                     :  *
                     :  * If pgStatFunctions is NULL the function returns immediately, leaving
                     :  * have_function_stats unchanged.
    1066             :  */
    1067             : static void
    1068       29482 : pgstat_send_funcstats(void)
    1069             : {
    1070             :     /* we assume this inits to all zeroes: */
    1071             :     static const PgStat_FunctionCounts all_zeroes;
    1072             : 
    1073             :     PgStat_MsgFuncstat msg;
    1074             :     PgStat_BackendFunctionEntry *entry;
    1075             :     HASH_SEQ_STATUS fstat;
    1076             : 
    1077       29482 :     if (pgStatFunctions == NULL)
    1078       29482 :         return;
    1079             : 
    1080           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_FUNCSTAT);
    1081           0 :     msg.m_databaseid = MyDatabaseId;
    1082           0 :     msg.m_nentries = 0;
    1083             : 
    1084           0 :     hash_seq_init(&fstat, pgStatFunctions);
    1085           0 :     while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL)
    1086             :     {
    1087             :         PgStat_FunctionEntry *m_ent;
    1088             : 
    1089             :         /* Skip it if no counts accumulated since last time */
    1090           0 :         if (memcmp(&entry->f_counts, &all_zeroes,
    1091             :                    sizeof(PgStat_FunctionCounts)) == 0)
    1092           0 :             continue;
    1093             : 
    1094             :         /* need to convert format of time accumulators */
    1095           0 :         m_ent = &msg.m_entry[msg.m_nentries];
    1096           0 :         m_ent->f_id = entry->f_id;
    1097           0 :         m_ent->f_numcalls = entry->f_counts.f_numcalls;
    1098           0 :         m_ent->f_total_time = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_total_time);
    1099           0 :         m_ent->f_self_time = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_self_time);
    1100             : 
    1101           0 :         if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES)
    1102             :         {
    1103           0 :             pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
    1104           0 :                         msg.m_nentries * sizeof(PgStat_FunctionEntry));
    1105           0 :             msg.m_nentries = 0; /* message is full: start refilling from entry 0 */
    1106             :         }
    1107             : 
    1108             :         /* reset the entry's counts */
    1109           0 :         MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts));
    1110             :     }
    1111             : 
    1112           0 :     if (msg.m_nentries > 0)     /* send whatever is left in a partial message */
    1113           0 :         pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
    1114           0 :                     msg.m_nentries * sizeof(PgStat_FunctionEntry));
    1115             : 
    1116           0 :     have_function_stats = false;
    1117             : }
    1118             : 
    1119             : 
    1120             : /* ----------
    1121             :  * pgstat_vacuum_stat() -
    1122             :  *
    1123             :  *  Will tell the collector about objects it can get rid of.
    1124             :  * ----------
    1125             :  */
    1126             : void
    1127        4096 : pgstat_vacuum_stat(void)
    1128             : {
    1129             :     HTAB       *htab;
    1130             :     PgStat_MsgTabpurge msg;
    1131             :     PgStat_MsgFuncpurge f_msg;
    1132             :     HASH_SEQ_STATUS hstat;
    1133             :     PgStat_StatDBEntry *dbentry;
    1134             :     PgStat_StatTabEntry *tabentry;
    1135             :     PgStat_StatFuncEntry *funcentry;
    1136             :     int         len;
    1137             : 
    1138        4096 :     if (pgStatSock == PGINVALID_SOCKET)
    1139        1516 :         return;
    1140             : 
    1141             :     /*
    1142             :      * If not done for this transaction, read the statistics collector stats
    1143             :      * file into some hash tables.
    1144             :      */
    1145        3132 :     backend_read_statsfile();
    1146             : 
    1147             :     /*
    1148             :      * Read pg_database and make a list of OIDs of all existing databases
    1149             :      */
    1150        3132 :     htab = pgstat_collect_oids(DatabaseRelationId, Anum_pg_database_oid);
    1151             : 
    1152             :     /*
    1153             :      * Search the database hash table for dead databases and tell the
    1154             :      * collector to drop them.
    1155             :      */
    1156        3132 :     hash_seq_init(&hstat, pgStatDBHash);
    1157        9548 :     while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
    1158             :     {
    1159        6416 :         Oid         dbid = dbentry->databaseid;
    1160             : 
    1161        6416 :         CHECK_FOR_INTERRUPTS();
    1162             : 
    1163             :         /* the DB entry for shared tables (with InvalidOid) is never dropped */
    1164       10252 :         if (OidIsValid(dbid) &&
    1165        3836 :             hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
    1166           0 :             pgstat_drop_database(dbid);
    1167             :     }
    1168             : 
    1169             :     /* Clean up */
    1170        3132 :     hash_destroy(htab);
    1171             : 
    1172             :     /*
    1173             :      * Search for all the dead replication slots in stats hashtable and tell
    1174             :      * the stats collector to drop them.
    1175             :      */
    1176        3132 :     if (replSlotStatHash)
    1177             :     {
    1178             :         PgStat_StatReplSlotEntry *slotentry;
    1179             : 
    1180          40 :         hash_seq_init(&hstat, replSlotStatHash);
    1181          80 :         while ((slotentry = (PgStat_StatReplSlotEntry *) hash_seq_search(&hstat)) != NULL)
    1182             :         {
    1183          40 :             CHECK_FOR_INTERRUPTS();
    1184             : 
    1185          40 :             if (SearchNamedReplicationSlot(NameStr(slotentry->slotname), true) == NULL)
    1186           0 :                 pgstat_report_replslot_drop(NameStr(slotentry->slotname));
    1187             :         }
    1188             :     }
    1189             : 
    1190             :     /*
    1191             :      * Lookup our own database entry; if not found, nothing more to do.
    1192             :      */
    1193        3132 :     dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
    1194             :                                                  (void *) &MyDatabaseId,
    1195             :                                                  HASH_FIND, NULL);
    1196        3132 :     if (dbentry == NULL || dbentry->tables == NULL)
    1197         552 :         return;
    1198             : 
    1199             :     /*
    1200             :      * Similarly to above, make a list of all known relations in this DB.
    1201             :      */
    1202        2580 :     htab = pgstat_collect_oids(RelationRelationId, Anum_pg_class_oid);
    1203             : 
    1204             :     /*
    1205             :      * Initialize our messages table counter to zero
    1206             :      */
    1207        2580 :     msg.m_nentries = 0;
    1208             : 
    1209             :     /*
    1210             :      * Check for all tables listed in stats hashtable if they still exist.
    1211             :      */
    1212        2580 :     hash_seq_init(&hstat, dbentry->tables);
    1213      633388 :     while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
    1214             :     {
    1215      630808 :         Oid         tabid = tabentry->tableid;
    1216             : 
    1217      630808 :         CHECK_FOR_INTERRUPTS();
    1218             : 
    1219      630808 :         if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
    1220      525408 :             continue;
    1221             : 
    1222             :         /*
    1223             :          * Not there, so add this table's Oid to the message
    1224             :          */
    1225      105400 :         msg.m_tableid[msg.m_nentries++] = tabid;
    1226             : 
    1227             :         /*
    1228             :          * If the message is full, send it out and reinitialize to empty
    1229             :          */
    1230      105400 :         if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
    1231             :         {
    1232         222 :             len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
    1233         222 :                 + msg.m_nentries * sizeof(Oid);
    1234             : 
    1235         222 :             pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
    1236         222 :             msg.m_databaseid = MyDatabaseId;
    1237         222 :             pgstat_send(&msg, len);
    1238             : 
    1239         222 :             msg.m_nentries = 0;
    1240             :         }
    1241             :     }
    1242             : 
    1243             :     /*
    1244             :      * Send the rest
    1245             :      */
    1246        2580 :     if (msg.m_nentries > 0)
    1247             :     {
    1248         854 :         len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
    1249         854 :             + msg.m_nentries * sizeof(Oid);
    1250             : 
    1251         854 :         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
    1252         854 :         msg.m_databaseid = MyDatabaseId;
    1253         854 :         pgstat_send(&msg, len);
    1254             :     }
    1255             : 
    1256             :     /* Clean up */
    1257        2580 :     hash_destroy(htab);
    1258             : 
    1259             :     /*
    1260             :      * Now repeat the above steps for functions.  However, we needn't bother
    1261             :      * in the common case where no function stats are being collected.
    1262             :      */
    1263        5160 :     if (dbentry->functions != NULL &&
    1264        2580 :         hash_get_num_entries(dbentry->functions) > 0)
    1265             :     {
    1266           0 :         htab = pgstat_collect_oids(ProcedureRelationId, Anum_pg_proc_oid);
    1267             : 
    1268           0 :         pgstat_setheader(&f_msg.m_hdr, PGSTAT_MTYPE_FUNCPURGE);
    1269           0 :         f_msg.m_databaseid = MyDatabaseId;
    1270           0 :         f_msg.m_nentries = 0;
    1271             : 
    1272           0 :         hash_seq_init(&hstat, dbentry->functions);
    1273           0 :         while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL)
    1274             :         {
    1275           0 :             Oid         funcid = funcentry->functionid;
    1276             : 
    1277           0 :             CHECK_FOR_INTERRUPTS();
    1278             : 
    1279           0 :             if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL)
    1280           0 :                 continue;
    1281             : 
    1282             :             /*
    1283             :              * Not there, so add this function's Oid to the message
    1284             :              */
    1285           0 :             f_msg.m_functionid[f_msg.m_nentries++] = funcid;
    1286             : 
    1287             :             /*
    1288             :              * If the message is full, send it out and reinitialize to empty
    1289             :              */
    1290           0 :             if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE)
    1291             :             {
    1292           0 :                 len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
    1293           0 :                     + f_msg.m_nentries * sizeof(Oid);
    1294             : 
    1295           0 :                 pgstat_send(&f_msg, len);
    1296             : 
    1297           0 :                 f_msg.m_nentries = 0;
    1298             :             }
    1299             :         }
    1300             : 
    1301             :         /*
    1302             :          * Send the rest
    1303             :          */
    1304           0 :         if (f_msg.m_nentries > 0)
    1305             :         {
    1306           0 :             len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
    1307           0 :                 + f_msg.m_nentries * sizeof(Oid);
    1308             : 
    1309           0 :             pgstat_send(&f_msg, len);
    1310             :         }
    1311             : 
    1312           0 :         hash_destroy(htab);
    1313             :     }
    1314             : 
    1315             :     /*
    1316             :      * Repeat for subscription workers.  Similarly, we needn't bother in the
    1317             :      * common case where no subscription workers' stats are being collected.
    1318             :      */
    1319        5160 :     if (dbentry->subworkers != NULL &&
    1320        2580 :         hash_get_num_entries(dbentry->subworkers) > 0)
    1321             :     {
    1322             :         PgStat_StatSubWorkerEntry *subwentry;
    1323             :         PgStat_MsgSubscriptionPurge spmsg;
    1324             : 
    1325             :         /*
    1326             :          * Read pg_subscription and make a list of OIDs of all existing
    1327             :          * subscriptions
    1328             :          */
    1329           0 :         htab = pgstat_collect_oids(SubscriptionRelationId, Anum_pg_subscription_oid);
    1330             : 
    1331           0 :         spmsg.m_databaseid = MyDatabaseId;
    1332           0 :         spmsg.m_nentries = 0;
    1333             : 
    1334           0 :         hash_seq_init(&hstat, dbentry->subworkers);
    1335           0 :         while ((subwentry = (PgStat_StatSubWorkerEntry *) hash_seq_search(&hstat)) != NULL)
    1336             :         {
    1337           0 :             bool        exists = false;
    1338           0 :             Oid         subid = subwentry->key.subid;
    1339             : 
    1340           0 :             CHECK_FOR_INTERRUPTS();
    1341             : 
    1342           0 :             if (hash_search(htab, (void *) &subid, HASH_FIND, NULL) != NULL)
    1343           0 :                 continue;
    1344             : 
    1345             :             /*
    1346             :              * It is possible that we have multiple entries for the
    1347             :              * subscription corresponding to apply worker and tablesync
    1348             :              * workers. In such cases, we don't need to add the same subid
    1349             :              * again.
    1350             :              */
    1351           0 :             for (int i = 0; i < spmsg.m_nentries; i++)
    1352             :             {
    1353           0 :                 if (spmsg.m_subids[i] == subid)
    1354             :                 {
    1355           0 :                     exists = true;
    1356           0 :                     break;
    1357             :                 }
    1358             :             }
    1359             : 
    1360           0 :             if (exists)
    1361           0 :                 continue;
    1362             : 
    1363             :             /* This subscription is dead, add the subid to the message */
    1364           0 :             spmsg.m_subids[spmsg.m_nentries++] = subid;
    1365             : 
    1366             :             /*
    1367             :              * If the message is full, send it out and reinitialize to empty
    1368             :              */
    1369           0 :             if (spmsg.m_nentries >= PGSTAT_NUM_SUBSCRIPTIONPURGE)
    1370             :             {
    1371           0 :                 pgstat_send_subscription_purge(&spmsg);
    1372           0 :                 spmsg.m_nentries = 0;
    1373             :             }
    1374             :         }
    1375             : 
    1376             :         /* Send the rest of dead subscriptions */
    1377           0 :         if (spmsg.m_nentries > 0)
    1378           0 :             pgstat_send_subscription_purge(&spmsg);
    1379             : 
    1380           0 :         hash_destroy(htab);
    1381             :     }
    1382             : }
    1383             : 
    1384             : 
    1385             : /* ----------
    1386             :  * pgstat_collect_oids() -
    1387             :  *
    1388             :  *  Collect the OIDs of all objects listed in the specified system catalog
    1389             :  *  into a temporary hash table.  Caller should hash_destroy the result
    1390             :  *  when done with it.  (However, we make the table in CurrentMemoryContext
    1391             :  *  so that it will be freed properly in event of an error.)
                     :  *
                     :  *  catalogid is the OID of the catalog to scan, anum_oid the attribute
                     :  *  number of the column holding each object's OID.  The catalog is
                     :  *  opened with AccessShareLock and scanned with a snapshot obtained
                     :  *  from GetLatestSnapshot(); both are released before returning.
                     :  *  Each hash entry consists of nothing but the OID key itself
                     :  *  (keysize and entrysize are both sizeof(Oid)).
    1392             :  * ----------
    1393             :  */
    1394             : static HTAB *
    1395        5712 : pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
    1396             : {
    1397             :     HTAB       *htab;
    1398             :     HASHCTL     hash_ctl;
    1399             :     Relation    rel;
    1400             :     TableScanDesc scan;
    1401             :     HeapTuple   tup;
    1402             :     Snapshot    snapshot;
    1403             : 
    1404        5712 :     hash_ctl.keysize = sizeof(Oid);
    1405        5712 :     hash_ctl.entrysize = sizeof(Oid);
    1406        5712 :     hash_ctl.hcxt = CurrentMemoryContext;
    1407        5712 :     htab = hash_create("Temporary table of OIDs",
    1408             :                        PGSTAT_TAB_HASH_SIZE,
    1409             :                        &hash_ctl,
    1410             :                        HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    1411             : 
    1412        5712 :     rel = table_open(catalogid, AccessShareLock);
    1413        5712 :     snapshot = RegisterSnapshot(GetLatestSnapshot());
    1414        5712 :     scan = table_beginscan(rel, snapshot, 0, NULL);
    1415     1252256 :     while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1416             :     {
    1417             :         Oid         thisoid;
    1418             :         bool        isnull;
    1419             : 
    1420     1246544 :         thisoid = heap_getattr(tup, anum_oid, RelationGetDescr(rel), &isnull);
    1421             :         Assert(!isnull);
    1422             : 
    1423     1246544 :         CHECK_FOR_INTERRUPTS();
    1424             : 
    1425     1246544 :         (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
    1426             :     }
    1427        5712 :     table_endscan(scan);
    1428        5712 :     UnregisterSnapshot(snapshot);
    1429        5712 :     table_close(rel, AccessShareLock);
    1430             : 
    1431        5712 :     return htab;
    1432             : }
    1433             : 
    1434             : 
    1435             : /* ----------
    1436             :  * pgstat_drop_database() -
    1437             :  *
    1438             :  *  Tell the collector that we just dropped a database.
    1439             :  *  (If the message gets lost, we will still clean the dead DB eventually
    1440             :  *  via future invocations of pgstat_vacuum_stat().)
                     :  *
                     :  *  Sends a PGSTAT_MTYPE_DROPDB message carrying databaseid.  Does
                     :  *  nothing if pgStatSock is invalid.
    1441             :  * ----------
    1442             :  */
    1443             : void
    1444          18 : pgstat_drop_database(Oid databaseid)
    1445             : {
    1446             :     PgStat_MsgDropdb msg;
    1447             : 
    1448          18 :     if (pgStatSock == PGINVALID_SOCKET)
    1449           0 :         return;
    1450             : 
    1451          18 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
    1452          18 :     msg.m_databaseid = databaseid;
    1453          18 :     pgstat_send(&msg, sizeof(msg));
    1454             : }
    1455             : 
    1456             : 
    1457             : /* ----------
    1458             :  * pgstat_drop_relation() -
    1459             :  *
    1460             :  *  Tell the collector that we just dropped a relation.
    1461             :  *  (If the message gets lost, we will still clean the dead entry eventually
    1462             :  *  via future invocations of pgstat_vacuum_stat().)
    1463             :  *
    1464             :  *  Currently not used for lack of any good place to call it; we rely
    1465             :  *  entirely on pgstat_vacuum_stat() to clean out stats for dead rels.
                     :  *
                     :  *  When compiled in (NOT_USED defined), sends a PGSTAT_MTYPE_TABPURGE
                     :  *  message for MyDatabaseId with relid as its single entry; the length
                     :  *  passed to pgstat_send covers the message up to and including that
                     :  *  one OID.
    1466             :  * ----------
    1467             :  */
    1468             : #ifdef NOT_USED
    1469             : void
    1470             : pgstat_drop_relation(Oid relid)
    1471             : {
    1472             :     PgStat_MsgTabpurge msg;
    1473             :     int         len;
    1474             : 
    1475             :     if (pgStatSock == PGINVALID_SOCKET)
    1476             :         return;
    1477             : 
    1478             :     msg.m_tableid[0] = relid;
    1479             :     msg.m_nentries = 1;
    1480             : 
    1481             :     len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) + sizeof(Oid);
    1482             : 
    1483             :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
    1484             :     msg.m_databaseid = MyDatabaseId;
    1485             :     pgstat_send(&msg, len);
    1486             : }
    1487             : #endif                          /* NOT_USED */
    1488             : 
    1489             : /* ----------
    1490             :  * pgstat_reset_counters() -
    1491             :  *
    1492             :  *  Tell the statistics collector to reset counters for our database.
                     :  *
                     :  *  Sends a PGSTAT_MTYPE_RESETCOUNTER message for MyDatabaseId.  Does
                     :  *  nothing if pgStatSock is invalid.
    1493             :  *
    1494             :  *  Permission checking for this function is managed through the normal
    1495             :  *  GRANT system.
    1496             :  * ----------
    1497             :  */
    1498             : void
    1499           0 : pgstat_reset_counters(void)
    1500             : {
    1501             :     PgStat_MsgResetcounter msg;
    1502             : 
    1503           0 :     if (pgStatSock == PGINVALID_SOCKET)
    1504           0 :         return;
    1505             : 
    1506           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
    1507           0 :     msg.m_databaseid = MyDatabaseId;
    1508           0 :     pgstat_send(&msg, sizeof(msg));
    1509             : }
    1510             : 
    1511             : /* ----------
    1512             :  * pgstat_reset_shared_counters() -
    1513             :  *
    1514             :  *  Tell the statistics collector to reset cluster-wide shared counters.
                     :  *
                     :  *  target selects which counters to reset and must be one of archiver,
                     :  *  bgwriter or wal; any other string raises an ERROR with
                     :  *  ERRCODE_INVALID_PARAMETER_VALUE.  Note that the target is validated
                     :  *  only after the socket check, so with an invalid pgStatSock the
                     :  *  function returns silently whatever target is.
    1515             :  *
    1516             :  *  Permission checking for this function is managed through the normal
    1517             :  *  GRANT system.
    1518             :  * ----------
    1519             :  */
    1520             : void
    1521           0 : pgstat_reset_shared_counters(const char *target)
    1522             : {
    1523             :     PgStat_MsgResetsharedcounter msg;
    1524             : 
    1525           0 :     if (pgStatSock == PGINVALID_SOCKET)
    1526           0 :         return;
    1527             : 
    1528           0 :     if (strcmp(target, "archiver") == 0)
    1529           0 :         msg.m_resettarget = RESET_ARCHIVER;
    1530           0 :     else if (strcmp(target, "bgwriter") == 0)
    1531           0 :         msg.m_resettarget = RESET_BGWRITER;
    1532           0 :     else if (strcmp(target, "wal") == 0)
    1533           0 :         msg.m_resettarget = RESET_WAL;
    1534             :     else
    1535           0 :         ereport(ERROR,
    1536             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1537             :                  errmsg("unrecognized reset target: \"%s\"", target),
    1538             :                  errhint("Target must be \"archiver\", \"bgwriter\", or \"wal\".")));
    1539             : 
    1540           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
    1541           0 :     pgstat_send(&msg, sizeof(msg));
    1542             : }
    1543             : 
    1544             : /* ----------
    1545             :  * pgstat_reset_single_counter() -
    1546             :  *
    1547             :  *  Tell the statistics collector to reset a single counter.
                     :  *
                     :  *  objoid, subobjoid and type are copied unchanged into the message
                     :  *  (m_objectid, m_subobjectid and m_resettype); the database is always
                     :  *  MyDatabaseId.  Does nothing if pgStatSock is invalid.
    1548             :  *
    1549             :  *  Permission checking for this function is managed through the normal
    1550             :  *  GRANT system.
    1551             :  * ----------
    1552             :  */
    1553             : void
    1554           0 : pgstat_reset_single_counter(Oid objoid, Oid subobjoid,
    1555             :                             PgStat_Single_Reset_Type type)
    1556             : {
    1557             :     PgStat_MsgResetsinglecounter msg;
    1558             : 
    1559           0 :     if (pgStatSock == PGINVALID_SOCKET)
    1560           0 :         return;
    1561             : 
    1562           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSINGLECOUNTER);
    1563           0 :     msg.m_databaseid = MyDatabaseId;
    1564           0 :     msg.m_resettype = type;
    1565           0 :     msg.m_objectid = objoid;
    1566           0 :     msg.m_subobjectid = subobjoid;
    1567             : 
    1568           0 :     pgstat_send(&msg, sizeof(msg));
    1569             : }
    1570             : 
    1571             : /* ----------
    1572             :  * pgstat_reset_slru_counter() -
    1573             :  *
    1574             :  *  Tell the statistics collector to reset a single SLRU counter, or all
    1575             :  *  SLRU counters (when name is null).
                     :  *
                     :  *  A non-null name is translated with pgstat_slru_index(); a null name
                     :  *  is sent as index -1.  Does nothing if pgStatSock is invalid.
    1576             :  *
    1577             :  *  Permission checking for this function is managed through the normal
    1578             :  *  GRANT system.
    1579             :  * ----------
    1580             :  */
    1581             : void
    1582           0 : pgstat_reset_slru_counter(const char *name)
    1583             : {
    1584             :     PgStat_MsgResetslrucounter msg;
    1585             : 
    1586           0 :     if (pgStatSock == PGINVALID_SOCKET)
    1587           0 :         return;
    1588             : 
    1589           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSLRUCOUNTER);
    1590           0 :     msg.m_index = (name) ? pgstat_slru_index(name) : -1;
    1591             : 
    1592           0 :     pgstat_send(&msg, sizeof(msg));
    1593             : }
    1594             : 
    1595             : /* ----------
    1596             :  * pgstat_reset_replslot_counter() -
    1597             :  *
    1598             :  *  Tell the statistics collector to reset the counters of a single
    1599             :  *  replication slot, or of all replication slots (when name is null).
                     :  *
                     :  *  With a non-null name the slot name is copied into the message and
                     :  *  clearall is false.  With a null name clearall is true and m_slotname
                     :  *  is left unset, although the whole struct is still sent.  Does
                     :  *  nothing if pgStatSock is invalid.
    1600             :  *
    1601             :  *  Permission checking for this function is managed through the normal
    1602             :  *  GRANT system.
    1603             :  * ----------
    1604             :  */
    1605             : void
    1606           2 : pgstat_reset_replslot_counter(const char *name)
    1607             : {
    1608             :     PgStat_MsgResetreplslotcounter msg;
    1609             : 
    1610           2 :     if (pgStatSock == PGINVALID_SOCKET)
    1611           0 :         return;
    1612             : 
    1613           2 :     if (name)
    1614             :     {
    1615           2 :         namestrcpy(&msg.m_slotname, name);
    1616           2 :         msg.clearall = false;
    1617             :     }
    1618             :     else
    1619           0 :         msg.clearall = true;
    1620             : 
    1621           2 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETREPLSLOTCOUNTER);
    1622             : 
    1623           2 :     pgstat_send(&msg, sizeof(msg));
    1624             : }
    1625             : 
    1626             : /* ----------
    1627             :  * pgstat_report_autovac() -
    1628             :  *
    1629             :  *  Called from autovacuum.c to report startup of an autovacuum process.
    1630             :  *  We are called before InitPostgres is done, so can't rely on MyDatabaseId;
    1631             :  *  the db OID must be passed in, instead.
                     :  *
                     :  *  The message carries dboid and the current timestamp as the start
                     :  *  time.  Does nothing if pgStatSock is invalid.
    1632             :  * ----------
    1633             :  */
    1634             : void
    1635          58 : pgstat_report_autovac(Oid dboid)
    1636             : {
    1637             :     PgStat_MsgAutovacStart msg;
    1638             : 
    1639          58 :     if (pgStatSock == PGINVALID_SOCKET)
    1640           0 :         return;
    1641             : 
    1642          58 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START);
    1643          58 :     msg.m_databaseid = dboid;
    1644          58 :     msg.m_start_time = GetCurrentTimestamp();
    1645             : 
    1646          58 :     pgstat_send(&msg, sizeof(msg));
    1647             : }
    1648             : 
    1649             : 
    1650             : /* ---------
    1651             :  * pgstat_report_vacuum() -
    1652             :  *
    1653             :  *  Tell the collector about the table we just vacuumed.
                     :  *
                     :  *  tableoid identifies the table.  If shared is true the message is
                     :  *  filed under InvalidOid instead of MyDatabaseId.  livetuples and
                     :  *  deadtuples are passed through unchanged.  The message also records
                     :  *  whether we are an autovacuum worker and the current timestamp.
                     :  *
                     :  *  Does nothing if pgStatSock is invalid or pgstat_track_counts is off.
    1654             :  * ---------
    1655             :  */
    1656             : void
    1657       57378 : pgstat_report_vacuum(Oid tableoid, bool shared,
    1658             :                      PgStat_Counter livetuples, PgStat_Counter deadtuples)
    1659             : {
    1660             :     PgStat_MsgVacuum msg;
    1661             : 
    1662       57378 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    1663       51574 :         return;
    1664             : 
    1665        5804 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
    1666        5804 :     msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
    1667        5804 :     msg.m_tableoid = tableoid;
    1668        5804 :     msg.m_autovacuum = IsAutoVacuumWorkerProcess();
    1669        5804 :     msg.m_vacuumtime = GetCurrentTimestamp();
    1670        5804 :     msg.m_live_tuples = livetuples;
    1671        5804 :     msg.m_dead_tuples = deadtuples;
    1672        5804 :     pgstat_send(&msg, sizeof(msg));
    1673             : }
    1674             : 
    1675             : /* --------
    1676             :  * pgstat_report_analyze() -
    1677             :  *
    1678             :  *  Tell the collector about the table we just analyzed.
    1679             :  *
    1680             :  * Caller must provide new live- and dead-tuples estimates, as well as a
    1681             :  * flag indicating whether to reset the changes_since_analyze counter.
                     :  *
                     :  * The estimates may be adjusted downwards (but never below zero) before
                     :  * being sent; see the comment in the function body.  A relation whose
                     :  * relisshared flag is set is reported under InvalidOid instead of
                     :  * MyDatabaseId.
                     :  *
                     :  * Does nothing if pgStatSock is invalid or pgstat_track_counts is off.
    1682             :  * --------
    1683             :  */
    1684             : void
    1685       37212 : pgstat_report_analyze(Relation rel,
    1686             :                       PgStat_Counter livetuples, PgStat_Counter deadtuples,
    1687             :                       bool resetcounter)
    1688             : {
    1689             :     PgStat_MsgAnalyze msg;
    1690             : 
    1691       37212 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    1692       31812 :         return;
    1693             : 
    1694             :     /*
    1695             :      * Unlike VACUUM, ANALYZE might be running inside a transaction that has
    1696             :      * already inserted and/or deleted rows in the target table. ANALYZE will
    1697             :      * have counted such rows as live or dead respectively. Because we will
    1698             :      * report our counts of such rows at transaction end, we should subtract
    1699             :      * off these counts from what we send to the collector now, else they'll
    1700             :      * be double-counted after commit.  (This approach also ensures that the
    1701             :      * collector ends up with the right numbers if we abort instead of
    1702             :      * committing.)
    1703             :      *
    1704             :      * Waste no time on partitioned tables, though.
    1705             :      */
    1706        5400 :     if (rel->pgstat_info != NULL &&
    1707        5352 :         rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
    1708             :     {
    1709             :         PgStat_TableXactStatus *trans;
    1710             : 
                               /* walk the chain of per-(sub)transaction status entries */
    1711        5008 :         for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
    1712             :         {
    1713          70 :             livetuples -= trans->tuples_inserted - trans->tuples_deleted;
    1714          70 :             deadtuples -= trans->tuples_updated + trans->tuples_deleted;
    1715             :         }
    1716             :         /* count stuff inserted by already-aborted subxacts, too */
    1717        4938 :         deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
    1718             :         /* Since ANALYZE's counts are estimates, we could have underflowed */
    1719        4938 :         livetuples = Max(livetuples, 0);
    1720        4938 :         deadtuples = Max(deadtuples, 0);
    1721             :     }
    1722             : 
    1723        5400 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
    1724        5400 :     msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
    1725        5400 :     msg.m_tableoid = RelationGetRelid(rel);
    1726        5400 :     msg.m_autovacuum = IsAutoVacuumWorkerProcess();
    1727        5400 :     msg.m_resetcounter = resetcounter;
    1728        5400 :     msg.m_analyzetime = GetCurrentTimestamp();
    1729        5400 :     msg.m_live_tuples = livetuples;
    1730        5400 :     msg.m_dead_tuples = deadtuples;
    1731        5400 :     pgstat_send(&msg, sizeof(msg));
    1732             : }
    1733             : 
    1734             : /* --------
    1735             :  * pgstat_report_recovery_conflict() -
    1736             :  *
    1737             :  *  Tell the collector about a Hot Standby recovery conflict.
                     :  *
                     :  *  reason is passed through unchanged in the message, which is filed
                     :  *  under MyDatabaseId.  Does nothing if pgStatSock is invalid or
                     :  *  pgstat_track_counts is off.
    1738             :  * --------
    1739             :  */
    1740             : void
    1741           0 : pgstat_report_recovery_conflict(int reason)
    1742             : {
    1743             :     PgStat_MsgRecoveryConflict msg;
    1744             : 
    1745           0 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    1746           0 :         return;
    1747             : 
    1748           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
    1749           0 :     msg.m_databaseid = MyDatabaseId;
    1750           0 :     msg.m_reason = reason;
    1751           0 :     pgstat_send(&msg, sizeof(msg));
    1752             : }
    1753             : 
    1754             : /* --------
    1755             :  * pgstat_report_deadlock() -
    1756             :  *
    1757             :  *  Tell the collector about a detected deadlock.
    1758             :  * --------
    1759             :  */
    1760             : void
    1761           4 : pgstat_report_deadlock(void)
    1762             : {
    1763             :     PgStat_MsgDeadlock msg;
    1764             : 
    1765           4 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    1766           0 :         return;
    1767             : 
    1768           4 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DEADLOCK);
    1769           4 :     msg.m_databaseid = MyDatabaseId;
    1770           4 :     pgstat_send(&msg, sizeof(msg));
    1771             : }
    1772             : 
    1773             : 
    1774             : 
    1775             : /* --------
    1776             :  * pgstat_report_checksum_failures_in_db() -
    1777             :  *
    1778             :  *  Tell the collector about one or more checksum failures.
    1779             :  * --------
    1780             :  */
    1781             : void
    1782           4 : pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
    1783             : {
    1784             :     PgStat_MsgChecksumFailure msg;
    1785             : 
    1786           4 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    1787           0 :         return;
    1788             : 
    1789           4 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CHECKSUMFAILURE);
    1790           4 :     msg.m_databaseid = dboid;
    1791           4 :     msg.m_failurecount = failurecount;
    1792           4 :     msg.m_failure_time = GetCurrentTimestamp();
    1793             : 
    1794           4 :     pgstat_send(&msg, sizeof(msg));
    1795             : }
    1796             : 
    1797             : /* --------
    1798             :  * pgstat_report_checksum_failure() -
    1799             :  *
    1800             :  *  Tell the collector about a checksum failure.
    1801             :  * --------
    1802             :  */
    1803             : void
    1804           0 : pgstat_report_checksum_failure(void)
    1805             : {
    1806           0 :     pgstat_report_checksum_failures_in_db(MyDatabaseId, 1);
    1807           0 : }
    1808             : 
    1809             : /* --------
    1810             :  * pgstat_report_tempfile() -
    1811             :  *
    1812             :  *  Tell the collector about a temporary file.
    1813             :  * --------
    1814             :  */
    1815             : void
    1816        3414 : pgstat_report_tempfile(size_t filesize)
    1817             : {
    1818             :     PgStat_MsgTempFile msg;
    1819             : 
    1820        3414 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    1821           0 :         return;
    1822             : 
    1823        3414 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TEMPFILE);
    1824        3414 :     msg.m_databaseid = MyDatabaseId;
    1825        3414 :     msg.m_filesize = filesize;
    1826        3414 :     pgstat_send(&msg, sizeof(msg));
    1827             : }
    1828             : 
    1829             : /* --------
    1830             :  * pgstat_report_connect() -
    1831             :  *
    1832             :  *  Tell the collector about a new connection.
    1833             :  * --------
    1834             :  */
    1835             : void
    1836       12222 : pgstat_report_connect(Oid dboid)
    1837             : {
    1838             :     PgStat_MsgConnect msg;
    1839             : 
    1840       12222 :     if (!pgstat_should_report_connstat())
    1841        1458 :         return;
    1842             : 
    1843       10764 :     pgLastSessionReportTime = MyStartTimestamp;
    1844             : 
    1845       10764 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECT);
    1846       10764 :     msg.m_databaseid = MyDatabaseId;
    1847       10764 :     pgstat_send(&msg, sizeof(PgStat_MsgConnect));
    1848             : }
    1849             : 
    1850             : /* --------
    1851             :  * pgstat_report_disconnect() -
    1852             :  *
    1853             :  *  Tell the collector about a disconnect.
    1854             :  * --------
    1855             :  */
    1856             : static void
    1857       14272 : pgstat_report_disconnect(Oid dboid)
    1858             : {
    1859             :     PgStat_MsgDisconnect msg;
    1860             : 
    1861       14272 :     if (!pgstat_should_report_connstat())
    1862        3508 :         return;
    1863             : 
    1864       10764 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DISCONNECT);
    1865       10764 :     msg.m_databaseid = MyDatabaseId;
    1866       10764 :     msg.m_cause = pgStatSessionEndCause;
    1867       10764 :     pgstat_send(&msg, sizeof(PgStat_MsgDisconnect));
    1868             : }
    1869             : 
    1870             : /* --------
    1871             :  * pgstat_should_report_connstat() -
    1872             :  *
    1873             :  *  We report session statistics only for normal backend processes.  Parallel
    1874             :  *  workers run in parallel, so they don't contribute to session times, even
    1875             :  *  though they use CPU time. Walsender processes could be considered here,
    1876             :  *  but they have different session characteristics from normal backends (for
    1877             :  *  example, they are always "active"), so they would skew session statistics.
    1878             :  * ----------
    1879             :  */
    1880             : static bool
    1881      121688 : pgstat_should_report_connstat(void)
    1882             : {
    1883      121688 :     return MyBackendType == B_BACKEND;
    1884             : }
    1885             : 
    1886             : /* ----------
    1887             :  * pgstat_report_replslot() -
    1888             :  *
    1889             :  *  Tell the collector about replication slot statistics.
    1890             :  * ----------
    1891             :  */
    1892             : void
    1893        7830 : pgstat_report_replslot(const PgStat_StatReplSlotEntry *repSlotStat)
    1894             : {
    1895             :     PgStat_MsgReplSlot msg;
    1896             : 
    1897             :     /*
    1898             :      * Prepare and send the message
    1899             :      */
    1900        7830 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_REPLSLOT);
    1901        7830 :     namestrcpy(&msg.m_slotname, NameStr(repSlotStat->slotname));
    1902        7830 :     msg.m_create = false;
    1903        7830 :     msg.m_drop = false;
    1904        7830 :     msg.m_spill_txns = repSlotStat->spill_txns;
    1905        7830 :     msg.m_spill_count = repSlotStat->spill_count;
    1906        7830 :     msg.m_spill_bytes = repSlotStat->spill_bytes;
    1907        7830 :     msg.m_stream_txns = repSlotStat->stream_txns;
    1908        7830 :     msg.m_stream_count = repSlotStat->stream_count;
    1909        7830 :     msg.m_stream_bytes = repSlotStat->stream_bytes;
    1910        7830 :     msg.m_total_txns = repSlotStat->total_txns;
    1911        7830 :     msg.m_total_bytes = repSlotStat->total_bytes;
    1912        7830 :     pgstat_send(&msg, sizeof(PgStat_MsgReplSlot));
    1913        7830 : }
    1914             : 
    1915             : /* ----------
    1916             :  * pgstat_report_replslot_create() -
    1917             :  *
    1918             :  *  Tell the collector about creating the replication slot.
    1919             :  * ----------
    1920             :  */
    1921             : void
    1922         434 : pgstat_report_replslot_create(const char *slotname)
    1923             : {
    1924             :     PgStat_MsgReplSlot msg;
    1925             : 
    1926         434 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_REPLSLOT);
    1927         434 :     namestrcpy(&msg.m_slotname, slotname);
    1928         434 :     msg.m_create = true;
    1929         434 :     msg.m_drop = false;
    1930         434 :     pgstat_send(&msg, sizeof(PgStat_MsgReplSlot));
    1931         434 : }
    1932             : 
    1933             : /* ----------
    1934             :  * pgstat_report_replslot_drop() -
    1935             :  *
    1936             :  *  Tell the collector about dropping the replication slot.
    1937             :  * ----------
    1938             :  */
    1939             : void
    1940         372 : pgstat_report_replslot_drop(const char *slotname)
    1941             : {
    1942             :     PgStat_MsgReplSlot msg;
    1943             : 
    1944         372 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_REPLSLOT);
    1945         372 :     namestrcpy(&msg.m_slotname, slotname);
    1946         372 :     msg.m_create = false;
    1947         372 :     msg.m_drop = true;
    1948         372 :     pgstat_send(&msg, sizeof(PgStat_MsgReplSlot));
    1949         372 : }
    1950             : 
    1951             : /* ----------
    1952             :  * pgstat_report_subworker_error() -
    1953             :  *
    1954             :  *  Tell the collector about the subscription worker error.
    1955             :  * ----------
    1956             :  */
    1957             : void
    1958          14 : pgstat_report_subworker_error(Oid subid, Oid subrelid, Oid relid,
    1959             :                               LogicalRepMsgType command, TransactionId xid,
    1960             :                               const char *errmsg)
    1961             : {
    1962             :     PgStat_MsgSubWorkerError msg;
    1963             :     int         len;
    1964             : 
    1965          14 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_SUBWORKERERROR);
    1966          14 :     msg.m_databaseid = MyDatabaseId;
    1967          14 :     msg.m_subid = subid;
    1968          14 :     msg.m_subrelid = subrelid;
    1969          14 :     msg.m_relid = relid;
    1970          14 :     msg.m_command = command;
    1971          14 :     msg.m_xid = xid;
    1972          14 :     msg.m_timestamp = GetCurrentTimestamp();
    1973          14 :     strlcpy(msg.m_message, errmsg, PGSTAT_SUBWORKERERROR_MSGLEN);
    1974             : 
    1975          14 :     len = offsetof(PgStat_MsgSubWorkerError, m_message) + strlen(msg.m_message) + 1;
    1976          14 :     pgstat_send(&msg, len);
    1977          14 : }
    1978             : 
    1979             : /* ----------
    1980             :  * pgstat_report_subscription_drop() -
    1981             :  *
    1982             :  *  Tell the collector about dropping the subscription.
    1983             :  * ----------
    1984             :  */
    1985             : void
    1986          34 : pgstat_report_subscription_drop(Oid subid)
    1987             : {
    1988             :     PgStat_MsgSubscriptionPurge msg;
    1989             : 
    1990          34 :     msg.m_databaseid = MyDatabaseId;
    1991          34 :     msg.m_subids[0] = subid;
    1992          34 :     msg.m_nentries = 1;
    1993          34 :     pgstat_send_subscription_purge(&msg);
    1994          34 : }
    1995             : 
    1996             : /* ----------
    1997             :  * pgstat_ping() -
    1998             :  *
    1999             :  *  Send some junk data to the collector to increase traffic.
    2000             :  * ----------
    2001             :  */
    2002             : void
    2003           0 : pgstat_ping(void)
    2004             : {
    2005             :     PgStat_MsgDummy msg;
    2006             : 
    2007           0 :     if (pgStatSock == PGINVALID_SOCKET)
    2008           0 :         return;
    2009             : 
    2010           0 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
    2011           0 :     pgstat_send(&msg, sizeof(msg));
    2012             : }
    2013             : 
    2014             : /* ----------
    2015             :  * pgstat_send_inquiry() -
    2016             :  *
    2017             :  *  Notify collector that we need fresh data.
    2018             :  * ----------
    2019             :  */
    2020             : static void
    2021        1136 : pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
    2022             : {
    2023             :     PgStat_MsgInquiry msg;
    2024             : 
    2025        1136 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
    2026        1136 :     msg.clock_time = clock_time;
    2027        1136 :     msg.cutoff_time = cutoff_time;
    2028        1136 :     msg.databaseid = databaseid;
    2029        1136 :     pgstat_send(&msg, sizeof(msg));
    2030        1136 : }
    2031             : 
    2032             : 
    2033             : /*
    2034             :  * Initialize function call usage data.
    2035             :  * Called by the executor before invoking a function.
    2036             :  */
    2037             : void
    2038    17000330 : pgstat_init_function_usage(FunctionCallInfo fcinfo,
    2039             :                            PgStat_FunctionCallUsage *fcu)
    2040             : {
    2041             :     PgStat_BackendFunctionEntry *htabent;
    2042             :     bool        found;
    2043             : 
    2044    17000330 :     if (pgstat_track_functions <= fcinfo->flinfo->fn_stats)
    2045             :     {
    2046             :         /* stats not wanted */
    2047    17000330 :         fcu->fs = NULL;
    2048    17000330 :         return;
    2049             :     }
    2050             : 
    2051           0 :     if (!pgStatFunctions)
    2052             :     {
    2053             :         /* First time through - initialize function stat table */
    2054             :         HASHCTL     hash_ctl;
    2055             : 
    2056           0 :         hash_ctl.keysize = sizeof(Oid);
    2057           0 :         hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry);
    2058           0 :         pgStatFunctions = hash_create("Function stat entries",
    2059             :                                       PGSTAT_FUNCTION_HASH_SIZE,
    2060             :                                       &hash_ctl,
    2061             :                                       HASH_ELEM | HASH_BLOBS);
    2062             :     }
    2063             : 
    2064             :     /* Get the stats entry for this function, create if necessary */
    2065           0 :     htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid,
    2066             :                           HASH_ENTER, &found);
    2067           0 :     if (!found)
    2068           0 :         MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts));
    2069             : 
    2070           0 :     fcu->fs = &htabent->f_counts;
    2071             : 
    2072             :     /* save stats for this function, later used to compensate for recursion */
    2073           0 :     fcu->save_f_total_time = htabent->f_counts.f_total_time;
    2074             : 
    2075             :     /* save current backend-wide total time */
    2076           0 :     fcu->save_total = total_func_time;
    2077             : 
    2078             :     /* get clock time as of function start */
    2079           0 :     INSTR_TIME_SET_CURRENT(fcu->f_start);
    2080             : }
    2081             : 
    2082             : /*
    2083             :  * find_funcstat_entry - find any existing PgStat_BackendFunctionEntry entry
    2084             :  *      for specified function
    2085             :  *
    2086             :  * If no entry, return NULL, don't create a new one
    2087             :  */
    2088             : PgStat_BackendFunctionEntry *
    2089           0 : find_funcstat_entry(Oid func_id)
    2090             : {
    2091           0 :     pgstat_assert_is_up();
    2092             : 
    2093           0 :     if (pgStatFunctions == NULL)
    2094           0 :         return NULL;
    2095             : 
    2096           0 :     return (PgStat_BackendFunctionEntry *) hash_search(pgStatFunctions,
    2097             :                                                        (void *) &func_id,
    2098             :                                                        HASH_FIND, NULL);
    2099             : }
    2100             : 
    2101             : /*
    2102             :  * Calculate function call usage and update stat counters.
    2103             :  * Called by the executor after invoking a function.
    2104             :  *
    2105             :  * In the case of a set-returning function that runs in value-per-call mode,
    2106             :  * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage
    2107             :  * calls for what the user considers a single call of the function.  The
    2108             :  * finalize flag should be TRUE on the last call.
    2109             :  */
    2110             : void
    2111    16996348 : pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize)
    2112             : {
    2113    16996348 :     PgStat_FunctionCounts *fs = fcu->fs;
    2114             :     instr_time  f_total;
    2115             :     instr_time  f_others;
    2116             :     instr_time  f_self;
    2117             : 
    2118             :     /* stats not wanted? */
    2119    16996348 :     if (fs == NULL)
    2120    16996348 :         return;
    2121             : 
    2122             :     /* total elapsed time in this function call */
    2123           0 :     INSTR_TIME_SET_CURRENT(f_total);
    2124           0 :     INSTR_TIME_SUBTRACT(f_total, fcu->f_start);
    2125             : 
    2126             :     /* self usage: elapsed minus anything already charged to other calls */
    2127           0 :     f_others = total_func_time;
    2128           0 :     INSTR_TIME_SUBTRACT(f_others, fcu->save_total);
    2129           0 :     f_self = f_total;
    2130           0 :     INSTR_TIME_SUBTRACT(f_self, f_others);
    2131             : 
    2132             :     /* update backend-wide total time */
    2133           0 :     INSTR_TIME_ADD(total_func_time, f_self);
    2134             : 
    2135             :     /*
    2136             :      * Compute the new f_total_time as the total elapsed time added to the
    2137             :      * pre-call value of f_total_time.  This is necessary to avoid
    2138             :      * double-counting any time taken by recursive calls of myself.  (We do
    2139             :      * not need any similar kluge for self time, since that already excludes
    2140             :      * any recursive calls.)
    2141             :      */
    2142           0 :     INSTR_TIME_ADD(f_total, fcu->save_f_total_time);
    2143             : 
    2144             :     /* update counters in function stats table */
    2145           0 :     if (finalize)
    2146           0 :         fs->f_numcalls++;
    2147           0 :     fs->f_total_time = f_total;
    2148           0 :     INSTR_TIME_ADD(fs->f_self_time, f_self);
    2149             : 
    2150             :     /* indicate that we have something to send */
    2151           0 :     have_function_stats = true;
    2152             : }
    2153             : 
    2154             : 
    2155             : /* ----------
    2156             :  * pgstat_initstats() -
    2157             :  *
    2158             :  *  Initialize a relcache entry to count access statistics.
    2159             :  *  Called whenever a relation is opened.
    2160             :  *
    2161             :  *  We assume that a relcache entry's pgstat_info field is zeroed by
    2162             :  *  relcache.c when the relcache entry is made; thereafter it is long-lived
    2163             :  *  data.  We can avoid repeated searches of the TabStatus arrays when the
    2164             :  *  same relation is touched repeatedly within a transaction.
    2165             :  * ----------
    2166             :  */
    2167             : void
    2168    35975066 : pgstat_initstats(Relation rel)
    2169             : {
    2170    35975066 :     Oid         rel_id = rel->rd_id;
    2171    35975066 :     char        relkind = rel->rd_rel->relkind;
    2172             : 
    2173             :     /*
    2174             :      * We only count stats for relations that have storage and for partitioned tables
    2175             :      */
    2176    35975066 :     if (!RELKIND_HAS_STORAGE(relkind) && relkind != RELKIND_PARTITIONED_TABLE)
    2177             :     {
    2178      340458 :         rel->pgstat_info = NULL;
    2179      340458 :         return;
    2180             :     }
    2181             : 
    2182    35634608 :     if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
    2183             :     {
    2184             :         /* We're not counting at all */
    2185    18304120 :         rel->pgstat_info = NULL;
    2186    18304120 :         return;
    2187             :     }
    2188             : 
    2189             :     /*
    2190             :      * If we already set up this relation in the current transaction, nothing
    2191             :      * to do.
    2192             :      */
    2193    17330488 :     if (rel->pgstat_info != NULL &&
    2194    16407856 :         rel->pgstat_info->t_id == rel_id)
    2195    16211750 :         return;
    2196             : 
    2197             :     /* Else find or make the PgStat_TableStatus entry, and update link */
    2198     1118738 :     rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
    2199             : }
    2200             : 
    2201             : /*
    2202             :  * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
    2203             :  */
    2204             : static PgStat_TableStatus *
    2205     1119544 : get_tabstat_entry(Oid rel_id, bool isshared)
    2206             : {
    2207             :     TabStatHashEntry *hash_entry;
    2208             :     PgStat_TableStatus *entry;
    2209             :     TabStatusArray *tsa;
    2210             :     bool        found;
    2211             : 
    2212     1119544 :     pgstat_assert_is_up();
    2213             : 
    2214             :     /*
    2215             :      * Create hash table if we don't have it already.
    2216             :      */
    2217     1119544 :     if (pgStatTabHash == NULL)
    2218             :     {
    2219             :         HASHCTL     ctl;
    2220             : 
    2221       27256 :         ctl.keysize = sizeof(Oid);
    2222       27256 :         ctl.entrysize = sizeof(TabStatHashEntry);
    2223             : 
    2224       27256 :         pgStatTabHash = hash_create("pgstat TabStatusArray lookup hash table",
    2225             :                                     TABSTAT_QUANTUM,
    2226             :                                     &ctl,
    2227             :                                     HASH_ELEM | HASH_BLOBS);
    2228             :     }
    2229             : 
    2230             :     /*
    2231             :      * Find an entry or create a new one.
    2232             :      */
    2233     1119544 :     hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
    2234     1119544 :     if (!found)
    2235             :     {
    2236             :         /* initialize new entry with null pointer */
    2237     1004262 :         hash_entry->tsa_entry = NULL;
    2238             :     }
    2239             : 
    2240             :     /*
    2241             :      * If entry is already valid, we're done.
    2242             :      */
    2243     1119544 :     if (hash_entry->tsa_entry)
    2244      115282 :         return hash_entry->tsa_entry;
    2245             : 
    2246             :     /*
    2247             :      * Locate the first pgStatTabList entry with free space, making a new list
    2248             :      * entry if needed.  Note that we could get an OOM failure here, but if so
    2249             :      * we have left the hashtable and the list in a consistent state.
    2250             :      */
    2251     1004262 :     if (pgStatTabList == NULL)
    2252             :     {
    2253             :         /* Set up first pgStatTabList entry */
    2254       14834 :         pgStatTabList = (TabStatusArray *)
    2255       14834 :             MemoryContextAllocZero(TopMemoryContext,
    2256             :                                    sizeof(TabStatusArray));
    2257             :     }
    2258             : 
    2259     1004262 :     tsa = pgStatTabList;
    2260     1148776 :     while (tsa->tsa_used >= TABSTAT_QUANTUM)
    2261             :     {
    2262      144514 :         if (tsa->tsa_next == NULL)
    2263        2152 :             tsa->tsa_next = (TabStatusArray *)
    2264        2152 :                 MemoryContextAllocZero(TopMemoryContext,
    2265             :                                        sizeof(TabStatusArray));
    2266      144514 :         tsa = tsa->tsa_next;
    2267             :     }
    2268             : 
    2269             :     /*
    2270             :      * Allocate a PgStat_TableStatus entry within this list entry.  We assume
    2271             :      * the entry was already zeroed, either at creation or after last use.
    2272             :      */
    2273     1004262 :     entry = &tsa->tsa_entries[tsa->tsa_used++];
    2274     1004262 :     entry->t_id = rel_id;
    2275     1004262 :     entry->t_shared = isshared;
    2276             : 
    2277             :     /*
    2278             :      * Now we can fill the entry in pgStatTabHash.
    2279             :      */
    2280     1004262 :     hash_entry->tsa_entry = entry;
    2281             : 
    2282     1004262 :     return entry;
    2283             : }
    2284             : 
    2285             : /*
    2286             :  * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
    2287             :  *
    2288             :  * If no entry, return NULL, don't create a new one
    2289             :  *
    2290             :  * Note: if we got an error in the most recent execution of pgstat_report_stat,
    2291             :  * it's possible that an entry exists but there's no hashtable entry for it.
    2292             :  * That's okay, we'll treat this case as "doesn't exist".
    2293             :  */
    2294             : PgStat_TableStatus *
    2295           0 : find_tabstat_entry(Oid rel_id)
    2296             : {
    2297             :     TabStatHashEntry *hash_entry;
    2298             : 
    2299             :     /* If hashtable doesn't exist, there are no entries at all */
    2300           0 :     if (!pgStatTabHash)
    2301           0 :         return NULL;
    2302             : 
    2303           0 :     hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL);
    2304           0 :     if (!hash_entry)
    2305           0 :         return NULL;
    2306             : 
    2307             :     /* Note that this step could also return NULL, but that's correct */
    2308           0 :     return hash_entry->tsa_entry;
    2309             : }
    2310             : 
    2311             : /*
    2312             :  * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed
    2313             :  */
    2314             : static PgStat_SubXactStatus *
    2315      311086 : get_tabstat_stack_level(int nest_level)
    2316             : {
    2317             :     PgStat_SubXactStatus *xact_state;
    2318             : 
    2319      311086 :     xact_state = pgStatXactStack;
    2320      311086 :     if (xact_state == NULL || xact_state->nest_level != nest_level)
    2321             :     {
    2322             :         xact_state = (PgStat_SubXactStatus *)
    2323      106952 :             MemoryContextAlloc(TopTransactionContext,
    2324             :                                sizeof(PgStat_SubXactStatus));
    2325      106952 :         xact_state->nest_level = nest_level;
    2326      106952 :         xact_state->prev = pgStatXactStack;
    2327      106952 :         xact_state->first = NULL;
    2328      106952 :         pgStatXactStack = xact_state;
    2329             :     }
    2330      311086 :     return xact_state;
    2331             : }
    2332             : 
    2333             : /*
    2334             :  * add_tabstat_xact_level - add a new (sub)transaction state record
    2335             :  */
    2336             : static void
    2337      309830 : add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
    2338             : {
    2339             :     PgStat_SubXactStatus *xact_state;
    2340             :     PgStat_TableXactStatus *trans;
    2341             : 
    2342             :     /*
    2343             :      * If this is the first rel to be modified at the current nest level, we
    2344             :      * first have to push a transaction stack entry.
    2345             :      */
    2346      309830 :     xact_state = get_tabstat_stack_level(nest_level);
    2347             : 
    2348             :     /* Now make a per-table stack entry */
    2349             :     trans = (PgStat_TableXactStatus *)
    2350      309830 :         MemoryContextAllocZero(TopTransactionContext,
    2351             :                                sizeof(PgStat_TableXactStatus));
    2352      309830 :     trans->nest_level = nest_level;
    2353      309830 :     trans->upper = pgstat_info->trans;
    2354      309830 :     trans->parent = pgstat_info;
    2355      309830 :     trans->next = xact_state->first;
    2356      309830 :     xact_state->first = trans;
    2357      309830 :     pgstat_info->trans = trans;
    2358      309830 : }
    2359             : 
    2360             : /*
    2361             :  * pgstat_count_heap_insert - count the insertion of n tuples
    2362             :  */
    2363             : void
    2364    19472436 : pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
    2365             : {
    2366    19472436 :     PgStat_TableStatus *pgstat_info = rel->pgstat_info;
    2367             : 
    2368    19472436 :     if (pgstat_info != NULL)
    2369             :     {
    2370             :         /* We have to log the effect at the proper transactional level */
    2371    11784536 :         int         nest_level = GetCurrentTransactionNestLevel();
    2372             : 
    2373    11784536 :         if (pgstat_info->trans == NULL ||
    2374    11582086 :             pgstat_info->trans->nest_level != nest_level)
    2375      206344 :             add_tabstat_xact_level(pgstat_info, nest_level);
    2376             : 
    2377    11784536 :         pgstat_info->trans->tuples_inserted += n;
    2378             :     }
    2379    19472436 : }
    2380             : 
    2381             : /*
    2382             :  * pgstat_count_heap_update - count a tuple update
    2383             :  */
    2384             : void
    2385      594982 : pgstat_count_heap_update(Relation rel, bool hot)
    2386             : {
    2387      594982 :     PgStat_TableStatus *pgstat_info = rel->pgstat_info;
    2388             : 
    2389      594982 :     if (pgstat_info != NULL)
    2390             :     {
    2391             :         /* We have to log the effect at the proper transactional level */
    2392      317350 :         int         nest_level = GetCurrentTransactionNestLevel();
    2393             : 
    2394      317350 :         if (pgstat_info->trans == NULL ||
    2395      288246 :             pgstat_info->trans->nest_level != nest_level)
    2396       29176 :             add_tabstat_xact_level(pgstat_info, nest_level);
    2397             : 
    2398      317350 :         pgstat_info->trans->tuples_updated++;
    2399             : 
    2400             :         /* t_tuples_hot_updated is nontransactional, so just advance it */
    2401      317350 :         if (hot)
    2402      146982 :             pgstat_info->t_counts.t_tuples_hot_updated++;
    2403             :     }
    2404      594982 : }
    2405             : 
    2406             : /*
    2407             :  * pgstat_count_heap_delete - count a tuple deletion
    2408             :  */
    2409             : void
    2410     2160790 : pgstat_count_heap_delete(Relation rel)
    2411             : {
    2412     2160790 :     PgStat_TableStatus *pgstat_info = rel->pgstat_info;
    2413             : 
    2414     2160790 :     if (pgstat_info != NULL)
    2415             :     {
    2416             :         /* We have to log the effect at the proper transactional level */
    2417     2147294 :         int         nest_level = GetCurrentTransactionNestLevel();
    2418             : 
    2419     2147294 :         if (pgstat_info->trans == NULL ||
    2420     2074732 :             pgstat_info->trans->nest_level != nest_level)
    2421       72690 :             add_tabstat_xact_level(pgstat_info, nest_level);
    2422             : 
    2423     2147294 :         pgstat_info->trans->tuples_deleted++;
    2424             :     }
    2425     2160790 : }
    2426             : 
    2427             : /*
    2428             :  * pgstat_truncdrop_save_counters
    2429             :  *
    2430             :  * Whenever a table is truncated/dropped, we save its i/u/d counters so that
    2431             :  * they can be cleared, and if the (sub)xact that executed the truncate/drop
    2432             :  * later aborts, the counters can be restored to the saved (pre-truncate/drop)
    2433             :  * values.
    2434             :  *
    2435             :  * Note that for truncate we do this on the first truncate in any particular
    2436             :  * subxact level only.
    2437             :  */
    2438             : static void
    2439        1742 : pgstat_truncdrop_save_counters(PgStat_TableXactStatus *trans, bool is_drop)
    2440             : {
    2441        1742 :     if (!trans->truncdropped || is_drop)
    2442             :     {
    2443        1654 :         trans->inserted_pre_truncdrop = trans->tuples_inserted;
    2444        1654 :         trans->updated_pre_truncdrop = trans->tuples_updated;
    2445        1654 :         trans->deleted_pre_truncdrop = trans->tuples_deleted;
    2446        1654 :         trans->truncdropped = true;
    2447             :     }
    2448        1742 : }
    2449             : 
    2450             : /*
    2451             :  * pgstat_truncdrop_restore_counters - restore counters when a truncate aborts
    2452             :  */
    2453             : static void
    2454       11572 : pgstat_truncdrop_restore_counters(PgStat_TableXactStatus *trans)
    2455             : {
    2456       11572 :     if (trans->truncdropped)
    2457             :     {
    2458          84 :         trans->tuples_inserted = trans->inserted_pre_truncdrop;
    2459          84 :         trans->tuples_updated = trans->updated_pre_truncdrop;
    2460          84 :         trans->tuples_deleted = trans->deleted_pre_truncdrop;
    2461             :     }
    2462       11572 : }
    2463             : 
    2464             : /*
    2465             :  * pgstat_count_truncate - update tuple counters due to truncate
    2466             :  */
    2467             : void
    2468        1726 : pgstat_count_truncate(Relation rel)
    2469             : {
    2470        1726 :     PgStat_TableStatus *pgstat_info = rel->pgstat_info;
    2471             : 
    2472        1726 :     if (pgstat_info != NULL)
    2473             :     {
    2474             :         /* We have to log the effect at the proper transactional level */
    2475        1726 :         int         nest_level = GetCurrentTransactionNestLevel();
    2476             : 
    2477        1726 :         if (pgstat_info->trans == NULL ||
    2478         122 :             pgstat_info->trans->nest_level != nest_level)
    2479        1620 :             add_tabstat_xact_level(pgstat_info, nest_level);
    2480             : 
    2481        1726 :         pgstat_truncdrop_save_counters(pgstat_info->trans, false);
    2482        1726 :         pgstat_info->trans->tuples_inserted = 0;
    2483        1726 :         pgstat_info->trans->tuples_updated = 0;
    2484        1726 :         pgstat_info->trans->tuples_deleted = 0;
    2485             :     }
    2486        1726 : }
    2487             : 
    2488             : /*
    2489             :  * pgstat_update_heap_dead_tuples - update dead-tuples count
    2490             :  *
    2491             :  * The semantics of this are that we are reporting the nontransactional
    2492             :  * recovery of "delta" dead tuples; so t_delta_dead_tuples decreases
    2493             :  * rather than increasing, and the change goes straight into the per-table
    2494             :  * counter, not into transactional state.
    2495             :  */
    2496             : void
    2497       89086 : pgstat_update_heap_dead_tuples(Relation rel, int delta)
    2498             : {
    2499       89086 :     PgStat_TableStatus *pgstat_info = rel->pgstat_info;
    2500             : 
    2501       89086 :     if (pgstat_info != NULL)
    2502       12448 :         pgstat_info->t_counts.t_delta_dead_tuples -= delta;
    2503       89086 : }
    2504             : 
    2505             : /*
    2506             :  * Perform relation stats specific end-of-transaction work. Helper for
    2507             :  * AtEOXact_PgStat.
    2508             :  *
    2509             :  * Transfer transactional insert/update counts into the base tabstat entries.
    2510             :  * We don't bother to free any of the transactional state, since it's all in
    2511             :  * TopTransactionContext and will go away anyway.
    2512             :  */
    2513             : static void
    2514      100986 : AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit)
    2515             : {
    2516             :     PgStat_TableXactStatus *trans;
    2517             : 
    2518      405636 :     for (trans = xact_state->first; trans != NULL; trans = trans->next)
    2519             :     {
    2520             :         PgStat_TableStatus *tabstat;
    2521             : 
    2522             :         Assert(trans->nest_level == 1);
    2523             :         Assert(trans->upper == NULL);
    2524      304650 :         tabstat = trans->parent;
    2525             :         Assert(tabstat->trans == trans);
    2526             :         /* restore pre-truncate/drop stats (if any) in case of aborted xact */
    2527      304650 :         if (!isCommit)
    2528       10576 :             pgstat_truncdrop_restore_counters(trans);
    2529             :         /* count attempted actions regardless of commit/abort */
    2530      304650 :         tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
    2531      304650 :         tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
    2532      304650 :         tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
    2533      304650 :         if (isCommit)
    2534             :         {
    2535      294074 :             tabstat->t_counts.t_truncdropped = trans->truncdropped;
    2536      294074 :             if (trans->truncdropped)
    2537             :             {
    2538             :                 /* forget live/dead stats seen by backend thus far */
    2539        1554 :                 tabstat->t_counts.t_delta_live_tuples = 0;
    2540        1554 :                 tabstat->t_counts.t_delta_dead_tuples = 0;
    2541             :             }
    2542             :             /* insert adds a live tuple, delete removes one */
    2543      294074 :             tabstat->t_counts.t_delta_live_tuples +=
    2544      294074 :                 trans->tuples_inserted - trans->tuples_deleted;
    2545             :             /* update and delete each create a dead tuple */
    2546      294074 :             tabstat->t_counts.t_delta_dead_tuples +=
    2547      294074 :                 trans->tuples_updated + trans->tuples_deleted;
    2548             :             /* insert, update, delete each count as one change event */
    2549      294074 :             tabstat->t_counts.t_changed_tuples +=
    2550      294074 :                 trans->tuples_inserted + trans->tuples_updated +
    2551      294074 :                 trans->tuples_deleted;
    2552             :         }
    2553             :         else
    2554             :         {
    2555             :             /* inserted tuples are dead, deleted tuples are unaffected */
    2556       10576 :             tabstat->t_counts.t_delta_dead_tuples +=
    2557       10576 :                 trans->tuples_inserted + trans->tuples_updated;
    2558             :             /* an aborted xact generates no changed_tuple events */
    2559             :         }
    2560      304650 :         tabstat->trans = NULL;
    2561             :     }
    2562      100986 : }
    2563             : 
    2564             : static void
    2565      732024 : AtEOXact_PgStat_Database(bool isCommit, bool parallel)
    2566             : {
    2567             :     /* Don't count parallel worker transaction stats */
    2568      732024 :     if (!parallel)
    2569             :     {
    2570             :         /*
    2571             :          * Count transaction commit or abort.  (We use counters, not just
    2572             :          * bools, in case the reporting message isn't sent right away.)
    2573             :          */
    2574      730332 :         if (isCommit)
    2575      706066 :             pgStatXactCommit++;
    2576             :         else
    2577       24266 :             pgStatXactRollback++;
    2578             :     }
    2579      732024 : }
    2580             : 
    2581             : /* ----------
    2582             :  * AtEOXact_PgStat
    2583             :  *
    2584             :  *  Called from access/transam/xact.c at top-level transaction commit/abort.
    2585             :  * ----------
    2586             :  */
    2587             : void
    2588      732024 : AtEOXact_PgStat(bool isCommit, bool parallel)
    2589             : {
    2590             :     PgStat_SubXactStatus *xact_state;
    2591             : 
    2592      732024 :     AtEOXact_PgStat_Database(isCommit, parallel);
    2593             : 
    2594             :     /* handle transactional stats information */
    2595      732024 :     xact_state = pgStatXactStack;
    2596      732024 :     if (xact_state != NULL)
    2597             :     {
    2598             :         Assert(xact_state->nest_level == 1);
    2599             :         Assert(xact_state->prev == NULL);
    2600             : 
    2601      100986 :         AtEOXact_PgStat_Relations(xact_state, isCommit);
    2602             :     }
    2603      732024 :     pgStatXactStack = NULL;
    2604             : 
    2605             :     /* Make sure any stats snapshot is thrown away */
    2606      732024 :     pgstat_clear_snapshot();
    2607      732024 : }
    2608             : 
    2609             : /*
    2610             :  * Perform relation stats specific end-of-sub-transaction work. Helper for
    2611             :  * AtEOSubXact_PgStat.
    2612             :  *
    2613             :  * Transfer transactional insert/update counts into the next higher
    2614             :  * subtransaction state.
    2615             :  */
    2616             : static void
    2617        5262 : AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth)
    2618             : {
    2619             :     PgStat_TableXactStatus *trans;
    2620             :     PgStat_TableXactStatus *next_trans;
    2621             : 
    2622       10888 :     for (trans = xact_state->first; trans != NULL; trans = next_trans)
    2623             :     {
    2624             :         PgStat_TableStatus *tabstat;
    2625             : 
    2626        5626 :         next_trans = trans->next;
    2627             :         Assert(trans->nest_level == nestDepth);
    2628        5626 :         tabstat = trans->parent;
    2629             :         Assert(tabstat->trans == trans);
    2630             : 
    2631        5626 :         if (isCommit)
    2632             :         {
    2633        4630 :             if (trans->upper && trans->upper->nest_level == nestDepth - 1)
    2634             :             {
    2635        3374 :                 if (trans->truncdropped)
    2636             :                 {
    2637             :                     /* propagate the truncate/drop status one level up */
    2638          16 :                     pgstat_truncdrop_save_counters(trans->upper, false);
    2639             :                     /* replace upper xact stats with ours */
    2640          16 :                     trans->upper->tuples_inserted = trans->tuples_inserted;
    2641          16 :                     trans->upper->tuples_updated = trans->tuples_updated;
    2642          16 :                     trans->upper->tuples_deleted = trans->tuples_deleted;
    2643             :                 }
    2644             :                 else
    2645             :                 {
    2646        3358 :                     trans->upper->tuples_inserted += trans->tuples_inserted;
    2647        3358 :                     trans->upper->tuples_updated += trans->tuples_updated;
    2648        3358 :                     trans->upper->tuples_deleted += trans->tuples_deleted;
    2649             :                 }
    2650        3374 :                 tabstat->trans = trans->upper;
    2651        3374 :                 pfree(trans);
    2652             :             }
    2653             :             else
    2654             :             {
    2655             :                 /*
    2656             :                  * When there isn't an immediate parent state, we can just
    2657             :                  * reuse the record instead of going through a
    2658             :                  * palloc/pfree pushup (this works since it's all in
    2659             :                  * TopTransactionContext anyway).  We have to re-link it
    2660             :                  * into the parent level, though, and that might mean
    2661             :                  * pushing a new entry into the pgStatXactStack.
    2662             :                  */
    2663             :                 PgStat_SubXactStatus *upper_xact_state;
    2664             : 
    2665        1256 :                 upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
    2666        1256 :                 trans->next = upper_xact_state->first;
    2667        1256 :                 upper_xact_state->first = trans;
    2668        1256 :                 trans->nest_level = nestDepth - 1;
    2669             :             }
    2670             :         }
    2671             :         else
    2672             :         {
    2673             :             /*
    2674             :              * On abort, update top-level tabstat counts, then forget the
    2675             :              * subtransaction
    2676             :              */
    2677             : 
    2678             :             /* first restore values obliterated by truncate/drop */
    2679         996 :             pgstat_truncdrop_restore_counters(trans);
    2680             :             /* count attempted actions regardless of commit/abort */
    2681         996 :             tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
    2682         996 :             tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
    2683         996 :             tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
    2684             :             /* inserted tuples are dead, deleted tuples are unaffected */
    2685         996 :             tabstat->t_counts.t_delta_dead_tuples +=
    2686         996 :                 trans->tuples_inserted + trans->tuples_updated;
    2687         996 :             tabstat->trans = trans->upper;
    2688         996 :             pfree(trans);
    2689             :         }
    2690             :     }
    2691        5262 : }
    2692             : 
    2693             : /* ----------
    2694             :  * AtEOSubXact_PgStat
    2695             :  *
    2696             :  *  Called from access/transam/xact.c at subtransaction commit/abort.
    2697             :  * ----------
    2698             :  */
    2699             : void
    2700       12154 : AtEOSubXact_PgStat(bool isCommit, int nestDepth)
    2701             : {
    2702             :     PgStat_SubXactStatus *xact_state;
    2703             : 
    2704             :     /* merge the sub-transaction's transactional stats into the parent */
    2705       12154 :     xact_state = pgStatXactStack;
    2706       12154 :     if (xact_state != NULL &&
    2707        5750 :         xact_state->nest_level >= nestDepth)
    2708             :     {
    2709             :         /* delink xact_state from stack immediately to simplify reuse case */
    2710        5262 :         pgStatXactStack = xact_state->prev;
    2711             : 
    2712        5262 :         AtEOSubXact_PgStat_Relations(xact_state, isCommit, nestDepth);
    2713             : 
    2714        5262 :         pfree(xact_state);
    2715             :     }
    2716       12154 : }
    2717             : 
    2718             : /*
    2719             :  * Generate 2PC records for all the pending transaction-dependent relation
    2720             :  * stats.
    2721             :  */
    2722             : static void
    2723         704 : AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
    2724             : {
    2725             :     PgStat_TableXactStatus *trans;
    2726             : 
    2727        1514 :     for (trans = xact_state->first; trans != NULL; trans = trans->next)
    2728             :     {
    2729             :         PgStat_TableStatus *tabstat;
    2730             :         TwoPhasePgStatRecord record;
    2731             : 
    2732             :         Assert(trans->nest_level == 1);
    2733             :         Assert(trans->upper == NULL);
    2734         810 :         tabstat = trans->parent;
    2735             :         Assert(tabstat->trans == trans);
    2736             : 
    2737         810 :         record.tuples_inserted = trans->tuples_inserted;
    2738         810 :         record.tuples_updated = trans->tuples_updated;
    2739         810 :         record.tuples_deleted = trans->tuples_deleted;
    2740         810 :         record.inserted_pre_truncdrop = trans->inserted_pre_truncdrop;
    2741         810 :         record.updated_pre_truncdrop = trans->updated_pre_truncdrop;
    2742         810 :         record.deleted_pre_truncdrop = trans->deleted_pre_truncdrop;
    2743         810 :         record.t_id = tabstat->t_id;
    2744         810 :         record.t_shared = tabstat->t_shared;
    2745         810 :         record.t_truncdropped = trans->truncdropped;
    2746             : 
    2747         810 :         RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
    2748             :                                &record, sizeof(TwoPhasePgStatRecord));
    2749             :     }
    2750         704 : }
    2751             : 
    2752             : /*
    2753             :  * AtPrepare_PgStat
    2754             :  *      Save the transactional stats state at 2PC transaction prepare.
    2755             :  */
    2756             : void
    2757         708 : AtPrepare_PgStat(void)
    2758             : {
    2759             :     PgStat_SubXactStatus *xact_state;
    2760             : 
    2761         708 :     xact_state = pgStatXactStack;
    2762         708 :     if (xact_state != NULL)
    2763             :     {
    2764             :         Assert(xact_state->nest_level == 1);
    2765             :         Assert(xact_state->prev == NULL);
    2766             : 
    2767         704 :         AtPrepare_PgStat_Relations(xact_state);
    2768             :     }
    2769         708 : }
    2770             : 
    2771             : /*
    2772             :  * All we need do here is unlink the transaction stats state from the
    2773             :  * nontransactional state.  The nontransactional action counts will be
    2774             :  * reported to the stats collector immediately, while the effects on
    2775             :  * live and dead tuple counts are preserved in the 2PC state file.
    2776             :  *
    2777             :  * Note: AtEOXact_PgStat_Relations is not called during PREPARE.
    2778             :  */
    2779             : static void
    2780         704 : PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
    2781             : {
    2782             :     PgStat_TableXactStatus *trans;
    2783             : 
    2784        1514 :     for (trans = xact_state->first; trans != NULL; trans = trans->next)
    2785             :     {
    2786             :         PgStat_TableStatus *tabstat;
    2787             : 
    2788         810 :         tabstat = trans->parent;
    2789         810 :         tabstat->trans = NULL;
    2790             :     }
    2791         704 : }
    2792             : 
    2793             : /*
    2794             :  * PostPrepare_PgStat
    2795             :  *      Clean up after successful PREPARE.
    2796             :  *
    2797             :  * Note: AtEOXact_PgStat is not called during PREPARE.
    2798             :  */
    2799             : void
    2800         708 : PostPrepare_PgStat(void)
    2801             : {
    2802             :     PgStat_SubXactStatus *xact_state;
    2803             : 
    2804             :     /*
    2805             :      * We don't bother to free any of the transactional state, since it's all
    2806             :      * in TopTransactionContext and will go away anyway.
    2807             :      */
    2808         708 :     xact_state = pgStatXactStack;
    2809         708 :     if (xact_state != NULL)
    2810             :     {
    2811             :         Assert(xact_state->nest_level == 1);
    2812             :         Assert(xact_state->prev == NULL);
    2813             : 
    2814         704 :         PostPrepare_PgStat_Relations(xact_state);
    2815             :     }
    2816         708 :     pgStatXactStack = NULL;
    2817             : 
    2818             :     /* Make sure any stats snapshot is thrown away */
    2819         708 :     pgstat_clear_snapshot();
    2820         708 : }
    2821             : 
    2822             : /*
    2823             :  * 2PC processing routine for COMMIT PREPARED case.
    2824             :  *
    2825             :  * Load the saved counts into our local pgstats state.
    2826             :  */
    2827             : void
    2828         744 : pgstat_twophase_postcommit(TransactionId xid, uint16 info,
    2829             :                            void *recdata, uint32 len)
    2830             : {
    2831         744 :     TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
    2832             :     PgStat_TableStatus *pgstat_info;
    2833             : 
    2834             :     /* Find or create a tabstat entry for the rel */
    2835         744 :     pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
    2836             : 
    2837             :     /* Same math as in AtEOXact_PgStat, commit case */
    2838         744 :     pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
    2839         744 :     pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
    2840         744 :     pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
    2841         744 :     pgstat_info->t_counts.t_truncdropped = rec->t_truncdropped;
    2842         744 :     if (rec->t_truncdropped)
    2843             :     {
    2844             :         /* forget live/dead stats seen by backend thus far */
    2845           0 :         pgstat_info->t_counts.t_delta_live_tuples = 0;
    2846           0 :         pgstat_info->t_counts.t_delta_dead_tuples = 0;
    2847             :     }
    2848         744 :     pgstat_info->t_counts.t_delta_live_tuples +=
    2849         744 :         rec->tuples_inserted - rec->tuples_deleted;
    2850         744 :     pgstat_info->t_counts.t_delta_dead_tuples +=
    2851         744 :         rec->tuples_updated + rec->tuples_deleted;
    2852         744 :     pgstat_info->t_counts.t_changed_tuples +=
    2853         744 :         rec->tuples_inserted + rec->tuples_updated +
    2854         744 :         rec->tuples_deleted;
    2855         744 : }
    2856             : 
    2857             : /*
    2858             :  * 2PC processing routine for ROLLBACK PREPARED case.
    2859             :  *
    2860             :  * Load the saved counts into our local pgstats state, but treat them
    2861             :  * as aborted.
    2862             :  */
    2863             : void
    2864          62 : pgstat_twophase_postabort(TransactionId xid, uint16 info,
    2865             :                           void *recdata, uint32 len)
    2866             : {
    2867          62 :     TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
    2868             :     PgStat_TableStatus *pgstat_info;
    2869             : 
    2870             :     /* Find or create a tabstat entry for the rel */
    2871          62 :     pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
    2872             : 
    2873             :     /* Same math as in AtEOXact_PgStat, abort case */
    2874          62 :     if (rec->t_truncdropped)
    2875             :     {
    2876           0 :         rec->tuples_inserted = rec->inserted_pre_truncdrop;
    2877           0 :         rec->tuples_updated = rec->updated_pre_truncdrop;
    2878           0 :         rec->tuples_deleted = rec->deleted_pre_truncdrop;
    2879             :     }
    2880          62 :     pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
    2881          62 :     pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
    2882          62 :     pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
    2883          62 :     pgstat_info->t_counts.t_delta_dead_tuples +=
    2884          62 :         rec->tuples_inserted + rec->tuples_updated;
    2885          62 : }
    2886             : 
    2887             : 
    2888             : /* ----------
    2889             :  * pgstat_fetch_stat_dbentry() -
    2890             :  *
    2891             :  *  Support function for the SQL-callable pgstat* functions. Returns
    2892             :  *  the collected statistics for one database or NULL. NULL doesn't mean
    2893             :  *  that the database doesn't exist, it is just not yet known by the
    2894             :  *  collector, so the caller is better off to report ZERO instead.
    2895             :  * ----------
    2896             :  */
    2897             : PgStat_StatDBEntry *
    2898        3704 : pgstat_fetch_stat_dbentry(Oid dbid)
    2899             : {
    2900             :     /*
    2901             :      * If not done for this transaction, read the statistics collector stats
    2902             :      * file into some hash tables.
    2903             :      */
    2904        3704 :     backend_read_statsfile();
    2905             : 
    2906             :     /*
    2907             :      * Lookup the requested database; return NULL if not found
    2908             :      */
    2909        3702 :     return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
    2910             :                                               (void *) &dbid,
    2911             :                                               HASH_FIND, NULL);
    2912             : }
    2913             : 
    2914             : 
    2915             : /* ----------
    2916             :  * pgstat_fetch_stat_tabentry() -
    2917             :  *
    2918             :  *  Support function for the SQL-callable pgstat* functions. Returns
    2919             :  *  the collected statistics for one table or NULL. NULL doesn't mean
    2920             :  *  that the table doesn't exist, it is just not yet known by the
    2921             :  *  collector, so the caller is better off to report ZERO instead.
    2922             :  * ----------
    2923             :  */
    2924             : PgStat_StatTabEntry *
    2925         760 : pgstat_fetch_stat_tabentry(Oid relid)
    2926             : {
    2927             :     Oid         dbid;
    2928             :     PgStat_StatDBEntry *dbentry;
    2929             :     PgStat_StatTabEntry *tabentry;
    2930             : 
    2931             :     /*
    2932             :      * If not done for this transaction, read the statistics collector stats
    2933             :      * file into some hash tables.
    2934             :      */
    2935         760 :     backend_read_statsfile();
    2936             : 
    2937             :     /*
    2938             :      * Lookup our database, then look in its table hash table.
    2939             :      */
    2940         760 :     dbid = MyDatabaseId;
    2941         760 :     dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
    2942             :                                                  (void *) &dbid,
    2943             :                                                  HASH_FIND, NULL);
    2944         760 :     if (dbentry != NULL && dbentry->tables != NULL)
    2945             :     {
    2946         760 :         tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
    2947             :                                                        (void *) &relid,
    2948             :                                                        HASH_FIND, NULL);
    2949         760 :         if (tabentry)
    2950         370 :             return tabentry;
    2951             :     }
    2952             : 
    2953             :     /*
    2954             :      * If we didn't find it, maybe it's a shared table.
    2955             :      */
    2956         390 :     dbid = InvalidOid;
    2957         390 :     dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
    2958             :                                                  (void *) &dbid,
    2959             :                                                  HASH_FIND, NULL);
    2960         390 :     if (dbentry != NULL && dbentry->tables != NULL)
    2961             :     {
    2962         390 :         tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
    2963             :                                                        (void *) &relid,
    2964             :                                                        HASH_FIND, NULL);
    2965         390 :         if (tabentry)
    2966           0 :             return tabentry;
    2967             :     }
    2968             : 
    2969         390 :     return NULL;
    2970             : }
    2971             : 
    2972             : 
    2973             : /* ----------
    2974             :  * pgstat_fetch_stat_funcentry() -
    2975             :  *
    2976             :  *  Support function for the SQL-callable pgstat* functions. Returns
    2977             :  *  the collected statistics for one function or NULL.
    2978             :  * ----------
    2979             :  */
    2980             : PgStat_StatFuncEntry *
    2981           0 : pgstat_fetch_stat_funcentry(Oid func_id)
    2982             : {
    2983             :     PgStat_StatDBEntry *dbentry;
    2984           0 :     PgStat_StatFuncEntry *funcentry = NULL;
    2985             : 
    2986             :     /* load the stats file if needed */
    2987           0 :     backend_read_statsfile();
    2988             : 
    2989             :     /* Lookup our database, then find the requested function.  */
    2990           0 :     dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
    2991           0 :     if (dbentry != NULL && dbentry->functions != NULL)
    2992             :     {
    2993           0 :         funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
    2994             :                                                          (void *) &func_id,
    2995             :                                                          HASH_FIND, NULL);
    2996             :     }
    2997             : 
    2998           0 :     return funcentry;
    2999             : }
    3000             : 
    3001             : /*
    3002             :  * ---------
    3003             :  * pgstat_fetch_stat_subworker_entry() -
    3004             :  *
    3005             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3006             :  *  the collected statistics for one subscription worker or NULL.
    3007             :  * ---------
    3008             :  */
    3009             : PgStat_StatSubWorkerEntry *
    3010          12 : pgstat_fetch_stat_subworker_entry(Oid subid, Oid subrelid)
    3011             : {
    3012             :     PgStat_StatDBEntry *dbentry;
    3013          12 :     PgStat_StatSubWorkerEntry *wentry = NULL;
    3014             : 
    3015             :     /* Load the stats file if needed */
    3016          12 :     backend_read_statsfile();
    3017             : 
    3018             :     /*
    3019             :      * Lookup our database, then find the requested subscription worker stats.
    3020             :      */
    3021          12 :     dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
    3022          12 :     if (dbentry != NULL && dbentry->subworkers != NULL)
    3023             :     {
    3024          12 :         wentry = pgstat_get_subworker_entry(dbentry, subid, subrelid,
    3025             :                                             false);
    3026             :     }
    3027             : 
    3028          12 :     return wentry;
    3029             : }
    3030             : 
    3031             : /*
    3032             :  * ---------
    3033             :  * pgstat_fetch_stat_archiver() -
    3034             :  *
    3035             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3036             :  *  a pointer to the archiver statistics struct.
    3037             :  * ---------
    3038             :  */
    3039             : PgStat_ArchiverStats *
    3040          46 : pgstat_fetch_stat_archiver(void)
    3041             : {
    3042          46 :     backend_read_statsfile();
    3043             : 
    3044          46 :     return &archiverStats;
    3045             : }
    3046             : 
    3047             : /*
    3048             :  * ---------
    3049             :  * pgstat_fetch_stat_bgwriter() -
    3050             :  *
    3051             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3052             :  *  a pointer to the bgwriter statistics struct.
    3053             :  * ---------
    3054             :  */
    3055             : PgStat_BgWriterStats *
    3056           0 : pgstat_fetch_stat_bgwriter(void)
    3057             : {
    3058           0 :     backend_read_statsfile();
    3059             : 
    3060           0 :     return &globalStats.bgwriter;
    3061             : }
    3062             : 
    3063             : /*
    3064             :  * ---------
    3065             :  * pgstat_fetch_stat_checkpointer() -
    3066             :  *
    3067             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3068             :  *  a pointer to the checkpointer statistics struct.
    3069             :  * ---------
    3070             :  */
    3071             : PgStat_CheckpointerStats *
    3072           0 : pgstat_fetch_stat_checkpointer(void)
    3073             : {
    3074           0 :     backend_read_statsfile();
    3075             : 
    3076           0 :     return &globalStats.checkpointer;
    3077             : }
    3078             : 
    3079             : /*
    3080             :  * ---------
    3081             :  * pgstat_fetch_global() -
    3082             :  *
    3083             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3084             :  *  a pointer to the global statistics struct.
    3085             :  * ---------
    3086             :  */
    3087             : PgStat_GlobalStats *
    3088          30 : pgstat_fetch_global(void)
    3089             : {
    3090          30 :     backend_read_statsfile();
    3091             : 
    3092          30 :     return &globalStats;
    3093             : }
    3094             : 
    3095             : /*
    3096             :  * ---------
    3097             :  * pgstat_fetch_stat_wal() -
    3098             :  *
    3099             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3100             :  *  a pointer to the WAL statistics struct.
    3101             :  * ---------
    3102             :  */
    3103             : PgStat_WalStats *
    3104           4 : pgstat_fetch_stat_wal(void)
    3105             : {
    3106           4 :     backend_read_statsfile();
    3107             : 
    3108           4 :     return &walStats;
    3109             : }
    3110             : 
    3111             : /*
    3112             :  * ---------
    3113             :  * pgstat_fetch_slru() -
    3114             :  *
    3115             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3116             :  *  a pointer to the slru statistics struct.
    3117             :  * ---------
    3118             :  */
    3119             : PgStat_SLRUStats *
    3120           4 : pgstat_fetch_slru(void)
    3121             : {
    3122           4 :     backend_read_statsfile();
    3123             : 
    3124           4 :     return slruStats;
    3125             : }
    3126             : 
    3127             : /*
    3128             :  * ---------
    3129             :  * pgstat_fetch_replslot() -
    3130             :  *
    3131             :  *  Support function for the SQL-callable pgstat* functions. Returns
    3132             :  *  a pointer to the replication slot statistics struct.
    3133             :  * ---------
    3134             :  */
    3135             : PgStat_StatReplSlotEntry *
    3136          78 : pgstat_fetch_replslot(NameData slotname)
    3137             : {
    3138          78 :     backend_read_statsfile();
    3139             : 
    3140          78 :     return pgstat_get_replslot_entry(slotname, false);
    3141             : }
    3142             : 
    3143             : /*
    3144             :  * Shut down a single backend's statistics reporting at process exit.
    3145             :  *
    3146             :  * Flush any remaining statistics counts out to the collector.
    3147             :  * Without this, operations triggered during backend exit (such as
    3148             :  * temp table deletions) won't be counted.
    3149             :  */
    3150             : static void
    3151       18588 : pgstat_shutdown_hook(int code, Datum arg)
    3152             : {
    3153             :     Assert(!pgstat_is_shutdown);
    3154             : 
    3155             :     /*
    3156             :      * If we got as far as discovering our own database ID, we can report what
    3157             :      * we did to the collector.  Otherwise, we'd be sending an invalid
    3158             :      * database ID, so forget it.  (This means that accesses to pg_database
    3159             :      * during failed backend starts might never get counted.)
    3160             :      */
    3161       18588 :     if (OidIsValid(MyDatabaseId))
    3162       14272 :         pgstat_report_stat(true);
    3163             : 
    3164             : #ifdef USE_ASSERT_CHECKING
    3165             :     pgstat_is_shutdown = true;
    3166             : #endif
    3167       18588 : }
    3168             : 
    3169             : /* ----------
    3170             :  * pgstat_initialize() -
    3171             :  *
    3172             :  *  Initialize pgstats state, and set up our on-proc-exit hook. Called from
    3173             :  *  BaseInit().
    3174             :  *
    3175             :  *  NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
    3176             :  * ----------
    3177             :  */
    3178             : void
    3179       18588 : pgstat_initialize(void)
    3180             : {
    3181             :     Assert(!pgstat_is_initialized);
    3182             : 
    3183             :     /*
    3184             :      * Initialize prevWalUsage with pgWalUsage so that pgstat_send_wal() can
    3185             :      * calculate how much pgWalUsage counters are increased by subtracting
    3186             :      * prevWalUsage from pgWalUsage.
    3187             :      */
    3188       18588 :     prevWalUsage = pgWalUsage;
    3189             : 
    3190             :     /* Set up a process-exit hook to clean up */
    3191       18588 :     before_shmem_exit(pgstat_shutdown_hook, 0);
    3192             : 
    3193             : #ifdef USE_ASSERT_CHECKING
    3194             :     pgstat_is_initialized = true;
    3195             : #endif
    3196       18588 : }
    3197             : 
    3198             : /* ------------------------------------------------------------
    3199             :  * Local support functions follow
    3200             :  * ------------------------------------------------------------
    3201             :  */
    3202             : 
    3203             : 
    3204             : /* ----------
    3205             :  * pgstat_setheader() -
    3206             :  *
    3207             :  *      Set common header fields in a statistics message
    3208             :  * ----------
    3209             :  */
    3210             : static void
    3211      204506 : pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
    3212             : {
    3213      204506 :     hdr->m_type = mtype;
    3214      204506 : }
    3215             : 
    3216             : 
    3217             : /* ----------
    3218             :  * pgstat_send() -
    3219             :  *
    3220             :  *      Send out one statistics message to the collector
    3221             :  * ----------
    3222             :  */
    3223             : static void
    3224      204506 : pgstat_send(void *msg, int len)
    3225             : {
    3226             :     int         rc;
    3227             : 
    3228      204506 :     pgstat_assert_is_up();
    3229             : 
    3230      204506 :     if (pgStatSock == PGINVALID_SOCKET)
    3231        9784 :         return;
    3232             : 
    3233      194722 :     ((PgStat_MsgHdr *) msg)->m_size = len;
    3234             : 
    3235             :     /* We'll retry after EINTR, but ignore all other failures */
    3236             :     do
    3237             :     {
    3238      194722 :         rc = send(pgStatSock, msg, len, 0);
    3239      194722 :     } while (rc < 0 && errno == EINTR);
    3240             : 
    3241             : #ifdef USE_ASSERT_CHECKING
    3242             :     /* In debug builds, log send failures ... */
    3243             :     if (rc < 0)
    3244             :         elog(LOG, "could not send to statistics collector: %m");
    3245             : #endif
    3246             : }
    3247             : 
    3248             : /* ----------
    3249             :  * pgstat_send_archiver() -
    3250             :  *
    3251             :  *  Tell the collector about the WAL file that we successfully
    3252             :  *  archived or failed to archive.
    3253             :  * ----------
    3254             :  */
    3255             : void
    3256          14 : pgstat_send_archiver(const char *xlog, bool failed)
    3257             : {
    3258             :     PgStat_MsgArchiver msg;
    3259             : 
    3260             :     /*
    3261             :      * Prepare and send the message
    3262             :      */
    3263          14 :     pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ARCHIVER);
    3264          14 :     msg.m_failed = failed;
    3265          14 :     strlcpy(msg.m_xlog, xlog, sizeof(msg.m_xlog));
    3266          14 :     msg.m_timestamp = GetCurrentTimestamp();
    3267          14 :     pgstat_send(&msg, sizeof(msg));
    3268          14 : }
    3269             : 
    3270             : /* ----------
    3271             :  * pgstat_send_bgwriter() -
    3272             :  *
    3273             :  *      Send bgwriter statistics to the collector
    3274             :  * ----------
    3275             :  */
    3276             : void
    3277        9846 : pgstat_send_bgwriter(void)
    3278             : {
    3279             :     /* We assume this initializes to zeroes */
    3280             :     static const PgStat_MsgBgWriter all_zeroes;
    3281             : 
    3282        9846 :     pgstat_assert_is_up();
    3283             : 
    3284             :     /*
    3285             :      * This function can be called even if nothing at all has happened. In
    3286             :      * this case, avoid sending a completely empty message to the stats
    3287             :      * collector.
    3288             :      */
    3289        9846 :     if (memcmp(&PendingBgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
    3290        4930 :         return;
    3291             : 
    3292             :     /*
    3293             :      * Prepare and send the message
    3294             :      */
    3295        4916 :     pgstat_setheader(&PendingBgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER);
    3296        4916 :     pgstat_send(&PendingBgWriterStats, sizeof(PendingBgWriterStats));
    3297             : 
    3298             :     /*
    3299             :      * Clear out the statistics buffer, so it can be re-used.
    3300             :      */
    3301       24580 :     MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats));
    3302             : }
    3303             : 
    3304             : /* ----------
    3305             :  * pgstat_send_checkpointer() -
    3306             :  *
    3307             :  *      Send checkpointer statistics to the collector
    3308             :  * ----------
    3309             :  */
    3310             : void
    3311        4060 : pgstat_send_checkpointer(void)
    3312             : {
    3313             :     /* We assume this initializes to zeroes */
    3314             :     static const PgStat_MsgCheckpointer all_zeroes;
    3315             : 
    3316             :     /*
    3317             :      * This function can be called even if nothing at all has happened. In
    3318             :      * this case, avoid sending a completely empty message to the stats
    3319             :      * collector.
    3320             :      */
    3321        4060 :     if (memcmp(&PendingCheckpointerStats, &all_zeroes, sizeof(PgStat_MsgCheckpointer)) == 0)
    3322         628 :         return;
    3323             : 
    3324             :     /*
    3325             :      * Prepare and send the message
    3326             :      */
    3327        3432 :     pgstat_setheader(&PendingCheckpointerStats.m_hdr, PGSTAT_MTYPE_CHECKPOINTER);
    3328        3432 :     pgstat_send(&PendingCheckpointerStats, sizeof(PendingCheckpointerStats));
    3329             : 
    3330             :     /*
    3331             :      * Clear out the statistics buffer, so it can be re-used.
    3332             :      */
    3333       30888 :     MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats));
    3334             : }
    3335             : 
    3336             : /* ----------
    3337             :  * pgstat_send_wal() -
    3338             :  *
    3339             :  *  Send WAL statistics to the collector.
    3340             :  *
    3341             :  * If 'force' is not set, the WAL stats message is only sent if at least
    3342             :  * PGSTAT_STAT_INTERVAL msec have passed since the last one was sent.
    3343             :  * ----------
    3344             :  */
    3345             : void
    3346       50336 : pgstat_send_wal(bool force)
    3347             : {
    3348             :     static TimestampTz sendTime = 0;
    3349             : 
    3350             :     /*
    3351             :      * This function can be called even if nothing at all has happened. In
    3352             :      * this case, avoid sending a completely empty message to the stats
    3353             :      * collector.
    3354             :      *
    3355             :      * Check wal_records counter to determine whether any WAL activity has
    3356             :      * happened since last time. Note that other WalUsage counters don't need
    3357             :      * to be checked because they are always incremented together with the
    3358             :      * wal_records counter.
    3359             :      *
    3360             :      * m_wal_buffers_full also doesn't need to be checked because it's
    3361             :      * incremented only when at least one WAL record is generated (i.e.,
    3362             :      * wal_records counter is incremented). But for safety, we assert that
    3363             :      * m_wal_buffers_full is always zero when no WAL record is generated.
    3364             :      *
    3365             :      * This function can be called by a process like walwriter that normally
    3366             :      * generates no WAL records. To determine whether any WAL activity has
    3367             :      * happened at that process since the last time, the numbers of WAL writes
    3368             :      * and syncs are also checked.
    3369             :      */
    3370       50336 :     if (pgWalUsage.wal_records == prevWalUsage.wal_records &&
    3371       39120 :         WalStats.m_wal_write == 0 && WalStats.m_wal_sync == 0)
    3372             :     {
    3373             :         Assert(WalStats.m_wal_buffers_full == 0);
    3374       28106 :         return;
    3375             :     }
    3376             : 
    3377       22230 :     if (!force)
    3378             :     {
    3379       10918 :         TimestampTz now = GetCurrentTimestamp();
    3380             : 
    3381             :         /*
    3382             :          * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
    3383             :          * msec since we last sent one to avoid overloading the stats
    3384             :          * collector.
    3385             :          */
    3386       10918 :         if (!TimestampDifferenceExceeds(sendTime, now, PGSTAT_STAT_INTERVAL))
    3387        9794 :             return;
    3388        1124 :         sendTime = now;
    3389             :     }
    3390             : 
    3391             :     /*
    3392             :      * Set the counters related to generated WAL data if the counters were
    3393             :      * updated.
    3394             :      */
    3395       12436 :     if (pgWalUsage.wal_records != prevWalUsage.wal_records)
    3396             :     {
    3397             :         WalUsage    walusage;
    3398             : 
    3399             :         /*
    3400             :          * Calculate how much WAL usage counters were increased by
    3401             :          * subtracting the previous counters from the current ones. Fill the
    3402             :          * results in WAL stats message.
    3403             :          */
    3404       44864 :         MemSet(&walusage, 0, sizeof(WalUsage));
    3405       11216 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &prevWalUsage);
    3406             : 
    3407       11216 :         WalStats.m_wal_records = walusage.wal_records;
    3408       11216 :         WalStats.m_wal_fpi = walusage.wal_fpi;
    3409       11216 :         WalStats.m_wal_bytes = walusage.wal_bytes;
    3410             : 
    3411             :         /*
    3412             :          * Save the current counters for the subsequent calculation of WAL
    3413             :          * usage.
    3414             :          */
    3415       11216 :         prevWalUsage = pgWalUsage;
    3416             :     }
    3417             : 
    3418             :     /*
    3419             :      * Prepare and send the message
    3420             :      */
    3421       12436 :     pgstat_setheader(&WalStats.m_hdr, PGSTAT_MTYPE_WAL);
    3422       12436 :     pgstat_send(&WalStats, sizeof(WalStats));
    3423             : 
    3424             :     /*
    3425             :      * Clear out the statistics buffer, so it can be re-used.
    3426             :      */
    3427      124360 :     MemSet(&WalStats, 0, sizeof(WalStats));
    3428             : }
    3429             : 
    3430             : /* ----------
    3431             :  * pgstat_send_slru() -
    3432             :  *
    3433             :  *      Send SLRU statistics to the collector
    3434             :  * ----------
    3435             :  */
    3436             : static void
    3437       29482 : pgstat_send_slru(void)
    3438             : {
    3439             :     /* We assume this initializes to zeroes */
    3440             :     static const PgStat_MsgSLRU all_zeroes;
    3441             : 
    3442      265338 :     for (int i = 0; i < SLRU_NUM_ELEMENTS; i++)
    3443             :     {
    3444             :         /*
    3445             :          * This function can be called even if nothing at all has happened. In
    3446             :          * this case, avoid sending a completely empty message to the stats
    3447             :          * collector.
    3448             :          */
    3449      235856 :         if (memcmp(&SLRUStats[i], &all_zeroes, sizeof(PgStat_MsgSLRU)) == 0)
    3450      217224 :             continue;
    3451             : 
    3452             :         /* set the SLRU type before each send */
    3453       18632 :         SLRUStats[i].m_index = i;
    3454             : 
    3455             :         /*
    3456             :          * Prepare and send the message
    3457             :          */
    3458       18632 :         pgstat_setheader(&SLRUStats[i].m_hdr, PGSTAT_MTYPE_SLRU);
    3459       18632 :         pgstat_send(&SLRUStats[i], sizeof(PgStat_MsgSLRU));
    3460             : 
    3461             :         /*
    3462             :          * Clear out the statistics buffer, so it can be re-used.
    3463             :          */
    3464      186320 :         MemSet(&SLRUStats[i], 0, sizeof(PgStat_MsgSLRU));
    3465             :     }
    3466       29482 : }
    3467             : 
    3468             : /* --------
    3469             :  * pgstat_send_subscription_purge() -
    3470             :  *
    3471             :  *  Send a subscription purge message to the collector
    3472             :  * --------
    3473             :  */
    3474             : static void
    3475          34 : pgstat_send_subscription_purge(PgStat_MsgSubscriptionPurge *msg)
    3476             : {
    3477             :     int         len;
    3478             : 
    3479          34 :     len = offsetof(PgStat_MsgSubscriptionPurge, m_subids[0])
    3480          34 :         + msg->m_nentries * sizeof(Oid);
    3481             : 
    3482          34 :     pgstat_setheader(&msg->m_hdr, PGSTAT_MTYPE_SUBSCRIPTIONPURGE);
    3483          34 :     pgstat_send(msg, len);
    3484          34 : }
    3485             : 
    3486             : /* ----------
    3487             :  * PgstatCollectorMain() -
    3488             :  *
    3489             :  *  Start up the statistics collector process.  This is the body of the
    3490             :  *  postmaster child process.
    3491             :  *
    3492             :  *  The argc/argv parameters are valid only in EXEC_BACKEND case.
    3493             :  * ----------
    3494             :  */
    3495             : NON_EXEC_STATIC void
    3496         944 : PgstatCollectorMain(int argc, char *argv[])
    3497             : {
    3498             :     int         len;
    3499             :     PgStat_Msg  msg;
    3500             :     int         wr;
    3501             :     WaitEvent   event;
    3502             :     WaitEventSet *wes;
    3503             : 
    3504             :     /*
    3505             :      * Ignore all signals usually bound to some action in the postmaster,
    3506             :      * except SIGHUP and SIGQUIT.  Note we don't need a SIGUSR1 handler to
    3507             :      * support latch operations, because we only use a local latch.
    3508             :      */
    3509         944 :     pqsignal(SIGHUP, SignalHandlerForConfigReload);
    3510         944 :     pqsignal(SIGINT, SIG_IGN);
    3511         944 :     pqsignal(SIGTERM, SIG_IGN);
    3512         944 :     pqsignal(SIGQUIT, SignalHandlerForShutdownRequest);
    3513         944 :     pqsignal(SIGALRM, SIG_IGN);
    3514         944 :     pqsignal(SIGPIPE, SIG_IGN);
    3515         944 :     pqsignal(SIGUSR1, SIG_IGN);
    3516         944 :     pqsignal(SIGUSR2, SIG_IGN);
    3517             :     /* Reset some signals that are accepted by postmaster but not here */
    3518         944 :     pqsignal(SIGCHLD, SIG_DFL);
    3519         944 :     PG_SETMASK(&UnBlockSig);
    3520             : 
    3521         944 :     MyBackendType = B_STATS_COLLECTOR;
    3522         944 :     init_ps_display(NULL);
    3523             : 
    3524             :     /*
    3525             :      * Read in existing stats files or initialize the stats to zero.
    3526             :      */
    3527         944 :     pgStatRunningInCollector = true;
    3528         944 :     pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
    3529             : 
    3530             :     /* Prepare to wait for our latch or data in our socket. */
    3531         944 :     wes = CreateWaitEventSet(CurrentMemoryContext, 3);
    3532         944 :     AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
    3533         944 :     AddWaitEventToSet(wes, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, NULL, NULL);
    3534         944 :     AddWaitEventToSet(wes, WL_SOCKET_READABLE, pgStatSock, NULL, NULL);
    3535             : 
    3536             :     /*
    3537             :      * Loop to process messages until we get SIGQUIT or detect ungraceful
    3538             :      * death of our parent postmaster.
    3539             :      *
    3540             :      * For performance reasons, we don't want to do ResetLatch/WaitLatch after
    3541             :      * every message; instead, do that only after a recv() fails to obtain a
    3542             :      * message.  (This effectively means that if backends are sending us stuff
    3543             :      * like mad, we won't notice postmaster death until things slack off a
    3544             :      * bit; which seems fine.)  To do that, we have an inner loop that
    3545             :      * iterates as long as recv() succeeds.  We do check ConfigReloadPending
    3546             :      * inside the inner loop, which means that such interrupts will get
    3547             :      * serviced but the latch won't get cleared until next time there is a
    3548             :      * break in the action.
    3549             :      */
    3550             :     for (;;)
    3551             :     {
    3552             :         /* Clear any already-pending wakeups */
    3553      119272 :         ResetLatch(MyLatch);
    3554             : 
    3555             :         /*
    3556             :          * Quit if we get SIGQUIT from the postmaster.
    3557             :          */
    3558      119272 :         if (ShutdownRequestPending)
    3559         938 :             break;
    3560             : 
    3561             :         /*
    3562             :          * Inner loop iterates as long as we keep getting messages, or until
    3563             :          * ShutdownRequestPending becomes set.
    3564             :          */
    3565      320810 :         while (!ShutdownRequestPending)
    3566             :         {
    3567             :             /*
    3568             :              * Reload configuration if we got SIGHUP from the postmaster.
    3569             :              */
    3570      320810 :             if (ConfigReloadPending)
    3571             :             {
    3572         104 :                 ConfigReloadPending = false;
    3573         104 :                 ProcessConfigFile(PGC_SIGHUP);
    3574             :             }
    3575             : 
    3576             :             /*
    3577             :              * Write the stats file(s) if a new request has arrived that is
    3578             :              * not satisfied by existing file(s).
    3579             :              */
    3580      320810 :             if (pgstat_write_statsfile_needed())
    3581        1464 :                 pgstat_write_statsfiles(false, false);
    3582             : 
    3583             :             /*
    3584             :              * Try to receive and process a message.  This will not block,
    3585             :              * since the socket is set to non-blocking mode.
    3586             :              *
    3587             :              * XXX On Windows, we have to force pgwin32_recv to cooperate,
    3588             :              * despite the previous use of pg_set_noblock() on the socket.
    3589             :              * This is extremely broken and should be fixed someday.
    3590             :              */
    3591             : #ifdef WIN32
    3592             :             pgwin32_noblock = 1;
    3593             : #endif
    3594             : 
    3595      320810 :             len = recv(pgStatSock, (char *) &msg,
    3596             :                        sizeof(PgStat_Msg), 0);
    3597             : 
    3598             : #ifdef WIN32
    3599             :             pgwin32_noblock = 0;
    3600             : #endif
    3601             : 
    3602      320810 :             if (len < 0)
    3603             :             {
    3604      118334 :                 if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
    3605             :                     break;      /* out of inner loop */
    3606           0 :                 ereport(ERROR,
    3607             :                         (errcode_for_socket_access(),
    3608             :                          errmsg("could not read statistics message: %m")));
    3609             :             }
    3610             : 
    3611             :             /*
    3612             :              * We ignore messages that are smaller than our common header
    3613             :              */
    3614      202476 :             if (len < sizeof(PgStat_MsgHdr))
    3615           0 :                 continue;
    3616             : 
    3617             :             /*
    3618             :              * The received length must match the length in the header
    3619             :              */
    3620      202476 :             if (msg.msg_hdr.m_size != len)
    3621           0 :                 continue;
    3622             : 
    3623             :             /*
    3624             :              * O.K. - we accept this message.  Process it.
    3625             :              */
    3626      202476 :             switch (msg.msg_hdr.m_type)
    3627             :             {
    3628           0 :                 case PGSTAT_MTYPE_DUMMY:
    3629           0 :                     break;
    3630             : 
    3631        1464 :                 case PGSTAT_MTYPE_INQUIRY:
    3632        1464 :                     pgstat_recv_inquiry(&msg.msg_inquiry, len);
    3633        1464 :                     break;
    3634             : 
    3635      119180 :                 case PGSTAT_MTYPE_TABSTAT:
    3636      119180 :                     pgstat_recv_tabstat(&msg.msg_tabstat, len);
    3637      119180 :                     break;
    3638             : 
    3639        1076 :                 case PGSTAT_MTYPE_TABPURGE:
    3640        1076 :                     pgstat_recv_tabpurge(&msg.msg_tabpurge, len);
    3641        1076 :                     break;
    3642             : 
    3643          18 :                 case PGSTAT_MTYPE_DROPDB:
    3644          18 :                     pgstat_recv_dropdb(&msg.msg_dropdb, len);
    3645          18 :                     break;
    3646             : 
    3647           0 :                 case PGSTAT_MTYPE_RESETCOUNTER:
    3648           0 :                     pgstat_recv_resetcounter(&msg.msg_resetcounter, len);
    3649           0 :                     break;
    3650             : 
    3651           2 :                 case PGSTAT_MTYPE_RESETSHAREDCOUNTER:
    3652           2 :                     pgstat_recv_resetsharedcounter(&msg.msg_resetsharedcounter,
    3653             :                                                    len);
    3654           2 :                     break;
    3655             : 
    3656           0 :                 case PGSTAT_MTYPE_RESETSINGLECOUNTER:
    3657           0 :                     pgstat_recv_resetsinglecounter(&msg.msg_resetsinglecounter,
    3658             :                                                    len);
    3659           0 :                     break;
    3660             : 
    3661           0 :                 case PGSTAT_MTYPE_RESETSLRUCOUNTER:
    3662           0 :                     pgstat_recv_resetslrucounter(&msg.msg_resetslrucounter,
    3663             :                                                  len);
    3664           0 :                     break;
    3665             : 
    3666           2 :                 case PGSTAT_MTYPE_RESETREPLSLOTCOUNTER:
    3667           2 :                     pgstat_recv_resetreplslotcounter(&msg.msg_resetreplslotcounter,
    3668             :                                                      len);
    3669           2 :                     break;
    3670             : 
    3671          58 :                 case PGSTAT_MTYPE_AUTOVAC_START:
    3672          58 :                     pgstat_recv_autovac(&msg.msg_autovacuum_start, len);
    3673          58 :                     break;
    3674             : 
    3675        5804 :                 case PGSTAT_MTYPE_VACUUM:
    3676        5804 :                     pgstat_recv_vacuum(&msg.msg_vacuum, len);
    3677        5804 :                     break;
    3678             : 
    3679        5400 :                 case PGSTAT_MTYPE_ANALYZE:
    3680        5400 :                     pgstat_recv_analyze(&msg.msg_analyze, len);
    3681        5400 :                     break;
    3682             : 
    3683          84 :                 case PGSTAT_MTYPE_ARCHIVER:
    3684          84 :                     pgstat_recv_archiver(&msg.msg_archiver, len);
    3685          84 :                     break;
    3686             : 
    3687        6826 :                 case PGSTAT_MTYPE_BGWRITER:
    3688        6826 :                     pgstat_recv_bgwriter(&msg.msg_bgwriter, len);
    3689        6826 :                     break;
    3690             : 
    3691        6188 :                 case PGSTAT_MTYPE_CHECKPOINTER:
    3692        6188 :                     pgstat_recv_checkpointer(&msg.msg_checkpointer, len);
    3693        6188 :                     break;
    3694             : 
    3695       11434 :                 case PGSTAT_MTYPE_WAL:
    3696       11434 :                     pgstat_recv_wal(&msg.msg_wal, len);
    3697       11434 :                     break;
    3698             : 
    3699       10818 :                 case PGSTAT_MTYPE_SLRU:
    3700       10818 :                     pgstat_recv_slru(&msg.msg_slru, len);
    3701       10818 :                     break;
    3702             : 
    3703           0 :                 case PGSTAT_MTYPE_FUNCSTAT:
    3704           0 :                     pgstat_recv_funcstat(&msg.msg_funcstat, len);
    3705           0 :                     break;
    3706             : 
    3707           0 :                 case PGSTAT_MTYPE_FUNCPURGE:
    3708           0 :                     pgstat_recv_funcpurge(&msg.msg_funcpurge, len);
    3709           0 :                     break;
    3710             : 
    3711           0 :                 case PGSTAT_MTYPE_RECOVERYCONFLICT:
    3712           0 :                     pgstat_recv_recoveryconflict(&msg.msg_recoveryconflict,
    3713             :                                                  len);
    3714           0 :                     break;
    3715             : 
    3716           4 :                 case PGSTAT_MTYPE_DEADLOCK:
    3717           4 :                     pgstat_recv_deadlock(&msg.msg_deadlock, len);
    3718           4 :                     break;
    3719             : 
    3720        3420 :                 case PGSTAT_MTYPE_TEMPFILE:
    3721        3420 :                     pgstat_recv_tempfile(&msg.msg_tempfile, len);
    3722        3420 :                     break;
    3723             : 
    3724           4 :                 case PGSTAT_MTYPE_CHECKSUMFAILURE:
    3725           4 :                     pgstat_recv_checksum_failure(&msg.msg_checksumfailure,
    3726             :                                                  len);
    3727           4 :                     break;
    3728             : 
    3729        8868 :                 case PGSTAT_MTYPE_REPLSLOT:
    3730        8868 :                     pgstat_recv_replslot(&msg.msg_replslot, len);
    3731        8868 :                     break;
    3732             : 
    3733       10900 :                 case PGSTAT_MTYPE_CONNECT:
    3734       10900 :                     pgstat_recv_connect(&msg.msg_connect, len);
    3735       10900 :                     break;
    3736             : 
    3737       10878 :                 case PGSTAT_MTYPE_DISCONNECT:
    3738       10878 :                     pgstat_recv_disconnect(&msg.msg_disconnect, len);
    3739       10878 :                     break;
    3740             : 
    3741          34 :                 case PGSTAT_MTYPE_SUBSCRIPTIONPURGE:
    3742          34 :                     pgstat_recv_subscription_purge(&msg.msg_subscriptionpurge, len);
    3743          34 :                     break;
    3744             : 
    3745          14 :                 case PGSTAT_MTYPE_SUBWORKERERROR:
    3746          14 :                     pgstat_recv_subworker_error(&msg.msg_subworkererror, len);
    3747          14 :                     break;
    3748             : 
    3749           0 :                 default:
    3750           0 :                     break;
    3751             :             }
    3752             :         }                       /* end of inner message-processing loop */
    3753             : 
    3754             :         /* Sleep until there's something to do */
    3755             : #ifndef WIN32
    3756      118334 :         wr = WaitEventSetWait(wes, -1L, &event, 1, WAIT_EVENT_PGSTAT_MAIN);
    3757             : #else
    3758             : 
    3759             :         /*
    3760             :          * Windows, at least in its Windows Server 2003 R2 incarnation,
    3761             :          * sometimes loses FD_READ events.  Waking up and retrying the recv()
    3762             :          * fixes that, so don't sleep indefinitely.  This is a crock of the
    3763             :          * first water, but until somebody wants to debug exactly what's
    3764             :          * happening there, this is the best we can do.  The two-second
    3765             :          * timeout matches our pre-9.2 behavior, and needs to be short enough
    3766             :          * to not provoke "using stale statistics" complaints from
    3767             :          * backend_read_statsfile.
    3768             :          */
    3769             :         wr = WaitEventSetWait(wes, 2 * 1000L /* msec */ , &event, 1,
    3770             :                               WAIT_EVENT_PGSTAT_MAIN);
    3771             : #endif
    3772             : 
    3773             :         /*
    3774             :          * Emergency bailout if postmaster has died.  This is to avoid the
    3775             :          * necessity for manual cleanup of all postmaster children.
    3776             :          */
    3777      118334 :         if (wr == 1 && event.events == WL_POSTMASTER_DEATH)
    3778           6 :             break;
    3779             :     }                           /* end of outer loop */
    3780             : 
    3781             :     /*
    3782             :      * Save the final stats to reuse at next startup.
    3783             :      */
    3784         944 :     pgstat_write_statsfiles(true, true);
    3785             : 
    3786         944 :     FreeWaitEventSet(wes);
    3787             : 
    3788         944 :     exit(0);
    3789             : }
    3790             : 
    3791             : /*
    3792             :  * Subroutine to clear stats in a database entry.
    3793             :  *
    3794             :  * All of the entry's counters and timestamps are zeroed, except that
    3795             :  * stat_reset_timestamp is set to the current time.  The databaseid
                     :  * field is not touched.
                     :  *
                     :  * Tables, functions, and subscription workers hashes are initialized
                     :  * to empty by creating new hash tables.  The previous values of those
                     :  * three pointers are overwritten without being destroyed here, so a
                     :  * caller resetting an already-populated entry presumably has to
                     :  * hash_destroy() the old tables itself (confirm against callers).
    3796             :  */
    3797             : static void
    3798        1732 : reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
    3799             : {
    3800             :     HASHCTL     hash_ctl;
    3801             :     /* Zero every plain counter and the "last ..." timestamps. */
    3802        1732 :     dbentry->n_xact_commit = 0;
    3803        1732 :     dbentry->n_xact_rollback = 0;
    3804        1732 :     dbentry->n_blocks_fetched = 0;
    3805        1732 :     dbentry->n_blocks_hit = 0;
    3806        1732 :     dbentry->n_tuples_returned = 0;
    3807        1732 :     dbentry->n_tuples_fetched = 0;
    3808        1732 :     dbentry->n_tuples_inserted = 0;
    3809        1732 :     dbentry->n_tuples_updated = 0;
    3810        1732 :     dbentry->n_tuples_deleted = 0;
    3811        1732 :     dbentry->last_autovac_time = 0;
    3812        1732 :     dbentry->n_conflict_tablespace = 0;
    3813        1732 :     dbentry->n_conflict_lock = 0;
    3814        1732 :     dbentry->n_conflict_snapshot = 0;
    3815        1732 :     dbentry->n_conflict_bufferpin = 0;
    3816        1732 :     dbentry->n_conflict_startup_deadlock = 0;
    3817        1732 :     dbentry->n_temp_files = 0;
    3818        1732 :     dbentry->n_temp_bytes = 0;
    3819        1732 :     dbentry->n_deadlocks = 0;
    3820        1732 :     dbentry->n_checksum_failures = 0;
    3821        1732 :     dbentry->last_checksum_failure = 0;
    3822        1732 :     dbentry->n_block_read_time = 0;
    3823        1732 :     dbentry->n_block_write_time = 0;
    3824        1732 :     dbentry->n_sessions = 0;
    3825        1732 :     dbentry->total_session_time = 0;
    3826        1732 :     dbentry->total_active_time = 0;
    3827        1732 :     dbentry->total_idle_in_xact_time = 0;
    3828        1732 :     dbentry->n_sessions_abandoned = 0;
    3829        1732 :     dbentry->n_sessions_fatal = 0;
    3830        1732 :     dbentry->n_sessions_killed = 0;
    3831             :     /* Remember when the reset happened; stats_timestamp starts out unset. */
    3832        1732 :     dbentry->stat_reset_timestamp = GetCurrentTimestamp();
    3833        1732 :     dbentry->stats_timestamp = 0;
    3834             :     /*
                     :      * Create the three empty per-database hash tables.  hash_ctl is not
                     :      * zeroed; only keysize and entrysize are filled in before each call,
                     :      * which is presumably all that HASH_ELEM | HASH_BLOBS makes
                     :      * hash_create() look at.
                     :      */
    3835        1732 :     hash_ctl.keysize = sizeof(Oid);
    3836        1732 :     hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
    3837        1732 :     dbentry->tables = hash_create("Per-database table",
    3838             :                                   PGSTAT_TAB_HASH_SIZE,
    3839             :                                   &hash_ctl,
    3840             :                                   HASH_ELEM | HASH_BLOBS);
    3841             : 
    3842        1732 :     hash_ctl.keysize = sizeof(Oid);
    3843        1732 :     hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
    3844        1732 :     dbentry->functions = hash_create("Per-database function",
    3845             :                                      PGSTAT_FUNCTION_HASH_SIZE,
    3846             :                                      &hash_ctl,
    3847             :                                      HASH_ELEM | HASH_BLOBS);
    3848             :     /* Subscription workers are keyed by a PgStat_StatSubWorkerKey, not a bare Oid. */
    3849        1732 :     hash_ctl.keysize = sizeof(PgStat_StatSubWorkerKey);
    3850        1732 :     hash_ctl.entrysize = sizeof(PgStat_StatSubWorkerEntry);
    3851        1732 :     dbentry->subworkers = hash_create("Per-database subscription worker",
    3852             :                                       PGSTAT_SUBWORKER_HASH_SIZE,
    3853             :                                       &hash_ctl,
    3854             :                                       HASH_ELEM | HASH_BLOBS);
    3855        1732 : }
    3856             : 
    3857             : /*
    3858             :  * Lookup the entry for the specified database in pgStatDBHash.  If no
    3859             :  * hash table entry exists, create and initialize it, if the create
    3860             :  * parameter is true.  Else, return NULL.
    3861             :  */
    3862             : static PgStat_StatDBEntry *
    3863      158254 : pgstat_get_db_entry(Oid databaseid, bool create)
    3864             : {
    3865             :     PgStat_StatDBEntry *result;
    3866             :     bool        found;
    3867      158254 :     HASHACTION  action = (create ? HASH_ENTER : HASH_FIND);
    3868             : 
    3869             :     /* Lookup (HASH_FIND) or create (HASH_ENTER) the entry for this database */
    3870      158254 :     result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
    3871             :                                                 &databaseid,
    3872             :                                                 action, &found);
    3873             :     /* A pure lookup that found nothing reports that with NULL */
    3874      158254 :     if (!create && !found)
    3875         626 :         return NULL;
    3876             : 
    3877             :     /*
    3878             :      * If not found (which at this point implies create was true, so the
                     :      * entry was just added), initialize the new one.  This creates empty
    3879             :      * hash tables for tables, functions, and subscription workers, too.
    3880             :      */
    3881      157628 :     if (!found)
    3882        1732 :         reset_dbentry_counters(result);
    3883             : 
    3884      157628 :     return result;
    3885             : }
    3886             : 
    3887             : 
    3888             : /*
    3889             :  * Lookup the entry for the specified table in dbentry->tables.  If no
    3890             :  * hash table entry exists, create it with all statistics zeroed, if the
    3891             :  * create parameter is true.  Else, return NULL.
    3892             :  */
    3893             : static PgStat_StatTabEntry *
    3894       11204 : pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
    3895             : {
    3896             :     PgStat_StatTabEntry *result;
    3897             :     bool        found;
    3898       11204 :     HASHACTION  action = (create ? HASH_ENTER : HASH_FIND);
    3899             : 
    3900             :     /* Lookup (HASH_FIND) or create (HASH_ENTER) the entry for this table */
    3901       11204 :     result = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
    3902             :                                                  &tableoid,
    3903             :                                                  action, &found);
    3904             :     /* A pure lookup that found nothing reports that with NULL */
    3905       11204 :     if (!create && !found)
    3906           0 :         return NULL;
    3907             : 
    3908             :     /*
                     :      * If not found, the entry was just created (create must be true
                     :      * here): start every counter and timestamp at zero.
                     :      */
    3909       11204 :     if (!found)
    3910             :     {
    3911        3938 :         result->numscans = 0;
    3912        3938 :         result->tuples_returned = 0;
    3913        3938 :         result->tuples_fetched = 0;
    3914        3938 :         result->tuples_inserted = 0;
    3915        3938 :         result->tuples_updated = 0;
    3916        3938 :         result->tuples_deleted = 0;
    3917        3938 :         result->tuples_hot_updated = 0;
    3918        3938 :         result->n_live_tuples = 0;
    3919        3938 :         result->n_dead_tuples = 0;
    3920        3938 :         result->changes_since_analyze = 0;
    3921        3938 :         result->inserts_since_vacuum = 0;
    3922        3938 :         result->blocks_fetched = 0;
    3923        3938 :         result->blocks_hit = 0;
    3924        3938 :         result->vacuum_timestamp = 0;
    3925        3938 :         result->vacuum_count = 0;
    3926        3938 :         result->autovac_vacuum_timestamp = 0;
    3927        3938 :         result->autovac_vacuum_count = 0;
    3928        3938 :         result->analyze_timestamp = 0;
    3929        3938 :         result->analyze_count = 0;
    3930        3938 :         result->autovac_analyze_timestamp = 0;
    3931        3938 :         result->autovac_analyze_count = 0;
    3932             :     }
    3933             : 
    3934       11204 :     return result;
    3935             : }
    3936             : 
    3937             : /* ----------
    3938             :  * pgstat_get_subworker_entry
    3939             :  *
    3940             :  * Return subscription worker entry with the given subscription OID and
    3941             :  * relation OID.  If subrelid is InvalidOid, it returns an entry of the
    3942             :  * apply worker otherwise returns an entry of the table sync worker
    3943             :  * associated with subrelid.  If no subscription worker entry exists,
    3944             :  * initialize it, if the create parameter is true.  Else, return NULL.
                     :  *
                     :  * The entry is looked up in dbentry->subworkers using a key made of
                     :  * (subid, subrelid).  A newly created entry starts with no recorded
                     :  * error: invalid relid and xid, zero command/count/time, and an empty
                     :  * message string.
    3945             :  * ----------
    3946             :  */
    3947             : static PgStat_StatSubWorkerEntry *
    3948          26 : pgstat_get_subworker_entry(PgStat_StatDBEntry *dbentry, Oid subid, Oid subrelid,
    3949             :                            bool create)
    3950             : {
    3951             :     PgStat_StatSubWorkerEntry *subwentry;
    3952             :     PgStat_StatSubWorkerKey key;
    3953             :     bool        found;
    3954          26 :     HASHACTION  action = (create ? HASH_ENTER : HASH_FIND);
    3955             :     /* Build the composite hash key and look it up (or enter it) */
    3956          26 :     key.subid = subid;
    3957          26 :     key.subrelid = subrelid;
    3958          26 :     subwentry = (PgStat_StatSubWorkerEntry *) hash_search(dbentry->subworkers,
    3959             :                                                           (void *) &key,
    3960             :                                                           action, &found);
    3961             :     /* A pure lookup that found nothing reports that with NULL */
    3962          26 :     if (!create && !found)
    3963           4 :         return NULL;
    3964             : 
    3965             :     /* If not found, the entry was just created: initialize the new one */
    3966          22 :     if (!found)
    3967             :     {
    3968          12 :         subwentry->last_error_relid = InvalidOid;
    3969          12 :         subwentry->last_error_command = 0;
    3970          12 :         subwentry->last_error_xid = InvalidTransactionId;
    3971          12 :         subwentry->last_error_count = 0;
    3972          12 :         subwentry->last_error_time = 0;
    3973          12 :         subwentry->last_error_message[0] = '\0';
    3974             :     }
    3975             : 
    3976          22 :     return subwentry;
    3977             : }
    3978             : 
    3979             : /* ----------
    3980             :  * pgstat_write_statsfiles() -
    3981             :  *      Write the global statistics file, as well as requested DB files.
    3982             :  *
    3983             :  *  'permanent' specifies writing to the permanent files not temporary ones.
    3984             :  *  When true (happens only when the collector is shutting down), also remove
    3985             :  *  the temporary files so that backends starting up under a new postmaster
    3986             :  *  can't read old data before the new collector is ready.
    3987             :  *
    3988             :  *  When 'allDbs' is false, only the requested databases (listed in
    3989             :  *  pending_write_requests) will be written; otherwise, all databases
    3990             :  *  will be written.
                     :  *
                     :  *  The output is written to a temp file that is then renamed over the
                     :  *  target file.  Failures are reported at LOG level and are not
                     :  *  propagated to the caller.  pending_write_requests is discarded at
                     :  *  the end whether or not the write succeeded; only when the temp file
                     :  *  cannot even be opened do we return early and leave the list alone.
                     :  *
                     :  *  File layout as written here: format ID, globalStats, archiverStats,
                     :  *  walStats, slruStats, then one 'D'-tagged record per database, one
                     :  *  'R'-tagged record per replication slot, and a closing 'E' byte.
    3991             :  * ----------
    3992             :  */
    3993             : static void
    3994        2408 : pgstat_write_statsfiles(bool permanent, bool allDbs)
    3995             : {
    3996             :     HASH_SEQ_STATUS hstat;
    3997             :     PgStat_StatDBEntry *dbentry;
    3998             :     FILE       *fpout;
    3999             :     int32       format_id;
    4000        2408 :     const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
    4001        2408 :     const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
    4002             :     int         rc;
    4003             : 
    4004        2408 :     elog(DEBUG2, "writing stats file \"%s\"", statfile);
    4005             : 
    4006             :     /*
    4007             :      * Open the statistics temp file to write out the current values.  On
                     :      * failure we just log and give up; nothing else has been modified yet.
    4008             :      */
    4009        2408 :     fpout = AllocateFile(tmpfile, PG_BINARY_W);
    4010        2408 :     if (fpout == NULL)
    4011             :     {
    4012           0 :         ereport(LOG,
    4013             :                 (errcode_for_file_access(),
    4014             :                  errmsg("could not open temporary statistics file \"%s\": %m",
    4015             :                         tmpfile)));
    4016           0 :         return;
    4017             :     }
    4018             : 
    4019             :     /*
    4020             :      * Set the timestamp of the stats file.  It is stored in globalStats so
                     :      * that it gets written out as part of that struct below.
    4021             :      */
    4022        2408 :     globalStats.stats_timestamp = GetCurrentTimestamp();
    4023             : 
    4024             :     /*
    4025             :      * Write the file header --- currently just a format ID.
    4026             :      */
    4027        2408 :     format_id = PGSTAT_FILE_FORMAT_ID;
    4028        2408 :     rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
    4029             :     (void) rc;                  /* we'll check for error with ferror */
    4030             : 
    4031             :     /*
    4032             :      * Write global stats struct
    4033             :      */
    4034        2408 :     rc = fwrite(&globalStats, sizeof(globalStats), 1, fpout);
    4035             :     (void) rc;                  /* we'll check for error with ferror */
    4036             : 
    4037             :     /*
    4038             :      * Write archiver stats struct
    4039             :      */
    4040        2408 :     rc = fwrite(&archiverStats, sizeof(archiverStats), 1, fpout);
    4041             :     (void) rc;                  /* we'll check for error with ferror */
    4042             : 
    4043             :     /*
    4044             :      * Write WAL stats struct
    4045             :      */
    4046        2408 :     rc = fwrite(&walStats, sizeof(walStats), 1, fpout);
    4047             :     (void) rc;                  /* we'll check for error with ferror */
    4048             : 
    4049             :     /*
    4050             :      * Write SLRU stats (all of slruStats in a single fwrite)
    4051             :      */
    4052        2408 :     rc = fwrite(slruStats, sizeof(slruStats), 1, fpout);
    4053             :     (void) rc;                  /* we'll check for error with ferror */
    4054             : 
    4055             :     /*
    4056             :      * Walk through the database table.
    4057             :      */
    4058        2408 :     hash_seq_init(&hstat, pgStatDBHash);
    4059        6842 :     while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
    4060             :     {
    4061             :         /*
    4062             :          * Write out the table, function, and subscription-worker stats for
    4063             :          * this DB into the appropriate per-DB stat file, if required.
    4064             :          */
    4065        4434 :         if (allDbs || pgstat_db_requested(dbentry->databaseid))
    4066             :         {
    4067             :             /* Make DB's timestamp consistent with the global stats */
    4068        3554 :             dbentry->stats_timestamp = globalStats.stats_timestamp;
    4069             : 
    4070        3554 :             pgstat_write_db_statsfile(dbentry, permanent);
    4071             :         }
    4072             : 
    4073             :         /*
    4074             :          * Write out the DB entry, tagged with 'D'.  This happens for every
                     :          * database, whether or not its per-DB file was rewritten above.
                     :          * Only the part of the struct that precedes the 'tables' member is
                     :          * written; we don't write the hash table pointers (tables, and
                     :          * presumably the functions and subworkers pointers that follow it),
    4075             :          * since they're of no use to any other process.
    4076             :          */
    4077        4434 :         fputc('D', fpout);
    4078        4434 :         rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
    4079             :         (void) rc;              /* we'll check for error with ferror */
    4080             :     }
    4081             : 
    4082             :     /*
    4083             :      * Write replication slot stats, one 'R'-tagged record per slot.  The
                     :      * hash table may not exist, in which case nothing is written.
    4084             :      */
    4085        2408 :     if (replSlotStatHash)
    4086             :     {
    4087             :         PgStat_StatReplSlotEntry *slotent;
    4088             : 
    4089         128 :         hash_seq_init(&hstat, replSlotStatHash);
    4090         280 :         while ((slotent = (PgStat_StatReplSlotEntry *) hash_seq_search(&hstat)) != NULL)
    4091             :         {
    4092         152 :             fputc('R', fpout);
    4093         152 :             rc = fwrite(slotent, sizeof(PgStat_StatReplSlotEntry), 1, fpout);
    4094             :             (void) rc;          /* we'll check for error with ferror */
    4095             :         }
    4096             :     }
    4097             : 
    4098             :     /*
    4099             :      * No more output to be done.  Write the 'E' end marker, close the temp
    4100             :      * file and replace the old stats file with it.  The ferror() check
    4101             :      * replaces testing for error after each individual fputc or fwrite
                     :      * above.  On any failure the temp file is removed and the previous
                     :      * stats file, if any, is left in place.
    4102             :      */
    4103        2408 :     fputc('E', fpout);
    4104             : 
    4105        2408 :     if (ferror(fpout))
    4106             :     {
    4107           0 :         ereport(LOG,
    4108             :                 (errcode_for_file_access(),
    4109             :                  errmsg("could not write temporary statistics file \"%s\": %m",
    4110             :                         tmpfile)));
    4111           0 :         FreeFile(fpout);
    4112           0 :         unlink(tmpfile);
    4113             :     }
    4114        2408 :     else if (FreeFile(fpout) < 0)
    4115             :     {
    4116           0 :         ereport(LOG,
    4117             :                 (errcode_for_file_access(),
    4118             :                  errmsg("could not close temporary statistics file \"%s\": %m",
    4119             :                         tmpfile)));
    4120           0 :         unlink(tmpfile);
    4121             :     }
    4122        2408 :     else if (rename(tmpfile, statfile) < 0)
    4123             :     {
    4124           0 :         ereport(LOG,
    4125             :                 (errcode_for_file_access(),
    4126             :                  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
    4127             :                         tmpfile, statfile)));
    4128           0 :         unlink(tmpfile);
    4129             :     }
    4130             :     /* In permanent mode, remove the non-permanent stats file, even if the write above failed */
    4131        2408 :     if (permanent)
    4132         944 :         unlink(pgstat_stat_filename);
    4133             : 
    4134             :     /*
    4135             :      * Now throw away the list of requests.  Note that requests sent after we
    4136             :      * started the write are still waiting on the network socket.
    4137             :      */
    4138        2408 :     list_free(pending_write_requests);
    4139        2408 :     pending_write_requests = NIL;
    4140             : }
    4141             : 
    4142             : /*
    4143             :  * return the filename for a DB stat file; filename is the output buffer,
    4144             :  * of length len.
                     :  *
                     :  * The name has the form "<directory>/db_<databaseid>.<suffix>", where
                     :  * the directory is PGSTAT_STAT_PERMANENT_DIRECTORY if 'permanent' is
                     :  * true and pgstat_stat_directory otherwise, and the suffix is "tmp" if
                     :  * 'tempname' is true and "stat" otherwise.
                     :  *
                     :  * Raises elog(ERROR) if the name does not fit into the buffer.
    4145             :  */
    4146             : static void
    4147       16188 : get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
    4148             :                     char *filename, int len)
    4149             : {
    4150             :     int         printed;
    4151             : 
    4152             :     /* NB -- pgstat_reset_remove_files knows about the pattern this uses */
    4153       16188 :     printed = snprintf(filename, len, "%s/db_%u.%s",
    4154             :                        permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY :
    4155             :                        pgstat_stat_directory,
    4156             :                        databaseid,
    4157             :                        tempname ? "tmp" : "stat");
    4158       16188 :     if (printed >= len)         /* output (plus terminator) did not fit */
    4159           0 :         elog(ERROR, "overlength pgstat path");
    4160       16188 : }
    4161             : 
    4162             : /* ----------
    4163             :  * pgstat_write_db_statsfile() -
    4164             :  *      Write the stat file for a single database.
    4165             :  *
                     :  *  The data is first written under the database's "tmp" file name and
                     :  *  then renamed to its "stat" file name.  The file consists of a format
                     :  *  ID header followed by one tagged record per hash entry: 'T' plus a
                     :  *  PgStat_StatTabEntry, 'F' plus a PgStat_StatFuncEntry, or 'S' plus a
                     :  *  PgStat_StatSubWorkerEntry.  A final 'E' byte ends the file.
                     :  *
                     :  *  Failures to open, write, close or rename the file are only reported
                     :  *  at LOG level; after a write, close or rename failure the tmp file is
                     :  *  unlinked.
                     :  *
    4166             :  *  If writing to the permanent file (happens when the collector is
    4167             :  *  shutting down only), remove the temporary file so that backends
    4168             :  *  starting up under a new postmaster can't read the old data before
    4169             :  *  the new collector is ready.  The file removed is the one whose name is
                     :  *  built with permanent = false; the removal is attempted even if the
                     :  *  permanent write failed, and is skipped only when the tmp file could
                     :  *  not be opened, because the function returns early in that case.
    4170             :  * ----------
    4171             :  */
    4172             : static void
    4173        3554 : pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent)
    4174             : {
    4175             :     HASH_SEQ_STATUS tstat;
    4176             :     HASH_SEQ_STATUS fstat;
    4177             :     HASH_SEQ_STATUS sstat;
    4178             :     PgStat_StatTabEntry *tabentry;
    4179             :     PgStat_StatFuncEntry *funcentry;
    4180             :     PgStat_StatSubWorkerEntry *subwentry;
    4181             :     FILE       *fpout;
    4182             :     int32       format_id;
    4183        3554 :     Oid         dbid = dbentry->databaseid;
    4184             :     int         rc;
    4185             :     char        tmpfile[MAXPGPATH];
    4186             :     char        statfile[MAXPGPATH];
    4187             : 
    4188        3554 :     get_dbstat_filename(permanent, true, dbid, tmpfile, MAXPGPATH);
    4189        3554 :     get_dbstat_filename(permanent, false, dbid, statfile, MAXPGPATH);
    4190             : 
    4191        3554 :     elog(DEBUG2, "writing stats file \"%s\"", statfile);
    4192             : 
    4193             :     /*
    4194             :      * Open the statistics temp file to write out the current values.  If
                     :      * that fails, log the problem and give up without touching any file.
    4195             :      */
    4196        3554 :     fpout = AllocateFile(tmpfile, PG_BINARY_W);
    4197        3554 :     if (fpout == NULL)
    4198             :     {
    4199           0 :         ereport(LOG,
    4200             :                 (errcode_for_file_access(),
    4201             :                  errmsg("could not open temporary statistics file \"%s\": %m",
    4202             :                         tmpfile)));
    4203           0 :         return;
    4204             :     }
    4205             : 
    4206             :     /*
    4207             :      * Write the file header --- currently just a format ID.
    4208             :      */
    4209        3554 :     format_id = PGSTAT_FILE_FORMAT_ID;
    4210        3554 :     rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
    4211             :     (void) rc;                  /* we'll check for error with ferror */
    4212             : 
    4213             :     /*
    4214             :      * Walk through the database's access stats per table, writing each
                     :      * entry as a 'T' tag byte followed by the raw struct.
    4215             :      */
    4216        3554 :     hash_seq_init(&tstat, dbentry->tables);
    4217      382944 :     while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
    4218             :     {
    4219      379390 :         fputc('T', fpout);
    4220      379390 :         rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
    4221             :         (void) rc;              /* we'll check for error with ferror */
    4222             :     }
    4223             : 
    4224             :     /*
    4225             :      * Walk through the database's function stats table, writing each entry
                     :      * as an 'F' tag byte followed by the raw struct.
    4226             :      */
    4227        3554 :     hash_seq_init(&fstat, dbentry->functions);
    4228        3554 :     while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
    4229             :     {
    4230           0 :         fputc('F', fpout);
    4231           0 :         rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
    4232             :         (void) rc;              /* we'll check for error with ferror */
    4233             :     }
    4234             : 
    4235             :     /*
    4236             :      * Walk through the database's subscription worker stats table, writing
                     :      * each entry as an 'S' tag byte followed by the raw struct.
    4237             :      */
    4238        3554 :     hash_seq_init(&sstat, dbentry->subworkers);
    4239        3562 :     while ((subwentry = (PgStat_StatSubWorkerEntry *) hash_seq_search(&sstat)) != NULL)
    4240             :     {
    4241           8 :         fputc('S', fpout);
    4242           8 :         rc = fwrite(subwentry, sizeof(PgStat_StatSubWorkerEntry), 1, fpout);
    4243             :         (void) rc;              /* we'll check for error with ferror */
    4244             :     }
    4245             : 
    4246             :     /*
    4247             :      * No more output to be done.  Write the 'E' end marker, close the temp
    4248             :      * file and replace the old per-database stats file with it.  The
    4249             :      * ferror() check replaces testing for error after each individual fputc
                     :      * or fwrite above.  Each failure branch below logs the problem and
                     :      * unlinks the temp file; the rename is attempted only if writing and
                     :      * closing both succeeded.
    4250             :      */
    4251        3554 :     fputc('E', fpout);
    4252             : 
    4253        3554 :     if (ferror(fpout))
    4254             :     {
    4255           0 :         ereport(LOG,
    4256             :                 (errcode_for_file_access(),
    4257             :                  errmsg("could not write temporary statistics file \"%s\": %m",
    4258             :                         tmpfile)));
    4259           0 :         FreeFile(fpout);
    4260           0 :         unlink(tmpfile);
    4261             :     }
    4262        3554 :     else if (FreeFile(fpout) < 0)
    4263             :     {
    4264           0 :         ereport(LOG,
    4265             :                 (errcode_for_file_access(),
    4266             :                  errmsg("could not close temporary statistics file \"%s\": %m",
    4267             :                         tmpfile)));
    4268           0 :         unlink(tmpfile);
    4269             :     }
    4270        3554 :     else if (rename(tmpfile, statfile) < 0)
    4271             :     {
    4272           0 :         ereport(LOG,
    4273             :                 (errcode_for_file_access(),
    4274             :                  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
    4275             :                         tmpfile, statfile)));
    4276           0 :         unlink(tmpfile);
    4277             :     }
    4278             : 
    4279        3554 :     if (permanent)              /* also drop the non-permanent stats file */
    4280             :     {
    4281        2108 :         get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
    4282             : 
    4283        2108 :         elog(DEBUG2, "removing temporary stats file \"%s\"", statfile);
    4284        2108 :         unlink(statfile);
    4285             :     }
    4286             : }
    4287             : 
    4288             : /* ----------
    4289             :  * pgstat_read_statsfiles() -
    4290             :  *
    4291             :  *  Reads in some existing statistics collector files and returns the
    4292             :  *  databases hash table that is the top level of the data.
    4293             :  *
                     :  *  Besides building the returned hash table, this resets and then loads
                     :  *  the globalStats, archiverStats, walStats and slruStats variables
                     :  *  (declared outside this function), and enters any replication slot
                     :  *  records into replSlotStatHash, creating that hash table if it is NULL.
                     :  *
    4294             :  *  If 'onlydb' is not InvalidOid, it means we only want data for that DB
    4295             :  *  plus the shared catalogs ("DB 0").  We'll still populate the DB hash
    4296             :  *  table for all databases, but we don't bother even creating
    4297             :  *  table/function/subscription-worker hash tables for other databases.
    4298             :  *
    4299             :  *  'permanent' specifies reading from the permanent files not temporary ones.
    4300             :  *  When true (happens only when the collector is starting up), remove the
    4301             :  *  files after reading; the in-memory status is now authoritative, and the
    4302             :  *  files would be out of date in case somebody else reads them.
    4303             :  *
    4304             :  *  If a 'deep' read is requested, table/function/subscription-worker stats are
    4305             :  *  read, otherwise the table/function/subscription-worker hash tables remain
    4306             :  *  empty.
                     :  *
                     :  *  If the stats file cannot be opened, an empty databases hash table is
                     :  *  returned (a message is reported unless the error is ENOENT) and no
                     :  *  file is removed.  If the file turns out to be corrupted, a message is
                     :  *  reported at LOG level in the collector and at WARNING level otherwise,
                     :  *  reading stops, and whatever was loaded up to that point is returned.
    4307             :  * ----------
    4308             :  */
    4309             : static HTAB *
    4310        5794 : pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
    4311             : {
    4312             :     PgStat_StatDBEntry *dbentry;
    4313             :     PgStat_StatDBEntry dbbuf;
    4314             :     HASHCTL     hash_ctl;
    4315             :     HTAB       *dbhash;
    4316             :     FILE       *fpin;
    4317             :     int32       format_id;
    4318             :     bool        found;
    4319        5794 :     const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
    4320             :     int         i;
    4321             :     TimestampTz ts;
    4322             : 
    4323             :     /*
    4324             :      * The tables will live in pgStatLocalContext.
    4325             :      */
    4326        5794 :     pgstat_setup_memcxt();
    4327             : 
    4328             :     /*
    4329             :      * Create the DB hashtable, keyed by database OID.
    4330             :      */
    4331        5794 :     hash_ctl.keysize = sizeof(Oid);
    4332        5794 :     hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
    4333        5794 :     hash_ctl.hcxt = pgStatLocalContext;
    4334        5794 :     dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
    4335             :                          HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    4336             : 
    4337             :     /*
    4338             :      * Clear out global, archiver, WAL and SLRU statistics so they start from
    4339             :      * zero in case we can't load an existing statsfile.
    4340             :      */
    4341        5794 :     memset(&globalStats, 0, sizeof(globalStats));
    4342        5794 :     memset(&archiverStats, 0, sizeof(archiverStats));
    4343        5794 :     memset(&walStats, 0, sizeof(walStats));
    4344        5794 :     memset(&slruStats, 0, sizeof(slruStats));
    4345             : 
    4346             :     /*
    4347             :      * Set the current timestamp (will be kept only in case we can't load an
    4348             :      * existing statsfile).
    4349             :      */
    4350        5794 :     ts = GetCurrentTimestamp();
    4351        5794 :     globalStats.bgwriter.stat_reset_timestamp = ts;
    4352        5794 :     archiverStats.stat_reset_timestamp = ts;
    4353        5794 :     walStats.stat_reset_timestamp = ts;
    4354             : 
    4355             :     /*
    4356             :      * Set the same reset timestamp for all SLRU items too.
    4357             :      */
    4358       52146 :     for (i = 0; i < SLRU_NUM_ELEMENTS; i++)
    4359       46352 :         slruStats[i].stat_reset_timestamp = ts;
    4360             : 
    4361             :     /*
    4362             :      * Try to open the stats file. If it doesn't exist, the backends simply
    4363             :      * return zero for anything and the collector simply starts from scratch
    4364             :      * with empty counters.
    4365             :      *
    4366             :      * ENOENT is a possibility if the stats collector is not running or has
    4367             :      * not yet written the stats file the first time.  Any other failure
    4368             :      * condition is suspicious.
                     :      *
                     :      * Note that this path returns directly rather than via "done", so the
                     :      * unlink of the permanent file below is not attempted.
    4369             :      */
    4370        5794 :     if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
    4371             :     {
    4372         784 :         if (errno != ENOENT)
    4373           0 :             ereport(pgStatRunningInCollector ? LOG : WARNING,
    4374             :                     (errcode_for_file_access(),
    4375             :                      errmsg("could not open statistics file \"%s\": %m",
    4376             :                             statfile)));
    4377         784 :         return dbhash;
    4378             :     }
    4379             : 
    4380             :     /*
    4381             :      * Verify it's of the expected format.
    4382             :      */
    4383        5010 :     if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
    4384        5010 :         format_id != PGSTAT_FILE_FORMAT_ID)
    4385             :     {
    4386           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4387             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4388           0 :         goto done;
    4389             :     }
    4390             : 
    4391             :     /*
    4392             :      * Read global stats struct.  On a short read the struct is zeroed again
                     :      * so that no partially-read data is left behind; this also clears the
                     :      * reset timestamp that was assigned above.  The archiver, WAL and SLRU
                     :      * reads below are handled the same way.
    4393             :      */
    4394        5010 :     if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats))
    4395             :     {
    4396           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4397             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4398           0 :         memset(&globalStats, 0, sizeof(globalStats));
    4399           0 :         goto done;
    4400             :     }
    4401             : 
    4402             :     /*
    4403             :      * In the collector, disregard the timestamp we read from the permanent
    4404             :      * stats file; we should be willing to write a temp stats file immediately
    4405             :      * upon the first request from any backend.  This only matters if the old
    4406             :      * file's timestamp is less than PGSTAT_STAT_INTERVAL ago, but that's not
    4407             :      * an unusual scenario.
    4408             :      */
    4409        5010 :     if (pgStatRunningInCollector)
    4410         160 :         globalStats.stats_timestamp = 0;
    4411             : 
    4412             :     /*
    4413             :      * Read archiver stats struct
    4414             :      */
    4415        5010 :     if (fread(&archiverStats, 1, sizeof(archiverStats), fpin) != sizeof(archiverStats))
    4416             :     {
    4417           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4418             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4419           0 :         memset(&archiverStats, 0, sizeof(archiverStats));
    4420           0 :         goto done;
    4421             :     }
    4422             : 
    4423             :     /*
    4424             :      * Read WAL stats struct
    4425             :      */
    4426        5010 :     if (fread(&walStats, 1, sizeof(walStats), fpin) != sizeof(walStats))
    4427             :     {
    4428           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4429             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4430           0 :         memset(&walStats, 0, sizeof(walStats));
    4431           0 :         goto done;
    4432             :     }
    4433             : 
    4434             :     /*
    4435             :      * Read SLRU stats struct
    4436             :      */
    4437        5010 :     if (fread(slruStats, 1, sizeof(slruStats), fpin) != sizeof(slruStats))
    4438             :     {
    4439           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4440             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4441           0 :         memset(&slruStats, 0, sizeof(slruStats));
    4442           0 :         goto done;
    4443             :     }
    4444             : 
    4445             :     /*
    4446             :      * We found an existing collector stats file. Read it and put all the
    4447             :      * hashtable entries into place.  Each record starts with a one-byte tag
                     :      * that selects the case below.
    4448             :      */
    4449             :     for (;;)
    4450             :     {
    4451       14608 :         switch (fgetc(fpin))
    4452             :         {
    4453             :                 /*
    4454             :                  * 'D'  A PgStat_StatDBEntry struct describing a database
    4455             :                  * follows.  Only the part of the struct that precedes the
                     :                  * 'tables' field is read from the file.
    4456             :                  */
    4457        9460 :             case 'D':
    4458        9460 :                 if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
    4459             :                           fpin) != offsetof(PgStat_StatDBEntry, tables))
    4460             :                 {
    4461           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4462             :                             (errmsg("corrupted statistics file \"%s\"",
    4463             :                                     statfile)));
    4464           0 :                     goto done;
    4465             :                 }
    4466             : 
    4467             :                 /*
    4468             :                  * Add to the DB hash.  Finding the database already present
                     :                  * means the file holds a duplicate entry, which is treated
                     :                  * as corruption.  Otherwise copy the buffer into the new
                     :                  * entry and reset the hash table pointers, which were not
                     :                  * read from the file.
    4469             :                  */
    4470        9460 :                 dbentry = (PgStat_StatDBEntry *) hash_search(dbhash,
    4471             :                                                              (void *) &dbbuf.databaseid,
    4472             :                                                              HASH_ENTER,
    4473             :                                                              &found);
    4474        9460 :                 if (found)
    4475             :                 {
    4476           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4477             :                             (errmsg("corrupted statistics file \"%s\"",
    4478             :                                     statfile)));
    4479           0 :                     goto done;
    4480             :                 }
    4481             : 
    4482        9460 :                 memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
    4483        9460 :                 dbentry->tables = NULL;
    4484        9460 :                 dbentry->functions = NULL;
    4485        9460 :                 dbentry->subworkers = NULL;
    4486             : 
    4487             :                 /*
    4488             :                  * In the collector, disregard the timestamp we read from the
    4489             :                  * permanent stats file; we should be willing to write a temp
    4490             :                  * stats file immediately upon the first request from any
    4491             :                  * backend.
    4492             :                  */
    4493        9460 :                 if (pgStatRunningInCollector)
    4494         386 :                     dbentry->stats_timestamp = 0;
    4495             : 
    4496             :                 /*
    4497             :                  * Don't create tables/functions/subworkers hashtables for
    4498             :                  * uninteresting databases.  With 'onlydb' set, only that
                     :                  * database and the InvalidOid entry get hash tables; all
                     :                  * other entries keep NULL pointers.
    4499             :                  */
    4500        9460 :                 if (onlydb != InvalidOid)
    4501             :                 {
    4502        8470 :                     if (dbbuf.databaseid != onlydb &&
    4503        5188 :                         dbbuf.databaseid != InvalidOid)
    4504        1894 :                         break;
    4505             :                 }
    4506             : 
    4507        7566 :                 hash_ctl.keysize = sizeof(Oid);
    4508        7566 :                 hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
    4509        7566 :                 hash_ctl.hcxt = pgStatLocalContext;
    4510        7566 :                 dbentry->tables = hash_create("Per-database table",
    4511             :                                               PGSTAT_TAB_HASH_SIZE,
    4512             :                                               &hash_ctl,
    4513             :                                               HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    4514             : 
    4515        7566 :                 hash_ctl.keysize = sizeof(Oid);
    4516        7566 :                 hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
    4517        7566 :                 hash_ctl.hcxt = pgStatLocalContext;
    4518        7566 :                 dbentry->functions = hash_create("Per-database function",
    4519             :                                                  PGSTAT_FUNCTION_HASH_SIZE,
    4520             :                                                  &hash_ctl,
    4521             :                                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    4522             : 
    4523        7566 :                 hash_ctl.keysize = sizeof(PgStat_StatSubWorkerKey);
    4524        7566 :                 hash_ctl.entrysize = sizeof(PgStat_StatSubWorkerEntry);
    4525        7566 :                 hash_ctl.hcxt = pgStatLocalContext;
    4526        7566 :                 dbentry->subworkers = hash_create("Per-database subscription worker",
    4527             :                                                   PGSTAT_SUBWORKER_HASH_SIZE,
    4528             :                                                   &hash_ctl,
    4529             :                                                   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    4530             : 
    4531             :                 /*
    4532             :                  * If requested, read the data from the database-specific
    4533             :                  * file.  Otherwise we just leave the hashtables empty.
    4534             :                  */
    4535        7566 :                 if (deep)
    4536        6962 :                     pgstat_read_db_statsfile(dbentry->databaseid,
    4537             :                                              dbentry->tables,
    4538             :                                              dbentry->functions,
    4539             :                                              dbentry->subworkers,
    4540             :                                              permanent);
    4541             : 
    4542        7566 :                 break;
    4543             : 
    4544             :                 /*
    4545             :                  * 'R'  A PgStat_StatReplSlotEntry struct describing a
    4546             :                  * replication slot follows.  Unlike database entries, a
                     :                  * repeated slot name is not checked for here: the entry is
                     :                  * simply overwritten with the record just read.
    4547             :                  */
    4548         138 :             case 'R':
    4549             :                 {
    4550             :                     PgStat_StatReplSlotEntry slotbuf;
    4551             :                     PgStat_StatReplSlotEntry *slotent;
    4552             : 
    4553         138 :                     if (fread(&slotbuf, 1, sizeof(PgStat_StatReplSlotEntry), fpin)
    4554             :                         != sizeof(PgStat_StatReplSlotEntry))
    4555             :                     {
    4556           0 :                         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4557             :                                 (errmsg("corrupted statistics file \"%s\"",
    4558             :                                         statfile)));
    4559           0 :                         goto done;
    4560             :                     }
    4561             : 
    4562             :                     /*
                     :                      * Create hash table if we don't have it already.  The
                     :                      * hash_ctl declared in this block is a separate local
                     :                      * that shadows the function-level variable of the same
                     :                      * name.
                     :                      */
    4563         138 :                     if (replSlotStatHash == NULL)
    4564             :                     {
    4565             :                         HASHCTL     hash_ctl;
    4566             : 
    4567         104 :                         hash_ctl.keysize = sizeof(NameData);
    4568         104 :                         hash_ctl.entrysize = sizeof(PgStat_StatReplSlotEntry);
    4569         104 :                         hash_ctl.hcxt = pgStatLocalContext;
    4570         104 :                         replSlotStatHash = hash_create("Replication slots hash",
    4571             :                                                        PGSTAT_REPLSLOT_HASH_SIZE,
    4572             :                                                        &hash_ctl,
    4573             :                                                        HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    4574             :                     }
    4575             : 
    4576         138 :                     slotent = (PgStat_StatReplSlotEntry *) hash_search(replSlotStatHash,
    4577             :                                                                        (void *) &slotbuf.slotname,
    4578             :                                                                        HASH_ENTER, NULL);
    4579         138 :                     memcpy(slotent, &slotbuf, sizeof(PgStat_StatReplSlotEntry));
    4580         138 :                     break;
    4581             :                 }
    4582             : 
    4583        5010 :             case 'E':           /* 'E' ends the list of records */
    4584        5010 :                 goto done;
    4585             : 
    4586           0 :             default:            /* unknown tag byte, or EOF from fgetc */
    4587           0 :                 ereport(pgStatRunningInCollector ? LOG : WARNING,
    4588             :                         (errmsg("corrupted statistics file \"%s\"",
    4589             :                                 statfile)));
    4590           0 :                 goto done;
    4591             :         }
    4592             :     }
    4593             : 
    4594        5010 : done:
    4595        5010 :     FreeFile(fpin);
    4596             : 
    4597             :     /* If requested to read the permanent file, also get rid of it. */
    4598        5010 :     if (permanent)
    4599             :     {
    4600         160 :         elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
    4601         160 :         unlink(statfile);
    4602             :     }
    4603             : 
    4604        5010 :     return dbhash;
    4605             : }
    4606             : 
    4607             : 
    4608             : /* ----------
    4609             :  * pgstat_read_db_statsfile() -
    4610             :  *
    4611             :  *  Reads in the existing statistics collector file for the given database,
    4612             :  *  filling the passed-in table, function and subscription-worker hash tables.
    4613             :  *
    4614             :  *  As in pgstat_read_statsfiles, if the permanent file is requested, it is
    4615             :  *  removed after reading.
    4616             :  *
    4617             :  *  Note: this code has the ability to skip storing per-table, per-function, or
    4618             :  *  per-subscription-worker data, if NULL is passed for the corresponding hashtable.
    4619             :  *  That's not used at the moment though.
    4620             :  * ----------
    4621             :  */
    4622             : static void
    4623        6962 : pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash,
    4624             :                          HTAB *subworkerhash, bool permanent)
    4625             : {
    4626             :     PgStat_StatTabEntry *tabentry;
    4627             :     PgStat_StatTabEntry tabbuf;
    4628             :     PgStat_StatFuncEntry funcbuf;
    4629             :     PgStat_StatFuncEntry *funcentry;
    4630             :     PgStat_StatSubWorkerEntry subwbuf;
    4631             :     PgStat_StatSubWorkerEntry *subwentry;
    4632             :     FILE       *fpin;
    4633             :     int32       format_id;
    4634             :     bool        found;
    4635             :     char        statfile[MAXPGPATH];
    4636             : 
    4637        6962 :     get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
    4638             : 
    4639             :     /*
    4640             :      * Try to open the stats file. If it doesn't exist, the backends simply
    4641             :      * return zero for anything and the collector simply starts from scratch
    4642             :      * with empty counters.
    4643             :      *
    4644             :      * ENOENT is a possibility if the stats collector is not running or has
    4645             :      * not yet written the stats file the first time.  Any other failure
    4646             :      * condition is suspicious.
    4647             :      */
    4648        6962 :     if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
    4649             :     {
    4650           0 :         if (errno != ENOENT)
    4651           0 :             ereport(pgStatRunningInCollector ? LOG : WARNING,
    4652             :                     (errcode_for_file_access(),
    4653             :                      errmsg("could not open statistics file \"%s\": %m",
    4654             :                             statfile)));
    4655           0 :         return;
    4656             :     }
    4657             : 
    4658             :     /*
    4659             :      * Verify it's of the expected format.
    4660             :      */
    4661        6962 :     if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
    4662        6962 :         format_id != PGSTAT_FILE_FORMAT_ID)
    4663             :     {
    4664           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4665             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4666           0 :         goto done;
    4667             :     }
    4668             : 
    4669             :     /*
    4670             :      * We found an existing collector stats file. Read it and put all the
    4671             :      * hashtable entries into place.
    4672             :      */
    4673             :     for (;;)
    4674             :     {
    4675     1126568 :         switch (fgetc(fpin))
    4676             :         {
    4677             :                 /*
    4678             :                  * 'T'  A PgStat_StatTabEntry follows.
    4679             :                  */
    4680     1119598 :             case 'T':
    4681     1119598 :                 if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
    4682             :                           fpin) != sizeof(PgStat_StatTabEntry))
    4683             :                 {
    4684           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4685             :                             (errmsg("corrupted statistics file \"%s\"",
    4686             :                                     statfile)));
    4687           0 :                     goto done;
    4688             :                 }
    4689             : 
    4690             :                 /*
    4691             :                  * Skip if table data not wanted.
    4692             :                  */
    4693     1119598 :                 if (tabhash == NULL)
    4694           0 :                     break;
    4695             : 
    4696     1119598 :                 tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
    4697             :                                                                (void *) &tabbuf.tableid,
    4698             :                                                                HASH_ENTER, &found);
    4699             : 
    4700     1119598 :                 if (found)
    4701             :                 {
    4702           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4703             :                             (errmsg("corrupted statistics file \"%s\"",
    4704             :                                     statfile)));
    4705           0 :                     goto done;
    4706             :                 }
    4707             : 
    4708     1119598 :                 memcpy(tabentry, &tabbuf, sizeof(tabbuf));
    4709     1119598 :                 break;
    4710             : 
    4711             :                 /*
    4712             :                  * 'F'  A PgStat_StatFuncEntry follows.
    4713             :                  */
    4714           0 :             case 'F':
    4715           0 :                 if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry),
    4716             :                           fpin) != sizeof(PgStat_StatFuncEntry))
    4717             :                 {
    4718           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4719             :                             (errmsg("corrupted statistics file \"%s\"",
    4720             :                                     statfile)));
    4721           0 :                     goto done;
    4722             :                 }
    4723             : 
    4724             :                 /*
    4725             :                  * Skip if function data not wanted.
    4726             :                  */
    4727           0 :                 if (funchash == NULL)
    4728           0 :                     break;
    4729             : 
    4730           0 :                 funcentry = (PgStat_StatFuncEntry *) hash_search(funchash,
    4731             :                                                                  (void *) &funcbuf.functionid,
    4732             :                                                                  HASH_ENTER, &found);
    4733             : 
    4734           0 :                 if (found)
    4735             :                 {
    4736           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4737             :                             (errmsg("corrupted statistics file \"%s\"",
    4738             :                                     statfile)));
    4739           0 :                     goto done;
    4740             :                 }
    4741             : 
    4742           0 :                 memcpy(funcentry, &funcbuf, sizeof(funcbuf));
    4743           0 :                 break;
    4744             : 
    4745             :                 /*
    4746             :                  * 'S'  A PgStat_StatSubWorkerEntry struct describing
    4747             :                  * subscription worker statistics.
    4748             :                  */
    4749           8 :             case 'S':
    4750           8 :                 if (fread(&subwbuf, 1, sizeof(PgStat_StatSubWorkerEntry),
    4751             :                           fpin) != sizeof(PgStat_StatSubWorkerEntry))
    4752             :                 {
    4753           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4754             :                             (errmsg("corrupted statistics file \"%s\"",
    4755             :                                     statfile)));
    4756           0 :                     goto done;
    4757             :                 }
    4758             : 
    4759             :                 /*
    4760             :                  * Skip if subscription worker data not wanted.
    4761             :                  */
    4762           8 :                 if (subworkerhash == NULL)
    4763           0 :                     break;
    4764             : 
    4765           8 :                 subwentry = (PgStat_StatSubWorkerEntry *) hash_search(subworkerhash,
    4766             :                                                                       (void *) &subwbuf.key,
    4767             :                                                                       HASH_ENTER, &found);
    4768             : 
    4769           8 :                 if (found)
    4770             :                 {
    4771           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4772             :                             (errmsg("corrupted statistics file \"%s\"",
    4773             :                                     statfile)));
    4774           0 :                     goto done;
    4775             :                 }
    4776             : 
    4777           8 :                 memcpy(subwentry, &subwbuf, sizeof(subwbuf));
    4778           8 :                 break;
    4779             : 
    4780             :                 /*
    4781             :                  * 'E'  The EOF marker of a complete stats file.
    4782             :                  */
    4783        6962 :             case 'E':
    4784        6962 :                 goto done;
    4785             : 
    4786           0 :             default:
    4787           0 :                 ereport(pgStatRunningInCollector ? LOG : WARNING,
    4788             :                         (errmsg("corrupted statistics file \"%s\"",
    4789             :                                 statfile)));
    4790           0 :                 goto done;
    4791             :         }
    4792             :     }
    4793             : 
    4794        6962 : done:
    4795        6962 :     FreeFile(fpin);
    4796             : 
    4797        6962 :     if (permanent)
    4798             :     {
    4799         386 :         elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
    4800         386 :         unlink(statfile);
    4801             :     }
    4802             : }
    4803             : 
    4804             : /* ----------
    4805             :  * pgstat_read_db_statsfile_timestamp() -
    4806             :  *
    4807             :  *  Attempt to determine the timestamp of the last db statfile write.
    4808             :  *  Returns true if successful; the timestamp is stored in *ts. The caller must
    4809             :  *  rely on timestamp stored in *ts iff the function returns true.
    4810             :  *
    4811             :  *  This needs to be careful about handling databases for which no stats file
    4812             :  *  exists, such as databases without a stat entry or those not yet written:
    4813             :  *
    4814             :  *  - if there's a database entry in the global file, return the corresponding
    4815             :  *  stats_timestamp value.
    4816             :  *
    4817             :  *  - if there's no db stat entry (e.g. for a new or inactive database),
    4818             :  *  there's no stats_timestamp value, but also nothing to write so we return
    4819             :  *  the timestamp of the global statfile.
    4820             :  * ----------
    4821             :  */
    4822             : static bool
    4823        7026 : pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent,
    4824             :                                    TimestampTz *ts)
    4825             : {
    4826             :     PgStat_StatDBEntry dbentry;
    4827             :     PgStat_GlobalStats myGlobalStats;
    4828             :     PgStat_ArchiverStats myArchiverStats;
    4829             :     PgStat_WalStats myWalStats;
    4830             :     PgStat_SLRUStats mySLRUStats[SLRU_NUM_ELEMENTS];
    4831             :     PgStat_StatReplSlotEntry myReplSlotStats;
    4832             :     FILE       *fpin;
    4833             :     int32       format_id;
    4834        7026 :     const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
    4835             : 
    4836             :     /*
    4837             :      * Try to open the stats file.  As above, anything but ENOENT is worthy of
    4838             :      * complaining about.
    4839             :      */
    4840        7026 :     if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
    4841             :     {
    4842        1510 :         if (errno != ENOENT)
    4843           0 :             ereport(pgStatRunningInCollector ? LOG : WARNING,
    4844             :                     (errcode_for_file_access(),
    4845             :                      errmsg("could not open statistics file \"%s\": %m",
    4846             :                             statfile)));
    4847        1510 :         return false;
    4848             :     }
    4849             : 
    4850             :     /*
    4851             :      * Verify it's of the expected format.
    4852             :      */
    4853        5516 :     if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
    4854        5516 :         format_id != PGSTAT_FILE_FORMAT_ID)
    4855             :     {
    4856           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4857             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4858           0 :         FreeFile(fpin);
    4859           0 :         return false;
    4860             :     }
    4861             : 
    4862             :     /*
    4863             :      * Read global stats struct
    4864             :      */
    4865        5516 :     if (fread(&myGlobalStats, 1, sizeof(myGlobalStats),
    4866             :               fpin) != sizeof(myGlobalStats))
    4867             :     {
    4868           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4869             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4870           0 :         FreeFile(fpin);
    4871           0 :         return false;
    4872             :     }
    4873             : 
    4874             :     /*
    4875             :      * Read archiver stats struct
    4876             :      */
    4877        5516 :     if (fread(&myArchiverStats, 1, sizeof(myArchiverStats),
    4878             :               fpin) != sizeof(myArchiverStats))
    4879             :     {
    4880           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4881             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4882           0 :         FreeFile(fpin);
    4883           0 :         return false;
    4884             :     }
    4885             : 
    4886             :     /*
    4887             :      * Read WAL stats struct
    4888             :      */
    4889        5516 :     if (fread(&myWalStats, 1, sizeof(myWalStats), fpin) != sizeof(myWalStats))
    4890             :     {
    4891           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4892             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4893           0 :         FreeFile(fpin);
    4894           0 :         return false;
    4895             :     }
    4896             : 
    4897             :     /*
    4898             :      * Read SLRU stats struct
    4899             :      */
    4900        5516 :     if (fread(mySLRUStats, 1, sizeof(mySLRUStats), fpin) != sizeof(mySLRUStats))
    4901             :     {
    4902           0 :         ereport(pgStatRunningInCollector ? LOG : WARNING,
    4903             :                 (errmsg("corrupted statistics file \"%s\"", statfile)));
    4904           0 :         FreeFile(fpin);
    4905           0 :         return false;
    4906             :     }
    4907             : 
    4908             :     /* By default, we're going to return the timestamp of the global file. */
    4909        5516 :     *ts = myGlobalStats.stats_timestamp;
    4910             : 
    4911             :     /*
    4912             :      * We found an existing collector stats file.  Read it and look for a
    4913             :      * record for the requested database.  If found, use its timestamp.
    4914             :      */
    4915             :     for (;;)
    4916             :     {
    4917        6318 :         switch (fgetc(fpin))
    4918             :         {
    4919             :                 /*
    4920             :                  * 'D'  A PgStat_StatDBEntry struct describing a database
    4921             :                  * follows.
    4922             :                  */
    4923        4904 :             case 'D':
    4924        4904 :                 if (fread(&dbentry, 1, offsetof(PgStat_StatDBEntry, tables),
    4925             :                           fpin) != offsetof(PgStat_StatDBEntry, tables))
    4926             :                 {
    4927           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4928             :                             (errmsg("corrupted statistics file \"%s\"",
    4929             :                                     statfile)));
    4930           0 :                     FreeFile(fpin);
    4931           0 :                     return false;
    4932             :                 }
    4933             : 
    4934             :                 /*
    4935             :                  * If this is the DB we're looking for, save its timestamp and
    4936             :                  * we're done.
    4937             :                  */
    4938        4904 :                 if (dbentry.databaseid == databaseid)
    4939             :                 {
    4940        4102 :                     *ts = dbentry.stats_timestamp;
    4941        4102 :                     goto done;
    4942             :                 }
    4943             : 
    4944         802 :                 break;
    4945             : 
    4946             :                 /*
    4947             :                  * 'R'  A PgStat_StatReplSlotEntry struct describing a
    4948             :                  * replication slot follows.
    4949             :                  */
    4950           0 :             case 'R':
    4951           0 :                 if (fread(&myReplSlotStats, 1, sizeof(PgStat_StatReplSlotEntry), fpin)
    4952             :                     != sizeof(PgStat_StatReplSlotEntry))
    4953             :                 {
    4954           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4955             :                             (errmsg("corrupted statistics file \"%s\"",
    4956             :                                     statfile)));
    4957           0 :                     FreeFile(fpin);
    4958           0 :                     return false;
    4959             :                 }
    4960           0 :                 break;
    4961             : 
    4962        1414 :             case 'E':
    4963        1414 :                 goto done;
    4964             : 
    4965           0 :             default:
    4966             :                 {
    4967           0 :                     ereport(pgStatRunningInCollector ? LOG : WARNING,
    4968             :                             (errmsg("corrupted statistics file \"%s\"",
    4969             :                                     statfile)));
    4970           0 :                     FreeFile(fpin);
    4971           0 :                     return false;
    4972             :                 }
    4973             :         }
    4974             :     }
    4975             : 
    4976        5516 : done:
    4977        5516 :     FreeFile(fpin);
    4978        5516 :     return true;
    4979             : }
    4980             : 
    4981             : /*
    4982             :  * If not already done, read the statistics collector stats file into
    4983             :  * some hash tables.  The results will be kept until pgstat_clear_snapshot()
    4984             :  * is called (typically, at end of transaction).
    4985             :  */
    4986             : static void
    4987        7770 : backend_read_statsfile(void)
    4988             : {
    4989        7770 :     TimestampTz min_ts = 0;
    4990        7770 :     TimestampTz ref_ts = 0;
    4991             :     Oid         inquiry_db;
    4992             :     int         count;
    4993             : 
    4994        7770 :     pgstat_assert_is_up();
    4995             : 
    4996             :     /* already read it? */
    4997        7770 :     if (pgStatDBHash)
    4998        2918 :         return;
    4999             :     Assert(!pgStatRunningInCollector);
    5000             : 
    5001             :     /*
    5002             :      * In a normal backend, we check staleness of the data for our own DB, and
    5003             :      * so we send MyDatabaseId in inquiry messages.  In the autovac launcher,
    5004             :      * check staleness of the shared-catalog data, and send InvalidOid in
    5005             :      * inquiry messages so as not to force writing unnecessary data.
    5006             :      */
    5007        4852 :     if (IsAutoVacuumLauncherProcess())
    5008         996 :         inquiry_db = InvalidOid;
    5009             :     else
    5010        3856 :         inquiry_db = MyDatabaseId;
    5011             : 
    5012             :     /*
    5013             :      * Loop until fresh enough stats file is available or we ran out of time.
    5014             :      * The stats inquiry message is sent repeatedly in case collector drops
    5015             :      * it; but not every single time, as that just swamps the collector.
    5016             :      */
    5017        7028 :     for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
    5018             :     {
    5019             :         bool        ok;
    5020        7028 :         TimestampTz file_ts = 0;
    5021             :         TimestampTz cur_ts;
    5022             : 
    5023        7028 :         CHECK_FOR_INTERRUPTS();
    5024             : 
    5025        7026 :         ok = pgstat_read_db_statsfile_timestamp(inquiry_db, false, &file_ts);
    5026             : 
    5027        7026 :         cur_ts = GetCurrentTimestamp();
    5028             :         /* Calculate min acceptable timestamp, if we didn't already */
    5029        7026 :         if (count == 0 || cur_ts < ref_ts)
    5030             :         {
    5031             :             /*
    5032             :              * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL
    5033             :              * msec before now.  This indirectly ensures that the collector
    5034             :              * needn't write the file more often than PGSTAT_STAT_INTERVAL. In
    5035             :              * an autovacuum worker, however, we want a lower delay to avoid
    5036             :              * using stale data, so we use PGSTAT_RETRY_DELAY (since the
    5037             :              * number of workers is low, this shouldn't be a problem).
    5038             :              *
    5039             :              * We don't recompute min_ts after sleeping, except in the
    5040             :              * unlikely case that cur_ts went backwards.  So we might end up
    5041             :              * accepting a file a bit older than PGSTAT_STAT_INTERVAL.  In
    5042             :              * practice that shouldn't happen, though, as long as the sleep
    5043             :              * time is less than PGSTAT_STAT_INTERVAL; and we don't want to
    5044             :              * tell the collector that our cutoff time is less than what we'd
    5045             :              * actually accept.
    5046             :              */
    5047        4852 :             ref_ts = cur_ts;
    5048        4852 :             if (IsAutoVacuumWorkerProcess())
    5049         318 :                 min_ts = TimestampTzPlusMilliseconds(ref_ts,
    5050             :                                                      -PGSTAT_RETRY_DELAY);
    5051             :             else
    5052        4534 :                 min_ts = TimestampTzPlusMilliseconds(ref_ts,
    5053             :                                                      -PGSTAT_STAT_INTERVAL);
    5054             :         }
    5055             : 
    5056             :         /*
    5057             :          * If the file timestamp is actually newer than cur_ts, we must have
    5058             :          * had a clock glitch (system time went backwards) or there is clock
    5059             :          * skew between our processor and the stats collector's processor.
    5060             :          * Accept the file, but send an inquiry message anyway to make
    5061             :          * pgstat_recv_inquiry do a sanity check on the collector's time.
    5062             :          */
    5063        7026 :         if (ok && file_ts > cur_ts)
    5064             :         {
    5065             :             /*
    5066             :              * A small amount of clock skew between processors isn't terribly
    5067             :              * surprising, but a large difference is worth logging.  We
    5068             :              * arbitrarily define "large" as 1000 msec.
    5069             :              */
    5070           0 :             if (file_ts >= TimestampTzPlusMilliseconds(cur_ts, 1000))
    5071             :             {
    5072             :                 char       *filetime;
    5073             :                 char       *mytime;
    5074             : 
    5075             :                 /* Copy because timestamptz_to_str returns a static buffer */
    5076           0 :                 filetime = pstrdup(timestamptz_to_str(file_ts));
    5077           0 :                 mytime = pstrdup(timestamptz_to_str(cur_ts));
    5078           0 :                 ereport(LOG,
    5079             :                         (errmsg("statistics collector's time %s is later than backend local time %s",
    5080             :                                 filetime, mytime)));
    5081           0 :                 pfree(filetime);
    5082           0 :                 pfree(mytime);
    5083             :             }
    5084             : 
    5085           0 :             pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
    5086        4850 :             break;
    5087             :         }
    5088             : 
    5089             :         /* Normal acceptance case: file is not older than cutoff time */
    5090        7026 :         if (ok && file_ts >= min_ts)
    5091        4850 :             break;
    5092             : 
    5093             :         /* Not there or too old, so kick the collector and wait a bit */
    5094        2176 :         if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
    5095        1136 :             pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
    5096             : 
    5097        2176 :         pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
    5098             :     }
    5099             : 
    5100        4850 :     if (count >= PGSTAT_POLL_LOOP_COUNT)
    5101           0 :         ereport(LOG,
    5102             :                 (errmsg("using stale statistics instead of current ones "
    5103             :                         "because stats collector is not responding")));
    5104             : 
    5105             :     /*
    5106             :      * Autovacuum launcher wants stats about all databases, but a shallow read
    5107             :      * is sufficient.  Regular backends want a deep read for just the tables
    5108             :      * they can see (MyDatabaseId + shared catalogs).
    5109             :      */
    5110        4850 :     if (IsAutoVacuumLauncherProcess())
    5111         996 :         pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
    5112             :     else
    5113        3854 :         pgStatDBHash = pgstat_read_statsfiles(MyDatabaseId, false, true);
    5114             : }
    5115             : 
    5116             : 
    5117             : /* ----------
    5118             :  * pgstat_setup_memcxt() -
    5119             :  *
    5120             :  *  Create pgStatLocalContext, if not already done.
    5121             :  * ----------
    5122             :  */
    5123             : static void
    5124        5794 : pgstat_setup_memcxt(void)
    5125             : {
    5126        5794 :     if (!pgStatLocalContext)
    5127        5794 :         pgStatLocalContext = AllocSetContextCreate(TopMemoryContext,
    5128             :                                                    "Statistics snapshot",
    5129             :                                                    ALLOCSET_SMALL_SIZES);
    5130        5794 : }
    5131             : 
    5132             : /*
    5133             :  * Stats should only be reported after pgstat_initialize() and before
    5134             :  * pgstat_shutdown(). This check is put in a few central places to catch
    5135             :  * violations of this rule more easily.
    5136             :  */
    5137             : static void
    5138     6118212 : pgstat_assert_is_up(void)
    5139             : {
    5140             :     Assert(pgstat_is_initialized && !pgstat_is_shutdown);
    5141     6118212 : }
    5142             : 
    5143             : 
    5144             : /* ----------
    5145             :  * pgstat_clear_snapshot() -
    5146             :  *
    5147             :  *  Discard any data collected in the current transaction.  Any subsequent
    5148             :  *  request will cause new snapshots to be read.
    5149             :  *
    5150             :  *  This is also invoked during transaction commit or abort to discard
    5151             :  *  the no-longer-wanted snapshot.
    5152             :  * ----------
    5153             :  */
    5154             : void
    5155      733586 : pgstat_clear_snapshot(void)
    5156             : {
    5157      733586 :     pgstat_assert_is_up();
    5158             : 
    5159             :     /* Release memory, if any was allocated */
    5160      733586 :     if (pgStatLocalContext)
    5161        4384 :         MemoryContextDelete(pgStatLocalContext);
    5162             : 
    5163             :     /* Reset variables */
    5164      733586 :     pgStatLocalContext = NULL;
    5165      733586 :     pgStatDBHash = NULL;
    5166      733586 :     replSlotStatHash = NULL;
    5167             : 
    5168             :     /*
    5169             :      * Historically the backend_status.c facilities lived in this file, and
    5170             :      * were reset with the same function. For now keep it that way, and
    5171             :      * forward the reset request.
    5172             :      */
    5173      733586 :     pgstat_clear_backend_activity_snapshot();
    5174      733586 : }
    5175             : 
    5176             : 
    5177             : /* ----------
    5178             :  * pgstat_recv_inquiry() -
    5179             :  *
    5180             :  *  Process stat inquiry requests.
    5181             :  * ----------
    5182             :  */
    5183             : static void
    5184        1464 : pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
    5185             : {
    5186             :     PgStat_StatDBEntry *dbentry;
    5187             : 
    5188        1464 :     elog(DEBUG2, "received inquiry for database %u", msg->databaseid);
    5189             : 
    5190             :     /*
    5191             :      * If there's already a write request for this DB, there's nothing to do.
    5192             :      *
    5193             :      * Note that if a request is found, we return early and skip the below
    5194             :      * check for clock skew.  This is okay, since the only way for a DB
    5195             :      * request to be present in the list is that we have been here since the
    5196             :      * last write round.  It seems sufficient to check for clock skew once per
    5197             :      * write round.
    5198             :      */
    5199        1464 :     if (list_member_oid(pending_write_requests, msg->databaseid))
    5200           0 :         return;
    5201             : 
    5202             :     /*
    5203             :      * Check to see if we last wrote this database at a time >= the requested
    5204             :      * cutoff time.  If so, this is a stale request that was generated before
    5205             :      * we updated the DB file, and we don't need to do so again.
    5206             :      *
    5207             :      * If the requestor's local clock time is older than stats_timestamp, we
    5208             :      * should suspect a clock glitch, ie system time going backwards; though
    5209             :      * the more likely explanation is just delayed message receipt.  It is
    5210             :      * worth expending a GetCurrentTimestamp call to be sure, since a large
    5211             :      * retreat in the system clock reading could otherwise cause us to neglect
    5212             :      * to update the stats file for a long time.
    5213             :      */
    5214        1464 :     dbentry = pgstat_get_db_entry(msg->databaseid, false);
    5215        1464 :     if (dbentry == NULL)
    5216             :     {
    5217             :         /*
    5218             :          * We have no data for this DB.  Enter a write request anyway so that
    5219             :          * the global stats will get updated.  This is needed to prevent
    5220             :          * backend_read_statsfile from waiting for data that we cannot supply,
    5221             :          * in the case of a new DB that nobody has yet reported any stats for.
    5222             :          * See the behavior of pgstat_read_db_statsfile_timestamp.
    5223             :          */
    5224             :     }
    5225         846 :     else if (msg->clock_time < dbentry->stats_timestamp)
    5226             :     {
    5227           0 :         TimestampTz cur_ts = GetCurrentTimestamp();
    5228             : 
    5229           0 :         if (cur_ts < dbentry->stats_timestamp)
    5230             :         {
    5231             :             /*
    5232             :              * Sure enough, time went backwards.  Force a new stats file write
    5233             :              * to get back in sync; but first, log a complaint.
    5234             :              */
    5235             :             char       *writetime;
    5236             :             char       *mytime;
    5237             : 
    5238             :             /* Copy because timestamptz_to_str returns a static buffer */
    5239           0 :             writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
    5240           0 :             mytime = pstrdup(timestamptz_to_str(cur_ts));
    5241           0 :             ereport(LOG,
    5242             :                     (errmsg("stats_timestamp %s is later than collector's time %s for database %u",
    5243             :                             writetime, mytime, dbentry->databaseid)));
    5244           0 :             pfree(writetime);
    5245           0 :             pfree(mytime);
    5246             :         }
    5247             :         else
    5248             :         {
    5249             :             /*
    5250             :              * Nope, it's just an old request.  Assuming msg's clock_time is
    5251             :              * >= its cutoff_time, it must be stale, so we can ignore it.
    5252             :              */
    5253           0 :             return;
    5254             :         }
    5255             :     }
    5256         846 :     else if (msg->cutoff_time <= dbentry->stats_timestamp)
    5257             :     {
    5258             :         /* Stale request, ignore it */
    5259           0 :         return;
    5260             :     }
    5261             : 
    5262             :     /*
    5263             :      * We need to write this DB, so create a request.
    5264             :      */
    5265        1464 :     pending_write_requests = lappend_oid(pending_write_requests,
    5266             :                                          msg->databaseid);
    5267             : }
    5268             : 
    5269             : 
    5270             : /* ----------
    5271             :  * pgstat_recv_tabstat() -
    5272             :  *
    5273             :  *  Accumulate a backend's per-database and per-table counters from msg.
    5274             :  * ----------
    5275             :  */
    5276             : static void
    5277      119180 : pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
    5278             : {
    5279             :     PgStat_StatDBEntry *dbentry;
    5280             :     PgStat_StatTabEntry *tabentry;
    5281             :     int         i;
    5282             :     bool        found;
    5283             :
    5284      119180 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5285             :
    5286             :     /*
    5287             :      * Update database-wide stats: xact counts, block I/O and session times.
    5288             :      */
    5289      119180 :     dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
    5290      119180 :     dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
    5291      119180 :     dbentry->n_block_read_time += msg->m_block_read_time;
    5292      119180 :     dbentry->n_block_write_time += msg->m_block_write_time;
    5293             :
    5294      119180 :     dbentry->total_session_time += msg->m_session_time;
    5295      119180 :     dbentry->total_active_time += msg->m_active_time;
    5296      119180 :     dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
    5297             :
    5298             :     /*
    5299             :      * Process all table entries in the message.
    5300             :      */
    5301      879836 :     for (i = 0; i < msg->m_nentries; i++)
    5302             :     {
    5303      760656 :         PgStat_TableEntry *tabmsg = &(msg->m_entry[i]);
    5304             :
    5305      760656 :         tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
    5306      760656 :                                                        (void *) &(tabmsg->t_id),
    5307             :                                                        HASH_ENTER, &found);
    5308             :
    5309      760656 :         if (!found)
    5310             :         {
    5311             :             /*
    5312             :              * If it's a new table entry, initialize counters to the values we
    5313             :              * just got, and zero the vacuum/analyze timestamps and counts.
    5314             :              */
    5315       88528 :             tabentry->numscans = tabmsg->t_counts.t_numscans;
    5316       88528 :             tabentry->tuples_returned = tabmsg->t_counts.t_tuples_returned;
    5317       88528 :             tabentry->tuples_fetched = tabmsg->t_counts.t_tuples_fetched;
    5318       88528 :             tabentry->tuples_inserted = tabmsg->t_counts.t_tuples_inserted;
    5319       88528 :             tabentry->tuples_updated = tabmsg->t_counts.t_tuples_updated;
    5320       88528 :             tabentry->tuples_deleted = tabmsg->t_counts.t_tuples_deleted;
    5321       88528 :             tabentry->tuples_hot_updated = tabmsg->t_counts.t_tuples_hot_updated;
    5322       88528 :             tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
    5323       88528 :             tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
    5324       88528 :             tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
    5325       88528 :             tabentry->inserts_since_vacuum = tabmsg->t_counts.t_tuples_inserted;
    5326       88528 :             tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
    5327       88528 :             tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
    5328             :
    5329       88528 :             tabentry->vacuum_timestamp = 0;
    5330       88528 :             tabentry->vacuum_count = 0;
    5331       88528 :             tabentry->autovac_vacuum_timestamp = 0;
    5332       88528 :             tabentry->autovac_vacuum_count = 0;
    5333       88528 :             tabentry->analyze_timestamp = 0;
    5334       88528 :             tabentry->analyze_count = 0;
    5335       88528 :             tabentry->autovac_analyze_timestamp = 0;
    5336       88528 :             tabentry->autovac_analyze_count = 0;
    5337             :         }
    5338             :         else
    5339             :         {
    5340             :             /*
    5341             :              * Otherwise add the values to the existing entry.
    5342             :              */
    5343      672128 :             tabentry->numscans += tabmsg->t_counts.t_numscans;
    5344      672128 :             tabentry->tuples_returned += tabmsg->t_counts.t_tuples_returned;
    5345      672128 :             tabentry->tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
    5346      672128 :             tabentry->tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
    5347      672128 :             tabentry->tuples_updated += tabmsg->t_counts.t_tuples_updated;
    5348      672128 :             tabentry->tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
    5349      672128 :             tabentry->tuples_hot_updated += tabmsg->t_counts.t_tuples_hot_updated;
    5350             :             /*
    5351             :              * If table was truncated/dropped, first reset the live/dead and
    5352             :              * inserts_since_vacuum counters; the deltas are added afterwards.
    5353             :              */
    5354      672128 :             if (tabmsg->t_counts.t_truncdropped)
    5355             :             {
    5356         236 :                 tabentry->n_live_tuples = 0;
    5357         236 :                 tabentry->n_dead_tuples = 0;
    5358         236 :                 tabentry->inserts_since_vacuum = 0;
    5359             :             }
    5360      672128 :             tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
    5361      672128 :             tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
    5362      672128 :             tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
    5363      672128 :             tabentry->inserts_since_vacuum += tabmsg->t_counts.t_tuples_inserted;
    5364      672128 :             tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
    5365      672128 :             tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
    5366             :         }
    5367             :
    5368             :         /* Clamp n_live_tuples at zero in case of negative delta_live_tuples */
    5369      760656 :         tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
    5370             :         /* Likewise for n_dead_tuples */
    5371      760656 :         tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
    5372             :
    5373             :         /*
    5374             :          * Add per-table stats to the per-database entry, too.
    5375             :          */
    5376      760656 :         dbentry->n_tuples_returned += tabmsg->t_counts.t_tuples_returned;
    5377      760656 :         dbentry->n_tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
    5378      760656 :         dbentry->n_tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
    5379      760656 :         dbentry->n_tuples_updated += tabmsg->t_counts.t_tuples_updated;
    5380      760656 :         dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
    5381      760656 :         dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
    5382      760656 :         dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
    5383             :     }
    5384      119180 : }
    5385             : 
    5386             : 
    5387             : /* ----------
    5388             :  * pgstat_recv_tabpurge() -
    5389             :  *
    5390             :  *  Remove the entries for the tables listed in msg from their database's table hash.
    5391             :  * ----------
    5392             :  */
    5393             : static void
    5394        1076 : pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
    5395             : {
    5396             :     PgStat_StatDBEntry *dbentry;
    5397             :     int         i;
    5398             :
    5399        1076 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
    5400             :
    5401             :     /*
    5402             :      * Nothing to purge if the database is unknown or has no table hash.
    5403             :      */
    5404        1076 :     if (!dbentry || !dbentry->tables)
    5405           0 :         return;
    5406             :
    5407             :     /*
    5408             :      * Process all table entries in the message.
    5409             :      */
    5410      106476 :     for (i = 0; i < msg->m_nentries; i++)
    5411             :     {
    5412             :         /* Remove from hashtable if present; we don't care if it's not. */
    5413      105400 :         (void) hash_search(dbentry->tables,
    5414      105400 :                            (void *) &(msg->m_tableid[i]),
    5415             :                            HASH_REMOVE, NULL);
    5416             :     }
    5417             : }
    5418             : 
    5419             : 
    5420             : /* ----------
    5421             :  * pgstat_recv_dropdb() -
    5422             :  *
    5423             :  *  Remove a dropped database's stats file, hash tables and hash entry.
    5424             :  * ----------
    5425             :  */
    5426             : static void
    5427          18 : pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
    5428             : {
    5429          18 :     Oid         dbid = msg->m_databaseid;
    5430             :     PgStat_StatDBEntry *dbentry;
    5431             :
    5432             :     /*
    5433             :      * Lookup the database in the hashtable.
    5434             :      */
    5435          18 :     dbentry = pgstat_get_db_entry(dbid, false);
    5436             :
    5437             :     /*
    5438             :      * If found, remove it (and its statfile; the unlink result is ignored).
    5439             :      */
    5440          18 :     if (dbentry)
    5441             :     {
    5442             :         char        statfile[MAXPGPATH];
    5443             :
    5444          10 :         get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
    5445             :
    5446          10 :         elog(DEBUG2, "removing stats file \"%s\"", statfile);
    5447          10 :         unlink(statfile);
    5448             :
    5449          10 :         if (dbentry->tables != NULL)
    5450          10 :             hash_destroy(dbentry->tables);
    5451          10 :         if (dbentry->functions != NULL)
    5452          10 :             hash_destroy(dbentry->functions);
    5453          10 :         if (dbentry->subworkers != NULL)
    5454          10 :             hash_destroy(dbentry->subworkers);
    5455             :
    5456          10 :         if (hash_search(pgStatDBHash,
    5457             :                         (void *) &dbid,
    5458             :                         HASH_REMOVE, NULL) == NULL)
    5459           0 :             ereport(ERROR,
    5460             :                     (errmsg("database hash table corrupted during cleanup --- abort")));
    5461             :     }
    5462          18 : }
    5463             : 
    5464             : 
    5465             : /* ----------
    5466             :  * pgstat_recv_resetcounter() -
    5467             :  *
    5468             :  *  Reset the statistics for the database named by msg->m_databaseid.
    5469             :  * ----------
    5470             :  */
    5471             : static void
    5472           0 : pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
    5473             : {
    5474             :     PgStat_StatDBEntry *dbentry;
    5475             :
    5476             :     /*
    5477             :      * Lookup the database in the hashtable.  Nothing to do if not there.
    5478             :      */
    5479           0 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
    5480             :
    5481           0 :     if (!dbentry)
    5482           0 :         return;
    5483             :
    5484             :     /*
    5485             :      * We simply throw away all the database's table, function and subscription
    5486             :      * worker entries by destroying their hash tables.
    5487             :      */
    5488           0 :     if (dbentry->tables != NULL)
    5489           0 :         hash_destroy(dbentry->tables);
    5490           0 :     if (dbentry->functions != NULL)
    5491           0 :         hash_destroy(dbentry->functions);
    5492           0 :     if (dbentry->subworkers != NULL)
    5493           0 :         hash_destroy(dbentry->subworkers);
    5494             :
    5495           0 :     dbentry->tables = NULL;
    5496           0 :     dbentry->functions = NULL;
    5497           0 :     dbentry->subworkers = NULL;
    5498             :
    5499             :     /*
    5500             :      * Reset database-level stats, too.  This creates empty hash tables for
    5501             :      * tables, functions, and subscription workers.
    5502             :      */
    5503           0 :     reset_dbentry_counters(dbentry);
    5504             : }
    5505             : 
    5506             : /* ----------
    5507             :  * pgstat_recv_resetsharedcounter() -
    5508             :  *
    5509             :  *  Reset the shared cluster statistics selected by msg->m_resettarget.
    5510             :  * ----------
    5511             :  */
    5512             : static void
    5513           2 : pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
    5514             : {
    5515           2 :     if (msg->m_resettarget == RESET_BGWRITER)
    5516             :     {
    5517             :         /* Reset the global, bgwriter and checkpointer statistics for the cluster. */
    5518           0 :         memset(&globalStats, 0, sizeof(globalStats));
    5519           0 :         globalStats.bgwriter.stat_reset_timestamp = GetCurrentTimestamp();
    5520             :     }
    5521           2 :     else if (msg->m_resettarget == RESET_ARCHIVER)
    5522             :     {
    5523             :         /* Reset the archiver statistics for the cluster. */
    5524           2 :         memset(&archiverStats, 0, sizeof(archiverStats));
    5525           2 :         archiverStats.stat_reset_timestamp = GetCurrentTimestamp();
    5526             :     }
    5527           0 :     else if (msg->m_resettarget == RESET_WAL)
    5528             :     {
    5529             :         /* Reset the WAL statistics for the cluster. */
    5530           0 :         memset(&walStats, 0, sizeof(walStats));
    5531           0 :         walStats.stat_reset_timestamp = GetCurrentTimestamp();
    5532             :     }
    5533             :
    5534             :     /*
    5535             :      * Presumably the sender of this message validated the target; any other
    5536             :      * value is silently ignored rather than complained about here.
    5537             :      */
    5538           2 : }
    5539             : 
    5540             : /* ----------
    5541             :  * pgstat_recv_resetsinglecounter() -
    5542             :  *
    5543             :  *  Reset the statistics for a single object, which may belong to the
    5544             :  *  current database or be shared across all databases in the cluster.
    5545             :  * ----------
    5546             :  */
    5547             : static void
    5548           0 : pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len)
    5549             : {
    5550             :     PgStat_StatDBEntry *dbentry;
    5551             :
    5552           0 :     if (IsSharedRelation(msg->m_objectid))
    5553           0 :         dbentry = pgstat_get_db_entry(InvalidOid, false);
    5554             :     else
    5555           0 :         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
    5556             :
    5557           0 :     if (!dbentry)
    5558           0 :         return;
    5559             :
    5560             :     /* Set the reset timestamp for the whole database */
    5561           0 :     dbentry->stat_reset_timestamp = GetCurrentTimestamp();
    5562             :
    5563             :     /* Remove the object's entry if it exists; ignore it if not */
    5564           0 :     if (msg->m_resettype == RESET_TABLE)
    5565           0 :         (void) hash_search(dbentry->tables, (void *) &(msg->m_objectid),
    5566             :                            HASH_REMOVE, NULL);
    5567           0 :     else if (msg->m_resettype == RESET_FUNCTION)
    5568           0 :         (void) hash_search(dbentry->functions, (void *) &(msg->m_objectid),
    5569             :                            HASH_REMOVE, NULL);
    5570           0 :     else if (msg->m_resettype == RESET_SUBWORKER)
    5571             :     {
    5572             :         PgStat_StatSubWorkerKey key;
    5573             :
    5574           0 :         key.subid = msg->m_objectid;
    5575           0 :         key.subrelid = msg->m_subobjectid;
    5576           0 :         (void) hash_search(dbentry->subworkers, (void *) &key, HASH_REMOVE, NULL);
    5577             :     }
    5578             : }
    5579             : 
    5580             : /* ----------
    5581             :  * pgstat_recv_resetslrucounter() -
    5582             :  *
    5583             :  *  Reset one SLRU's statistics, or all of them when msg->m_index is -1.
    5584             :  * ----------
    5585             :  */
    5586             : static void
    5587           0 : pgstat_recv_resetslrucounter(PgStat_MsgResetslrucounter *msg, int len)
    5588             : {
    5589             :     int         i;
    5590           0 :     TimestampTz ts = GetCurrentTimestamp();
    5591             :
    5592           0 :     for (i = 0; i < SLRU_NUM_ELEMENTS; i++)
    5593             :     {
    5594             :         /* zero the entry with the given index, or every entry if index is -1 */
    5595           0 :         if ((msg->m_index == -1) || (msg->m_index == i))
    5596             :         {
    5597           0 :             memset(&slruStats[i], 0, sizeof(slruStats[i]));
    5598           0 :             slruStats[i].stat_reset_timestamp = ts;
    5599             :         }
    5600             :     }
    5601           0 : }
    5602             : 
    5603             : /* ----------
    5604             :  * pgstat_recv_resetreplslotcounter() -
    5605             :  *
    5606             :  *  Reset the statistics of one replication slot, or of all slots if msg->clearall.
    5607             :  * ----------
    5608             :  */
    5609             : static void
    5610           2 : pgstat_recv_resetreplslotcounter(PgStat_MsgResetreplslotcounter *msg,
    5611             :                                  int len)
    5612             : {
    5613             :     PgStat_StatReplSlotEntry *slotent;
    5614             :     TimestampTz ts;
    5615             :
    5616             :     /* Return if we don't have replication slot statistics */
    5617           2 :     if (replSlotStatHash == NULL)
    5618           0 :         return;
    5619             :
    5620           2 :     ts = GetCurrentTimestamp();
    5621           2 :     if (msg->clearall)
    5622             :     {
    5623             :         HASH_SEQ_STATUS sstat;
    5624             :
    5625           0 :         hash_seq_init(&sstat, replSlotStatHash);
    5626           0 :         while ((slotent = (PgStat_StatReplSlotEntry *) hash_seq_search(&sstat)) != NULL)
    5627           0 :             pgstat_reset_replslot(slotent, ts);
    5628             :     }
    5629             :     else
    5630             :     {
    5631             :         /* Look up the statistics entry of the slot named in the message */
    5632           2 :         slotent = pgstat_get_replslot_entry(msg->m_slotname, false);
    5633             :
    5634             :         /*
    5635             :          * Nothing to do if the given slot entry is not found.  This could
    5636             :          * happen when the slot with the given name is removed and the
    5637             :          * corresponding statistics entry is also removed before receiving the
    5638             :          * reset message.
    5639             :          */
    5640           2 :         if (!slotent)
    5641           0 :             return;
    5642             :
    5643             :         /* Reset the stats for the requested replication slot */
    5644           2 :         pgstat_reset_replslot(slotent, ts);
    5645             :     }
    5646             : }
    5647             : 
    5648             : 
    5649             : /* ----------
    5650             :  * pgstat_recv_autovac() -
    5651             :  *
    5652             :  *  Process an autovacuum signaling message: record msg->m_start_time for its database.
    5653             :  * ----------
    5654             :  */
    5655             : static void
    5656          58 : pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len)
    5657             : {
    5658             :     PgStat_StatDBEntry *dbentry;
    5659             :
    5660             :     /*
    5661             :      * Store the last autovacuum time in the database's hashtable entry.
    5662             :      */
    5663          58 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5664             :
    5665          58 :     dbentry->last_autovac_time = msg->m_start_time;
    5666          58 : }
    5667             : 
    5668             : /* ----------
    5669             :  * pgstat_recv_vacuum() -
    5670             :  *
    5671             :  *  Process a VACUUM message: store tuple counts and bump the vacuum counters.
    5672             :  * ----------
    5673             :  */
    5674             : static void
    5675        5804 : pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
    5676             : {
    5677             :     PgStat_StatDBEntry *dbentry;
    5678             :     PgStat_StatTabEntry *tabentry;
    5679             :
    5680             :     /*
    5681             :      * Store the data in the table's hashtable entry.
    5682             :      */
    5683        5804 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5684             :
    5685        5804 :     tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
    5686             :
    5687        5804 :     tabentry->n_live_tuples = msg->m_live_tuples;
    5688        5804 :     tabentry->n_dead_tuples = msg->m_dead_tuples;
    5689             :
    5690             :     /*
    5691             :      * It is quite possible that a non-aggressive VACUUM ended up skipping
    5692             :      * various pages, however, we'll zero the insert counter here regardless.
    5693             :      * It's currently used only to track when we need to perform an "insert"
    5694             :      * autovacuum, which are mainly intended to freeze newly inserted tuples.
    5695             :      * Zeroing this may just mean we'll not try to vacuum the table again
    5696             :      * until enough tuples have been inserted to trigger another insert
    5697             :      * autovacuum.  An anti-wraparound autovacuum will catch any persistent
    5698             :      * stragglers.
    5699             :      */
    5700        5804 :     tabentry->inserts_since_vacuum = 0;
    5701             :
    5702        5804 :     if (msg->m_autovacuum)
    5703             :     {
    5704         114 :         tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime;
    5705         114 :         tabentry->autovac_vacuum_count++;
    5706             :     }
    5707             :     else
    5708             :     {
    5709        5690 :         tabentry->vacuum_timestamp = msg->m_vacuumtime;
    5710        5690 :         tabentry->vacuum_count++;
    5711             :     }
    5712        5804 : }
    5713             : 
    5714             : /* ----------
    5715             :  * pgstat_recv_analyze() -
    5716             :  *
    5717             :  *  Process an ANALYZE message: store tuple counts and bump the analyze counters.
    5718             :  * ----------
    5719             :  */
    5720             : static void
    5721        5400 : pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
    5722             : {
    5723             :     PgStat_StatDBEntry *dbentry;
    5724             :     PgStat_StatTabEntry *tabentry;
    5725             :
    5726             :     /*
    5727             :      * Store the data in the table's hashtable entry.
    5728             :      */
    5729        5400 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5730             :
    5731        5400 :     tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
    5732             :
    5733        5400 :     tabentry->n_live_tuples = msg->m_live_tuples;
    5734        5400 :     tabentry->n_dead_tuples = msg->m_dead_tuples;
    5735             :
    5736             :     /*
    5737             :      * If commanded, reset changes_since_analyze to zero.  This forgets any
    5738             :      * changes that were committed while the ANALYZE was in progress, but we
    5739             :      * have no good way to estimate how many of those there were.
    5740             :      */
    5741        5400 :     if (msg->m_resetcounter)
    5742        5368 :         tabentry->changes_since_analyze = 0;
    5743             :
    5744        5400 :     if (msg->m_autovacuum)
    5745             :     {
    5746         250 :         tabentry->autovac_analyze_timestamp = msg->m_analyzetime;
    5747         250 :         tabentry->autovac_analyze_count++;
    5748             :     }
    5749             :     else
    5750             :     {
    5751        5150 :         tabentry->analyze_timestamp = msg->m_analyzetime;
    5752        5150 :         tabentry->analyze_count++;
    5753             :     }
    5754        5400 : }
    5755             : 
    5756             : 
    5757             : /* ----------
    5758             :  * pgstat_recv_archiver() -
    5759             :  *
    5760             :  *  Process an ARCHIVER message: record a failed or successful archival attempt.
    5761             :  * ----------
    5762             :  */
    5763             : static void
    5764          84 : pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len)
    5765             : {
    5766          84 :     if (msg->m_failed)
    5767             :     {
    5768             :         /* Failed archival attempt: count it, remember the WAL name and time */
    5769           8 :         ++archiverStats.failed_count;
    5770           8 :         memcpy(archiverStats.last_failed_wal, msg->m_xlog,
    5771             :                sizeof(archiverStats.last_failed_wal));
    5772           8 :         archiverStats.last_failed_timestamp = msg->m_timestamp;
    5773             :     }
    5774             :     else
    5775             :     {
    5776             :         /* Successful archival operation: likewise for the archived fields */
    5777          76 :         ++archiverStats.archived_count;
    5778          76 :         memcpy(archiverStats.last_archived_wal, msg->m_xlog,
    5779             :                sizeof(archiverStats.last_archived_wal));
    5780          76 :         archiverStats.last_archived_timestamp = msg->m_timestamp;
    5781             :     }
    5782          84 : }
    5783             : 
    5784             : /* ----------
    5785             :  * pgstat_recv_bgwriter() -
    5786             :  *
    5787             :  *  Process a BGWRITER message: add its counters to globalStats.bgwriter.
    5788             :  * ----------
    5789             :  */
    5790             : static void
    5791        6826 : pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
    5792             : {
    5793        6826 :     globalStats.bgwriter.buf_written_clean += msg->m_buf_written_clean;
    5794        6826 :     globalStats.bgwriter.maxwritten_clean += msg->m_maxwritten_clean;
    5795        6826 :     globalStats.bgwriter.buf_alloc += msg->m_buf_alloc;
    5796        6826 : }
    5797             : 
    5798             : /* ----------
    5799             :  * pgstat_recv_checkpointer() -
    5800             :  *
    5801             :  *  Process a CHECKPOINTER message: add its counters to globalStats.checkpointer.
    5802             :  * ----------
    5803             :  */
    5804             : static void
    5805        6188 : pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len)
    5806             : {
    5807        6188 :     globalStats.checkpointer.timed_checkpoints += msg->m_timed_checkpoints;
    5808        6188 :     globalStats.checkpointer.requested_checkpoints += msg->m_requested_checkpoints;
    5809        6188 :     globalStats.checkpointer.checkpoint_write_time += msg->m_checkpoint_write_time;
    5810        6188 :     globalStats.checkpointer.checkpoint_sync_time += msg->m_checkpoint_sync_time;
    5811        6188 :     globalStats.checkpointer.buf_written_checkpoints += msg->m_buf_written_checkpoints;
    5812        6188 :     globalStats.checkpointer.buf_written_backend += msg->m_buf_written_backend;
    5813        6188 :     globalStats.checkpointer.buf_fsync_backend += msg->m_buf_fsync_backend;
    5814        6188 : }
    5815             : 
    5816             : /* ----------
    5817             :  * pgstat_recv_wal() -
    5818             :  *
    5819             :  *  Process a WAL message: add its counters and timings to walStats.
    5820             :  * ----------
    5821             :  */
    5822             : static void
    5823       11434 : pgstat_recv_wal(PgStat_MsgWal *msg, int len)
    5824             : {
    5825       11434 :     walStats.wal_records += msg->m_wal_records;
    5826       11434 :     walStats.wal_fpi += msg->m_wal_fpi;
    5827       11434 :     walStats.wal_bytes += msg->m_wal_bytes;
    5828       11434 :     walStats.wal_buffers_full += msg->m_wal_buffers_full;
    5829       11434 :     walStats.wal_write += msg->m_wal_write;
    5830       11434 :     walStats.wal_sync += msg->m_wal_sync;
    5831       11434 :     walStats.wal_write_time += msg->m_wal_write_time;
    5832       11434 :     walStats.wal_sync_time += msg->m_wal_sync_time;
    5833       11434 : }
    5834             : 
    5835             : /* ----------
    5836             :  * pgstat_recv_slru() -
    5837             :  *
    5838             :  *  Process an SLRU message; msg->m_index is used as-is (no range check here).
    5839             :  * ----------
    5840             :  */
    5841             : static void
    5842       10818 : pgstat_recv_slru(PgStat_MsgSLRU *msg, int len)
    5843             : {
    5844       10818 :     slruStats[msg->m_index].blocks_zeroed += msg->m_blocks_zeroed;
    5845       10818 :     slruStats[msg->m_index].blocks_hit += msg->m_blocks_hit;
    5846       10818 :     slruStats[msg->m_index].blocks_read += msg->m_blocks_read;
    5847       10818 :     slruStats[msg->m_index].blocks_written += msg->m_blocks_written;
    5848       10818 :     slruStats[msg->m_index].blocks_exists += msg->m_blocks_exists;
    5849       10818 :     slruStats[msg->m_index].flush += msg->m_flush;
    5850       10818 :     slruStats[msg->m_index].truncate += msg->m_truncate;
    5851       10818 : }
    5852             : 
    5853             : /* ----------
    5854             :  * pgstat_recv_recoveryconflict() -
    5855             :  *
    5856             :  *  Process a RECOVERYCONFLICT message: bump the database's counter for msg->m_reason.
    5857             :  * ----------
    5858             :  */
    5859             : static void
    5860           0 : pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
    5861             : {
    5862             :     PgStat_StatDBEntry *dbentry;
    5863             :
    5864           0 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5865             :
    5866           0 :     switch (msg->m_reason)
    5867             :     {
    5868           0 :         case PROCSIG_RECOVERY_CONFLICT_DATABASE:
    5869             :
    5870             :             /*
    5871             :              * Since we drop the information about the database as soon as it
    5872             :              * replicates, there is no point in counting these conflicts.
    5873             :              */
    5874           0 :             break;
    5875           0 :         case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
    5876           0 :             dbentry->n_conflict_tablespace++;
    5877           0 :             break;
    5878           0 :         case PROCSIG_RECOVERY_CONFLICT_LOCK:
    5879           0 :             dbentry->n_conflict_lock++;
    5880           0 :             break;
    5881           0 :         case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
    5882           0 :             dbentry->n_conflict_snapshot++;
    5883           0 :             break;
    5884           0 :         case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
    5885           0 :             dbentry->n_conflict_bufferpin++;
    5886           0 :             break;
    5887           0 :         case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
    5888           0 :             dbentry->n_conflict_startup_deadlock++;
    5889           0 :             break;
    5890             :     }
    5891           0 : }
    5892             : 
    5893             : /* ----------
    5894             :  * pgstat_recv_deadlock() -
    5895             :  *
    5896             :  *  Process a DEADLOCK message.
    5897             :  * ----------
    5898             :  */
    5899             : static void
    5900           4 : pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len)
    5901             : {
    5902             :     PgStat_StatDBEntry *dbentry;
    5903             : 
    5904           4 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5905             : 
    5906           4 :     dbentry->n_deadlocks++;
    5907           4 : }
    5908             : 
    5909             : /* ----------
    5910             :  * pgstat_recv_checksum_failure() -
    5911             :  *
    5912             :  *  Process a CHECKSUMFAILURE message.
    5913             :  * ----------
    5914             :  */
    5915             : static void
    5916           4 : pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len)
    5917             : {
    5918             :     PgStat_StatDBEntry *dbentry;
    5919             : 
    5920           4 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5921             : 
    5922           4 :     dbentry->n_checksum_failures += msg->m_failurecount;
    5923           4 :     dbentry->last_checksum_failure = msg->m_failure_time;
    5924           4 : }
    5925             : 
    5926             : /* ----------
    5927             :  * pgstat_recv_replslot() -
    5928             :  *
    5929             :  *  Process a REPLSLOT message.
    5930             :  * ----------
    5931             :  */
    5932             : static void
    5933        8868 : pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
    5934             : {
    5935        8868 :     if (msg->m_drop)
    5936             :     {
    5937             :         Assert(!msg->m_create);
    5938             : 
    5939             :         /* Remove the replication slot statistics with the given name */
    5940         372 :         if (replSlotStatHash != NULL)
    5941         370 :             (void) hash_search(replSlotStatHash,
    5942         370 :                                (void *) &(msg->m_slotname),
    5943             :                                HASH_REMOVE,
    5944             :                                NULL);
    5945             :     }
    5946             :     else
    5947             :     {
    5948             :         PgStat_StatReplSlotEntry *slotent;
    5949             : 
    5950        8496 :         slotent = pgstat_get_replslot_entry(msg->m_slotname, true);
    5951             :         Assert(slotent);
    5952             : 
    5953        8496 :         if (msg->m_create)
    5954             :         {
    5955             :             /*
    5956             :              * If the message for dropping the slot with the same name gets
    5957             :              * lost, slotent has stats for the old slot. So we initialize all
    5958             :              * counters at slot creation.
    5959             :              */
    5960         434 :             pgstat_reset_replslot(slotent, 0);
    5961             :         }
    5962             :         else
    5963             :         {
    5964             :             /* Update the replication slot statistics */
    5965        8062 :             slotent->spill_txns += msg->m_spill_txns;
    5966        8062 :             slotent->spill_count += msg->m_spill_count;
    5967        8062 :             slotent->spill_bytes += msg->m_spill_bytes;
    5968        8062 :             slotent->stream_txns += msg->m_stream_txns;
    5969        8062 :             slotent->stream_count += msg->m_stream_count;
    5970        8062 :             slotent->stream_bytes += msg->m_stream_bytes;
    5971        8062 :             slotent->total_txns += msg->m_total_txns;
    5972        8062 :             slotent->total_bytes += msg->m_total_bytes;
    5973             :         }
    5974             :     }
    5975        8868 : }
    5976             : 
    5977             : /* ----------
    5978             :  * pgstat_recv_connect() -
    5979             :  *
    5980             :  *  Process a CONNECT message.
    5981             :  * ----------
    5982             :  */
    5983             : static void
    5984       10900 : pgstat_recv_connect(PgStat_MsgConnect *msg, int len)
    5985             : {
    5986             :     PgStat_StatDBEntry *dbentry;
    5987             : 
    5988       10900 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    5989       10900 :     dbentry->n_sessions++;
    5990       10900 : }
    5991             : 
    5992             : /* ----------
    5993             :  * pgstat_recv_disconnect() -
    5994             :  *
    5995             :  *  Process a DISCONNECT message.
    5996             :  * ----------
    5997             :  */
    5998             : static void
    5999       10878 : pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len)
    6000             : {
    6001             :     PgStat_StatDBEntry *dbentry;
    6002             : 
    6003       10878 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    6004             : 
    6005       10878 :     switch (msg->m_cause)
    6006             :     {
    6007       10840 :         case DISCONNECT_NOT_YET:
    6008             :         case DISCONNECT_NORMAL:
    6009             :             /* we don't collect these */
    6010       10840 :             break;
    6011          34 :         case DISCONNECT_CLIENT_EOF:
    6012          34 :             dbentry->n_sessions_abandoned++;
    6013          34 :             break;
    6014           0 :         case DISCONNECT_FATAL:
    6015           0 :             dbentry->n_sessions_fatal++;
    6016           0 :             break;
    6017           4 :         case DISCONNECT_KILLED:
    6018           4 :             dbentry->n_sessions_killed++;
    6019           4 :             break;
    6020             :     }
    6021       10878 : }
    6022             : 
    6023             : /* ----------
    6024             :  * pgstat_recv_tempfile() -
    6025             :  *
    6026             :  *  Process a TEMPFILE message.
    6027             :  * ----------
    6028             :  */
    6029             : static void
    6030        3420 : pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len)
    6031             : {
    6032             :     PgStat_StatDBEntry *dbentry;
    6033             : 
    6034        3420 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    6035             : 
    6036        3420 :     dbentry->n_temp_bytes += msg->m_filesize;
    6037        3420 :     dbentry->n_temp_files += 1;
    6038        3420 : }
    6039             : 
    6040             : /* ----------
    6041             :  * pgstat_recv_funcstat() -
    6042             :  *
    6043             :  *  Count what the backend has done.
    6044             :  * ----------
    6045             :  */
    6046             : static void
    6047           0 : pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len)
    6048             : {
    6049           0 :     PgStat_FunctionEntry *funcmsg = &(msg->m_entry[0]);
    6050             :     PgStat_StatDBEntry *dbentry;
    6051             :     PgStat_StatFuncEntry *funcentry;
    6052             :     int         i;
    6053             :     bool        found;
    6054             : 
    6055           0 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    6056             : 
    6057             :     /*
    6058             :      * Process all function entries in the message.
    6059             :      */
    6060           0 :     for (i = 0; i < msg->m_nentries; i++, funcmsg++)
    6061             :     {
    6062           0 :         funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
    6063           0 :                                                          (void *) &(funcmsg->f_id),
    6064             :                                                          HASH_ENTER, &found);
    6065             : 
    6066           0 :         if (!found)
    6067             :         {
    6068             :             /*
    6069             :              * If it's a new function entry, initialize counters to the values
    6070             :              * we just got.
    6071             :              */
    6072           0 :             funcentry->f_numcalls = funcmsg->f_numcalls;
    6073           0 :             funcentry->f_total_time = funcmsg->f_total_time;
    6074           0 :             funcentry->f_self_time = funcmsg->f_self_time;
    6075             :         }
    6076             :         else
    6077             :         {
    6078             :             /*
    6079             :              * Otherwise add the values to the existing entry.
    6080             :              */
    6081           0 :             funcentry->f_numcalls += funcmsg->f_numcalls;
    6082           0 :             funcentry->f_total_time += funcmsg->f_total_time;
    6083           0 :             funcentry->f_self_time += funcmsg->f_self_time;
    6084             :         }
    6085             :     }
    6086           0 : }
    6087             : 
    6088             : /* ----------
    6089             :  * pgstat_recv_funcpurge() -
    6090             :  *
    6091             :  *  Arrange for dead function removal.
    6092             :  * ----------
    6093             :  */
    6094             : static void
    6095           0 : pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
    6096             : {
    6097             :     PgStat_StatDBEntry *dbentry;
    6098             :     int         i;
    6099             : 
    6100           0 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
    6101             : 
    6102             :     /*
    6103             :      * No need to purge if we don't even know the database.
    6104             :      */
    6105           0 :     if (!dbentry || !dbentry->functions)
    6106           0 :         return;
    6107             : 
    6108             :     /*
    6109             :      * Process all function entries in the message.
    6110             :      */
    6111           0 :     for (i = 0; i < msg->m_nentries; i++)
    6112             :     {
    6113             :         /* Remove from hashtable if present; we don't care if it's not. */
    6114           0 :         (void) hash_search(dbentry->functions,
    6115           0 :                            (void *) &(msg->m_functionid[i]),
    6116             :                            HASH_REMOVE, NULL);
    6117             :     }
    6118             : }
    6119             : 
    6120             : /* ----------
    6121             :  * pgstat_recv_subscription_purge() -
    6122             :  *
    6123             :  *  Process a SUBSCRIPTIONPURGE message.
    6124             :  * ----------
    6125             :  */
    6126             : static void
    6127          34 : pgstat_recv_subscription_purge(PgStat_MsgSubscriptionPurge *msg, int len)
    6128             : {
    6129             :     HASH_SEQ_STATUS hstat;
    6130             :     PgStat_StatDBEntry *dbentry;
    6131             :     PgStat_StatSubWorkerEntry *subwentry;
    6132             : 
    6133          34 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
    6134             : 
    6135             :     /* No need to purge if we don't even know the database */
    6136          34 :     if (!dbentry || !dbentry->subworkers)
    6137           0 :         return;
    6138             : 
    6139             :     /* Remove all subscription worker statistics for the given subscriptions */
    6140          34 :     hash_seq_init(&hstat, dbentry->subworkers);
    6141          42 :     while ((subwentry = (PgStat_StatSubWorkerEntry *) hash_seq_search(&hstat)) != NULL)
    6142             :     {
    6143           8 :         for (int i = 0; i < msg->m_nentries; i++)
    6144             :         {
    6145           8 :             if (subwentry->key.subid == msg->m_subids[i])
    6146             :             {
    6147           8 :                 (void) hash_search(dbentry->subworkers, (void *) &(subwentry->key),
    6148             :                                    HASH_REMOVE, NULL);
    6149           8 :                 break;
    6150             :             }
    6151             :         }
    6152             :     }
    6153             : }
    6154             : 
    6155             : /* ----------
    6156             :  * pgstat_recv_subworker_error() -
    6157             :  *
    6158             :  *  Process a SUBWORKERERROR message.
    6159             :  * ----------
    6160             :  */
    6161             : static void
    6162          14 : pgstat_recv_subworker_error(PgStat_MsgSubWorkerError *msg, int len)
    6163             : {
    6164             :     PgStat_StatDBEntry *dbentry;
    6165             :     PgStat_StatSubWorkerEntry *subwentry;
    6166             : 
    6167          14 :     dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
    6168             : 
    6169             :     /* Get the subscription worker stats */
    6170          14 :     subwentry = pgstat_get_subworker_entry(dbentry, msg->m_subid,
    6171             :                                            msg->m_subrelid, true);
    6172             :     Assert(subwentry);
    6173             : 
    6174          14 :     if (subwentry->last_error_relid == msg->m_relid &&
    6175           6 :         subwentry->last_error_command == msg->m_command &&
    6176           2 :         subwentry->last_error_xid == msg->m_xid &&
    6177           0 :         strcmp(subwentry->last_error_message, msg->m_message) == 0)
    6178             :     {
    6179             :         /*
    6180             :          * The same error occurred again in succession, just update its
    6181             :          * timestamp and count.
    6182             :          */
    6183           0 :         subwentry->last_error_count++;
    6184           0 :         subwentry->last_error_time = msg->m_timestamp;
    6185           0 :         return;
    6186             :     }
    6187             : 
    6188             :     /* Otherwise, update the error information */
    6189          14 :     subwentry->last_error_relid = msg->m_relid;
    6190          14 :     subwentry->last_error_command = msg->m_command;
    6191          14 :     subwentry->last_error_xid = msg->m_xid;
    6192          14 :     subwentry->last_error_count = 1;
    6193          14 :     subwentry->last_error_time = msg->m_timestamp;
    6194          14 :     strlcpy(subwentry->last_error_message, msg->m_message,
    6195             :             PGSTAT_SUBWORKERERROR_MSGLEN);
    6196             : }
    6197             : 
    6198             : /* ----------
    6199             :  * pgstat_write_statsfile_needed() -
    6200             :  *
    6201             :  *  Do we need to write out any stats files?
    6202             :  * ----------
    6203             :  */
    6204             : static bool
    6205      320810 : pgstat_write_statsfile_needed(void)
    6206             : {
    6207      320810 :     if (pending_write_requests != NIL)
    6208        1464 :         return true;
    6209             : 
    6210             :     /* Everything was written recently */
    6211      319346 :     return false;
    6212             : }
    6213             : 
    6214             : /* ----------
    6215             :  * pgstat_db_requested() -
    6216             :  *
    6217             :  *  Checks whether stats for a particular DB need to be written to a file.
    6218             :  * ----------
    6219             :  */
    6220             : static bool
    6221        2326 : pgstat_db_requested(Oid databaseid)
    6222             : {
    6223             :     /*
    6224             :      * If any requests are outstanding at all, we should write the stats for
    6225             :      * shared catalogs (the "database" with OID 0).  This ensures that
    6226             :      * backends will see up-to-date stats for shared catalogs, even though
    6227             :      * they send inquiry messages mentioning only their own DB.
    6228             :      */
    6229        2326 :     if (databaseid == InvalidOid && pending_write_requests != NIL)
    6230         846 :         return true;
    6231             : 
    6232             :     /* Search to see if there's an open request to write this database. */
    6233        1480 :     if (list_member_oid(pending_write_requests, databaseid))
    6234         600 :         return true;
    6235             : 
    6236         880 :     return false;
    6237             : }
    6238             : 
    6239             : /* ----------
    6240             :  * pgstat_replslot_entry
    6241             :  *
    6242             :  * Return the entry of replication slot stats with the given name. Return
    6243             :  * NULL if not found and the caller didn't request to create it.
    6244             :  *
    6245             :  * create tells whether to create the new slot entry if it is not found.
    6246             :  * ----------
    6247             :  */
    6248             : static PgStat_StatReplSlotEntry *
    6249        8576 : pgstat_get_replslot_entry(NameData name, bool create)
    6250             : {
    6251             :     PgStat_StatReplSlotEntry *slotent;
    6252             :     bool        found;
    6253             : 
    6254        8576 :     if (replSlotStatHash == NULL)
    6255             :     {
    6256             :         HASHCTL     hash_ctl;
    6257             : 
    6258             :         /*
    6259             :          * Quick return NULL if the hash table is empty and the caller didn't
    6260             :          * request to create the entry.
    6261             :          */
    6262         108 :         if (!create)
    6263          24 :             return NULL;
    6264             : 
    6265          84 :         hash_ctl.keysize = sizeof(NameData);
    6266          84 :         hash_ctl.entrysize = sizeof(PgStat_StatReplSlotEntry);
    6267          84 :         replSlotStatHash = hash_create("Replication slots hash",
    6268             :                                        PGSTAT_REPLSLOT_HASH_SIZE,
    6269             :                                        &hash_ctl,
    6270             :                                        HASH_ELEM | HASH_BLOBS);
    6271             :     }
    6272             : 
    6273        8552 :     slotent = (PgStat_StatReplSlotEntry *) hash_search(replSlotStatHash,
    6274             :                                                        (void *) &name,
    6275             :                                                        create ? HASH_ENTER : HASH_FIND,
    6276             :                                                        &found);
    6277             : 
    6278        8552 :     if (!slotent)
    6279             :     {
    6280             :         /* not found */
    6281             :         Assert(!create && !found);
    6282           0 :         return NULL;
    6283             :     }
    6284             : 
    6285             :     /* initialize the entry */
    6286        8552 :     if (create && !found)
    6287             :     {
    6288         444 :         namestrcpy(&(slotent->slotname), NameStr(name));
    6289         444 :         pgstat_reset_replslot(slotent, 0);
    6290             :     }
    6291             : 
    6292        8552 :     return slotent;
    6293             : }
    6294             : 
    6295             : /* ----------
    6296             :  * pgstat_reset_replslot
    6297             :  *
    6298             :  * Reset the given replication slot stats.
    6299             :  * ----------
    6300             :  */
    6301             : static void
    6302         880 : pgstat_reset_replslot(PgStat_StatReplSlotEntry *slotent, TimestampTz ts)
    6303             : {
    6304             :     /* reset only counters. Don't clear slot name */
    6305         880 :     slotent->spill_txns = 0;
    6306         880 :     slotent->spill_count = 0;
    6307         880 :     slotent->spill_bytes = 0;
    6308         880 :     slotent->stream_txns = 0;
    6309         880 :     slotent->stream_count = 0;
    6310         880 :     slotent->stream_bytes = 0;
    6311         880 :     slotent->total_txns = 0;
    6312         880 :     slotent->total_bytes = 0;
    6313         880 :     slotent->stat_reset_timestamp = ts;
    6314         880 : }
    6315             : 
    6316             : /*
    6317             :  * pgstat_slru_index
    6318             :  *
    6319             :  * Determine index of entry for a SLRU with a given name. If there's no exact
    6320             :  * match, returns index of the last "other" entry used for SLRUs defined in
    6321             :  * external projects.
    6322             :  */
    6323             : int
    6324       20258 : pgstat_slru_index(const char *name)
    6325             : {
    6326             :     int         i;
    6327             : 
    6328       81032 :     for (i = 0; i < SLRU_NUM_ELEMENTS; i++)
    6329             :     {
    6330       81032 :         if (strcmp(slru_names[i], name) == 0)
    6331       20258 :             return i;
    6332             :     }
    6333             : 
    6334             :     /* return index of the last entry (which is the "other" one) */
    6335           0 :     return (SLRU_NUM_ELEMENTS - 1);
    6336             : }
    6337             : 
    6338             : /*
    6339             :  * pgstat_slru_name
    6340             :  *
    6341             :  * Returns SLRU name for an index. The index may be above SLRU_NUM_ELEMENTS,
    6342             :  * in which case this returns NULL. This allows writing code that does not
    6343             :  * know the number of entries in advance.
    6344             :  */
    6345             : const char *
    6346          36 : pgstat_slru_name(int slru_idx)
    6347             : {
    6348          36 :     if (slru_idx < 0 || slru_idx >= SLRU_NUM_ELEMENTS)
    6349           4 :         return NULL;
    6350             : 
    6351          32 :     return slru_names[slru_idx];
    6352             : }
    6353             : 
    6354             : /*
    6355             :  * slru_entry
    6356             :  *
    6357             :  * Returns pointer to entry with counters for given SLRU (based on the name
    6358             :  * stored in SlruCtl as lwlock tranche name).
    6359             :  */
    6360             : static inline PgStat_MsgSLRU *
    6361     3418276 : slru_entry(int slru_idx)
    6362             : {
    6363     3418276 :     pgstat_assert_is_up();
    6364             : 
    6365             :     /*
    6366             :      * The postmaster should never register any SLRU statistics counts; if it
    6367             :      * did, the counts would be duplicated into child processes via fork().
    6368             :      */
    6369             :     Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
    6370             : 
    6371             :     Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_ELEMENTS));
    6372             : 
    6373     3418276 :     return &SLRUStats[slru_idx];
    6374             : }
    6375             : 
    6376             : /*
    6377             :  * SLRU statistics count accumulation functions --- called from slru.c
    6378             :  */
    6379             : 
    6380             : void
    6381        4844 : pgstat_count_slru_page_zeroed(int slru_idx)
    6382             : {
    6383        4844 :     slru_entry(slru_idx)->m_blocks_zeroed += 1;
    6384        4844 : }
    6385             : 
    6386             : void
    6387     3377930 : pgstat_count_slru_page_hit(int slru_idx)
    6388             : {
    6389     3377930 :     slru_entry(slru_idx)->m_blocks_hit += 1;
    6390     3377930 : }
    6391             : 
    6392             : void
    6393          52 : pgstat_count_slru_page_exists(int slru_idx)
    6394             : {
    6395          52 :     slru_entry(slru_idx)->m_blocks_exists += 1;
    6396          52 : }
    6397             : 
    6398             : void
    6399        2712 : pgstat_count_slru_page_read(int slru_idx)
    6400             : {
    6401        2712 :     slru_entry(slru_idx)->m_blocks_read += 1;
    6402        2712 : }
    6403             : 
    6404             : void
    6405        7908 : pgstat_count_slru_page_written(int slru_idx)
    6406             : {
    6407        7908 :     slru_entry(slru_idx)->m_blocks_written += 1;
    6408        7908 : }
    6409             : 
    6410             : void
    6411       20734 : pgstat_count_slru_flush(int slru_idx)
    6412             : {
    6413       20734 :     slru_entry(slru_idx)->m_flush += 1;
    6414       20734 : }
    6415             : 
    6416             : void
    6417        4096 : pgstat_count_slru_truncate(int slru_idx)
    6418             : {
    6419        4096 :     slru_entry(slru_idx)->m_truncate += 1;
    6420        4096 : }

Generated by: LCOV version 1.14