Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered pending stats data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_relation.c
87 : * - pgstat_replslot.c
88 : * - pgstat_slru.c
89 : * - pgstat_subscription.c
90 : * - pgstat_wal.c
91 : *
92 : * Whenever possible infrastructure files should not contain code related to
93 : * specific kinds of stats.
94 : *
95 : *
96 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97 : *
98 : * IDENTIFICATION
99 : * src/backend/utils/activity/pgstat.c
100 : * ----------
101 : */
102 : #include "postgres.h"
103 :
104 : #include <unistd.h>
105 :
106 : #include "access/xact.h"
107 : #include "access/xlog.h"
108 : #include "lib/dshash.h"
109 : #include "pgstat.h"
110 : #include "storage/fd.h"
111 : #include "storage/ipc.h"
112 : #include "storage/lwlock.h"
113 : #include "utils/guc_hooks.h"
114 : #include "utils/memutils.h"
115 : #include "utils/pgstat_internal.h"
116 : #include "utils/timestamp.h"
117 :
118 :
119 : /* ----------
120 : * Timer definitions.
121 : *
122 : * In milliseconds.
123 : * ----------
124 : */
125 :
126 : /* minimum interval between non-forced stats flushes */
127 : #define PGSTAT_MIN_INTERVAL 1000
128 : /* how long stats updates may stay pending before a flush is forced, waiting for locks if needed */
129 : #define PGSTAT_MAX_INTERVAL 60000
130 : /* when to call pgstat_report_stat() again, even when idle */
131 : #define PGSTAT_IDLE_INTERVAL 10000
132 :
133 : /* ----------
134 : * Initial size hints for the hash tables used in statistics.
135 : * ----------
136 : */
137 :
138 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
139 :
140 : /* ---------
141 : * Identifiers in stats file.
142 : * ---------
143 : */
144 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
145 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
146 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
147 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
148 : * PgStat_HashKey */
149 :
150 : /* one entry of the backend-local hash table holding statistics snapshots */
151 : typedef struct PgStat_SnapshotEntry
152 : {
153 : PgStat_HashKey key; /* lookup key (SH_KEY of the pgstat_snapshot hash) */
154 : char status; /* for simplehash use */
155 : void *data; /* the stats data itself */
156 : } PgStat_SnapshotEntry;
157 :
158 :
159 : /* ----------
160 : * Backend-local Hash Table Definitions
161 : * ----------
162 : */
163 :
164 : /*
 * For stats snapshot entries: instantiate a simplehash table with prefix
 * pgstat_snapshot, keyed by PgStat_HashKey and storing PgStat_SnapshotEntry
 * elements. Keys are hashed and compared over the whole
 * sizeof(PgStat_HashKey) bytes.
 */
165 : #define SH_PREFIX pgstat_snapshot
166 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
167 : #define SH_KEY_TYPE PgStat_HashKey
168 : #define SH_KEY key
169 : #define SH_HASH_KEY(tb, key) \
170 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
171 : #define SH_EQUAL(tb, a, b) \
172 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
173 : #define SH_SCOPE static inline
174 : #define SH_DEFINE
175 : #define SH_DECLARE
176 : #include "lib/simplehash.h"
177 :
178 :
179 : /* ----------
180 : * Local function forward declarations
181 : * ----------
182 : */
183 :
184 : static void pgstat_write_statsfile(XLogRecPtr redo);
185 : static void pgstat_read_statsfile(XLogRecPtr redo);
186 :
187 : static void pgstat_init_snapshot_fixed(void);
188 :
189 : static void pgstat_reset_after_failure(void);
190 :
191 : static bool pgstat_flush_pending_entries(bool nowait);
192 :
193 : static void pgstat_prep_snapshot(void);
194 : static void pgstat_build_snapshot(void);
195 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
196 :
197 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
198 :
199 :
200 : /* ----------
201 : * GUC parameters
202 : * ----------
203 : */
204 :
205 : bool pgstat_track_counts = false;
206 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE; /* backs the stats_fetch_consistency GUC */
207 :
208 :
209 : /* ----------
210 : * state shared with pgstat_*.c
211 : * ----------
212 : */
213 :
214 : PgStat_LocalState pgStatLocal;
215 :
216 :
217 : /* ----------
218 : * Local data
219 : *
220 : * NB: There should be only variables related to stats infrastructure here,
221 : * not for specific kinds of stats.
222 : * ----------
223 : */
224 :
225 : /*
226 : * Memory context containing pending stats data. Mostly to make it
227 : * easier to track / attribute memory usage.
228 : *
 * NOTE(review): this comment used to describe three contexts (for the
 * pgStatEntryRefHash table, the pgStatSharedRef entries, and pending data),
 * but only the pending-data context is declared here - confirm where the
 * other two live.
229 : */
230 :
231 : static MemoryContext pgStatPendingContext = NULL;
232 :
233 : /*
234 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
235 : *
236 : * Newly pending entries should only ever be added to the end of the list,
237 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
238 : */
239 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
240 :
241 :
242 : /*
243 : * Force the next stats flush to happen regardless of
244 : * PGSTAT_MIN_INTERVAL. Useful in test scripts. Set by
 * pgstat_force_next_flush() and consumed (reset) by pgstat_report_stat().
245 : */
246 : static bool pgStatForceNextFlush = false;
247 :
248 : /*
249 : * Force-clear existing snapshot before next use when stats_fetch_consistency
250 : * is changed. Reset to false by pgstat_clear_snapshot().
251 : */
252 : static bool force_stats_snapshot_clear = false;
253 :
254 :
255 : /*
256 : * For assertions that check pgstat is not used before initialization / after
257 : * shutdown. Set by pgstat_initialize() and pgstat_shutdown_hook()
 * respectively; only present in assert-enabled builds.
258 : */
259 : #ifdef USE_ASSERT_CHECKING
260 : static bool pgstat_is_initialized = false;
261 : static bool pgstat_is_shutdown = false;
262 : #endif
263 :
264 :
265 : /*
266 : * The different kinds of built-in statistics.
267 : *
268 : * If reasonably possible, handling specific to one kind of stats should go
269 : * through this abstraction, rather than making more of pgstat.c aware.
270 : *
271 : * See comments for struct PgStat_KindInfo for details about the individual
272 : * fields.
273 : *
274 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
275 : * seem to be a great way of doing that, given the split across multiple
276 : * files.
277 : */
278 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
279 :
280 : /* stats kinds for variable-numbered objects (each entry is sized by .shared_size) */
281 :
282 : [PGSTAT_KIND_DATABASE] = {
283 : .name = "database",
284 :
285 : .fixed_amount = false,
286 : .write_to_file = true,
287 : /* so pg_stat_database entries can be seen in all databases */
288 : .accessed_across_databases = true,
289 :
290 : .shared_size = sizeof(PgStatShared_Database),
291 : .shared_data_off = offsetof(PgStatShared_Database, stats),
292 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
293 : .pending_size = sizeof(PgStat_StatDBEntry),
294 :
295 : .flush_pending_cb = pgstat_database_flush_cb,
296 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
297 : },
298 :
299 : [PGSTAT_KIND_RELATION] = {
300 : .name = "relation",
301 :
302 : .fixed_amount = false,
303 : .write_to_file = true,
304 :
305 : .shared_size = sizeof(PgStatShared_Relation),
306 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
307 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
308 : .pending_size = sizeof(PgStat_TableStatus),
309 :
310 : .flush_pending_cb = pgstat_relation_flush_cb,
311 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
312 : },
313 :
314 : [PGSTAT_KIND_FUNCTION] = {
315 : .name = "function",
316 :
317 : .fixed_amount = false,
318 : .write_to_file = true,
319 :
320 : .shared_size = sizeof(PgStatShared_Function),
321 : .shared_data_off = offsetof(PgStatShared_Function, stats),
322 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
323 : .pending_size = sizeof(PgStat_FunctionCounts),
324 :
325 : .flush_pending_cb = pgstat_function_flush_cb,
326 : },
327 :
328 : [PGSTAT_KIND_REPLSLOT] = {
329 : .name = "replslot",
330 :
331 : .fixed_amount = false,
332 : .write_to_file = true,
333 :
334 : .accessed_across_databases = true,
335 :
336 : .shared_size = sizeof(PgStatShared_ReplSlot),
337 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
338 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
339 :
340 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
341 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb, /* only built-in kind here with name (de)serialization callbacks */
342 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
343 : },
344 :
345 : [PGSTAT_KIND_SUBSCRIPTION] = {
346 : .name = "subscription",
347 :
348 : .fixed_amount = false,
349 : .write_to_file = true,
350 : /* so pg_stat_subscription_stats entries can be seen in all databases */
351 : .accessed_across_databases = true,
352 :
353 : .shared_size = sizeof(PgStatShared_Subscription),
354 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
355 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
356 : .pending_size = sizeof(PgStat_BackendSubEntry),
357 :
358 : .flush_pending_cb = pgstat_subscription_flush_cb,
359 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
360 : },
361 :
362 : [PGSTAT_KIND_BACKEND] = {
363 : .name = "backend",
364 :
365 : .fixed_amount = false,
366 : .write_to_file = false, /* the only built-in kind in this table not written to the stats file */
367 :
368 : .accessed_across_databases = true,
369 :
370 : .shared_size = sizeof(PgStatShared_Backend),
371 : .shared_data_off = offsetof(PgStatShared_Backend, stats),
372 : .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
373 : .pending_size = sizeof(PgStat_BackendPending),
374 :
375 : .flush_pending_cb = pgstat_backend_flush_cb,
376 : .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
377 : },
378 :
379 : /* stats for fixed-numbered (mostly 1) objects; located via .shared_ctl_off in PgStat_ShmemControl and .snapshot_ctl_off in PgStat_Snapshot */
380 :
381 : [PGSTAT_KIND_ARCHIVER] = {
382 : .name = "archiver",
383 :
384 : .fixed_amount = true,
385 : .write_to_file = true,
386 :
387 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
388 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
389 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
390 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
391 :
392 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
393 : .reset_all_cb = pgstat_archiver_reset_all_cb,
394 : .snapshot_cb = pgstat_archiver_snapshot_cb,
395 : },
396 :
397 : [PGSTAT_KIND_BGWRITER] = {
398 : .name = "bgwriter",
399 :
400 : .fixed_amount = true,
401 : .write_to_file = true,
402 :
403 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
404 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
405 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
406 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
407 :
408 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
409 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
410 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
411 : },
412 :
413 : [PGSTAT_KIND_CHECKPOINTER] = {
414 : .name = "checkpointer",
415 :
416 : .fixed_amount = true,
417 : .write_to_file = true,
418 :
419 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
420 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
421 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
422 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
423 :
424 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
425 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
426 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
427 : },
428 :
429 : [PGSTAT_KIND_IO] = {
430 : .name = "io",
431 :
432 : .fixed_amount = true,
433 : .write_to_file = true,
434 :
435 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
436 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
437 : .shared_data_off = offsetof(PgStatShared_IO, stats),
438 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
439 :
440 : .flush_fixed_cb = pgstat_io_flush_cb,
441 : .have_fixed_pending_cb = pgstat_io_have_pending_cb,
442 : .init_shmem_cb = pgstat_io_init_shmem_cb,
443 : .reset_all_cb = pgstat_io_reset_all_cb,
444 : .snapshot_cb = pgstat_io_snapshot_cb,
445 : },
446 :
447 : [PGSTAT_KIND_SLRU] = {
448 : .name = "slru",
449 :
450 : .fixed_amount = true,
451 : .write_to_file = true,
452 :
453 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
454 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
455 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
456 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
457 :
458 : .flush_fixed_cb = pgstat_slru_flush_cb,
459 : .have_fixed_pending_cb = pgstat_slru_have_pending_cb,
460 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
461 : .reset_all_cb = pgstat_slru_reset_all_cb,
462 : .snapshot_cb = pgstat_slru_snapshot_cb,
463 : },
464 :
465 : [PGSTAT_KIND_WAL] = {
466 : .name = "wal",
467 :
468 : .fixed_amount = true,
469 : .write_to_file = true,
470 :
471 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
472 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
473 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
474 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
475 :
476 : .init_backend_cb = pgstat_wal_init_backend_cb, /* invoked from pgstat_initialize() */
477 : .flush_fixed_cb = pgstat_wal_flush_cb,
478 : .have_fixed_pending_cb = pgstat_wal_have_pending_cb,
479 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
480 : .reset_all_cb = pgstat_wal_reset_all_cb,
481 : .snapshot_cb = pgstat_wal_snapshot_cb,
482 : },
483 : };
484 :
485 : /*
486 : * Information about custom statistics kinds.
487 : *
488 : * These are saved in a different array than the built-in kinds to keep
489 : * the initialization of the built-in kinds clear.
490 : *
491 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
492 : */
493 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
494 :
495 : /* ------------------------------------------------------------
496 : * Functions managing the state of the stats system for all backends.
497 : * ------------------------------------------------------------
498 : */
499 :
500 : /*
501 : * Read on-disk stats into memory at server start.
502 : *
503 : * Should only be called by the startup process or in single user mode.
504 : */
505 : void
506 1310 : pgstat_restore_stats(XLogRecPtr redo)
507 : {
508 1310 : pgstat_read_statsfile(redo);
509 1310 : }
510 :
511 : /*
512 : * Remove the stats file. This is currently used only if WAL recovery is
513 : * needed after a crash.
514 : *
515 : * Should only be called by the startup process or in single user mode.
516 : */
517 : void
518 340 : pgstat_discard_stats(void)
519 : {
520 : int ret;
521 :
522 : /* NB: this needs to be done even in single user mode */
523 :
524 340 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
525 340 : if (ret != 0)
526 : {
527 338 : if (errno == ENOENT)
528 338 : elog(DEBUG2,
529 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
530 : PGSTAT_STAT_PERMANENT_FILENAME);
531 : else
532 0 : ereport(LOG,
533 : (errcode_for_file_access(),
534 : errmsg("could not unlink permanent statistics file \"%s\": %m",
535 : PGSTAT_STAT_PERMANENT_FILENAME)));
536 : }
537 : else
538 : {
539 2 : ereport(DEBUG2,
540 : (errcode_for_file_access(),
541 : errmsg_internal("unlinked permanent statistics file \"%s\"",
542 : PGSTAT_STAT_PERMANENT_FILENAME)));
543 : }
544 :
545 : /*
546 : * Reset stats contents. This will set reset timestamps of fixed-numbered
547 : * stats to the current time (no variable stats exist).
548 : */
549 340 : pgstat_reset_after_failure();
550 340 : }
551 :
552 : /*
553 : * pgstat_before_server_shutdown() needs to be called by exactly one process
554 : * during regular server shutdowns. Otherwise all stats will be lost.
555 : *
556 : * We currently only write out stats for proc_exit(0). We might want to change
557 : * that at some point... But right now pgstat_discard_stats() would be called
558 : * during the start after a disorderly shutdown, anyway.
559 : */
560 : void
561 1098 : pgstat_before_server_shutdown(int code, Datum arg)
562 : {
563 : Assert(pgStatLocal.shmem != NULL);
564 : Assert(!pgStatLocal.shmem->is_shutdown);
565 :
566 : /*
567 : * Stats should only be reported after pgstat_initialize() and before
568 : * pgstat_shutdown(). This is a convenient point to catch most violations
569 : * of this rule.
570 : */
571 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
572 :
573 : /* flush out our own pending changes before writing out */
574 1098 : pgstat_report_stat(true);
575 :
576 : /*
577 : * Only write out file during normal shutdown. Don't even signal that
578 : * we've shutdown during irregular shutdowns, because the shutdown
579 : * sequence isn't coordinated to ensure this backend shuts down last.
580 : */
581 1098 : if (code == 0)
582 : {
583 1088 : pgStatLocal.shmem->is_shutdown = true;
584 1088 : pgstat_write_statsfile(GetRedoRecPtr());
585 : }
586 1098 : }
587 :
588 :
589 : /* ------------------------------------------------------------
590 : * Backend initialization / shutdown functions
591 : * ------------------------------------------------------------
592 : */
593 :
594 : /*
595 : * Shut down a single backend's statistics reporting at process exit.
596 : *
597 : * Flush out any remaining statistics counts. Without this, operations
598 : * triggered during backend exit (such as temp table deletions) won't be
599 : * counted.
600 : */
601 : static void
602 34718 : pgstat_shutdown_hook(int code, Datum arg)
603 : {
604 : Assert(!pgstat_is_shutdown);
605 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
606 :
607 : /*
608 : * If we got as far as discovering our own database ID, we can flush out
609 : * what we did so far. Otherwise, we'd be reporting an invalid database
610 : * ID, so forget it. (This means that accesses to pg_database during
611 : * failed backend starts might never get counted.)
612 : */
613 34718 : if (OidIsValid(MyDatabaseId))
614 27698 : pgstat_report_disconnect(MyDatabaseId);
615 :
616 34718 : pgstat_report_stat(true);
617 :
618 : /* there shouldn't be any pending changes left */
619 : Assert(dlist_is_empty(&pgStatPending));
620 34718 : dlist_init(&pgStatPending);
621 :
622 : /* drop the backend stats entry */
623 34718 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
624 0 : pgstat_request_entry_refs_gc();
625 :
626 34718 : pgstat_detach_shmem();
627 :
628 : #ifdef USE_ASSERT_CHECKING
629 : pgstat_is_shutdown = true;
630 : #endif
631 34718 : }
632 :
633 : /*
634 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
635 : * BaseInit().
636 : *
637 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
638 : */
639 : void
640 34718 : pgstat_initialize(void)
641 : {
642 : Assert(!pgstat_is_initialized);
643 :
644 34718 : pgstat_attach_shmem();
645 :
646 34718 : pgstat_init_snapshot_fixed();
647 :
648 : /* Backend initialization callbacks */
649 8922526 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
650 : {
651 8887808 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
652 :
653 8887808 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
654 8853090 : continue;
655 :
656 34718 : kind_info->init_backend_cb();
657 : }
658 :
659 : /* Set up a process-exit hook to clean up */
660 34718 : before_shmem_exit(pgstat_shutdown_hook, 0);
661 :
662 : #ifdef USE_ASSERT_CHECKING
663 : pgstat_is_initialized = true;
664 : #endif
665 34718 : }
666 :
667 :
668 : /* ------------------------------------------------------------
669 : * Public functions used by backends follow
670 : * ------------------------------------------------------------
671 : */
672 :
673 : /*
674 : * Must be called by processes that performs DML: tcop/postgres.c, logical
675 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
676 : * shared memory.
677 : *
678 : * Unless called with 'force', pending stats updates are flushed happen once
679 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
680 : * block on lock acquisition, except if stats updates have been pending for
681 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
682 : *
683 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
684 : * suggested idle timeout is returned. Currently this is always
685 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
686 : * a timeout after which to call pgstat_report_stat(true), but are not
687 : * required to do so.
688 : *
689 : * Note that this is called only when not within a transaction, so it is fair
690 : * to use transaction stop time as an approximation of current time.
691 : */
692 : long
693 557050 : pgstat_report_stat(bool force)
694 : {
695 : static TimestampTz pending_since = 0;
696 : static TimestampTz last_flush = 0;
697 : bool partial_flush;
698 : TimestampTz now;
699 : bool nowait;
700 :
701 : pgstat_assert_is_up();
702 : Assert(!IsTransactionOrTransactionBlock());
703 :
704 : /* "absorb" the forced flush even if there's nothing to flush */
705 557050 : if (pgStatForceNextFlush)
706 : {
707 420 : force = true;
708 420 : pgStatForceNextFlush = false;
709 : }
710 :
711 : /* Don't expend a clock check if nothing to do */
712 557050 : if (dlist_is_empty(&pgStatPending))
713 : {
714 13076 : bool do_flush = false;
715 :
716 : /* Check for pending fixed-numbered stats */
717 2636626 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
718 : {
719 2626490 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
720 :
721 2626490 : if (!kind_info)
722 2473148 : continue;
723 153342 : if (!kind_info->fixed_amount)
724 : {
725 : Assert(kind_info->have_fixed_pending_cb == NULL);
726 78474 : continue;
727 : }
728 74868 : if (!kind_info->have_fixed_pending_cb)
729 39246 : continue;
730 :
731 35622 : if (kind_info->have_fixed_pending_cb())
732 : {
733 2940 : do_flush = true;
734 2940 : break;
735 : }
736 : }
737 :
738 13076 : if (!do_flush)
739 : {
740 : Assert(pending_since == 0);
741 10136 : return 0;
742 : }
743 : }
744 :
745 : /*
746 : * There should never be stats to report once stats are shut down. Can't
747 : * assert that before the checks above, as there is an unconditional
748 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
749 : * the process that ran pgstat_before_server_shutdown() will still call.
750 : */
751 : Assert(!pgStatLocal.shmem->is_shutdown);
752 :
753 546914 : if (force)
754 : {
755 : /*
756 : * Stats reports are forced either when it's been too long since stats
757 : * have been reported or in processes that force stats reporting to
758 : * happen at specific points (including shutdown). In the former case
759 : * the transaction stop time might be quite old, in the latter it
760 : * would never get cleared.
761 : */
762 34898 : now = GetCurrentTimestamp();
763 : }
764 : else
765 : {
766 512016 : now = GetCurrentTransactionStopTimestamp();
767 :
768 977128 : if (pending_since > 0 &&
769 465112 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
770 : {
771 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
772 0 : force = true;
773 : }
774 512016 : else if (last_flush > 0 &&
775 489300 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
776 : {
777 : /* don't flush too frequently */
778 487516 : if (pending_since == 0)
779 23926 : pending_since = now;
780 :
781 487516 : return PGSTAT_IDLE_INTERVAL;
782 : }
783 : }
784 :
785 59398 : pgstat_update_dbstats(now);
786 :
787 : /* don't wait for lock acquisition when !force */
788 59398 : nowait = !force;
789 :
790 59398 : partial_flush = false;
791 :
792 : /* flush of variable-numbered stats */
793 59398 : partial_flush |= pgstat_flush_pending_entries(nowait);
794 :
795 : /* flush of fixed-numbered stats */
796 15265286 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
797 : {
798 15205888 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
799 :
800 15205888 : if (!kind_info)
801 14492816 : continue;
802 713072 : if (!kind_info->fixed_amount)
803 : {
804 : Assert(kind_info->flush_fixed_cb == NULL);
805 356536 : continue;
806 : }
807 356536 : if (!kind_info->flush_fixed_cb)
808 178342 : continue;
809 :
810 178194 : partial_flush |= kind_info->flush_fixed_cb(nowait);
811 : }
812 :
813 59398 : last_flush = now;
814 :
815 : /*
816 : * If some of the pending stats could not be flushed due to lock
817 : * contention, let the caller know when to retry.
818 : */
819 59398 : if (partial_flush)
820 : {
821 : /* force should have prevented us from getting here */
822 : Assert(!force);
823 :
824 : /* remember since when stats have been pending */
825 0 : if (pending_since == 0)
826 0 : pending_since = now;
827 :
828 0 : return PGSTAT_IDLE_INTERVAL;
829 : }
830 :
831 59398 : pending_since = 0;
832 :
833 59398 : return 0;
834 : }
835 :
836 : /*
837 : * Force locally pending stats to be flushed during the next
838 : * pgstat_report_stat() call. This is useful for writing tests.
839 : */
840 : void
841 420 : pgstat_force_next_flush(void)
842 : {
843 420 : pgStatForceNextFlush = true;
844 420 : }
845 :
846 : /*
847 : * Only for use by pgstat_reset_counters()
848 : */
849 : static bool
850 22050 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
851 : {
852 22050 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
853 : }
854 :
855 : /*
856 : * Reset counters for our database.
857 : *
858 : * Permission checking for this function is managed through the normal
859 : * GRANT system.
860 : */
861 : void
862 26 : pgstat_reset_counters(void)
863 : {
864 26 : TimestampTz ts = GetCurrentTimestamp();
865 :
866 26 : pgstat_reset_matching_entries(match_db_entries,
867 : ObjectIdGetDatum(MyDatabaseId),
868 : ts);
869 26 : }
870 :
871 : /*
872 : * Reset a single variable-numbered entry.
873 : *
874 : * If the stats kind is within a database, also reset the database's
875 : * stat_reset_timestamp.
876 : *
877 : * Permission checking for this function is managed through the normal
878 : * GRANT system.
879 : */
880 : void
881 44 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
882 : {
883 44 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
884 44 : TimestampTz ts = GetCurrentTimestamp();
885 :
886 : /* not needed atm, and doesn't make sense with the current signature */
887 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
888 :
889 : /* reset the "single counter" */
890 44 : pgstat_reset_entry(kind, dboid, objid, ts);
891 :
892 44 : if (!kind_info->accessed_across_databases)
893 16 : pgstat_reset_database_timestamp(dboid, ts);
894 44 : }
895 :
896 : /*
897 : * Reset stats for all entries of a kind.
898 : *
899 : * Permission checking for this function is managed through the normal
900 : * GRANT system.
901 : */
902 : void
903 54 : pgstat_reset_of_kind(PgStat_Kind kind)
904 : {
905 54 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
906 54 : TimestampTz ts = GetCurrentTimestamp();
907 :
908 54 : if (kind_info->fixed_amount)
909 46 : kind_info->reset_all_cb(ts);
910 : else
911 8 : pgstat_reset_entries_of_kind(kind, ts);
912 54 : }
913 :
914 :
915 : /* ------------------------------------------------------------
916 : * Fetching of stats
917 : * ------------------------------------------------------------
918 : */
919 :
920 : /*
921 : * Discard any data collected in the current transaction. Any subsequent
922 : * request will cause new snapshots to be read.
923 : *
924 : * This is also invoked during transaction commit or abort to discard
925 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
926 : * cause this routine to be called.
927 : */
928 : void
929 787518 : pgstat_clear_snapshot(void)
930 : {
931 : pgstat_assert_is_up();
932 :
933 787518 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
934 : sizeof(pgStatLocal.snapshot.fixed_valid));
935 787518 : memset(&pgStatLocal.snapshot.custom_valid, 0,
936 : sizeof(pgStatLocal.snapshot.custom_valid));
937 787518 : pgStatLocal.snapshot.stats = NULL;
938 787518 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
939 :
940 : /* Release memory, if any was allocated */
941 787518 : if (pgStatLocal.snapshot.context)
942 : {
943 986 : MemoryContextDelete(pgStatLocal.snapshot.context);
944 :
945 : /* Reset variables */
946 986 : pgStatLocal.snapshot.context = NULL;
947 : }
948 :
949 : /*
950 : * Historically the backend_status.c facilities lived in this file, and
951 : * were reset with the same function. For now keep it that way, and
952 : * forward the reset request.
953 : */
954 787518 : pgstat_clear_backend_activity_snapshot();
955 :
956 : /* Reset this flag, as it may be possible that a cleanup was forced. */
957 787518 : force_stats_snapshot_clear = false;
958 787518 : }
959 :
960 : void *
961 363744 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
962 : {
963 : PgStat_HashKey key;
964 : PgStat_EntryRef *entry_ref;
965 : void *stats_data;
966 363744 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
967 :
968 : /* should be called from backends */
969 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
970 : Assert(!kind_info->fixed_amount);
971 :
972 363744 : pgstat_prep_snapshot();
973 :
974 : /* clear padding */
975 363744 : memset(&key, 0, sizeof(struct PgStat_HashKey));
976 :
977 363744 : key.kind = kind;
978 363744 : key.dboid = dboid;
979 363744 : key.objid = objid;
980 :
981 : /* if we need to build a full snapshot, do so */
982 363744 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
983 460 : pgstat_build_snapshot();
984 :
985 : /* if caching is desired, look up in cache */
986 363744 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
987 : {
988 9470 : PgStat_SnapshotEntry *entry = NULL;
989 :
990 9470 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
991 :
992 9470 : if (entry)
993 620 : return entry->data;
994 :
995 : /*
996 : * If we built a full snapshot and the key is not in
997 : * pgStatLocal.snapshot.stats, there are no matching stats.
998 : */
999 8850 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1000 28 : return NULL;
1001 : }
1002 :
1003 363096 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
1004 :
1005 363096 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1006 :
1007 363096 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
1008 : {
1009 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
1010 8340 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
1011 : {
1012 1736 : PgStat_SnapshotEntry *entry = NULL;
1013 : bool found;
1014 :
1015 1736 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1016 : Assert(!found);
1017 1736 : entry->data = NULL;
1018 : }
1019 8340 : return NULL;
1020 : }
1021 :
1022 : /*
1023 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
1024 : * otherwise we could quickly end up with a fair bit of memory used due to
1025 : * repeated accesses.
1026 : */
1027 354756 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1028 347670 : stats_data = palloc(kind_info->shared_data_len);
1029 : else
1030 7086 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1031 7086 : kind_info->shared_data_len);
1032 :
1033 354756 : pgstat_lock_entry_shared(entry_ref, false);
1034 709512 : memcpy(stats_data,
1035 354756 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1036 354756 : kind_info->shared_data_len);
1037 354756 : pgstat_unlock_entry(entry_ref);
1038 :
1039 354756 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1040 : {
1041 7086 : PgStat_SnapshotEntry *entry = NULL;
1042 : bool found;
1043 :
1044 7086 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1045 7086 : entry->data = stats_data;
1046 : }
1047 :
1048 354756 : return stats_data;
1049 : }
1050 :
1051 : /*
1052 : * If a stats snapshot has been taken, return the timestamp at which that was
1053 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1054 : * false.
1055 : */
1056 : TimestampTz
1057 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1058 : {
1059 60 : if (force_stats_snapshot_clear)
1060 18 : pgstat_clear_snapshot();
1061 :
1062 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1063 : {
1064 24 : *have_snapshot = true;
1065 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1066 : }
1067 :
1068 36 : *have_snapshot = false;
1069 :
1070 36 : return 0;
1071 : }
1072 :
1073 : bool
1074 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1075 : {
1076 : /* fixed-numbered stats always exist */
1077 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1078 12 : return true;
1079 :
1080 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1081 : }
1082 :
1083 : /*
1084 : * Ensure snapshot for fixed-numbered 'kind' exists.
1085 : *
1086 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1087 : * massaging the data into the desired format.
1088 : */
1089 : void
1090 430 : pgstat_snapshot_fixed(PgStat_Kind kind)
1091 : {
1092 : Assert(pgstat_is_kind_valid(kind));
1093 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1094 :
1095 430 : if (force_stats_snapshot_clear)
1096 0 : pgstat_clear_snapshot();
1097 :
1098 430 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1099 24 : pgstat_build_snapshot();
1100 : else
1101 406 : pgstat_build_snapshot_fixed(kind);
1102 :
1103 430 : if (pgstat_is_kind_builtin(kind))
1104 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1105 8 : else if (pgstat_is_kind_custom(kind))
1106 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1107 430 : }
1108 :
1109 : static void
1110 34718 : pgstat_init_snapshot_fixed(void)
1111 : {
1112 : /*
1113 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1114 : * stats kinds.
1115 : */
1116 4513340 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1117 : {
1118 4478622 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1119 :
1120 4478622 : if (!kind_info || !kind_info->fixed_amount)
1121 4478524 : continue;
1122 :
1123 98 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1124 98 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1125 : }
1126 34718 : }
1127 :
1128 : static void
1129 363796 : pgstat_prep_snapshot(void)
1130 : {
1131 363796 : if (force_stats_snapshot_clear)
1132 18 : pgstat_clear_snapshot();
1133 :
1134 363796 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1135 9522 : pgStatLocal.snapshot.stats != NULL)
1136 362810 : return;
1137 :
1138 986 : if (!pgStatLocal.snapshot.context)
1139 986 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1140 : "PgStat Snapshot",
1141 : ALLOCSET_SMALL_SIZES);
1142 :
1143 986 : pgStatLocal.snapshot.stats =
1144 986 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1145 : PGSTAT_SNAPSHOT_HASH_SIZE,
1146 : NULL);
1147 : }
1148 :
1149 : static void
1150 484 : pgstat_build_snapshot(void)
1151 : {
1152 : dshash_seq_status hstat;
1153 : PgStatShared_HashEntry *p;
1154 :
1155 : /* should only be called when we need a snapshot */
1156 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1157 :
1158 : /* snapshot already built */
1159 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1160 432 : return;
1161 :
1162 52 : pgstat_prep_snapshot();
1163 :
1164 : Assert(pgStatLocal.snapshot.stats->members == 0);
1165 :
1166 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1167 :
1168 : /*
1169 : * Snapshot all variable stats.
1170 : */
1171 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1172 52342 : while ((p = dshash_seq_next(&hstat)) != NULL)
1173 : {
1174 52290 : PgStat_Kind kind = p->key.kind;
1175 52290 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1176 : bool found;
1177 : PgStat_SnapshotEntry *entry;
1178 : PgStatShared_Common *stats_data;
1179 :
1180 : /*
1181 : * Check if the stats object should be included in the snapshot.
1182 : * Unless the stats kind can be accessed from all databases (e.g.,
1183 : * database stats themselves), we only include stats for the current
1184 : * database or objects not associated with a database (e.g. shared
1185 : * relations).
1186 : */
1187 52290 : if (p->key.dboid != MyDatabaseId &&
1188 15720 : p->key.dboid != InvalidOid &&
1189 12872 : !kind_info->accessed_across_databases)
1190 12972 : continue;
1191 :
1192 39522 : if (p->dropped)
1193 204 : continue;
1194 :
1195 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1196 :
1197 39318 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1198 : Assert(stats_data);
1199 :
1200 39318 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1201 : Assert(!found);
1202 :
1203 78636 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1204 39318 : kind_info->shared_size);
1205 :
1206 : /*
1207 : * Acquire the LWLock directly instead of using
1208 : * pg_stat_lock_entry_shared() which requires a reference.
1209 : */
1210 39318 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1211 78636 : memcpy(entry->data,
1212 39318 : pgstat_get_entry_data(kind, stats_data),
1213 39318 : kind_info->shared_size);
1214 39318 : LWLockRelease(&stats_data->lock);
1215 : }
1216 52 : dshash_seq_term(&hstat);
1217 :
1218 : /*
1219 : * Build snapshot of all fixed-numbered stats.
1220 : */
1221 13364 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1222 : {
1223 13312 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1224 :
1225 13312 : if (!kind_info)
1226 12688 : continue;
1227 624 : if (!kind_info->fixed_amount)
1228 : {
1229 : Assert(kind_info->snapshot_cb == NULL);
1230 312 : continue;
1231 : }
1232 :
1233 312 : pgstat_build_snapshot_fixed(kind);
1234 : }
1235 :
1236 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1237 : }
1238 :
1239 : static void
1240 7252 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1241 : {
1242 7252 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1243 : int idx;
1244 : bool *valid;
1245 :
1246 : /* Position in fixed_valid or custom_valid */
1247 7252 : if (pgstat_is_kind_builtin(kind))
1248 : {
1249 7238 : idx = kind;
1250 7238 : valid = pgStatLocal.snapshot.fixed_valid;
1251 : }
1252 : else
1253 : {
1254 14 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1255 14 : valid = pgStatLocal.snapshot.custom_valid;
1256 : }
1257 :
1258 : Assert(kind_info->fixed_amount);
1259 : Assert(kind_info->snapshot_cb != NULL);
1260 :
1261 7252 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1262 : {
1263 : /* rebuild every time */
1264 6564 : valid[idx] = false;
1265 : }
1266 688 : else if (valid[idx])
1267 : {
1268 : /* in snapshot mode we shouldn't get called again */
1269 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1270 12 : return;
1271 : }
1272 :
1273 : Assert(!valid[idx]);
1274 :
1275 7240 : kind_info->snapshot_cb();
1276 :
1277 : Assert(!valid[idx]);
1278 7240 : valid[idx] = true;
1279 : }
1280 :
1281 :
1282 : /* ------------------------------------------------------------
1283 : * Backend-local pending stats infrastructure
1284 : * ------------------------------------------------------------
1285 : */
1286 :
1287 : /*
1288 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1289 : * stats if not already done.
1290 : *
1291 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1292 : * created, false otherwise.
1293 : */
1294 : PgStat_EntryRef *
1295 105095594 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1296 : {
1297 : PgStat_EntryRef *entry_ref;
1298 :
1299 : /* need to be able to flush out */
1300 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1301 :
1302 105095594 : if (unlikely(!pgStatPendingContext))
1303 : {
1304 30204 : pgStatPendingContext =
1305 30204 : AllocSetContextCreate(TopMemoryContext,
1306 : "PgStat Pending",
1307 : ALLOCSET_SMALL_SIZES);
1308 : }
1309 :
1310 105095594 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1311 : true, created_entry);
1312 :
1313 105095594 : if (entry_ref->pending == NULL)
1314 : {
1315 1722984 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1316 :
1317 : Assert(entrysize != (size_t) -1);
1318 :
1319 1722984 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1320 1722984 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1321 : }
1322 :
1323 105095594 : return entry_ref;
1324 : }
1325 :
1326 : /*
1327 : * Return an existing stats entry, or NULL.
1328 : *
1329 : * This should only be used for helper function for pgstatfuncs.c - outside of
1330 : * that it shouldn't be needed.
1331 : */
1332 : PgStat_EntryRef *
1333 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1334 : {
1335 : PgStat_EntryRef *entry_ref;
1336 :
1337 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1338 :
1339 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1340 30 : return NULL;
1341 :
1342 54 : return entry_ref;
1343 : }
1344 :
1345 : void
1346 1722984 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1347 : {
1348 1722984 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1349 1722984 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1350 1722984 : void *pending_data = entry_ref->pending;
1351 :
1352 : Assert(pending_data != NULL);
1353 : /* !fixed_amount stats should be handled explicitly */
1354 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1355 :
1356 1722984 : if (kind_info->delete_pending_cb)
1357 1570430 : kind_info->delete_pending_cb(entry_ref);
1358 :
1359 1722984 : pfree(pending_data);
1360 1722984 : entry_ref->pending = NULL;
1361 :
1362 1722984 : dlist_delete(&entry_ref->pending_node);
1363 1722984 : }
1364 :
1365 : /*
1366 : * Flush out pending variable-numbered stats.
1367 : */
1368 : static bool
1369 59398 : pgstat_flush_pending_entries(bool nowait)
1370 : {
1371 59398 : bool have_pending = false;
1372 59398 : dlist_node *cur = NULL;
1373 :
1374 : /*
1375 : * Need to be a bit careful iterating over the list of pending entries.
1376 : * Processing a pending entry may queue further pending entries to the end
1377 : * of the list that we want to process, so a simple iteration won't do.
1378 : * Further complicating matters is that we want to delete the current
1379 : * entry in each iteration from the list if we flushed successfully.
1380 : *
1381 : * So we just keep track of the next pointer in each loop iteration.
1382 : */
1383 59398 : if (!dlist_is_empty(&pgStatPending))
1384 56498 : cur = dlist_head_node(&pgStatPending);
1385 :
1386 1720142 : while (cur)
1387 : {
1388 1660744 : PgStat_EntryRef *entry_ref =
1389 1660744 : dlist_container(PgStat_EntryRef, pending_node, cur);
1390 1660744 : PgStat_HashKey key = entry_ref->shared_entry->key;
1391 1660744 : PgStat_Kind kind = key.kind;
1392 1660744 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1393 : bool did_flush;
1394 : dlist_node *next;
1395 :
1396 : Assert(!kind_info->fixed_amount);
1397 : Assert(kind_info->flush_pending_cb != NULL);
1398 :
1399 : /* flush the stats, if possible */
1400 1660744 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1401 :
1402 : Assert(did_flush || nowait);
1403 :
1404 : /* determine next entry, before deleting the pending entry */
1405 1660744 : if (dlist_has_next(&pgStatPending, cur))
1406 1604246 : next = dlist_next_node(&pgStatPending, cur);
1407 : else
1408 56498 : next = NULL;
1409 :
1410 : /* if successfully flushed, remove entry */
1411 1660744 : if (did_flush)
1412 1660744 : pgstat_delete_pending_entry(entry_ref);
1413 : else
1414 0 : have_pending = true;
1415 :
1416 1660744 : cur = next;
1417 : }
1418 :
1419 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1420 :
1421 59398 : return have_pending;
1422 : }
1423 :
1424 :
1425 : /* ------------------------------------------------------------
1426 : * Helper / infrastructure functions
1427 : * ------------------------------------------------------------
1428 : */
1429 :
1430 : PgStat_Kind
1431 166 : pgstat_get_kind_from_str(char *kind_str)
1432 : {
1433 494 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1434 : {
1435 488 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1436 160 : return kind;
1437 : }
1438 :
1439 : /* Check the custom set of cumulative stats */
1440 6 : if (pgstat_kind_custom_infos)
1441 : {
1442 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1443 : {
1444 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1445 :
1446 0 : if (pgstat_kind_custom_infos[idx] &&
1447 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1448 0 : return kind;
1449 : }
1450 : }
1451 :
1452 6 : ereport(ERROR,
1453 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1454 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1455 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1456 : }
1457 :
1458 : static inline bool
1459 643514 : pgstat_is_kind_valid(PgStat_Kind kind)
1460 : {
1461 643514 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1462 : }
1463 :
1464 : const PgStat_KindInfo *
1465 40908154 : pgstat_get_kind_info(PgStat_Kind kind)
1466 : {
1467 40908154 : if (pgstat_is_kind_builtin(kind))
1468 9426638 : return &pgstat_kind_builtin_infos[kind];
1469 :
1470 31481516 : if (pgstat_is_kind_custom(kind))
1471 : {
1472 19088196 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1473 :
1474 19088196 : if (pgstat_kind_custom_infos == NULL ||
1475 51922 : pgstat_kind_custom_infos[idx] == NULL)
1476 19087328 : return NULL;
1477 868 : return pgstat_kind_custom_infos[idx];
1478 : }
1479 :
1480 12393320 : return NULL;
1481 : }
1482 :
1483 : /*
1484 : * Register a new stats kind.
1485 : *
1486 : * PgStat_Kinds must be globally unique across all extensions. Refer
1487 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1488 : * unique ID for your extension, to avoid conflicts with other extension
1489 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1490 : * needlessly reserving a new ID.
1491 : */
1492 : void
1493 16 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1494 : {
1495 16 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1496 :
1497 16 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1498 0 : ereport(ERROR,
1499 : (errmsg("custom cumulative statistics name is invalid"),
1500 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1501 :
1502 16 : if (!pgstat_is_kind_custom(kind))
1503 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1504 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1505 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1506 :
1507 16 : if (!process_shared_preload_libraries_in_progress)
1508 0 : ereport(ERROR,
1509 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1510 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1511 :
1512 : /*
1513 : * Check some data for fixed-numbered stats.
1514 : */
1515 16 : if (kind_info->fixed_amount)
1516 : {
1517 8 : if (kind_info->shared_size == 0)
1518 0 : ereport(ERROR,
1519 : (errmsg("custom cumulative statistics property is invalid"),
1520 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1521 : }
1522 :
1523 : /*
1524 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1525 : */
1526 16 : if (pgstat_kind_custom_infos == NULL)
1527 : {
1528 8 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1529 8 : MemoryContextAllocZero(TopMemoryContext,
1530 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1531 : }
1532 :
1533 16 : if (pgstat_kind_custom_infos[idx] != NULL &&
1534 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1535 0 : ereport(ERROR,
1536 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1537 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1538 : pgstat_kind_custom_infos[idx]->name)));
1539 :
1540 : /* check for existing custom stats with the same name */
1541 2080 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1542 : {
1543 2064 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1544 :
1545 2064 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1546 2056 : continue;
1547 8 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1548 0 : ereport(ERROR,
1549 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1550 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1551 : }
1552 :
1553 : /* Register it */
1554 16 : pgstat_kind_custom_infos[idx] = kind_info;
1555 16 : ereport(LOG,
1556 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1557 : kind_info->name, kind)));
1558 16 : }
1559 :
/*
 * Assert that the stats system is currently usable.
 *
 * Stats should only be reported after pgstat_initialize() and before
 * pgstat_shutdown(). This check is put in a few central places to catch
 * violations of this rule more easily.  The function is only compiled in
 * assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1572 :
1573 :
1574 : /* ------------------------------------------------------------
1575 : * reading and writing of on-disk stats file
1576 : * ------------------------------------------------------------
1577 : */
1578 :
/* helpers for pgstat_write_statsfile() */

/*
 * Write 'len' bytes starting at 'ptr' to 'fpout' as a single item.
 *
 * The result of fwrite() is deliberately ignored: the caller checks for
 * errors with ferror() once at the end.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	(void) fwrite(ptr, len, 1, fpout);
}

/* write the object 'ptr' points to, sized by the pointed-to type */
#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1592 :
1593 : /*
1594 : * This function is called in the last process that is accessing the shared
1595 : * stats so locking is not required.
1596 : */
1597 : static void
1598 1088 : pgstat_write_statsfile(XLogRecPtr redo)
1599 : {
1600 : FILE *fpout;
1601 : int32 format_id;
1602 1088 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1603 1088 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1604 : dshash_seq_status hstat;
1605 : PgStatShared_HashEntry *ps;
1606 :
1607 : pgstat_assert_is_up();
1608 :
1609 : /* should be called only by the checkpointer or single user mode */
1610 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1611 :
1612 : /* we're shutting down, so it's ok to just override this */
1613 1088 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1614 :
1615 1088 : elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
1616 : LSN_FORMAT_ARGS(redo));
1617 :
1618 : /*
1619 : * Open the statistics temp file to write out the current values.
1620 : */
1621 1088 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1622 1088 : if (fpout == NULL)
1623 : {
1624 0 : ereport(LOG,
1625 : (errcode_for_file_access(),
1626 : errmsg("could not open temporary statistics file \"%s\": %m",
1627 : tmpfile)));
1628 0 : return;
1629 : }
1630 :
1631 : /*
1632 : * Write the file header --- currently just a format ID.
1633 : */
1634 1088 : format_id = PGSTAT_FILE_FORMAT_ID;
1635 1088 : write_chunk_s(fpout, &format_id);
1636 :
1637 : /* Write the redo LSN, used to cross check the file read */
1638 1088 : write_chunk_s(fpout, &redo);
1639 :
1640 : /* Write various stats structs for fixed number of objects */
1641 279616 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1642 : {
1643 : char *ptr;
1644 278528 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1645 :
1646 278528 : if (!info || !info->fixed_amount)
1647 271994 : continue;
1648 :
1649 6534 : if (pgstat_is_kind_builtin(kind))
1650 : Assert(info->snapshot_ctl_off != 0);
1651 :
1652 : /* skip if no need to write to file */
1653 6534 : if (!info->write_to_file)
1654 0 : continue;
1655 :
1656 6534 : pgstat_build_snapshot_fixed(kind);
1657 6534 : if (pgstat_is_kind_builtin(kind))
1658 6528 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1659 : else
1660 6 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1661 :
1662 6534 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1663 6534 : write_chunk_s(fpout, &kind);
1664 6534 : write_chunk(fpout, ptr, info->shared_data_len);
1665 : }
1666 :
1667 : /*
1668 : * Walk through the stats entries
1669 : */
1670 1088 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1671 313606 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1672 : {
1673 : PgStatShared_Common *shstats;
1674 312518 : const PgStat_KindInfo *kind_info = NULL;
1675 :
1676 312518 : CHECK_FOR_INTERRUPTS();
1677 :
1678 : /*
1679 : * We should not see any "dropped" entries when writing the stats
1680 : * file, as all backends and auxiliary processes should have cleaned
1681 : * up their references before they terminated.
1682 : *
1683 : * However, since we are already shutting down, it is not worth
1684 : * crashing the server over any potential cleanup issues, so we simply
1685 : * skip such entries if encountered.
1686 : */
1687 : Assert(!ps->dropped);
1688 312518 : if (ps->dropped)
1689 0 : continue;
1690 :
1691 : /*
1692 : * This discards data related to custom stats kinds that are unknown
1693 : * to this process.
1694 : */
1695 312518 : if (!pgstat_is_kind_valid(ps->key.kind))
1696 : {
1697 0 : elog(WARNING, "found unknown stats entry %u/%u/%llu",
1698 : ps->key.kind, ps->key.dboid,
1699 : (unsigned long long) ps->key.objid);
1700 0 : continue;
1701 : }
1702 :
1703 312518 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1704 :
1705 312518 : kind_info = pgstat_get_kind_info(ps->key.kind);
1706 :
1707 : /* if not dropped the valid-entry refcount should exist */
1708 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1709 :
1710 : /* skip if no need to write to file */
1711 312518 : if (!kind_info->write_to_file)
1712 246 : continue;
1713 :
1714 312272 : if (!kind_info->to_serialized_name)
1715 : {
1716 : /* normal stats entry, identified by PgStat_HashKey */
1717 312122 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1718 312122 : write_chunk_s(fpout, &ps->key);
1719 : }
1720 : else
1721 : {
1722 : /* stats entry identified by name on disk (e.g. slots) */
1723 : NameData name;
1724 :
1725 150 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1726 :
1727 150 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1728 150 : write_chunk_s(fpout, &ps->key.kind);
1729 150 : write_chunk_s(fpout, &name);
1730 : }
1731 :
1732 : /* Write except the header part of the entry */
1733 312272 : write_chunk(fpout,
1734 : pgstat_get_entry_data(ps->key.kind, shstats),
1735 : pgstat_get_entry_len(ps->key.kind));
1736 : }
1737 1088 : dshash_seq_term(&hstat);
1738 :
1739 : /*
1740 : * No more output to be done. Close the temp file and replace the old
1741 : * pgstat.stat with it. The ferror() check replaces testing for error
1742 : * after each individual fputc or fwrite (in write_chunk()) above.
1743 : */
1744 1088 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1745 :
1746 1088 : if (ferror(fpout))
1747 : {
1748 0 : ereport(LOG,
1749 : (errcode_for_file_access(),
1750 : errmsg("could not write temporary statistics file \"%s\": %m",
1751 : tmpfile)));
1752 0 : FreeFile(fpout);
1753 0 : unlink(tmpfile);
1754 : }
1755 1088 : else if (FreeFile(fpout) < 0)
1756 : {
1757 0 : ereport(LOG,
1758 : (errcode_for_file_access(),
1759 : errmsg("could not close temporary statistics file \"%s\": %m",
1760 : tmpfile)));
1761 0 : unlink(tmpfile);
1762 : }
1763 1088 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1764 : {
1765 : /* durable_rename already emitted log message */
1766 0 : unlink(tmpfile);
1767 : }
1768 : }
1769 :
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly 'len' bytes from 'fpin' into 'ptr'.
 *
 * Returns true if all bytes could be read, false otherwise.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

/* read into the object 'ptr' points to, sized by the pointed-to type */
#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1778 :
1779 : /*
1780 : * Reads in existing statistics file into memory.
1781 : *
1782 : * This function is called in the only process that is accessing the shared
1783 : * stats so locking is not required.
1784 : */
1785 : static void
1786 1310 : pgstat_read_statsfile(XLogRecPtr redo)
1787 : {
1788 : FILE *fpin;
1789 : int32 format_id;
1790 : bool found;
1791 1310 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1792 1310 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1793 : XLogRecPtr file_redo;
1794 :
1795 : /* shouldn't be called from postmaster */
1796 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1797 :
1798 1310 : elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
1799 : LSN_FORMAT_ARGS(redo));
1800 :
1801 : /*
1802 : * Try to open the stats file. If it doesn't exist, the backends simply
1803 : * returns zero for anything and statistics simply starts from scratch
1804 : * with empty counters.
1805 : *
1806 : * ENOENT is a possibility if stats collection was previously disabled or
1807 : * has not yet written the stats file for the first time. Any other
1808 : * failure condition is suspicious.
1809 : */
1810 1310 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1811 : {
1812 92 : if (errno != ENOENT)
1813 0 : ereport(LOG,
1814 : (errcode_for_file_access(),
1815 : errmsg("could not open statistics file \"%s\": %m",
1816 : statfile)));
1817 92 : pgstat_reset_after_failure();
1818 92 : return;
1819 : }
1820 :
1821 : /*
1822 : * Verify it's of the expected format.
1823 : */
1824 1218 : if (!read_chunk_s(fpin, &format_id))
1825 : {
1826 0 : elog(WARNING, "could not read format ID");
1827 0 : goto error;
1828 : }
1829 :
1830 1218 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1831 : {
1832 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1833 : format_id, PGSTAT_FILE_FORMAT_ID);
1834 2 : goto error;
1835 : }
1836 :
1837 : /*
1838 : * Read the redo LSN stored in the file.
1839 : */
1840 1216 : if (!read_chunk_s(fpin, &file_redo))
1841 : {
1842 0 : elog(WARNING, "could not read redo LSN");
1843 0 : goto error;
1844 : }
1845 :
1846 1216 : if (file_redo != redo)
1847 : {
1848 20 : elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)",
1849 : LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo));
1850 20 : goto error;
1851 : }
1852 :
1853 : /*
1854 : * We found an existing statistics file. Read it and put all the stats
1855 : * data into place.
1856 : */
1857 : for (;;)
1858 330994 : {
1859 332190 : int t = fgetc(fpin);
1860 :
1861 332190 : switch (t)
1862 : {
1863 7180 : case PGSTAT_FILE_ENTRY_FIXED:
1864 : {
1865 : PgStat_Kind kind;
1866 : const PgStat_KindInfo *info;
1867 : char *ptr;
1868 :
1869 : /* entry for fixed-numbered stats */
1870 7180 : if (!read_chunk_s(fpin, &kind))
1871 : {
1872 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1873 2 : goto error;
1874 : }
1875 :
1876 7180 : if (!pgstat_is_kind_valid(kind))
1877 : {
1878 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1879 : kind, t);
1880 0 : goto error;
1881 : }
1882 :
1883 7180 : info = pgstat_get_kind_info(kind);
1884 7180 : if (!info)
1885 : {
1886 2 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1887 : kind, t);
1888 2 : goto error;
1889 : }
1890 :
1891 7178 : if (!info->fixed_amount)
1892 : {
1893 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1894 : kind, t);
1895 0 : goto error;
1896 : }
1897 :
1898 : /* Load back stats into shared memory */
1899 7178 : if (pgstat_is_kind_builtin(kind))
1900 7176 : ptr = ((char *) shmem) + info->shared_ctl_off +
1901 7176 : info->shared_data_off;
1902 : else
1903 : {
1904 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1905 :
1906 2 : ptr = ((char *) shmem->custom_data[idx]) +
1907 2 : info->shared_data_off;
1908 : }
1909 :
1910 7178 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1911 : {
1912 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1913 : kind, t, info->shared_data_len);
1914 0 : goto error;
1915 : }
1916 :
1917 7178 : break;
1918 : }
1919 323816 : case PGSTAT_FILE_ENTRY_HASH:
1920 : case PGSTAT_FILE_ENTRY_NAME:
1921 : {
1922 : PgStat_HashKey key;
1923 : PgStatShared_HashEntry *p;
1924 : PgStatShared_Common *header;
1925 :
1926 323816 : CHECK_FOR_INTERRUPTS();
1927 :
1928 323816 : if (t == PGSTAT_FILE_ENTRY_HASH)
1929 : {
1930 : /* normal stats entry, identified by PgStat_HashKey */
1931 323724 : if (!read_chunk_s(fpin, &key))
1932 : {
1933 0 : elog(WARNING, "could not read key for entry of type %c", t);
1934 0 : goto error;
1935 : }
1936 :
1937 323724 : if (!pgstat_is_kind_valid(key.kind))
1938 : {
1939 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%llu of type %c",
1940 : key.kind, key.dboid,
1941 : (unsigned long long) key.objid, t);
1942 0 : goto error;
1943 : }
1944 : }
1945 : else
1946 : {
1947 : /* stats entry identified by name on disk (e.g. slots) */
1948 92 : const PgStat_KindInfo *kind_info = NULL;
1949 : PgStat_Kind kind;
1950 : NameData name;
1951 :
1952 92 : if (!read_chunk_s(fpin, &kind))
1953 : {
1954 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1955 0 : goto error;
1956 : }
1957 92 : if (!read_chunk_s(fpin, &name))
1958 : {
1959 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1960 : kind, t);
1961 0 : goto error;
1962 : }
1963 92 : if (!pgstat_is_kind_valid(kind))
1964 : {
1965 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1966 : kind, t);
1967 0 : goto error;
1968 : }
1969 :
1970 92 : kind_info = pgstat_get_kind_info(kind);
1971 92 : if (!kind_info)
1972 : {
1973 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1974 : kind, t);
1975 0 : goto error;
1976 : }
1977 :
1978 92 : if (!kind_info->from_serialized_name)
1979 : {
1980 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1981 : kind, t);
1982 0 : goto error;
1983 : }
1984 :
1985 92 : if (!kind_info->from_serialized_name(&name, &key))
1986 : {
1987 : /* skip over data for entry we don't care about */
1988 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1989 : {
1990 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1991 : NameStr(name), kind, t);
1992 0 : goto error;
1993 : }
1994 :
1995 2 : continue;
1996 : }
1997 :
1998 : Assert(key.kind == kind);
1999 : }
2000 :
2001 : /*
2002 : * This intentionally doesn't use pgstat_get_entry_ref() -
2003 : * putting all stats into checkpointer's
2004 : * pgStatEntryRefHash would be wasted effort and memory.
2005 : */
2006 323814 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
2007 :
2008 : /* don't allow duplicate entries */
2009 323814 : if (found)
2010 : {
2011 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
2012 0 : elog(WARNING, "found duplicate stats entry %u/%u/%llu of type %c",
2013 : key.kind, key.dboid,
2014 : (unsigned long long) key.objid, t);
2015 0 : goto error;
2016 : }
2017 :
2018 323814 : header = pgstat_init_entry(key.kind, p);
2019 323814 : dshash_release_lock(pgStatLocal.shared_hash, p);
2020 :
2021 323814 : if (!read_chunk(fpin,
2022 : pgstat_get_entry_data(key.kind, header),
2023 : pgstat_get_entry_len(key.kind)))
2024 : {
2025 0 : elog(WARNING, "could not read data for entry %u/%u/%llu of type %c",
2026 : key.kind, key.dboid,
2027 : (unsigned long long) key.objid, t);
2028 0 : goto error;
2029 : }
2030 :
2031 323814 : break;
2032 : }
2033 1194 : case PGSTAT_FILE_ENTRY_END:
2034 :
2035 : /*
2036 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
2037 : * file
2038 : */
2039 1194 : if (fgetc(fpin) != EOF)
2040 : {
2041 2 : elog(WARNING, "could not read end-of-file");
2042 2 : goto error;
2043 : }
2044 :
2045 1192 : goto done;
2046 :
2047 0 : default:
2048 0 : elog(WARNING, "could not read entry of type %c", t);
2049 0 : goto error;
2050 : }
2051 : }
2052 :
2053 1218 : done:
2054 1218 : FreeFile(fpin);
2055 :
2056 1218 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2057 1218 : unlink(statfile);
2058 :
2059 1218 : return;
2060 :
2061 26 : error:
2062 26 : ereport(LOG,
2063 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2064 :
2065 26 : pgstat_reset_after_failure();
2066 :
2067 26 : goto done;
2068 : }
2069 :
2070 : /*
2071 : * Helper to reset / drop stats after a crash or after restoring stats from
2072 : * disk failed, potentially after already loading parts.
2073 : */
2074 : static void
2075 458 : pgstat_reset_after_failure(void)
2076 : {
2077 458 : TimestampTz ts = GetCurrentTimestamp();
2078 :
2079 : /* reset fixed-numbered stats */
2080 117706 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2081 : {
2082 117248 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2083 :
2084 117248 : if (!kind_info || !kind_info->fixed_amount)
2085 114498 : continue;
2086 :
2087 2750 : kind_info->reset_all_cb(ts);
2088 : }
2089 :
2090 : /* and drop variable-numbered ones */
2091 458 : pgstat_drop_all_entries();
2092 458 : }
2093 :
2094 : /*
2095 : * GUC assign_hook for stats_fetch_consistency.
2096 : */
2097 : void
2098 5280 : assign_stats_fetch_consistency(int newval, void *extra)
2099 : {
2100 : /*
2101 : * Changing this value in a transaction may cause snapshot state
2102 : * inconsistencies, so force a clear of the current snapshot on the next
2103 : * snapshot build attempt.
2104 : */
2105 5280 : if (pgstat_fetch_consistency != newval)
2106 3084 : force_stats_snapshot_clear = true;
2107 5280 : }
|