Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down, except when shutting down in immediate mode.
16 : *
17 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
18 : *
19 : * Statistics for variable-numbered objects are stored in dynamic shared
20 : * memory and can be found via a dshash hashtable. The statistics counters are
21 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 : * separately allocated (PgStatShared_HashEntry->body). The separate
23 : * allocation allows different kinds of statistics to be stored in the same
24 : * hashtable without wasting space in PgStatShared_HashEntry.
25 : *
26 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 : * is not possible to have statistics for an object that cannot be addressed
28 : * that way at runtime. A wider identifier can be used when serializing to
29 : * disk (used for replication slot stats).
30 : *
31 : * To avoid contention on the shared hashtable, each backend has a
32 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 : * entries. The shared hashtable only needs to be accessed when no prior
35 : * reference is found in the local hashtable. Besides pointing to the
36 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 : * contains a pointer to the shared statistics data, as a process-local
38 : * address, to reduce access costs.
39 : *
40 : * The names for structs stored in shared memory are prefixed with
41 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 : * protected by a dedicated lwlock.
43 : *
44 : * Most stats updates are first accumulated locally in each process as pending
45 : * entries, then later flushed to shared memory (just after commit, or by
46 : * idle-timeout). This practically eliminates contention on individual stats
47 : * entries. For most kinds of variable-numbered pending stats data is stored
48 : * in PgStat_EntryRef->pending. All entries with pending data are in the
49 : * pgStatPending list. Pending statistics updates are flushed out by
50 : * pgstat_report_stat().
51 : *
52 : * The behavior of different kinds of statistics is determined by the kind's
53 : * entry in pgstat_kind_infos, see PgStat_KindInfo for details.
54 : *
55 : * The consistency of read accesses to statistics can be configured using the
56 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
57 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
58 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
59 : * pgStatLocal.snapshot.
60 : *
61 : * To keep things manageable, stats handling is split across several
62 : * files. Infrastructure pieces are in:
63 : * - pgstat.c - this file, to tie it all together
64 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
65 : * the maintenance of hashtable entries
66 : * - pgstat_xact.c - transactional integration, including the transactional
67 : * creation and dropping of stats entries
68 : *
69 : * Each statistics kind is handled in a dedicated file:
70 : * - pgstat_archiver.c
71 : * - pgstat_bgwriter.c
72 : * - pgstat_checkpointer.c
73 : * - pgstat_database.c
74 : * - pgstat_function.c
75 : * - pgstat_io.c
76 : * - pgstat_relation.c
77 : * - pgstat_replslot.c
78 : * - pgstat_slru.c
79 : * - pgstat_subscription.c
80 : * - pgstat_wal.c
81 : *
82 : * Whenever possible infrastructure files should not contain code related to
83 : * specific kinds of stats.
84 : *
85 : *
86 : * Copyright (c) 2001-2023, PostgreSQL Global Development Group
87 : *
88 : * IDENTIFICATION
89 : * src/backend/utils/activity/pgstat.c
90 : * ----------
91 : */
92 : #include "postgres.h"
93 :
94 : #include <unistd.h>
95 :
96 : #include "access/transam.h"
97 : #include "access/xact.h"
98 : #include "lib/dshash.h"
99 : #include "pgstat.h"
100 : #include "port/atomics.h"
101 : #include "storage/fd.h"
102 : #include "storage/ipc.h"
103 : #include "storage/lwlock.h"
104 : #include "storage/pg_shmem.h"
105 : #include "storage/shmem.h"
106 : #include "utils/guc_hooks.h"
107 : #include "utils/memutils.h"
108 : #include "utils/pgstat_internal.h"
109 : #include "utils/timestamp.h"
110 :
111 :
112 : /* ----------
113 : * Timer definitions.
114 : *
115 : * In milliseconds.
116 : * ----------
117 : */
118 :
/* Minimum interval between two non-forced stats flushes. */
#define PGSTAT_MIN_INTERVAL			1000
/*
 * Once stats updates have been pending for longer than this, the next
 * flush is forced, i.e. it blocks on lock acquisition instead of skipping
 * contended entries.
 */
#define PGSTAT_MAX_INTERVAL			60000
/* When to call pgstat_report_stat() again, even when idle. */
#define PGSTAT_IDLE_INTERVAL		10000
125 :
126 : /* ----------
127 : * Initial size hints for the hash tables used in statistics.
128 : * ----------
129 : */
130 :
/* Initial size hint handed to pgstat_snapshot_create() for the snapshot hash. */
#define PGSTAT_SNAPSHOT_HASH_SIZE	512
132 :
133 :
/*
 * Entry of the backend-local hash table that holds a statistics snapshot
 * (pgStatLocal.snapshot.stats).
 *
 * 'data' points to a copy of the stats for 'key'. It is NULL when
 * pgstat_fetch_entry() cached the fact that no stats exist for the key.
 */
typedef struct PgStat_SnapshotEntry
{
	PgStat_HashKey key;
	char		status;			/* for simplehash use */
	void	   *data;			/* the stats data itself */
} PgStat_SnapshotEntry;
141 :
142 :
143 : /* ----------
144 : * Backend-local Hash Table Definitions
145 : * ----------
146 : */
147 :
/*
 * Hash table for stats snapshot entries.
 *
 * Instantiates lib/simplehash.h for PgStat_SnapshotEntry, keyed by
 * PgStat_HashKey; the generated functions carry the pgstat_snapshot_ prefix
 * (pgstat_snapshot_create(), pgstat_snapshot_lookup(),
 * pgstat_snapshot_insert(), ...).
 *
 * Keys are hashed and compared over all sizeof(PgStat_HashKey) bytes, so
 * every byte of a key handed to these functions has to be initialized.
 */
#define SH_PREFIX pgstat_snapshot
#define SH_ELEMENT_TYPE PgStat_SnapshotEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
161 :
162 :
163 : /* ----------
164 : * Local function forward declarations
165 : * ----------
166 : */
167 :
/* on-disk stats file handling */
static void pgstat_write_statsfile(void);
static void pgstat_read_statsfile(void);

/* resets stats contents, e.g. after the stats file had to be discarded */
static void pgstat_reset_after_failure(void);

/* flushes the entries on the pgStatPending list */
static bool pgstat_flush_pending_entries(bool nowait);

/* snapshot management for reads of statistics */
static void pgstat_prep_snapshot(void);
static void pgstat_build_snapshot(void);
static void pgstat_build_snapshot_fixed(PgStat_Kind kind);

/* sanity check for a stats kind passed as a plain integer */
static inline bool pgstat_is_kind_valid(int ikind);
180 :
181 :
182 : /* ----------
183 : * GUC parameters
184 : * ----------
185 : */
186 :
/* NOTE(review): presumably backs the track_counts GUC - confirm in guc_tables.c */
bool		pgstat_track_counts = false;
/* stats_fetch_consistency GUC; one of the PGSTAT_FETCH_CONSISTENCY_* values */
int			pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
189 :
190 :
191 : /* ----------
192 : * state shared with pgstat_*.c
193 : * ----------
194 : */
195 :
/*
 * Backend-local pgstat state. Among other things this holds the pointer to
 * the shared memory control structure (->shmem), the dsa area and shared
 * hash table (->dsa, ->shared_hash) and the current stats snapshot
 * (->snapshot).
 */
PgStat_LocalState pgStatLocal;
197 :
198 :
199 : /* ----------
200 : * Local data
201 : *
202 : * NB: There should be only variables related to stats infrastructure here,
203 : * not for specific kinds of stats.
204 : * ----------
205 : */
206 :
/*
 * Memory context for pending stats data. Mostly to make it easier to track /
 * attribute memory usage.
 *
 * NOTE(review): this comment used to describe three contexts (for the
 * pgStatEntryRefHash table, the entry references and pending data); only the
 * pending-data context is declared at this point - confirm where the others
 * live.
 */

static MemoryContext pgStatPendingContext = NULL;

/*
 * Backend local list of PgStat_EntryRef with unflushed pending stats.
 *
 * Newly pending entries should only ever be added to the end of the list,
 * otherwise pgstat_flush_pending_entries() might not see them immediately.
 */
static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);


/*
 * Force the next stats flush to happen regardless of
 * PGSTAT_MIN_INTERVAL. Useful in test scripts.
 *
 * Set by pgstat_force_next_flush(), consumed (and reset) by the next
 * pgstat_report_stat() call.
 */
static bool pgStatForceNextFlush = false;

/*
 * Force-clear existing snapshot before next use when stats_fetch_consistency
 * is changed.
 *
 * Checked by pgstat_prep_snapshot() and
 * pgstat_get_stat_snapshot_timestamp(); reset by pgstat_clear_snapshot().
 */
static bool force_stats_snapshot_clear = false;


/*
 * For assertions that check pgstat is not used before initialization / after
 * shutdown. Only present in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
static bool pgstat_is_initialized = false;
static bool pgstat_is_shutdown = false;
#endif
245 :
246 :
247 : /*
248 : * The different kinds of statistics.
249 : *
250 : * If reasonably possible, handling specific to one kind of stats should go
251 : * through this abstraction, rather than making more of pgstat.c aware.
252 : *
253 : * See comments for struct PgStat_KindInfo for details about the individual
254 : * fields.
255 : *
256 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
257 : * seem to be a great way of doing that, given the split across multiple
258 : * files.
259 : */
/*
 * One entry per PgStat_Kind, indexed by the kind's value. Variable-numbered
 * kinds describe the layout of their shared entry (shared_size,
 * shared_data_off, shared_data_len); fixed-numbered kinds instead provide
 * reset_all_cb / snapshot_cb callbacks.
 */
static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {

	/* stats kinds for variable-numbered objects */

	[PGSTAT_KIND_DATABASE] = {
		.name = "database",

		.fixed_amount = false,
		/* so pg_stat_database entries can be seen in all databases */
		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Database),
		.shared_data_off = offsetof(PgStatShared_Database, stats),
		.shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
		.pending_size = sizeof(PgStat_StatDBEntry),

		.flush_pending_cb = pgstat_database_flush_cb,
		.reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
	},

	[PGSTAT_KIND_RELATION] = {
		.name = "relation",

		.fixed_amount = false,

		.shared_size = sizeof(PgStatShared_Relation),
		.shared_data_off = offsetof(PgStatShared_Relation, stats),
		.shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
		.pending_size = sizeof(PgStat_TableStatus),

		.flush_pending_cb = pgstat_relation_flush_cb,
		.delete_pending_cb = pgstat_relation_delete_pending_cb,
	},

	[PGSTAT_KIND_FUNCTION] = {
		.name = "function",

		.fixed_amount = false,

		.shared_size = sizeof(PgStatShared_Function),
		.shared_data_off = offsetof(PgStatShared_Function, stats),
		.shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
		.pending_size = sizeof(PgStat_FunctionCounts),

		.flush_pending_cb = pgstat_function_flush_cb,
	},

	/*
	 * Replication slot stats use a wider identifier when serialized to disk,
	 * hence named_on_disk and the to/from_serialized_name callbacks.
	 *
	 * NOTE(review): no pending_size / flush_pending_cb is set here, so
	 * presumably these stats are not accumulated as pending entries - confirm
	 * in pgstat_replslot.c.
	 */
	[PGSTAT_KIND_REPLSLOT] = {
		.name = "replslot",

		.fixed_amount = false,

		.accessed_across_databases = true,
		.named_on_disk = true,

		.shared_size = sizeof(PgStatShared_ReplSlot),
		.shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
		.shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),

		.reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
		.to_serialized_name = pgstat_replslot_to_serialized_name_cb,
		.from_serialized_name = pgstat_replslot_from_serialized_name_cb,
	},

	[PGSTAT_KIND_SUBSCRIPTION] = {
		.name = "subscription",

		.fixed_amount = false,
		/* so pg_stat_subscription_stats entries can be seen in all databases */
		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Subscription),
		.shared_data_off = offsetof(PgStatShared_Subscription, stats),
		.shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
		.pending_size = sizeof(PgStat_BackendSubEntry),

		.flush_pending_cb = pgstat_subscription_flush_cb,
		.reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
	},


	/* stats for fixed-numbered (mostly 1) objects */

	[PGSTAT_KIND_ARCHIVER] = {
		.name = "archiver",

		.fixed_amount = true,

		.reset_all_cb = pgstat_archiver_reset_all_cb,
		.snapshot_cb = pgstat_archiver_snapshot_cb,
	},

	[PGSTAT_KIND_BGWRITER] = {
		.name = "bgwriter",

		.fixed_amount = true,

		.reset_all_cb = pgstat_bgwriter_reset_all_cb,
		.snapshot_cb = pgstat_bgwriter_snapshot_cb,
	},

	[PGSTAT_KIND_CHECKPOINTER] = {
		.name = "checkpointer",

		.fixed_amount = true,

		.reset_all_cb = pgstat_checkpointer_reset_all_cb,
		.snapshot_cb = pgstat_checkpointer_snapshot_cb,
	},

	[PGSTAT_KIND_IO] = {
		.name = "io",

		.fixed_amount = true,

		.reset_all_cb = pgstat_io_reset_all_cb,
		.snapshot_cb = pgstat_io_snapshot_cb,
	},

	[PGSTAT_KIND_SLRU] = {
		.name = "slru",

		.fixed_amount = true,

		.reset_all_cb = pgstat_slru_reset_all_cb,
		.snapshot_cb = pgstat_slru_snapshot_cb,
	},

	[PGSTAT_KIND_WAL] = {
		.name = "wal",

		.fixed_amount = true,

		.reset_all_cb = pgstat_wal_reset_all_cb,
		.snapshot_cb = pgstat_wal_snapshot_cb,
	},
};
397 :
398 :
399 : /* ------------------------------------------------------------
400 : * Functions managing the state of the stats system for all backends.
401 : * ------------------------------------------------------------
402 : */
403 :
/*
 * Load the on-disk statistics into memory at server start.
 *
 * Only the startup process, or a backend running in single user mode, is
 * supposed to call this.
 */
void
pgstat_restore_stats(void)
{
	/* all of the work is done by the stats file reader */
	pgstat_read_statsfile();
}
414 :
415 : /*
416 : * Remove the stats file. This is currently used only if WAL recovery is
417 : * needed after a crash.
418 : *
419 : * Should only be called by the startup process or in single user mode.
420 : */
421 : void
422 302 : pgstat_discard_stats(void)
423 : {
424 : int ret;
425 :
426 : /* NB: this needs to be done even in single user mode */
427 :
428 302 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
429 302 : if (ret != 0)
430 : {
431 300 : if (errno == ENOENT)
432 300 : elog(DEBUG2,
433 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
434 : PGSTAT_STAT_PERMANENT_FILENAME);
435 : else
436 0 : ereport(LOG,
437 : (errcode_for_file_access(),
438 : errmsg("could not unlink permanent statistics file \"%s\": %m",
439 : PGSTAT_STAT_PERMANENT_FILENAME)));
440 : }
441 : else
442 : {
443 2 : ereport(DEBUG2,
444 : (errcode_for_file_access(),
445 : errmsg_internal("unlinked permanent statistics file \"%s\"",
446 : PGSTAT_STAT_PERMANENT_FILENAME)));
447 : }
448 :
449 : /*
450 : * Reset stats contents. This will set reset timestamps of fixed-numbered
451 : * stats to the current time (no variable stats exist).
452 : */
453 302 : pgstat_reset_after_failure();
454 302 : }
455 :
456 : /*
457 : * pgstat_before_server_shutdown() needs to be called by exactly one process
458 : * during regular server shutdowns. Otherwise all stats will be lost.
459 : *
460 : * We currently only write out stats for proc_exit(0). We might want to change
461 : * that at some point... But right now pgstat_discard_stats() would be called
462 : * during the start after a disorderly shutdown, anyway.
463 : */
464 : void
465 886 : pgstat_before_server_shutdown(int code, Datum arg)
466 : {
467 : Assert(pgStatLocal.shmem != NULL);
468 : Assert(!pgStatLocal.shmem->is_shutdown);
469 :
470 : /*
471 : * Stats should only be reported after pgstat_initialize() and before
472 : * pgstat_shutdown(). This is a convenient point to catch most violations
473 : * of this rule.
474 : */
475 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
476 :
477 : /* flush out our own pending changes before writing out */
478 886 : pgstat_report_stat(true);
479 :
480 : /*
481 : * Only write out file during normal shutdown. Don't even signal that
482 : * we've shutdown during irregular shutdowns, because the shutdown
483 : * sequence isn't coordinated to ensure this backend shuts down last.
484 : */
485 886 : if (code == 0)
486 : {
487 876 : pgStatLocal.shmem->is_shutdown = true;
488 876 : pgstat_write_statsfile();
489 : }
490 886 : }
491 :
492 :
493 : /* ------------------------------------------------------------
494 : * Backend initialization / shutdown functions
495 : * ------------------------------------------------------------
496 : */
497 :
498 : /*
499 : * Shut down a single backend's statistics reporting at process exit.
500 : *
501 : * Flush out any remaining statistics counts. Without this, operations
502 : * triggered during backend exit (such as temp table deletions) won't be
503 : * counted.
504 : */
505 : static void
506 27846 : pgstat_shutdown_hook(int code, Datum arg)
507 : {
508 : Assert(!pgstat_is_shutdown);
509 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
510 :
511 : /*
512 : * If we got as far as discovering our own database ID, we can flush out
513 : * what we did so far. Otherwise, we'd be reporting an invalid database
514 : * ID, so forget it. (This means that accesses to pg_database during
515 : * failed backend starts might never get counted.)
516 : */
517 27846 : if (OidIsValid(MyDatabaseId))
518 22000 : pgstat_report_disconnect(MyDatabaseId);
519 :
520 27846 : pgstat_report_stat(true);
521 :
522 : /* there shouldn't be any pending changes left */
523 : Assert(dlist_is_empty(&pgStatPending));
524 27846 : dlist_init(&pgStatPending);
525 :
526 27846 : pgstat_detach_shmem();
527 :
528 : #ifdef USE_ASSERT_CHECKING
529 : pgstat_is_shutdown = true;
530 : #endif
531 27846 : }
532 :
533 : /*
534 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
535 : * BaseInit().
536 : *
537 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
538 : */
539 : void
540 27846 : pgstat_initialize(void)
541 : {
542 : Assert(!pgstat_is_initialized);
543 :
544 27846 : pgstat_attach_shmem();
545 :
546 27846 : pgstat_init_wal();
547 :
548 : /* Set up a process-exit hook to clean up */
549 27846 : before_shmem_exit(pgstat_shutdown_hook, 0);
550 :
551 : #ifdef USE_ASSERT_CHECKING
552 : pgstat_is_initialized = true;
553 : #endif
554 27846 : }
555 :
556 :
557 : /* ------------------------------------------------------------
558 : * Public functions used by backends follow
559 : * ------------------------------------------------------------
560 : */
561 :
562 : /*
563 : * Must be called by processes that performs DML: tcop/postgres.c, logical
564 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
565 : * shared memory.
566 : *
567 : * Unless called with 'force', pending stats updates are flushed happen once
568 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
569 : * block on lock acquisition, except if stats updates have been pending for
570 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
571 : *
572 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
573 : * suggested idle timeout is returned. Currently this is always
574 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
575 : * a timeout after which to call pgstat_report_stat(true), but are not
576 : * required to do so.
577 : *
578 : * Note that this is called only when not within a transaction, so it is fair
579 : * to use transaction stop time as an approximation of current time.
580 : */
581 : long
582 483500 : pgstat_report_stat(bool force)
583 : {
584 : static TimestampTz pending_since = 0;
585 : static TimestampTz last_flush = 0;
586 : bool partial_flush;
587 : TimestampTz now;
588 : bool nowait;
589 :
590 : pgstat_assert_is_up();
591 : Assert(!IsTransactionOrTransactionBlock());
592 :
593 : /* "absorb" the forced flush even if there's nothing to flush */
594 483500 : if (pgStatForceNextFlush)
595 : {
596 408 : force = true;
597 408 : pgStatForceNextFlush = false;
598 : }
599 :
600 : /* Don't expend a clock check if nothing to do */
601 483500 : if (dlist_is_empty(&pgStatPending) &&
602 10360 : !have_iostats &&
603 9998 : !have_slrustats &&
604 8146 : !pgstat_have_pending_wal())
605 : {
606 : Assert(pending_since == 0);
607 8134 : return 0;
608 : }
609 :
610 : /*
611 : * There should never be stats to report once stats are shut down. Can't
612 : * assert that before the checks above, as there is an unconditional
613 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
614 : * the process that ran pgstat_before_server_shutdown() will still call.
615 : */
616 : Assert(!pgStatLocal.shmem->is_shutdown);
617 :
618 475366 : if (force)
619 : {
620 : /*
621 : * Stats reports are forced either when it's been too long since stats
622 : * have been reported or in processes that force stats reporting to
623 : * happen at specific points (including shutdown). In the former case
624 : * the transaction stop time might be quite old, in the latter it
625 : * would never get cleared.
626 : */
627 27896 : now = GetCurrentTimestamp();
628 : }
629 : else
630 : {
631 447470 : now = GetCurrentTransactionStopTimestamp();
632 :
633 855532 : if (pending_since > 0 &&
634 408062 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
635 : {
636 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
637 0 : force = true;
638 : }
639 447470 : else if (last_flush > 0 &&
640 428138 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
641 : {
642 : /* don't flush too frequently */
643 427070 : if (pending_since == 0)
644 20044 : pending_since = now;
645 :
646 427070 : return PGSTAT_IDLE_INTERVAL;
647 : }
648 : }
649 :
650 48296 : pgstat_update_dbstats(now);
651 :
652 : /* don't wait for lock acquisition when !force */
653 48296 : nowait = !force;
654 :
655 48296 : partial_flush = false;
656 :
657 : /* flush database / relation / function / ... stats */
658 48296 : partial_flush |= pgstat_flush_pending_entries(nowait);
659 :
660 : /* flush IO stats */
661 48296 : partial_flush |= pgstat_flush_io(nowait);
662 :
663 : /* flush wal stats */
664 48296 : partial_flush |= pgstat_flush_wal(nowait);
665 :
666 : /* flush SLRU stats */
667 48296 : partial_flush |= pgstat_slru_flush(nowait);
668 :
669 48296 : last_flush = now;
670 :
671 : /*
672 : * If some of the pending stats could not be flushed due to lock
673 : * contention, let the caller know when to retry.
674 : */
675 48296 : if (partial_flush)
676 : {
677 : /* force should have prevented us from getting here */
678 : Assert(!force);
679 :
680 : /* remember since when stats have been pending */
681 0 : if (pending_since == 0)
682 0 : pending_since = now;
683 :
684 0 : return PGSTAT_IDLE_INTERVAL;
685 : }
686 :
687 48296 : pending_since = 0;
688 :
689 48296 : return 0;
690 : }
691 :
692 : /*
693 : * Force locally pending stats to be flushed during the next
694 : * pgstat_report_stat() call. This is useful for writing tests.
695 : */
696 : void
697 408 : pgstat_force_next_flush(void)
698 : {
699 408 : pgStatForceNextFlush = true;
700 408 : }
701 :
702 : /*
703 : * Only for use by pgstat_reset_counters()
704 : */
705 : static bool
706 21308 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
707 : {
708 21308 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
709 : }
710 :
711 : /*
712 : * Reset counters for our database.
713 : *
714 : * Permission checking for this function is managed through the normal
715 : * GRANT system.
716 : */
717 : void
718 26 : pgstat_reset_counters(void)
719 : {
720 26 : TimestampTz ts = GetCurrentTimestamp();
721 :
722 26 : pgstat_reset_matching_entries(match_db_entries,
723 : ObjectIdGetDatum(MyDatabaseId),
724 : ts);
725 26 : }
726 :
727 : /*
728 : * Reset a single variable-numbered entry.
729 : *
730 : * If the stats kind is within a database, also reset the database's
731 : * stat_reset_timestamp.
732 : *
733 : * Permission checking for this function is managed through the normal
734 : * GRANT system.
735 : */
736 : void
737 38 : pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objoid)
738 : {
739 38 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
740 38 : TimestampTz ts = GetCurrentTimestamp();
741 :
742 : /* not needed atm, and doesn't make sense with the current signature */
743 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
744 :
745 : /* reset the "single counter" */
746 38 : pgstat_reset_entry(kind, dboid, objoid, ts);
747 :
748 38 : if (!kind_info->accessed_across_databases)
749 16 : pgstat_reset_database_timestamp(dboid, ts);
750 38 : }
751 :
752 : /*
753 : * Reset stats for all entries of a kind.
754 : *
755 : * Permission checking for this function is managed through the normal
756 : * GRANT system.
757 : */
758 : void
759 54 : pgstat_reset_of_kind(PgStat_Kind kind)
760 : {
761 54 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
762 54 : TimestampTz ts = GetCurrentTimestamp();
763 :
764 54 : if (kind_info->fixed_amount)
765 46 : kind_info->reset_all_cb(ts);
766 : else
767 8 : pgstat_reset_entries_of_kind(kind, ts);
768 54 : }
769 :
770 :
771 : /* ------------------------------------------------------------
772 : * Fetching of stats
773 : * ------------------------------------------------------------
774 : */
775 :
776 : /*
777 : * Discard any data collected in the current transaction. Any subsequent
778 : * request will cause new snapshots to be read.
779 : *
780 : * This is also invoked during transaction commit or abort to discard
781 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
782 : * cause this routine to be called.
783 : */
784 : void
785 516404 : pgstat_clear_snapshot(void)
786 : {
787 : pgstat_assert_is_up();
788 :
789 516404 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
790 : sizeof(pgStatLocal.snapshot.fixed_valid));
791 516404 : pgStatLocal.snapshot.stats = NULL;
792 516404 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
793 :
794 : /* Release memory, if any was allocated */
795 516404 : if (pgStatLocal.snapshot.context)
796 : {
797 834 : MemoryContextDelete(pgStatLocal.snapshot.context);
798 :
799 : /* Reset variables */
800 834 : pgStatLocal.snapshot.context = NULL;
801 : }
802 :
803 : /*
804 : * Historically the backend_status.c facilities lived in this file, and
805 : * were reset with the same function. For now keep it that way, and
806 : * forward the reset request.
807 : */
808 516404 : pgstat_clear_backend_activity_snapshot();
809 :
810 : /* Reset this flag, as it may be possible that a cleanup was forced. */
811 516404 : force_stats_snapshot_clear = false;
812 516404 : }
813 :
814 : void *
815 17112 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
816 : {
817 : PgStat_HashKey key;
818 : PgStat_EntryRef *entry_ref;
819 : void *stats_data;
820 17112 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
821 :
822 : /* should be called from backends */
823 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
824 : Assert(!kind_info->fixed_amount);
825 :
826 17112 : pgstat_prep_snapshot();
827 :
828 17112 : key.kind = kind;
829 17112 : key.dboid = dboid;
830 17112 : key.objoid = objoid;
831 :
832 : /* if we need to build a full snapshot, do so */
833 17112 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
834 460 : pgstat_build_snapshot();
835 :
836 : /* if caching is desired, look up in cache */
837 17112 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
838 : {
839 9144 : PgStat_SnapshotEntry *entry = NULL;
840 :
841 9144 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
842 :
843 9144 : if (entry)
844 536 : return entry->data;
845 :
846 : /*
847 : * If we built a full snapshot and the key is not in
848 : * pgStatLocal.snapshot.stats, there are no matching stats.
849 : */
850 8608 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
851 28 : return NULL;
852 : }
853 :
854 16548 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
855 :
856 16548 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
857 :
858 16548 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
859 : {
860 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
861 4752 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
862 : {
863 1808 : PgStat_SnapshotEntry *entry = NULL;
864 : bool found;
865 :
866 1808 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
867 : Assert(!found);
868 1808 : entry->data = NULL;
869 : }
870 4752 : return NULL;
871 : }
872 :
873 : /*
874 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
875 : * otherwise we could quickly end up with a fair bit of memory used due to
876 : * repeated accesses.
877 : */
878 11796 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
879 5024 : stats_data = palloc(kind_info->shared_data_len);
880 : else
881 6772 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
882 6772 : kind_info->shared_data_len);
883 :
884 11796 : pgstat_lock_entry_shared(entry_ref, false);
885 23592 : memcpy(stats_data,
886 11796 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
887 11796 : kind_info->shared_data_len);
888 11796 : pgstat_unlock_entry(entry_ref);
889 :
890 11796 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
891 : {
892 6772 : PgStat_SnapshotEntry *entry = NULL;
893 : bool found;
894 :
895 6772 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
896 6772 : entry->data = stats_data;
897 : }
898 :
899 11796 : return stats_data;
900 : }
901 :
902 : /*
903 : * If a stats snapshot has been taken, return the timestamp at which that was
904 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
905 : * false.
906 : */
907 : TimestampTz
908 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
909 : {
910 60 : if (force_stats_snapshot_clear)
911 18 : pgstat_clear_snapshot();
912 :
913 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
914 : {
915 24 : *have_snapshot = true;
916 24 : return pgStatLocal.snapshot.snapshot_timestamp;
917 : }
918 :
919 36 : *have_snapshot = false;
920 :
921 36 : return 0;
922 : }
923 :
924 : bool
925 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
926 : {
927 : /* fixed-numbered stats always exist */
928 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
929 12 : return true;
930 :
931 148 : return pgstat_get_entry_ref(kind, dboid, objoid, false, NULL) != NULL;
932 : }
933 :
934 : /*
935 : * Ensure snapshot for fixed-numbered 'kind' exists.
936 : *
937 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
938 : * massaging the data into the desired format.
939 : */
940 : void
941 398 : pgstat_snapshot_fixed(PgStat_Kind kind)
942 : {
943 : Assert(pgstat_is_kind_valid(kind));
944 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
945 :
946 398 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
947 24 : pgstat_build_snapshot();
948 : else
949 374 : pgstat_build_snapshot_fixed(kind);
950 :
951 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
952 398 : }
953 :
954 : static void
955 17164 : pgstat_prep_snapshot(void)
956 : {
957 17164 : if (force_stats_snapshot_clear)
958 18 : pgstat_clear_snapshot();
959 :
960 17164 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
961 9196 : pgStatLocal.snapshot.stats != NULL)
962 16330 : return;
963 :
964 834 : if (!pgStatLocal.snapshot.context)
965 834 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
966 : "PgStat Snapshot",
967 : ALLOCSET_SMALL_SIZES);
968 :
969 834 : pgStatLocal.snapshot.stats =
970 834 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
971 : PGSTAT_SNAPSHOT_HASH_SIZE,
972 : NULL);
973 : }
974 :
975 : static void
976 484 : pgstat_build_snapshot(void)
977 : {
978 : dshash_seq_status hstat;
979 : PgStatShared_HashEntry *p;
980 :
981 : /* should only be called when we need a snapshot */
982 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
983 :
984 : /* snapshot already built */
985 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
986 432 : return;
987 :
988 52 : pgstat_prep_snapshot();
989 :
990 : Assert(pgStatLocal.snapshot.stats->members == 0);
991 :
992 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
993 :
994 : /*
995 : * Snapshot all variable stats.
996 : */
997 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
998 47998 : while ((p = dshash_seq_next(&hstat)) != NULL)
999 : {
1000 47946 : PgStat_Kind kind = p->key.kind;
1001 47946 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1002 : bool found;
1003 : PgStat_SnapshotEntry *entry;
1004 : PgStatShared_Common *stats_data;
1005 :
1006 : /*
1007 : * Check if the stats object should be included in the snapshot.
1008 : * Unless the stats kind can be accessed from all databases (e.g.,
1009 : * database stats themselves), we only include stats for the current
1010 : * database or objects not associated with a database (e.g. shared
1011 : * relations).
1012 : */
1013 47946 : if (p->key.dboid != MyDatabaseId &&
1014 15276 : p->key.dboid != InvalidOid &&
1015 12624 : !kind_info->accessed_across_databases)
1016 12724 : continue;
1017 :
1018 35426 : if (p->dropped)
1019 204 : continue;
1020 :
1021 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1022 :
1023 35222 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1024 : Assert(stats_data);
1025 :
1026 35222 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1027 : Assert(!found);
1028 :
1029 70444 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1030 35222 : kind_info->shared_size);
1031 :
1032 : /*
1033 : * Acquire the LWLock directly instead of using
1034 : * pg_stat_lock_entry_shared() which requires a reference.
1035 : */
1036 35222 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1037 70444 : memcpy(entry->data,
1038 35222 : pgstat_get_entry_data(kind, stats_data),
1039 35222 : kind_info->shared_size);
1040 35222 : LWLockRelease(&stats_data->lock);
1041 : }
1042 52 : dshash_seq_term(&hstat);
1043 :
1044 : /*
1045 : * Build snapshot of all fixed-numbered stats.
1046 : */
1047 624 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1048 : {
1049 572 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1050 :
1051 572 : if (!kind_info->fixed_amount)
1052 : {
1053 : Assert(kind_info->snapshot_cb == NULL);
1054 260 : continue;
1055 : }
1056 :
1057 312 : pgstat_build_snapshot_fixed(kind);
1058 : }
1059 :
1060 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1061 : }
1062 :
1063 : static void
1064 5942 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1065 : {
1066 5942 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1067 :
1068 : Assert(kind_info->fixed_amount);
1069 : Assert(kind_info->snapshot_cb != NULL);
1070 :
1071 5942 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1072 : {
1073 : /* rebuild every time */
1074 5286 : pgStatLocal.snapshot.fixed_valid[kind] = false;
1075 : }
1076 656 : else if (pgStatLocal.snapshot.fixed_valid[kind])
1077 : {
1078 : /* in snapshot mode we shouldn't get called again */
1079 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1080 12 : return;
1081 : }
1082 :
1083 : Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
1084 :
1085 5930 : kind_info->snapshot_cb();
1086 :
1087 : Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
1088 5930 : pgStatLocal.snapshot.fixed_valid[kind] = true;
1089 : }
1090 :
1091 :
1092 : /* ------------------------------------------------------------
1093 : * Backend-local pending stats infrastructure
1094 : * ------------------------------------------------------------
1095 : */
1096 :
1097 : /*
1098 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1099 : * stats if not already done.
1100 : *
1101 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1102 : * created, false otherwise.
1103 : */
1104 : PgStat_EntryRef *
1105 2518986 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry)
1106 : {
1107 : PgStat_EntryRef *entry_ref;
1108 :
1109 : /* need to be able to flush out */
1110 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1111 :
1112 2518986 : if (unlikely(!pgStatPendingContext))
1113 : {
1114 24100 : pgStatPendingContext =
1115 24100 : AllocSetContextCreate(TopMemoryContext,
1116 : "PgStat Pending",
1117 : ALLOCSET_SMALL_SIZES);
1118 : }
1119 :
1120 2518986 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid,
1121 : true, created_entry);
1122 :
1123 2518986 : if (entry_ref->pending == NULL)
1124 : {
1125 1287610 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1126 :
1127 : Assert(entrysize != (size_t) -1);
1128 :
1129 1287610 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1130 1287610 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1131 : }
1132 :
1133 2518986 : return entry_ref;
1134 : }
1135 :
1136 : /*
1137 : * Return an existing stats entry, or NULL.
1138 : *
1139 : * This should only be used for helper function for pgstatfuncs.c - outside of
1140 : * that it shouldn't be needed.
1141 : */
1142 : PgStat_EntryRef *
1143 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
1144 : {
1145 : PgStat_EntryRef *entry_ref;
1146 :
1147 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
1148 :
1149 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1150 30 : return NULL;
1151 :
1152 54 : return entry_ref;
1153 : }
1154 :
1155 : void
1156 1287610 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1157 : {
1158 1287610 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1159 1287610 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1160 1287610 : void *pending_data = entry_ref->pending;
1161 :
1162 : Assert(pending_data != NULL);
1163 : /* !fixed_amount stats should be handled explicitly */
1164 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1165 :
1166 1287610 : if (kind_info->delete_pending_cb)
1167 1207904 : kind_info->delete_pending_cb(entry_ref);
1168 :
1169 1287610 : pfree(pending_data);
1170 1287610 : entry_ref->pending = NULL;
1171 :
1172 1287610 : dlist_delete(&entry_ref->pending_node);
1173 1287610 : }
1174 :
1175 : /*
1176 : * Flush out pending stats for database objects (databases, relations,
1177 : * functions).
1178 : */
1179 : static bool
1180 48296 : pgstat_flush_pending_entries(bool nowait)
1181 : {
1182 48296 : bool have_pending = false;
1183 48296 : dlist_node *cur = NULL;
1184 :
1185 : /*
1186 : * Need to be a bit careful iterating over the list of pending entries.
1187 : * Processing a pending entry may queue further pending entries to the end
1188 : * of the list that we want to process, so a simple iteration won't do.
1189 : * Further complicating matters is that we want to delete the current
1190 : * entry in each iteration from the list if we flushed successfully.
1191 : *
1192 : * So we just keep track of the next pointer in each loop iteration.
1193 : */
1194 48296 : if (!dlist_is_empty(&pgStatPending))
1195 46136 : cur = dlist_head_node(&pgStatPending);
1196 :
1197 1279640 : while (cur)
1198 : {
1199 1231344 : PgStat_EntryRef *entry_ref =
1200 1231344 : dlist_container(PgStat_EntryRef, pending_node, cur);
1201 1231344 : PgStat_HashKey key = entry_ref->shared_entry->key;
1202 1231344 : PgStat_Kind kind = key.kind;
1203 1231344 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1204 : bool did_flush;
1205 : dlist_node *next;
1206 :
1207 : Assert(!kind_info->fixed_amount);
1208 : Assert(kind_info->flush_pending_cb != NULL);
1209 :
1210 : /* flush the stats, if possible */
1211 1231344 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1212 :
1213 : Assert(did_flush || nowait);
1214 :
1215 : /* determine next entry, before deleting the pending entry */
1216 1231344 : if (dlist_has_next(&pgStatPending, cur))
1217 1185208 : next = dlist_next_node(&pgStatPending, cur);
1218 : else
1219 46136 : next = NULL;
1220 :
1221 : /* if successfully flushed, remove entry */
1222 1231344 : if (did_flush)
1223 1231344 : pgstat_delete_pending_entry(entry_ref);
1224 : else
1225 0 : have_pending = true;
1226 :
1227 1231344 : cur = next;
1228 : }
1229 :
1230 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1231 :
1232 48296 : return have_pending;
1233 : }
1234 :
1235 :
1236 : /* ------------------------------------------------------------
1237 : * Helper / infrastructure functions
1238 : * ------------------------------------------------------------
1239 : */
1240 :
1241 : PgStat_Kind
1242 166 : pgstat_get_kind_from_str(char *kind_str)
1243 : {
1244 476 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1245 : {
1246 470 : if (pg_strcasecmp(kind_str, pgstat_kind_infos[kind].name) == 0)
1247 160 : return kind;
1248 : }
1249 :
1250 6 : ereport(ERROR,
1251 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1252 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1253 : return PGSTAT_KIND_DATABASE; /* avoid compiler warnings */
1254 : }
1255 :
1256 : static inline bool
1257 273934 : pgstat_is_kind_valid(int ikind)
1258 : {
1259 273934 : return ikind >= PGSTAT_KIND_FIRST_VALID && ikind <= PGSTAT_KIND_LAST;
1260 : }
1261 :
1262 : const PgStat_KindInfo *
1263 5730050 : pgstat_get_kind_info(PgStat_Kind kind)
1264 : {
1265 : Assert(pgstat_is_kind_valid(kind));
1266 :
1267 5730050 : return &pgstat_kind_infos[kind];
1268 : }
1269 :
/*
 * Assert that the stats system is initialized and has not been shut down.
 *
 * Stats may only be reported between pgstat_initialize() and
 * pgstat_shutdown(); calling this from a few central places makes violations
 * of that rule easier to catch.  Only compiled in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1282 :
1283 :
1284 : /* ------------------------------------------------------------
1285 : * reading and writing of on-disk stats file
1286 : * ------------------------------------------------------------
1287 : */
1288 :
/* helpers for pgstat_write_statsfile() */

/*
 * Write len bytes starting at ptr to fpout as a single item.
 *
 * The result of fwrite() is deliberately ignored: the caller checks the
 * stream with ferror() once, after everything has been written.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	(void) fwrite(ptr, len, 1, fpout);
}
1300 :
1301 : #define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1302 :
1303 : /*
1304 : * This function is called in the last process that is accessing the shared
1305 : * stats so locking is not required.
1306 : */
1307 : static void
1308 876 : pgstat_write_statsfile(void)
1309 : {
1310 : FILE *fpout;
1311 : int32 format_id;
1312 876 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1313 876 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1314 : dshash_seq_status hstat;
1315 : PgStatShared_HashEntry *ps;
1316 :
1317 : pgstat_assert_is_up();
1318 :
1319 : /* we're shutting down, so it's ok to just override this */
1320 876 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1321 :
1322 876 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
1323 :
1324 : /*
1325 : * Open the statistics temp file to write out the current values.
1326 : */
1327 876 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1328 876 : if (fpout == NULL)
1329 : {
1330 0 : ereport(LOG,
1331 : (errcode_for_file_access(),
1332 : errmsg("could not open temporary statistics file \"%s\": %m",
1333 : tmpfile)));
1334 0 : return;
1335 : }
1336 :
1337 : /*
1338 : * Write the file header --- currently just a format ID.
1339 : */
1340 876 : format_id = PGSTAT_FILE_FORMAT_ID;
1341 876 : write_chunk_s(fpout, &format_id);
1342 :
1343 : /*
1344 : * XXX: The following could now be generalized to just iterate over
1345 : * pgstat_kind_infos instead of knowing about the different kinds of
1346 : * stats.
1347 : */
1348 :
1349 : /*
1350 : * Write archiver stats struct
1351 : */
1352 876 : pgstat_build_snapshot_fixed(PGSTAT_KIND_ARCHIVER);
1353 876 : write_chunk_s(fpout, &pgStatLocal.snapshot.archiver);
1354 :
1355 : /*
1356 : * Write bgwriter stats struct
1357 : */
1358 876 : pgstat_build_snapshot_fixed(PGSTAT_KIND_BGWRITER);
1359 876 : write_chunk_s(fpout, &pgStatLocal.snapshot.bgwriter);
1360 :
1361 : /*
1362 : * Write checkpointer stats struct
1363 : */
1364 876 : pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER);
1365 876 : write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer);
1366 :
1367 : /*
1368 : * Write IO stats struct
1369 : */
1370 876 : pgstat_build_snapshot_fixed(PGSTAT_KIND_IO);
1371 876 : write_chunk_s(fpout, &pgStatLocal.snapshot.io);
1372 :
1373 : /*
1374 : * Write SLRU stats struct
1375 : */
1376 876 : pgstat_build_snapshot_fixed(PGSTAT_KIND_SLRU);
1377 876 : write_chunk_s(fpout, &pgStatLocal.snapshot.slru);
1378 :
1379 : /*
1380 : * Write WAL stats struct
1381 : */
1382 876 : pgstat_build_snapshot_fixed(PGSTAT_KIND_WAL);
1383 876 : write_chunk_s(fpout, &pgStatLocal.snapshot.wal);
1384 :
1385 : /*
1386 : * Walk through the stats entries
1387 : */
1388 876 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1389 260192 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1390 : {
1391 : PgStatShared_Common *shstats;
1392 259316 : const PgStat_KindInfo *kind_info = NULL;
1393 :
1394 259316 : CHECK_FOR_INTERRUPTS();
1395 :
1396 : /* we may have some "dropped" entries not yet removed, skip them */
1397 : Assert(!ps->dropped);
1398 259316 : if (ps->dropped)
1399 0 : continue;
1400 :
1401 259316 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1402 :
1403 259316 : kind_info = pgstat_get_kind_info(ps->key.kind);
1404 :
1405 : /* if not dropped the valid-entry refcount should exist */
1406 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1407 :
1408 259316 : if (!kind_info->to_serialized_name)
1409 : {
1410 : /* normal stats entry, identified by PgStat_HashKey */
1411 259200 : fputc('S', fpout);
1412 259200 : write_chunk_s(fpout, &ps->key);
1413 : }
1414 : else
1415 : {
1416 : /* stats entry identified by name on disk (e.g. slots) */
1417 : NameData name;
1418 :
1419 116 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1420 :
1421 116 : fputc('N', fpout);
1422 116 : write_chunk_s(fpout, &ps->key.kind);
1423 116 : write_chunk_s(fpout, &name);
1424 : }
1425 :
1426 : /* Write except the header part of the entry */
1427 259316 : write_chunk(fpout,
1428 : pgstat_get_entry_data(ps->key.kind, shstats),
1429 : pgstat_get_entry_len(ps->key.kind));
1430 : }
1431 876 : dshash_seq_term(&hstat);
1432 :
1433 : /*
1434 : * No more output to be done. Close the temp file and replace the old
1435 : * pgstat.stat with it. The ferror() check replaces testing for error
1436 : * after each individual fputc or fwrite (in write_chunk()) above.
1437 : */
1438 876 : fputc('E', fpout);
1439 :
1440 876 : if (ferror(fpout))
1441 : {
1442 0 : ereport(LOG,
1443 : (errcode_for_file_access(),
1444 : errmsg("could not write temporary statistics file \"%s\": %m",
1445 : tmpfile)));
1446 0 : FreeFile(fpout);
1447 0 : unlink(tmpfile);
1448 : }
1449 876 : else if (FreeFile(fpout) < 0)
1450 : {
1451 0 : ereport(LOG,
1452 : (errcode_for_file_access(),
1453 : errmsg("could not close temporary statistics file \"%s\": %m",
1454 : tmpfile)));
1455 0 : unlink(tmpfile);
1456 : }
1457 876 : else if (rename(tmpfile, statfile) < 0)
1458 : {
1459 0 : ereport(LOG,
1460 : (errcode_for_file_access(),
1461 : errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
1462 : tmpfile, statfile)));
1463 0 : unlink(tmpfile);
1464 : }
1465 : }
1466 :
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly len bytes from fpin into ptr.  Returns true only if all len
 * bytes could be read.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}
1473 :
1474 : #define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1475 :
1476 : /*
1477 : * Reads in existing statistics file into the shared stats hash.
1478 : *
1479 : * This function is called in the only process that is accessing the shared
1480 : * stats so locking is not required.
1481 : */
1482 : static void
1483 1046 : pgstat_read_statsfile(void)
1484 : {
1485 : FILE *fpin;
1486 : int32 format_id;
1487 : bool found;
1488 1046 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1489 1046 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1490 :
1491 : /* shouldn't be called from postmaster */
1492 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1493 :
1494 1046 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1495 :
1496 : /*
1497 : * Try to open the stats file. If it doesn't exist, the backends simply
1498 : * returns zero for anything and statistics simply starts from scratch
1499 : * with empty counters.
1500 : *
1501 : * ENOENT is a possibility if stats collection was previously disabled or
1502 : * has not yet written the stats file for the first time. Any other
1503 : * failure condition is suspicious.
1504 : */
1505 1046 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1506 : {
1507 66 : if (errno != ENOENT)
1508 0 : ereport(LOG,
1509 : (errcode_for_file_access(),
1510 : errmsg("could not open statistics file \"%s\": %m",
1511 : statfile)));
1512 66 : pgstat_reset_after_failure();
1513 66 : return;
1514 : }
1515 :
1516 : /*
1517 : * Verify it's of the expected format.
1518 : */
1519 980 : if (!read_chunk_s(fpin, &format_id) ||
1520 980 : format_id != PGSTAT_FILE_FORMAT_ID)
1521 2 : goto error;
1522 :
1523 : /*
1524 : * XXX: The following could now be generalized to just iterate over
1525 : * pgstat_kind_infos instead of knowing about the different kinds of
1526 : * stats.
1527 : */
1528 :
1529 : /*
1530 : * Read archiver stats struct
1531 : */
1532 978 : if (!read_chunk_s(fpin, &shmem->archiver.stats))
1533 0 : goto error;
1534 :
1535 : /*
1536 : * Read bgwriter stats struct
1537 : */
1538 978 : if (!read_chunk_s(fpin, &shmem->bgwriter.stats))
1539 0 : goto error;
1540 :
1541 : /*
1542 : * Read checkpointer stats struct
1543 : */
1544 978 : if (!read_chunk_s(fpin, &shmem->checkpointer.stats))
1545 0 : goto error;
1546 :
1547 : /*
1548 : * Read IO stats struct
1549 : */
1550 978 : if (!read_chunk_s(fpin, &shmem->io.stats))
1551 0 : goto error;
1552 :
1553 : /*
1554 : * Read SLRU stats struct
1555 : */
1556 978 : if (!read_chunk_s(fpin, &shmem->slru.stats))
1557 0 : goto error;
1558 :
1559 : /*
1560 : * Read WAL stats struct
1561 : */
1562 978 : if (!read_chunk_s(fpin, &shmem->wal.stats))
1563 0 : goto error;
1564 :
1565 : /*
1566 : * We found an existing statistics file. Read it and put all the hash
1567 : * table entries into place.
1568 : */
1569 : for (;;)
1570 273934 : {
1571 274912 : int t = fgetc(fpin);
1572 :
1573 274912 : switch (t)
1574 : {
1575 273934 : case 'S':
1576 : case 'N':
1577 : {
1578 : PgStat_HashKey key;
1579 : PgStatShared_HashEntry *p;
1580 : PgStatShared_Common *header;
1581 :
1582 273934 : CHECK_FOR_INTERRUPTS();
1583 :
1584 273934 : if (t == 'S')
1585 : {
1586 : /* normal stats entry, identified by PgStat_HashKey */
1587 273874 : if (!read_chunk_s(fpin, &key))
1588 0 : goto error;
1589 :
1590 273874 : if (!pgstat_is_kind_valid(key.kind))
1591 0 : goto error;
1592 : }
1593 : else
1594 : {
1595 : /* stats entry identified by name on disk (e.g. slots) */
1596 60 : const PgStat_KindInfo *kind_info = NULL;
1597 : PgStat_Kind kind;
1598 : NameData name;
1599 :
1600 60 : if (!read_chunk_s(fpin, &kind))
1601 0 : goto error;
1602 60 : if (!read_chunk_s(fpin, &name))
1603 0 : goto error;
1604 60 : if (!pgstat_is_kind_valid(kind))
1605 0 : goto error;
1606 :
1607 60 : kind_info = pgstat_get_kind_info(kind);
1608 :
1609 60 : if (!kind_info->from_serialized_name)
1610 0 : goto error;
1611 :
1612 60 : if (!kind_info->from_serialized_name(&name, &key))
1613 : {
1614 : /* skip over data for entry we don't care about */
1615 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1616 0 : goto error;
1617 :
1618 2 : continue;
1619 : }
1620 :
1621 : Assert(key.kind == kind);
1622 : }
1623 :
1624 : /*
1625 : * This intentionally doesn't use pgstat_get_entry_ref() -
1626 : * putting all stats into checkpointer's
1627 : * pgStatEntryRefHash would be wasted effort and memory.
1628 : */
1629 273932 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1630 :
1631 : /* don't allow duplicate entries */
1632 273932 : if (found)
1633 : {
1634 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1635 0 : elog(WARNING, "found duplicate stats entry %d/%u/%u",
1636 : key.kind, key.dboid, key.objoid);
1637 0 : goto error;
1638 : }
1639 :
1640 273932 : header = pgstat_init_entry(key.kind, p);
1641 273932 : dshash_release_lock(pgStatLocal.shared_hash, p);
1642 :
1643 273932 : if (!read_chunk(fpin,
1644 : pgstat_get_entry_data(key.kind, header),
1645 : pgstat_get_entry_len(key.kind)))
1646 0 : goto error;
1647 :
1648 273932 : break;
1649 : }
1650 978 : case 'E':
1651 : /* check that 'E' actually signals end of file */
1652 978 : if (fgetc(fpin) != EOF)
1653 2 : goto error;
1654 :
1655 976 : goto done;
1656 :
1657 0 : default:
1658 0 : goto error;
1659 : }
1660 : }
1661 :
1662 980 : done:
1663 980 : FreeFile(fpin);
1664 :
1665 980 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
1666 980 : unlink(statfile);
1667 :
1668 980 : return;
1669 :
1670 4 : error:
1671 4 : ereport(LOG,
1672 : (errmsg("corrupted statistics file \"%s\"", statfile)));
1673 :
1674 4 : pgstat_reset_after_failure();
1675 :
1676 4 : goto done;
1677 : }
1678 :
1679 : /*
1680 : * Helper to reset / drop stats after a crash or after restoring stats from
1681 : * disk failed, potentially after already loading parts.
1682 : */
1683 : static void
1684 372 : pgstat_reset_after_failure(void)
1685 : {
1686 372 : TimestampTz ts = GetCurrentTimestamp();
1687 :
1688 : /* reset fixed-numbered stats */
1689 4464 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1690 : {
1691 4092 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1692 :
1693 4092 : if (!kind_info->fixed_amount)
1694 1860 : continue;
1695 :
1696 2232 : kind_info->reset_all_cb(ts);
1697 : }
1698 :
1699 : /* and drop variable-numbered ones */
1700 372 : pgstat_drop_all_entries();
1701 372 : }
1702 :
1703 : /*
1704 : * GUC assign_hook for stats_fetch_consistency.
1705 : */
1706 : void
1707 2590 : assign_stats_fetch_consistency(int newval, void *extra)
1708 : {
1709 : /*
1710 : * Changing this value in a transaction may cause snapshot state
1711 : * inconsistencies, so force a clear of the current snapshot on the next
1712 : * snapshot build attempt.
1713 : */
1714 2590 : if (pgstat_fetch_consistency != newval)
1715 752 : force_stats_snapshot_clear = true;
1716 2590 : }
|