Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down, except when shutting down in immediate mode.
16 : *
17 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
18 : *
19 : * Statistics for variable-numbered objects are stored in dynamic shared
20 : * memory and can be found via a dshash hashtable. The statistics counters are
21 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 : * separately allocated (PgStatShared_HashEntry->body). The separate
23 : * allocation allows different kinds of statistics to be stored in the same
24 : * hashtable without wasting space in PgStatShared_HashEntry.
25 : *
26 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 : * is not possible to have statistics for an object that cannot be addressed
28 : * that way at runtime. A wider identifier can be used when serializing to
29 : * disk (used for replication slot stats).
30 : *
31 : * To avoid contention on the shared hashtable, each backend has a
32 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 : * entries. The shared hashtable only needs to be accessed when no prior
35 : * reference is found in the local hashtable. Besides pointing to the
36 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 : * contains a pointer to the shared statistics data, as a process-local
38 : * address, to reduce access costs.
39 : *
40 : * The names for structs stored in shared memory are prefixed with
41 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 : * protected by a dedicated lwlock.
43 : *
44 : * Most stats updates are first accumulated locally in each process as pending
45 : * entries, then later flushed to shared memory (just after commit, or by
46 : * idle-timeout). This practically eliminates contention on individual stats
47 : * entries. For most kinds of variable-numbered pending stats data is stored
48 : * in PgStat_EntryRef->pending. All entries with pending data are in the
49 : * pgStatPending list. Pending statistics updates are flushed out by
50 : * pgstat_report_stat().
51 : *
52 : * The behavior of different kinds of statistics is determined by the kind's
53 : * entry in pgstat_kind_infos, see PgStat_KindInfo for details.
54 : *
55 : * The consistency of read accesses to statistics can be configured using the
56 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
57 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
58 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
59 : * pgStatLocal.snapshot.
60 : *
61 : * To keep things manageable, stats handling is split across several
62 : * files. Infrastructure pieces are in:
63 : * - pgstat.c - this file, to tie it all together
64 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
65 : * the maintenance of hashtable entries
66 : * - pgstat_xact.c - transactional integration, including the transactional
67 : * creation and dropping of stats entries
68 : *
69 : * Each statistics kind is handled in a dedicated file:
70 : * - pgstat_archiver.c
71 : * - pgstat_bgwriter.c
72 : * - pgstat_checkpointer.c
73 : * - pgstat_database.c
74 : * - pgstat_function.c
75 : * - pgstat_io.c
76 : * - pgstat_relation.c
77 : * - pgstat_replslot.c
78 : * - pgstat_slru.c
79 : * - pgstat_subscription.c
80 : * - pgstat_wal.c
81 : *
82 : * Whenever possible infrastructure files should not contain code related to
83 : * specific kinds of stats.
84 : *
85 : *
86 : * Copyright (c) 2001-2024, PostgreSQL Global Development Group
87 : *
88 : * IDENTIFICATION
89 : * src/backend/utils/activity/pgstat.c
90 : * ----------
91 : */
92 : #include "postgres.h"
93 :
94 : #include <unistd.h>
95 :
96 : #include "access/xact.h"
97 : #include "lib/dshash.h"
98 : #include "pgstat.h"
99 : #include "port/atomics.h"
100 : #include "storage/fd.h"
101 : #include "storage/ipc.h"
102 : #include "storage/lwlock.h"
103 : #include "utils/guc_hooks.h"
104 : #include "utils/memutils.h"
105 : #include "utils/pgstat_internal.h"
106 : #include "utils/timestamp.h"
107 :
108 :
109 : /* ----------
110 : * Timer definitions.
111 : *
112 : * In milliseconds.
113 : * ----------
114 : */
115 :
116 : /* minimum interval between non-forced stats flushes */
117 : #define PGSTAT_MIN_INTERVAL 1000
118 : /* how long updates may stay pending before a flush blocks on locks */
119 : #define PGSTAT_MAX_INTERVAL 60000
120 : /* when to call pgstat_report_stat() again, even when idle */
121 : #define PGSTAT_IDLE_INTERVAL 10000
122 :
123 : /* ----------
124 : * Initial size hints for the hash tables used in statistics.
125 : * ----------
126 : */
127 :
128 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
129 :
130 : /* ---------
131 : * Identifiers in stats file.
132 : * ---------
133 : */
134 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
135 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
136 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
137 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
138 : * PgStat_HashKey */
139 :
140 : /* entry in the hash table for statistics snapshots */
141 : typedef struct PgStat_SnapshotEntry
142 : {
143 : PgStat_HashKey key;
144 : char status; /* for simplehash use */
145 : void *data; /* the stats data itself */
146 : } PgStat_SnapshotEntry;
147 :
148 :
149 : /* ----------
150 : * Backend-local Hash Table Definitions
151 : * ----------
152 : */
153 :
154 : /* for stats snapshot entries */
155 : #define SH_PREFIX pgstat_snapshot
156 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
157 : #define SH_KEY_TYPE PgStat_HashKey
158 : #define SH_KEY key
159 : #define SH_HASH_KEY(tb, key) \
160 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
161 : #define SH_EQUAL(tb, a, b) \
162 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
163 : #define SH_SCOPE static inline
164 : #define SH_DEFINE
165 : #define SH_DECLARE
166 : #include "lib/simplehash.h"
167 :
168 :
169 : /* ----------
170 : * Local function forward declarations
171 : * ----------
172 : */
173 :
174 : static void pgstat_write_statsfile(void);
175 : static void pgstat_read_statsfile(void);
176 :
177 : static void pgstat_reset_after_failure(void);
178 :
179 : static bool pgstat_flush_pending_entries(bool nowait);
180 :
181 : static void pgstat_prep_snapshot(void);
182 : static void pgstat_build_snapshot(void);
183 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
184 :
185 : static inline bool pgstat_is_kind_valid(int ikind);
186 :
187 :
188 : /* ----------
189 : * GUC parameters
190 : * ----------
191 : */
192 :
193 : bool pgstat_track_counts = false;
194 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
195 :
196 :
197 : /* ----------
198 : * state shared with pgstat_*.c
199 : * ----------
200 : */
201 :
202 : PgStat_LocalState pgStatLocal;
203 :
204 :
205 : /* ----------
206 : * Local data
207 : *
208 : * NB: There should be only variables related to stats infrastructure here,
209 : * not for specific kinds of stats.
210 : * ----------
211 : */
212 :
213 : /*
214 : * Memory contexts containing the pgStatEntryRefHash table, the
215 : * pgStatSharedRef entries, and pending data respectively. Mostly to make it
216 : * easier to track / attribute memory usage.
217 : */
218 :
219 : static MemoryContext pgStatPendingContext = NULL;
220 :
221 : /*
222 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
223 : *
224 : * Newly pending entries should only ever be added to the end of the list,
225 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
226 : */
227 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
228 :
229 :
230 : /*
231 : * Force the next stats flush to happen regardless of
232 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
233 : */
234 : static bool pgStatForceNextFlush = false;
235 :
236 : /*
237 : * Force-clear existing snapshot before next use when stats_fetch_consistency
238 : * is changed.
239 : */
240 : static bool force_stats_snapshot_clear = false;
241 :
242 :
243 : /*
244 : * For assertions that check pgstat is not used before initialization / after
245 : * shutdown.
246 : */
247 : #ifdef USE_ASSERT_CHECKING
248 : static bool pgstat_is_initialized = false;
249 : static bool pgstat_is_shutdown = false;
250 : #endif
251 :
252 :
253 : /*
254 : * The different kinds of statistics.
255 : *
256 : * If reasonably possible, handling specific to one kind of stats should go
257 : * through this abstraction, rather than making more of pgstat.c aware.
258 : *
259 : * See comments for struct PgStat_KindInfo for details about the individual
260 : * fields.
261 : *
262 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
263 : * seem to be a great way of doing that, given the split across multiple
264 : * files.
265 : */
266 : static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {
267 :
268 : /* stats kinds for variable-numbered objects */
269 :
270 : [PGSTAT_KIND_DATABASE] = {
271 : .name = "database",
272 :
273 : .fixed_amount = false,
274 : /* so pg_stat_database entries can be seen in all databases */
275 : .accessed_across_databases = true,
276 :
277 : .shared_size = sizeof(PgStatShared_Database),
278 : .shared_data_off = offsetof(PgStatShared_Database, stats),
279 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
280 : .pending_size = sizeof(PgStat_StatDBEntry),
281 :
282 : .flush_pending_cb = pgstat_database_flush_cb,
283 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
284 : },
285 :
286 : [PGSTAT_KIND_RELATION] = {
287 : .name = "relation",
288 :
289 : .fixed_amount = false,
290 :
291 : .shared_size = sizeof(PgStatShared_Relation),
292 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
293 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
294 : .pending_size = sizeof(PgStat_TableStatus),
295 :
296 : .flush_pending_cb = pgstat_relation_flush_cb,
297 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
298 : },
299 :
300 : [PGSTAT_KIND_FUNCTION] = {
301 : .name = "function",
302 :
303 : .fixed_amount = false,
304 :
305 : .shared_size = sizeof(PgStatShared_Function),
306 : .shared_data_off = offsetof(PgStatShared_Function, stats),
307 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
308 : .pending_size = sizeof(PgStat_FunctionCounts),
309 :
310 : .flush_pending_cb = pgstat_function_flush_cb,
311 : },
312 :
313 : [PGSTAT_KIND_REPLSLOT] = {
314 : .name = "replslot",
315 :
316 : .fixed_amount = false,
317 :
318 : .accessed_across_databases = true,
319 :
320 : .shared_size = sizeof(PgStatShared_ReplSlot),
321 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
322 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
323 :
324 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
325 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
326 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
327 : },
328 :
329 : [PGSTAT_KIND_SUBSCRIPTION] = {
330 : .name = "subscription",
331 :
332 : .fixed_amount = false,
333 : /* so pg_stat_subscription_stats entries can be seen in all databases */
334 : .accessed_across_databases = true,
335 :
336 : .shared_size = sizeof(PgStatShared_Subscription),
337 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
338 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
339 : .pending_size = sizeof(PgStat_BackendSubEntry),
340 :
341 : .flush_pending_cb = pgstat_subscription_flush_cb,
342 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
343 : },
344 :
345 :
346 : /* stats for fixed-numbered (mostly 1) objects */
347 :
348 : [PGSTAT_KIND_ARCHIVER] = {
349 : .name = "archiver",
350 :
351 : .fixed_amount = true,
352 :
353 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
354 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
355 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
356 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
357 :
358 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
359 : .reset_all_cb = pgstat_archiver_reset_all_cb,
360 : .snapshot_cb = pgstat_archiver_snapshot_cb,
361 : },
362 :
363 : [PGSTAT_KIND_BGWRITER] = {
364 : .name = "bgwriter",
365 :
366 : .fixed_amount = true,
367 :
368 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
369 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
370 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
371 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
372 :
373 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
374 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
375 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
376 : },
377 :
378 : [PGSTAT_KIND_CHECKPOINTER] = {
379 : .name = "checkpointer",
380 :
381 : .fixed_amount = true,
382 :
383 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
384 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
385 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
386 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
387 :
388 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
389 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
390 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
391 : },
392 :
393 : [PGSTAT_KIND_IO] = {
394 : .name = "io",
395 :
396 : .fixed_amount = true,
397 :
398 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
399 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
400 : .shared_data_off = offsetof(PgStatShared_IO, stats),
401 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
402 :
403 : .init_shmem_cb = pgstat_io_init_shmem_cb,
404 : .reset_all_cb = pgstat_io_reset_all_cb,
405 : .snapshot_cb = pgstat_io_snapshot_cb,
406 : },
407 :
408 : [PGSTAT_KIND_SLRU] = {
409 : .name = "slru",
410 :
411 : .fixed_amount = true,
412 :
413 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
414 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
415 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
416 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
417 :
418 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
419 : .reset_all_cb = pgstat_slru_reset_all_cb,
420 : .snapshot_cb = pgstat_slru_snapshot_cb,
421 : },
422 :
423 : [PGSTAT_KIND_WAL] = {
424 : .name = "wal",
425 :
426 : .fixed_amount = true,
427 :
428 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
429 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
430 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
431 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
432 :
433 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
434 : .reset_all_cb = pgstat_wal_reset_all_cb,
435 : .snapshot_cb = pgstat_wal_snapshot_cb,
436 : },
437 : };
438 :
439 :
440 : /* ------------------------------------------------------------
441 : * Functions managing the state of the stats system for all backends.
442 : * ------------------------------------------------------------
443 : */
444 :
/*
 * Load the statistics saved on disk into memory at server start.
 *
 * Only the startup process, or a backend running in single user mode, should
 * call this.
 */
void
pgstat_restore_stats(void)
{
	/* all of the work is done by the stats file reader */
	pgstat_read_statsfile();
}
455 :
456 : /*
457 : * Remove the stats file. This is currently used only if WAL recovery is
458 : * needed after a crash.
459 : *
460 : * Should only be called by the startup process or in single user mode.
461 : */
462 : void
463 330 : pgstat_discard_stats(void)
464 : {
465 : int ret;
466 :
467 : /* NB: this needs to be done even in single user mode */
468 :
469 330 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
470 330 : if (ret != 0)
471 : {
472 328 : if (errno == ENOENT)
473 328 : elog(DEBUG2,
474 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
475 : PGSTAT_STAT_PERMANENT_FILENAME);
476 : else
477 0 : ereport(LOG,
478 : (errcode_for_file_access(),
479 : errmsg("could not unlink permanent statistics file \"%s\": %m",
480 : PGSTAT_STAT_PERMANENT_FILENAME)));
481 : }
482 : else
483 : {
484 2 : ereport(DEBUG2,
485 : (errcode_for_file_access(),
486 : errmsg_internal("unlinked permanent statistics file \"%s\"",
487 : PGSTAT_STAT_PERMANENT_FILENAME)));
488 : }
489 :
490 : /*
491 : * Reset stats contents. This will set reset timestamps of fixed-numbered
492 : * stats to the current time (no variable stats exist).
493 : */
494 330 : pgstat_reset_after_failure();
495 330 : }
496 :
497 : /*
498 : * pgstat_before_server_shutdown() needs to be called by exactly one process
499 : * during regular server shutdowns. Otherwise all stats will be lost.
500 : *
501 : * We currently only write out stats for proc_exit(0). We might want to change
502 : * that at some point... But right now pgstat_discard_stats() would be called
503 : * during the start after a disorderly shutdown, anyway.
504 : */
505 : void
506 1020 : pgstat_before_server_shutdown(int code, Datum arg)
507 : {
508 : Assert(pgStatLocal.shmem != NULL);
509 : Assert(!pgStatLocal.shmem->is_shutdown);
510 :
511 : /*
512 : * Stats should only be reported after pgstat_initialize() and before
513 : * pgstat_shutdown(). This is a convenient point to catch most violations
514 : * of this rule.
515 : */
516 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
517 :
518 : /* flush out our own pending changes before writing out */
519 1020 : pgstat_report_stat(true);
520 :
521 : /*
522 : * Only write out file during normal shutdown. Don't even signal that
523 : * we've shutdown during irregular shutdowns, because the shutdown
524 : * sequence isn't coordinated to ensure this backend shuts down last.
525 : */
526 1020 : if (code == 0)
527 : {
528 1010 : pgStatLocal.shmem->is_shutdown = true;
529 1010 : pgstat_write_statsfile();
530 : }
531 1020 : }
532 :
533 :
534 : /* ------------------------------------------------------------
535 : * Backend initialization / shutdown functions
536 : * ------------------------------------------------------------
537 : */
538 :
539 : /*
540 : * Shut down a single backend's statistics reporting at process exit.
541 : *
542 : * Flush out any remaining statistics counts. Without this, operations
543 : * triggered during backend exit (such as temp table deletions) won't be
544 : * counted.
545 : */
546 : static void
547 30130 : pgstat_shutdown_hook(int code, Datum arg)
548 : {
549 : Assert(!pgstat_is_shutdown);
550 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
551 :
552 : /*
553 : * If we got as far as discovering our own database ID, we can flush out
554 : * what we did so far. Otherwise, we'd be reporting an invalid database
555 : * ID, so forget it. (This means that accesses to pg_database during
556 : * failed backend starts might never get counted.)
557 : */
558 30130 : if (OidIsValid(MyDatabaseId))
559 23556 : pgstat_report_disconnect(MyDatabaseId);
560 :
561 30130 : pgstat_report_stat(true);
562 :
563 : /* there shouldn't be any pending changes left */
564 : Assert(dlist_is_empty(&pgStatPending));
565 30130 : dlist_init(&pgStatPending);
566 :
567 30130 : pgstat_detach_shmem();
568 :
569 : #ifdef USE_ASSERT_CHECKING
570 : pgstat_is_shutdown = true;
571 : #endif
572 30130 : }
573 :
574 : /*
575 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
576 : * BaseInit().
577 : *
578 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
579 : */
580 : void
581 30130 : pgstat_initialize(void)
582 : {
583 : Assert(!pgstat_is_initialized);
584 :
585 30130 : pgstat_attach_shmem();
586 :
587 30130 : pgstat_init_wal();
588 :
589 : /* Set up a process-exit hook to clean up */
590 30130 : before_shmem_exit(pgstat_shutdown_hook, 0);
591 :
592 : #ifdef USE_ASSERT_CHECKING
593 : pgstat_is_initialized = true;
594 : #endif
595 30130 : }
596 :
597 :
598 : /* ------------------------------------------------------------
599 : * Public functions used by backends follow
600 : * ------------------------------------------------------------
601 : */
602 :
603 : /*
604 : * Must be called by processes that performs DML: tcop/postgres.c, logical
605 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
606 : * shared memory.
607 : *
608 : * Unless called with 'force', pending stats updates are flushed happen once
609 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
610 : * block on lock acquisition, except if stats updates have been pending for
611 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
612 : *
613 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
614 : * suggested idle timeout is returned. Currently this is always
615 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
616 : * a timeout after which to call pgstat_report_stat(true), but are not
617 : * required to do so.
618 : *
619 : * Note that this is called only when not within a transaction, so it is fair
620 : * to use transaction stop time as an approximation of current time.
621 : */
622 : long
623 525960 : pgstat_report_stat(bool force)
624 : {
625 : static TimestampTz pending_since = 0;
626 : static TimestampTz last_flush = 0;
627 : bool partial_flush;
628 : TimestampTz now;
629 : bool nowait;
630 :
631 : pgstat_assert_is_up();
632 : Assert(!IsTransactionOrTransactionBlock());
633 :
634 : /* "absorb" the forced flush even if there's nothing to flush */
635 525960 : if (pgStatForceNextFlush)
636 : {
637 408 : force = true;
638 408 : pgStatForceNextFlush = false;
639 : }
640 :
641 : /* Don't expend a clock check if nothing to do */
642 525960 : if (dlist_is_empty(&pgStatPending) &&
643 12060 : !have_iostats &&
644 11680 : !have_slrustats &&
645 9570 : !pgstat_have_pending_wal())
646 : {
647 : Assert(pending_since == 0);
648 9562 : return 0;
649 : }
650 :
651 : /*
652 : * There should never be stats to report once stats are shut down. Can't
653 : * assert that before the checks above, as there is an unconditional
654 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
655 : * the process that ran pgstat_before_server_shutdown() will still call.
656 : */
657 : Assert(!pgStatLocal.shmem->is_shutdown);
658 :
659 516398 : if (force)
660 : {
661 : /*
662 : * Stats reports are forced either when it's been too long since stats
663 : * have been reported or in processes that force stats reporting to
664 : * happen at specific points (including shutdown). In the former case
665 : * the transaction stop time might be quite old, in the latter it
666 : * would never get cleared.
667 : */
668 29962 : now = GetCurrentTimestamp();
669 : }
670 : else
671 : {
672 486436 : now = GetCurrentTransactionStopTimestamp();
673 :
674 929960 : if (pending_since > 0 &&
675 443524 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
676 : {
677 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
678 0 : force = true;
679 : }
680 486436 : else if (last_flush > 0 &&
681 465520 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
682 : {
683 : /* don't flush too frequently */
684 464302 : if (pending_since == 0)
685 21952 : pending_since = now;
686 :
687 464302 : return PGSTAT_IDLE_INTERVAL;
688 : }
689 : }
690 :
691 52096 : pgstat_update_dbstats(now);
692 :
693 : /* don't wait for lock acquisition when !force */
694 52096 : nowait = !force;
695 :
696 52096 : partial_flush = false;
697 :
698 : /* flush database / relation / function / ... stats */
699 52096 : partial_flush |= pgstat_flush_pending_entries(nowait);
700 :
701 : /* flush IO stats */
702 52096 : partial_flush |= pgstat_flush_io(nowait);
703 :
704 : /* flush wal stats */
705 52096 : partial_flush |= pgstat_flush_wal(nowait);
706 :
707 : /* flush SLRU stats */
708 52096 : partial_flush |= pgstat_slru_flush(nowait);
709 :
710 52096 : last_flush = now;
711 :
712 : /*
713 : * If some of the pending stats could not be flushed due to lock
714 : * contention, let the caller know when to retry.
715 : */
716 52096 : if (partial_flush)
717 : {
718 : /* force should have prevented us from getting here */
719 : Assert(!force);
720 :
721 : /* remember since when stats have been pending */
722 0 : if (pending_since == 0)
723 0 : pending_since = now;
724 :
725 0 : return PGSTAT_IDLE_INTERVAL;
726 : }
727 :
728 52096 : pending_since = 0;
729 :
730 52096 : return 0;
731 : }
732 :
733 : /*
734 : * Force locally pending stats to be flushed during the next
735 : * pgstat_report_stat() call. This is useful for writing tests.
736 : */
737 : void
738 408 : pgstat_force_next_flush(void)
739 : {
740 408 : pgStatForceNextFlush = true;
741 408 : }
742 :
743 : /*
744 : * Only for use by pgstat_reset_counters()
745 : */
746 : static bool
747 21592 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
748 : {
749 21592 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
750 : }
751 :
752 : /*
753 : * Reset counters for our database.
754 : *
755 : * Permission checking for this function is managed through the normal
756 : * GRANT system.
757 : */
758 : void
759 26 : pgstat_reset_counters(void)
760 : {
761 26 : TimestampTz ts = GetCurrentTimestamp();
762 :
763 26 : pgstat_reset_matching_entries(match_db_entries,
764 : ObjectIdGetDatum(MyDatabaseId),
765 : ts);
766 26 : }
767 :
768 : /*
769 : * Reset a single variable-numbered entry.
770 : *
771 : * If the stats kind is within a database, also reset the database's
772 : * stat_reset_timestamp.
773 : *
774 : * Permission checking for this function is managed through the normal
775 : * GRANT system.
776 : */
777 : void
778 38 : pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objoid)
779 : {
780 38 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
781 38 : TimestampTz ts = GetCurrentTimestamp();
782 :
783 : /* not needed atm, and doesn't make sense with the current signature */
784 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
785 :
786 : /* reset the "single counter" */
787 38 : pgstat_reset_entry(kind, dboid, objoid, ts);
788 :
789 38 : if (!kind_info->accessed_across_databases)
790 16 : pgstat_reset_database_timestamp(dboid, ts);
791 38 : }
792 :
793 : /*
794 : * Reset stats for all entries of a kind.
795 : *
796 : * Permission checking for this function is managed through the normal
797 : * GRANT system.
798 : */
799 : void
800 54 : pgstat_reset_of_kind(PgStat_Kind kind)
801 : {
802 54 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
803 54 : TimestampTz ts = GetCurrentTimestamp();
804 :
805 54 : if (kind_info->fixed_amount)
806 46 : kind_info->reset_all_cb(ts);
807 : else
808 8 : pgstat_reset_entries_of_kind(kind, ts);
809 54 : }
810 :
811 :
812 : /* ------------------------------------------------------------
813 : * Fetching of stats
814 : * ------------------------------------------------------------
815 : */
816 :
817 : /*
818 : * Discard any data collected in the current transaction. Any subsequent
819 : * request will cause new snapshots to be read.
820 : *
821 : * This is also invoked during transaction commit or abort to discard
822 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
823 : * cause this routine to be called.
824 : */
825 : void
826 563358 : pgstat_clear_snapshot(void)
827 : {
828 : pgstat_assert_is_up();
829 :
830 563358 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
831 : sizeof(pgStatLocal.snapshot.fixed_valid));
832 563358 : pgStatLocal.snapshot.stats = NULL;
833 563358 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
834 :
835 : /* Release memory, if any was allocated */
836 563358 : if (pgStatLocal.snapshot.context)
837 : {
838 862 : MemoryContextDelete(pgStatLocal.snapshot.context);
839 :
840 : /* Reset variables */
841 862 : pgStatLocal.snapshot.context = NULL;
842 : }
843 :
844 : /*
845 : * Historically the backend_status.c facilities lived in this file, and
846 : * were reset with the same function. For now keep it that way, and
847 : * forward the reset request.
848 : */
849 563358 : pgstat_clear_backend_activity_snapshot();
850 :
851 : /* Reset this flag, as it may be possible that a cleanup was forced. */
852 563358 : force_stats_snapshot_clear = false;
853 563358 : }
854 :
855 : void *
856 16242 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
857 : {
858 : PgStat_HashKey key;
859 : PgStat_EntryRef *entry_ref;
860 : void *stats_data;
861 16242 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
862 :
863 : /* should be called from backends */
864 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
865 : Assert(!kind_info->fixed_amount);
866 :
867 16242 : pgstat_prep_snapshot();
868 :
869 16242 : key.kind = kind;
870 16242 : key.dboid = dboid;
871 16242 : key.objoid = objoid;
872 :
873 : /* if we need to build a full snapshot, do so */
874 16242 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
875 460 : pgstat_build_snapshot();
876 :
877 : /* if caching is desired, look up in cache */
878 16242 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
879 : {
880 9076 : PgStat_SnapshotEntry *entry = NULL;
881 :
882 9076 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
883 :
884 9076 : if (entry)
885 536 : return entry->data;
886 :
887 : /*
888 : * If we built a full snapshot and the key is not in
889 : * pgStatLocal.snapshot.stats, there are no matching stats.
890 : */
891 8540 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
892 28 : return NULL;
893 : }
894 :
895 15678 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
896 :
897 15678 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
898 :
899 15678 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
900 : {
901 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
902 4860 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
903 : {
904 1730 : PgStat_SnapshotEntry *entry = NULL;
905 : bool found;
906 :
907 1730 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
908 : Assert(!found);
909 1730 : entry->data = NULL;
910 : }
911 4860 : return NULL;
912 : }
913 :
914 : /*
915 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
916 : * otherwise we could quickly end up with a fair bit of memory used due to
917 : * repeated accesses.
918 : */
919 10818 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
920 4036 : stats_data = palloc(kind_info->shared_data_len);
921 : else
922 6782 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
923 6782 : kind_info->shared_data_len);
924 :
925 10818 : pgstat_lock_entry_shared(entry_ref, false);
926 21636 : memcpy(stats_data,
927 10818 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
928 10818 : kind_info->shared_data_len);
929 10818 : pgstat_unlock_entry(entry_ref);
930 :
931 10818 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
932 : {
933 6782 : PgStat_SnapshotEntry *entry = NULL;
934 : bool found;
935 :
936 6782 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
937 6782 : entry->data = stats_data;
938 : }
939 :
940 10818 : return stats_data;
941 : }
942 :
943 : /*
944 : * If a stats snapshot has been taken, return the timestamp at which that was
945 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
946 : * false.
947 : */
948 : TimestampTz
949 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
950 : {
951 60 : if (force_stats_snapshot_clear)
952 18 : pgstat_clear_snapshot();
953 :
954 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
955 : {
956 24 : *have_snapshot = true;
957 24 : return pgStatLocal.snapshot.snapshot_timestamp;
958 : }
959 :
960 36 : *have_snapshot = false;
961 :
962 36 : return 0;
963 : }
964 :
965 : bool
966 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
967 : {
968 : /* fixed-numbered stats always exist */
969 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
970 12 : return true;
971 :
972 148 : return pgstat_get_entry_ref(kind, dboid, objoid, false, NULL) != NULL;
973 : }
974 :
975 : /*
976 : * Ensure snapshot for fixed-numbered 'kind' exists.
977 : *
978 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
979 : * massaging the data into the desired format.
980 : */
981 : void
982 402 : pgstat_snapshot_fixed(PgStat_Kind kind)
983 : {
984 : Assert(pgstat_is_kind_valid(kind));
985 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
986 :
987 402 : if (force_stats_snapshot_clear)
988 0 : pgstat_clear_snapshot();
989 :
990 402 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
991 24 : pgstat_build_snapshot();
992 : else
993 378 : pgstat_build_snapshot_fixed(kind);
994 :
995 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
996 402 : }
997 :
998 : static void
999 16294 : pgstat_prep_snapshot(void)
1000 : {
1001 16294 : if (force_stats_snapshot_clear)
1002 18 : pgstat_clear_snapshot();
1003 :
1004 16294 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1005 9128 : pgStatLocal.snapshot.stats != NULL)
1006 15432 : return;
1007 :
1008 862 : if (!pgStatLocal.snapshot.context)
1009 862 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1010 : "PgStat Snapshot",
1011 : ALLOCSET_SMALL_SIZES);
1012 :
1013 862 : pgStatLocal.snapshot.stats =
1014 862 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1015 : PGSTAT_SNAPSHOT_HASH_SIZE,
1016 : NULL);
1017 : }
1018 :
1019 : static void
1020 484 : pgstat_build_snapshot(void)
1021 : {
1022 : dshash_seq_status hstat;
1023 : PgStatShared_HashEntry *p;
1024 :
1025 : /* should only be called when we need a snapshot */
1026 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1027 :
1028 : /* snapshot already built */
1029 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1030 432 : return;
1031 :
1032 52 : pgstat_prep_snapshot();
1033 :
1034 : Assert(pgStatLocal.snapshot.stats->members == 0);
1035 :
1036 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1037 :
1038 : /*
1039 : * Snapshot all variable stats.
1040 : */
1041 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1042 51536 : while ((p = dshash_seq_next(&hstat)) != NULL)
1043 : {
1044 51484 : PgStat_Kind kind = p->key.kind;
1045 51484 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1046 : bool found;
1047 : PgStat_SnapshotEntry *entry;
1048 : PgStatShared_Common *stats_data;
1049 :
1050 : /*
1051 : * Check if the stats object should be included in the snapshot.
1052 : * Unless the stats kind can be accessed from all databases (e.g.,
1053 : * database stats themselves), we only include stats for the current
1054 : * database or objects not associated with a database (e.g. shared
1055 : * relations).
1056 : */
1057 51484 : if (p->key.dboid != MyDatabaseId &&
1058 15276 : p->key.dboid != InvalidOid &&
1059 12624 : !kind_info->accessed_across_databases)
1060 12724 : continue;
1061 :
1062 38964 : if (p->dropped)
1063 204 : continue;
1064 :
1065 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1066 :
1067 38760 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1068 : Assert(stats_data);
1069 :
1070 38760 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1071 : Assert(!found);
1072 :
1073 77520 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1074 38760 : kind_info->shared_size);
1075 :
1076 : /*
1077 : * Acquire the LWLock directly instead of using
1078 : * pg_stat_lock_entry_shared() which requires a reference.
1079 : */
1080 38760 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1081 77520 : memcpy(entry->data,
1082 38760 : pgstat_get_entry_data(kind, stats_data),
1083 38760 : kind_info->shared_size);
1084 38760 : LWLockRelease(&stats_data->lock);
1085 : }
1086 52 : dshash_seq_term(&hstat);
1087 :
1088 : /*
1089 : * Build snapshot of all fixed-numbered stats.
1090 : */
1091 624 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1092 : {
1093 572 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1094 :
1095 572 : if (!kind_info->fixed_amount)
1096 : {
1097 : Assert(kind_info->snapshot_cb == NULL);
1098 260 : continue;
1099 : }
1100 :
1101 312 : pgstat_build_snapshot_fixed(kind);
1102 : }
1103 :
1104 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1105 : }
1106 :
1107 : static void
1108 6750 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1109 : {
1110 6750 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1111 :
1112 : Assert(kind_info->fixed_amount);
1113 : Assert(kind_info->snapshot_cb != NULL);
1114 :
1115 6750 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1116 : {
1117 : /* rebuild every time */
1118 6090 : pgStatLocal.snapshot.fixed_valid[kind] = false;
1119 : }
1120 660 : else if (pgStatLocal.snapshot.fixed_valid[kind])
1121 : {
1122 : /* in snapshot mode we shouldn't get called again */
1123 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1124 12 : return;
1125 : }
1126 :
1127 : Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
1128 :
1129 6738 : kind_info->snapshot_cb();
1130 :
1131 : Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
1132 6738 : pgStatLocal.snapshot.fixed_valid[kind] = true;
1133 : }
1134 :
1135 :
1136 : /* ------------------------------------------------------------
1137 : * Backend-local pending stats infrastructure
1138 : * ------------------------------------------------------------
1139 : */
1140 :
1141 : /*
1142 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1143 : * stats if not already done.
1144 : *
1145 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1146 : * created, false otherwise.
1147 : */
1148 : PgStat_EntryRef *
1149 2708944 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry)
1150 : {
1151 : PgStat_EntryRef *entry_ref;
1152 :
1153 : /* need to be able to flush out */
1154 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1155 :
1156 2708944 : if (unlikely(!pgStatPendingContext))
1157 : {
1158 25886 : pgStatPendingContext =
1159 25886 : AllocSetContextCreate(TopMemoryContext,
1160 : "PgStat Pending",
1161 : ALLOCSET_SMALL_SIZES);
1162 : }
1163 :
1164 2708944 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid,
1165 : true, created_entry);
1166 :
1167 2708944 : if (entry_ref->pending == NULL)
1168 : {
1169 1386590 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1170 :
1171 : Assert(entrysize != (size_t) -1);
1172 :
1173 1386590 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1174 1386590 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1175 : }
1176 :
1177 2708944 : return entry_ref;
1178 : }
1179 :
1180 : /*
1181 : * Return an existing stats entry, or NULL.
1182 : *
1183 : * This should only be used for helper function for pgstatfuncs.c - outside of
1184 : * that it shouldn't be needed.
1185 : */
1186 : PgStat_EntryRef *
1187 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
1188 : {
1189 : PgStat_EntryRef *entry_ref;
1190 :
1191 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
1192 :
1193 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1194 30 : return NULL;
1195 :
1196 54 : return entry_ref;
1197 : }
1198 :
1199 : void
1200 1386590 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1201 : {
1202 1386590 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1203 1386590 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1204 1386590 : void *pending_data = entry_ref->pending;
1205 :
1206 : Assert(pending_data != NULL);
1207 : /* !fixed_amount stats should be handled explicitly */
1208 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1209 :
1210 1386590 : if (kind_info->delete_pending_cb)
1211 1300252 : kind_info->delete_pending_cb(entry_ref);
1212 :
1213 1386590 : pfree(pending_data);
1214 1386590 : entry_ref->pending = NULL;
1215 :
1216 1386590 : dlist_delete(&entry_ref->pending_node);
1217 1386590 : }
1218 :
1219 : /*
1220 : * Flush out pending stats for database objects (databases, relations,
1221 : * functions).
1222 : */
1223 : static bool
1224 52096 : pgstat_flush_pending_entries(bool nowait)
1225 : {
1226 52096 : bool have_pending = false;
1227 52096 : dlist_node *cur = NULL;
1228 :
1229 : /*
1230 : * Need to be a bit careful iterating over the list of pending entries.
1231 : * Processing a pending entry may queue further pending entries to the end
1232 : * of the list that we want to process, so a simple iteration won't do.
1233 : * Further complicating matters is that we want to delete the current
1234 : * entry in each iteration from the list if we flushed successfully.
1235 : *
1236 : * So we just keep track of the next pointer in each loop iteration.
1237 : */
1238 52096 : if (!dlist_is_empty(&pgStatPending))
1239 49662 : cur = dlist_head_node(&pgStatPending);
1240 :
1241 1377246 : while (cur)
1242 : {
1243 1325150 : PgStat_EntryRef *entry_ref =
1244 1325150 : dlist_container(PgStat_EntryRef, pending_node, cur);
1245 1325150 : PgStat_HashKey key = entry_ref->shared_entry->key;
1246 1325150 : PgStat_Kind kind = key.kind;
1247 1325150 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1248 : bool did_flush;
1249 : dlist_node *next;
1250 :
1251 : Assert(!kind_info->fixed_amount);
1252 : Assert(kind_info->flush_pending_cb != NULL);
1253 :
1254 : /* flush the stats, if possible */
1255 1325150 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1256 :
1257 : Assert(did_flush || nowait);
1258 :
1259 : /* determine next entry, before deleting the pending entry */
1260 1325150 : if (dlist_has_next(&pgStatPending, cur))
1261 1275488 : next = dlist_next_node(&pgStatPending, cur);
1262 : else
1263 49662 : next = NULL;
1264 :
1265 : /* if successfully flushed, remove entry */
1266 1325150 : if (did_flush)
1267 1325150 : pgstat_delete_pending_entry(entry_ref);
1268 : else
1269 0 : have_pending = true;
1270 :
1271 1325150 : cur = next;
1272 : }
1273 :
1274 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1275 :
1276 52096 : return have_pending;
1277 : }
1278 :
1279 :
1280 : /* ------------------------------------------------------------
1281 : * Helper / infrastructure functions
1282 : * ------------------------------------------------------------
1283 : */
1284 :
1285 : PgStat_Kind
1286 166 : pgstat_get_kind_from_str(char *kind_str)
1287 : {
1288 476 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1289 : {
1290 470 : if (pg_strcasecmp(kind_str, pgstat_kind_infos[kind].name) == 0)
1291 160 : return kind;
1292 : }
1293 :
1294 6 : ereport(ERROR,
1295 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1296 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1297 : return PGSTAT_KIND_DATABASE; /* avoid compiler warnings */
1298 : }
1299 :
1300 : static inline bool
1301 316004 : pgstat_is_kind_valid(int ikind)
1302 : {
1303 316004 : return ikind >= PGSTAT_KIND_FIRST_VALID && ikind <= PGSTAT_KIND_LAST;
1304 : }
1305 :
1306 : const PgStat_KindInfo *
1307 6278676 : pgstat_get_kind_info(PgStat_Kind kind)
1308 : {
1309 : Assert(pgstat_is_kind_valid(kind));
1310 :
1311 6278676 : return &pgstat_kind_infos[kind];
1312 : }
1313 :
/*
 * Assert that the stats system is currently usable.
 *
 * Stats may only be reported once pgstat_initialize() has run and as long as
 * pgstat_shutdown() has not.  Calling this from a few central places makes
 * violations of that rule easier to catch.  Only compiled in
 * assertion-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1326 :
1327 :
1328 : /* ------------------------------------------------------------
1329 : * reading and writing of on-disk stats file
1330 : * ------------------------------------------------------------
1331 : */
1332 :
/* helpers for pgstat_write_statsfile() */

/*
 * Write 'len' bytes starting at 'ptr' to 'fpout' as a single item.
 *
 * The result of fwrite() is deliberately ignored: the caller checks the
 * stream with ferror() once, after all output has been produced.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	/* we'll check for errors with ferror once at the end */
	(void) fwrite(ptr, len, 1, fpout);
}

#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1346 :
1347 : /*
1348 : * This function is called in the last process that is accessing the shared
1349 : * stats so locking is not required.
1350 : */
1351 : static void
1352 1010 : pgstat_write_statsfile(void)
1353 : {
1354 : FILE *fpout;
1355 : int32 format_id;
1356 1010 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1357 1010 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1358 : dshash_seq_status hstat;
1359 : PgStatShared_HashEntry *ps;
1360 :
1361 : pgstat_assert_is_up();
1362 :
1363 : /* should be called only by the checkpointer or single user mode */
1364 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1365 :
1366 : /* we're shutting down, so it's ok to just override this */
1367 1010 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1368 :
1369 1010 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
1370 :
1371 : /*
1372 : * Open the statistics temp file to write out the current values.
1373 : */
1374 1010 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1375 1010 : if (fpout == NULL)
1376 : {
1377 0 : ereport(LOG,
1378 : (errcode_for_file_access(),
1379 : errmsg("could not open temporary statistics file \"%s\": %m",
1380 : tmpfile)));
1381 0 : return;
1382 : }
1383 :
1384 : /*
1385 : * Write the file header --- currently just a format ID.
1386 : */
1387 1010 : format_id = PGSTAT_FILE_FORMAT_ID;
1388 1010 : write_chunk_s(fpout, &format_id);
1389 :
1390 : /* Write various stats structs for fixed number of objects */
1391 12120 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1392 : {
1393 : char *ptr;
1394 11110 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1395 :
1396 11110 : if (!info->fixed_amount)
1397 5050 : continue;
1398 :
1399 : Assert(info->snapshot_ctl_off != 0);
1400 :
1401 6060 : pgstat_build_snapshot_fixed(kind);
1402 6060 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1403 :
1404 6060 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1405 6060 : write_chunk_s(fpout, &kind);
1406 6060 : write_chunk(fpout, ptr, info->shared_data_len);
1407 : }
1408 :
1409 : /*
1410 : * Walk through the stats entries
1411 : */
1412 1010 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1413 290394 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1414 : {
1415 : PgStatShared_Common *shstats;
1416 289384 : const PgStat_KindInfo *kind_info = NULL;
1417 :
1418 289384 : CHECK_FOR_INTERRUPTS();
1419 :
1420 : /* we may have some "dropped" entries not yet removed, skip them */
1421 : Assert(!ps->dropped);
1422 289384 : if (ps->dropped)
1423 0 : continue;
1424 :
1425 289384 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1426 :
1427 289384 : kind_info = pgstat_get_kind_info(ps->key.kind);
1428 :
1429 : /* if not dropped the valid-entry refcount should exist */
1430 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1431 :
1432 289384 : if (!kind_info->to_serialized_name)
1433 : {
1434 : /* normal stats entry, identified by PgStat_HashKey */
1435 289236 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1436 289236 : write_chunk_s(fpout, &ps->key);
1437 : }
1438 : else
1439 : {
1440 : /* stats entry identified by name on disk (e.g. slots) */
1441 : NameData name;
1442 :
1443 148 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1444 :
1445 148 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1446 148 : write_chunk_s(fpout, &ps->key.kind);
1447 148 : write_chunk_s(fpout, &name);
1448 : }
1449 :
1450 : /* Write except the header part of the entry */
1451 289384 : write_chunk(fpout,
1452 : pgstat_get_entry_data(ps->key.kind, shstats),
1453 : pgstat_get_entry_len(ps->key.kind));
1454 : }
1455 1010 : dshash_seq_term(&hstat);
1456 :
1457 : /*
1458 : * No more output to be done. Close the temp file and replace the old
1459 : * pgstat.stat with it. The ferror() check replaces testing for error
1460 : * after each individual fputc or fwrite (in write_chunk()) above.
1461 : */
1462 1010 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1463 :
1464 1010 : if (ferror(fpout))
1465 : {
1466 0 : ereport(LOG,
1467 : (errcode_for_file_access(),
1468 : errmsg("could not write temporary statistics file \"%s\": %m",
1469 : tmpfile)));
1470 0 : FreeFile(fpout);
1471 0 : unlink(tmpfile);
1472 : }
1473 1010 : else if (FreeFile(fpout) < 0)
1474 : {
1475 0 : ereport(LOG,
1476 : (errcode_for_file_access(),
1477 : errmsg("could not close temporary statistics file \"%s\": %m",
1478 : tmpfile)));
1479 0 : unlink(tmpfile);
1480 : }
1481 1010 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1482 : {
1483 : /* durable_rename already emitted log message */
1484 0 : unlink(tmpfile);
1485 : }
1486 : }
1487 :
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly 'len' bytes from 'fpin' into 'ptr'.
 *
 * Returns true only if all 'len' bytes could be read.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1496 :
1497 : /*
1498 : * Reads in existing statistics file into memory.
1499 : *
1500 : * This function is called in the only process that is accessing the shared
1501 : * stats so locking is not required.
1502 : */
1503 : static void
1504 1212 : pgstat_read_statsfile(void)
1505 : {
1506 : FILE *fpin;
1507 : int32 format_id;
1508 : bool found;
1509 1212 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1510 1212 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1511 :
1512 : /* shouldn't be called from postmaster */
1513 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1514 :
1515 1212 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1516 :
1517 : /*
1518 : * Try to open the stats file. If it doesn't exist, the backends simply
1519 : * returns zero for anything and statistics simply starts from scratch
1520 : * with empty counters.
1521 : *
1522 : * ENOENT is a possibility if stats collection was previously disabled or
1523 : * has not yet written the stats file for the first time. Any other
1524 : * failure condition is suspicious.
1525 : */
1526 1212 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1527 : {
1528 82 : if (errno != ENOENT)
1529 0 : ereport(LOG,
1530 : (errcode_for_file_access(),
1531 : errmsg("could not open statistics file \"%s\": %m",
1532 : statfile)));
1533 82 : pgstat_reset_after_failure();
1534 82 : return;
1535 : }
1536 :
1537 : /*
1538 : * Verify it's of the expected format.
1539 : */
1540 1130 : if (!read_chunk_s(fpin, &format_id) ||
1541 1130 : format_id != PGSTAT_FILE_FORMAT_ID)
1542 2 : goto error;
1543 :
1544 : /*
1545 : * We found an existing statistics file. Read it and put all the stats
1546 : * data into place.
1547 : */
1548 : for (;;)
1549 316004 : {
1550 317132 : int t = fgetc(fpin);
1551 :
1552 317132 : switch (t)
1553 : {
1554 6768 : case PGSTAT_FILE_ENTRY_FIXED:
1555 : {
1556 : PgStat_Kind kind;
1557 : const PgStat_KindInfo *info;
1558 : char *ptr;
1559 :
1560 : /* entry for fixed-numbered stats */
1561 6768 : if (!read_chunk_s(fpin, &kind))
1562 0 : goto error;
1563 :
1564 6768 : if (!pgstat_is_kind_valid(kind))
1565 0 : goto error;
1566 :
1567 6768 : info = pgstat_get_kind_info(kind);
1568 :
1569 6768 : if (!info->fixed_amount)
1570 0 : goto error;
1571 :
1572 : /* Load back stats into shared memory */
1573 6768 : ptr = ((char *) shmem) + info->shared_ctl_off +
1574 6768 : info->shared_data_off;
1575 :
1576 6768 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1577 0 : goto error;
1578 :
1579 6768 : break;
1580 : }
1581 309236 : case PGSTAT_FILE_ENTRY_HASH:
1582 : case PGSTAT_FILE_ENTRY_NAME:
1583 : {
1584 : PgStat_HashKey key;
1585 : PgStatShared_HashEntry *p;
1586 : PgStatShared_Common *header;
1587 :
1588 309236 : CHECK_FOR_INTERRUPTS();
1589 :
1590 309236 : if (t == PGSTAT_FILE_ENTRY_HASH)
1591 : {
1592 : /* normal stats entry, identified by PgStat_HashKey */
1593 309146 : if (!read_chunk_s(fpin, &key))
1594 0 : goto error;
1595 :
1596 309146 : if (!pgstat_is_kind_valid(key.kind))
1597 0 : goto error;
1598 : }
1599 : else
1600 : {
1601 : /* stats entry identified by name on disk (e.g. slots) */
1602 90 : const PgStat_KindInfo *kind_info = NULL;
1603 : PgStat_Kind kind;
1604 : NameData name;
1605 :
1606 90 : if (!read_chunk_s(fpin, &kind))
1607 0 : goto error;
1608 90 : if (!read_chunk_s(fpin, &name))
1609 0 : goto error;
1610 90 : if (!pgstat_is_kind_valid(kind))
1611 0 : goto error;
1612 :
1613 90 : kind_info = pgstat_get_kind_info(kind);
1614 :
1615 90 : if (!kind_info->from_serialized_name)
1616 0 : goto error;
1617 :
1618 90 : if (!kind_info->from_serialized_name(&name, &key))
1619 : {
1620 : /* skip over data for entry we don't care about */
1621 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1622 0 : goto error;
1623 :
1624 2 : continue;
1625 : }
1626 :
1627 : Assert(key.kind == kind);
1628 : }
1629 :
1630 : /*
1631 : * This intentionally doesn't use pgstat_get_entry_ref() -
1632 : * putting all stats into checkpointer's
1633 : * pgStatEntryRefHash would be wasted effort and memory.
1634 : */
1635 309234 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1636 :
1637 : /* don't allow duplicate entries */
1638 309234 : if (found)
1639 : {
1640 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1641 0 : elog(WARNING, "found duplicate stats entry %d/%u/%u",
1642 : key.kind, key.dboid, key.objoid);
1643 0 : goto error;
1644 : }
1645 :
1646 309234 : header = pgstat_init_entry(key.kind, p);
1647 309234 : dshash_release_lock(pgStatLocal.shared_hash, p);
1648 :
1649 309234 : if (!read_chunk(fpin,
1650 : pgstat_get_entry_data(key.kind, header),
1651 : pgstat_get_entry_len(key.kind)))
1652 0 : goto error;
1653 :
1654 309234 : break;
1655 : }
1656 1128 : case PGSTAT_FILE_ENTRY_END:
1657 :
1658 : /*
1659 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
1660 : * file
1661 : */
1662 1128 : if (fgetc(fpin) != EOF)
1663 2 : goto error;
1664 :
1665 1126 : goto done;
1666 :
1667 0 : default:
1668 0 : goto error;
1669 : }
1670 : }
1671 :
1672 1130 : done:
1673 1130 : FreeFile(fpin);
1674 :
1675 1130 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
1676 1130 : unlink(statfile);
1677 :
1678 1130 : return;
1679 :
1680 4 : error:
1681 4 : ereport(LOG,
1682 : (errmsg("corrupted statistics file \"%s\"", statfile)));
1683 :
1684 4 : pgstat_reset_after_failure();
1685 :
1686 4 : goto done;
1687 : }
1688 :
1689 : /*
1690 : * Helper to reset / drop stats after a crash or after restoring stats from
1691 : * disk failed, potentially after already loading parts.
1692 : */
1693 : static void
1694 416 : pgstat_reset_after_failure(void)
1695 : {
1696 416 : TimestampTz ts = GetCurrentTimestamp();
1697 :
1698 : /* reset fixed-numbered stats */
1699 4992 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1700 : {
1701 4576 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1702 :
1703 4576 : if (!kind_info->fixed_amount)
1704 2080 : continue;
1705 :
1706 2496 : kind_info->reset_all_cb(ts);
1707 : }
1708 :
1709 : /* and drop variable-numbered ones */
1710 416 : pgstat_drop_all_entries();
1711 416 : }
1712 :
1713 : /*
1714 : * GUC assign_hook for stats_fetch_consistency.
1715 : */
1716 : void
1717 2850 : assign_stats_fetch_consistency(int newval, void *extra)
1718 : {
1719 : /*
1720 : * Changing this value in a transaction may cause snapshot state
1721 : * inconsistencies, so force a clear of the current snapshot on the next
1722 : * snapshot build attempt.
1723 : */
1724 2850 : if (pgstat_fetch_consistency != newval)
1725 784 : force_stats_snapshot_clear = true;
1726 2850 : }
|