Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down, except when shutting down in immediate mode.
16 : *
17 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
18 : *
19 : * Statistics for variable-numbered objects are stored in dynamic shared
20 : * memory and can be found via a dshash hashtable. The statistics counters are
21 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 : * separately allocated (PgStatShared_HashEntry->body). The separate
23 : * allocation allows different kinds of statistics to be stored in the same
24 : * hashtable without wasting space in PgStatShared_HashEntry.
25 : *
26 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 : * is not possible to have statistics for an object that cannot be addressed
28 : * that way at runtime. A wider identifier can be used when serializing to
29 : * disk (used for replication slot stats).
30 : *
31 : * To avoid contention on the shared hashtable, each backend has a
32 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 : * entries. The shared hashtable only needs to be accessed when no prior
35 : * reference is found in the local hashtable. Besides pointing to the
36 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 : * contains a pointer to the shared statistics data, as a process-local
38 : * address, to reduce access costs.
39 : *
40 : * The names for structs stored in shared memory are prefixed with
41 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 : * protected by a dedicated lwlock.
43 : *
44 : * Most stats updates are first accumulated locally in each process as pending
45 : * entries, then later flushed to shared memory (just after commit, or by
46 : * idle-timeout). This practically eliminates contention on individual stats
47 : * entries. For most kinds of variable-numbered pending stats data is stored
48 : * in PgStat_EntryRef->pending. All entries with pending data are in the
49 : * pgStatPending list. Pending statistics updates are flushed out by
50 : * pgstat_report_stat().
51 : *
52 : * The behavior of different kinds of statistics is determined by the kind's
53 : * entry in pgstat_kind_infos, see PgStat_KindInfo for details.
54 : *
55 : * The consistency of read accesses to statistics can be configured using the
56 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
57 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
58 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
59 : * pgStatLocal.snapshot.
60 : *
61 : * To keep things manageable, stats handling is split across several
62 : * files. Infrastructure pieces are in:
63 : * - pgstat.c - this file, to tie it all together
64 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
65 : * the maintenance of hashtable entries
66 : * - pgstat_xact.c - transactional integration, including the transactional
67 : * creation and dropping of stats entries
68 : *
69 : * Each statistics kind is handled in a dedicated file:
70 : * - pgstat_archiver.c
71 : * - pgstat_bgwriter.c
72 : * - pgstat_checkpointer.c
73 : * - pgstat_database.c
74 : * - pgstat_function.c
75 : * - pgstat_relation.c
76 : * - pgstat_replslot.c
77 : * - pgstat_slru.c
78 : * - pgstat_subscription.c
79 : * - pgstat_wal.c
80 : *
81 : * Whenever possible infrastructure files should not contain code related to
82 : * specific kinds of stats.
83 : *
84 : *
85 : * Copyright (c) 2001-2022, PostgreSQL Global Development Group
86 : *
87 : * IDENTIFICATION
88 : * src/backend/utils/activity/pgstat.c
89 : * ----------
90 : */
91 : #include "postgres.h"
92 :
93 : #include <unistd.h>
94 :
95 : #include "access/transam.h"
96 : #include "access/xact.h"
97 : #include "lib/dshash.h"
98 : #include "pgstat.h"
99 : #include "port/atomics.h"
100 : #include "storage/fd.h"
101 : #include "storage/ipc.h"
102 : #include "storage/lwlock.h"
103 : #include "storage/pg_shmem.h"
104 : #include "storage/shmem.h"
105 : #include "utils/guc.h"
106 : #include "utils/memutils.h"
107 : #include "utils/pgstat_internal.h"
108 : #include "utils/timestamp.h"
109 :
110 :
111 : /* ----------
112 : * Timer definitions.
113 : *
114 : * In milliseconds.
115 : * ----------
116 : */
117 :
118 : /* minimum interval between non-forced stats flushes */
119 : #define PGSTAT_MIN_INTERVAL 1000
120 : /* how long stats updates may stay pending before a flush is allowed to block */
121 : #define PGSTAT_MAX_INTERVAL 60000
122 : /* when to call pgstat_report_stat() again, even when idle */
123 : #define PGSTAT_IDLE_INTERVAL 10000
124 :
125 : /* ----------
126 : * Initial size hints for the hash tables used in statistics.
127 : * ----------
128 : */
129 :
130 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
131 :
132 :
133 : /*
 * Entry of the backend-local simplehash table that holds a statistics
 * snapshot (pgStatLocal.snapshot.stats).
 */
134 : typedef struct PgStat_SnapshotEntry
135 : {
136 : PgStat_HashKey key; /* identifies the stats object: kind, dboid, objoid */
137 : char status; /* for simplehash use */
138 : void *data; /* the stats data itself; pgstat_fetch_entry() stores NULL
 * here to remember that no stats were found for the key */
139 : } PgStat_SnapshotEntry;
140 :
141 :
142 : /* ----------
143 : * Backend-local Hash Table Definitions
144 : * ----------
145 : */
146 :
147 : /* for stats snapshot entries */
/*
 * Instantiate lib/simplehash.h for PgStat_SnapshotEntry. The resulting
 * functions carry the pgstat_snapshot prefix; this file uses
 * pgstat_snapshot_create(), pgstat_snapshot_lookup() and
 * pgstat_snapshot_insert(). Keys are hashed and compared over the whole
 * PgStat_HashKey via pgstat_hash_hash_key() / pgstat_cmp_hash_key().
 */
148 : #define SH_PREFIX pgstat_snapshot
149 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
150 : #define SH_KEY_TYPE PgStat_HashKey
151 : #define SH_KEY key
152 : #define SH_HASH_KEY(tb, key) \
153 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
154 : #define SH_EQUAL(tb, a, b) \
155 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
156 : #define SH_SCOPE static inline
157 : #define SH_DEFINE
158 : #define SH_DECLARE
159 : #include "lib/simplehash.h"
160 :
161 :
162 : /* ----------
163 : * Local function forward declarations
164 : * ----------
165 : */
166 :
167 : static void pgstat_write_statsfile(void);
168 : static void pgstat_read_statsfile(void);
169 :
170 : static void pgstat_reset_after_failure(void);
171 :
172 : static bool pgstat_flush_pending_entries(bool nowait);
173 :
174 : static void pgstat_prep_snapshot(void);
175 : static void pgstat_build_snapshot(void);
176 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
177 :
178 : static inline bool pgstat_is_kind_valid(int ikind);
179 :
180 :
181 : /* ----------
182 : * GUC parameters
183 : * ----------
184 : */
185 :
186 : bool pgstat_track_counts = false;
187 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
188 :
189 :
190 : /* ----------
191 : * state shared with pgstat_*.c
192 : * ----------
193 : */
194 :
195 : PgStat_LocalState pgStatLocal;
196 :
197 :
198 : /* ----------
199 : * Local data
200 : *
201 : * NB: There should be only variables related to stats infrastructure here,
202 : * not for specific kinds of stats.
203 : * ----------
204 : */
205 :
206 : /*
207 : * Memory context for pending stats data; the contexts for the
208 : * pgStatEntryRefHash table and the pgStatSharedRef entries are not declared
209 : * here. Mostly to make it easier to track / attribute memory usage.
210 : */
211 :
212 : static MemoryContext pgStatPendingContext = NULL;
213 :
214 : /*
215 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
216 : *
217 : * Newly pending entries should only ever be added to the end of the list,
218 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
219 : */
220 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
221 :
222 :
223 : /*
224 : * Force the next stats flush to happen regardless of
225 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
226 : */
227 : static bool pgStatForceNextFlush = false;
228 :
229 : /*
230 : * For assertions that check pgstat is not used before initialization / after
231 : * shutdown.
232 : */
233 : #ifdef USE_ASSERT_CHECKING
234 : static bool pgstat_is_initialized = false;
235 : static bool pgstat_is_shutdown = false;
236 : #endif
237 :
238 :
239 : /*
240 : * The different kinds of statistics.
241 : *
242 : * If reasonably possible, handling specific to one kind of stats should go
243 : * through this abstraction, rather than making more of pgstat.c aware.
244 : *
245 : * See comments for struct PgStat_KindInfo for details about the individual
246 : * fields.
247 : *
248 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
249 : * seem to be a great way of doing that, given the split across multiple
250 : * files.
251 : */
/*
 * Per-kind metadata, indexed by PgStat_Kind. Entries for variable-numbered
 * kinds describe the layout of their shared entry (shared_size,
 * shared_data_off, shared_data_len) while entries for fixed-numbered kinds
 * only provide reset_all_cb and snapshot_cb.
 */
252 : static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {
253 :
254 : /* stats kinds for variable-numbered objects */
255 :
256 : [PGSTAT_KIND_DATABASE] = {
257 : .name = "database",
258 :
259 : .fixed_amount = false,
260 : /* so pg_stat_database entries can be seen in all databases */
261 : .accessed_across_databases = true,
262 :
263 : .shared_size = sizeof(PgStatShared_Database),
264 : .shared_data_off = offsetof(PgStatShared_Database, stats),
265 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
266 : .pending_size = sizeof(PgStat_StatDBEntry),
267 :
268 : .flush_pending_cb = pgstat_database_flush_cb,
269 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
270 : },
271 :
/* the only kind in this table that registers a delete_pending_cb */
272 : [PGSTAT_KIND_RELATION] = {
273 : .name = "relation",
274 :
275 : .fixed_amount = false,
276 :
277 : .shared_size = sizeof(PgStatShared_Relation),
278 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
279 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
280 : .pending_size = sizeof(PgStat_TableStatus),
281 :
282 : .flush_pending_cb = pgstat_relation_flush_cb,
283 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
284 : },
285 :
286 : [PGSTAT_KIND_FUNCTION] = {
287 : .name = "function",
288 :
289 : .fixed_amount = false,
290 :
291 : .shared_size = sizeof(PgStatShared_Function),
292 : .shared_data_off = offsetof(PgStatShared_Function, stats),
293 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
294 : .pending_size = sizeof(PgStat_BackendFunctionEntry),
295 :
296 : .flush_pending_cb = pgstat_function_flush_cb,
297 : },
298 :
/*
 * No pending_size / flush_pending_cb is set for this kind. Entries are
 * named on disk, hence the two serialized-name conversion callbacks.
 */
299 : [PGSTAT_KIND_REPLSLOT] = {
300 : .name = "replslot",
301 :
302 : .fixed_amount = false,
303 :
304 : .accessed_across_databases = true,
305 : .named_on_disk = true,
306 :
307 : .shared_size = sizeof(PgStatShared_ReplSlot),
308 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
309 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
310 :
311 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
312 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
313 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
314 : },
315 :
316 : [PGSTAT_KIND_SUBSCRIPTION] = {
317 : .name = "subscription",
318 :
319 : .fixed_amount = false,
320 : /* so pg_stat_subscription_stats entries can be seen in all databases */
321 : .accessed_across_databases = true,
322 :
323 : .shared_size = sizeof(PgStatShared_Subscription),
324 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
325 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
326 : .pending_size = sizeof(PgStat_BackendSubEntry),
327 :
328 : .flush_pending_cb = pgstat_subscription_flush_cb,
329 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
330 : },
331 :
332 :
333 : /* stats for fixed-numbered (mostly 1) objects */
/*
 * pgstat_build_snapshot_fixed() asserts that snapshot_cb is set for these,
 * and pgstat_reset_of_kind() calls reset_all_cb for them.
 */
334 :
335 : [PGSTAT_KIND_ARCHIVER] = {
336 : .name = "archiver",
337 :
338 : .fixed_amount = true,
339 :
340 : .reset_all_cb = pgstat_archiver_reset_all_cb,
341 : .snapshot_cb = pgstat_archiver_snapshot_cb,
342 : },
343 :
344 : [PGSTAT_KIND_BGWRITER] = {
345 : .name = "bgwriter",
346 :
347 : .fixed_amount = true,
348 :
349 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
350 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
351 : },
352 :
353 : [PGSTAT_KIND_CHECKPOINTER] = {
354 : .name = "checkpointer",
355 :
356 : .fixed_amount = true,
357 :
358 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
359 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
360 : },
361 :
362 : [PGSTAT_KIND_SLRU] = {
363 : .name = "slru",
364 :
365 : .fixed_amount = true,
366 :
367 : .reset_all_cb = pgstat_slru_reset_all_cb,
368 : .snapshot_cb = pgstat_slru_snapshot_cb,
369 : },
370 :
371 : [PGSTAT_KIND_WAL] = {
372 : .name = "wal",
373 :
374 : .fixed_amount = true,
375 :
376 : .reset_all_cb = pgstat_wal_reset_all_cb,
377 : .snapshot_cb = pgstat_wal_snapshot_cb,
378 : },
379 : };
380 :
381 :
382 : /* ------------------------------------------------------------
383 : * Functions managing the state of the stats system for all backends.
384 : * ------------------------------------------------------------
385 : */
386 :
/*
 * pgstat_restore_stats
 *
 * Bring the statistics saved on disk back into memory at server start, by
 * handing off to pgstat_read_statsfile().
 *
 * Only the startup process, or a backend running in single user mode, is
 * supposed to call this.
 */
void
pgstat_restore_stats(void)
{
	pgstat_read_statsfile();
}
397 :
398 : /*
399 : * Remove the stats file. This is currently used only if WAL recovery is
400 : * needed after a crash.
401 : *
402 : * Should only be called by the startup process or in single user mode.
403 : */
404 : void
405 252 : pgstat_discard_stats(void)
406 : {
407 : int ret;
408 :
409 : /* NB: this needs to be done even in single user mode */
410 :
411 252 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
412 252 : if (ret != 0)
413 : {
414 250 : if (errno == ENOENT)
415 250 : elog(DEBUG2,
416 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
417 : PGSTAT_STAT_PERMANENT_FILENAME);
418 : else
419 0 : ereport(LOG,
420 : (errcode_for_file_access(),
421 : errmsg("could not unlink permanent statistics file \"%s\": %m",
422 : PGSTAT_STAT_PERMANENT_FILENAME)));
423 : }
424 : else
425 : {
426 2 : ereport(DEBUG2,
427 : (errcode_for_file_access(),
428 : errmsg("unlinked permanent statistics file \"%s\"",
429 : PGSTAT_STAT_PERMANENT_FILENAME)));
430 : }
431 :
432 : /*
433 : * Reset stats contents. This will set reset timestamps of fixed-numbered
434 : * stats to the current time (no variable stats exist).
435 : */
436 252 : pgstat_reset_after_failure();
437 252 : }
438 :
439 : /*
440 : * pgstat_before_server_shutdown() needs to be called by exactly one process
441 : * during regular server shutdowns. Otherwise all stats will be lost.
442 : *
443 : * We currently only write out stats for proc_exit(0). We might want to change
444 : * that at some point... But right now pgstat_discard_stats() would be called
445 : * during the start after a disorderly shutdown, anyway.
446 : */
447 : void
448 1770 : pgstat_before_server_shutdown(int code, Datum arg)
449 : {
450 : Assert(pgStatLocal.shmem != NULL);
451 : Assert(!pgStatLocal.shmem->is_shutdown);
452 :
453 : /*
454 : * Stats should only be reported after pgstat_initialize() and before
455 : * pgstat_shutdown(). This is a convenient point to catch most violations
456 : * of this rule.
457 : */
458 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
459 :
460 : /* flush out our own pending changes before writing out */
461 1770 : pgstat_report_stat(true);
462 :
463 : /*
464 : * Only write out file during normal shutdown. Don't even signal that
465 : * we've shutdown during irregular shutdowns, because the shutdown
466 : * sequence isn't coordinated to ensure this backend shuts down last.
467 : */
468 1770 : if (code == 0)
469 : {
470 1764 : pgStatLocal.shmem->is_shutdown = true;
471 1764 : pgstat_write_statsfile();
472 : }
473 1770 : }
474 :
475 :
476 : /* ------------------------------------------------------------
477 : * Backend initialization / shutdown functions
478 : * ------------------------------------------------------------
479 : */
480 :
481 : /*
482 : * Shut down a single backend's statistics reporting at process exit.
483 : *
484 : * Flush out any remaining statistics counts. Without this, operations
485 : * triggered during backend exit (such as temp table deletions) won't be
486 : * counted.
487 : */
488 : static void
489 24076 : pgstat_shutdown_hook(int code, Datum arg)
490 : {
491 : Assert(!pgstat_is_shutdown);
492 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
493 :
494 : /*
495 : * If we got as far as discovering our own database ID, we can flush out
496 : * what we did so far. Otherwise, we'd be reporting an invalid database
497 : * ID, so forget it. (This means that accesses to pg_database during
498 : * failed backend starts might never get counted.)
499 : */
500 24076 : if (OidIsValid(MyDatabaseId))
501 19012 : pgstat_report_disconnect(MyDatabaseId);
502 :
503 24076 : pgstat_report_stat(true);
504 :
505 : /* there shouldn't be any pending changes left */
506 : Assert(dlist_is_empty(&pgStatPending));
507 24076 : dlist_init(&pgStatPending);
508 :
509 24076 : pgstat_detach_shmem();
510 :
511 : #ifdef USE_ASSERT_CHECKING
512 : pgstat_is_shutdown = true;
513 : #endif
514 24076 : }
515 :
516 : /*
517 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
518 : * BaseInit().
519 : *
520 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
521 : */
522 : void
523 24076 : pgstat_initialize(void)
524 : {
525 : Assert(!pgstat_is_initialized);
526 :
527 24076 : pgstat_attach_shmem();
528 :
529 24076 : pgstat_init_wal();
530 :
531 : /* Set up a process-exit hook to clean up */
532 24076 : before_shmem_exit(pgstat_shutdown_hook, 0);
533 :
534 : #ifdef USE_ASSERT_CHECKING
535 : pgstat_is_initialized = true;
536 : #endif
537 24076 : }
538 :
539 :
540 : /* ------------------------------------------------------------
541 : * Public functions used by backends follow
542 : * ------------------------------------------------------------
543 : */
544 :
545 : /*
546 : * Must be called by processes that performs DML: tcop/postgres.c, logical
547 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
548 : * shared memory.
549 : *
550 : * Unless called with 'force', pending stats updates are flushed happen once
551 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
552 : * block on lock acquisition, except if stats updates have been pending for
553 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
554 : *
555 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
556 : * suggested idle timeout is returned. Currently this is always
557 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
558 : * a timeout after which to call pgstat_report_stat(true), but are not
559 : * required to to do so.
560 : *
561 : * Note that this is called only when not within a transaction, so it is fair
562 : * to use transaction stop time as an approximation of current time.
563 : */
564 : long
565 787662 : pgstat_report_stat(bool force)
566 : {
567 : static TimestampTz pending_since = 0;
568 : static TimestampTz last_flush = 0;
569 : bool partial_flush;
570 : TimestampTz now;
571 : bool nowait;
572 :
573 : pgstat_assert_is_up();
574 : Assert(!IsTransactionOrTransactionBlock());
575 :
576 : /* "absorb" the forced flush even if there's nothing to flush */
577 787662 : if (pgStatForceNextFlush)
578 : {
579 322 : force = true;
580 322 : pgStatForceNextFlush = false;
581 : }
582 :
583 : /* Don't expend a clock check if nothing to do */
584 787662 : if (dlist_is_empty(&pgStatPending) &&
585 12892 : !have_slrustats &&
586 11226 : !pgstat_have_pending_wal())
587 : {
588 : Assert(pending_since == 0);
589 11166 : return 0;
590 : }
591 :
592 : /*
593 : * There should never be stats to report once stats are shut down. Can't
594 : * assert that before the checks above, as there is an unconditional
595 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
596 : * the process that ran pgstat_before_server_shutdown() will still call.
597 : */
598 : Assert(!pgStatLocal.shmem->is_shutdown);
599 :
600 776496 : now = GetCurrentTransactionStopTimestamp();
601 :
602 776496 : if (!force)
603 : {
604 1472932 : if (pending_since > 0 &&
605 720218 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
606 : {
607 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
608 0 : force = true;
609 : }
610 752714 : else if (last_flush > 0 &&
611 736718 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
612 : {
613 : /* don't flush too frequently */
614 735538 : if (pending_since == 0)
615 16468 : pending_since = now;
616 :
617 735538 : return PGSTAT_IDLE_INTERVAL;
618 : }
619 : }
620 :
621 40958 : pgstat_update_dbstats(now);
622 :
623 : /* don't wait for lock acquisition when !force */
624 40958 : nowait = !force;
625 :
626 40958 : partial_flush = false;
627 :
628 : /* flush database / relation / function / ... stats */
629 40958 : partial_flush |= pgstat_flush_pending_entries(nowait);
630 :
631 : /* flush wal stats */
632 40958 : partial_flush |= pgstat_flush_wal(nowait);
633 :
634 : /* flush SLRU stats */
635 40958 : partial_flush |= pgstat_slru_flush(nowait);
636 :
637 40958 : last_flush = now;
638 :
639 : /*
640 : * If some of the pending stats could not be flushed due to lock
641 : * contention, let the caller know when to retry.
642 : */
643 40958 : if (partial_flush)
644 : {
645 : /* force should have prevented us from getting here */
646 : Assert(!force);
647 :
648 : /* remember since when stats have been pending */
649 0 : if (pending_since == 0)
650 0 : pending_since = now;
651 :
652 0 : return PGSTAT_IDLE_INTERVAL;
653 : }
654 :
655 40958 : pending_since = 0;
656 :
657 40958 : return 0;
658 : }
659 :
660 : /*
661 : * Force locally pending stats to be flushed during the next
662 : * pgstat_report_stat() call. This is useful for writing tests.
663 : */
664 : void
665 322 : pgstat_force_next_flush(void)
666 : {
667 322 : pgStatForceNextFlush = true;
668 322 : }
669 :
670 : /*
671 : * Only for use by pgstat_reset_counters()
672 : */
673 : static bool
674 28264 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
675 : {
676 28264 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
677 : }
678 :
679 : /*
680 : * Reset counters for our database.
681 : *
682 : * Permission checking for this function is managed through the normal
683 : * GRANT system.
684 : */
685 : void
686 14 : pgstat_reset_counters(void)
687 : {
688 14 : TimestampTz ts = GetCurrentTimestamp();
689 :
690 14 : pgstat_reset_matching_entries(match_db_entries,
691 : ObjectIdGetDatum(MyDatabaseId),
692 : ts);
693 14 : }
694 :
695 : /*
696 : * Reset a single variable-numbered entry.
697 : *
698 : * If the stats kind is within a database, also reset the database's
699 : * stat_reset_timestamp.
700 : *
701 : * Permission checking for this function is managed through the normal
702 : * GRANT system.
703 : */
704 : void
705 14 : pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objoid)
706 : {
707 14 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
708 14 : TimestampTz ts = GetCurrentTimestamp();
709 :
710 : /* not needed atm, and doesn't make sense with the current signature */
711 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
712 :
713 : /* reset the "single counter" */
714 14 : pgstat_reset_entry(kind, dboid, objoid, ts);
715 :
716 14 : if (!kind_info->accessed_across_databases)
717 4 : pgstat_reset_database_timestamp(dboid, ts);
718 14 : }
719 :
720 : /*
721 : * Reset stats for all entries of a kind.
722 : *
723 : * Permission checking for this function is managed through the normal
724 : * GRANT system.
725 : */
726 : void
727 44 : pgstat_reset_of_kind(PgStat_Kind kind)
728 : {
729 44 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
730 44 : TimestampTz ts = GetCurrentTimestamp();
731 :
732 44 : if (kind_info->fixed_amount)
733 36 : kind_info->reset_all_cb(ts);
734 : else
735 8 : pgstat_reset_entries_of_kind(kind, ts);
736 44 : }
737 :
738 :
739 : /* ------------------------------------------------------------
740 : * Fetching of stats
741 : * ------------------------------------------------------------
742 : */
743 :
744 : /*
745 : * Discard any data collected in the current transaction. Any subsequent
746 : * request will cause new snapshots to be read.
747 : *
748 : * This is also invoked during transaction commit or abort to discard
749 : * the no-longer-wanted snapshot.
750 : */
751 : void
752 906498 : pgstat_clear_snapshot(void)
753 : {
754 : pgstat_assert_is_up();
755 :
756 906498 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
757 : sizeof(pgStatLocal.snapshot.fixed_valid));
758 906498 : pgStatLocal.snapshot.stats = NULL;
759 906498 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
760 :
761 : /* Release memory, if any was allocated */
762 906498 : if (pgStatLocal.snapshot.context)
763 : {
764 630 : MemoryContextDelete(pgStatLocal.snapshot.context);
765 :
766 : /* Reset variables */
767 630 : pgStatLocal.snapshot.context = NULL;
768 : }
769 :
770 : /*
771 : * Historically the backend_status.c facilities lived in this file, and
772 : * were reset with the same function. For now keep it that way, and
773 : * forward the reset request.
774 : */
775 906498 : pgstat_clear_backend_activity_snapshot();
776 906498 : }
777 :
778 : void *
779 7620 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
780 : {
781 : PgStat_HashKey key;
782 : PgStat_EntryRef *entry_ref;
783 : void *stats_data;
784 7620 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
785 :
786 : /* should be called from backends */
787 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
788 : AssertArg(!kind_info->fixed_amount);
789 :
790 7620 : pgstat_prep_snapshot();
791 :
792 7620 : key.kind = kind;
793 7620 : key.dboid = dboid;
794 7620 : key.objoid = objoid;
795 :
796 : /* if we need to build a full snapshot, do so */
797 7620 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
798 454 : pgstat_build_snapshot();
799 :
800 : /* if caching is desired, look up in cache */
801 7620 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
802 : {
803 1256 : PgStat_SnapshotEntry *entry = NULL;
804 :
805 1256 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
806 :
807 1256 : if (entry)
808 488 : return entry->data;
809 :
810 : /*
811 : * If we built a full snapshot and the key is not in
812 : * pgStatLocal.snapshot.stats, there are no matching stats.
813 : */
814 768 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
815 22 : return NULL;
816 : }
817 :
818 7110 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
819 :
820 7110 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
821 :
822 7110 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
823 : {
824 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
825 2760 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
826 : {
827 292 : PgStat_SnapshotEntry *entry = NULL;
828 : bool found;
829 :
830 292 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
831 : Assert(!found);
832 292 : entry->data = NULL;
833 : }
834 2760 : return NULL;
835 : }
836 :
837 : /*
838 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
839 : * otherwise we could quickly end up with a fair bit of memory used due to
840 : * repeated accesses.
841 : */
842 4350 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
843 3896 : stats_data = palloc(kind_info->shared_data_len);
844 : else
845 454 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
846 454 : kind_info->shared_data_len);
847 8700 : memcpy(stats_data,
848 4350 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
849 4350 : kind_info->shared_data_len);
850 :
851 4350 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
852 : {
853 454 : PgStat_SnapshotEntry *entry = NULL;
854 : bool found;
855 :
856 454 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
857 454 : entry->data = stats_data;
858 : }
859 :
860 4350 : return stats_data;
861 : }
862 :
863 : /*
864 : * If a stats snapshot has been taken, return the timestamp at which that was
865 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
866 : * false.
867 : */
868 : TimestampTz
869 30 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
870 : {
871 30 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
872 : {
873 18 : *have_snapshot = true;
874 18 : return pgStatLocal.snapshot.snapshot_timestamp;
875 : }
876 :
877 12 : *have_snapshot = false;
878 :
879 12 : return 0;
880 : }
881 :
882 : bool
883 92 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
884 : {
885 : /* fixed-numbered stats always exist */
886 92 : if (pgstat_get_kind_info(kind)->fixed_amount)
887 6 : return true;
888 :
889 86 : return pgstat_get_entry_ref(kind, dboid, objoid, false, NULL) != NULL;
890 : }
891 :
892 : /*
893 : * Ensure snapshot for fixed-numbered 'kind' exists.
894 : *
895 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
896 : * massaging the data into the desired format.
897 : */
898 : void
899 298 : pgstat_snapshot_fixed(PgStat_Kind kind)
900 : {
901 : AssertArg(pgstat_is_kind_valid(kind));
902 : AssertArg(pgstat_get_kind_info(kind)->fixed_amount);
903 :
904 298 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
905 24 : pgstat_build_snapshot();
906 : else
907 274 : pgstat_build_snapshot_fixed(kind);
908 :
909 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
910 298 : }
911 :
912 : static void
913 7666 : pgstat_prep_snapshot(void)
914 : {
915 7666 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
916 1302 : pgStatLocal.snapshot.stats != NULL)
917 7036 : return;
918 :
919 630 : if (!pgStatLocal.snapshot.context)
920 630 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
921 : "PgStat Snapshot",
922 : ALLOCSET_SMALL_SIZES);
923 :
924 630 : pgStatLocal.snapshot.stats =
925 630 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
926 : PGSTAT_SNAPSHOT_HASH_SIZE,
927 : NULL);
928 : }
929 :
930 : static void
931 478 : pgstat_build_snapshot(void)
932 : {
933 : dshash_seq_status hstat;
934 : PgStatShared_HashEntry *p;
935 :
936 : /* should only be called when we need a snapshot */
937 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
938 :
939 : /* snapshot already built */
940 478 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
941 432 : return;
942 :
943 46 : pgstat_prep_snapshot();
944 :
945 : Assert(pgStatLocal.snapshot.stats->members == 0);
946 :
947 46 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
948 :
949 : /*
950 : * Snapshot all variable stats.
951 : */
952 46 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
953 91236 : while ((p = dshash_seq_next(&hstat)) != NULL)
954 : {
955 91190 : PgStat_Kind kind = p->key.kind;
956 91190 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
957 : bool found;
958 : PgStat_SnapshotEntry *entry;
959 : PgStatShared_Common *stats_data;
960 :
961 : /*
962 : * Check if the stats object should be included in the snapshot.
963 : * Unless the stats kind can be accessed from all databases (e.g.,
964 : * database stats themselves), we only include stats for the current
965 : * database or objects not associated with a database (e.g. shared
966 : * relations).
967 : */
968 91190 : if (p->key.dboid != MyDatabaseId &&
969 13362 : p->key.dboid != InvalidOid &&
970 11108 : !kind_info->accessed_across_databases)
971 11220 : continue;
972 :
973 80174 : if (p->dropped)
974 204 : continue;
975 :
976 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
977 :
978 79970 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
979 : Assert(stats_data);
980 :
981 79970 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
982 : Assert(!found);
983 :
984 159940 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
985 79970 : kind_info->shared_size);
986 79970 : memcpy(entry->data,
987 79970 : pgstat_get_entry_data(kind, stats_data),
988 79970 : kind_info->shared_size);
989 : }
990 46 : dshash_seq_term(&hstat);
991 :
992 : /*
993 : * Build snapshot of all fixed-numbered stats.
994 : */
995 506 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
996 : {
997 460 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
998 :
999 460 : if (!kind_info->fixed_amount)
1000 : {
1001 : Assert(kind_info->snapshot_cb == NULL);
1002 230 : continue;
1003 : }
1004 :
1005 230 : pgstat_build_snapshot_fixed(kind);
1006 : }
1007 :
1008 46 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1009 : }
1010 :
1011 : static void
1012 9324 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1013 : {
1014 9324 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1015 :
1016 : Assert(kind_info->fixed_amount);
1017 : Assert(kind_info->snapshot_cb != NULL);
1018 :
1019 9324 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1020 : {
1021 : /* rebuild every time */
1022 8850 : pgStatLocal.snapshot.fixed_valid[kind] = false;
1023 : }
1024 474 : else if (pgStatLocal.snapshot.fixed_valid[kind])
1025 : {
1026 : /* in snapshot mode we shouldn't get called again */
1027 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1028 12 : return;
1029 : }
1030 :
1031 : Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
1032 :
1033 9312 : kind_info->snapshot_cb();
1034 :
1035 : Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
1036 9312 : pgStatLocal.snapshot.fixed_valid[kind] = true;
1037 : }
1038 :
1039 :
1040 : /* ------------------------------------------------------------
1041 : * Backend-local pending stats infrastructure
1042 : * ------------------------------------------------------------
1043 : */
1044 :
1045 : /*
1046 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1047 : * stats if not already done.
1048 : *
1049 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1050 : * created, false otherwise.
1051 : */
1052 : PgStat_EntryRef *
1053 2559394 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry)
1054 : {
1055 : PgStat_EntryRef *entry_ref;
1056 :
1057 : /* need to be able to flush out */
1058 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1059 :
1060 2559394 : if (unlikely(!pgStatPendingContext))
1061 : {
1062 22524 : pgStatPendingContext =
1063 22524 : AllocSetContextCreate(CacheMemoryContext,
1064 : "PgStat Pending",
1065 : ALLOCSET_SMALL_SIZES);
1066 : }
1067 :
1068 2559394 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid,
1069 : true, created_entry);
1070 :
1071 2559394 : if (entry_ref->pending == NULL)
1072 : {
1073 1286322 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1074 :
1075 : Assert(entrysize != (size_t) -1);
1076 :
1077 1286322 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1078 1286322 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1079 : }
1080 :
1081 2559394 : return entry_ref;
1082 : }
1083 :
1084 : /*
1085 : * Return an existing stats entry, or NULL.
1086 : *
1087 : * This should only be used for helper function for pgstatfuncs.c - outside of
1088 : * that it shouldn't be needed.
1089 : */
1090 : PgStat_EntryRef *
1091 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
1092 : {
1093 : PgStat_EntryRef *entry_ref;
1094 :
1095 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
1096 :
1097 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1098 30 : return NULL;
1099 :
1100 54 : return entry_ref;
1101 : }
1102 :
1103 : void
1104 1286322 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1105 : {
1106 1286322 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1107 1286322 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1108 1286322 : void *pending_data = entry_ref->pending;
1109 :
1110 : Assert(pending_data != NULL);
1111 : /* !fixed_amount stats should be handled explicitly */
1112 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1113 :
1114 1286322 : if (kind_info->delete_pending_cb)
1115 1215838 : kind_info->delete_pending_cb(entry_ref);
1116 :
1117 1286322 : pfree(pending_data);
1118 1286322 : entry_ref->pending = NULL;
1119 :
1120 1286322 : dlist_delete(&entry_ref->pending_node);
1121 1286322 : }
1122 :
1123 : /*
1124 : * Flush out pending stats for database objects (databases, relations,
1125 : * functions).
1126 : */
1127 : static bool
1128 40958 : pgstat_flush_pending_entries(bool nowait)
1129 : {
1130 40958 : bool have_pending = false;
1131 40958 : dlist_node *cur = NULL;
1132 :
1133 : /*
1134 : * Need to be a bit careful iterating over the list of pending entries.
1135 : * Processing a pending entry may queue further pending entries to the end
1136 : * of the list that we want to process, so a simple iteration won't do.
1137 : * Further complicating matters is that we want to delete the current
1138 : * entry in each iteration from the list if we flushed successfully.
1139 : *
1140 : * So we just keep track of the next pointer in each loop iteration.
1141 : */
1142 40958 : if (!dlist_is_empty(&pgStatPending))
1143 40958 : cur = dlist_head_node(&pgStatPending);
1144 :
1145 1292996 : while (cur)
1146 : {
1147 1252038 : PgStat_EntryRef *entry_ref =
1148 1252038 : dlist_container(PgStat_EntryRef, pending_node, cur);
1149 1252038 : PgStat_HashKey key = entry_ref->shared_entry->key;
1150 1252038 : PgStat_Kind kind = key.kind;
1151 1252038 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1152 : bool did_flush;
1153 : dlist_node *next;
1154 :
1155 : Assert(!kind_info->fixed_amount);
1156 : Assert(kind_info->flush_pending_cb != NULL);
1157 :
1158 : /* flush the stats, if possible */
1159 1252038 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1160 :
1161 : Assert(did_flush || nowait);
1162 :
1163 : /* determine next entry, before deleting the pending entry */
1164 1252038 : if (dlist_has_next(&pgStatPending, cur))
1165 1211080 : next = dlist_next_node(&pgStatPending, cur);
1166 : else
1167 40958 : next = NULL;
1168 :
1169 : /* if successfully flushed, remove entry */
1170 1252038 : if (did_flush)
1171 1252038 : pgstat_delete_pending_entry(entry_ref);
1172 : else
1173 0 : have_pending = true;
1174 :
1175 1252038 : cur = next;
1176 : }
1177 :
1178 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1179 :
1180 40958 : return have_pending;
1181 : }
1182 :
1183 :
1184 : /* ------------------------------------------------------------
1185 : * Helper / infrastructure functions
1186 : * ------------------------------------------------------------
1187 : */
1188 :
1189 : PgStat_Kind
1190 98 : pgstat_get_kind_from_str(char *kind_str)
1191 : {
1192 286 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1193 : {
1194 280 : if (pg_strcasecmp(kind_str, pgstat_kind_infos[kind].name) == 0)
1195 92 : return kind;
1196 : }
1197 :
1198 6 : ereport(ERROR,
1199 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1200 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1201 : return PGSTAT_KIND_DATABASE; /* avoid compiler warnings */
1202 : }
1203 :
1204 : static inline bool
1205 275816 : pgstat_is_kind_valid(int ikind)
1206 : {
1207 275816 : return ikind >= PGSTAT_KIND_FIRST_VALID && ikind <= PGSTAT_KIND_LAST;
1208 : }
1209 :
1210 : const PgStat_KindInfo *
1211 6432266 : pgstat_get_kind_info(PgStat_Kind kind)
1212 : {
1213 : AssertArg(pgstat_is_kind_valid(kind));
1214 :
1215 6432266 : return &pgstat_kind_infos[kind];
1216 : }
1217 :
/*
 * Stats should only be reported after pgstat_initialize() and before
 * pgstat_shutdown(). This check is put in a few central places to catch
 * violations of this rule more easily.
 *
 * The function is only defined in builds with USE_ASSERT_CHECKING; it
 * asserts that the stats system has been initialized and has not been shut
 * down yet.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1230 :
1231 :
1232 : /* ------------------------------------------------------------
1233 : * reading and writing of on-disk stats file
1234 : * ------------------------------------------------------------
1235 : */
1236 :
/* helpers for pgstat_write_statsfile() */

/*
 * Write len bytes starting at ptr to fpout, as one item.
 *
 * Failures are not reported from here: the caller is expected to test the
 * stream with ferror() once after all output has been produced.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	size_t		nitems = fwrite(ptr, len, 1, fpout);

	/* result intentionally unused, see above */
	(void) nitems;
}

/* Write the object that ptr points to, using its static size. */
#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1250 :
/*
 * Write the contents of the stats system to the permanent stats file.
 *
 * This function is called in the last process that is accessing the shared
 * stats so locking is not required.
 *
 * The data is written in this order:
 *	- an int32 format ID (PGSTAT_FILE_FORMAT_ID)
 *	- the fixed-numbered stats structs: archiver, bgwriter, checkpointer,
 *	  SLRU and WAL
 *	- one record per entry of the shared hash table, introduced by 'S'
 *	  (followed by the PgStat_HashKey) or by 'N' (followed by the kind and a
 *	  serialized name), and then the entry's stats data
 *	- a terminating 'E'
 *
 * Everything goes to a temporary file first, which is renamed to the
 * permanent file name only if no write or close error occurred. On any
 * failure a LOG message is emitted, the temporary file is removed (except
 * when it could not be opened in the first place) and the function simply
 * returns.
 */
static void
pgstat_write_statsfile(void)
{
	FILE	   *fpout;
	int32		format_id;
	const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
	const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
	dshash_seq_status hstat;
	PgStatShared_HashEntry *ps;

	pgstat_assert_is_up();

	/*
	 * we're shutting down, so it's ok to just override this. With
	 * PGSTAT_FETCH_CONSISTENCY_NONE, pgstat_build_snapshot_fixed() rebuilds
	 * the requested data on every call.
	 */
	pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;

	elog(DEBUG2, "writing stats file \"%s\"", statfile);

	/*
	 * Open the statistics temp file to write out the current values.
	 */
	fpout = AllocateFile(tmpfile, PG_BINARY_W);
	if (fpout == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not open temporary statistics file \"%s\": %m",
						tmpfile)));
		return;
	}

	/*
	 * Write the file header --- currently just a format ID.
	 */
	format_id = PGSTAT_FILE_FORMAT_ID;
	write_chunk_s(fpout, &format_id);

	/*
	 * XXX: The following could now be generalized to just iterate over
	 * pgstat_kind_infos instead of knowing about the different kinds of
	 * stats.
	 */

	/*
	 * Write archiver stats struct
	 */
	pgstat_build_snapshot_fixed(PGSTAT_KIND_ARCHIVER);
	write_chunk_s(fpout, &pgStatLocal.snapshot.archiver);

	/*
	 * Write bgwriter stats struct
	 */
	pgstat_build_snapshot_fixed(PGSTAT_KIND_BGWRITER);
	write_chunk_s(fpout, &pgStatLocal.snapshot.bgwriter);

	/*
	 * Write checkpointer stats struct
	 */
	pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER);
	write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer);

	/*
	 * Write SLRU stats struct
	 */
	pgstat_build_snapshot_fixed(PGSTAT_KIND_SLRU);
	write_chunk_s(fpout, &pgStatLocal.snapshot.slru);

	/*
	 * Write WAL stats struct
	 */
	pgstat_build_snapshot_fixed(PGSTAT_KIND_WAL);
	write_chunk_s(fpout, &pgStatLocal.snapshot.wal);

	/*
	 * Walk through the stats entries
	 */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
	while ((ps = dshash_seq_next(&hstat)) != NULL)
	{
		PgStatShared_Common *shstats;
		const PgStat_KindInfo *kind_info = NULL;

		CHECK_FOR_INTERRUPTS();

		/*
		 * we may have some "dropped" entries not yet removed, skip them.
		 * Assert-enabled builds treat such an entry as unexpected here;
		 * other builds just skip it.
		 */
		Assert(!ps->dropped);
		if (ps->dropped)
			continue;

		shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);

		kind_info = pgstat_get_kind_info(ps->key.kind);

		/* if not dropped the valid-entry refcount should exist */
		Assert(pg_atomic_read_u32(&ps->refcount) > 0);

		if (!kind_info->to_serialized_name)
		{
			/* normal stats entry, identified by PgStat_HashKey */
			fputc('S', fpout);
			write_chunk_s(fpout, &ps->key);
		}
		else
		{
			/* stats entry identified by name on disk (e.g. slots) */
			NameData	name;

			kind_info->to_serialized_name(shstats, &name);

			fputc('N', fpout);
			write_chunk_s(fpout, &ps->key.kind);
			write_chunk_s(fpout, &name);
		}

		/* Write except the header part of the entry */
		write_chunk(fpout,
					pgstat_get_entry_data(ps->key.kind, shstats),
					pgstat_get_entry_len(ps->key.kind));
	}
	dshash_seq_term(&hstat);

	/*
	 * No more output to be done. Close the temp file and replace the old
	 * pgstat.stat with it. The ferror() check replaces testing for error
	 * after each individual fputc or fwrite (in write_chunk()) above.
	 */
	fputc('E', fpout);

	if (ferror(fpout))
	{
		/* some earlier write failed: discard the incomplete temp file */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write temporary statistics file \"%s\": %m",
						tmpfile)));
		FreeFile(fpout);
		unlink(tmpfile);
	}
	else if (FreeFile(fpout) < 0)
	{
		/* closing the stream failed, so the file contents can't be trusted */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not close temporary statistics file \"%s\": %m",
						tmpfile)));
		unlink(tmpfile);
	}
	else if (rename(tmpfile, statfile) < 0)
	{
		/* could not move the temp file into place */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
						tmpfile, statfile)));
		unlink(tmpfile);
	}
}
1408 :
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly len bytes from fpin into ptr.
 *
 * Returns true only if all len bytes could be read; a short read (end of
 * file or error) yields false.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

/* Read into the object that ptr points to, using its static size. */
#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1417 :
/*
 * Reads in existing statistics file into the shared stats hash.
 *
 * This function is called in the only process that is accessing the shared
 * stats so locking is not required.
 *
 * The file is expected to contain a format ID, the fixed-numbered stats
 * structs (archiver, bgwriter, checkpointer, SLRU, WAL), any number of 'S'
 * or 'N' records and a final 'E' followed by end of file. If the file cannot
 * be opened, all stats are reset via pgstat_reset_after_failure(). If its
 * contents are not as expected, a LOG message is emitted and all stats are
 * reset as well, including whatever had been loaded up to that point. Once
 * the file has been opened it is always closed and removed before returning,
 * whether or not it could be loaded.
 */
static void
pgstat_read_statsfile(void)
{
	FILE	   *fpin;
	int32		format_id;
	bool		found;
	const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
	PgStat_ShmemControl *shmem = pgStatLocal.shmem;

	/* shouldn't be called from postmaster */
	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);

	elog(DEBUG2, "reading stats file \"%s\"", statfile);

	/*
	 * Try to open the stats file. If it doesn't exist, backends simply
	 * return zero for everything and statistics start from scratch with
	 * empty counters.
	 *
	 * ENOENT is a possibility if stats collection was previously disabled or
	 * has not yet written the stats file for the first time. Any other
	 * failure condition is suspicious.
	 */
	if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
	{
		if (errno != ENOENT)
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not open statistics file \"%s\": %m",
							statfile)));
		pgstat_reset_after_failure();
		return;
	}

	/*
	 * Verify it's of the expected format.
	 */
	if (!read_chunk_s(fpin, &format_id) ||
		format_id != PGSTAT_FILE_FORMAT_ID)
		goto error;

	/*
	 * XXX: The following could now be generalized to just iterate over
	 * pgstat_kind_infos instead of knowing about the different kinds of
	 * stats.
	 */

	/*
	 * Read archiver stats struct
	 */
	if (!read_chunk_s(fpin, &shmem->archiver.stats))
		goto error;

	/*
	 * Read bgwriter stats struct
	 */
	if (!read_chunk_s(fpin, &shmem->bgwriter.stats))
		goto error;

	/*
	 * Read checkpointer stats struct
	 */
	if (!read_chunk_s(fpin, &shmem->checkpointer.stats))
		goto error;

	/*
	 * Read SLRU stats struct
	 */
	if (!read_chunk_s(fpin, &shmem->slru.stats))
		goto error;

	/*
	 * Read WAL stats struct
	 */
	if (!read_chunk_s(fpin, &shmem->wal.stats))
		goto error;

	/*
	 * We found an existing statistics file. Read it and put all the hash
	 * table entries into place.
	 */
	for (;;)
	{
		/* each record starts with a one-byte type tag */
		int			t = fgetc(fpin);

		switch (t)
		{
			case 'S':
			case 'N':
				{
					PgStat_HashKey key;
					PgStatShared_HashEntry *p;
					PgStatShared_Common *header;

					CHECK_FOR_INTERRUPTS();

					if (t == 'S')
					{
						/* normal stats entry, identified by PgStat_HashKey */
						if (!read_chunk_s(fpin, &key))
							goto error;

						if (!pgstat_is_kind_valid(key.kind))
							goto error;
					}
					else
					{
						/* stats entry identified by name on disk (e.g. slots) */
						const PgStat_KindInfo *kind_info = NULL;
						PgStat_Kind kind;
						NameData	name;

						if (!read_chunk_s(fpin, &kind))
							goto error;
						if (!read_chunk_s(fpin, &name))
							goto error;
						if (!pgstat_is_kind_valid(kind))
							goto error;

						kind_info = pgstat_get_kind_info(kind);

						/* the kind must support being looked up by name */
						if (!kind_info->from_serialized_name)
							goto error;

						if (!kind_info->from_serialized_name(&name, &key))
						{
							/* skip over data for entry we don't care about */
							if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
								goto error;

							/* proceeds with the next record of the for loop */
							continue;
						}

						Assert(key.kind == kind);
					}

					/*
					 * This intentionally doesn't use pgstat_get_entry_ref() -
					 * putting all stats into checkpointer's
					 * pgStatEntryRefHash would be wasted effort and memory.
					 */
					p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);

					/* don't allow duplicate entries */
					if (found)
					{
						dshash_release_lock(pgStatLocal.shared_hash, p);
						elog(WARNING, "found duplicate stats entry %d/%u/%u",
							 key.kind, key.dboid, key.objoid);
						goto error;
					}

					header = pgstat_init_entry(key.kind, p);
					dshash_release_lock(pgStatLocal.shared_hash, p);

					/* load the entry's stats data straight into shared memory */
					if (!read_chunk(fpin,
									pgstat_get_entry_data(key.kind, header),
									pgstat_get_entry_len(key.kind)))
						goto error;

					break;
				}
			case 'E':
				/* check that 'E' actually signals end of file */
				if (fgetc(fpin) != EOF)
					goto error;

				goto done;

			default:
				/* unknown tag, or EOF before the 'E' marker was seen */
				goto error;
		}
	}

done:
	FreeFile(fpin);

	/* the file is removed whether or not it could be loaded */
	elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
	unlink(statfile);

	return;

error:
	ereport(LOG,
			(errmsg("corrupted statistics file \"%s\"", statfile)));

	/* throw away whatever has been loaded so far */
	pgstat_reset_after_failure();

	/* share the close-and-remove code above */
	goto done;
}
1614 :
1615 : /*
1616 : * Helper to reset / drop stats after a crash or after restoring stats from
1617 : * disk failed, potentially after already loading parts.
1618 : */
1619 : static void
1620 810 : pgstat_reset_after_failure(void)
1621 : {
1622 810 : TimestampTz ts = GetCurrentTimestamp();
1623 :
1624 : /* reset fixed-numbered stats */
1625 8910 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
1626 : {
1627 8100 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1628 :
1629 8100 : if (!kind_info->fixed_amount)
1630 4050 : continue;
1631 :
1632 4050 : kind_info->reset_all_cb(ts);
1633 : }
1634 :
1635 : /* and drop variable-numbered ones */
1636 810 : pgstat_drop_all_entries();
1637 810 : }
|