Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down, except when shutting down in immediate mode.
16 : *
17 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
18 : *
19 : * Statistics for variable-numbered objects are stored in dynamic shared
20 : * memory and can be found via a dshash hashtable. The statistics counters are
21 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 : * separately allocated (PgStatShared_HashEntry->body). The separate
23 : * allocation allows different kinds of statistics to be stored in the same
24 : * hashtable without wasting space in PgStatShared_HashEntry.
25 : *
26 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 : * is not possible to have statistics for an object that cannot be addressed
28 : * that way at runtime. A wider identifier can be used when serializing to
29 : * disk (used for replication slot stats).
30 : *
31 : * To avoid contention on the shared hashtable, each backend has a
32 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 : * entries. The shared hashtable only needs to be accessed when no prior
35 : * reference is found in the local hashtable. Besides pointing to the
36 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 : * contains a pointer to the shared statistics data, as a process-local
38 : * address, to reduce access costs.
39 : *
40 : * The names for structs stored in shared memory are prefixed with
41 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 : * protected by a dedicated lwlock.
43 : *
44 : * Most stats updates are first accumulated locally in each process as pending
45 : * entries, then later flushed to shared memory (just after commit, or by
46 : * idle-timeout). This practically eliminates contention on individual stats
47 : * entries. For most kinds of variable-numbered stats, pending data is stored
48 : * in PgStat_EntryRef->pending. All entries with pending data are in the
49 : * pgStatPending list. Pending statistics updates are flushed out by
50 : * pgstat_report_stat().
51 : *
52 : * It is possible for external modules to define custom statistics kinds,
53 : * that can use the same properties as any built-in stats kinds. Each custom
54 : * stats kind needs to assign a unique ID to ensure that it does not overlap
55 : * with other extensions. In order to reserve a unique stats kind ID, refer
56 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
57 : *
58 : * The behavior of different kinds of statistics is determined by the kind's
59 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
60 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
61 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
62 : *
63 : * The consistency of read accesses to statistics can be configured using the
64 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
65 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
66 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
67 : * pgStatLocal.snapshot.
68 : *
69 : * To keep things manageable, stats handling is split across several
70 : * files. Infrastructure pieces are in:
71 : * - pgstat.c - this file, to tie it all together
72 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
73 : * the maintenance of hashtable entries
74 : * - pgstat_xact.c - transactional integration, including the transactional
75 : * creation and dropping of stats entries
76 : *
77 : * Each statistics kind is handled in a dedicated file:
78 : * - pgstat_archiver.c
79 : * - pgstat_bgwriter.c
80 : * - pgstat_checkpointer.c
81 : * - pgstat_database.c
82 : * - pgstat_function.c
83 : * - pgstat_io.c
84 : * - pgstat_relation.c
85 : * - pgstat_replslot.c
86 : * - pgstat_slru.c
87 : * - pgstat_subscription.c
88 : * - pgstat_wal.c
89 : *
90 : * Whenever possible infrastructure files should not contain code related to
91 : * specific kinds of stats.
92 : *
93 : *
94 : * Copyright (c) 2001-2024, PostgreSQL Global Development Group
95 : *
96 : * IDENTIFICATION
97 : * src/backend/utils/activity/pgstat.c
98 : * ----------
99 : */
100 : #include "postgres.h"
101 :
102 : #include <unistd.h>
103 :
104 : #include "access/xact.h"
105 : #include "access/xlog.h"
106 : #include "lib/dshash.h"
107 : #include "pgstat.h"
108 : #include "storage/fd.h"
109 : #include "storage/ipc.h"
110 : #include "storage/lwlock.h"
111 : #include "utils/guc_hooks.h"
112 : #include "utils/memutils.h"
113 : #include "utils/pgstat_internal.h"
114 : #include "utils/timestamp.h"
115 :
116 :
117 : /* ----------
118 : * Timer definitions.
119 : *
120 : * In milliseconds.
121 : * ----------
122 : */
123 :
124 : /* minimum interval between non-forced stats flushes */
125 : #define PGSTAT_MIN_INTERVAL 1000
126 : /* how long updates may stay pending before a flush blocks on locks */
127 : #define PGSTAT_MAX_INTERVAL 60000
128 : /* when to call pgstat_report_stat() again, even when idle */
129 : #define PGSTAT_IDLE_INTERVAL 10000
130 :
131 : /* ----------
132 : * Initial size hints for the hash tables used in statistics.
133 : * ----------
134 : */
135 :
136 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
137 :
138 : /* ---------
139 : * Identifiers in stats file.
140 : * ---------
141 : */
142 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
143 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
144 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
145 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
146 : * PgStat_HashKey */
147 :
148 : /* hash table for statistics snapshots entry */
149 : typedef struct PgStat_SnapshotEntry
150 : {
151 : PgStat_HashKey key;
152 : char status; /* for simplehash use */
153 : void *data; /* the stats data itself */
154 : } PgStat_SnapshotEntry;
155 :
156 :
157 : /* ----------
158 : * Backend-local Hash Table Definitions
159 : * ----------
160 : */
161 :
162 : /* for stats snapshot entries */
163 : #define SH_PREFIX pgstat_snapshot
164 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
165 : #define SH_KEY_TYPE PgStat_HashKey
166 : #define SH_KEY key
167 : #define SH_HASH_KEY(tb, key) \
168 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
169 : #define SH_EQUAL(tb, a, b) \
170 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
171 : #define SH_SCOPE static inline
172 : #define SH_DEFINE
173 : #define SH_DECLARE
174 : #include "lib/simplehash.h"
175 :
176 :
177 : /* ----------
178 : * Local function forward declarations
179 : * ----------
180 : */
181 :
182 : static void pgstat_write_statsfile(XLogRecPtr redo);
183 : static void pgstat_read_statsfile(XLogRecPtr redo);
184 :
185 : static void pgstat_init_snapshot_fixed(void);
186 :
187 : static void pgstat_reset_after_failure(void);
188 :
189 : static bool pgstat_flush_pending_entries(bool nowait);
190 :
191 : static void pgstat_prep_snapshot(void);
192 : static void pgstat_build_snapshot(void);
193 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
194 :
195 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
196 :
197 :
198 : /* ----------
199 : * GUC parameters
200 : * ----------
201 : */
202 :
203 : bool pgstat_track_counts = false;
204 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
205 :
206 :
207 : /* ----------
208 : * state shared with pgstat_*.c
209 : * ----------
210 : */
211 :
212 : PgStat_LocalState pgStatLocal;
213 :
214 :
215 : /* ----------
216 : * Local data
217 : *
218 : * NB: There should be only variables related to stats infrastructure here,
219 : * not for specific kinds of stats.
220 : * ----------
221 : */
222 :
223 : /*
224 : * Memory contexts containing the pgStatEntryRefHash table, the
225 : * pgStatSharedRef entries, and pending data respectively. Mostly to make it
226 : * easier to track / attribute memory usage.
227 : */
228 :
229 : static MemoryContext pgStatPendingContext = NULL;
230 :
231 : /*
232 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
233 : *
234 : * Newly pending entries should only ever be added to the end of the list,
235 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
236 : */
237 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
238 :
239 :
240 : /*
241 : * Force the next stats flush to happen regardless of
242 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
243 : */
244 : static bool pgStatForceNextFlush = false;
245 :
246 : /*
247 : * Force-clear existing snapshot before next use when stats_fetch_consistency
248 : * is changed.
249 : */
250 : static bool force_stats_snapshot_clear = false;
251 :
252 :
253 : /*
254 : * For assertions that check pgstat is not used before initialization / after
255 : * shutdown.
256 : */
257 : #ifdef USE_ASSERT_CHECKING
258 : static bool pgstat_is_initialized = false;
259 : static bool pgstat_is_shutdown = false;
260 : #endif
261 :
262 :
263 : /*
264 : * The different kinds of built-in statistics.
265 : *
266 : * If reasonably possible, handling specific to one kind of stats should go
267 : * through this abstraction, rather than making more of pgstat.c aware.
268 : *
269 : * See comments for struct PgStat_KindInfo for details about the individual
270 : * fields.
271 : *
272 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
273 : * seem to be a great way of doing that, given the split across multiple
274 : * files.
275 : */
276 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
277 :
278 : /* stats kinds for variable-numbered objects */
279 :
280 : [PGSTAT_KIND_DATABASE] = {
281 : .name = "database",
282 :
283 : .fixed_amount = false,
284 : /* so pg_stat_database entries can be seen in all databases */
285 : .accessed_across_databases = true,
286 :
287 : .shared_size = sizeof(PgStatShared_Database),
288 : .shared_data_off = offsetof(PgStatShared_Database, stats),
289 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
290 : .pending_size = sizeof(PgStat_StatDBEntry),
291 :
292 : .flush_pending_cb = pgstat_database_flush_cb,
293 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
294 : },
295 :
296 : [PGSTAT_KIND_RELATION] = {
297 : .name = "relation",
298 :
299 : .fixed_amount = false,
300 :
301 : .shared_size = sizeof(PgStatShared_Relation),
302 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
303 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
304 : .pending_size = sizeof(PgStat_TableStatus),
305 :
306 : .flush_pending_cb = pgstat_relation_flush_cb,
307 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
308 : },
309 :
310 : [PGSTAT_KIND_FUNCTION] = {
311 : .name = "function",
312 :
313 : .fixed_amount = false,
314 :
315 : .shared_size = sizeof(PgStatShared_Function),
316 : .shared_data_off = offsetof(PgStatShared_Function, stats),
317 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
318 : .pending_size = sizeof(PgStat_FunctionCounts),
319 :
320 : .flush_pending_cb = pgstat_function_flush_cb,
321 : },
322 :
323 : [PGSTAT_KIND_REPLSLOT] = {
324 : .name = "replslot",
325 :
326 : .fixed_amount = false,
327 :
328 : .accessed_across_databases = true,
329 :
330 : .shared_size = sizeof(PgStatShared_ReplSlot),
331 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
332 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
333 :
334 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
335 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
336 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
337 : },
338 :
339 : [PGSTAT_KIND_SUBSCRIPTION] = {
340 : .name = "subscription",
341 :
342 : .fixed_amount = false,
343 : /* so pg_stat_subscription_stats entries can be seen in all databases */
344 : .accessed_across_databases = true,
345 :
346 : .shared_size = sizeof(PgStatShared_Subscription),
347 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
348 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
349 : .pending_size = sizeof(PgStat_BackendSubEntry),
350 :
351 : .flush_pending_cb = pgstat_subscription_flush_cb,
352 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
353 : },
354 :
355 :
356 : /* stats for fixed-numbered (mostly 1) objects */
357 :
358 : [PGSTAT_KIND_ARCHIVER] = {
359 : .name = "archiver",
360 :
361 : .fixed_amount = true,
362 :
363 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
364 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
365 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
366 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
367 :
368 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
369 : .reset_all_cb = pgstat_archiver_reset_all_cb,
370 : .snapshot_cb = pgstat_archiver_snapshot_cb,
371 : },
372 :
373 : [PGSTAT_KIND_BGWRITER] = {
374 : .name = "bgwriter",
375 :
376 : .fixed_amount = true,
377 :
378 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
379 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
380 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
381 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
382 :
383 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
384 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
385 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
386 : },
387 :
388 : [PGSTAT_KIND_CHECKPOINTER] = {
389 : .name = "checkpointer",
390 :
391 : .fixed_amount = true,
392 :
393 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
394 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
395 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
396 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
397 :
398 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
399 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
400 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
401 : },
402 :
403 : [PGSTAT_KIND_IO] = {
404 : .name = "io",
405 :
406 : .fixed_amount = true,
407 :
408 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
409 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
410 : .shared_data_off = offsetof(PgStatShared_IO, stats),
411 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
412 :
413 : .flush_fixed_cb = pgstat_io_flush_cb,
414 : .have_fixed_pending_cb = pgstat_io_have_pending_cb,
415 : .init_shmem_cb = pgstat_io_init_shmem_cb,
416 : .reset_all_cb = pgstat_io_reset_all_cb,
417 : .snapshot_cb = pgstat_io_snapshot_cb,
418 : },
419 :
420 : [PGSTAT_KIND_SLRU] = {
421 : .name = "slru",
422 :
423 : .fixed_amount = true,
424 :
425 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
426 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
427 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
428 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
429 :
430 : .flush_fixed_cb = pgstat_slru_flush_cb,
431 : .have_fixed_pending_cb = pgstat_slru_have_pending_cb,
432 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
433 : .reset_all_cb = pgstat_slru_reset_all_cb,
434 : .snapshot_cb = pgstat_slru_snapshot_cb,
435 : },
436 :
437 : [PGSTAT_KIND_WAL] = {
438 : .name = "wal",
439 :
440 : .fixed_amount = true,
441 :
442 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
443 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
444 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
445 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
446 :
447 : .init_backend_cb = pgstat_wal_init_backend_cb,
448 : .flush_fixed_cb = pgstat_wal_flush_cb,
449 : .have_fixed_pending_cb = pgstat_wal_have_pending_cb,
450 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
451 : .reset_all_cb = pgstat_wal_reset_all_cb,
452 : .snapshot_cb = pgstat_wal_snapshot_cb,
453 : },
454 : };
455 :
456 : /*
457 : * Information about custom statistics kinds.
458 : *
459 : * These are kept in a separate array from the built-in kinds, to keep the
460 : * initialization of the built-in kinds clear.
461 : *
462 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
463 : */
464 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
465 :
466 : /* ------------------------------------------------------------
467 : * Functions managing the state of the stats system for all backends.
468 : * ------------------------------------------------------------
469 : */
470 :
471 : /*
472 : * Read on-disk stats into memory at server start.
473 : *
474 : * Should only be called by the startup process or in single user mode.
475 : */
476 : void
477 1294 : pgstat_restore_stats(XLogRecPtr redo)
478 : {
479 1294 : pgstat_read_statsfile(redo);
480 1294 : }
481 :
482 : /*
483 : * Remove the stats file. This is currently used only if WAL recovery is
484 : * needed after a crash.
485 : *
486 : * Should only be called by the startup process or in single user mode.
487 : */
488 : void
489 340 : pgstat_discard_stats(void)
490 : {
491 : int ret;
492 :
493 : /* NB: this needs to be done even in single user mode */
494 :
495 340 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
496 340 : if (ret != 0)
497 : {
498 338 : if (errno == ENOENT)
499 338 : elog(DEBUG2,
500 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
501 : PGSTAT_STAT_PERMANENT_FILENAME);
502 : else
503 0 : ereport(LOG,
504 : (errcode_for_file_access(),
505 : errmsg("could not unlink permanent statistics file \"%s\": %m",
506 : PGSTAT_STAT_PERMANENT_FILENAME)));
507 : }
508 : else
509 : {
510 2 : ereport(DEBUG2,
511 : (errcode_for_file_access(),
512 : errmsg_internal("unlinked permanent statistics file \"%s\"",
513 : PGSTAT_STAT_PERMANENT_FILENAME)));
514 : }
515 :
516 : /*
517 : * Reset stats contents. This will set reset timestamps of fixed-numbered
518 : * stats to the current time (no variable stats exist).
519 : */
520 340 : pgstat_reset_after_failure();
521 340 : }
522 :
523 : /*
524 : * pgstat_before_server_shutdown() needs to be called by exactly one process
525 : * during regular server shutdowns. Otherwise all stats will be lost.
526 : *
527 : * We currently only write out stats for proc_exit(0). We might want to change
528 : * that at some point... But right now pgstat_discard_stats() would be called
529 : * during the start after a disorderly shutdown, anyway.
530 : */
531 : void
532 1092 : pgstat_before_server_shutdown(int code, Datum arg)
533 : {
534 : Assert(pgStatLocal.shmem != NULL);
535 : Assert(!pgStatLocal.shmem->is_shutdown);
536 :
537 : /*
538 : * Stats should only be reported after pgstat_initialize() and before
539 : * pgstat_shutdown(). This is a convenient point to catch most violations
540 : * of this rule.
541 : */
542 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
543 :
544 : /* flush out our own pending changes before writing out */
545 1092 : pgstat_report_stat(true);
546 :
547 : /*
548 : * Only write out file during normal shutdown. Don't even signal that
549 : * we've shutdown during irregular shutdowns, because the shutdown
550 : * sequence isn't coordinated to ensure this backend shuts down last.
551 : */
552 1092 : if (code == 0)
553 : {
554 1082 : pgStatLocal.shmem->is_shutdown = true;
555 1082 : pgstat_write_statsfile(GetRedoRecPtr());
556 : }
557 1092 : }
558 :
559 :
560 : /* ------------------------------------------------------------
561 : * Backend initialization / shutdown functions
562 : * ------------------------------------------------------------
563 : */
564 :
565 : /*
566 : * Shut down a single backend's statistics reporting at process exit.
567 : *
568 : * Flush out any remaining statistics counts. Without this, operations
569 : * triggered during backend exit (such as temp table deletions) won't be
570 : * counted.
571 : */
572 : static void
573 35576 : pgstat_shutdown_hook(int code, Datum arg)
574 : {
575 : Assert(!pgstat_is_shutdown);
576 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
577 :
578 : /*
579 : * If we got as far as discovering our own database ID, we can flush out
580 : * what we did so far. Otherwise, we'd be reporting an invalid database
581 : * ID, so forget it. (This means that accesses to pg_database during
582 : * failed backend starts might never get counted.)
583 : */
584 35576 : if (OidIsValid(MyDatabaseId))
585 28606 : pgstat_report_disconnect(MyDatabaseId);
586 :
587 35576 : pgstat_report_stat(true);
588 :
589 : /* there shouldn't be any pending changes left */
590 : Assert(dlist_is_empty(&pgStatPending));
591 35576 : dlist_init(&pgStatPending);
592 :
593 35576 : pgstat_detach_shmem();
594 :
595 : #ifdef USE_ASSERT_CHECKING
596 : pgstat_is_shutdown = true;
597 : #endif
598 35576 : }
599 :
600 : /*
601 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
602 : * BaseInit().
603 : *
604 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
605 : */
606 : void
607 35576 : pgstat_initialize(void)
608 : {
609 : Assert(!pgstat_is_initialized);
610 :
611 35576 : pgstat_attach_shmem();
612 :
613 35576 : pgstat_init_snapshot_fixed();
614 :
615 : /* Backend initialization callbacks */
616 9143032 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
617 : {
618 9107456 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
619 :
620 9107456 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
621 9071880 : continue;
622 :
623 35576 : kind_info->init_backend_cb();
624 : }
625 :
626 : /* Set up a process-exit hook to clean up */
627 35576 : before_shmem_exit(pgstat_shutdown_hook, 0);
628 :
629 : #ifdef USE_ASSERT_CHECKING
630 : pgstat_is_initialized = true;
631 : #endif
632 35576 : }
633 :
634 :
635 : /* ------------------------------------------------------------
636 : * Public functions used by backends follow
637 : * ------------------------------------------------------------
638 : */
639 :
640 : /*
641 : * Must be called by processes that performs DML: tcop/postgres.c, logical
642 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
643 : * shared memory.
644 : *
645 : * Unless called with 'force', pending stats updates are flushed happen once
646 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
647 : * block on lock acquisition, except if stats updates have been pending for
648 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
649 : *
650 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
651 : * suggested idle timeout is returned. Currently this is always
652 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
653 : * a timeout after which to call pgstat_report_stat(true), but are not
654 : * required to do so.
655 : *
656 : * Note that this is called only when not within a transaction, so it is fair
657 : * to use transaction stop time as an approximation of current time.
658 : */
659 : long
660 559004 : pgstat_report_stat(bool force)
661 : {
662 : static TimestampTz pending_since = 0;
663 : static TimestampTz last_flush = 0;
664 : bool partial_flush;
665 : TimestampTz now;
666 : bool nowait;
667 :
668 : pgstat_assert_is_up();
669 : Assert(!IsTransactionOrTransactionBlock());
670 :
671 : /* "absorb" the forced flush even if there's nothing to flush */
672 559004 : if (pgStatForceNextFlush)
673 : {
674 420 : force = true;
675 420 : pgStatForceNextFlush = false;
676 : }
677 :
678 : /* Don't expend a clock check if nothing to do */
679 559004 : if (dlist_is_empty(&pgStatPending))
680 : {
681 13266 : bool do_flush = false;
682 :
683 : /* Check for pending fixed-numbered stats */
684 2681006 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
685 : {
686 2670686 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
687 :
688 2670686 : if (!kind_info)
689 2528364 : continue;
690 142322 : if (!kind_info->fixed_amount)
691 : {
692 : Assert(kind_info->have_fixed_pending_cb == NULL);
693 66348 : continue;
694 : }
695 75974 : if (!kind_info->have_fixed_pending_cb)
696 39816 : continue;
697 :
698 36158 : if (kind_info->have_fixed_pending_cb())
699 : {
700 2946 : do_flush = true;
701 2946 : break;
702 : }
703 : }
704 :
705 13266 : if (!do_flush)
706 : {
707 : Assert(pending_since == 0);
708 10320 : return 0;
709 : }
710 : }
711 :
712 : /*
713 : * There should never be stats to report once stats are shut down. Can't
714 : * assert that before the checks above, as there is an unconditional
715 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
716 : * the process that ran pgstat_before_server_shutdown() will still call.
717 : */
718 : Assert(!pgStatLocal.shmem->is_shutdown);
719 :
720 548684 : if (force)
721 : {
722 : /*
723 : * Stats reports are forced either when it's been too long since stats
724 : * have been reported or in processes that force stats reporting to
725 : * happen at specific points (including shutdown). In the former case
726 : * the transaction stop time might be quite old, in the latter it
727 : * would never get cleared.
728 : */
729 35610 : now = GetCurrentTimestamp();
730 : }
731 : else
732 : {
733 513074 : now = GetCurrentTransactionStopTimestamp();
734 :
735 975066 : if (pending_since > 0 &&
736 461992 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
737 : {
738 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
739 0 : force = true;
740 : }
741 513074 : else if (last_flush > 0 &&
742 488200 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
743 : {
744 : /* don't flush too frequently */
745 486494 : if (pending_since == 0)
746 25938 : pending_since = now;
747 :
748 486494 : return PGSTAT_IDLE_INTERVAL;
749 : }
750 : }
751 :
752 62190 : pgstat_update_dbstats(now);
753 :
754 : /* don't wait for lock acquisition when !force */
755 62190 : nowait = !force;
756 :
757 62190 : partial_flush = false;
758 :
759 : /* flush database / relation / function / ... stats */
760 62190 : partial_flush |= pgstat_flush_pending_entries(nowait);
761 :
762 : /* flush of fixed-numbered stats */
763 15982830 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
764 : {
765 15920640 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
766 :
767 15920640 : if (!kind_info)
768 15236258 : continue;
769 684382 : if (!kind_info->fixed_amount)
770 : {
771 : Assert(kind_info->flush_fixed_cb == NULL);
772 311096 : continue;
773 : }
774 373286 : if (!kind_info->flush_fixed_cb)
775 186716 : continue;
776 :
777 186570 : partial_flush |= kind_info->flush_fixed_cb(nowait);
778 : }
779 :
780 62190 : last_flush = now;
781 :
782 : /*
783 : * If some of the pending stats could not be flushed due to lock
784 : * contention, let the caller know when to retry.
785 : */
786 62190 : if (partial_flush)
787 : {
788 : /* force should have prevented us from getting here */
789 : Assert(!force);
790 :
791 : /* remember since when stats have been pending */
792 0 : if (pending_since == 0)
793 0 : pending_since = now;
794 :
795 0 : return PGSTAT_IDLE_INTERVAL;
796 : }
797 :
798 62190 : pending_since = 0;
799 :
800 62190 : return 0;
801 : }
802 :
803 : /*
804 : * Force locally pending stats to be flushed during the next
805 : * pgstat_report_stat() call. This is useful for writing tests.
806 : */
807 : void
808 420 : pgstat_force_next_flush(void)
809 : {
810 420 : pgStatForceNextFlush = true;
811 420 : }
812 :
813 : /*
814 : * Only for use by pgstat_reset_counters()
815 : */
816 : static bool
817 22086 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
818 : {
819 22086 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
820 : }
821 :
822 : /*
823 : * Reset counters for our database.
824 : *
825 : * Permission checking for this function is managed through the normal
826 : * GRANT system.
827 : */
828 : void
829 26 : pgstat_reset_counters(void)
830 : {
831 26 : TimestampTz ts = GetCurrentTimestamp();
832 :
833 26 : pgstat_reset_matching_entries(match_db_entries,
834 : ObjectIdGetDatum(MyDatabaseId),
835 : ts);
836 26 : }
837 :
838 : /*
839 : * Reset a single variable-numbered entry.
840 : *
841 : * If the stats kind is within a database, also reset the database's
842 : * stat_reset_timestamp.
843 : *
844 : * Permission checking for this function is managed through the normal
845 : * GRANT system.
846 : */
847 : void
848 38 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
849 : {
850 38 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
851 38 : TimestampTz ts = GetCurrentTimestamp();
852 :
853 : /* not needed atm, and doesn't make sense with the current signature */
854 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
855 :
856 : /* reset the "single counter" */
857 38 : pgstat_reset_entry(kind, dboid, objid, ts);
858 :
859 38 : if (!kind_info->accessed_across_databases)
860 16 : pgstat_reset_database_timestamp(dboid, ts);
861 38 : }
862 :
863 : /*
864 : * Reset stats for all entries of a kind.
865 : *
866 : * Permission checking for this function is managed through the normal
867 : * GRANT system.
868 : */
869 : void
870 54 : pgstat_reset_of_kind(PgStat_Kind kind)
871 : {
872 54 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
873 54 : TimestampTz ts = GetCurrentTimestamp();
874 :
875 54 : if (kind_info->fixed_amount)
876 46 : kind_info->reset_all_cb(ts);
877 : else
878 8 : pgstat_reset_entries_of_kind(kind, ts);
879 54 : }
880 :
881 :
882 : /* ------------------------------------------------------------
883 : * Fetching of stats
884 : * ------------------------------------------------------------
885 : */
886 :
887 : /*
888 : * Discard any data collected in the current transaction. Any subsequent
889 : * request will cause new snapshots to be read.
890 : *
891 : * This is also invoked during transaction commit or abort to discard
892 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
893 : * cause this routine to be called.
894 : */
895 : void
896 749336 : pgstat_clear_snapshot(void)
897 : {
898 : pgstat_assert_is_up();
899 :
900 749336 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
901 : sizeof(pgStatLocal.snapshot.fixed_valid));
902 749336 : memset(&pgStatLocal.snapshot.custom_valid, 0,
903 : sizeof(pgStatLocal.snapshot.custom_valid));
904 749336 : pgStatLocal.snapshot.stats = NULL;
905 749336 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
906 :
907 : /* Release memory, if any was allocated */
908 749336 : if (pgStatLocal.snapshot.context)
909 : {
910 938 : MemoryContextDelete(pgStatLocal.snapshot.context);
911 :
912 : /* Reset variables */
913 938 : pgStatLocal.snapshot.context = NULL;
914 : }
915 :
916 : /*
917 : * Historically the backend_status.c facilities lived in this file, and
918 : * were reset with the same function. For now keep it that way, and
919 : * forward the reset request.
920 : */
921 749336 : pgstat_clear_backend_activity_snapshot();
922 :
923 : /* Reset this flag, as it may be possible that a cleanup was forced. */
924 749336 : force_stats_snapshot_clear = false;
925 749336 : }
926 :
927 : void *
928 209250 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
929 : {
930 : PgStat_HashKey key;
931 : PgStat_EntryRef *entry_ref;
932 : void *stats_data;
933 209250 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
934 :
935 : /* should be called from backends */
936 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
937 : Assert(!kind_info->fixed_amount);
938 :
939 209250 : pgstat_prep_snapshot();
940 :
941 : /* clear padding */
942 209250 : memset(&key, 0, sizeof(struct PgStat_HashKey));
943 :
944 209250 : key.kind = kind;
945 209250 : key.dboid = dboid;
946 209250 : key.objid = objid;
947 :
948 : /* if we need to build a full snapshot, do so */
949 209250 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
950 460 : pgstat_build_snapshot();
951 :
952 : /* if caching is desired, look up in cache */
953 209250 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
954 : {
955 9498 : PgStat_SnapshotEntry *entry = NULL;
956 :
957 9498 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
958 :
959 9498 : if (entry)
960 620 : return entry->data;
961 :
962 : /*
963 : * If we built a full snapshot and the key is not in
964 : * pgStatLocal.snapshot.stats, there are no matching stats.
965 : */
966 8878 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
967 28 : return NULL;
968 : }
969 :
970 208602 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
971 :
972 208602 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
973 :
974 208602 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
975 : {
976 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
977 9878 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
978 : {
979 1844 : PgStat_SnapshotEntry *entry = NULL;
980 : bool found;
981 :
982 1844 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
983 : Assert(!found);
984 1844 : entry->data = NULL;
985 : }
986 9878 : return NULL;
987 : }
988 :
989 : /*
990 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
991 : * otherwise we could quickly end up with a fair bit of memory used due to
992 : * repeated accesses.
993 : */
994 198724 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
995 191718 : stats_data = palloc(kind_info->shared_data_len);
996 : else
997 7006 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
998 7006 : kind_info->shared_data_len);
999 :
1000 198724 : pgstat_lock_entry_shared(entry_ref, false);
1001 397448 : memcpy(stats_data,
1002 198724 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1003 198724 : kind_info->shared_data_len);
1004 198724 : pgstat_unlock_entry(entry_ref);
1005 :
1006 198724 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1007 : {
1008 7006 : PgStat_SnapshotEntry *entry = NULL;
1009 : bool found;
1010 :
1011 7006 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1012 7006 : entry->data = stats_data;
1013 : }
1014 :
1015 198724 : return stats_data;
1016 : }
1017 :
1018 : /*
1019 : * If a stats snapshot has been taken, return the timestamp at which that was
1020 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1021 : * false.
1022 : */
1023 : TimestampTz
1024 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1025 : {
1026 60 : if (force_stats_snapshot_clear)
1027 18 : pgstat_clear_snapshot();
1028 :
1029 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1030 : {
1031 24 : *have_snapshot = true;
1032 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1033 : }
1034 :
1035 36 : *have_snapshot = false;
1036 :
1037 36 : return 0;
1038 : }
1039 :
1040 : bool
1041 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1042 : {
1043 : /* fixed-numbered stats always exist */
1044 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1045 12 : return true;
1046 :
1047 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1048 : }
1049 :
1050 : /*
1051 : * Ensure snapshot for fixed-numbered 'kind' exists.
1052 : *
1053 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1054 : * massaging the data into the desired format.
1055 : */
1056 : void
1057 432 : pgstat_snapshot_fixed(PgStat_Kind kind)
1058 : {
1059 : Assert(pgstat_is_kind_valid(kind));
1060 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1061 :
1062 432 : if (force_stats_snapshot_clear)
1063 0 : pgstat_clear_snapshot();
1064 :
1065 432 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1066 24 : pgstat_build_snapshot();
1067 : else
1068 408 : pgstat_build_snapshot_fixed(kind);
1069 :
1070 432 : if (pgstat_is_kind_builtin(kind))
1071 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1072 8 : else if (pgstat_is_kind_custom(kind))
1073 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1074 432 : }
1075 :
1076 : static void
1077 35576 : pgstat_init_snapshot_fixed(void)
1078 : {
1079 : /*
1080 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1081 : * stats kinds.
1082 : */
1083 4624880 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1084 : {
1085 4589304 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1086 :
1087 4589304 : if (!kind_info || !kind_info->fixed_amount)
1088 4589206 : continue;
1089 :
1090 98 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1091 98 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1092 : }
1093 35576 : }
1094 :
1095 : static void
1096 209302 : pgstat_prep_snapshot(void)
1097 : {
1098 209302 : if (force_stats_snapshot_clear)
1099 18 : pgstat_clear_snapshot();
1100 :
1101 209302 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1102 9550 : pgStatLocal.snapshot.stats != NULL)
1103 208364 : return;
1104 :
1105 938 : if (!pgStatLocal.snapshot.context)
1106 938 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1107 : "PgStat Snapshot",
1108 : ALLOCSET_SMALL_SIZES);
1109 :
1110 938 : pgStatLocal.snapshot.stats =
1111 938 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1112 : PGSTAT_SNAPSHOT_HASH_SIZE,
1113 : NULL);
1114 : }
1115 :
1116 : static void
1117 484 : pgstat_build_snapshot(void)
1118 : {
1119 : dshash_seq_status hstat;
1120 : PgStatShared_HashEntry *p;
1121 :
1122 : /* should only be called when we need a snapshot */
1123 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1124 :
1125 : /* snapshot already built */
1126 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1127 432 : return;
1128 :
1129 52 : pgstat_prep_snapshot();
1130 :
1131 : Assert(pgStatLocal.snapshot.stats->members == 0);
1132 :
1133 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1134 :
1135 : /*
1136 : * Snapshot all variable stats.
1137 : */
1138 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1139 52598 : while ((p = dshash_seq_next(&hstat)) != NULL)
1140 : {
1141 52546 : PgStat_Kind kind = p->key.kind;
1142 52546 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1143 : bool found;
1144 : PgStat_SnapshotEntry *entry;
1145 : PgStatShared_Common *stats_data;
1146 :
1147 : /*
1148 : * Check if the stats object should be included in the snapshot.
1149 : * Unless the stats kind can be accessed from all databases (e.g.,
1150 : * database stats themselves), we only include stats for the current
1151 : * database or objects not associated with a database (e.g. shared
1152 : * relations).
1153 : */
1154 52546 : if (p->key.dboid != MyDatabaseId &&
1155 15420 : p->key.dboid != InvalidOid &&
1156 12872 : !kind_info->accessed_across_databases)
1157 12972 : continue;
1158 :
1159 39778 : if (p->dropped)
1160 204 : continue;
1161 :
1162 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1163 :
1164 39574 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1165 : Assert(stats_data);
1166 :
1167 39574 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1168 : Assert(!found);
1169 :
1170 79148 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1171 39574 : kind_info->shared_size);
1172 :
1173 : /*
1174 : * Acquire the LWLock directly instead of using
1175 : * pg_stat_lock_entry_shared() which requires a reference.
1176 : */
1177 39574 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1178 79148 : memcpy(entry->data,
1179 39574 : pgstat_get_entry_data(kind, stats_data),
1180 39574 : kind_info->shared_size);
1181 39574 : LWLockRelease(&stats_data->lock);
1182 : }
1183 52 : dshash_seq_term(&hstat);
1184 :
1185 : /*
1186 : * Build snapshot of all fixed-numbered stats.
1187 : */
1188 13364 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1189 : {
1190 13312 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1191 :
1192 13312 : if (!kind_info)
1193 12740 : continue;
1194 572 : if (!kind_info->fixed_amount)
1195 : {
1196 : Assert(kind_info->snapshot_cb == NULL);
1197 260 : continue;
1198 : }
1199 :
1200 312 : pgstat_build_snapshot_fixed(kind);
1201 : }
1202 :
1203 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1204 : }
1205 :
1206 : static void
1207 7218 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1208 : {
1209 7218 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1210 : int idx;
1211 : bool *valid;
1212 :
1213 : /* Position in fixed_valid or custom_valid */
1214 7218 : if (pgstat_is_kind_builtin(kind))
1215 : {
1216 7204 : idx = kind;
1217 7204 : valid = pgStatLocal.snapshot.fixed_valid;
1218 : }
1219 : else
1220 : {
1221 14 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1222 14 : valid = pgStatLocal.snapshot.custom_valid;
1223 : }
1224 :
1225 : Assert(kind_info->fixed_amount);
1226 : Assert(kind_info->snapshot_cb != NULL);
1227 :
1228 7218 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1229 : {
1230 : /* rebuild every time */
1231 6528 : valid[idx] = false;
1232 : }
1233 690 : else if (valid[idx])
1234 : {
1235 : /* in snapshot mode we shouldn't get called again */
1236 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1237 12 : return;
1238 : }
1239 :
1240 : Assert(!valid[idx]);
1241 :
1242 7206 : kind_info->snapshot_cb();
1243 :
1244 : Assert(!valid[idx]);
1245 7206 : valid[idx] = true;
1246 : }
1247 :
1248 :
1249 : /* ------------------------------------------------------------
1250 : * Backend-local pending stats infrastructure
1251 : * ------------------------------------------------------------
1252 : */
1253 :
1254 : /*
1255 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1256 : * stats if not already done.
1257 : *
1258 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1259 : * created, false otherwise.
1260 : */
1261 : PgStat_EntryRef *
1262 3423266 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1263 : {
1264 : PgStat_EntryRef *entry_ref;
1265 :
1266 : /* need to be able to flush out */
1267 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1268 :
1269 3423266 : if (unlikely(!pgStatPendingContext))
1270 : {
1271 31062 : pgStatPendingContext =
1272 31062 : AllocSetContextCreate(TopMemoryContext,
1273 : "PgStat Pending",
1274 : ALLOCSET_SMALL_SIZES);
1275 : }
1276 :
1277 3423266 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1278 : true, created_entry);
1279 :
1280 3423266 : if (entry_ref->pending == NULL)
1281 : {
1282 1751390 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1283 :
1284 : Assert(entrysize != (size_t) -1);
1285 :
1286 1751390 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1287 1751390 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1288 : }
1289 :
1290 3423266 : return entry_ref;
1291 : }
1292 :
1293 : /*
1294 : * Return an existing stats entry, or NULL.
1295 : *
1296 : * This should only be used for helper function for pgstatfuncs.c - outside of
1297 : * that it shouldn't be needed.
1298 : */
1299 : PgStat_EntryRef *
1300 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1301 : {
1302 : PgStat_EntryRef *entry_ref;
1303 :
1304 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1305 :
1306 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1307 30 : return NULL;
1308 :
1309 54 : return entry_ref;
1310 : }
1311 :
1312 : void
1313 1751390 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1314 : {
1315 1751390 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1316 1751390 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1317 1751390 : void *pending_data = entry_ref->pending;
1318 :
1319 : Assert(pending_data != NULL);
1320 : /* !fixed_amount stats should be handled explicitly */
1321 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1322 :
1323 1751390 : if (kind_info->delete_pending_cb)
1324 1647004 : kind_info->delete_pending_cb(entry_ref);
1325 :
1326 1751390 : pfree(pending_data);
1327 1751390 : entry_ref->pending = NULL;
1328 :
1329 1751390 : dlist_delete(&entry_ref->pending_node);
1330 1751390 : }
1331 :
1332 : /*
1333 : * Flush out pending stats for database objects (databases, relations,
1334 : * functions).
1335 : */
1336 : static bool
1337 62190 : pgstat_flush_pending_entries(bool nowait)
1338 : {
1339 62190 : bool have_pending = false;
1340 62190 : dlist_node *cur = NULL;
1341 :
1342 : /*
1343 : * Need to be a bit careful iterating over the list of pending entries.
1344 : * Processing a pending entry may queue further pending entries to the end
1345 : * of the list that we want to process, so a simple iteration won't do.
1346 : * Further complicating matters is that we want to delete the current
1347 : * entry in each iteration from the list if we flushed successfully.
1348 : *
1349 : * So we just keep track of the next pointer in each loop iteration.
1350 : */
1351 62190 : if (!dlist_is_empty(&pgStatPending))
1352 59312 : cur = dlist_head_node(&pgStatPending);
1353 :
1354 1751982 : while (cur)
1355 : {
1356 1689792 : PgStat_EntryRef *entry_ref =
1357 1689792 : dlist_container(PgStat_EntryRef, pending_node, cur);
1358 1689792 : PgStat_HashKey key = entry_ref->shared_entry->key;
1359 1689792 : PgStat_Kind kind = key.kind;
1360 1689792 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1361 : bool did_flush;
1362 : dlist_node *next;
1363 :
1364 : Assert(!kind_info->fixed_amount);
1365 : Assert(kind_info->flush_pending_cb != NULL);
1366 :
1367 : /* flush the stats, if possible */
1368 1689792 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1369 :
1370 : Assert(did_flush || nowait);
1371 :
1372 : /* determine next entry, before deleting the pending entry */
1373 1689792 : if (dlist_has_next(&pgStatPending, cur))
1374 1630480 : next = dlist_next_node(&pgStatPending, cur);
1375 : else
1376 59312 : next = NULL;
1377 :
1378 : /* if successfully flushed, remove entry */
1379 1689792 : if (did_flush)
1380 1689792 : pgstat_delete_pending_entry(entry_ref);
1381 : else
1382 0 : have_pending = true;
1383 :
1384 1689792 : cur = next;
1385 : }
1386 :
1387 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1388 :
1389 62190 : return have_pending;
1390 : }
1391 :
1392 :
1393 : /* ------------------------------------------------------------
1394 : * Helper / infrastructure functions
1395 : * ------------------------------------------------------------
1396 : */
1397 :
1398 : PgStat_Kind
1399 166 : pgstat_get_kind_from_str(char *kind_str)
1400 : {
1401 476 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1402 : {
1403 470 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1404 160 : return kind;
1405 : }
1406 :
1407 : /* Check the custom set of cumulative stats */
1408 6 : if (pgstat_kind_custom_infos)
1409 : {
1410 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1411 : {
1412 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1413 :
1414 0 : if (pgstat_kind_custom_infos[idx] &&
1415 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1416 0 : return kind;
1417 : }
1418 : }
1419 :
1420 6 : ereport(ERROR,
1421 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1422 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1423 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1424 : }
1425 :
1426 : static inline bool
1427 636282 : pgstat_is_kind_valid(PgStat_Kind kind)
1428 : {
1429 636282 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1430 : }
1431 :
1432 : const PgStat_KindInfo *
1433 41709350 : pgstat_get_kind_info(PgStat_Kind kind)
1434 : {
1435 41709350 : if (pgstat_is_kind_builtin(kind))
1436 9081660 : return &pgstat_kind_builtin_infos[kind];
1437 :
1438 32627690 : if (pgstat_is_kind_custom(kind))
1439 : {
1440 19684410 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1441 :
1442 19684410 : if (pgstat_kind_custom_infos == NULL ||
1443 51640 : pgstat_kind_custom_infos[idx] == NULL)
1444 19683570 : return NULL;
1445 840 : return pgstat_kind_custom_infos[idx];
1446 : }
1447 :
1448 12943280 : return NULL;
1449 : }
1450 :
1451 : /*
1452 : * Register a new stats kind.
1453 : *
1454 : * PgStat_Kinds must be globally unique across all extensions. Refer
1455 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1456 : * unique ID for your extension, to avoid conflicts with other extension
1457 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1458 : * needlessly reserving a new ID.
1459 : */
1460 : void
1461 16 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1462 : {
1463 16 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1464 :
1465 16 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1466 0 : ereport(ERROR,
1467 : (errmsg("custom cumulative statistics name is invalid"),
1468 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1469 :
1470 16 : if (!pgstat_is_kind_custom(kind))
1471 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1472 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1473 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1474 :
1475 16 : if (!process_shared_preload_libraries_in_progress)
1476 0 : ereport(ERROR,
1477 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1478 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1479 :
1480 : /*
1481 : * Check some data for fixed-numbered stats.
1482 : */
1483 16 : if (kind_info->fixed_amount)
1484 : {
1485 8 : if (kind_info->shared_size == 0)
1486 0 : ereport(ERROR,
1487 : (errmsg("custom cumulative statistics property is invalid"),
1488 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1489 : }
1490 :
1491 : /*
1492 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1493 : */
1494 16 : if (pgstat_kind_custom_infos == NULL)
1495 : {
1496 8 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1497 8 : MemoryContextAllocZero(TopMemoryContext,
1498 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1499 : }
1500 :
1501 16 : if (pgstat_kind_custom_infos[idx] != NULL &&
1502 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1503 0 : ereport(ERROR,
1504 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1505 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1506 : pgstat_kind_custom_infos[idx]->name)));
1507 :
1508 : /* check for existing custom stats with the same name */
1509 2080 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1510 : {
1511 2064 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1512 :
1513 2064 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1514 2056 : continue;
1515 8 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1516 0 : ereport(ERROR,
1517 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1518 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1519 : }
1520 :
1521 : /* Register it */
1522 16 : pgstat_kind_custom_infos[idx] = kind_info;
1523 16 : ereport(LOG,
1524 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1525 : kind_info->name, kind)));
1526 16 : }
1527 :
/*
 * Assert that the stats system is usable, i.e. that we are past
 * pgstat_initialize() and not yet in pgstat_shutdown(); stats must only be
 * reported within that window.  Calling this from a few central places
 * makes violations of that rule easier to catch.
 *
 * The function is only defined in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1540 :
1541 :
1542 : /* ------------------------------------------------------------
1543 : * reading and writing of on-disk stats file
1544 : * ------------------------------------------------------------
1545 : */
1546 :
/*
 * Helpers for pgstat_write_statsfile().
 *
 * write_chunk() writes 'len' bytes starting at 'ptr' to 'fpout' as a single
 * item.  Failures are deliberately not reported here; the caller checks the
 * stream with ferror() once after all writes are done.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	int			rc;

	rc = fwrite(ptr, len, 1, fpout);

	/* we'll check for errors with ferror once at the end */
	(void) rc;
}

/*
 * Write the object 'ptr' points to, using the size of the pointed-to type.
 * The argument is parenthesized so that pointer expressions such as "p + 1"
 * yield the size of the object pointed to, not of an arithmetic expression.
 */
#define write_chunk_s(fpout, ptr) write_chunk((fpout), (ptr), sizeof(*(ptr)))
1560 :
1561 : /*
1562 : * This function is called in the last process that is accessing the shared
1563 : * stats so locking is not required.
1564 : */
1565 : static void
1566 1082 : pgstat_write_statsfile(XLogRecPtr redo)
1567 : {
1568 : FILE *fpout;
1569 : int32 format_id;
1570 1082 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1571 1082 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1572 : dshash_seq_status hstat;
1573 : PgStatShared_HashEntry *ps;
1574 :
1575 : pgstat_assert_is_up();
1576 :
1577 : /* should be called only by the checkpointer or single user mode */
1578 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1579 :
1580 : /* we're shutting down, so it's ok to just override this */
1581 1082 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1582 :
1583 1082 : elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
1584 : LSN_FORMAT_ARGS(redo));
1585 :
1586 : /*
1587 : * Open the statistics temp file to write out the current values.
1588 : */
1589 1082 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1590 1082 : if (fpout == NULL)
1591 : {
1592 0 : ereport(LOG,
1593 : (errcode_for_file_access(),
1594 : errmsg("could not open temporary statistics file \"%s\": %m",
1595 : tmpfile)));
1596 0 : return;
1597 : }
1598 :
1599 : /*
1600 : * Write the file header --- currently just a format ID.
1601 : */
1602 1082 : format_id = PGSTAT_FILE_FORMAT_ID;
1603 1082 : write_chunk_s(fpout, &format_id);
1604 :
1605 : /* Write the redo LSN, used to cross check the file read */
1606 1082 : write_chunk_s(fpout, &redo);
1607 :
1608 : /* Write various stats structs for fixed number of objects */
1609 278074 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1610 : {
1611 : char *ptr;
1612 276992 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1613 :
1614 276992 : if (!info || !info->fixed_amount)
1615 270494 : continue;
1616 :
1617 6498 : if (pgstat_is_kind_builtin(kind))
1618 : Assert(info->snapshot_ctl_off != 0);
1619 :
1620 6498 : pgstat_build_snapshot_fixed(kind);
1621 6498 : if (pgstat_is_kind_builtin(kind))
1622 6492 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1623 : else
1624 6 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1625 :
1626 6498 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1627 6498 : write_chunk_s(fpout, &kind);
1628 6498 : write_chunk(fpout, ptr, info->shared_data_len);
1629 : }
1630 :
1631 : /*
1632 : * Walk through the stats entries
1633 : */
1634 1082 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1635 311274 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1636 : {
1637 : PgStatShared_Common *shstats;
1638 310192 : const PgStat_KindInfo *kind_info = NULL;
1639 :
1640 310192 : CHECK_FOR_INTERRUPTS();
1641 :
1642 : /* we may have some "dropped" entries not yet removed, skip them */
1643 : Assert(!ps->dropped);
1644 310192 : if (ps->dropped)
1645 0 : continue;
1646 :
1647 : /*
1648 : * This discards data related to custom stats kinds that are unknown
1649 : * to this process.
1650 : */
1651 310192 : if (!pgstat_is_kind_valid(ps->key.kind))
1652 : {
1653 0 : elog(WARNING, "found unknown stats entry %u/%u/%llu",
1654 : ps->key.kind, ps->key.dboid,
1655 : (unsigned long long) ps->key.objid);
1656 0 : continue;
1657 : }
1658 :
1659 310192 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1660 :
1661 310192 : kind_info = pgstat_get_kind_info(ps->key.kind);
1662 :
1663 : /* if not dropped the valid-entry refcount should exist */
1664 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1665 :
1666 310192 : if (!kind_info->to_serialized_name)
1667 : {
1668 : /* normal stats entry, identified by PgStat_HashKey */
1669 310042 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1670 310042 : write_chunk_s(fpout, &ps->key);
1671 : }
1672 : else
1673 : {
1674 : /* stats entry identified by name on disk (e.g. slots) */
1675 : NameData name;
1676 :
1677 150 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1678 :
1679 150 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1680 150 : write_chunk_s(fpout, &ps->key.kind);
1681 150 : write_chunk_s(fpout, &name);
1682 : }
1683 :
1684 : /* Write except the header part of the entry */
1685 310192 : write_chunk(fpout,
1686 : pgstat_get_entry_data(ps->key.kind, shstats),
1687 : pgstat_get_entry_len(ps->key.kind));
1688 : }
1689 1082 : dshash_seq_term(&hstat);
1690 :
1691 : /*
1692 : * No more output to be done. Close the temp file and replace the old
1693 : * pgstat.stat with it. The ferror() check replaces testing for error
1694 : * after each individual fputc or fwrite (in write_chunk()) above.
1695 : */
1696 1082 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1697 :
1698 1082 : if (ferror(fpout))
1699 : {
1700 0 : ereport(LOG,
1701 : (errcode_for_file_access(),
1702 : errmsg("could not write temporary statistics file \"%s\": %m",
1703 : tmpfile)));
1704 0 : FreeFile(fpout);
1705 0 : unlink(tmpfile);
1706 : }
1707 1082 : else if (FreeFile(fpout) < 0)
1708 : {
1709 0 : ereport(LOG,
1710 : (errcode_for_file_access(),
1711 : errmsg("could not close temporary statistics file \"%s\": %m",
1712 : tmpfile)));
1713 0 : unlink(tmpfile);
1714 : }
1715 1082 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1716 : {
1717 : /* durable_rename already emitted log message */
1718 0 : unlink(tmpfile);
1719 : }
1720 : }
1721 :
/*
 * Helpers for pgstat_read_statsfile().
 *
 * read_chunk() reads exactly 'len' bytes from 'fpin' into 'ptr'.  Returns
 * true if all bytes could be read, false on a short read or read error.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	return fread(ptr, 1, len, fpin) == len;
}

/*
 * Read into the object 'ptr' points to, using the size of the pointed-to
 * type.  The argument is parenthesized so that pointer expressions such as
 * "p + 1" yield the size of the object pointed to, not of an arithmetic
 * expression.
 */
#define read_chunk_s(fpin, ptr) read_chunk((fpin), (ptr), sizeof(*(ptr)))
1730 :
1731 : /*
1732 : * Reads in existing statistics file into memory.
1733 : *
1734 : * This function is called in the only process that is accessing the shared
1735 : * stats so locking is not required.
1736 : */
1737 : static void
1738 1294 : pgstat_read_statsfile(XLogRecPtr redo)
1739 : {
1740 : FILE *fpin;
1741 : int32 format_id;
1742 : bool found;
1743 1294 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1744 1294 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1745 : XLogRecPtr file_redo;
1746 :
1747 : /* shouldn't be called from postmaster */
1748 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1749 :
1750 1294 : elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
1751 : LSN_FORMAT_ARGS(redo));
1752 :
1753 : /*
1754 : * Try to open the stats file. If it doesn't exist, the backends simply
1755 : * returns zero for anything and statistics simply starts from scratch
1756 : * with empty counters.
1757 : *
1758 : * ENOENT is a possibility if stats collection was previously disabled or
1759 : * has not yet written the stats file for the first time. Any other
1760 : * failure condition is suspicious.
1761 : */
1762 1294 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1763 : {
1764 92 : if (errno != ENOENT)
1765 0 : ereport(LOG,
1766 : (errcode_for_file_access(),
1767 : errmsg("could not open statistics file \"%s\": %m",
1768 : statfile)));
1769 92 : pgstat_reset_after_failure();
1770 92 : return;
1771 : }
1772 :
1773 : /*
1774 : * Verify it's of the expected format.
1775 : */
1776 1202 : if (!read_chunk_s(fpin, &format_id))
1777 : {
1778 0 : elog(WARNING, "could not read format ID");
1779 0 : goto error;
1780 : }
1781 :
1782 1202 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1783 : {
1784 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1785 : format_id, PGSTAT_FILE_FORMAT_ID);
1786 2 : goto error;
1787 : }
1788 :
1789 : /*
1790 : * Read the redo LSN stored in the file.
1791 : */
1792 1200 : if (!read_chunk_s(fpin, &file_redo))
1793 : {
1794 0 : elog(WARNING, "could not read redo LSN");
1795 0 : goto error;
1796 : }
1797 :
1798 1200 : if (file_redo != redo)
1799 : {
1800 20 : elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)",
1801 : LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo));
1802 20 : goto error;
1803 : }
1804 :
1805 : /*
1806 : * We found an existing statistics file. Read it and put all the stats
1807 : * data into place.
1808 : */
1809 : for (;;)
1810 326088 : {
1811 327268 : int t = fgetc(fpin);
1812 :
1813 327268 : switch (t)
1814 : {
1815 7084 : case PGSTAT_FILE_ENTRY_FIXED:
1816 : {
1817 : PgStat_Kind kind;
1818 : const PgStat_KindInfo *info;
1819 : char *ptr;
1820 :
1821 : /* entry for fixed-numbered stats */
1822 7084 : if (!read_chunk_s(fpin, &kind))
1823 : {
1824 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1825 2 : goto error;
1826 : }
1827 :
1828 7084 : if (!pgstat_is_kind_valid(kind))
1829 : {
1830 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1831 : kind, t);
1832 0 : goto error;
1833 : }
1834 :
1835 7084 : info = pgstat_get_kind_info(kind);
1836 7084 : if (!info)
1837 : {
1838 2 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1839 : kind, t);
1840 2 : goto error;
1841 : }
1842 :
1843 7082 : if (!info->fixed_amount)
1844 : {
1845 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1846 : kind, t);
1847 0 : goto error;
1848 : }
1849 :
1850 : /* Load back stats into shared memory */
1851 7082 : if (pgstat_is_kind_builtin(kind))
1852 7080 : ptr = ((char *) shmem) + info->shared_ctl_off +
1853 7080 : info->shared_data_off;
1854 : else
1855 : {
1856 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1857 :
1858 2 : ptr = ((char *) shmem->custom_data[idx]) +
1859 2 : info->shared_data_off;
1860 : }
1861 :
1862 7082 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1863 : {
1864 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1865 : kind, t, info->shared_data_len);
1866 0 : goto error;
1867 : }
1868 :
1869 7082 : break;
1870 : }
1871 319006 : case PGSTAT_FILE_ENTRY_HASH:
1872 : case PGSTAT_FILE_ENTRY_NAME:
1873 : {
1874 : PgStat_HashKey key;
1875 : PgStatShared_HashEntry *p;
1876 : PgStatShared_Common *header;
1877 :
1878 319006 : CHECK_FOR_INTERRUPTS();
1879 :
1880 319006 : if (t == PGSTAT_FILE_ENTRY_HASH)
1881 : {
1882 : /* normal stats entry, identified by PgStat_HashKey */
1883 318914 : if (!read_chunk_s(fpin, &key))
1884 : {
1885 0 : elog(WARNING, "could not read key for entry of type %c", t);
1886 0 : goto error;
1887 : }
1888 :
1889 318914 : if (!pgstat_is_kind_valid(key.kind))
1890 : {
1891 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%llu of type %c",
1892 : key.kind, key.dboid,
1893 : (unsigned long long) key.objid, t);
1894 0 : goto error;
1895 : }
1896 : }
1897 : else
1898 : {
1899 : /* stats entry identified by name on disk (e.g. slots) */
1900 92 : const PgStat_KindInfo *kind_info = NULL;
1901 : PgStat_Kind kind;
1902 : NameData name;
1903 :
1904 92 : if (!read_chunk_s(fpin, &kind))
1905 : {
1906 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1907 0 : goto error;
1908 : }
1909 92 : if (!read_chunk_s(fpin, &name))
1910 : {
1911 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1912 : kind, t);
1913 0 : goto error;
1914 : }
1915 92 : if (!pgstat_is_kind_valid(kind))
1916 : {
1917 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1918 : kind, t);
1919 0 : goto error;
1920 : }
1921 :
1922 92 : kind_info = pgstat_get_kind_info(kind);
1923 92 : if (!kind_info)
1924 : {
1925 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1926 : kind, t);
1927 0 : goto error;
1928 : }
1929 :
1930 92 : if (!kind_info->from_serialized_name)
1931 : {
1932 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1933 : kind, t);
1934 0 : goto error;
1935 : }
1936 :
1937 92 : if (!kind_info->from_serialized_name(&name, &key))
1938 : {
1939 : /* skip over data for entry we don't care about */
1940 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1941 : {
1942 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1943 : NameStr(name), kind, t);
1944 0 : goto error;
1945 : }
1946 :
1947 2 : continue;
1948 : }
1949 :
1950 : Assert(key.kind == kind);
1951 : }
1952 :
1953 : /*
1954 : * This intentionally doesn't use pgstat_get_entry_ref() -
1955 : * putting all stats into checkpointer's
1956 : * pgStatEntryRefHash would be wasted effort and memory.
1957 : */
1958 319004 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1959 :
1960 : /* don't allow duplicate entries */
1961 319004 : if (found)
1962 : {
1963 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1964 0 : elog(WARNING, "found duplicate stats entry %u/%u/%llu of type %c",
1965 : key.kind, key.dboid,
1966 : (unsigned long long) key.objid, t);
1967 0 : goto error;
1968 : }
1969 :
1970 319004 : header = pgstat_init_entry(key.kind, p);
1971 319004 : dshash_release_lock(pgStatLocal.shared_hash, p);
1972 :
1973 319004 : if (!read_chunk(fpin,
1974 : pgstat_get_entry_data(key.kind, header),
1975 : pgstat_get_entry_len(key.kind)))
1976 : {
1977 0 : elog(WARNING, "could not read data for entry %u/%u/%llu of type %c",
1978 : key.kind, key.dboid,
1979 : (unsigned long long) key.objid, t);
1980 0 : goto error;
1981 : }
1982 :
1983 319004 : break;
1984 : }
1985 1178 : case PGSTAT_FILE_ENTRY_END:
1986 :
1987 : /*
1988 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
1989 : * file
1990 : */
1991 1178 : if (fgetc(fpin) != EOF)
1992 : {
1993 2 : elog(WARNING, "could not read end-of-file");
1994 2 : goto error;
1995 : }
1996 :
1997 1176 : goto done;
1998 :
1999 0 : default:
2000 0 : elog(WARNING, "could not read entry of type %c", t);
2001 0 : goto error;
2002 : }
2003 : }
2004 :
2005 1202 : done:
2006 1202 : FreeFile(fpin);
2007 :
2008 1202 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2009 1202 : unlink(statfile);
2010 :
2011 1202 : return;
2012 :
2013 26 : error:
2014 26 : ereport(LOG,
2015 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2016 :
2017 26 : pgstat_reset_after_failure();
2018 :
2019 26 : goto done;
2020 : }
2021 :
2022 : /*
2023 : * Helper to reset / drop stats after a crash or after restoring stats from
2024 : * disk failed, potentially after already loading parts.
2025 : */
2026 : static void
2027 458 : pgstat_reset_after_failure(void)
2028 : {
2029 458 : TimestampTz ts = GetCurrentTimestamp();
2030 :
2031 : /* reset fixed-numbered stats */
2032 117706 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2033 : {
2034 117248 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2035 :
2036 117248 : if (!kind_info || !kind_info->fixed_amount)
2037 114498 : continue;
2038 :
2039 2750 : kind_info->reset_all_cb(ts);
2040 : }
2041 :
2042 : /* and drop variable-numbered ones */
2043 458 : pgstat_drop_all_entries();
2044 458 : }
2045 :
2046 : /*
2047 : * GUC assign_hook for stats_fetch_consistency.
2048 : */
2049 : void
2050 4044 : assign_stats_fetch_consistency(int newval, void *extra)
2051 : {
2052 : /*
2053 : * Changing this value in a transaction may cause snapshot state
2054 : * inconsistencies, so force a clear of the current snapshot on the next
2055 : * snapshot build attempt.
2056 : */
2057 4044 : if (pgstat_fetch_consistency != newval)
2058 1864 : force_stats_snapshot_clear = true;
2059 4044 : }
|