Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down, except when shutting down in immediate mode.
16 : *
17 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
18 : *
19 : * Statistics for variable-numbered objects are stored in dynamic shared
20 : * memory and can be found via a dshash hashtable. The statistics counters are
21 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 : * separately allocated (PgStatShared_HashEntry->body). The separate
23 : * allocation allows different kinds of statistics to be stored in the same
24 : * hashtable without wasting space in PgStatShared_HashEntry.
25 : *
26 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 : * is not possible to have statistics for an object that cannot be addressed
28 : * that way at runtime. A wider identifier can be used when serializing to
29 : * disk (used for replication slot stats).
30 : *
31 : * To avoid contention on the shared hashtable, each backend has a
32 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 : * entries. The shared hashtable only needs to be accessed when no prior
35 : * reference is found in the local hashtable. Besides pointing to the
36 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 : * contains a pointer to the shared statistics data, as a process-local
38 : * address, to reduce access costs.
39 : *
40 : * The names for structs stored in shared memory are prefixed with
41 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 : * protected by a dedicated lwlock.
43 : *
44 : * Most stats updates are first accumulated locally in each process as pending
45 : * entries, then later flushed to shared memory (just after commit, or by
46 : * idle-timeout). This practically eliminates contention on individual stats
47 : * entries. For most kinds of variable-numbered stats, pending data is stored
48 : * in PgStat_EntryRef->pending. All entries with pending data are in the
49 : * pgStatPending list. Pending statistics updates are flushed out by
50 : * pgstat_report_stat().
51 : *
52 : * It is possible for external modules to define custom statistics kinds,
53 : * that can use the same properties as any built-in stats kinds. Each custom
54 : * stats kind needs to assign a unique ID to ensure that it does not overlap
55 : * with other extensions. In order to reserve a unique stats kind ID, refer
56 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
57 : *
58 : * The behavior of different kinds of statistics is determined by the kind's
59 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
60 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
61 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
62 : *
63 : * The consistency of read accesses to statistics can be configured using the
64 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
65 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
66 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
67 : * pgStatLocal.snapshot.
68 : *
69 : * To keep things manageable, stats handling is split across several
70 : * files. Infrastructure pieces are in:
71 : * - pgstat.c - this file, to tie it all together
72 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
73 : * the maintenance of hashtable entries
74 : * - pgstat_xact.c - transactional integration, including the transactional
75 : * creation and dropping of stats entries
76 : *
77 : * Each statistics kind is handled in a dedicated file:
78 : * - pgstat_archiver.c
79 : * - pgstat_bgwriter.c
80 : * - pgstat_checkpointer.c
81 : * - pgstat_database.c
82 : * - pgstat_function.c
83 : * - pgstat_io.c
84 : * - pgstat_relation.c
85 : * - pgstat_replslot.c
86 : * - pgstat_slru.c
87 : * - pgstat_subscription.c
88 : * - pgstat_wal.c
89 : *
90 : * Whenever possible infrastructure files should not contain code related to
91 : * specific kinds of stats.
92 : *
93 : *
94 : * Copyright (c) 2001-2024, PostgreSQL Global Development Group
95 : *
96 : * IDENTIFICATION
97 : * src/backend/utils/activity/pgstat.c
98 : * ----------
99 : */
100 : #include "postgres.h"
101 :
102 : #include <unistd.h>
103 :
104 : #include "access/xact.h"
105 : #include "access/xlog.h"
106 : #include "lib/dshash.h"
107 : #include "pgstat.h"
108 : #include "port/atomics.h"
109 : #include "storage/fd.h"
110 : #include "storage/ipc.h"
111 : #include "storage/lwlock.h"
112 : #include "utils/guc_hooks.h"
113 : #include "utils/memutils.h"
114 : #include "utils/pgstat_internal.h"
115 : #include "utils/timestamp.h"
116 :
117 :
118 : /* ----------
119 : * Timer definitions.
120 : *
121 : * In milliseconds.
122 : * ----------
123 : */
124 :
125 : /* minimum interval between non-forced stats flushes */
126 : #define PGSTAT_MIN_INTERVAL 1000
127 : /* how long updates may stay pending before a flush is forced (and may block on locks) */
128 : #define PGSTAT_MAX_INTERVAL 60000
129 : /* when to call pgstat_report_stat() again, even when idle */
130 : #define PGSTAT_IDLE_INTERVAL 10000
131 :
132 : /* ----------
133 : * Initial size hints for the hash tables used in statistics.
134 : * ----------
135 : */
136 :
137 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512 /* NOTE(review): presumably the initial size of the snapshot hash table -- confirm at its use site */
138 :
139 : /* ---------
140 : * Single-character entry type identifiers in the stats file.
141 : * ---------
142 : */
143 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
144 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
145 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
146 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
147 : * PgStat_HashKey */
148 :
149 : /* entry of the hash table used for statistics snapshots */
150 : typedef struct PgStat_SnapshotEntry
151 : {
152 : PgStat_HashKey key; /* lookup key of the entry */
153 : char status; /* for simplehash use */
154 : void *data; /* the stats data itself; may be NULL
 * (pgstat_fetch_entry() stores NULL for
 * keys without stats) */
155 : } PgStat_SnapshotEntry;
156 :
157 :
158 : /* ----------
159 : * Backend-local Hash Table Definitions
160 : * ----------
161 : */
162 :
163 : /*
 * simplehash.h instantiation for stats snapshot entries: elements are
 * PgStat_SnapshotEntry, keyed by their PgStat_HashKey "key" member, and the
 * generated names use the "pgstat_snapshot" prefix.
 */
164 : #define SH_PREFIX pgstat_snapshot
165 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
166 : #define SH_KEY_TYPE PgStat_HashKey
167 : #define SH_KEY key
168 : #define SH_HASH_KEY(tb, key) \
169 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
170 : #define SH_EQUAL(tb, a, b) \
171 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
172 : #define SH_SCOPE static inline
173 : #define SH_DEFINE
174 : #define SH_DECLARE
175 : #include "lib/simplehash.h"
176 :
177 :
178 : /* ----------
179 : * Local function forward declarations
180 : * ----------
181 : */
182 :
183 : static void pgstat_write_statsfile(XLogRecPtr redo); /* called e.g. by pgstat_before_server_shutdown() */
184 : static void pgstat_read_statsfile(XLogRecPtr redo); /* called e.g. by pgstat_restore_stats() */
185 :
186 : static void pgstat_init_snapshot_fixed(void); /* called e.g. by pgstat_initialize() */
187 :
188 : static void pgstat_reset_after_failure(void); /* called e.g. by pgstat_discard_stats() */
189 :
190 : static bool pgstat_flush_pending_entries(bool nowait); /* result is OR'ed into the "partial flush" state of pgstat_report_stat() */
191 :
192 : static void pgstat_prep_snapshot(void); /* called e.g. by pgstat_fetch_entry() */
193 : static void pgstat_build_snapshot(void); /* called e.g. by pgstat_fetch_entry() */
194 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195 :
196 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
197 :
198 :
199 : /* ----------
200 : * GUC parameters
201 : * ----------
202 : */
203 :
204 : bool pgstat_track_counts = false; /* NOTE(review): presumably backs the track_counts GUC -- confirm in the GUC tables */
205 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE; /* stats_fetch_consistency setting; consulted by pgstat_fetch_entry() */
206 :
207 :
208 : /* ----------
209 : * state shared with pgstat_*.c
210 : * ----------
211 : */
212 :
213 : PgStat_LocalState pgStatLocal;
214 :
215 :
216 : /* ----------
217 : * Local data
218 : *
219 : * NB: There should be only variables related to stats infrastructure here,
220 : * not for specific kinds of stats.
221 : * ----------
222 : */
223 :
224 : /*
225 : * Memory context containing pending stats data. Mostly to make it easier to
226 : * track / attribute memory usage.
227 : *
 * NOTE(review): the previous wording also listed contexts for the
 * pgStatEntryRefHash table and the pgStatSharedRef entries, but only the
 * pending-data context is declared at this spot.
228 : */
229 :
230 : static MemoryContext pgStatPendingContext = NULL;
231 :
232 : /*
233 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
234 : *
235 : * Newly pending entries should only ever be added to the end of the list,
236 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
237 : */
238 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
239 :
240 :
241 : /*
242 : * Force the next stats flush to happen regardless of
243 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
 *
 * Set by pgstat_force_next_flush(); consumed and cleared again by
 * pgstat_report_stat().
244 : */
245 : static bool pgStatForceNextFlush = false;
246 :
247 : /*
248 : * Force-clear existing snapshot before next use when stats_fetch_consistency
249 : * is changed. Reset to false by pgstat_clear_snapshot().
250 : */
251 : static bool force_stats_snapshot_clear = false;
252 :
253 :
254 : /*
255 : * For assertions that check pgstat is not used before initialization / after
256 : * shutdown. These only exist in assert-enabled builds, so they may only be
 * referenced from within Assert() or USE_ASSERT_CHECKING sections.
257 : */
258 : #ifdef USE_ASSERT_CHECKING
259 : static bool pgstat_is_initialized = false;
260 : static bool pgstat_is_shutdown = false;
261 : #endif
262 :
263 :
264 : /*
265 : * The different kinds of built-in statistics.
266 : *
267 : * If reasonably possible, handling specific to one kind of stats should go
268 : * through this abstraction, rather than making more of pgstat.c aware.
269 : *
270 : * See comments for struct PgStat_KindInfo for details about the individual
271 : * fields.
272 : *
 * The table is indexed by PgStat_Kind via designated initializers, so the
 * order of the entries below does not matter.
 *
273 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
274 : * seem to be a great way of doing that, given the split across multiple
275 : * files.
276 : */
277 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
278 :
279 : /* stats kinds for variable-numbered objects */
280 :
281 : [PGSTAT_KIND_DATABASE] = {
282 : .name = "database",
283 :
284 : .fixed_amount = false,
285 : /* so pg_stat_database entries can be seen in all databases */
286 : .accessed_across_databases = true,
287 :
288 : .shared_size = sizeof(PgStatShared_Database),
289 : .shared_data_off = offsetof(PgStatShared_Database, stats),
290 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
291 : .pending_size = sizeof(PgStat_StatDBEntry),
292 :
293 : .flush_pending_cb = pgstat_database_flush_cb,
294 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
295 : },
296 :
297 : [PGSTAT_KIND_RELATION] = {
298 : .name = "relation",
299 :
300 : .fixed_amount = false,
301 :
302 : .shared_size = sizeof(PgStatShared_Relation),
303 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
304 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
305 : .pending_size = sizeof(PgStat_TableStatus),
306 :
307 : .flush_pending_cb = pgstat_relation_flush_cb,
308 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
309 : },
310 :
311 : [PGSTAT_KIND_FUNCTION] = {
312 : .name = "function",
313 :
314 : .fixed_amount = false,
315 :
316 : .shared_size = sizeof(PgStatShared_Function),
317 : .shared_data_off = offsetof(PgStatShared_Function, stats),
318 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
319 : .pending_size = sizeof(PgStat_FunctionCounts),
320 :
321 : .flush_pending_cb = pgstat_function_flush_cb,
322 : },
323 :
324 : [PGSTAT_KIND_REPLSLOT] = {
325 : .name = "replslot",
326 :
327 : .fixed_amount = false,
328 :
329 : .accessed_across_databases = true,
330 :
331 : .shared_size = sizeof(PgStatShared_ReplSlot),
332 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
333 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
334 :
 /*
 * No pending_size / flush_pending_cb is set for this kind.
 * NOTE(review): presumably replslot stats do not go through the
 * pending list -- confirm in pgstat_replslot.c.
 */
335 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
 /* the only built-in kind with name-based (de)serialization callbacks */
336 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
337 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
338 : },
339 :
340 : [PGSTAT_KIND_SUBSCRIPTION] = {
341 : .name = "subscription",
342 :
343 : .fixed_amount = false,
344 : /* so pg_stat_subscription_stats entries can be seen in all databases */
345 : .accessed_across_databases = true,
346 :
347 : .shared_size = sizeof(PgStatShared_Subscription),
348 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
349 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
350 : .pending_size = sizeof(PgStat_BackendSubEntry),
351 :
352 : .flush_pending_cb = pgstat_subscription_flush_cb,
353 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
354 : },
355 :
356 :
357 : /*
 * stats for fixed-numbered (mostly 1) objects
 *
 * These entries give offsets into PgStat_Snapshot (snapshot_ctl_off) and
 * PgStat_ShmemControl (shared_ctl_off) instead of shared_size /
 * pending_size. Only kinds that set flush_fixed_cb and
 * have_fixed_pending_cb take part in the fixed-numbered loops of
 * pgstat_report_stat().
 */
358 :
359 : [PGSTAT_KIND_ARCHIVER] = {
360 : .name = "archiver",
361 :
362 : .fixed_amount = true,
363 :
364 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
365 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
366 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
367 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
368 :
369 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
370 : .reset_all_cb = pgstat_archiver_reset_all_cb,
371 : .snapshot_cb = pgstat_archiver_snapshot_cb,
372 : },
373 :
374 : [PGSTAT_KIND_BGWRITER] = {
375 : .name = "bgwriter",
376 :
377 : .fixed_amount = true,
378 :
379 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
380 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
381 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
382 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
383 :
384 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
385 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
386 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
387 : },
388 :
389 : [PGSTAT_KIND_CHECKPOINTER] = {
390 : .name = "checkpointer",
391 :
392 : .fixed_amount = true,
393 :
394 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
395 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
396 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
397 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
398 :
399 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
400 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
401 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
402 : },
403 :
404 : [PGSTAT_KIND_IO] = {
405 : .name = "io",
406 :
407 : .fixed_amount = true,
408 :
409 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
410 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
411 : .shared_data_off = offsetof(PgStatShared_IO, stats),
412 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
413 :
414 : .flush_fixed_cb = pgstat_io_flush_cb,
415 : .have_fixed_pending_cb = pgstat_io_have_pending_cb,
416 : .init_shmem_cb = pgstat_io_init_shmem_cb,
417 : .reset_all_cb = pgstat_io_reset_all_cb,
418 : .snapshot_cb = pgstat_io_snapshot_cb,
419 : },
420 :
421 : [PGSTAT_KIND_SLRU] = {
422 : .name = "slru",
423 :
424 : .fixed_amount = true,
425 :
426 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
427 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
428 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
429 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
430 :
431 : .flush_fixed_cb = pgstat_slru_flush_cb,
432 : .have_fixed_pending_cb = pgstat_slru_have_pending_cb,
433 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
434 : .reset_all_cb = pgstat_slru_reset_all_cb,
435 : .snapshot_cb = pgstat_slru_snapshot_cb,
436 : },
437 :
438 : [PGSTAT_KIND_WAL] = {
439 : .name = "wal",
440 :
441 : .fixed_amount = true,
442 :
443 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
444 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
445 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
446 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
447 :
 /* the only built-in kind with a per-backend init callback, run from pgstat_initialize() */
448 : .init_backend_cb = pgstat_wal_init_backend_cb,
449 : .flush_fixed_cb = pgstat_wal_flush_cb,
450 : .have_fixed_pending_cb = pgstat_wal_have_pending_cb,
451 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
452 : .reset_all_cb = pgstat_wal_reset_all_cb,
453 : .snapshot_cb = pgstat_wal_snapshot_cb,
454 : },
455 : };
456 :
457 : /*
458 : * Information about custom statistics kinds.
459 : *
460 : * These are kept in a separate array from the built-in kinds, for the sake
461 : * of clarity of the initializations above.
462 : *
463 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
 * NOTE(review): presumably this means index = kind - PGSTAT_KIND_CUSTOM_MIN;
 * confirm where the array is allocated and accessed.
464 : */
465 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
466 :
467 : /* ------------------------------------------------------------
468 : * Functions managing the state of the stats system for all backends.
469 : * ------------------------------------------------------------
470 : */
471 :
472 : /*
473 : * Read on-disk stats into memory at server start.
474 : *
475 : * Should only be called by the startup process or in single user mode.
476 : */
477 : void
478 1242 : pgstat_restore_stats(XLogRecPtr redo)
479 : {
480 1242 : pgstat_read_statsfile(redo);
481 1242 : }
482 :
483 : /*
484 : * Remove the stats file. This is currently used only if WAL recovery is
485 : * needed after a crash.
486 : *
487 : * Should only be called by the startup process or in single user mode.
488 : */
489 : void
490 334 : pgstat_discard_stats(void)
491 : {
492 : int ret;
493 :
494 : /* NB: this needs to be done even in single user mode */
495 :
496 334 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
497 334 : if (ret != 0)
498 : {
499 332 : if (errno == ENOENT)
500 332 : elog(DEBUG2,
501 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
502 : PGSTAT_STAT_PERMANENT_FILENAME);
503 : else
504 0 : ereport(LOG,
505 : (errcode_for_file_access(),
506 : errmsg("could not unlink permanent statistics file \"%s\": %m",
507 : PGSTAT_STAT_PERMANENT_FILENAME)));
508 : }
509 : else
510 : {
511 2 : ereport(DEBUG2,
512 : (errcode_for_file_access(),
513 : errmsg_internal("unlinked permanent statistics file \"%s\"",
514 : PGSTAT_STAT_PERMANENT_FILENAME)));
515 : }
516 :
517 : /*
518 : * Reset stats contents. This will set reset timestamps of fixed-numbered
519 : * stats to the current time (no variable stats exist).
520 : */
521 334 : pgstat_reset_after_failure();
522 334 : }
523 :
524 : /*
525 : * pgstat_before_server_shutdown() needs to be called by exactly one process
526 : * during regular server shutdowns. Otherwise all stats will be lost.
527 : *
528 : * We currently only write out stats for proc_exit(0). We might want to change
529 : * that at some point... But right now pgstat_discard_stats() would be called
530 : * during the start after a disorderly shutdown, anyway.
531 : */
532 : void
533 1050 : pgstat_before_server_shutdown(int code, Datum arg)
534 : {
535 : Assert(pgStatLocal.shmem != NULL);
536 : Assert(!pgStatLocal.shmem->is_shutdown);
537 :
538 : /*
539 : * Stats should only be reported after pgstat_initialize() and before
540 : * pgstat_shutdown(). This is a convenient point to catch most violations
541 : * of this rule.
542 : */
543 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
544 :
545 : /* flush out our own pending changes before writing out */
546 1050 : pgstat_report_stat(true);
547 :
548 : /*
549 : * Only write out file during normal shutdown. Don't even signal that
550 : * we've shutdown during irregular shutdowns, because the shutdown
551 : * sequence isn't coordinated to ensure this backend shuts down last.
552 : */
553 1050 : if (code == 0)
554 : {
555 1040 : pgStatLocal.shmem->is_shutdown = true;
556 1040 : pgstat_write_statsfile(GetRedoRecPtr());
557 : }
558 1050 : }
559 :
560 :
561 : /* ------------------------------------------------------------
562 : * Backend initialization / shutdown functions
563 : * ------------------------------------------------------------
564 : */
565 :
566 : /*
567 : * Shut down a single backend's statistics reporting at process exit.
568 : *
569 : * Flush out any remaining statistics counts. Without this, operations
570 : * triggered during backend exit (such as temp table deletions) won't be
571 : * counted.
572 : */
573 : static void
574 33436 : pgstat_shutdown_hook(int code, Datum arg)
575 : {
576 : Assert(!pgstat_is_shutdown);
577 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
578 :
579 : /*
580 : * If we got as far as discovering our own database ID, we can flush out
581 : * what we did so far. Otherwise, we'd be reporting an invalid database
582 : * ID, so forget it. (This means that accesses to pg_database during
583 : * failed backend starts might never get counted.)
584 : */
585 33436 : if (OidIsValid(MyDatabaseId))
586 26670 : pgstat_report_disconnect(MyDatabaseId);
587 :
588 33436 : pgstat_report_stat(true);
589 :
590 : /* there shouldn't be any pending changes left */
591 : Assert(dlist_is_empty(&pgStatPending));
592 33436 : dlist_init(&pgStatPending);
593 :
594 33436 : pgstat_detach_shmem();
595 :
596 : #ifdef USE_ASSERT_CHECKING
597 : pgstat_is_shutdown = true;
598 : #endif
599 33436 : }
600 :
601 : /*
602 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
603 : * BaseInit().
604 : *
605 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
606 : */
607 : void
608 33436 : pgstat_initialize(void)
609 : {
610 : Assert(!pgstat_is_initialized);
611 :
612 33436 : pgstat_attach_shmem();
613 :
614 33436 : pgstat_init_snapshot_fixed();
615 :
616 : /* Backend initialization callbacks */
617 8593052 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
618 : {
619 8559616 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
620 :
621 8559616 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
622 8526180 : continue;
623 :
624 33436 : kind_info->init_backend_cb();
625 : }
626 :
627 : /* Set up a process-exit hook to clean up */
628 33436 : before_shmem_exit(pgstat_shutdown_hook, 0);
629 :
630 : #ifdef USE_ASSERT_CHECKING
631 : pgstat_is_initialized = true;
632 : #endif
633 33436 : }
634 :
635 :
636 : /* ------------------------------------------------------------
637 : * Public functions used by backends follow
638 : * ------------------------------------------------------------
639 : */
640 :
641 : /*
642 : * Must be called by processes that performs DML: tcop/postgres.c, logical
643 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
644 : * shared memory.
645 : *
646 : * Unless called with 'force', pending stats updates are flushed happen once
647 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
648 : * block on lock acquisition, except if stats updates have been pending for
649 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
650 : *
651 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
652 : * suggested idle timeout is returned. Currently this is always
653 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
654 : * a timeout after which to call pgstat_report_stat(true), but are not
655 : * required to do so.
656 : *
657 : * Note that this is called only when not within a transaction, so it is fair
658 : * to use transaction stop time as an approximation of current time.
659 : */
660 : long
661 533824 : pgstat_report_stat(bool force)
662 : {
663 : static TimestampTz pending_since = 0;
664 : static TimestampTz last_flush = 0;
665 : bool partial_flush;
666 : TimestampTz now;
667 : bool nowait;
668 :
669 : pgstat_assert_is_up();
670 : Assert(!IsTransactionOrTransactionBlock());
671 :
672 : /* "absorb" the forced flush even if there's nothing to flush */
673 533824 : if (pgStatForceNextFlush)
674 : {
675 408 : force = true;
676 408 : pgStatForceNextFlush = false;
677 : }
678 :
679 : /* Don't expend a clock check if nothing to do */
680 533824 : if (dlist_is_empty(&pgStatPending))
681 : {
682 12560 : bool do_flush = false;
683 :
684 : /* Check for pending fixed-numbered stats */
685 2517858 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
686 : {
687 2508170 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
688 :
689 2508170 : if (!kind_info)
690 2373560 : continue;
691 134610 : if (!kind_info->fixed_amount)
692 : {
693 : Assert(kind_info->have_fixed_pending_cb == NULL);
694 62800 : continue;
695 : }
696 71810 : if (!kind_info->have_fixed_pending_cb)
697 37680 : continue;
698 :
699 34130 : if (kind_info->have_fixed_pending_cb())
700 : {
701 2872 : do_flush = true;
702 2872 : break;
703 : }
704 : }
705 :
706 12560 : if (!do_flush)
707 : {
708 : Assert(pending_since == 0);
709 9688 : return 0;
710 : }
711 : }
712 :
713 : /*
714 : * There should never be stats to report once stats are shut down. Can't
715 : * assert that before the checks above, as there is an unconditional
716 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
717 : * the process that ran pgstat_before_server_shutdown() will still call.
718 : */
719 : Assert(!pgStatLocal.shmem->is_shutdown);
720 :
721 524136 : if (force)
722 : {
723 : /*
724 : * Stats reports are forced either when it's been too long since stats
725 : * have been reported or in processes that force stats reporting to
726 : * happen at specific points (including shutdown). In the former case
727 : * the transaction stop time might be quite old, in the latter it
728 : * would never get cleared.
729 : */
730 33508 : now = GetCurrentTimestamp();
731 : }
732 : else
733 : {
734 490628 : now = GetCurrentTransactionStopTimestamp();
735 :
736 936492 : if (pending_since > 0 &&
737 445864 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
738 : {
739 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
740 0 : force = true;
741 : }
742 490628 : else if (last_flush > 0 &&
743 468860 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
744 : {
745 : /* don't flush too frequently */
746 467274 : if (pending_since == 0)
747 22732 : pending_since = now;
748 :
749 467274 : return PGSTAT_IDLE_INTERVAL;
750 : }
751 : }
752 :
753 56862 : pgstat_update_dbstats(now);
754 :
755 : /* don't wait for lock acquisition when !force */
756 56862 : nowait = !force;
757 :
758 56862 : partial_flush = false;
759 :
760 : /* flush database / relation / function / ... stats */
761 56862 : partial_flush |= pgstat_flush_pending_entries(nowait);
762 :
763 : /* flush of fixed-numbered stats */
764 14613534 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
765 : {
766 14556672 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
767 :
768 14556672 : if (!kind_info)
769 13931190 : continue;
770 625482 : if (!kind_info->fixed_amount)
771 : {
772 : Assert(kind_info->flush_fixed_cb == NULL);
773 284310 : continue;
774 : }
775 341172 : if (!kind_info->flush_fixed_cb)
776 170586 : continue;
777 :
778 170586 : partial_flush |= kind_info->flush_fixed_cb(nowait);
779 : }
780 :
781 56862 : last_flush = now;
782 :
783 : /*
784 : * If some of the pending stats could not be flushed due to lock
785 : * contention, let the caller know when to retry.
786 : */
787 56862 : if (partial_flush)
788 : {
789 : /* force should have prevented us from getting here */
790 : Assert(!force);
791 :
792 : /* remember since when stats have been pending */
793 0 : if (pending_since == 0)
794 0 : pending_since = now;
795 :
796 0 : return PGSTAT_IDLE_INTERVAL;
797 : }
798 :
799 56862 : pending_since = 0;
800 :
801 56862 : return 0;
802 : }
803 :
804 : /*
805 : * Force locally pending stats to be flushed during the next
806 : * pgstat_report_stat() call. This is useful for writing tests.
807 : */
808 : void
809 408 : pgstat_force_next_flush(void)
810 : {
811 408 : pgStatForceNextFlush = true;
812 408 : }
813 :
814 : /*
815 : * Only for use by pgstat_reset_counters()
816 : */
817 : static bool
818 21720 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
819 : {
820 21720 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
821 : }
822 :
823 : /*
824 : * Reset counters for our database.
825 : *
826 : * Permission checking for this function is managed through the normal
827 : * GRANT system.
828 : */
829 : void
830 26 : pgstat_reset_counters(void)
831 : {
832 26 : TimestampTz ts = GetCurrentTimestamp();
833 :
834 26 : pgstat_reset_matching_entries(match_db_entries,
835 : ObjectIdGetDatum(MyDatabaseId),
836 : ts);
837 26 : }
838 :
839 : /*
840 : * Reset a single variable-numbered entry.
841 : *
842 : * If the stats kind is within a database, also reset the database's
843 : * stat_reset_timestamp.
844 : *
845 : * Permission checking for this function is managed through the normal
846 : * GRANT system.
847 : */
848 : void
849 38 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
850 : {
851 38 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
852 38 : TimestampTz ts = GetCurrentTimestamp();
853 :
854 : /* not needed atm, and doesn't make sense with the current signature */
855 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
856 :
857 : /* reset the "single counter" */
858 38 : pgstat_reset_entry(kind, dboid, objid, ts);
859 :
860 38 : if (!kind_info->accessed_across_databases)
861 16 : pgstat_reset_database_timestamp(dboid, ts);
862 38 : }
863 :
864 : /*
865 : * Reset stats for all entries of a kind.
866 : *
867 : * Permission checking for this function is managed through the normal
868 : * GRANT system.
869 : */
870 : void
871 54 : pgstat_reset_of_kind(PgStat_Kind kind)
872 : {
873 54 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
874 54 : TimestampTz ts = GetCurrentTimestamp();
875 :
876 54 : if (kind_info->fixed_amount)
877 46 : kind_info->reset_all_cb(ts);
878 : else
879 8 : pgstat_reset_entries_of_kind(kind, ts);
880 54 : }
881 :
882 :
883 : /* ------------------------------------------------------------
884 : * Fetching of stats
885 : * ------------------------------------------------------------
886 : */
887 :
888 : /*
889 : * Discard any data collected in the current transaction. Any subsequent
890 : * request will cause new snapshots to be read.
891 : *
892 : * This is also invoked during transaction commit or abort to discard
893 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
894 : * cause this routine to be called.
895 : */
896 : void
897 765290 : pgstat_clear_snapshot(void)
898 : {
899 : pgstat_assert_is_up();
900 :
901 765290 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
902 : sizeof(pgStatLocal.snapshot.fixed_valid));
903 765290 : memset(&pgStatLocal.snapshot.custom_valid, 0,
904 : sizeof(pgStatLocal.snapshot.custom_valid));
905 765290 : pgStatLocal.snapshot.stats = NULL;
906 765290 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
907 :
908 : /* Release memory, if any was allocated */
909 765290 : if (pgStatLocal.snapshot.context)
910 : {
911 908 : MemoryContextDelete(pgStatLocal.snapshot.context);
912 :
913 : /* Reset variables */
914 908 : pgStatLocal.snapshot.context = NULL;
915 : }
916 :
917 : /*
918 : * Historically the backend_status.c facilities lived in this file, and
919 : * were reset with the same function. For now keep it that way, and
920 : * forward the reset request.
921 : */
922 765290 : pgstat_clear_backend_activity_snapshot();
923 :
924 : /* Reset this flag, as it may be possible that a cleanup was forced. */
925 765290 : force_stats_snapshot_clear = false;
926 765290 : }
927 :
928 : void *
929 360304 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
930 : {
931 : PgStat_HashKey key;
932 : PgStat_EntryRef *entry_ref;
933 : void *stats_data;
934 360304 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
935 :
936 : /* should be called from backends */
937 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
938 : Assert(!kind_info->fixed_amount);
939 :
940 360304 : pgstat_prep_snapshot();
941 :
942 360304 : key.kind = kind;
943 360304 : key.dboid = dboid;
944 360304 : key.objid = objid;
945 :
946 : /* if we need to build a full snapshot, do so */
947 360304 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
948 460 : pgstat_build_snapshot();
949 :
950 : /* if caching is desired, look up in cache */
951 360304 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
952 : {
953 9210 : PgStat_SnapshotEntry *entry = NULL;
954 :
955 9210 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
956 :
957 9210 : if (entry)
958 608 : return entry->data;
959 :
960 : /*
961 : * If we built a full snapshot and the key is not in
962 : * pgStatLocal.snapshot.stats, there are no matching stats.
963 : */
964 8602 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
965 28 : return NULL;
966 : }
967 :
968 359668 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
969 :
970 359668 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
971 :
972 359668 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
973 : {
974 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
975 8354 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
976 : {
977 1766 : PgStat_SnapshotEntry *entry = NULL;
978 : bool found;
979 :
980 1766 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
981 : Assert(!found);
982 1766 : entry->data = NULL;
983 : }
984 8354 : return NULL;
985 : }
986 :
987 : /*
988 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
989 : * otherwise we could quickly end up with a fair bit of memory used due to
990 : * repeated accesses.
991 : */
992 351314 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
993 344506 : stats_data = palloc(kind_info->shared_data_len);
994 : else
995 6808 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
996 6808 : kind_info->shared_data_len);
997 :
998 351314 : pgstat_lock_entry_shared(entry_ref, false);
999 702628 : memcpy(stats_data,
1000 351314 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1001 351314 : kind_info->shared_data_len);
1002 351314 : pgstat_unlock_entry(entry_ref);
1003 :
1004 351314 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1005 : {
1006 6808 : PgStat_SnapshotEntry *entry = NULL;
1007 : bool found;
1008 :
1009 6808 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1010 6808 : entry->data = stats_data;
1011 : }
1012 :
1013 351314 : return stats_data;
1014 : }
1015 :
1016 : /*
1017 : * If a stats snapshot has been taken, return the timestamp at which that was
1018 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1019 : * false.
1020 : */
1021 : TimestampTz
1022 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1023 : {
1024 60 : if (force_stats_snapshot_clear)
1025 18 : pgstat_clear_snapshot();
1026 :
1027 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1028 : {
1029 24 : *have_snapshot = true;
1030 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1031 : }
1032 :
1033 36 : *have_snapshot = false;
1034 :
1035 36 : return 0;
1036 : }
1037 :
1038 : bool
1039 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1040 : {
1041 : /* fixed-numbered stats always exist */
1042 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1043 12 : return true;
1044 :
1045 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1046 : }
1047 :
1048 : /*
1049 : * Ensure snapshot for fixed-numbered 'kind' exists.
1050 : *
1051 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1052 : * massaging the data into the desired format.
1053 : */
1054 : void
1055 424 : pgstat_snapshot_fixed(PgStat_Kind kind)
1056 : {
1057 : Assert(pgstat_is_kind_valid(kind));
1058 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1059 :
1060 424 : if (force_stats_snapshot_clear)
1061 0 : pgstat_clear_snapshot();
1062 :
1063 424 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1064 24 : pgstat_build_snapshot();
1065 : else
1066 400 : pgstat_build_snapshot_fixed(kind);
1067 :
1068 424 : if (pgstat_is_kind_builtin(kind))
1069 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1070 0 : else if (pgstat_is_kind_custom(kind))
1071 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1072 424 : }
1073 :
1074 : static void
1075 33436 : pgstat_init_snapshot_fixed(void)
1076 : {
1077 : /*
1078 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1079 : * stats kinds.
1080 : */
1081 4346680 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1082 : {
1083 4313244 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1084 :
1085 4313244 : if (!kind_info || !kind_info->fixed_amount)
1086 4313244 : continue;
1087 :
1088 0 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1089 0 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1090 : }
1091 33436 : }
1092 :
1093 : static void
1094 360356 : pgstat_prep_snapshot(void)
1095 : {
1096 360356 : if (force_stats_snapshot_clear)
1097 18 : pgstat_clear_snapshot();
1098 :
1099 360356 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1100 9262 : pgStatLocal.snapshot.stats != NULL)
1101 359448 : return;
1102 :
1103 908 : if (!pgStatLocal.snapshot.context)
1104 908 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1105 : "PgStat Snapshot",
1106 : ALLOCSET_SMALL_SIZES);
1107 :
1108 908 : pgStatLocal.snapshot.stats =
1109 908 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1110 : PGSTAT_SNAPSHOT_HASH_SIZE,
1111 : NULL);
1112 : }
1113 :
1114 : static void
1115 484 : pgstat_build_snapshot(void)
1116 : {
1117 : dshash_seq_status hstat;
1118 : PgStatShared_HashEntry *p;
1119 :
1120 : /* should only be called when we need a snapshot */
1121 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1122 :
1123 : /* snapshot already built */
1124 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1125 432 : return;
1126 :
1127 52 : pgstat_prep_snapshot();
1128 :
1129 : Assert(pgStatLocal.snapshot.stats->members == 0);
1130 :
1131 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1132 :
1133 : /*
1134 : * Snapshot all variable stats.
1135 : */
1136 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1137 52076 : while ((p = dshash_seq_next(&hstat)) != NULL)
1138 : {
1139 52024 : PgStat_Kind kind = p->key.kind;
1140 52024 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1141 : bool found;
1142 : PgStat_SnapshotEntry *entry;
1143 : PgStatShared_Common *stats_data;
1144 :
1145 : /*
1146 : * Check if the stats object should be included in the snapshot.
1147 : * Unless the stats kind can be accessed from all databases (e.g.,
1148 : * database stats themselves), we only include stats for the current
1149 : * database or objects not associated with a database (e.g. shared
1150 : * relations).
1151 : */
1152 52024 : if (p->key.dboid != MyDatabaseId &&
1153 15420 : p->key.dboid != InvalidOid &&
1154 12872 : !kind_info->accessed_across_databases)
1155 12972 : continue;
1156 :
1157 39256 : if (p->dropped)
1158 204 : continue;
1159 :
1160 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1161 :
1162 39052 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1163 : Assert(stats_data);
1164 :
1165 39052 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1166 : Assert(!found);
1167 :
1168 78104 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1169 39052 : kind_info->shared_size);
1170 :
1171 : /*
1172 : * Acquire the LWLock directly instead of using
1173 : * pg_stat_lock_entry_shared() which requires a reference.
1174 : */
1175 39052 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1176 78104 : memcpy(entry->data,
1177 39052 : pgstat_get_entry_data(kind, stats_data),
1178 39052 : kind_info->shared_size);
1179 39052 : LWLockRelease(&stats_data->lock);
1180 : }
1181 52 : dshash_seq_term(&hstat);
1182 :
1183 : /*
1184 : * Build snapshot of all fixed-numbered stats.
1185 : */
1186 13364 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1187 : {
1188 13312 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1189 :
1190 13312 : if (!kind_info)
1191 12740 : continue;
1192 572 : if (!kind_info->fixed_amount)
1193 : {
1194 : Assert(kind_info->snapshot_cb == NULL);
1195 260 : continue;
1196 : }
1197 :
1198 312 : pgstat_build_snapshot_fixed(kind);
1199 : }
1200 :
1201 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1202 : }
1203 :
1204 : static void
1205 6952 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1206 : {
1207 6952 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1208 : int idx;
1209 : bool *valid;
1210 :
1211 : /* Position in fixed_valid or custom_valid */
1212 6952 : if (pgstat_is_kind_builtin(kind))
1213 : {
1214 6952 : idx = kind;
1215 6952 : valid = pgStatLocal.snapshot.fixed_valid;
1216 : }
1217 : else
1218 : {
1219 0 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1220 0 : valid = pgStatLocal.snapshot.custom_valid;
1221 : }
1222 :
1223 : Assert(kind_info->fixed_amount);
1224 : Assert(kind_info->snapshot_cb != NULL);
1225 :
1226 6952 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1227 : {
1228 : /* rebuild every time */
1229 6270 : valid[idx] = false;
1230 : }
1231 682 : else if (valid[idx])
1232 : {
1233 : /* in snapshot mode we shouldn't get called again */
1234 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1235 12 : return;
1236 : }
1237 :
1238 : Assert(!valid[idx]);
1239 :
1240 6940 : kind_info->snapshot_cb();
1241 :
1242 : Assert(!valid[idx]);
1243 6940 : valid[idx] = true;
1244 : }
1245 :
1246 :
1247 : /* ------------------------------------------------------------
1248 : * Backend-local pending stats infrastructure
1249 : * ------------------------------------------------------------
1250 : */
1251 :
1252 : /*
1253 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1254 : * stats if not already done.
1255 : *
1256 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1257 : * created, false otherwise.
1258 : */
1259 : PgStat_EntryRef *
1260 3146098 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1261 : {
1262 : PgStat_EntryRef *entry_ref;
1263 :
1264 : /* need to be able to flush out */
1265 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1266 :
1267 3146098 : if (unlikely(!pgStatPendingContext))
1268 : {
1269 29064 : pgStatPendingContext =
1270 29064 : AllocSetContextCreate(TopMemoryContext,
1271 : "PgStat Pending",
1272 : ALLOCSET_SMALL_SIZES);
1273 : }
1274 :
1275 3146098 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1276 : true, created_entry);
1277 :
1278 3146098 : if (entry_ref->pending == NULL)
1279 : {
1280 1608718 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1281 :
1282 : Assert(entrysize != (size_t) -1);
1283 :
1284 1608718 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1285 1608718 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1286 : }
1287 :
1288 3146098 : return entry_ref;
1289 : }
1290 :
1291 : /*
1292 : * Return an existing stats entry, or NULL.
1293 : *
1294 : * This should only be used for helper function for pgstatfuncs.c - outside of
1295 : * that it shouldn't be needed.
1296 : */
1297 : PgStat_EntryRef *
1298 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1299 : {
1300 : PgStat_EntryRef *entry_ref;
1301 :
1302 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1303 :
1304 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1305 30 : return NULL;
1306 :
1307 54 : return entry_ref;
1308 : }
1309 :
1310 : void
1311 1608718 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1312 : {
1313 1608718 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1314 1608718 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1315 1608718 : void *pending_data = entry_ref->pending;
1316 :
1317 : Assert(pending_data != NULL);
1318 : /* !fixed_amount stats should be handled explicitly */
1319 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1320 :
1321 1608718 : if (kind_info->delete_pending_cb)
1322 1514556 : kind_info->delete_pending_cb(entry_ref);
1323 :
1324 1608718 : pfree(pending_data);
1325 1608718 : entry_ref->pending = NULL;
1326 :
1327 1608718 : dlist_delete(&entry_ref->pending_node);
1328 1608718 : }
1329 :
1330 : /*
1331 : * Flush out pending stats for database objects (databases, relations,
1332 : * functions).
1333 : */
1334 : static bool
1335 56862 : pgstat_flush_pending_entries(bool nowait)
1336 : {
1337 56862 : bool have_pending = false;
1338 56862 : dlist_node *cur = NULL;
1339 :
1340 : /*
1341 : * Need to be a bit careful iterating over the list of pending entries.
1342 : * Processing a pending entry may queue further pending entries to the end
1343 : * of the list that we want to process, so a simple iteration won't do.
1344 : * Further complicating matters is that we want to delete the current
1345 : * entry in each iteration from the list if we flushed successfully.
1346 : *
1347 : * So we just keep track of the next pointer in each loop iteration.
1348 : */
1349 56862 : if (!dlist_is_empty(&pgStatPending))
1350 54058 : cur = dlist_head_node(&pgStatPending);
1351 :
1352 1605198 : while (cur)
1353 : {
1354 1548336 : PgStat_EntryRef *entry_ref =
1355 1548336 : dlist_container(PgStat_EntryRef, pending_node, cur);
1356 1548336 : PgStat_HashKey key = entry_ref->shared_entry->key;
1357 1548336 : PgStat_Kind kind = key.kind;
1358 1548336 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1359 : bool did_flush;
1360 : dlist_node *next;
1361 :
1362 : Assert(!kind_info->fixed_amount);
1363 : Assert(kind_info->flush_pending_cb != NULL);
1364 :
1365 : /* flush the stats, if possible */
1366 1548336 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1367 :
1368 : Assert(did_flush || nowait);
1369 :
1370 : /* determine next entry, before deleting the pending entry */
1371 1548336 : if (dlist_has_next(&pgStatPending, cur))
1372 1494278 : next = dlist_next_node(&pgStatPending, cur);
1373 : else
1374 54058 : next = NULL;
1375 :
1376 : /* if successfully flushed, remove entry */
1377 1548336 : if (did_flush)
1378 1548336 : pgstat_delete_pending_entry(entry_ref);
1379 : else
1380 0 : have_pending = true;
1381 :
1382 1548336 : cur = next;
1383 : }
1384 :
1385 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1386 :
1387 56862 : return have_pending;
1388 : }
1389 :
1390 :
1391 : /* ------------------------------------------------------------
1392 : * Helper / infrastructure functions
1393 : * ------------------------------------------------------------
1394 : */
1395 :
1396 : PgStat_Kind
1397 166 : pgstat_get_kind_from_str(char *kind_str)
1398 : {
1399 476 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1400 : {
1401 470 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1402 160 : return kind;
1403 : }
1404 :
1405 : /* Check the custom set of cumulative stats */
1406 6 : if (pgstat_kind_custom_infos)
1407 : {
1408 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1409 : {
1410 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1411 :
1412 0 : if (pgstat_kind_custom_infos[idx] &&
1413 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1414 0 : return kind;
1415 : }
1416 : }
1417 :
1418 6 : ereport(ERROR,
1419 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1420 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1421 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1422 : }
1423 :
1424 : static inline bool
1425 613706 : pgstat_is_kind_valid(PgStat_Kind kind)
1426 : {
1427 613706 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1428 : }
1429 :
1430 : const PgStat_KindInfo *
1431 39101590 : pgstat_get_kind_info(PgStat_Kind kind)
1432 : {
1433 39101590 : if (pgstat_is_kind_builtin(kind))
1434 8792126 : return &pgstat_kind_builtin_infos[kind];
1435 :
1436 30309464 : if (pgstat_is_kind_custom(kind))
1437 : {
1438 18321096 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1439 :
1440 18321096 : if (pgstat_kind_custom_infos == NULL ||
1441 0 : pgstat_kind_custom_infos[idx] == NULL)
1442 18321096 : return NULL;
1443 0 : return pgstat_kind_custom_infos[idx];
1444 : }
1445 :
1446 11988368 : return NULL;
1447 : }
1448 :
1449 : /*
1450 : * Register a new stats kind.
1451 : *
1452 : * PgStat_Kinds must be globally unique across all extensions. Refer
1453 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1454 : * unique ID for your extension, to avoid conflicts with other extension
1455 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1456 : * needlessly reserving a new ID.
1457 : */
1458 : void
1459 0 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1460 : {
1461 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1462 :
1463 0 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1464 0 : ereport(ERROR,
1465 : (errmsg("custom cumulative statistics name is invalid"),
1466 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1467 :
1468 0 : if (!pgstat_is_kind_custom(kind))
1469 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1470 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1471 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1472 :
1473 0 : if (!process_shared_preload_libraries_in_progress)
1474 0 : ereport(ERROR,
1475 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1476 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1477 :
1478 : /*
1479 : * Check some data for fixed-numbered stats.
1480 : */
1481 0 : if (kind_info->fixed_amount)
1482 : {
1483 0 : if (kind_info->shared_size == 0)
1484 0 : ereport(ERROR,
1485 : (errmsg("custom cumulative statistics property is invalid"),
1486 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1487 : }
1488 :
1489 : /*
1490 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1491 : */
1492 0 : if (pgstat_kind_custom_infos == NULL)
1493 : {
1494 0 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1495 0 : MemoryContextAllocZero(TopMemoryContext,
1496 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1497 : }
1498 :
1499 0 : if (pgstat_kind_custom_infos[idx] != NULL &&
1500 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1501 0 : ereport(ERROR,
1502 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1503 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1504 : pgstat_kind_custom_infos[idx]->name)));
1505 :
1506 : /* check for existing custom stats with the same name */
1507 0 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1508 : {
1509 0 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1510 :
1511 0 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1512 0 : continue;
1513 0 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1514 0 : ereport(ERROR,
1515 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1516 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1517 : }
1518 :
1519 : /* Register it */
1520 0 : pgstat_kind_custom_infos[idx] = kind_info;
1521 0 : ereport(LOG,
1522 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1523 : kind_info->name, kind)));
1524 0 : }
1525 :
/*
 * Stats should only be reported after pgstat_initialize() and before
 * pgstat_shutdown(). This check is put in a few central places to catch
 * violations of this rule more easily.
 *
 * The function is only defined in builds with USE_ASSERT_CHECKING.  It
 * asserts that pgstat_is_initialized is set and pgstat_is_shutdown is not.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1538 :
1539 :
1540 : /* ------------------------------------------------------------
1541 : * reading and writing of on-disk stats file
1542 : * ------------------------------------------------------------
1543 : */
1544 :
/* helpers for pgstat_write_statsfile() */

/*
 * Write the len bytes starting at ptr to fpout, as a single item.
 *
 * The outcome of the write is deliberately not checked here: the caller
 * tests the stream with ferror() once, after everything has been written.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	size_t		rc = fwrite(ptr, len, 1, fpout);

	/* we'll check for errors with ferror once at the end */
	(void) rc;
}

#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1558 :
1559 : /*
1560 : * This function is called in the last process that is accessing the shared
1561 : * stats so locking is not required.
1562 : */
1563 : static void
1564 1040 : pgstat_write_statsfile(XLogRecPtr redo)
1565 : {
1566 : FILE *fpout;
1567 : int32 format_id;
1568 1040 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1569 1040 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1570 : dshash_seq_status hstat;
1571 : PgStatShared_HashEntry *ps;
1572 :
1573 : pgstat_assert_is_up();
1574 :
1575 : /* should be called only by the checkpointer or single user mode */
1576 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1577 :
1578 : /* we're shutting down, so it's ok to just override this */
1579 1040 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1580 :
1581 1040 : elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
1582 : LSN_FORMAT_ARGS(redo));
1583 :
1584 : /*
1585 : * Open the statistics temp file to write out the current values.
1586 : */
1587 1040 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1588 1040 : if (fpout == NULL)
1589 : {
1590 0 : ereport(LOG,
1591 : (errcode_for_file_access(),
1592 : errmsg("could not open temporary statistics file \"%s\": %m",
1593 : tmpfile)));
1594 0 : return;
1595 : }
1596 :
1597 : /*
1598 : * Write the file header --- currently just a format ID.
1599 : */
1600 1040 : format_id = PGSTAT_FILE_FORMAT_ID;
1601 1040 : write_chunk_s(fpout, &format_id);
1602 :
1603 : /* Write the redo LSN, used to cross check the file read */
1604 1040 : write_chunk_s(fpout, &redo);
1605 :
1606 : /* Write various stats structs for fixed number of objects */
1607 267280 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1608 : {
1609 : char *ptr;
1610 266240 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1611 :
1612 266240 : if (!info || !info->fixed_amount)
1613 260000 : continue;
1614 :
1615 6240 : if (pgstat_is_kind_builtin(kind))
1616 : Assert(info->snapshot_ctl_off != 0);
1617 :
1618 6240 : pgstat_build_snapshot_fixed(kind);
1619 6240 : if (pgstat_is_kind_builtin(kind))
1620 6240 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1621 : else
1622 0 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1623 :
1624 6240 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1625 6240 : write_chunk_s(fpout, &kind);
1626 6240 : write_chunk(fpout, ptr, info->shared_data_len);
1627 : }
1628 :
1629 : /*
1630 : * Walk through the stats entries
1631 : */
1632 1040 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1633 299740 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1634 : {
1635 : PgStatShared_Common *shstats;
1636 298700 : const PgStat_KindInfo *kind_info = NULL;
1637 :
1638 298700 : CHECK_FOR_INTERRUPTS();
1639 :
1640 : /* we may have some "dropped" entries not yet removed, skip them */
1641 : Assert(!ps->dropped);
1642 298700 : if (ps->dropped)
1643 0 : continue;
1644 :
1645 : /*
1646 : * This discards data related to custom stats kinds that are unknown
1647 : * to this process.
1648 : */
1649 298700 : if (!pgstat_is_kind_valid(ps->key.kind))
1650 : {
1651 0 : elog(WARNING, "found unknown stats entry %u/%u/%llu",
1652 : ps->key.kind, ps->key.dboid,
1653 : (unsigned long long) ps->key.objid);
1654 0 : continue;
1655 : }
1656 :
1657 298700 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1658 :
1659 298700 : kind_info = pgstat_get_kind_info(ps->key.kind);
1660 :
1661 : /* if not dropped the valid-entry refcount should exist */
1662 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1663 :
1664 298700 : if (!kind_info->to_serialized_name)
1665 : {
1666 : /* normal stats entry, identified by PgStat_HashKey */
1667 298550 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1668 298550 : write_chunk_s(fpout, &ps->key);
1669 : }
1670 : else
1671 : {
1672 : /* stats entry identified by name on disk (e.g. slots) */
1673 : NameData name;
1674 :
1675 150 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1676 :
1677 150 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1678 150 : write_chunk_s(fpout, &ps->key.kind);
1679 150 : write_chunk_s(fpout, &name);
1680 : }
1681 :
1682 : /* Write except the header part of the entry */
1683 298700 : write_chunk(fpout,
1684 : pgstat_get_entry_data(ps->key.kind, shstats),
1685 : pgstat_get_entry_len(ps->key.kind));
1686 : }
1687 1040 : dshash_seq_term(&hstat);
1688 :
1689 : /*
1690 : * No more output to be done. Close the temp file and replace the old
1691 : * pgstat.stat with it. The ferror() check replaces testing for error
1692 : * after each individual fputc or fwrite (in write_chunk()) above.
1693 : */
1694 1040 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1695 :
1696 1040 : if (ferror(fpout))
1697 : {
1698 0 : ereport(LOG,
1699 : (errcode_for_file_access(),
1700 : errmsg("could not write temporary statistics file \"%s\": %m",
1701 : tmpfile)));
1702 0 : FreeFile(fpout);
1703 0 : unlink(tmpfile);
1704 : }
1705 1040 : else if (FreeFile(fpout) < 0)
1706 : {
1707 0 : ereport(LOG,
1708 : (errcode_for_file_access(),
1709 : errmsg("could not close temporary statistics file \"%s\": %m",
1710 : tmpfile)));
1711 0 : unlink(tmpfile);
1712 : }
1713 1040 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1714 : {
1715 : /* durable_rename already emitted log message */
1716 0 : unlink(tmpfile);
1717 : }
1718 : }
1719 :
/* helpers for pgstat_read_statsfile() */

/*
 * Read len bytes from fpin into the buffer at ptr.
 *
 * Returns true if all len bytes could be read, false otherwise.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1728 :
1729 : /*
1730 : * Reads in existing statistics file into memory.
1731 : *
1732 : * This function is called in the only process that is accessing the shared
1733 : * stats so locking is not required.
1734 : */
1735 : static void
1736 1242 : pgstat_read_statsfile(XLogRecPtr redo)
1737 : {
1738 : FILE *fpin;
1739 : int32 format_id;
1740 : bool found;
1741 1242 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1742 1242 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1743 : XLogRecPtr file_redo;
1744 :
1745 : /* shouldn't be called from postmaster */
1746 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1747 :
1748 1242 : elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
1749 : LSN_FORMAT_ARGS(redo));
1750 :
1751 : /*
1752 : * Try to open the stats file. If it doesn't exist, the backends simply
1753 : * returns zero for anything and statistics simply starts from scratch
1754 : * with empty counters.
1755 : *
1756 : * ENOENT is a possibility if stats collection was previously disabled or
1757 : * has not yet written the stats file for the first time. Any other
1758 : * failure condition is suspicious.
1759 : */
1760 1242 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1761 : {
1762 84 : if (errno != ENOENT)
1763 0 : ereport(LOG,
1764 : (errcode_for_file_access(),
1765 : errmsg("could not open statistics file \"%s\": %m",
1766 : statfile)));
1767 84 : pgstat_reset_after_failure();
1768 84 : return;
1769 : }
1770 :
1771 : /*
1772 : * Verify it's of the expected format.
1773 : */
1774 1158 : if (!read_chunk_s(fpin, &format_id))
1775 : {
1776 0 : elog(WARNING, "could not read format ID");
1777 0 : goto error;
1778 : }
1779 :
1780 1158 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1781 : {
1782 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1783 : format_id, PGSTAT_FILE_FORMAT_ID);
1784 2 : goto error;
1785 : }
1786 :
1787 : /*
1788 : * Read the redo LSN stored in the file.
1789 : */
1790 1156 : if (!read_chunk_s(fpin, &file_redo))
1791 : {
1792 0 : elog(WARNING, "could not read redo LSN");
1793 0 : goto error;
1794 : }
1795 :
1796 1156 : if (file_redo != redo)
1797 : {
1798 20 : elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)",
1799 : LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo));
1800 20 : goto error;
1801 : }
1802 :
1803 : /*
1804 : * We found an existing statistics file. Read it and put all the stats
1805 : * data into place.
1806 : */
1807 : for (;;)
1808 315006 : {
1809 316142 : int t = fgetc(fpin);
1810 :
1811 316142 : switch (t)
1812 : {
1813 6816 : case PGSTAT_FILE_ENTRY_FIXED:
1814 : {
1815 : PgStat_Kind kind;
1816 : const PgStat_KindInfo *info;
1817 : char *ptr;
1818 :
1819 : /* entry for fixed-numbered stats */
1820 6816 : if (!read_chunk_s(fpin, &kind))
1821 : {
1822 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1823 0 : goto error;
1824 : }
1825 :
1826 6816 : if (!pgstat_is_kind_valid(kind))
1827 : {
1828 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1829 : kind, t);
1830 0 : goto error;
1831 : }
1832 :
1833 6816 : info = pgstat_get_kind_info(kind);
1834 6816 : if (!info)
1835 : {
1836 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1837 : kind, t);
1838 0 : goto error;
1839 : }
1840 :
1841 6816 : if (!info->fixed_amount)
1842 : {
1843 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1844 : kind, t);
1845 0 : goto error;
1846 : }
1847 :
1848 : /* Load back stats into shared memory */
1849 6816 : if (pgstat_is_kind_builtin(kind))
1850 6816 : ptr = ((char *) shmem) + info->shared_ctl_off +
1851 6816 : info->shared_data_off;
1852 : else
1853 : {
1854 0 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1855 :
1856 0 : ptr = ((char *) shmem->custom_data[idx]) +
1857 0 : info->shared_data_off;
1858 : }
1859 :
1860 6816 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1861 : {
1862 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1863 : kind, t, info->shared_data_len);
1864 0 : goto error;
1865 : }
1866 :
1867 6816 : break;
1868 : }
1869 308190 : case PGSTAT_FILE_ENTRY_HASH:
1870 : case PGSTAT_FILE_ENTRY_NAME:
1871 : {
1872 : PgStat_HashKey key;
1873 : PgStatShared_HashEntry *p;
1874 : PgStatShared_Common *header;
1875 :
1876 308190 : CHECK_FOR_INTERRUPTS();
1877 :
1878 308190 : if (t == PGSTAT_FILE_ENTRY_HASH)
1879 : {
1880 : /* normal stats entry, identified by PgStat_HashKey */
1881 308098 : if (!read_chunk_s(fpin, &key))
1882 : {
1883 0 : elog(WARNING, "could not read key for entry of type %c", t);
1884 0 : goto error;
1885 : }
1886 :
1887 308098 : if (!pgstat_is_kind_valid(key.kind))
1888 : {
1889 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%llu of type %c",
1890 : key.kind, key.dboid,
1891 : (unsigned long long) key.objid, t);
1892 0 : goto error;
1893 : }
1894 : }
1895 : else
1896 : {
1897 : /* stats entry identified by name on disk (e.g. slots) */
1898 92 : const PgStat_KindInfo *kind_info = NULL;
1899 : PgStat_Kind kind;
1900 : NameData name;
1901 :
1902 92 : if (!read_chunk_s(fpin, &kind))
1903 : {
1904 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1905 0 : goto error;
1906 : }
1907 92 : if (!read_chunk_s(fpin, &name))
1908 : {
1909 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1910 : kind, t);
1911 0 : goto error;
1912 : }
1913 92 : if (!pgstat_is_kind_valid(kind))
1914 : {
1915 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1916 : kind, t);
1917 0 : goto error;
1918 : }
1919 :
1920 92 : kind_info = pgstat_get_kind_info(kind);
1921 92 : if (!kind_info)
1922 : {
1923 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1924 : kind, t);
1925 0 : goto error;
1926 : }
1927 :
1928 92 : if (!kind_info->from_serialized_name)
1929 : {
1930 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1931 : kind, t);
1932 0 : goto error;
1933 : }
1934 :
1935 92 : if (!kind_info->from_serialized_name(&name, &key))
1936 : {
1937 : /* skip over data for entry we don't care about */
1938 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1939 : {
1940 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1941 : NameStr(name), kind, t);
1942 0 : goto error;
1943 : }
1944 :
1945 2 : continue;
1946 : }
1947 :
1948 : Assert(key.kind == kind);
1949 : }
1950 :
1951 : /*
1952 : * This intentionally doesn't use pgstat_get_entry_ref() -
1953 : * putting all stats into checkpointer's
1954 : * pgStatEntryRefHash would be wasted effort and memory.
1955 : */
1956 308188 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1957 :
1958 : /* don't allow duplicate entries */
1959 308188 : if (found)
1960 : {
1961 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1962 0 : elog(WARNING, "found duplicate stats entry %u/%u/%llu of type %c",
1963 : key.kind, key.dboid,
1964 : (unsigned long long) key.objid, t);
1965 0 : goto error;
1966 : }
1967 :
1968 308188 : header = pgstat_init_entry(key.kind, p);
1969 308188 : dshash_release_lock(pgStatLocal.shared_hash, p);
1970 :
1971 308188 : if (!read_chunk(fpin,
1972 : pgstat_get_entry_data(key.kind, header),
1973 : pgstat_get_entry_len(key.kind)))
1974 : {
1975 0 : elog(WARNING, "could not read data for entry %u/%u/%llu of type %c",
1976 : key.kind, key.dboid,
1977 : (unsigned long long) key.objid, t);
1978 0 : goto error;
1979 : }
1980 :
1981 308188 : break;
1982 : }
1983 1136 : case PGSTAT_FILE_ENTRY_END:
1984 :
1985 : /*
1986 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
1987 : * file
1988 : */
1989 1136 : if (fgetc(fpin) != EOF)
1990 : {
1991 2 : elog(WARNING, "could not read end-of-file");
1992 2 : goto error;
1993 : }
1994 :
1995 1134 : goto done;
1996 :
1997 0 : default:
1998 0 : elog(WARNING, "could not read entry of type %c", t);
1999 0 : goto error;
2000 : }
2001 : }
2002 :
2003 1158 : done:
2004 1158 : FreeFile(fpin);
2005 :
2006 1158 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2007 1158 : unlink(statfile);
2008 :
2009 1158 : return;
2010 :
2011 24 : error:
2012 24 : ereport(LOG,
2013 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2014 :
2015 24 : pgstat_reset_after_failure();
2016 :
2017 24 : goto done;
2018 : }
2019 :
2020 : /*
2021 : * Helper to reset / drop stats after a crash or after restoring stats from
2022 : * disk failed, potentially after already loading parts.
2023 : */
2024 : static void
2025 442 : pgstat_reset_after_failure(void)
2026 : {
2027 442 : TimestampTz ts = GetCurrentTimestamp();
2028 :
2029 : /* reset fixed-numbered stats */
2030 113594 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2031 : {
2032 113152 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2033 :
2034 113152 : if (!kind_info || !kind_info->fixed_amount)
2035 110500 : continue;
2036 :
2037 2652 : kind_info->reset_all_cb(ts);
2038 : }
2039 :
2040 : /* and drop variable-numbered ones */
2041 442 : pgstat_drop_all_entries();
2042 442 : }
2043 :
2044 : /*
2045 : * GUC assign_hook for stats_fetch_consistency.
2046 : */
2047 : void
2048 5142 : assign_stats_fetch_consistency(int newval, void *extra)
2049 : {
2050 : /*
2051 : * Changing this value in a transaction may cause snapshot state
2052 : * inconsistencies, so force a clear of the current snapshot on the next
2053 : * snapshot build attempt.
2054 : */
2055 5142 : if (pgstat_fetch_consistency != newval)
2056 3038 : force_stats_snapshot_clear = true;
2057 5142 : }
|