Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered stats, pending data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_relation.c
87 : * - pgstat_replslot.c
88 : * - pgstat_slru.c
89 : * - pgstat_subscription.c
90 : * - pgstat_wal.c
91 : *
92 : * Whenever possible infrastructure files should not contain code related to
93 : * specific kinds of stats.
94 : *
95 : *
96 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97 : *
98 : * IDENTIFICATION
99 : * src/backend/utils/activity/pgstat.c
100 : * ----------
101 : */
102 : #include "postgres.h"
103 :
104 : #include <unistd.h>
105 :
106 : #include "access/xact.h"
107 : #include "lib/dshash.h"
108 : #include "pgstat.h"
109 : #include "storage/fd.h"
110 : #include "storage/ipc.h"
111 : #include "storage/lwlock.h"
112 : #include "utils/guc_hooks.h"
113 : #include "utils/memutils.h"
114 : #include "utils/pgstat_internal.h"
115 : #include "utils/timestamp.h"
116 :
117 :
118 : /* ----------
119 : * Timer definitions.
120 : *
121 : * In milliseconds.
122 : * ----------
123 : */
124 :
125 : /* minimum interval between non-forced stats flushes */
126 : #define PGSTAT_MIN_INTERVAL 1000
127 : /* how long stats updates may stay pending before a flush blocks on locks */
128 : #define PGSTAT_MAX_INTERVAL 60000
129 : /* when to call pgstat_report_stat() again, even when idle */
130 : #define PGSTAT_IDLE_INTERVAL 10000
131 :
132 : /* ----------
133 : * Initial size hints for the hash tables used in statistics.
134 : * ----------
135 : */
136 :
137 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
138 :
139 : /* ---------
140 : * Single-character identifiers used in the stats file.
141 : * ---------
142 : */
143 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
144 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
145 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
146 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
147 : * PgStat_HashKey */
148 :
149 : /* one entry of the hash table used for statistics snapshots */
150 : typedef struct PgStat_SnapshotEntry
151 : {
152 : PgStat_HashKey key; /* hash key, see SH_KEY in the simplehash setup */
153 : char status; /* for simplehash use */
154 : void *data; /* the stats data itself */
155 : } PgStat_SnapshotEntry;
156 :
157 :
158 : /* ----------
159 : * Backend-local Hash Table Definitions
160 : * ----------
161 : */
162 :
163 : /*
 * simplehash instantiation for stats snapshot entries: elements are
 * PgStat_SnapshotEntry, keyed by PgStat_HashKey, with generated names
 * prefixed by pgstat_snapshot.  Hashing and equality are delegated to
 * pgstat_hash_hash_key() and pgstat_cmp_hash_key() over the whole key.
 */
164 : #define SH_PREFIX pgstat_snapshot
165 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
166 : #define SH_KEY_TYPE PgStat_HashKey
167 : #define SH_KEY key
168 : #define SH_HASH_KEY(tb, key) \
169 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
170 : #define SH_EQUAL(tb, a, b) \
171 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
172 : #define SH_SCOPE static inline
173 : #define SH_DEFINE
174 : #define SH_DECLARE
175 : #include "lib/simplehash.h"
176 :
177 :
178 : /* ----------
179 : * Forward declarations of file-local (static) functions
180 : * ----------
181 : */
182 :
183 : static void pgstat_write_statsfile(void);
184 : static void pgstat_read_statsfile(void);
185 :
186 : static void pgstat_init_snapshot_fixed(void);
187 :
188 : static void pgstat_reset_after_failure(void);
189 :
190 : static bool pgstat_flush_pending_entries(bool nowait);
191 :
192 : static void pgstat_prep_snapshot(void);
193 : static void pgstat_build_snapshot(void);
194 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195 :
196 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
197 :
198 :
199 : /* ----------
200 : * GUC parameters
201 : * ----------
202 : */
203 :
204 : bool pgstat_track_counts = false;
205 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
206 :
207 :
208 : /* ----------
209 : * state shared with pgstat_*.c
210 : * ----------
211 : */
212 :
213 : PgStat_LocalState pgStatLocal;
214 :
215 : /*
216 : * Track pending reports for fixed-numbered stats, used by
217 : * pgstat_report_stat().
218 : */
219 : bool pgstat_report_fixed = false;
220 :
221 : /* ----------
222 : * Local data
223 : *
224 : * NB: There should be only variables related to stats infrastructure here,
225 : * not for specific kinds of stats.
226 : * ----------
227 : */
228 :
229 : /*
230 : * Memory context for pending data. Kept separate mostly to make it
231 : * easier to track / attribute memory usage.
232 : * NOTE(review): only this one context is declared in this file section.
233 : */
234 :
235 : static MemoryContext pgStatPendingContext = NULL;
236 :
237 : /*
238 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
239 : *
240 : * Newly pending entries should only ever be added to the end of the list,
241 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
242 : */
243 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
244 :
245 :
246 : /*
247 : * Force the next stats flush to happen regardless of
248 : * PGSTAT_MIN_INTERVAL. Useful in test scripts. Set by
 * pgstat_force_next_flush() and consumed (reset) by pgstat_report_stat().
249 : */
250 : static bool pgStatForceNextFlush = false;
251 :
252 : /*
253 : * Force-clear existing snapshot before next use when stats_fetch_consistency
254 : * is changed. Reset to false by pgstat_clear_snapshot().
255 : */
256 : static bool force_stats_snapshot_clear = false;
257 :
258 :
259 : /*
260 : * For assertions that check pgstat is not used before initialization / after
261 : * shutdown. Only defined in assert-enabled builds.
262 : */
263 : #ifdef USE_ASSERT_CHECKING
264 : static bool pgstat_is_initialized = false;
265 : static bool pgstat_is_shutdown = false;
266 : #endif
267 :
268 :
269 : /*
270 : * The different kinds of built-in statistics.
271 : *
272 : * If reasonably possible, handling specific to one kind of stats should go
273 : * through this abstraction, rather than making more of pgstat.c aware.
274 : *
275 : * See comments for struct PgStat_KindInfo for details about the individual
276 : * fields.
277 : *
278 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
279 : * seem to be a great way of doing that, given the split across multiple
280 : * files.
281 : */
282 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
283 :
284 : /* stats kinds for variable-numbered objects */
285 :
286 : [PGSTAT_KIND_DATABASE] = {
287 : .name = "database",
288 :
289 : .fixed_amount = false,
290 : .write_to_file = true,
291 : /* so pg_stat_database entries can be seen in all databases */
292 : .accessed_across_databases = true,
293 :
294 : .shared_size = sizeof(PgStatShared_Database),
295 : .shared_data_off = offsetof(PgStatShared_Database, stats),
296 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
297 : .pending_size = sizeof(PgStat_StatDBEntry),
298 :
299 : .flush_pending_cb = pgstat_database_flush_cb,
300 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
301 : },
302 :
303 : [PGSTAT_KIND_RELATION] = {
304 : .name = "relation",
305 :
306 : .fixed_amount = false,
307 : .write_to_file = true,
308 :
309 : .shared_size = sizeof(PgStatShared_Relation),
310 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
311 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
312 : .pending_size = sizeof(PgStat_TableStatus),
313 :
314 : .flush_pending_cb = pgstat_relation_flush_cb,
315 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
316 : .reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
317 : },
318 :
319 : [PGSTAT_KIND_FUNCTION] = {
320 : .name = "function",
321 :
322 : .fixed_amount = false,
323 : .write_to_file = true,
324 :
325 : .shared_size = sizeof(PgStatShared_Function),
326 : .shared_data_off = offsetof(PgStatShared_Function, stats),
327 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
328 : .pending_size = sizeof(PgStat_FunctionCounts),
329 :
330 : .flush_pending_cb = pgstat_function_flush_cb,
331 : .reset_timestamp_cb = pgstat_function_reset_timestamp_cb,
332 : },
333 :
334 : [PGSTAT_KIND_REPLSLOT] = {
335 : .name = "replslot",
336 :
337 : .fixed_amount = false,
338 : .write_to_file = true,
339 :
340 : .accessed_across_databases = true,
341 :
342 : .shared_size = sizeof(PgStatShared_ReplSlot),
343 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
344 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
345 :
346 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
347 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
348 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
349 : },
350 :
351 : [PGSTAT_KIND_SUBSCRIPTION] = {
352 : .name = "subscription",
353 :
354 : .fixed_amount = false,
355 : .write_to_file = true,
356 : /* so pg_stat_subscription_stats entries can be seen in all databases */
357 : .accessed_across_databases = true,
358 :
359 : .shared_size = sizeof(PgStatShared_Subscription),
360 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
361 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
362 : .pending_size = sizeof(PgStat_BackendSubEntry),
363 :
364 : .flush_pending_cb = pgstat_subscription_flush_cb,
365 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
366 : },
367 :
368 : [PGSTAT_KIND_BACKEND] = {
369 : .name = "backend",
370 :
371 : .fixed_amount = false,
372 : .write_to_file = false,
373 :
374 : .accessed_across_databases = true,
375 :
376 : .shared_size = sizeof(PgStatShared_Backend),
377 : .shared_data_off = offsetof(PgStatShared_Backend, stats),
378 : .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
379 :
380 : .flush_static_cb = pgstat_backend_flush_cb,
381 : .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
382 : },
383 :
384 : /* stats for fixed-numbered (mostly 1) objects */
385 :
386 : [PGSTAT_KIND_ARCHIVER] = {
387 : .name = "archiver",
388 :
389 : .fixed_amount = true,
390 : .write_to_file = true,
391 :
392 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
393 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
394 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
395 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
396 :
397 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
398 : .reset_all_cb = pgstat_archiver_reset_all_cb,
399 : .snapshot_cb = pgstat_archiver_snapshot_cb,
400 : },
401 :
402 : [PGSTAT_KIND_BGWRITER] = {
403 : .name = "bgwriter",
404 :
405 : .fixed_amount = true,
406 : .write_to_file = true,
407 :
408 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
409 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
410 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
411 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
412 :
413 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
414 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
415 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
416 : },
417 :
418 : [PGSTAT_KIND_CHECKPOINTER] = {
419 : .name = "checkpointer",
420 :
421 : .fixed_amount = true,
422 : .write_to_file = true,
423 :
424 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
425 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
426 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
427 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
428 :
429 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
430 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
431 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
432 : },
433 :
434 : [PGSTAT_KIND_IO] = {
435 : .name = "io",
436 :
437 : .fixed_amount = true,
438 : .write_to_file = true,
439 :
440 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
441 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
442 : .shared_data_off = offsetof(PgStatShared_IO, stats),
443 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
444 :
445 : .flush_static_cb = pgstat_io_flush_cb,
446 : .init_shmem_cb = pgstat_io_init_shmem_cb,
447 : .reset_all_cb = pgstat_io_reset_all_cb,
448 : .snapshot_cb = pgstat_io_snapshot_cb,
449 : },
450 :
451 : [PGSTAT_KIND_SLRU] = {
452 : .name = "slru",
453 :
454 : .fixed_amount = true,
455 : .write_to_file = true,
456 :
457 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
458 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
459 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
460 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
461 :
462 : .flush_static_cb = pgstat_slru_flush_cb,
463 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
464 : .reset_all_cb = pgstat_slru_reset_all_cb,
465 : .snapshot_cb = pgstat_slru_snapshot_cb,
466 : },
467 :
468 : [PGSTAT_KIND_WAL] = {
469 : .name = "wal",
470 :
471 : .fixed_amount = true,
472 : .write_to_file = true,
473 :
474 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
475 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
476 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
477 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
478 :
479 : .init_backend_cb = pgstat_wal_init_backend_cb,
480 : .flush_static_cb = pgstat_wal_flush_cb,
481 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
482 : .reset_all_cb = pgstat_wal_reset_all_cb,
483 : .snapshot_cb = pgstat_wal_snapshot_cb,
484 : },
485 : };
486 :
487 : /*
488 : * Information about custom statistics kinds.
489 : *
490 : * These are kept in a separate array from the built-in kinds so that the
491 : * initializations of the built-in table stay clear.
492 : *
493 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
 * Starts out NULL, i.e. no array exists until something allocates it.
494 : */
495 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
496 :
497 : /* ------------------------------------------------------------
498 : * Functions managing the state of the stats system for all backends.
499 : * ------------------------------------------------------------
500 : */
501 :
/*
 * pgstat_restore_stats
 *
 * Load the on-disk statistics into memory at server start by delegating to
 * pgstat_read_statsfile().
 *
 * Callers: the startup process, or a backend running in single user mode.
 * Nobody else should invoke this.
 */
void
pgstat_restore_stats(void)
{
	pgstat_read_statsfile();
}
512 :
513 : /*
514 : * Remove the stats file. This is currently used only if WAL recovery is
515 : * needed after a crash.
516 : *
517 : * Should only be called by the startup process or in single user mode.
518 : */
519 : void
520 370 : pgstat_discard_stats(void)
521 : {
522 : int ret;
523 :
524 : /* NB: this needs to be done even in single user mode */
525 :
526 : /* First, cleanup the main pgstats file */
527 370 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
528 370 : if (ret != 0)
529 : {
530 368 : if (errno == ENOENT)
531 368 : elog(DEBUG2,
532 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
533 : PGSTAT_STAT_PERMANENT_FILENAME);
534 : else
535 0 : ereport(LOG,
536 : (errcode_for_file_access(),
537 : errmsg("could not unlink permanent statistics file \"%s\": %m",
538 : PGSTAT_STAT_PERMANENT_FILENAME)));
539 : }
540 : else
541 : {
542 2 : ereport(DEBUG2,
543 : (errcode_for_file_access(),
544 : errmsg_internal("unlinked permanent statistics file \"%s\"",
545 : PGSTAT_STAT_PERMANENT_FILENAME)));
546 : }
547 :
548 : /* Finish callbacks, if required */
549 12210 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
550 : {
551 11840 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
552 :
553 11840 : if (kind_info && kind_info->finish)
554 2 : kind_info->finish(STATS_DISCARD);
555 : }
556 :
557 : /*
558 : * Reset stats contents. This will set reset timestamps of fixed-numbered
559 : * stats to the current time (no variable stats exist).
560 : */
561 370 : pgstat_reset_after_failure();
562 370 : }
563 :
564 : /*
565 : * pgstat_before_server_shutdown() needs to be called by exactly one process
566 : * during regular server shutdowns. Otherwise all stats will be lost.
567 : *
568 : * We currently only write out stats for proc_exit(0). We might want to change
569 : * that at some point... But right now pgstat_discard_stats() would be called
570 : * during the start after a disorderly shutdown, anyway.
571 : */
572 : void
573 1372 : pgstat_before_server_shutdown(int code, Datum arg)
574 : {
575 : Assert(pgStatLocal.shmem != NULL);
576 : Assert(!pgStatLocal.shmem->is_shutdown);
577 :
578 : /*
579 : * Stats should only be reported after pgstat_initialize() and before
580 : * pgstat_shutdown(). This is a convenient point to catch most violations
581 : * of this rule.
582 : */
583 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
584 :
585 : /* flush out our own pending changes before writing out */
586 1372 : pgstat_report_stat(true);
587 :
588 : /*
589 : * Only write out file during normal shutdown. Don't even signal that
590 : * we've shutdown during irregular shutdowns, because the shutdown
591 : * sequence isn't coordinated to ensure this backend shuts down last.
592 : */
593 1372 : if (code == 0)
594 : {
595 1356 : pgStatLocal.shmem->is_shutdown = true;
596 1356 : pgstat_write_statsfile();
597 : }
598 1372 : }
599 :
600 :
601 : /* ------------------------------------------------------------
602 : * Backend initialization / shutdown functions
603 : * ------------------------------------------------------------
604 : */
605 :
606 : /*
607 : * Shut down a single backend's statistics reporting at process exit.
608 : *
609 : * Flush out any remaining statistics counts. Without this, operations
610 : * triggered during backend exit (such as temp table deletions) won't be
611 : * counted.
612 : */
613 : static void
614 45198 : pgstat_shutdown_hook(int code, Datum arg)
615 : {
616 : Assert(!pgstat_is_shutdown);
617 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
618 :
619 : /*
620 : * If we got as far as discovering our own database ID, we can flush out
621 : * what we did so far. Otherwise, we'd be reporting an invalid database
622 : * ID, so forget it. (This means that accesses to pg_database during
623 : * failed backend starts might never get counted.)
624 : */
625 45198 : if (OidIsValid(MyDatabaseId))
626 33374 : pgstat_report_disconnect(MyDatabaseId);
627 :
628 45198 : pgstat_report_stat(true);
629 :
630 : /* there shouldn't be any pending changes left */
631 : Assert(dlist_is_empty(&pgStatPending));
632 45198 : dlist_init(&pgStatPending);
633 :
634 : /* drop the backend stats entry */
635 45198 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
636 0 : pgstat_request_entry_refs_gc();
637 :
638 45198 : pgstat_detach_shmem();
639 :
640 : #ifdef USE_ASSERT_CHECKING
641 : pgstat_is_shutdown = true;
642 : #endif
643 45198 : }
644 :
645 : /*
646 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
647 : * BaseInit().
648 : *
649 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
650 : */
651 : void
652 45198 : pgstat_initialize(void)
653 : {
654 : Assert(!pgstat_is_initialized);
655 :
656 45198 : pgstat_attach_shmem();
657 :
658 45198 : pgstat_init_snapshot_fixed();
659 :
660 : /* Backend initialization callbacks */
661 1491534 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
662 : {
663 1446336 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
664 :
665 1446336 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
666 1401138 : continue;
667 :
668 45198 : kind_info->init_backend_cb();
669 : }
670 :
671 : /* Set up a process-exit hook to clean up */
672 45198 : before_shmem_exit(pgstat_shutdown_hook, 0);
673 :
674 : #ifdef USE_ASSERT_CHECKING
675 : pgstat_is_initialized = true;
676 : #endif
677 45198 : }
678 :
679 :
680 : /* ------------------------------------------------------------
681 : * Public functions used by backends follow
682 : * ------------------------------------------------------------
683 : */
684 :
685 : /*
686 : * Must be called by processes that performs DML: tcop/postgres.c, logical
687 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
688 : * shared memory.
689 : *
690 : * Unless called with 'force', pending stats updates are flushed happen once
691 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
692 : * block on lock acquisition, except if stats updates have been pending for
693 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
694 : *
695 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
696 : * suggested idle timeout is returned. Currently this is always
697 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
698 : * a timeout after which to call pgstat_report_stat(true), but are not
699 : * required to do so.
700 : *
701 : * Note that this is called only when not within a transaction, so it is fair
702 : * to use transaction stop time as an approximation of current time.
703 : */
704 : long
705 635520 : pgstat_report_stat(bool force)
706 : {
707 : static TimestampTz pending_since = 0;
708 : static TimestampTz last_flush = 0;
709 : bool partial_flush;
710 : TimestampTz now;
711 : bool nowait;
712 :
713 : pgstat_assert_is_up();
714 : Assert(!IsTransactionOrTransactionBlock());
715 :
716 : /* "absorb" the forced flush even if there's nothing to flush */
717 635520 : if (pgStatForceNextFlush)
718 : {
719 574 : force = true;
720 574 : pgStatForceNextFlush = false;
721 : }
722 :
723 : /* Don't expend a clock check if nothing to do */
724 635520 : if (dlist_is_empty(&pgStatPending) &&
725 20310 : !pgstat_report_fixed)
726 : {
727 14866 : return 0;
728 : }
729 :
730 : /*
731 : * There should never be stats to report once stats are shut down. Can't
732 : * assert that before the checks above, as there is an unconditional
733 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
734 : * the process that ran pgstat_before_server_shutdown() will still call.
735 : */
736 : Assert(!pgStatLocal.shmem->is_shutdown);
737 :
738 620654 : if (force)
739 : {
740 : /*
741 : * Stats reports are forced either when it's been too long since stats
742 : * have been reported or in processes that force stats reporting to
743 : * happen at specific points (including shutdown). In the former case
744 : * the transaction stop time might be quite old, in the latter it
745 : * would never get cleared.
746 : */
747 43766 : now = GetCurrentTimestamp();
748 : }
749 : else
750 : {
751 576888 : now = GetCurrentTransactionStopTimestamp();
752 :
753 1097196 : if (pending_since > 0 &&
754 520308 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
755 : {
756 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
757 0 : force = true;
758 : }
759 576888 : else if (last_flush > 0 &&
760 550014 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
761 : {
762 : /* don't flush too frequently */
763 547018 : if (pending_since == 0)
764 29196 : pending_since = now;
765 :
766 547018 : return PGSTAT_IDLE_INTERVAL;
767 : }
768 : }
769 :
770 73636 : pgstat_update_dbstats(now);
771 :
772 : /* don't wait for lock acquisition when !force */
773 73636 : nowait = !force;
774 :
775 73636 : partial_flush = false;
776 :
777 : /* flush of variable-numbered stats tracked in pending entries list */
778 73636 : partial_flush |= pgstat_flush_pending_entries(nowait);
779 :
780 : /* flush of other stats kinds */
781 73636 : if (pgstat_report_fixed)
782 : {
783 2358510 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
784 : {
785 2287040 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
786 :
787 2287040 : if (!kind_info)
788 1429076 : continue;
789 857964 : if (!kind_info->flush_static_cb)
790 572084 : continue;
791 :
792 285880 : partial_flush |= kind_info->flush_static_cb(nowait);
793 : }
794 : }
795 :
796 73636 : last_flush = now;
797 :
798 : /*
799 : * If some of the pending stats could not be flushed due to lock
800 : * contention, let the caller know when to retry.
801 : */
802 73636 : if (partial_flush)
803 : {
804 : /* force should have prevented us from getting here */
805 : Assert(!force);
806 :
807 : /* remember since when stats have been pending */
808 0 : if (pending_since == 0)
809 0 : pending_since = now;
810 :
811 0 : return PGSTAT_IDLE_INTERVAL;
812 : }
813 :
814 73636 : pending_since = 0;
815 73636 : pgstat_report_fixed = false;
816 :
817 73636 : return 0;
818 : }
819 :
820 : /*
821 : * Force locally pending stats to be flushed during the next
822 : * pgstat_report_stat() call. This is useful for writing tests.
823 : */
824 : void
825 574 : pgstat_force_next_flush(void)
826 : {
827 574 : pgStatForceNextFlush = true;
828 574 : }
829 :
830 : /*
831 : * Only for use by pgstat_reset_counters()
832 : */
833 : static bool
834 23140 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
835 : {
836 23140 : return entry->key.dboid == MyDatabaseId;
837 : }
838 :
839 : /*
840 : * Reset counters for our database.
841 : *
842 : * Permission checking for this function is managed through the normal
843 : * GRANT system.
844 : */
845 : void
846 26 : pgstat_reset_counters(void)
847 : {
848 26 : TimestampTz ts = GetCurrentTimestamp();
849 :
850 26 : pgstat_reset_matching_entries(match_db_entries,
851 : ObjectIdGetDatum(MyDatabaseId),
852 : ts);
853 26 : }
854 :
855 : /*
856 : * Reset a single variable-numbered entry.
857 : *
858 : * If the stats kind is within a database, also reset the database's
859 : * stat_reset_timestamp.
860 : *
861 : * Permission checking for this function is managed through the normal
862 : * GRANT system.
863 : */
864 : void
865 50 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
866 : {
867 50 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
868 50 : TimestampTz ts = GetCurrentTimestamp();
869 :
870 : /* not needed atm, and doesn't make sense with the current signature */
871 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
872 :
873 : /* reset the "single counter" */
874 50 : pgstat_reset_entry(kind, dboid, objid, ts);
875 :
876 50 : if (!kind_info->accessed_across_databases)
877 22 : pgstat_reset_database_timestamp(dboid, ts);
878 50 : }
879 :
880 : /*
881 : * Reset stats for all entries of a kind.
882 : *
883 : * Permission checking for this function is managed through the normal
884 : * GRANT system.
885 : */
886 : void
887 60 : pgstat_reset_of_kind(PgStat_Kind kind)
888 : {
889 60 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
890 60 : TimestampTz ts = GetCurrentTimestamp();
891 :
892 60 : if (kind_info->fixed_amount)
893 52 : kind_info->reset_all_cb(ts);
894 : else
895 8 : pgstat_reset_entries_of_kind(kind, ts);
896 60 : }
897 :
898 :
899 : /* ------------------------------------------------------------
900 : * Fetching of stats
901 : * ------------------------------------------------------------
902 : */
903 :
904 : /*
905 : * Discard any data collected in the current transaction. Any subsequent
906 : * request will cause new snapshots to be read.
907 : *
908 : * This is also invoked during transaction commit or abort to discard
909 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
910 : * cause this routine to be called.
911 : */
912 : void
913 1089656 : pgstat_clear_snapshot(void)
914 : {
915 : pgstat_assert_is_up();
916 :
917 1089656 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
918 : sizeof(pgStatLocal.snapshot.fixed_valid));
919 1089656 : memset(&pgStatLocal.snapshot.custom_valid, 0,
920 : sizeof(pgStatLocal.snapshot.custom_valid));
921 1089656 : pgStatLocal.snapshot.stats = NULL;
922 1089656 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
923 :
924 : /* Release memory, if any was allocated */
925 1089656 : if (pgStatLocal.snapshot.context)
926 : {
927 1394 : MemoryContextDelete(pgStatLocal.snapshot.context);
928 :
929 : /* Reset variables */
930 1394 : pgStatLocal.snapshot.context = NULL;
931 : }
932 :
933 : /*
934 : * Historically the backend_status.c facilities lived in this file, and
935 : * were reset with the same function. For now keep it that way, and
936 : * forward the reset request.
937 : */
938 1089656 : pgstat_clear_backend_activity_snapshot();
939 :
940 : /* Reset this flag, as it may be possible that a cleanup was forced. */
941 1089656 : force_stats_snapshot_clear = false;
942 1089656 : }
943 :
944 : void *
945 587340 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
946 : {
947 587340 : PgStat_HashKey key = {0};
948 : PgStat_EntryRef *entry_ref;
949 : void *stats_data;
950 587340 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
951 :
952 : /* should be called from backends */
953 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
954 : Assert(!kind_info->fixed_amount);
955 :
956 587340 : pgstat_prep_snapshot();
957 :
958 587340 : key.kind = kind;
959 587340 : key.dboid = dboid;
960 587340 : key.objid = objid;
961 :
962 : /* if we need to build a full snapshot, do so */
963 587340 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
964 460 : pgstat_build_snapshot();
965 :
966 : /* if caching is desired, look up in cache */
967 587340 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
968 : {
969 10432 : PgStat_SnapshotEntry *entry = NULL;
970 :
971 10432 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
972 :
973 10432 : if (entry)
974 944 : return entry->data;
975 :
976 : /*
977 : * If we built a full snapshot and the key is not in
978 : * pgStatLocal.snapshot.stats, there are no matching stats.
979 : */
980 9488 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
981 28 : return NULL;
982 : }
983 :
984 586368 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
985 :
986 586368 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
987 :
988 586368 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
989 : {
990 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
991 17838 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
992 : {
993 1838 : PgStat_SnapshotEntry *entry = NULL;
994 : bool found;
995 :
996 1838 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
997 : Assert(!found);
998 1838 : entry->data = NULL;
999 : }
1000 17838 : return NULL;
1001 : }
1002 :
1003 : /*
1004 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
1005 : * otherwise we could quickly end up with a fair bit of memory used due to
1006 : * repeated accesses.
1007 : */
1008 568530 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1009 560908 : stats_data = palloc(kind_info->shared_data_len);
1010 : else
1011 7622 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1012 7622 : kind_info->shared_data_len);
1013 :
1014 568530 : (void) pgstat_lock_entry_shared(entry_ref, false);
1015 1137060 : memcpy(stats_data,
1016 568530 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1017 568530 : kind_info->shared_data_len);
1018 568530 : pgstat_unlock_entry(entry_ref);
1019 :
1020 568530 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1021 : {
1022 7622 : PgStat_SnapshotEntry *entry = NULL;
1023 : bool found;
1024 :
1025 7622 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1026 7622 : entry->data = stats_data;
1027 : }
1028 :
1029 568530 : return stats_data;
1030 : }
1031 :
1032 : /*
1033 : * If a stats snapshot has been taken, return the timestamp at which that was
1034 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1035 : * false.
1036 : */
1037 : TimestampTz
1038 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1039 : {
1040 60 : if (force_stats_snapshot_clear)
1041 18 : pgstat_clear_snapshot();
1042 :
1043 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1044 : {
1045 24 : *have_snapshot = true;
1046 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1047 : }
1048 :
1049 36 : *have_snapshot = false;
1050 :
1051 36 : return 0;
1052 : }
1053 :
1054 : bool
1055 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1056 : {
1057 : /* fixed-numbered stats always exist */
1058 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1059 12 : return true;
1060 :
1061 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1062 : }
1063 :
1064 : /*
1065 : * Ensure snapshot for fixed-numbered 'kind' exists.
1066 : *
1067 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1068 : * massaging the data into the desired format.
1069 : */
1070 : void
1071 452 : pgstat_snapshot_fixed(PgStat_Kind kind)
1072 : {
1073 : Assert(pgstat_is_kind_valid(kind));
1074 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1075 :
1076 452 : if (force_stats_snapshot_clear)
1077 0 : pgstat_clear_snapshot();
1078 :
1079 452 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1080 24 : pgstat_build_snapshot();
1081 : else
1082 428 : pgstat_build_snapshot_fixed(kind);
1083 :
1084 452 : if (pgstat_is_kind_builtin(kind))
1085 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1086 6 : else if (pgstat_is_kind_custom(kind))
1087 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1088 452 : }
1089 :
1090 : static void
1091 45198 : pgstat_init_snapshot_fixed(void)
1092 : {
1093 : /*
1094 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1095 : * stats kinds.
1096 : */
1097 451980 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1098 : {
1099 406782 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1100 :
1101 406782 : if (!kind_info || !kind_info->fixed_amount)
1102 406684 : continue;
1103 :
1104 98 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1105 98 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1106 : }
1107 45198 : }
1108 :
1109 : static void
1110 587392 : pgstat_prep_snapshot(void)
1111 : {
1112 587392 : if (force_stats_snapshot_clear)
1113 18 : pgstat_clear_snapshot();
1114 :
1115 587392 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1116 10484 : pgStatLocal.snapshot.stats != NULL)
1117 585998 : return;
1118 :
1119 1394 : if (!pgStatLocal.snapshot.context)
1120 1394 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1121 : "PgStat Snapshot",
1122 : ALLOCSET_SMALL_SIZES);
1123 :
1124 1394 : pgStatLocal.snapshot.stats =
1125 1394 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1126 : PGSTAT_SNAPSHOT_HASH_SIZE,
1127 : NULL);
1128 : }
1129 :
1130 : static void
1131 484 : pgstat_build_snapshot(void)
1132 : {
1133 : dshash_seq_status hstat;
1134 : PgStatShared_HashEntry *p;
1135 :
1136 : /* should only be called when we need a snapshot */
1137 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1138 :
1139 : /* snapshot already built */
1140 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1141 432 : return;
1142 :
1143 52 : pgstat_prep_snapshot();
1144 :
1145 : Assert(pgStatLocal.snapshot.stats->members == 0);
1146 :
1147 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1148 :
1149 : /*
1150 : * Snapshot all variable stats.
1151 : */
1152 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1153 54722 : while ((p = dshash_seq_next(&hstat)) != NULL)
1154 : {
1155 54670 : PgStat_Kind kind = p->key.kind;
1156 54670 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1157 : bool found;
1158 : PgStat_SnapshotEntry *entry;
1159 : PgStatShared_Common *stats_data;
1160 :
1161 : /*
1162 : * Check if the stats object should be included in the snapshot.
1163 : * Unless the stats kind can be accessed from all databases (e.g.,
1164 : * database stats themselves), we only include stats for the current
1165 : * database or objects not associated with a database (e.g. shared
1166 : * relations).
1167 : */
1168 54670 : if (p->key.dboid != MyDatabaseId &&
1169 16044 : p->key.dboid != InvalidOid &&
1170 13092 : !kind_info->accessed_across_databases)
1171 13140 : continue;
1172 :
1173 41734 : if (p->dropped)
1174 204 : continue;
1175 :
1176 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1177 :
1178 41530 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1179 : Assert(stats_data);
1180 :
1181 41530 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1182 : Assert(!found);
1183 :
1184 41530 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1185 : pgstat_get_entry_len(kind));
1186 :
1187 : /*
1188 : * Acquire the LWLock directly instead of using
1189 : * pg_stat_lock_entry_shared() which requires a reference.
1190 : */
1191 41530 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1192 41530 : memcpy(entry->data,
1193 41530 : pgstat_get_entry_data(kind, stats_data),
1194 : pgstat_get_entry_len(kind));
1195 41530 : LWLockRelease(&stats_data->lock);
1196 : }
1197 52 : dshash_seq_term(&hstat);
1198 :
1199 : /*
1200 : * Build snapshot of all fixed-numbered stats.
1201 : */
1202 1716 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1203 : {
1204 1664 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1205 :
1206 1664 : if (!kind_info)
1207 1040 : continue;
1208 624 : if (!kind_info->fixed_amount)
1209 : {
1210 : Assert(kind_info->snapshot_cb == NULL);
1211 312 : continue;
1212 : }
1213 :
1214 312 : pgstat_build_snapshot_fixed(kind);
1215 : }
1216 :
1217 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1218 : }
1219 :
1220 : static void
1221 8878 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1222 : {
1223 8878 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1224 : int idx;
1225 : bool *valid;
1226 :
1227 : /* Position in fixed_valid or custom_valid */
1228 8878 : if (pgstat_is_kind_builtin(kind))
1229 : {
1230 8870 : idx = kind;
1231 8870 : valid = pgStatLocal.snapshot.fixed_valid;
1232 : }
1233 : else
1234 : {
1235 8 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1236 8 : valid = pgStatLocal.snapshot.custom_valid;
1237 : }
1238 :
1239 : Assert(kind_info->fixed_amount);
1240 : Assert(kind_info->snapshot_cb != NULL);
1241 :
1242 8878 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1243 : {
1244 : /* rebuild every time */
1245 8168 : valid[idx] = false;
1246 : }
1247 710 : else if (valid[idx])
1248 : {
1249 : /* in snapshot mode we shouldn't get called again */
1250 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1251 12 : return;
1252 : }
1253 :
1254 : Assert(!valid[idx]);
1255 :
1256 8866 : kind_info->snapshot_cb();
1257 :
1258 : Assert(!valid[idx]);
1259 8866 : valid[idx] = true;
1260 : }
1261 :
1262 :
1263 : /* ------------------------------------------------------------
1264 : * Backend-local pending stats infrastructure
1265 : * ------------------------------------------------------------
1266 : */
1267 :
1268 : /*
1269 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1270 : * stats if not already done.
1271 : *
1272 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1273 : * created, false otherwise.
1274 : */
1275 : PgStat_EntryRef *
1276 4171102 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1277 : {
1278 : PgStat_EntryRef *entry_ref;
1279 :
1280 : /* need to be able to flush out */
1281 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1282 :
1283 4171102 : if (unlikely(!pgStatPendingContext))
1284 : {
1285 36182 : pgStatPendingContext =
1286 36182 : AllocSetContextCreate(TopMemoryContext,
1287 : "PgStat Pending",
1288 : ALLOCSET_SMALL_SIZES);
1289 : }
1290 :
1291 4171102 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1292 : true, created_entry);
1293 :
1294 4171102 : if (entry_ref->pending == NULL)
1295 : {
1296 2133828 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1297 :
1298 : Assert(entrysize != (size_t) -1);
1299 :
1300 2133828 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1301 2133828 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1302 : }
1303 :
1304 4171102 : return entry_ref;
1305 : }
1306 :
1307 : /*
1308 : * Return an existing stats entry, or NULL.
1309 : *
1310 : * This should only be used for helper function for pgstatfuncs.c - outside of
1311 : * that it shouldn't be needed.
1312 : */
1313 : PgStat_EntryRef *
1314 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1315 : {
1316 : PgStat_EntryRef *entry_ref;
1317 :
1318 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1319 :
1320 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1321 30 : return NULL;
1322 :
1323 54 : return entry_ref;
1324 : }
1325 :
1326 : void
1327 2133828 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1328 : {
1329 2133828 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1330 2133828 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1331 2133828 : void *pending_data = entry_ref->pending;
1332 :
1333 : Assert(pending_data != NULL);
1334 : /* !fixed_amount stats should be handled explicitly */
1335 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1336 :
1337 2133828 : if (kind_info->delete_pending_cb)
1338 2014270 : kind_info->delete_pending_cb(entry_ref);
1339 :
1340 2133828 : pfree(pending_data);
1341 2133828 : entry_ref->pending = NULL;
1342 :
1343 2133828 : dlist_delete(&entry_ref->pending_node);
1344 2133828 : }
1345 :
1346 : /*
1347 : * Flush out pending variable-numbered stats.
1348 : */
1349 : static bool
1350 73636 : pgstat_flush_pending_entries(bool nowait)
1351 : {
1352 73636 : bool have_pending = false;
1353 73636 : dlist_node *cur = NULL;
1354 :
1355 : /*
1356 : * Need to be a bit careful iterating over the list of pending entries.
1357 : * Processing a pending entry may queue further pending entries to the end
1358 : * of the list that we want to process, so a simple iteration won't do.
1359 : * Further complicating matters is that we want to delete the current
1360 : * entry in each iteration from the list if we flushed successfully.
1361 : *
1362 : * So we just keep track of the next pointer in each loop iteration.
1363 : */
1364 73636 : if (!dlist_is_empty(&pgStatPending))
1365 68550 : cur = dlist_head_node(&pgStatPending);
1366 :
1367 2138044 : while (cur)
1368 : {
1369 2064408 : PgStat_EntryRef *entry_ref =
1370 : dlist_container(PgStat_EntryRef, pending_node, cur);
1371 2064408 : PgStat_HashKey key = entry_ref->shared_entry->key;
1372 2064408 : PgStat_Kind kind = key.kind;
1373 2064408 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1374 : bool did_flush;
1375 : dlist_node *next;
1376 :
1377 : Assert(!kind_info->fixed_amount);
1378 : Assert(kind_info->flush_pending_cb != NULL);
1379 :
1380 : /* flush the stats, if possible */
1381 2064408 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1382 :
1383 : Assert(did_flush || nowait);
1384 :
1385 : /* determine next entry, before deleting the pending entry */
1386 2064408 : if (dlist_has_next(&pgStatPending, cur))
1387 1995858 : next = dlist_next_node(&pgStatPending, cur);
1388 : else
1389 68550 : next = NULL;
1390 :
1391 : /* if successfully flushed, remove entry */
1392 2064408 : if (did_flush)
1393 2064408 : pgstat_delete_pending_entry(entry_ref);
1394 : else
1395 0 : have_pending = true;
1396 :
1397 2064408 : cur = next;
1398 : }
1399 :
1400 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1401 :
1402 73636 : return have_pending;
1403 : }
1404 :
1405 :
1406 : /* ------------------------------------------------------------
1407 : * Helper / infrastructure functions
1408 : * ------------------------------------------------------------
1409 : */
1410 :
1411 : PgStat_Kind
1412 166 : pgstat_get_kind_from_str(char *kind_str)
1413 : {
1414 494 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1415 : {
1416 488 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1417 160 : return kind;
1418 : }
1419 :
1420 : /* Check the custom set of cumulative stats */
1421 6 : if (pgstat_kind_custom_infos)
1422 : {
1423 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1424 : {
1425 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1426 :
1427 0 : if (pgstat_kind_custom_infos[idx] &&
1428 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1429 0 : return kind;
1430 : }
1431 : }
1432 :
1433 6 : ereport(ERROR,
1434 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1435 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1436 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1437 : }
1438 :
1439 : static inline bool
1440 830442 : pgstat_is_kind_valid(PgStat_Kind kind)
1441 : {
1442 830442 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1443 : }
1444 :
1445 : const PgStat_KindInfo *
1446 15418640 : pgstat_get_kind_info(PgStat_Kind kind)
1447 : {
1448 15418640 : if (pgstat_is_kind_builtin(kind))
1449 12472594 : return &pgstat_kind_builtin_infos[kind];
1450 :
1451 2946046 : if (pgstat_is_kind_custom(kind))
1452 : {
1453 1581606 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1454 :
1455 1581606 : if (pgstat_kind_custom_infos == NULL ||
1456 3690 : pgstat_kind_custom_infos[idx] == NULL)
1457 1580674 : return NULL;
1458 932 : return pgstat_kind_custom_infos[idx];
1459 : }
1460 :
1461 1364440 : return NULL;
1462 : }
1463 :
1464 : /*
1465 : * Register a new stats kind.
1466 : *
1467 : * PgStat_Kinds must be globally unique across all extensions. Refer
1468 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1469 : * unique ID for your extension, to avoid conflicts with other extension
1470 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1471 : * needlessly reserving a new ID.
1472 : */
1473 : void
1474 12 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1475 : {
1476 12 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1477 :
1478 12 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1479 0 : ereport(ERROR,
1480 : (errmsg("custom cumulative statistics name is invalid"),
1481 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1482 :
1483 12 : if (!pgstat_is_kind_custom(kind))
1484 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1485 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1486 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1487 :
1488 12 : if (!process_shared_preload_libraries_in_progress)
1489 0 : ereport(ERROR,
1490 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1491 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1492 :
1493 : /*
1494 : * Check some data for fixed-numbered stats.
1495 : */
1496 12 : if (kind_info->fixed_amount)
1497 : {
1498 6 : if (kind_info->shared_size == 0)
1499 0 : ereport(ERROR,
1500 : (errmsg("custom cumulative statistics property is invalid"),
1501 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1502 6 : if (kind_info->track_entry_count)
1503 0 : ereport(ERROR,
1504 : (errmsg("custom cumulative statistics property is invalid"),
1505 : errhint("Custom cumulative statistics cannot use entry count tracking for fixed-numbered objects.")));
1506 : }
1507 :
1508 : /*
1509 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1510 : */
1511 12 : if (pgstat_kind_custom_infos == NULL)
1512 : {
1513 6 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1514 6 : MemoryContextAllocZero(TopMemoryContext,
1515 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1516 : }
1517 :
1518 12 : if (pgstat_kind_custom_infos[idx] != NULL &&
1519 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1520 0 : ereport(ERROR,
1521 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1522 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1523 : pgstat_kind_custom_infos[idx]->name)));
1524 :
1525 : /* check for existing custom stats with the same name */
1526 120 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1527 : {
1528 108 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1529 :
1530 108 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1531 102 : continue;
1532 6 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1533 0 : ereport(ERROR,
1534 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1535 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1536 : }
1537 :
1538 : /* Register it */
1539 12 : pgstat_kind_custom_infos[idx] = kind_info;
1540 12 : ereport(LOG,
1541 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1542 : kind_info->name, kind)));
1543 12 : }
1544 :
/*
 * Assert that the stats system is usable: stats should only be reported
 * after pgstat_initialize() and before pgstat_shutdown().
 *
 * This check is placed in a few central places so that violations of that
 * rule are caught more easily.  The function is only compiled in
 * assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	/* initialized, and not yet shut down again */
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1557 :
1558 :
1559 : /* ------------------------------------------------------------
1560 : * reading and writing of on-disk stats file
1561 : * ------------------------------------------------------------
1562 : */
1563 :
/*
 * Helper for pgstat_write_statsfile(): write 'len' bytes starting at 'ptr'
 * to 'fpout' as a single item.
 *
 * Write errors are deliberately not reported here; the caller checks the
 * stream with ferror() once all the stats have been written.
 */
void
pgstat_write_chunk(FILE *fpout, void *ptr, size_t len)
{
	size_t		written = fwrite(ptr, len, 1, fpout);

	/* We check for errors with ferror() when done writing the stats. */
	(void) written;
}
1575 :
1576 : /*
1577 : * This function is called in the last process that is accessing the shared
1578 : * stats so locking is not required.
1579 : */
1580 : static void
1581 1356 : pgstat_write_statsfile(void)
1582 : {
1583 : FILE *fpout;
1584 : int32 format_id;
1585 1356 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1586 1356 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1587 : dshash_seq_status hstat;
1588 : PgStatShared_HashEntry *ps;
1589 :
1590 : pgstat_assert_is_up();
1591 :
1592 : /* should be called only by the checkpointer or single user mode */
1593 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1594 :
1595 : /* we're shutting down, so it's ok to just override this */
1596 1356 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1597 :
1598 1356 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
1599 :
1600 : /*
1601 : * Open the statistics temp file to write out the current values.
1602 : */
1603 1356 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1604 1356 : if (fpout == NULL)
1605 : {
1606 0 : ereport(LOG,
1607 : (errcode_for_file_access(),
1608 : errmsg("could not open temporary statistics file \"%s\": %m",
1609 : tmpfile)));
1610 0 : return;
1611 : }
1612 :
1613 : /*
1614 : * Write the file header --- currently just a format ID.
1615 : */
1616 1356 : format_id = PGSTAT_FILE_FORMAT_ID;
1617 1356 : pgstat_write_chunk_s(fpout, &format_id);
1618 :
1619 : /* Write various stats structs for fixed number of objects */
1620 44748 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1621 : {
1622 : char *ptr;
1623 43392 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1624 :
1625 43392 : if (!info || !info->fixed_amount)
1626 35254 : continue;
1627 :
1628 8138 : if (pgstat_is_kind_builtin(kind))
1629 : Assert(info->snapshot_ctl_off != 0);
1630 :
1631 : /* skip if no need to write to file */
1632 8138 : if (!info->write_to_file)
1633 0 : continue;
1634 :
1635 8138 : pgstat_build_snapshot_fixed(kind);
1636 8138 : if (pgstat_is_kind_builtin(kind))
1637 8136 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1638 : else
1639 2 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1640 :
1641 8138 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1642 8138 : pgstat_write_chunk_s(fpout, &kind);
1643 8138 : pgstat_write_chunk(fpout, ptr, info->shared_data_len);
1644 : }
1645 :
1646 : /*
1647 : * Walk through the stats entries
1648 : */
1649 1356 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1650 404014 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1651 : {
1652 : PgStatShared_Common *shstats;
1653 402658 : const PgStat_KindInfo *kind_info = NULL;
1654 :
1655 402658 : CHECK_FOR_INTERRUPTS();
1656 :
1657 : /*
1658 : * We should not see any "dropped" entries when writing the stats
1659 : * file, as all backends and auxiliary processes should have cleaned
1660 : * up their references before they terminated.
1661 : *
1662 : * However, since we are already shutting down, it is not worth
1663 : * crashing the server over any potential cleanup issues, so we simply
1664 : * skip such entries if encountered.
1665 : */
1666 : Assert(!ps->dropped);
1667 402658 : if (ps->dropped)
1668 0 : continue;
1669 :
1670 : /*
1671 : * This discards data related to custom stats kinds that are unknown
1672 : * to this process.
1673 : */
1674 402658 : if (!pgstat_is_kind_valid(ps->key.kind))
1675 : {
1676 0 : elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
1677 : ps->key.kind, ps->key.dboid,
1678 : ps->key.objid);
1679 0 : continue;
1680 : }
1681 :
1682 402658 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1683 :
1684 402658 : kind_info = pgstat_get_kind_info(ps->key.kind);
1685 :
1686 : /* if not dropped the valid-entry refcount should exist */
1687 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1688 :
1689 : /* skip if no need to write to file */
1690 402658 : if (!kind_info->write_to_file)
1691 238 : continue;
1692 :
1693 402420 : if (!kind_info->to_serialized_name)
1694 : {
1695 : /* normal stats entry, identified by PgStat_HashKey */
1696 402216 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1697 402216 : pgstat_write_chunk_s(fpout, &ps->key);
1698 : }
1699 : else
1700 : {
1701 : /* stats entry identified by name on disk (e.g. slots) */
1702 : NameData name;
1703 :
1704 204 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1705 :
1706 204 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1707 204 : pgstat_write_chunk_s(fpout, &ps->key.kind);
1708 204 : pgstat_write_chunk_s(fpout, &name);
1709 : }
1710 :
1711 : /* Write except the header part of the entry */
1712 402420 : pgstat_write_chunk(fpout,
1713 : pgstat_get_entry_data(ps->key.kind, shstats),
1714 : pgstat_get_entry_len(ps->key.kind));
1715 :
1716 : /* Write more data for the entry, if required */
1717 402420 : if (kind_info->to_serialized_data)
1718 4 : kind_info->to_serialized_data(&ps->key, shstats, fpout);
1719 : }
1720 1356 : dshash_seq_term(&hstat);
1721 :
1722 : /*
1723 : * No more output to be done. Close the temp file and replace the old
1724 : * pgstat.stat with it. The ferror() check replaces testing for error
1725 : * after each individual fputc or fwrite (in pgstat_write_chunk()) above.
1726 : */
1727 1356 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1728 :
1729 1356 : if (ferror(fpout))
1730 : {
1731 0 : ereport(LOG,
1732 : (errcode_for_file_access(),
1733 : errmsg("could not write temporary statistics file \"%s\": %m",
1734 : tmpfile)));
1735 0 : FreeFile(fpout);
1736 0 : unlink(tmpfile);
1737 : }
1738 1356 : else if (FreeFile(fpout) < 0)
1739 : {
1740 0 : ereport(LOG,
1741 : (errcode_for_file_access(),
1742 : errmsg("could not close temporary statistics file \"%s\": %m",
1743 : tmpfile)));
1744 0 : unlink(tmpfile);
1745 : }
1746 1356 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1747 : {
1748 : /* durable_rename already emitted log message */
1749 0 : unlink(tmpfile);
1750 : }
1751 :
1752 : /* Finish callbacks, if required */
1753 44748 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1754 : {
1755 43392 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1756 :
1757 43392 : if (kind_info && kind_info->finish)
1758 2 : kind_info->finish(STATS_WRITE);
1759 : }
1760 : }
1761 :
/*
 * Helper for pgstat_read_statsfile(): read exactly 'len' bytes from 'fpin'
 * into 'ptr'.
 *
 * Returns true only if all 'len' bytes could be read.
 */
bool
pgstat_read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}
1768 :
1769 : /*
1770 : * Reads in existing statistics file into memory.
1771 : *
1772 : * This function is called in the only process that is accessing the shared
1773 : * stats so locking is not required.
1774 : */
1775 : static void
1776 1602 : pgstat_read_statsfile(void)
1777 : {
1778 : FILE *fpin;
1779 : int32 format_id;
1780 : bool found;
1781 1602 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1782 1602 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1783 :
1784 : /* shouldn't be called from postmaster */
1785 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1786 :
1787 1602 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1788 :
1789 : /*
1790 : * Try to open the stats file. If it doesn't exist, the backends simply
1791 : * return zero for anything and statistics simply start from scratch
1792 : * with empty counters.
1793 : *
1794 : * ENOENT is a possibility if stats collection was previously disabled or
1795 : * has not yet written the stats file for the first time. Any other
1796 : * failure condition is suspicious.
1797 : */
1798 1602 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1799 : {
1800 104 : if (errno != ENOENT)
1801 0 : ereport(LOG,
1802 : (errcode_for_file_access(),
1803 : errmsg("could not open statistics file \"%s\": %m",
1804 : statfile)));
1805 104 : pgstat_reset_after_failure();
1806 104 : return;
1807 : }
1808 :
1809 : /*
1810 : * Verify it's of the expected format.
1811 : */
1812 1498 : if (!pgstat_read_chunk_s(fpin, &format_id))
1813 : {
1814 0 : elog(WARNING, "could not read format ID");
1815 0 : goto error;
1816 : }
1817 :
1818 1498 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1819 : {
1820 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1821 : format_id, PGSTAT_FILE_FORMAT_ID);
1822 2 : goto error;
1823 : }
1824 :
1825 : /*
1826 : * We found an existing statistics file. Read it and put all the stats
1827 : * data into place.
1828 : */
1829 : for (;;)
1830 427784 : {
1831 429280 : int t = fgetc(fpin);
1832 :
1833 429280 : switch (t)
1834 : {
1835 8978 : case PGSTAT_FILE_ENTRY_FIXED:
1836 : {
1837 : PgStat_Kind kind;
1838 : const PgStat_KindInfo *info;
1839 : char *ptr;
1840 :
1841 : /* entry for fixed-numbered stats */
1842 8978 : if (!pgstat_read_chunk_s(fpin, &kind))
1843 : {
1844 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1845 0 : goto error;
1846 : }
1847 :
1848 8978 : if (!pgstat_is_kind_valid(kind))
1849 : {
1850 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1851 : kind, t);
1852 0 : goto error;
1853 : }
1854 :
1855 8978 : info = pgstat_get_kind_info(kind);
1856 8978 : if (!info)
1857 : {
1858 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1859 : kind, t);
1860 0 : goto error;
1861 : }
1862 :
1863 8978 : if (!info->fixed_amount)
1864 : {
1865 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1866 : kind, t);
1867 0 : goto error;
1868 : }
1869 :
1870 : /* Load back stats into shared memory */
1871 8978 : if (pgstat_is_kind_builtin(kind))
1872 8976 : ptr = ((char *) shmem) + info->shared_ctl_off +
1873 8976 : info->shared_data_off;
1874 : else
1875 : {
1876 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1877 :
1878 2 : ptr = ((char *) shmem->custom_data[idx]) +
1879 2 : info->shared_data_off;
1880 : }
1881 :
1882 8978 : if (!pgstat_read_chunk(fpin, ptr, info->shared_data_len))
1883 : {
1884 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1885 : kind, t, info->shared_data_len);
1886 0 : goto error;
1887 : }
1888 :
1889 8978 : break;
1890 : }
1891 418806 : case PGSTAT_FILE_ENTRY_HASH:
1892 : case PGSTAT_FILE_ENTRY_NAME:
1893 : {
1894 : PgStat_HashKey key;
1895 : PgStatShared_HashEntry *p;
1896 : PgStatShared_Common *header;
1897 418806 : const PgStat_KindInfo *kind_info = NULL;
1898 :
1899 418806 : CHECK_FOR_INTERRUPTS();
1900 :
1901 418806 : if (t == PGSTAT_FILE_ENTRY_HASH)
1902 : {
1903 : /* normal stats entry, identified by PgStat_HashKey */
1904 418662 : if (!pgstat_read_chunk_s(fpin, &key))
1905 : {
1906 0 : elog(WARNING, "could not read key for entry of type %c", t);
1907 0 : goto error;
1908 : }
1909 :
1910 418662 : if (!pgstat_is_kind_valid(key.kind))
1911 : {
1912 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
1913 : key.kind, key.dboid,
1914 : key.objid, t);
1915 0 : goto error;
1916 : }
1917 :
1918 418662 : kind_info = pgstat_get_kind_info(key.kind);
1919 418662 : if (!kind_info)
1920 : {
1921 0 : elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
1922 : key.kind, key.dboid,
1923 : key.objid, t);
1924 0 : goto error;
1925 : }
1926 : }
1927 : else
1928 : {
1929 : /* stats entry identified by name on disk (e.g. slots) */
1930 : PgStat_Kind kind;
1931 : NameData name;
1932 :
1933 144 : if (!pgstat_read_chunk_s(fpin, &kind))
1934 : {
1935 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1936 0 : goto error;
1937 : }
1938 144 : if (!pgstat_read_chunk_s(fpin, &name))
1939 : {
1940 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1941 : kind, t);
1942 0 : goto error;
1943 : }
1944 144 : if (!pgstat_is_kind_valid(kind))
1945 : {
1946 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1947 : kind, t);
1948 0 : goto error;
1949 : }
1950 :
1951 144 : kind_info = pgstat_get_kind_info(kind);
1952 144 : if (!kind_info)
1953 : {
1954 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1955 : kind, t);
1956 0 : goto error;
1957 : }
1958 :
1959 144 : if (!kind_info->from_serialized_name)
1960 : {
1961 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1962 : kind, t);
1963 0 : goto error;
1964 : }
1965 :
1966 144 : if (!kind_info->from_serialized_name(&name, &key))
1967 : {
1968 : /* skip over data for entry we don't care about */
1969 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1970 : {
1971 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1972 : NameStr(name), kind, t);
1973 0 : goto error;
1974 : }
1975 :
1976 2 : continue;
1977 : }
1978 :
1979 : Assert(key.kind == kind);
1980 : }
1981 :
1982 : /*
1983 : * This intentionally doesn't use pgstat_get_entry_ref() -
1984 : * putting all stats into checkpointer's
1985 : * pgStatEntryRefHash would be wasted effort and memory.
1986 : */
1987 418804 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1988 :
1989 : /* don't allow duplicate entries */
1990 418804 : if (found)
1991 : {
1992 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1993 0 : elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
1994 : key.kind, key.dboid,
1995 : key.objid, t);
1996 0 : goto error;
1997 : }
1998 :
1999 418804 : header = pgstat_init_entry(key.kind, p);
2000 418804 : dshash_release_lock(pgStatLocal.shared_hash, p);
2001 418804 : if (header == NULL)
2002 : {
2003 : /*
2004 : * It would be tempting to switch this ERROR to a
2005 : * WARNING, but it would mean that all the statistics
2006 : * are discarded when the environment fails on OOM.
2007 : */
2008 0 : elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
2009 : key.kind, key.dboid,
2010 : key.objid, t);
2011 : }
2012 :
2013 418804 : if (!pgstat_read_chunk(fpin,
2014 : pgstat_get_entry_data(key.kind, header),
2015 : pgstat_get_entry_len(key.kind)))
2016 : {
2017 0 : elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
2018 : key.kind, key.dboid,
2019 : key.objid, t);
2020 0 : goto error;
2021 : }
2022 :
2023 : /* read more data for the entry, if required */
2024 418804 : if (kind_info->from_serialized_data)
2025 : {
2026 4 : if (!kind_info->from_serialized_data(&key, header, fpin))
2027 : {
2028 0 : elog(WARNING, "could not read auxiliary data for entry %u/%u/%" PRIu64 " of type %c",
2029 : key.kind, key.dboid,
2030 : key.objid, t);
2031 0 : goto error;
2032 : }
2033 : }
2034 :
2035 418804 : break;
2036 : }
2037 1496 : case PGSTAT_FILE_ENTRY_END:
2038 :
2039 : /*
2040 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
2041 : * file
2042 : */
2043 1496 : if (fgetc(fpin) != EOF)
2044 : {
2045 2 : elog(WARNING, "could not read end-of-file");
2046 2 : goto error;
2047 : }
2048 :
2049 1494 : goto done;
2050 :
2051 0 : default:
2052 0 : elog(WARNING, "could not read entry of type %c", t);
2053 0 : goto error;
2054 : }
2055 : }
2056 :
2057 1498 : done:
2058 : /* First, cleanup the main stats file */
2059 1498 : FreeFile(fpin);
2060 :
2061 1498 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2062 1498 : unlink(statfile);
2063 :
2064 : /* Finish callbacks, if required */
2065 49434 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2066 : {
2067 47936 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2068 :
2069 47936 : if (kind_info && kind_info->finish)
2070 4 : kind_info->finish(STATS_READ);
2071 : }
2072 :
2073 1498 : return;
2074 :
2075 4 : error:
2076 4 : ereport(LOG,
2077 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2078 :
2079 4 : pgstat_reset_after_failure();
2080 :
2081 4 : goto done;
2082 : }
2083 :
2084 : /*
2085 : * Helper to reset / drop stats after a crash or after restoring stats from
2086 : * disk failed, potentially after already loading parts.
2087 : */
2088 : static void
2089 478 : pgstat_reset_after_failure(void)
2090 : {
2091 478 : TimestampTz ts = GetCurrentTimestamp();
2092 :
2093 : /* reset fixed-numbered stats */
2094 15774 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2095 : {
2096 15296 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2097 :
2098 15296 : if (!kind_info || !kind_info->fixed_amount)
2099 12426 : continue;
2100 :
2101 2870 : kind_info->reset_all_cb(ts);
2102 : }
2103 :
2104 : /* and drop variable-numbered ones */
2105 478 : pgstat_drop_all_entries();
2106 478 : }
2107 :
2108 : /*
2109 : * GUC assign_hook for stats_fetch_consistency.
2110 : */
2111 : void
2112 6830 : assign_stats_fetch_consistency(int newval, void *extra)
2113 : {
2114 : /*
2115 : * Changing this value in a transaction may cause snapshot state
2116 : * inconsistencies, so force a clear of the current snapshot on the next
2117 : * snapshot build attempt.
2118 : */
2119 6830 : if (pgstat_fetch_consistency != newval)
2120 4280 : force_stats_snapshot_clear = true;
2121 6830 : }
|