Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered stats, pending data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_relation.c
87 : * - pgstat_replslot.c
88 : * - pgstat_slru.c
89 : * - pgstat_subscription.c
90 : * - pgstat_wal.c
91 : *
92 : * Whenever possible infrastructure files should not contain code related to
93 : * specific kinds of stats.
94 : *
95 : *
96 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97 : *
98 : * IDENTIFICATION
99 : * src/backend/utils/activity/pgstat.c
100 : * ----------
101 : */
102 : #include "postgres.h"
103 :
104 : #include <unistd.h>
105 :
106 : #include "access/xact.h"
107 : #include "lib/dshash.h"
108 : #include "pgstat.h"
109 : #include "storage/fd.h"
110 : #include "storage/ipc.h"
111 : #include "storage/lwlock.h"
112 : #include "utils/guc_hooks.h"
113 : #include "utils/memutils.h"
114 : #include "utils/pgstat_internal.h"
115 : #include "utils/timestamp.h"
116 :
117 :
118 : /* ----------
119 : * Timer definitions.
120 : *
121 : * In milliseconds.
122 : * ----------
123 : */
124 :
125 : /* minimum interval between non-forced stats flushes */
126 : #define PGSTAT_MIN_INTERVAL 1000
127 : /* how long stats updates may stay pending before a flush blocks on locks */
128 : #define PGSTAT_MAX_INTERVAL 60000
129 : /* when to call pgstat_report_stat() again, even when idle */
130 : #define PGSTAT_IDLE_INTERVAL 10000
131 :
132 : /* ----------
133 : * Initial size hints for the hash tables used in statistics.
134 : * ----------
135 : */
136 :
137 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
138 :
139 : /* ---------
140 : * Identifiers in stats file.
141 : * ---------
142 : */
143 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
144 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
145 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
146 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
147 : * PgStat_HashKey */
148 :
149 : /* entry of the backend-local hash table holding statistics snapshots */
150 : typedef struct PgStat_SnapshotEntry
151 : {
152 : PgStat_HashKey key; /* hash key (SH_KEY of the simplehash below) */
153 : char status; /* for simplehash use */
154 : void *data; /* the stats data itself */
155 : } PgStat_SnapshotEntry;
156 :
157 :
158 : /* ----------
159 : * Backend-local Hash Table Definitions
160 : * ----------
161 : */
162 :
163 : /* for stats snapshot entries */
164 : #define SH_PREFIX pgstat_snapshot
165 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
166 : #define SH_KEY_TYPE PgStat_HashKey
167 : #define SH_KEY key
168 : #define SH_HASH_KEY(tb, key) \
169 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
170 : #define SH_EQUAL(tb, a, b) \
171 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
172 : #define SH_SCOPE static inline
173 : #define SH_DEFINE
174 : #define SH_DECLARE
175 : #include "lib/simplehash.h"
176 :
177 :
178 : /* ----------
179 : * Local function forward declarations
180 : * ----------
181 : */
182 :
183 : static void pgstat_write_statsfile(void);
184 : static void pgstat_read_statsfile(void);
185 :
186 : static void pgstat_init_snapshot_fixed(void);
187 :
188 : static void pgstat_reset_after_failure(void);
189 :
190 : static bool pgstat_flush_pending_entries(bool nowait);
191 :
192 : static void pgstat_prep_snapshot(void);
193 : static void pgstat_build_snapshot(void);
194 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195 :
196 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
197 :
198 :
199 : /* ----------
200 : * GUC parameters
201 : * ----------
202 : */
203 :
204 : bool pgstat_track_counts = false;
205 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
206 :
207 :
208 : /* ----------
209 : * state shared with pgstat_*.c
210 : * ----------
211 : */
212 :
213 : PgStat_LocalState pgStatLocal;
214 :
215 : /*
216 : * Track pending reports for fixed-numbered stats, used by
217 : * pgstat_report_stat().
218 : */
219 : bool pgstat_report_fixed = false;
220 :
221 : /* ----------
222 : * Local data
223 : *
224 : * NB: There should be only variables related to stats infrastructure here,
225 : * not for specific kinds of stats.
226 : * ----------
227 : */
228 :
229 : /*
230 : * Memory contexts containing the pgStatEntryRefHash table, the
231 : * pgStatSharedRef entries, and pending data respectively. Mostly to make it
232 : * easier to track / attribute memory usage.
233 : */
234 :
235 : static MemoryContext pgStatPendingContext = NULL;
236 :
237 : /*
238 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
239 : *
240 : * Newly pending entries should only ever be added to the end of the list,
241 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
242 : */
243 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
244 :
245 :
246 : /*
247 : * Force the next stats flush to happen regardless of
248 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
249 : */
250 : static bool pgStatForceNextFlush = false;
251 :
252 : /*
253 : * Force-clear existing snapshot before next use when stats_fetch_consistency
254 : * is changed.
255 : */
256 : static bool force_stats_snapshot_clear = false;
257 :
258 :
259 : /*
260 : * For assertions that check pgstat is not used before initialization / after
261 : * shutdown.
262 : */
263 : #ifdef USE_ASSERT_CHECKING
264 : static bool pgstat_is_initialized = false;
265 : static bool pgstat_is_shutdown = false;
266 : #endif
267 :
268 :
269 : /*
270 : * The different kinds of built-in statistics.
271 : *
272 : * If reasonably possible, handling specific to one kind of stats should go
273 : * through this abstraction, rather than making more of pgstat.c aware.
274 : *
275 : * See comments for struct PgStat_KindInfo for details about the individual
276 : * fields.
277 : *
278 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
279 : * seem to be a great way of doing that, given the split across multiple
280 : * files.
281 : */
282 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
283 :
284 : /* stats kinds for variable-numbered objects */
285 :
286 : [PGSTAT_KIND_DATABASE] = {
287 : .name = "database",
288 :
289 : .fixed_amount = false,
290 : .write_to_file = true,
291 : /* so pg_stat_database entries can be seen in all databases */
292 : .accessed_across_databases = true,
293 :
294 : .shared_size = sizeof(PgStatShared_Database),
295 : .shared_data_off = offsetof(PgStatShared_Database, stats),
296 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
297 : .pending_size = sizeof(PgStat_StatDBEntry),
298 :
299 : .flush_pending_cb = pgstat_database_flush_cb,
300 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
301 : },
302 :
303 : [PGSTAT_KIND_RELATION] = {
304 : .name = "relation",
305 :
306 : .fixed_amount = false,
307 : .write_to_file = true,
308 :
309 : .shared_size = sizeof(PgStatShared_Relation),
310 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
311 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
312 : .pending_size = sizeof(PgStat_TableStatus),
313 :
314 : .flush_pending_cb = pgstat_relation_flush_cb,
315 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
316 : },
317 :
318 : [PGSTAT_KIND_FUNCTION] = {
319 : .name = "function",
320 :
321 : .fixed_amount = false,
322 : .write_to_file = true,
323 :
324 : .shared_size = sizeof(PgStatShared_Function),
325 : .shared_data_off = offsetof(PgStatShared_Function, stats),
326 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
327 : .pending_size = sizeof(PgStat_FunctionCounts),
328 :
329 : .flush_pending_cb = pgstat_function_flush_cb,
330 : },
331 :
332 : [PGSTAT_KIND_REPLSLOT] = {
333 : .name = "replslot",
334 :
335 : .fixed_amount = false,
336 : .write_to_file = true,
337 :
338 : .accessed_across_databases = true,
339 :
340 : .shared_size = sizeof(PgStatShared_ReplSlot),
341 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
342 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
343 :
344 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
345 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb, /* wider on-disk identifier, see file header */
346 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
347 : },
348 :
349 : [PGSTAT_KIND_SUBSCRIPTION] = {
350 : .name = "subscription",
351 :
352 : .fixed_amount = false,
353 : .write_to_file = true,
354 : /* so pg_stat_subscription_stats entries can be seen in all databases */
355 : .accessed_across_databases = true,
356 :
357 : .shared_size = sizeof(PgStatShared_Subscription),
358 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
359 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
360 : .pending_size = sizeof(PgStat_BackendSubEntry),
361 :
362 : .flush_pending_cb = pgstat_subscription_flush_cb,
363 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
364 : },
365 :
366 : [PGSTAT_KIND_BACKEND] = {
367 : .name = "backend",
368 :
369 : .fixed_amount = false,
370 : .write_to_file = false, /* this kind is not written to the stats file */
371 :
372 : .accessed_across_databases = true,
373 :
374 : .shared_size = sizeof(PgStatShared_Backend),
375 : .shared_data_off = offsetof(PgStatShared_Backend, stats),
376 : .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
377 :
378 : .flush_static_cb = pgstat_backend_flush_cb,
379 : .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
380 : },
381 :
382 : /* stats for fixed-numbered (mostly 1) objects */
383 :
384 : [PGSTAT_KIND_ARCHIVER] = {
385 : .name = "archiver",
386 :
387 : .fixed_amount = true,
388 : .write_to_file = true,
389 :
390 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
391 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
392 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
393 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
394 :
395 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
396 : .reset_all_cb = pgstat_archiver_reset_all_cb,
397 : .snapshot_cb = pgstat_archiver_snapshot_cb,
398 : },
399 :
400 : [PGSTAT_KIND_BGWRITER] = {
401 : .name = "bgwriter",
402 :
403 : .fixed_amount = true,
404 : .write_to_file = true,
405 :
406 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
407 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
408 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
409 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
410 :
411 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
412 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
413 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
414 : },
415 :
416 : [PGSTAT_KIND_CHECKPOINTER] = {
417 : .name = "checkpointer",
418 :
419 : .fixed_amount = true,
420 : .write_to_file = true,
421 :
422 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
423 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
424 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
425 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
426 :
427 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
428 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
429 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
430 : },
431 :
432 : [PGSTAT_KIND_IO] = {
433 : .name = "io",
434 :
435 : .fixed_amount = true,
436 : .write_to_file = true,
437 :
438 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
439 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
440 : .shared_data_off = offsetof(PgStatShared_IO, stats),
441 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
442 :
443 : .flush_static_cb = pgstat_io_flush_cb,
444 : .init_shmem_cb = pgstat_io_init_shmem_cb,
445 : .reset_all_cb = pgstat_io_reset_all_cb,
446 : .snapshot_cb = pgstat_io_snapshot_cb,
447 : },
448 :
449 : [PGSTAT_KIND_SLRU] = {
450 : .name = "slru",
451 :
452 : .fixed_amount = true,
453 : .write_to_file = true,
454 :
455 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
456 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
457 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
458 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
459 :
460 : .flush_static_cb = pgstat_slru_flush_cb,
461 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
462 : .reset_all_cb = pgstat_slru_reset_all_cb,
463 : .snapshot_cb = pgstat_slru_snapshot_cb,
464 : },
465 :
466 : [PGSTAT_KIND_WAL] = {
467 : .name = "wal",
468 :
469 : .fixed_amount = true,
470 : .write_to_file = true,
471 :
472 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
473 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
474 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
475 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
476 :
477 : .init_backend_cb = pgstat_wal_init_backend_cb,
478 : .flush_static_cb = pgstat_wal_flush_cb,
479 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
480 : .reset_all_cb = pgstat_wal_reset_all_cb,
481 : .snapshot_cb = pgstat_wal_snapshot_cb,
482 : },
483 : };
484 :
485 : /*
486 : * Information about custom statistics kinds.
487 : *
488 : * These are kept in a different array than the built-in kinds, to keep
489 : * the initialization of the built-in kinds clear.
490 : *
491 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
492 : */
493 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
494 :
495 : /* ------------------------------------------------------------
496 : * Functions managing the state of the stats system for all backends.
497 : * ------------------------------------------------------------
498 : */
499 :
/*
 * Load the statistics saved on disk into memory at server start, by
 * delegating to pgstat_read_statsfile().
 *
 * Should only be called by the startup process or in single user mode.
 */
void
pgstat_restore_stats(void)
{
	pgstat_read_statsfile();
}
510 :
511 : /*
512 : * Remove the stats file. This is currently used only if WAL recovery is
513 : * needed after a crash.
514 : *
515 : * Should only be called by the startup process or in single user mode.
516 : */
517 : void
518 350 : pgstat_discard_stats(void)
519 : {
520 : int ret;
521 :
522 : /* NB: this needs to be done even in single user mode */
523 :
524 350 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
525 350 : if (ret != 0)
526 : {
527 348 : if (errno == ENOENT)
528 348 : elog(DEBUG2,
529 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
530 : PGSTAT_STAT_PERMANENT_FILENAME);
531 : else
532 0 : ereport(LOG,
533 : (errcode_for_file_access(),
534 : errmsg("could not unlink permanent statistics file \"%s\": %m",
535 : PGSTAT_STAT_PERMANENT_FILENAME)));
536 : }
537 : else
538 : {
539 2 : ereport(DEBUG2,
540 : (errcode_for_file_access(),
541 : errmsg_internal("unlinked permanent statistics file \"%s\"",
542 : PGSTAT_STAT_PERMANENT_FILENAME)));
543 : }
544 :
545 : /*
546 : * Reset stats contents. This will set reset timestamps of fixed-numbered
547 : * stats to the current time (no variable stats exist).
548 : */
549 350 : pgstat_reset_after_failure();
550 350 : }
551 :
552 : /*
553 : * pgstat_before_server_shutdown() needs to be called by exactly one process
554 : * during regular server shutdowns. Otherwise all stats will be lost.
555 : *
556 : * We currently only write out stats for proc_exit(0). We might want to change
557 : * that at some point... But right now pgstat_discard_stats() would be called
558 : * during the start after a disorderly shutdown, anyway.
559 : */
560 : void
561 1256 : pgstat_before_server_shutdown(int code, Datum arg)
562 : {
563 : Assert(pgStatLocal.shmem != NULL);
564 : Assert(!pgStatLocal.shmem->is_shutdown);
565 :
566 : /*
567 : * Stats should only be reported after pgstat_initialize() and before
568 : * pgstat_shutdown(). This is a convenient point to catch most violations
569 : * of this rule.
570 : */
571 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
572 :
573 : /* flush out our own pending changes before writing out */
574 1256 : pgstat_report_stat(true);
575 :
576 : /*
577 : * Only write out file during normal shutdown. Don't even signal that
578 : * we've shutdown during irregular shutdowns, because the shutdown
579 : * sequence isn't coordinated to ensure this backend shuts down last.
580 : */
581 1256 : if (code == 0)
582 : {
583 1244 : pgStatLocal.shmem->is_shutdown = true;
584 1244 : pgstat_write_statsfile();
585 : }
586 1256 : }
587 :
588 :
589 : /* ------------------------------------------------------------
590 : * Backend initialization / shutdown functions
591 : * ------------------------------------------------------------
592 : */
593 :
594 : /*
595 : * Shut down a single backend's statistics reporting at process exit.
596 : *
597 : * Flush out any remaining statistics counts. Without this, operations
598 : * triggered during backend exit (such as temp table deletions) won't be
599 : * counted.
600 : */
601 : static void
602 43256 : pgstat_shutdown_hook(int code, Datum arg)
603 : {
604 : Assert(!pgstat_is_shutdown);
605 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
606 :
607 : /*
608 : * If we got as far as discovering our own database ID, we can flush out
609 : * what we did so far. Otherwise, we'd be reporting an invalid database
610 : * ID, so forget it. (This means that accesses to pg_database during
611 : * failed backend starts might never get counted.)
612 : */
613 43256 : if (OidIsValid(MyDatabaseId))
614 32298 : pgstat_report_disconnect(MyDatabaseId);
615 :
616 43256 : pgstat_report_stat(true);
617 :
618 : /* there shouldn't be any pending changes left */
619 : Assert(dlist_is_empty(&pgStatPending));
620 43256 : dlist_init(&pgStatPending);
621 :
622 : /* drop the backend stats entry */
623 43256 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
624 0 : pgstat_request_entry_refs_gc();
625 :
626 43256 : pgstat_detach_shmem();
627 :
628 : #ifdef USE_ASSERT_CHECKING
629 : pgstat_is_shutdown = true;
630 : #endif
631 43256 : }
632 :
633 : /*
634 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
635 : * BaseInit().
636 : *
637 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
638 : */
639 : void
640 43256 : pgstat_initialize(void)
641 : {
642 : Assert(!pgstat_is_initialized);
643 :
644 43256 : pgstat_attach_shmem();
645 :
646 43256 : pgstat_init_snapshot_fixed();
647 :
648 : /* Backend initialization callbacks */
649 1427448 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
650 : {
651 1384192 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
652 :
653 1384192 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
654 1340936 : continue;
655 :
656 43256 : kind_info->init_backend_cb();
657 : }
658 :
659 : /* Set up a process-exit hook to clean up */
660 43256 : before_shmem_exit(pgstat_shutdown_hook, 0);
661 :
662 : #ifdef USE_ASSERT_CHECKING
663 : pgstat_is_initialized = true;
664 : #endif
665 43256 : }
666 :
667 :
668 : /* ------------------------------------------------------------
669 : * Public functions used by backends follow
670 : * ------------------------------------------------------------
671 : */
672 :
673 : /*
674 : * Must be called by processes that performs DML: tcop/postgres.c, logical
675 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
676 : * shared memory.
677 : *
678 : * Unless called with 'force', pending stats updates are flushed happen once
679 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
680 : * block on lock acquisition, except if stats updates have been pending for
681 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
682 : *
683 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
684 : * suggested idle timeout is returned. Currently this is always
685 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
686 : * a timeout after which to call pgstat_report_stat(true), but are not
687 : * required to do so.
688 : *
689 : * Note that this is called only when not within a transaction, so it is fair
690 : * to use transaction stop time as an approximation of current time.
691 : */
692 : long
693 606876 : pgstat_report_stat(bool force)
694 : {
695 : static TimestampTz pending_since = 0;
696 : static TimestampTz last_flush = 0;
697 : bool partial_flush;
698 : TimestampTz now;
699 : bool nowait;
700 :
701 : pgstat_assert_is_up();
702 : Assert(!IsTransactionOrTransactionBlock());
703 :
704 : /* "absorb" the forced flush even if there's nothing to flush */
705 606876 : if (pgStatForceNextFlush)
706 : {
707 484 : force = true;
708 484 : pgStatForceNextFlush = false;
709 : }
710 :
711 : /* Don't expend a clock check if nothing to do */
712 606876 : if (dlist_is_empty(&pgStatPending) &&
713 18392 : !pgstat_report_fixed)
714 : {
715 13336 : return 0;
716 : }
717 :
718 : /*
719 : * There should never be stats to report once stats are shut down. Can't
720 : * assert that before the checks above, as there is an unconditional
721 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
722 : * the process that ran pgstat_before_server_shutdown() will still call.
723 : */
724 : Assert(!pgStatLocal.shmem->is_shutdown);
725 :
726 593540 : if (force)
727 : {
728 : /*
729 : * Stats reports are forced either when it's been too long since stats
730 : * have been reported or in processes that force stats reporting to
731 : * happen at specific points (including shutdown). In the former case
732 : * the transaction stop time might be quite old, in the latter it
733 : * would never get cleared.
734 : */
735 41646 : now = GetCurrentTimestamp();
736 : }
737 : else
738 : {
739 551894 : now = GetCurrentTransactionStopTimestamp();
740 :
741 1050570 : if (pending_since > 0 &&
742 498676 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
743 : {
744 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
745 0 : force = true;
746 : }
747 551894 : else if (last_flush > 0 &&
748 526412 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
749 : {
750 : /* don't flush too frequently */
751 523982 : if (pending_since == 0)
752 27292 : pending_since = now;
753 :
754 523982 : return PGSTAT_IDLE_INTERVAL;
755 : }
756 : }
757 :
758 69558 : pgstat_update_dbstats(now);
759 :
760 : /* don't wait for lock acquisition when !force */
761 69558 : nowait = !force;
762 :
763 69558 : partial_flush = false;
764 :
765 : /* flush of variable-numbered stats tracked in pending entries list */
766 69558 : partial_flush |= pgstat_flush_pending_entries(nowait);
767 :
768 : /* flush of other stats kinds */
769 69558 : if (pgstat_report_fixed)
770 : {
771 2227698 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
772 : {
773 2160192 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
774 :
775 2160192 : if (!kind_info)
776 1349800 : continue;
777 810392 : if (!kind_info->flush_static_cb)
778 540368 : continue;
779 :
780 270024 : partial_flush |= kind_info->flush_static_cb(nowait);
781 : }
782 : }
783 :
784 69558 : last_flush = now;
785 :
786 : /*
787 : * If some of the pending stats could not be flushed due to lock
788 : * contention, let the caller know when to retry.
789 : */
790 69558 : if (partial_flush)
791 : {
792 : /* force should have prevented us from getting here */
793 : Assert(!force);
794 :
795 : /* remember since when stats have been pending */
796 22 : if (pending_since == 0)
797 16 : pending_since = now;
798 :
799 22 : return PGSTAT_IDLE_INTERVAL;
800 : }
801 :
802 69536 : pending_since = 0;
803 69536 : pgstat_report_fixed = false;
804 :
805 69536 : return 0;
806 : }
807 :
808 : /*
809 : * Force locally pending stats to be flushed during the next
810 : * pgstat_report_stat() call. This is useful for writing tests.
811 : */
812 : void
813 484 : pgstat_force_next_flush(void)
814 : {
815 484 : pgStatForceNextFlush = true;
816 484 : }
817 :
818 : /*
819 : * Only for use by pgstat_reset_counters()
820 : */
821 : static bool
822 23220 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
823 : {
824 23220 : return entry->key.dboid == MyDatabaseId;
825 : }
826 :
827 : /*
828 : * Reset counters for our database.
829 : *
830 : * Permission checking for this function is managed through the normal
831 : * GRANT system.
832 : */
833 : void
834 26 : pgstat_reset_counters(void)
835 : {
836 26 : TimestampTz ts = GetCurrentTimestamp();
837 :
838 26 : pgstat_reset_matching_entries(match_db_entries,
839 : ObjectIdGetDatum(MyDatabaseId),
840 : ts);
841 26 : }
842 :
843 : /*
844 : * Reset a single variable-numbered entry.
845 : *
846 : * If the stats kind is within a database, also reset the database's
847 : * stat_reset_timestamp.
848 : *
849 : * Permission checking for this function is managed through the normal
850 : * GRANT system.
851 : */
852 : void
853 44 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
854 : {
855 44 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
856 44 : TimestampTz ts = GetCurrentTimestamp();
857 :
858 : /* not needed atm, and doesn't make sense with the current signature */
859 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
860 :
861 : /* reset the "single counter" */
862 44 : pgstat_reset_entry(kind, dboid, objid, ts);
863 :
864 44 : if (!kind_info->accessed_across_databases)
865 16 : pgstat_reset_database_timestamp(dboid, ts);
866 44 : }
867 :
868 : /*
869 : * Reset stats for all entries of a kind.
870 : *
871 : * Permission checking for this function is managed through the normal
872 : * GRANT system.
873 : */
874 : void
875 58 : pgstat_reset_of_kind(PgStat_Kind kind)
876 : {
877 58 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
878 58 : TimestampTz ts = GetCurrentTimestamp();
879 :
880 58 : if (kind_info->fixed_amount)
881 50 : kind_info->reset_all_cb(ts);
882 : else
883 8 : pgstat_reset_entries_of_kind(kind, ts);
884 58 : }
885 :
886 :
887 : /* ------------------------------------------------------------
888 : * Fetching of stats
889 : * ------------------------------------------------------------
890 : */
891 :
892 : /*
893 : * Discard any data collected in the current transaction. Any subsequent
894 : * request will cause new snapshots to be read.
895 : *
896 : * This is also invoked during transaction commit or abort to discard
897 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
898 : * cause this routine to be called.
 *
 * Concretely this marks every fixed-numbered snapshot (builtin and custom)
 * as invalid, forgets the snapshot hash table, sets the snapshot mode back
 * to PGSTAT_FETCH_CONSISTENCY_NONE, deletes the snapshot memory context if
 * one exists, forwards the request to the backend activity snapshot code
 * and finally clears force_stats_snapshot_clear.
899 : */
900 : void
901 1057294 : pgstat_clear_snapshot(void)
902 : {
903 : pgstat_assert_is_up();
904 :
/* Invalidate all fixed-numbered snapshots, builtin and custom */
905 1057294 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
906 : sizeof(pgStatLocal.snapshot.fixed_valid));
907 1057294 : memset(&pgStatLocal.snapshot.custom_valid, 0,
908 : sizeof(pgStatLocal.snapshot.custom_valid));
/*
 * Only the pointer is dropped here. NOTE(review): the table is created
 * with the snapshot context as its first argument in
 * pgstat_prep_snapshot(), so it is presumably freed together with that
 * context below - confirm.
 */
909 1057294 : pgStatLocal.snapshot.stats = NULL;
910 1057294 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
911 :
912 : /* Release memory, if any was allocated */
913 1057294 : if (pgStatLocal.snapshot.context)
914 : {
915 1156 : MemoryContextDelete(pgStatLocal.snapshot.context);
916 :
917 : /* Reset variables */
918 1156 : pgStatLocal.snapshot.context = NULL;
919 : }
920 :
921 : /*
922 : * Historically the backend_status.c facilities lived in this file, and
923 : * were reset with the same function. For now keep it that way, and
924 : * forward the reset request.
925 : */
926 1057294 : pgstat_clear_backend_activity_snapshot();
927 :
928 : /* Reset this flag, as it may be possible that a cleanup was forced. */
929 1057294 : force_stats_snapshot_clear = false;
930 1057294 : }
931 :
/*
 * Fetch the stats data of one variable-numbered entry, identified by kind,
 * dboid and objid.
 *
 * Returns a pointer to a private copy of the entry data (shared_data_len
 * bytes for the kind), or NULL if there is no entry for the key or the
 * entry is marked as dropped.
 *
 * The behavior depends on pgstat_fetch_consistency:
 * - PGSTAT_FETCH_CONSISTENCY_NONE: shared stats are read on every call and
 *   the copy is allocated with palloc().
 * - PGSTAT_FETCH_CONSISTENCY_CACHE: the first result for a key, including a
 *   not-found result (stored with data = NULL), is remembered in
 *   pgStatLocal.snapshot.stats and handed out again by later calls.
 * - PGSTAT_FETCH_CONSISTENCY_SNAPSHOT: a full snapshot is built first
 *   (nothing happens if it was already built) and the answer comes only
 *   from that snapshot.
 *
 * Must not be used for fixed-numbered kinds (asserted).
 */
932 : void *
933 658632 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
934 : {
935 : PgStat_HashKey key;
936 : PgStat_EntryRef *entry_ref;
937 : void *stats_data;
938 658632 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
939 :
940 : /* should be called from backends */
941 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
942 : Assert(!kind_info->fixed_amount);
943 :
944 658632 : pgstat_prep_snapshot();
945 :
946 : /* clear padding */
947 658632 : memset(&key, 0, sizeof(struct PgStat_HashKey));
948 :
949 658632 : key.kind = kind;
950 658632 : key.dboid = dboid;
951 658632 : key.objid = objid;
952 :
953 : /* if we need to build a full snapshot, do so */
954 658632 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
955 460 : pgstat_build_snapshot();
956 :
957 : /* if caching is desired, look up in cache */
958 658632 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
959 : {
960 9830 : PgStat_SnapshotEntry *entry = NULL;
961 :
962 9830 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
963 :
/* a cached not-found result has data == NULL, so NULL is returned */
964 9830 : if (entry)
965 764 : return entry->data;
966 :
967 : /*
968 : * If we built a full snapshot and the key is not in
969 : * pgStatLocal.snapshot.stats, there are no matching stats.
970 : */
971 9066 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
972 28 : return NULL;
973 : }
974 :
/* remember the consistency mode in effect for this fetch */
975 657840 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
976 :
977 657840 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
978 :
979 657840 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
980 : {
981 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
982 10726 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
983 : {
984 1780 : PgStat_SnapshotEntry *entry = NULL;
985 : bool found;
986 :
987 1780 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
988 : Assert(!found);
989 1780 : entry->data = NULL;
990 : }
991 10726 : return NULL;
992 : }
993 :
994 : /*
995 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
996 : * otherwise we could quickly end up with a fair bit of memory used due to
997 : * repeated accesses.
998 : */
999 647114 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1000 639856 : stats_data = palloc(kind_info->shared_data_len);
1001 : else
1002 7258 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1003 7258 : kind_info->shared_data_len);
1004 :
/* copy the data out while holding the entry lock in shared mode */
1005 647114 : (void) pgstat_lock_entry_shared(entry_ref, false);
1006 1294228 : memcpy(stats_data,
1007 647114 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1008 647114 : kind_info->shared_data_len);
1009 647114 : pgstat_unlock_entry(entry_ref);
1010 :
1011 647114 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1012 : {
1013 7258 : PgStat_SnapshotEntry *entry = NULL;
1014 : bool found;
1015 :
/*
 * NOTE(review): found is not examined here. The lookup above missed for
 * this key, so the insert is presumably always a fresh entry - confirm.
 */
1016 7258 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1017 7258 : entry->data = stats_data;
1018 : }
1019 :
1020 647114 : return stats_data;
1021 : }
1022 :
1023 : /*
1024 : * If a stats snapshot has been taken, return the timestamp at which that was
1025 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1026 : * false.
 *
 * The return value is 0 when there is no snapshot. A snapshot only counts
 * if the current snapshot mode is PGSTAT_FETCH_CONSISTENCY_SNAPSHOT. A
 * pending forced snapshot clear is carried out before the check.
1027 : */
1028 : TimestampTz
1029 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1030 : {
1031 60 : if (force_stats_snapshot_clear)
1032 18 : pgstat_clear_snapshot();
1033 :
1034 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1035 : {
1036 24 : *have_snapshot = true;
1037 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1038 : }
1039 :
1040 36 : *have_snapshot = false;
1041 :
1042 36 : return 0;
1043 : }
1044 :
/*
 * Report whether stats exist for the object identified by kind, dboid and
 * objid.
 *
 * Fixed-numbered kinds always yield true. For all other kinds the result is
 * true exactly when pgstat_get_entry_ref() returns an entry reference; the
 * lookup passes false as its fourth argument (presumably meaning that no
 * entry is created - confirm against pgstat_get_entry_ref()).
 *
 * NOTE(review): unlike pgstat_fetch_entry(), this does not look at the
 * dropped flag of the shared entry.
 */
1045 : bool
1046 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1047 : {
1048 : /* fixed-numbered stats always exist */
1049 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1050 12 : return true;
1051 :
1052 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1053 : }
1054 :
1055 : /*
1056 : * Ensure snapshot for fixed-numbered 'kind' exists.
1057 : *
1058 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1059 : * massaging the data into the desired format.
 *
 * With PGSTAT_FETCH_CONSISTENCY_SNAPSHOT the full snapshot (all kinds) is
 * built; in the other modes only the snapshot of this one kind is built.
 * A pending forced snapshot clear is carried out first. On return the
 * validity flag of the kind is expected to be set (asserted).
1060 : */
1061 : void
1062 506 : pgstat_snapshot_fixed(PgStat_Kind kind)
1063 : {
1064 : Assert(pgstat_is_kind_valid(kind));
1065 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1066 :
1067 506 : if (force_stats_snapshot_clear)
1068 0 : pgstat_clear_snapshot();
1069 :
1070 506 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1071 24 : pgstat_build_snapshot();
1072 : else
1073 482 : pgstat_build_snapshot_fixed(kind);
1074 :
/* builtin kinds are tracked in fixed_valid, custom kinds in custom_valid */
1075 506 : if (pgstat_is_kind_builtin(kind))
1076 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1077 6 : else if (pgstat_is_kind_custom(kind))
1078 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1079 506 : }
1080 :
/*
 * Allocate the snapshot buffers of fixed-numbered custom stats kinds.
 *
 * For every custom kind that is registered and has fixed_amount set, a
 * buffer of shared_data_len bytes is allocated in TopMemoryContext and
 * stored in pgStatLocal.snapshot.custom_data[], indexed by the offset of
 * the kind from PGSTAT_KIND_CUSTOM_MIN.
 */
1081 : static void
1082 43256 : pgstat_init_snapshot_fixed(void)
1083 : {
1084 : /*
1085 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1086 : * stats kinds.
1087 : */
1088 432560 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1089 : {
1090 389304 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1091 :
/* skip IDs with nothing registered, and variable-numbered kinds */
1092 389304 : if (!kind_info || !kind_info->fixed_amount)
1093 389180 : continue;
1094 :
1095 124 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1096 124 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1097 : }
1098 43256 : }
1099 :
/*
 * Make sure the backend-local snapshot hash table exists when the current
 * fetch consistency mode needs one.
 *
 * A pending forced snapshot clear is carried out first. Nothing else is
 * done with PGSTAT_FETCH_CONSISTENCY_NONE, or if the table already exists.
 * Otherwise the "PgStat Snapshot" memory context is created under
 * TopMemoryContext if it does not exist yet, and the table is created with
 * that context.
 */
1100 : static void
1101 658684 : pgstat_prep_snapshot(void)
1102 : {
1103 658684 : if (force_stats_snapshot_clear)
1104 18 : pgstat_clear_snapshot();
1105 :
1106 658684 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1107 9882 : pgStatLocal.snapshot.stats != NULL)
1108 657528 : return;
1109 :
1110 1156 : if (!pgStatLocal.snapshot.context)
1111 1156 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1112 : "PgStat Snapshot",
1113 : ALLOCSET_SMALL_SIZES);
1114 :
1115 1156 : pgStatLocal.snapshot.stats =
1116 1156 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1117 : PGSTAT_SNAPSHOT_HASH_SIZE,
1118 : NULL);
1119 : }
1120 :
/*
 * Build a complete stats snapshot for PGSTAT_FETCH_CONSISTENCY_SNAPSHOT.
 *
 * Returns immediately if the snapshot mode already says that a snapshot
 * was built. Otherwise the snapshot timestamp is recorded, every entry of
 * the shared hash table that is not dropped and passes the database
 * filter below is copied into pgStatLocal.snapshot.stats, the snapshot of
 * every fixed-numbered kind is built, and finally the snapshot mode is set
 * to PGSTAT_FETCH_CONSISTENCY_SNAPSHOT.
 */
1121 : static void
1122 484 : pgstat_build_snapshot(void)
1123 : {
1124 : dshash_seq_status hstat;
1125 : PgStatShared_HashEntry *p;
1126 :
1127 : /* should only be called when we need a snapshot */
1128 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1129 :
1130 : /* snapshot already built */
1131 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1132 432 : return;
1133 :
1134 52 : pgstat_prep_snapshot();
1135 :
1136 : Assert(pgStatLocal.snapshot.stats->members == 0);
1137 :
1138 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1139 :
1140 : /*
1141 : * Snapshot all variable stats.
1142 : */
1143 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1144 54570 : while ((p = dshash_seq_next(&hstat)) != NULL)
1145 : {
1146 54518 : PgStat_Kind kind = p->key.kind;
1147 54518 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1148 : bool found;
1149 : PgStat_SnapshotEntry *entry;
1150 : PgStatShared_Common *stats_data;
1151 :
1152 : /*
1153 : * Check if the stats object should be included in the snapshot.
1154 : * Unless the stats kind can be accessed from all databases (e.g.,
1155 : * database stats themselves), we only include stats for the current
1156 : * database or objects not associated with a database (e.g. shared
1157 : * relations).
1158 : */
1159 54518 : if (p->key.dboid != MyDatabaseId &&
1160 16268 : p->key.dboid != InvalidOid &&
1161 13388 : !kind_info->accessed_across_databases)
1162 13436 : continue;
1163 :
/* dropped entries are left out of the snapshot */
1164 41286 : if (p->dropped)
1165 204 : continue;
1166 :
1167 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1168 :
1169 41082 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1170 : Assert(stats_data);
1171 :
1172 41082 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1173 : Assert(!found);
1174 :
1175 41082 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1176 : pgstat_get_entry_len(kind));
1177 :
1178 : /*
1179 : * Acquire the LWLock directly instead of using
1180 : * pgstat_lock_entry_shared() which requires a reference.
1181 : */
1182 41082 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1183 41082 : memcpy(entry->data,
1184 41082 : pgstat_get_entry_data(kind, stats_data),
1185 : pgstat_get_entry_len(kind));
1186 41082 : LWLockRelease(&stats_data->lock);
1187 : }
1188 52 : dshash_seq_term(&hstat);
1189 :
1190 : /*
1191 : * Build snapshot of all fixed-numbered stats.
1192 : */
1193 1716 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1194 : {
1195 1664 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1196 :
/* no info: ID is neither builtin nor a registered custom kind */
1197 1664 : if (!kind_info)
1198 1040 : continue;
1199 624 : if (!kind_info->fixed_amount)
1200 : {
1201 : Assert(kind_info->snapshot_cb == NULL);
1202 312 : continue;
1203 : }
1204 :
1205 312 : pgstat_build_snapshot_fixed(kind);
1206 : }
1207 :
/* from now on the early return at the top of this function is taken */
1208 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1209 : }
1210 :
/*
 * Build the snapshot of one fixed-numbered stats kind by calling its
 * snapshot_cb, and mark that snapshot as valid.
 *
 * With PGSTAT_FETCH_CONSISTENCY_NONE the snapshot is rebuilt on every call.
 * In the other modes a snapshot that is already valid is kept and the
 * callback is not called again; this is only expected to happen with
 * PGSTAT_FETCH_CONSISTENCY_CACHE (asserted).
 *
 * The kind must be fixed-numbered and must provide snapshot_cb (asserted).
 */
1211 : static void
1212 8264 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1213 : {
1214 8264 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1215 : int idx;
1216 : bool *valid;
1217 :
1218 : /* Position in fixed_valid or custom_valid */
1219 8264 : if (pgstat_is_kind_builtin(kind))
1220 : {
1221 8252 : idx = kind;
1222 8252 : valid = pgStatLocal.snapshot.fixed_valid;
1223 : }
1224 : else
1225 : {
1226 12 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1227 12 : valid = pgStatLocal.snapshot.custom_valid;
1228 : }
1229 :
1230 : Assert(kind_info->fixed_amount);
1231 : Assert(kind_info->snapshot_cb != NULL);
1232 :
1233 8264 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1234 : {
1235 : /* rebuild every time */
1236 7500 : valid[idx] = false;
1237 : }
1238 764 : else if (valid[idx])
1239 : {
1240 : /* in snapshot mode we shouldn't get called again */
1241 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1242 12 : return;
1243 : }
1244 :
1245 : Assert(!valid[idx]);
1246 :
1247 8252 : kind_info->snapshot_cb();
1248 :
/* the callback is not expected to set the validity flag itself */
1249 : Assert(!valid[idx]);
1250 8252 : valid[idx] = true;
1251 : }
1252 :
1253 :
1254 : /* ------------------------------------------------------------
1255 : * Backend-local pending stats infrastructure
1256 : * ------------------------------------------------------------
1257 : */
1258 :
1259 : /*
1260 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1261 : * stats if not already done.
1262 : *
1263 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1264 : * created, false otherwise.
 *
 * Preparing means: the "PgStat Pending" memory context is created on first
 * use, and an entry without pending data gets a zeroed buffer of
 * pending_size bytes from that context and is appended to the
 * pgStatPending list. The kind must provide flush_pending_cb (asserted).
1265 : */
1266 : PgStat_EntryRef *
1267 3928892 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1268 : {
1269 : PgStat_EntryRef *entry_ref;
1270 :
1271 : /* need to be able to flush out */
1272 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1273 :
1274 3928892 : if (unlikely(!pgStatPendingContext))
1275 : {
1276 34946 : pgStatPendingContext =
1277 34946 : AllocSetContextCreate(TopMemoryContext,
1278 : "PgStat Pending",
1279 : ALLOCSET_SMALL_SIZES);
1280 : }
1281 :
1282 3928892 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1283 : true, created_entry);
1284 :
/* first pending data for this entry: allocate it and queue the entry */
1285 3928892 : if (entry_ref->pending == NULL)
1286 : {
1287 2010748 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1288 :
1289 : Assert(entrysize != (size_t) -1);
1290 :
1291 2010748 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1292 2010748 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1293 : }
1294 :
1295 3928892 : return entry_ref;
1296 : }
1297 :
1298 : /*
1299 : * Return an existing stats entry, or NULL.
1300 : *
 * NULL is returned both when no entry reference is found and when the
 * entry has no pending data.
 *
1301 : * This should only be used by helper functions for pgstatfuncs.c - outside of
1302 : * that it shouldn't be needed.
1303 : */
1304 : PgStat_EntryRef *
1305 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1306 : {
1307 : PgStat_EntryRef *entry_ref;
1308 :
1309 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1310 :
1311 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1312 30 : return NULL;
1313 :
1314 54 : return entry_ref;
1315 : }
1316 :
/*
 * Discard the pending data of an entry.
 *
 * Calls delete_pending_cb of the kind if there is one, frees the pending
 * buffer, clears entry_ref->pending and removes the entry from the list it
 * is linked into through pending_node. The entry must have pending data
 * (asserted).
 */
1317 : void
1318 2010748 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1319 : {
1320 2010748 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1321 2010748 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1322 2010748 : void *pending_data = entry_ref->pending;
1323 :
1324 : Assert(pending_data != NULL);
1325 : /* only variable-numbered (!fixed_amount) kinds are expected here */
1326 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1327 :
/* the callback runs before the pending buffer is freed */
1328 2010748 : if (kind_info->delete_pending_cb)
1329 1896990 : kind_info->delete_pending_cb(entry_ref);
1330 :
1331 2010748 : pfree(pending_data);
1332 2010748 : entry_ref->pending = NULL;
1333 :
1334 2010748 : dlist_delete(&entry_ref->pending_node);
1335 2010748 : }
1336 :
1337 : /*
1338 : * Flush out pending variable-numbered stats.
 *
 * Walks the pgStatPending list and calls flush_pending_cb of the kind of
 * each entry, passing nowait through. Entries that were flushed are
 * removed with pgstat_delete_pending_entry(); the others stay on the list.
 *
 * Returns true if at least one entry could not be flushed and is still
 * pending, false if the list is empty afterwards. A failed flush is only
 * expected when nowait is true (asserted).
1339 : */
1340 : static bool
1341 69558 : pgstat_flush_pending_entries(bool nowait)
1342 : {
1343 69558 : bool have_pending = false;
1344 69558 : dlist_node *cur = NULL;
1345 :
1346 : /*
1347 : * Need to be a bit careful iterating over the list of pending entries.
1348 : * Processing a pending entry may queue further pending entries to the end
1349 : * of the list that we want to process, so a simple iteration won't do.
1350 : * Further complicating matters is that we want to delete the current
1351 : * entry in each iteration from the list if we flushed successfully.
1352 : *
1353 : * So we just keep track of the next pointer in each loop iteration.
1354 : */
1355 69558 : if (!dlist_is_empty(&pgStatPending))
1356 64846 : cur = dlist_head_node(&pgStatPending);
1357 :
1358 2015896 : while (cur)
1359 : {
1360 1946338 : PgStat_EntryRef *entry_ref =
1361 1946338 : dlist_container(PgStat_EntryRef, pending_node, cur);
1362 1946338 : PgStat_HashKey key = entry_ref->shared_entry->key;
1363 1946338 : PgStat_Kind kind = key.kind;
1364 1946338 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1365 : bool did_flush;
1366 : dlist_node *next;
1367 :
1368 : Assert(!kind_info->fixed_amount);
1369 : Assert(kind_info->flush_pending_cb != NULL);
1370 :
1371 : /* flush the stats, if possible */
1372 1946338 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1373 :
1374 : Assert(did_flush || nowait);
1375 :
1376 : /* determine next entry, before deleting the pending entry */
1377 1946338 : if (dlist_has_next(&pgStatPending, cur))
1378 1881492 : next = dlist_next_node(&pgStatPending, cur);
1379 : else
1380 64846 : next = NULL;
1381 :
1382 : /* if successfully flushed, remove entry */
1383 1946338 : if (did_flush)
1384 1946296 : pgstat_delete_pending_entry(entry_ref);
1385 : else
1386 42 : have_pending = true;
1387 :
1388 1946338 : cur = next;
1389 : }
1390 :
/* the list is empty exactly when every entry was flushed */
1391 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1392 :
1393 69558 : return have_pending;
1394 : }
1395 :
1396 :
1397 : /* ------------------------------------------------------------
1398 : * Helper / infrastructure functions
1399 : * ------------------------------------------------------------
1400 : */
1401 :
/*
 * Look up a stats kind by its name.
 *
 * kind_str is compared case-insensitively (pg_strcasecmp), first against
 * the names of the builtin kinds and then, if any custom kinds have been
 * registered, against the names of the registered custom kinds. The first
 * match is returned.
 *
 * If nothing matches, an ERROR with ERRCODE_INVALID_PARAMETER_VALUE is
 * reported.
 */
1402 : PgStat_Kind
1403 166 : pgstat_get_kind_from_str(char *kind_str)
1404 : {
1405 494 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1406 : {
1407 488 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1408 160 : return kind;
1409 : }
1410 :
1411 : /* Check the custom set of cumulative stats */
1412 6 : if (pgstat_kind_custom_infos)
1413 : {
1414 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1415 : {
1416 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1417 :
/* slots without a registered kind are NULL and are skipped */
1418 0 : if (pgstat_kind_custom_infos[idx] &&
1419 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1420 0 : return kind;
1421 : }
1422 : }
1423 :
1424 6 : ereport(ERROR,
1425 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1426 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1427 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1428 : }
1429 :
/*
 * Is kind either a builtin or a custom stats kind?
 *
 * This does not tell whether a custom kind has actually been registered;
 * pgstat_get_kind_info() can still return NULL for a kind accepted here.
 */
1430 : static inline bool
1431 767252 : pgstat_is_kind_valid(PgStat_Kind kind)
1432 : {
1433 767252 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1434 : }
1435 :
/*
 * Return the PgStat_KindInfo describing a stats kind.
 *
 * For a builtin kind this is the address of its slot in
 * pgstat_kind_builtin_infos[]. For a custom kind it is the pointer stored
 * by pgstat_register_kind(), or NULL if no custom kinds exist at all or
 * nothing is registered under that ID. NULL is also returned for a kind
 * that is neither builtin nor custom.
 */
1436 : const PgStat_KindInfo *
1437 14532494 : pgstat_get_kind_info(PgStat_Kind kind)
1438 : {
1439 14532494 : if (pgstat_is_kind_builtin(kind))
1440 11795466 : return &pgstat_kind_builtin_infos[kind];
1441 :
1442 2737028 : if (pgstat_is_kind_custom(kind))
1443 : {
1444 1475944 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1445 :
1446 1475944 : if (pgstat_kind_custom_infos == NULL ||
1447 4118 : pgstat_kind_custom_infos[idx] == NULL)
1448 1474962 : return NULL;
1449 982 : return pgstat_kind_custom_infos[idx];
1450 : }
1451 :
1452 1261084 : return NULL;
1453 : }
1454 :
1455 : /*
1456 : * Register a new stats kind.
1457 : *
1458 : * PgStat_Kinds must be globally unique across all extensions. Refer
1459 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1460 : * unique ID for your extension, to avoid conflicts with other extension
1461 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1462 : * needlessly reserving a new ID.
 *
 * An ERROR is reported, and nothing is registered, if
 * - kind_info->name is NULL or empty,
 * - kind is not a custom kind ID,
 * - the call is not made while shared_preload_libraries are processed,
 * - the kind is fixed-numbered but has a shared_size of 0,
 * - something with a name is already registered under the same ID, or
 * - another custom kind already uses the same name (compared
 *   case-insensitively; names of builtin kinds are not checked here).
 *
 * On success the kind_info pointer itself is stored (no copy is made) and
 * a LOG message is emitted. The array holding the registered custom kinds
 * is allocated in TopMemoryContext on the first registration.
1463 : */
1464 : void
1465 16 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1466 : {
/* only meaningful once kind has passed the custom range check below */
1467 16 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1468 :
1469 16 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1470 0 : ereport(ERROR,
1471 : (errmsg("custom cumulative statistics name is invalid"),
1472 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1473 :
1474 16 : if (!pgstat_is_kind_custom(kind))
1475 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1476 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1477 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1478 :
1479 16 : if (!process_shared_preload_libraries_in_progress)
1480 0 : ereport(ERROR,
1481 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1482 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1483 :
1484 : /*
1485 : * Check some data for fixed-numbered stats.
1486 : */
1487 16 : if (kind_info->fixed_amount)
1488 : {
1489 8 : if (kind_info->shared_size == 0)
1490 0 : ereport(ERROR,
1491 : (errmsg("custom cumulative statistics property is invalid"),
1492 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1493 : }
1494 :
1495 : /*
1496 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1497 : */
1498 16 : if (pgstat_kind_custom_infos == NULL)
1499 : {
1500 8 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1501 8 : MemoryContextAllocZero(TopMemoryContext,
1502 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1503 : }
1504 :
/* refuse to overwrite a kind already registered under this ID */
1505 16 : if (pgstat_kind_custom_infos[idx] != NULL &&
1506 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1507 0 : ereport(ERROR,
1508 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1509 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1510 : pgstat_kind_custom_infos[idx]->name)));
1511 :
1512 : /* check for existing custom stats with the same name */
1513 160 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1514 : {
1515 144 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1516 :
1517 144 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1518 136 : continue;
1519 8 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1520 0 : ereport(ERROR,
1521 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1522 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1523 : }
1524 :
1525 : /* Register it */
1526 16 : pgstat_kind_custom_infos[idx] = kind_info;
1527 16 : ereport(LOG,
1528 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1529 : kind_info->name, kind)));
1530 16 : }
1531 :
1532 : /*
1533 : * Stats should only be reported after pgstat_initialize() and before
1534 : * pgstat_shutdown(). This check is put in a few central places to catch
1535 : * violations of this rule more easily.
 *
 * The function is only defined here when USE_ASSERT_CHECKING is set; it
 * asserts that pgstat_is_initialized is true and pgstat_is_shutdown is
 * false.
1536 : */
1537 : #ifdef USE_ASSERT_CHECKING
1538 : void
1539 : pgstat_assert_is_up(void)
1540 : {
1541 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
1542 : }
1543 : #endif
1544 :
1545 :
1546 : /* ------------------------------------------------------------
1547 : * reading and writing of on-disk stats file
1548 : * ------------------------------------------------------------
1549 : */
1550 :
1551 : /* helpers for pgstat_write_statsfile() */
/*
 * Write len bytes starting at ptr to fpout, as a single fwrite() item.
 *
 * The result of fwrite() is deliberately ignored; the caller checks the
 * stream with ferror() once after all output has been produced.
 */
1552 : static void
1553 759718 : write_chunk(FILE *fpout, void *ptr, size_t len)
1554 : {
1555 : int rc;
1556 :
1557 759718 : rc = fwrite(ptr, len, 1, fpout);
1558 :
1559 : /* we'll check for errors with ferror once at the end */
1560 : (void) rc;
1561 759718 : }
1562 :
/* Write the object that ptr points to, using sizeof(*ptr) as the length */
1563 : #define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1564 :
1565 : /*
 * Write the current stats to the permanent stats file.
 *
1566 : * This function is called in the last process that is accessing the shared
1567 : * stats so locking is not required.
 *
 * Layout of the file, in the order written below:
 * - the format ID (int32, PGSTAT_FILE_FORMAT_ID)
 * - for every fixed-numbered kind with write_to_file set: one byte
 *   PGSTAT_FILE_ENTRY_FIXED, the PgStat_Kind, then shared_data_len bytes
 *   taken from the snapshot of that kind
 * - for every entry of the shared hash table whose kind has write_to_file
 *   set: either one byte PGSTAT_FILE_ENTRY_HASH and the PgStat_HashKey, or,
 *   for kinds providing to_serialized_name, one byte
 *   PGSTAT_FILE_ENTRY_NAME, the PgStat_Kind and a NameData; followed in
 *   both cases by pgstat_get_entry_len() bytes of entry data
 * - one byte PGSTAT_FILE_ENTRY_END
 *
 * The data goes to a temporary file first, which is moved into place with
 * durable_rename(). Failures to open, write, close or rename are reported
 * at LOG level only; except for the open failure, the temporary file is
 * then removed. As a side effect pgstat_fetch_consistency is set to
 * PGSTAT_FETCH_CONSISTENCY_NONE.
1568 : */
1569 : static void
1570 1244 : pgstat_write_statsfile(void)
1571 : {
1572 : FILE *fpout;
1573 : int32 format_id;
1574 1244 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1575 1244 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1576 : dshash_seq_status hstat;
1577 : PgStatShared_HashEntry *ps;
1578 :
1579 : pgstat_assert_is_up();
1580 :
1581 : /* should be called only by the checkpointer or single user mode */
1582 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1583 :
1584 : /* we're shutting down, so it's ok to just override this */
1585 1244 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1586 :
1587 1244 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
1588 :
1589 : /*
1590 : * Open the statistics temp file to write out the current values.
1591 : */
1592 1244 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1593 1244 : if (fpout == NULL)
1594 : {
1595 0 : ereport(LOG,
1596 : (errcode_for_file_access(),
1597 : errmsg("could not open temporary statistics file \"%s\": %m",
1598 : tmpfile)));
1599 0 : return;
1600 : }
1601 :
1602 : /*
1603 : * Write the file header --- currently just a format ID.
1604 : */
1605 1244 : format_id = PGSTAT_FILE_FORMAT_ID;
1606 1244 : write_chunk_s(fpout, &format_id);
1607 :
1608 : /* Write various stats structs for fixed number of objects */
1609 41052 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1610 : {
1611 : char *ptr;
1612 39808 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1613 :
1614 39808 : if (!info || !info->fixed_amount)
1615 32338 : continue;
1616 :
1617 7470 : if (pgstat_is_kind_builtin(kind))
1618 : Assert(info->snapshot_ctl_off != 0);
1619 :
1620 : /* skip if no need to write to file */
1621 7470 : if (!info->write_to_file)
1622 0 : continue;
1623 :
/*
 * Consistency was set to NONE above, so this rebuilds the snapshot of the
 * kind; the data is then written from the snapshot buffer.
 */
1624 7470 : pgstat_build_snapshot_fixed(kind);
1625 7470 : if (pgstat_is_kind_builtin(kind))
1626 7464 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1627 : else
1628 6 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1629 :
1630 7470 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1631 7470 : write_chunk_s(fpout, &kind);
1632 7470 : write_chunk(fpout, ptr, info->shared_data_len);
1633 : }
1634 :
1635 : /*
1636 : * Walk through the stats entries
1637 : */
1638 1244 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1639 373146 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1640 : {
1641 : PgStatShared_Common *shstats;
1642 371902 : const PgStat_KindInfo *kind_info = NULL;
1643 :
1644 371902 : CHECK_FOR_INTERRUPTS();
1645 :
1646 : /*
1647 : * We should not see any "dropped" entries when writing the stats
1648 : * file, as all backends and auxiliary processes should have cleaned
1649 : * up their references before they terminated.
1650 : *
1651 : * However, since we are already shutting down, it is not worth
1652 : * crashing the server over any potential cleanup issues, so we simply
1653 : * skip such entries if encountered.
1654 : */
1655 : Assert(!ps->dropped);
1656 371902 : if (ps->dropped)
1657 0 : continue;
1658 :
1659 : /*
1660 : * This discards data related to custom stats kinds that are unknown
1661 : * to this process.
 *
 * NOTE(review): pgstat_is_kind_valid() accepts any custom kind ID,
 * while pgstat_get_kind_info() returns NULL for a custom ID with
 * nothing registered. For such an entry kind_info would be NULL when
 * write_to_file is read below. Presumably the shared hash table never
 * holds entries of unregistered kinds - confirm.
1662 : */
1663 371902 : if (!pgstat_is_kind_valid(ps->key.kind))
1664 : {
1665 0 : elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
1666 : ps->key.kind, ps->key.dboid,
1667 : ps->key.objid);
1668 0 : continue;
1669 : }
1670 :
1671 371902 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1672 :
1673 371902 : kind_info = pgstat_get_kind_info(ps->key.kind);
1674 :
1675 : /* if not dropped the valid-entry refcount should exist */
1676 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1677 :
1678 : /* skip if no need to write to file */
1679 371902 : if (!kind_info->write_to_file)
1680 214 : continue;
1681 :
1682 371688 : if (!kind_info->to_serialized_name)
1683 : {
1684 : /* normal stats entry, identified by PgStat_HashKey */
1685 371530 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1686 371530 : write_chunk_s(fpout, &ps->key);
1687 : }
1688 : else
1689 : {
1690 : /* stats entry identified by name on disk (e.g. slots) */
1691 : NameData name;
1692 :
1693 158 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1694 :
1695 158 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1696 158 : write_chunk_s(fpout, &ps->key.kind);
1697 158 : write_chunk_s(fpout, &name);
1698 : }
1699 :
1700 : /* Write except the header part of the entry */
1701 371688 : write_chunk(fpout,
1702 : pgstat_get_entry_data(ps->key.kind, shstats),
1703 : pgstat_get_entry_len(ps->key.kind));
1704 : }
1705 1244 : dshash_seq_term(&hstat);
1706 :
1707 : /*
1708 : * No more output to be done. Close the temp file and replace the old
1709 : * pgstat.stat with it. The ferror() check replaces testing for error
1710 : * after each individual fputc or fwrite (in write_chunk()) above.
1711 : */
1712 1244 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1713 :
1714 1244 : if (ferror(fpout))
1715 : {
1716 0 : ereport(LOG,
1717 : (errcode_for_file_access(),
1718 : errmsg("could not write temporary statistics file \"%s\": %m",
1719 : tmpfile)));
1720 0 : FreeFile(fpout);
1721 0 : unlink(tmpfile);
1722 : }
1723 1244 : else if (FreeFile(fpout) < 0)
1724 : {
1725 0 : ereport(LOG,
1726 : (errcode_for_file_access(),
1727 : errmsg("could not close temporary statistics file \"%s\": %m",
1728 : tmpfile)));
1729 0 : unlink(tmpfile);
1730 : }
1731 1244 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1732 : {
1733 : /* durable_rename already emitted log message */
1734 0 : unlink(tmpfile);
1735 : }
1736 : }
1737 :
1738 : /* helpers for pgstat_read_statsfile() */
/*
 * Read len bytes from fpin into the buffer at ptr.
 *
 * Returns true only if all len bytes could be read; a short read yields
 * false.
 */
1739 : static bool
1740 792190 : read_chunk(FILE *fpin, void *ptr, size_t len)
1741 : {
1742 792190 : return fread(ptr, 1, len, fpin) == len;
1743 : }
1744 :
/* Read into the object that ptr points to, using sizeof(*ptr) as length */
1745 : #define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1746 :
1747 : /*
1748 : * Reads the existing statistics file into memory.
1749 : *
1750 : * This function is called in the only process that is accessing the shared
1751 : * stats so locking is not required.
1752 : */
1753 : static void
1754 1492 : pgstat_read_statsfile(void)
1755 : {
1756 : FILE *fpin;
1757 : int32 format_id;
1758 : bool found;
1759 1492 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1760 1492 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1761 :
1762 : /* shouldn't be called from postmaster */
1763 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1764 :
1765 1492 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1766 :
1767 : /*
1768 : * Try to open the stats file. If it doesn't exist, the backends simply
1769 : * return zero for anything and statistics simply start from scratch
1770 : * with empty counters.
1771 : *
1772 : * ENOENT is a possibility if stats collection was previously disabled or
1773 : * has not yet written the stats file for the first time. Any other
1774 : * failure condition is suspicious.
1775 : */
1776 1492 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1777 : {
1778 102 : if (errno != ENOENT)
1779 0 : ereport(LOG,
1780 : (errcode_for_file_access(),
1781 : errmsg("could not open statistics file \"%s\": %m",
1782 : statfile)));
1783 102 : pgstat_reset_after_failure();
1784 102 : return;
1785 : }
1786 :
1787 : /*
1788 : * Verify it's of the expected format.
1789 : */
1790 1390 : if (!read_chunk_s(fpin, &format_id))
1791 : {
1792 0 : elog(WARNING, "could not read format ID");
1793 0 : goto error;
1794 : }
1795 :
1796 1390 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1797 : {
1798 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1799 : format_id, PGSTAT_FILE_FORMAT_ID);
1800 2 : goto error;
1801 : }
1802 :
1803 : /*
1804 : * We found an existing statistics file. Read it and put all the stats
1805 : * data into place.
1806 : */
1807 : for (;;)
1808 395348 : {
1809 396736 : int t = fgetc(fpin);
1810 :
1811 396736 : switch (t)
1812 : {
1813 8332 : case PGSTAT_FILE_ENTRY_FIXED:
1814 : {
1815 : PgStat_Kind kind;
1816 : const PgStat_KindInfo *info;
1817 : char *ptr;
1818 :
1819 : /* entry for fixed-numbered stats */
1820 8332 : if (!read_chunk_s(fpin, &kind))
1821 : {
1822 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1823 2 : goto error;
1824 : }
1825 :
1826 8332 : if (!pgstat_is_kind_valid(kind))
1827 : {
1828 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1829 : kind, t);
1830 0 : goto error;
1831 : }
1832 :
1833 8332 : info = pgstat_get_kind_info(kind);
1834 8332 : if (!info)
1835 : {
1836 2 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1837 : kind, t);
1838 2 : goto error;
1839 : }
1840 :
1841 8330 : if (!info->fixed_amount)
1842 : {
1843 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1844 : kind, t);
1845 0 : goto error;
1846 : }
1847 :
1848 : /* Load back stats into shared memory */
1849 8330 : if (pgstat_is_kind_builtin(kind))
1850 8328 : ptr = ((char *) shmem) + info->shared_ctl_off +
1851 8328 : info->shared_data_off;
1852 : else
1853 : {
1854 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1855 :
1856 2 : ptr = ((char *) shmem->custom_data[idx]) +
1857 2 : info->shared_data_off;
1858 : }
1859 :
1860 8330 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1861 : {
1862 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1863 : kind, t, info->shared_data_len);
1864 0 : goto error;
1865 : }
1866 :
1867 8330 : break;
1868 : }
1869 387018 : case PGSTAT_FILE_ENTRY_HASH:
1870 : case PGSTAT_FILE_ENTRY_NAME:
1871 : {
1872 : PgStat_HashKey key;
1873 : PgStatShared_HashEntry *p;
1874 : PgStatShared_Common *header;
1875 :
1876 387018 : CHECK_FOR_INTERRUPTS();
1877 :
1878 387018 : if (t == PGSTAT_FILE_ENTRY_HASH)
1879 : {
1880 : /* normal stats entry, identified by PgStat_HashKey */
1881 386914 : if (!read_chunk_s(fpin, &key))
1882 : {
1883 0 : elog(WARNING, "could not read key for entry of type %c", t);
1884 0 : goto error;
1885 : }
1886 :
1887 386914 : if (!pgstat_is_kind_valid(key.kind))
1888 : {
1889 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
1890 : key.kind, key.dboid,
1891 : key.objid, t);
1892 0 : goto error;
1893 : }
1894 :
1895 386914 : if (!pgstat_get_kind_info(key.kind))
1896 : {
1897 0 : elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
1898 : key.kind, key.dboid,
1899 : key.objid, t);
1900 0 : goto error;
1901 : }
1902 : }
1903 : else
1904 : {
1905 : /* stats entry identified by name on disk (e.g. slots) */
1906 104 : const PgStat_KindInfo *kind_info = NULL;
1907 : PgStat_Kind kind;
1908 : NameData name;
1909 :
1910 104 : if (!read_chunk_s(fpin, &kind))
1911 : {
1912 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1913 0 : goto error;
1914 : }
1915 104 : if (!read_chunk_s(fpin, &name))
1916 : {
1917 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1918 : kind, t);
1919 0 : goto error;
1920 : }
1921 104 : if (!pgstat_is_kind_valid(kind))
1922 : {
1923 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1924 : kind, t);
1925 0 : goto error;
1926 : }
1927 :
1928 104 : kind_info = pgstat_get_kind_info(kind);
1929 104 : if (!kind_info)
1930 : {
1931 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1932 : kind, t);
1933 0 : goto error;
1934 : }
1935 :
1936 104 : if (!kind_info->from_serialized_name)
1937 : {
1938 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1939 : kind, t);
1940 0 : goto error;
1941 : }
1942 :
1943 104 : if (!kind_info->from_serialized_name(&name, &key))
1944 : {
1945 : /* skip over data for entry we don't care about */
1946 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1947 : {
1948 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1949 : NameStr(name), kind, t);
1950 0 : goto error;
1951 : }
1952 :
1953 2 : continue;
1954 : }
1955 :
1956 : Assert(key.kind == kind);
1957 : }
1958 :
1959 : /*
1960 : * This intentionally doesn't use pgstat_get_entry_ref() -
1961 : * putting all stats into checkpointer's
1962 : * pgStatEntryRefHash would be wasted effort and memory.
1963 : */
1964 387016 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1965 :
1966 : /* don't allow duplicate entries */
1967 387016 : if (found)
1968 : {
1969 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1970 0 : elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
1971 : key.kind, key.dboid,
1972 : key.objid, t);
1973 0 : goto error;
1974 : }
1975 :
1976 387016 : header = pgstat_init_entry(key.kind, p);
1977 387016 : dshash_release_lock(pgStatLocal.shared_hash, p);
1978 :
1979 387016 : if (!read_chunk(fpin,
1980 : pgstat_get_entry_data(key.kind, header),
1981 : pgstat_get_entry_len(key.kind)))
1982 : {
1983 0 : elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
1984 : key.kind, key.dboid,
1985 : key.objid, t);
1986 0 : goto error;
1987 : }
1988 :
1989 387016 : break;
1990 : }
1991 1386 : case PGSTAT_FILE_ENTRY_END:
1992 :
1993 : /*
1994 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
1995 : * file
1996 : */
1997 1386 : if (fgetc(fpin) != EOF)
1998 : {
1999 2 : elog(WARNING, "could not read end-of-file");
2000 2 : goto error;
2001 : }
2002 :
2003 1384 : goto done;
2004 :
2005 0 : default:
2006 0 : elog(WARNING, "could not read entry of type %c", t);
2007 0 : goto error;
2008 : }
2009 : }
2010 :
2011 1390 : done:
2012 1390 : FreeFile(fpin);
2013 :
2014 1390 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2015 1390 : unlink(statfile);
2016 :
2017 1390 : return;
2018 :
2019 6 : error:
2020 6 : ereport(LOG,
2021 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2022 :
2023 6 : pgstat_reset_after_failure();
2024 :
2025 6 : goto done;
2026 : }
2027 :
2028 : /*
2029 : * Helper to reset / drop stats after a crash or after restoring stats from
2030 : * disk failed, potentially after already loading parts.
2031 : */
2032 : static void
2033 458 : pgstat_reset_after_failure(void)
2034 : {
2035 458 : TimestampTz ts = GetCurrentTimestamp();
2036 :
2037 : /* reset fixed-numbered stats */
2038 15114 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2039 : {
2040 14656 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2041 :
2042 14656 : if (!kind_info || !kind_info->fixed_amount)
2043 11906 : continue;
2044 :
2045 2750 : kind_info->reset_all_cb(ts);
2046 : }
2047 :
2048 : /* and drop variable-numbered ones */
2049 458 : pgstat_drop_all_entries();
2050 458 : }
2051 :
2052 : /*
2053 : * GUC assign_hook for stats_fetch_consistency.
2054 : */
2055 : void
2056 7284 : assign_stats_fetch_consistency(int newval, void *extra)
2057 : {
2058 : /*
2059 : * Changing this value in a transaction may cause snapshot state
2060 : * inconsistencies, so force a clear of the current snapshot on the next
2061 : * snapshot build attempt.
2062 : */
2063 7284 : if (pgstat_fetch_consistency != newval)
2064 4866 : force_stats_snapshot_clear = true;
2065 7284 : }
|