Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered stats, pending data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_relation.c
87 : * - pgstat_replslot.c
88 : * - pgstat_slru.c
89 : * - pgstat_subscription.c
90 : * - pgstat_wal.c
91 : *
92 : * Whenever possible infrastructure files should not contain code related to
93 : * specific kinds of stats.
94 : *
95 : *
96 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97 : *
98 : * IDENTIFICATION
99 : * src/backend/utils/activity/pgstat.c
100 : * ----------
101 : */
102 : #include "postgres.h"
103 :
104 : #include <unistd.h>
105 :
106 : #include "access/xact.h"
107 : #include "lib/dshash.h"
108 : #include "pgstat.h"
109 : #include "storage/fd.h"
110 : #include "storage/ipc.h"
111 : #include "storage/lwlock.h"
112 : #include "utils/guc_hooks.h"
113 : #include "utils/memutils.h"
114 : #include "utils/pgstat_internal.h"
115 : #include "utils/timestamp.h"
116 :
117 :
/* ----------
 * Timer definitions.
 *
 * All values are in milliseconds.
 * ----------
 */

/* minimum interval between two non-forced stats flushes */
#define PGSTAT_MIN_INTERVAL			1000
/* how long stats updates may stay pending before a flush is allowed to block */
#define PGSTAT_MAX_INTERVAL			60000
/* when to call pgstat_report_stat() again, even when idle */
#define PGSTAT_IDLE_INTERVAL		10000

/* ----------
 * Initial size hints for the hash tables used in statistics.
 * ----------
 */

#define PGSTAT_SNAPSHOT_HASH_SIZE	512

/* ---------
 * Identifiers in stats file.
 * ---------
 */
#define PGSTAT_FILE_ENTRY_END	'E' /* end of file */
#define PGSTAT_FILE_ENTRY_FIXED	'F' /* fixed-numbered stats entry */
#define PGSTAT_FILE_ENTRY_NAME	'N' /* stats entry identified by name */
#define PGSTAT_FILE_ENTRY_HASH	'S' /* stats entry identified by
									 * PgStat_HashKey */
148 :
149 : /* hash table for statistics snapshots entry */
150 : typedef struct PgStat_SnapshotEntry
151 : {
152 : PgStat_HashKey key;
153 : char status; /* for simplehash use */
154 : void *data; /* the stats data itself */
155 : } PgStat_SnapshotEntry;
156 :
157 :
158 : /* ----------
159 : * Backend-local Hash Table Definitions
160 : * ----------
161 : */
162 :
163 : /* for stats snapshot entries */
164 : #define SH_PREFIX pgstat_snapshot
165 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
166 : #define SH_KEY_TYPE PgStat_HashKey
167 : #define SH_KEY key
168 : #define SH_HASH_KEY(tb, key) \
169 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
170 : #define SH_EQUAL(tb, a, b) \
171 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
172 : #define SH_SCOPE static inline
173 : #define SH_DEFINE
174 : #define SH_DECLARE
175 : #include "lib/simplehash.h"
176 :
177 :
178 : /* ----------
179 : * Local function forward declarations
180 : * ----------
181 : */
182 :
183 : static void pgstat_write_statsfile(void);
184 : static void pgstat_read_statsfile(void);
185 :
186 : static void pgstat_init_snapshot_fixed(void);
187 :
188 : static void pgstat_reset_after_failure(void);
189 :
190 : static bool pgstat_flush_pending_entries(bool nowait);
191 :
192 : static void pgstat_prep_snapshot(void);
193 : static void pgstat_build_snapshot(void);
194 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195 :
196 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
197 :
198 :
199 : /* ----------
200 : * GUC parameters
201 : * ----------
202 : */
203 :
204 : bool pgstat_track_counts = false;
205 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
206 :
207 :
208 : /* ----------
209 : * state shared with pgstat_*.c
210 : * ----------
211 : */
212 :
213 : PgStat_LocalState pgStatLocal;
214 :
215 :
216 : /* ----------
217 : * Local data
218 : *
219 : * NB: There should be only variables related to stats infrastructure here,
220 : * not for specific kinds of stats.
221 : * ----------
222 : */
223 :
224 : /*
225 : * Memory contexts containing the pgStatEntryRefHash table, the
226 : * pgStatSharedRef entries, and pending data respectively. Mostly to make it
227 : * easier to track / attribute memory usage.
228 : */
229 :
230 : static MemoryContext pgStatPendingContext = NULL;
231 :
232 : /*
233 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
234 : *
235 : * Newly pending entries should only ever be added to the end of the list,
236 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
237 : */
238 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
239 :
240 :
241 : /*
242 : * Force the next stats flush to happen regardless of
243 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
244 : */
245 : static bool pgStatForceNextFlush = false;
246 :
247 : /*
248 : * Force-clear existing snapshot before next use when stats_fetch_consistency
249 : * is changed.
250 : */
251 : static bool force_stats_snapshot_clear = false;
252 :
253 :
254 : /*
255 : * For assertions that check pgstat is not used before initialization / after
256 : * shutdown.
257 : */
258 : #ifdef USE_ASSERT_CHECKING
259 : static bool pgstat_is_initialized = false;
260 : static bool pgstat_is_shutdown = false;
261 : #endif
262 :
263 :
264 : /*
265 : * The different kinds of built-in statistics.
266 : *
267 : * If reasonably possible, handling specific to one kind of stats should go
268 : * through this abstraction, rather than making more of pgstat.c aware.
269 : *
270 : * See comments for struct PgStat_KindInfo for details about the individual
271 : * fields.
272 : *
273 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
274 : * seem to be a great way of doing that, given the split across multiple
275 : * files.
276 : */
277 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
278 :
279 : /* stats kinds for variable-numbered objects */
280 :
281 : [PGSTAT_KIND_DATABASE] = {
282 : .name = "database",
283 :
284 : .fixed_amount = false,
285 : .write_to_file = true,
286 : /* so pg_stat_database entries can be seen in all databases */
287 : .accessed_across_databases = true,
288 :
289 : .shared_size = sizeof(PgStatShared_Database),
290 : .shared_data_off = offsetof(PgStatShared_Database, stats),
291 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
292 : .pending_size = sizeof(PgStat_StatDBEntry),
293 :
294 : .flush_pending_cb = pgstat_database_flush_cb,
295 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
296 : },
297 :
298 : [PGSTAT_KIND_RELATION] = {
299 : .name = "relation",
300 :
301 : .fixed_amount = false,
302 : .write_to_file = true,
303 :
304 : .shared_size = sizeof(PgStatShared_Relation),
305 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
306 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
307 : .pending_size = sizeof(PgStat_TableStatus),
308 :
309 : .flush_pending_cb = pgstat_relation_flush_cb,
310 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
311 : },
312 :
313 : [PGSTAT_KIND_FUNCTION] = {
314 : .name = "function",
315 :
316 : .fixed_amount = false,
317 : .write_to_file = true,
318 :
319 : .shared_size = sizeof(PgStatShared_Function),
320 : .shared_data_off = offsetof(PgStatShared_Function, stats),
321 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
322 : .pending_size = sizeof(PgStat_FunctionCounts),
323 :
324 : .flush_pending_cb = pgstat_function_flush_cb,
325 : },
326 :
327 : [PGSTAT_KIND_REPLSLOT] = {
328 : .name = "replslot",
329 :
330 : .fixed_amount = false,
331 : .write_to_file = true,
332 :
333 : .accessed_across_databases = true,
334 :
335 : .shared_size = sizeof(PgStatShared_ReplSlot),
336 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
337 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
338 :
339 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
340 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
341 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
342 : },
343 :
344 : [PGSTAT_KIND_SUBSCRIPTION] = {
345 : .name = "subscription",
346 :
347 : .fixed_amount = false,
348 : .write_to_file = true,
349 : /* so pg_stat_subscription_stats entries can be seen in all databases */
350 : .accessed_across_databases = true,
351 :
352 : .shared_size = sizeof(PgStatShared_Subscription),
353 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
354 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
355 : .pending_size = sizeof(PgStat_BackendSubEntry),
356 :
357 : .flush_pending_cb = pgstat_subscription_flush_cb,
358 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
359 : },
360 :
361 : [PGSTAT_KIND_BACKEND] = {
362 : .name = "backend",
363 :
364 : .fixed_amount = false,
365 : .write_to_file = false,
366 :
367 : .accessed_across_databases = true,
368 :
369 : .shared_size = sizeof(PgStatShared_Backend),
370 : .shared_data_off = offsetof(PgStatShared_Backend, stats),
371 : .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
372 :
373 : .have_static_pending_cb = pgstat_backend_have_pending_cb,
374 : .flush_static_cb = pgstat_backend_flush_cb,
375 : .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
376 : },
377 :
378 : /* stats for fixed-numbered (mostly 1) objects */
379 :
380 : [PGSTAT_KIND_ARCHIVER] = {
381 : .name = "archiver",
382 :
383 : .fixed_amount = true,
384 : .write_to_file = true,
385 :
386 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
387 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
388 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
389 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
390 :
391 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
392 : .reset_all_cb = pgstat_archiver_reset_all_cb,
393 : .snapshot_cb = pgstat_archiver_snapshot_cb,
394 : },
395 :
396 : [PGSTAT_KIND_BGWRITER] = {
397 : .name = "bgwriter",
398 :
399 : .fixed_amount = true,
400 : .write_to_file = true,
401 :
402 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
403 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
404 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
405 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
406 :
407 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
408 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
409 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
410 : },
411 :
412 : [PGSTAT_KIND_CHECKPOINTER] = {
413 : .name = "checkpointer",
414 :
415 : .fixed_amount = true,
416 : .write_to_file = true,
417 :
418 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
419 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
420 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
421 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
422 :
423 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
424 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
425 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
426 : },
427 :
428 : [PGSTAT_KIND_IO] = {
429 : .name = "io",
430 :
431 : .fixed_amount = true,
432 : .write_to_file = true,
433 :
434 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
435 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
436 : .shared_data_off = offsetof(PgStatShared_IO, stats),
437 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
438 :
439 : .flush_static_cb = pgstat_io_flush_cb,
440 : .have_static_pending_cb = pgstat_io_have_pending_cb,
441 : .init_shmem_cb = pgstat_io_init_shmem_cb,
442 : .reset_all_cb = pgstat_io_reset_all_cb,
443 : .snapshot_cb = pgstat_io_snapshot_cb,
444 : },
445 :
446 : [PGSTAT_KIND_SLRU] = {
447 : .name = "slru",
448 :
449 : .fixed_amount = true,
450 : .write_to_file = true,
451 :
452 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
453 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
454 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
455 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
456 :
457 : .flush_static_cb = pgstat_slru_flush_cb,
458 : .have_static_pending_cb = pgstat_slru_have_pending_cb,
459 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
460 : .reset_all_cb = pgstat_slru_reset_all_cb,
461 : .snapshot_cb = pgstat_slru_snapshot_cb,
462 : },
463 :
464 : [PGSTAT_KIND_WAL] = {
465 : .name = "wal",
466 :
467 : .fixed_amount = true,
468 : .write_to_file = true,
469 :
470 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
471 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
472 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
473 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
474 :
475 : .init_backend_cb = pgstat_wal_init_backend_cb,
476 : .flush_static_cb = pgstat_wal_flush_cb,
477 : .have_static_pending_cb = pgstat_wal_have_pending_cb,
478 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
479 : .reset_all_cb = pgstat_wal_reset_all_cb,
480 : .snapshot_cb = pgstat_wal_snapshot_cb,
481 : },
482 : };
483 :
484 : /*
485 : * Information about custom statistics kinds.
486 : *
487 : * These are saved in a different array than the built-in kinds to save
488 : * in clarity with the initializations.
489 : *
490 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
491 : */
492 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
493 :
494 : /* ------------------------------------------------------------
495 : * Functions managing the state of the stats system for all backends.
496 : * ------------------------------------------------------------
497 : */
498 :
/*
 * Load the on-disk statistics into memory at server start.
 *
 * Thin public wrapper around pgstat_read_statsfile(). Should only be called
 * by the startup process or in single user mode.
 */
void
pgstat_restore_stats(void)
{
	pgstat_read_statsfile();
}
509 :
510 : /*
511 : * Remove the stats file. This is currently used only if WAL recovery is
512 : * needed after a crash.
513 : *
514 : * Should only be called by the startup process or in single user mode.
515 : */
516 : void
517 344 : pgstat_discard_stats(void)
518 : {
519 : int ret;
520 :
521 : /* NB: this needs to be done even in single user mode */
522 :
523 344 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
524 344 : if (ret != 0)
525 : {
526 342 : if (errno == ENOENT)
527 342 : elog(DEBUG2,
528 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
529 : PGSTAT_STAT_PERMANENT_FILENAME);
530 : else
531 0 : ereport(LOG,
532 : (errcode_for_file_access(),
533 : errmsg("could not unlink permanent statistics file \"%s\": %m",
534 : PGSTAT_STAT_PERMANENT_FILENAME)));
535 : }
536 : else
537 : {
538 2 : ereport(DEBUG2,
539 : (errcode_for_file_access(),
540 : errmsg_internal("unlinked permanent statistics file \"%s\"",
541 : PGSTAT_STAT_PERMANENT_FILENAME)));
542 : }
543 :
544 : /*
545 : * Reset stats contents. This will set reset timestamps of fixed-numbered
546 : * stats to the current time (no variable stats exist).
547 : */
548 344 : pgstat_reset_after_failure();
549 344 : }
550 :
551 : /*
552 : * pgstat_before_server_shutdown() needs to be called by exactly one process
553 : * during regular server shutdowns. Otherwise all stats will be lost.
554 : *
555 : * We currently only write out stats for proc_exit(0). We might want to change
556 : * that at some point... But right now pgstat_discard_stats() would be called
557 : * during the start after a disorderly shutdown, anyway.
558 : */
559 : void
560 1220 : pgstat_before_server_shutdown(int code, Datum arg)
561 : {
562 : Assert(pgStatLocal.shmem != NULL);
563 : Assert(!pgStatLocal.shmem->is_shutdown);
564 :
565 : /*
566 : * Stats should only be reported after pgstat_initialize() and before
567 : * pgstat_shutdown(). This is a convenient point to catch most violations
568 : * of this rule.
569 : */
570 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
571 :
572 : /* flush out our own pending changes before writing out */
573 1220 : pgstat_report_stat(true);
574 :
575 : /*
576 : * Only write out file during normal shutdown. Don't even signal that
577 : * we've shutdown during irregular shutdowns, because the shutdown
578 : * sequence isn't coordinated to ensure this backend shuts down last.
579 : */
580 1220 : if (code == 0)
581 : {
582 1208 : pgStatLocal.shmem->is_shutdown = true;
583 1208 : pgstat_write_statsfile();
584 : }
585 1220 : }
586 :
587 :
588 : /* ------------------------------------------------------------
589 : * Backend initialization / shutdown functions
590 : * ------------------------------------------------------------
591 : */
592 :
593 : /*
594 : * Shut down a single backend's statistics reporting at process exit.
595 : *
596 : * Flush out any remaining statistics counts. Without this, operations
597 : * triggered during backend exit (such as temp table deletions) won't be
598 : * counted.
599 : */
600 : static void
601 40846 : pgstat_shutdown_hook(int code, Datum arg)
602 : {
603 : Assert(!pgstat_is_shutdown);
604 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
605 :
606 : /*
607 : * If we got as far as discovering our own database ID, we can flush out
608 : * what we did so far. Otherwise, we'd be reporting an invalid database
609 : * ID, so forget it. (This means that accesses to pg_database during
610 : * failed backend starts might never get counted.)
611 : */
612 40846 : if (OidIsValid(MyDatabaseId))
613 30130 : pgstat_report_disconnect(MyDatabaseId);
614 :
615 40846 : pgstat_report_stat(true);
616 :
617 : /* there shouldn't be any pending changes left */
618 : Assert(dlist_is_empty(&pgStatPending));
619 40846 : dlist_init(&pgStatPending);
620 :
621 : /* drop the backend stats entry */
622 40846 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
623 0 : pgstat_request_entry_refs_gc();
624 :
625 40846 : pgstat_detach_shmem();
626 :
627 : #ifdef USE_ASSERT_CHECKING
628 : pgstat_is_shutdown = true;
629 : #endif
630 40846 : }
631 :
632 : /*
633 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
634 : * BaseInit().
635 : *
636 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
637 : */
638 : void
639 40846 : pgstat_initialize(void)
640 : {
641 : Assert(!pgstat_is_initialized);
642 :
643 40846 : pgstat_attach_shmem();
644 :
645 40846 : pgstat_init_snapshot_fixed();
646 :
647 : /* Backend initialization callbacks */
648 10497422 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
649 : {
650 10456576 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
651 :
652 10456576 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
653 10415730 : continue;
654 :
655 40846 : kind_info->init_backend_cb();
656 : }
657 :
658 : /* Set up a process-exit hook to clean up */
659 40846 : before_shmem_exit(pgstat_shutdown_hook, 0);
660 :
661 : #ifdef USE_ASSERT_CHECKING
662 : pgstat_is_initialized = true;
663 : #endif
664 40846 : }
665 :
666 :
667 : /* ------------------------------------------------------------
668 : * Public functions used by backends follow
669 : * ------------------------------------------------------------
670 : */
671 :
672 : /*
673 : * Must be called by processes that performs DML: tcop/postgres.c, logical
674 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
675 : * shared memory.
676 : *
677 : * Unless called with 'force', pending stats updates are flushed happen once
678 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
679 : * block on lock acquisition, except if stats updates have been pending for
680 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
681 : *
682 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
683 : * suggested idle timeout is returned. Currently this is always
684 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
685 : * a timeout after which to call pgstat_report_stat(true), but are not
686 : * required to do so.
687 : *
688 : * Note that this is called only when not within a transaction, so it is fair
689 : * to use transaction stop time as an approximation of current time.
690 : */
691 : long
692 610822 : pgstat_report_stat(bool force)
693 : {
694 : static TimestampTz pending_since = 0;
695 : static TimestampTz last_flush = 0;
696 : bool partial_flush;
697 : TimestampTz now;
698 : bool nowait;
699 :
700 : pgstat_assert_is_up();
701 : Assert(!IsTransactionOrTransactionBlock());
702 :
703 : /* "absorb" the forced flush even if there's nothing to flush */
704 610822 : if (pgStatForceNextFlush)
705 : {
706 428 : force = true;
707 428 : pgStatForceNextFlush = false;
708 : }
709 :
710 : /* Don't expend a clock check if nothing to do */
711 610822 : if (dlist_is_empty(&pgStatPending))
712 : {
713 17824 : bool do_flush = false;
714 :
715 : /* Check for pending stats */
716 3534008 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
717 : {
718 3520410 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
719 :
720 3520410 : if (!kind_info)
721 3317840 : continue;
722 202570 : if (!kind_info->have_static_pending_cb)
723 139748 : continue;
724 :
725 62822 : if (kind_info->have_static_pending_cb())
726 : {
727 4226 : do_flush = true;
728 4226 : break;
729 : }
730 : }
731 :
732 17824 : if (!do_flush)
733 13598 : return 0;
734 : }
735 :
736 : /*
737 : * There should never be stats to report once stats are shut down. Can't
738 : * assert that before the checks above, as there is an unconditional
739 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
740 : * the process that ran pgstat_before_server_shutdown() will still call.
741 : */
742 : Assert(!pgStatLocal.shmem->is_shutdown);
743 :
744 597224 : if (force)
745 : {
746 : /*
747 : * Stats reports are forced either when it's been too long since stats
748 : * have been reported or in processes that force stats reporting to
749 : * happen at specific points (including shutdown). In the former case
750 : * the transaction stop time might be quite old, in the latter it
751 : * would never get cleared.
752 : */
753 38508 : now = GetCurrentTimestamp();
754 : }
755 : else
756 : {
757 558716 : now = GetCurrentTransactionStopTimestamp();
758 :
759 1065702 : if (pending_since > 0 &&
760 506986 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
761 : {
762 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
763 0 : force = true;
764 : }
765 558716 : else if (last_flush > 0 &&
766 533742 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
767 : {
768 : /* don't flush too frequently */
769 531848 : if (pending_since == 0)
770 26480 : pending_since = now;
771 :
772 531848 : return PGSTAT_IDLE_INTERVAL;
773 : }
774 : }
775 :
776 65376 : pgstat_update_dbstats(now);
777 :
778 : /* don't wait for lock acquisition when !force */
779 65376 : nowait = !force;
780 :
781 65376 : partial_flush = false;
782 :
783 : /* flush of variable-numbered stats tracked in pending entries list */
784 65376 : partial_flush |= pgstat_flush_pending_entries(nowait);
785 :
786 : /* flush of other stats kinds */
787 16801632 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
788 : {
789 16736256 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
790 :
791 16736256 : if (!kind_info)
792 15951408 : continue;
793 784848 : if (!kind_info->flush_static_cb)
794 523344 : continue;
795 :
796 261504 : partial_flush |= kind_info->flush_static_cb(nowait);
797 : }
798 :
799 65376 : last_flush = now;
800 :
801 : /*
802 : * If some of the pending stats could not be flushed due to lock
803 : * contention, let the caller know when to retry.
804 : */
805 65376 : if (partial_flush)
806 : {
807 : /* force should have prevented us from getting here */
808 : Assert(!force);
809 :
810 : /* remember since when stats have been pending */
811 10 : if (pending_since == 0)
812 8 : pending_since = now;
813 :
814 10 : return PGSTAT_IDLE_INTERVAL;
815 : }
816 :
817 65366 : pending_since = 0;
818 :
819 65366 : return 0;
820 : }
821 :
822 : /*
823 : * Force locally pending stats to be flushed during the next
824 : * pgstat_report_stat() call. This is useful for writing tests.
825 : */
826 : void
827 428 : pgstat_force_next_flush(void)
828 : {
829 428 : pgStatForceNextFlush = true;
830 428 : }
831 :
832 : /*
833 : * Only for use by pgstat_reset_counters()
834 : */
835 : static bool
836 23294 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
837 : {
838 23294 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
839 : }
840 :
841 : /*
842 : * Reset counters for our database.
843 : *
844 : * Permission checking for this function is managed through the normal
845 : * GRANT system.
846 : */
847 : void
848 26 : pgstat_reset_counters(void)
849 : {
850 26 : TimestampTz ts = GetCurrentTimestamp();
851 :
852 26 : pgstat_reset_matching_entries(match_db_entries,
853 : ObjectIdGetDatum(MyDatabaseId),
854 : ts);
855 26 : }
856 :
857 : /*
858 : * Reset a single variable-numbered entry.
859 : *
860 : * If the stats kind is within a database, also reset the database's
861 : * stat_reset_timestamp.
862 : *
863 : * Permission checking for this function is managed through the normal
864 : * GRANT system.
865 : */
866 : void
867 44 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
868 : {
869 44 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
870 44 : TimestampTz ts = GetCurrentTimestamp();
871 :
872 : /* not needed atm, and doesn't make sense with the current signature */
873 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
874 :
875 : /* reset the "single counter" */
876 44 : pgstat_reset_entry(kind, dboid, objid, ts);
877 :
878 44 : if (!kind_info->accessed_across_databases)
879 16 : pgstat_reset_database_timestamp(dboid, ts);
880 44 : }
881 :
882 : /*
883 : * Reset stats for all entries of a kind.
884 : *
885 : * Permission checking for this function is managed through the normal
886 : * GRANT system.
887 : */
888 : void
889 58 : pgstat_reset_of_kind(PgStat_Kind kind)
890 : {
891 58 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
892 58 : TimestampTz ts = GetCurrentTimestamp();
893 :
894 58 : if (kind_info->fixed_amount)
895 50 : kind_info->reset_all_cb(ts);
896 : else
897 8 : pgstat_reset_entries_of_kind(kind, ts);
898 58 : }
899 :
900 :
901 : /* ------------------------------------------------------------
902 : * Fetching of stats
903 : * ------------------------------------------------------------
904 : */
905 :
906 : /*
907 : * Discard any data collected in the current transaction. Any subsequent
908 : * request will cause new snapshots to be read.
909 : *
910 : * This is also invoked during transaction commit or abort to discard
911 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
912 : * cause this routine to be called.
913 : */
914 : void
915 865820 : pgstat_clear_snapshot(void)
916 : {
917 : pgstat_assert_is_up();
918 :
919 865820 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
920 : sizeof(pgStatLocal.snapshot.fixed_valid));
921 865820 : memset(&pgStatLocal.snapshot.custom_valid, 0,
922 : sizeof(pgStatLocal.snapshot.custom_valid));
923 865820 : pgStatLocal.snapshot.stats = NULL;
924 865820 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
925 :
926 : /* Release memory, if any was allocated */
927 865820 : if (pgStatLocal.snapshot.context)
928 : {
929 1076 : MemoryContextDelete(pgStatLocal.snapshot.context);
930 :
931 : /* Reset variables */
932 1076 : pgStatLocal.snapshot.context = NULL;
933 : }
934 :
935 : /*
936 : * Historically the backend_status.c facilities lived in this file, and
937 : * were reset with the same function. For now keep it that way, and
938 : * forward the reset request.
939 : */
940 865820 : pgstat_clear_backend_activity_snapshot();
941 :
942 : /* Reset this flag, as it may be possible that a cleanup was forced. */
943 865820 : force_stats_snapshot_clear = false;
944 865820 : }
945 :
946 : void *
947 384030 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
948 : {
949 : PgStat_HashKey key;
950 : PgStat_EntryRef *entry_ref;
951 : void *stats_data;
952 384030 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
953 :
954 : /* should be called from backends */
955 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
956 : Assert(!kind_info->fixed_amount);
957 :
958 384030 : pgstat_prep_snapshot();
959 :
960 : /* clear padding */
961 384030 : memset(&key, 0, sizeof(struct PgStat_HashKey));
962 :
963 384030 : key.kind = kind;
964 384030 : key.dboid = dboid;
965 384030 : key.objid = objid;
966 :
967 : /* if we need to build a full snapshot, do so */
968 384030 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
969 460 : pgstat_build_snapshot();
970 :
971 : /* if caching is desired, look up in cache */
972 384030 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
973 : {
974 9630 : PgStat_SnapshotEntry *entry = NULL;
975 :
976 9630 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
977 :
978 9630 : if (entry)
979 620 : return entry->data;
980 :
981 : /*
982 : * If we built a full snapshot and the key is not in
983 : * pgStatLocal.snapshot.stats, there are no matching stats.
984 : */
985 9010 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
986 28 : return NULL;
987 : }
988 :
989 383382 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
990 :
991 383382 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
992 :
993 383382 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
994 : {
995 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
996 8954 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
997 : {
998 1762 : PgStat_SnapshotEntry *entry = NULL;
999 : bool found;
1000 :
1001 1762 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1002 : Assert(!found);
1003 1762 : entry->data = NULL;
1004 : }
1005 8954 : return NULL;
1006 : }
1007 :
1008 : /*
1009 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
1010 : * otherwise we could quickly end up with a fair bit of memory used due to
1011 : * repeated accesses.
1012 : */
1013 374428 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1014 367208 : stats_data = palloc(kind_info->shared_data_len);
1015 : else
1016 7220 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1017 7220 : kind_info->shared_data_len);
1018 :
1019 374428 : (void) pgstat_lock_entry_shared(entry_ref, false);
1020 748856 : memcpy(stats_data,
1021 374428 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1022 374428 : kind_info->shared_data_len);
1023 374428 : pgstat_unlock_entry(entry_ref);
1024 :
1025 374428 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1026 : {
1027 7220 : PgStat_SnapshotEntry *entry = NULL;
1028 : bool found;
1029 :
1030 7220 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1031 7220 : entry->data = stats_data;
1032 : }
1033 :
1034 374428 : return stats_data;
1035 : }
1036 :
1037 : /*
1038 : * If a stats snapshot has been taken, return the timestamp at which that was
1039 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1040 : * false.
1041 : */
1042 : TimestampTz
1043 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1044 : {
1045 60 : if (force_stats_snapshot_clear)
1046 18 : pgstat_clear_snapshot();
1047 :
1048 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1049 : {
1050 24 : *have_snapshot = true;
1051 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1052 : }
1053 :
1054 36 : *have_snapshot = false;
1055 :
1056 36 : return 0;
1057 : }
1058 :
1059 : bool
1060 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1061 : {
1062 : /* fixed-numbered stats always exist */
1063 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1064 12 : return true;
1065 :
1066 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1067 : }
1068 :
1069 : /*
1070 : * Ensure snapshot for fixed-numbered 'kind' exists.
1071 : *
1072 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1073 : * massaging the data into the desired format.
1074 : */
1075 : void
1076 466 : pgstat_snapshot_fixed(PgStat_Kind kind)
1077 : {
1078 : Assert(pgstat_is_kind_valid(kind));
1079 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1080 :
1081 466 : if (force_stats_snapshot_clear)
1082 0 : pgstat_clear_snapshot();
1083 :
1084 466 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1085 24 : pgstat_build_snapshot();
1086 : else
1087 442 : pgstat_build_snapshot_fixed(kind);
1088 :
1089 466 : if (pgstat_is_kind_builtin(kind))
1090 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1091 6 : else if (pgstat_is_kind_custom(kind))
1092 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1093 466 : }
1094 :
1095 : static void
1096 40846 : pgstat_init_snapshot_fixed(void)
1097 : {
1098 : /*
1099 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1100 : * stats kinds.
1101 : */
1102 5309980 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1103 : {
1104 5269134 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1105 :
1106 5269134 : if (!kind_info || !kind_info->fixed_amount)
1107 5269008 : continue;
1108 :
1109 126 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1110 126 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1111 : }
1112 40846 : }
1113 :
1114 : static void
1115 384082 : pgstat_prep_snapshot(void)
1116 : {
1117 384082 : if (force_stats_snapshot_clear)
1118 18 : pgstat_clear_snapshot();
1119 :
1120 384082 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1121 9682 : pgStatLocal.snapshot.stats != NULL)
1122 383006 : return;
1123 :
1124 1076 : if (!pgStatLocal.snapshot.context)
1125 1076 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1126 : "PgStat Snapshot",
1127 : ALLOCSET_SMALL_SIZES);
1128 :
1129 1076 : pgStatLocal.snapshot.stats =
1130 1076 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1131 : PGSTAT_SNAPSHOT_HASH_SIZE,
1132 : NULL);
1133 : }
1134 :
1135 : static void
1136 484 : pgstat_build_snapshot(void)
1137 : {
1138 : dshash_seq_status hstat;
1139 : PgStatShared_HashEntry *p;
1140 :
1141 : /* should only be called when we need a snapshot */
1142 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1143 :
1144 : /* snapshot already built */
1145 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1146 432 : return;
1147 :
1148 52 : pgstat_prep_snapshot();
1149 :
1150 : Assert(pgStatLocal.snapshot.stats->members == 0);
1151 :
1152 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1153 :
1154 : /*
1155 : * Snapshot all variable stats.
1156 : */
1157 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1158 54742 : while ((p = dshash_seq_next(&hstat)) != NULL)
1159 : {
1160 54690 : PgStat_Kind kind = p->key.kind;
1161 54690 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1162 : bool found;
1163 : PgStat_SnapshotEntry *entry;
1164 : PgStatShared_Common *stats_data;
1165 :
1166 : /*
1167 : * Check if the stats object should be included in the snapshot.
1168 : * Unless the stats kind can be accessed from all databases (e.g.,
1169 : * database stats themselves), we only include stats for the current
1170 : * database or objects not associated with a database (e.g. shared
1171 : * relations).
1172 : */
1173 54690 : if (p->key.dboid != MyDatabaseId &&
1174 16086 : p->key.dboid != InvalidOid &&
1175 13156 : !kind_info->accessed_across_databases)
1176 13204 : continue;
1177 :
1178 41690 : if (p->dropped)
1179 204 : continue;
1180 :
1181 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1182 :
1183 41486 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1184 : Assert(stats_data);
1185 :
1186 41486 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1187 : Assert(!found);
1188 :
1189 82972 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1190 41486 : kind_info->shared_size);
1191 :
1192 : /*
1193 : * Acquire the LWLock directly instead of using
1194 : * pg_stat_lock_entry_shared() which requires a reference.
1195 : */
1196 41486 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1197 82972 : memcpy(entry->data,
1198 41486 : pgstat_get_entry_data(kind, stats_data),
1199 41486 : kind_info->shared_size);
1200 41486 : LWLockRelease(&stats_data->lock);
1201 : }
1202 52 : dshash_seq_term(&hstat);
1203 :
1204 : /*
1205 : * Build snapshot of all fixed-numbered stats.
1206 : */
1207 13364 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1208 : {
1209 13312 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1210 :
1211 13312 : if (!kind_info)
1212 12688 : continue;
1213 624 : if (!kind_info->fixed_amount)
1214 : {
1215 : Assert(kind_info->snapshot_cb == NULL);
1216 312 : continue;
1217 : }
1218 :
1219 312 : pgstat_build_snapshot_fixed(kind);
1220 : }
1221 :
1222 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1223 : }
1224 :
1225 : static void
1226 8008 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1227 : {
1228 8008 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1229 : int idx;
1230 : bool *valid;
1231 :
1232 : /* Position in fixed_valid or custom_valid */
1233 8008 : if (pgstat_is_kind_builtin(kind))
1234 : {
1235 7996 : idx = kind;
1236 7996 : valid = pgStatLocal.snapshot.fixed_valid;
1237 : }
1238 : else
1239 : {
1240 12 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1241 12 : valid = pgStatLocal.snapshot.custom_valid;
1242 : }
1243 :
1244 : Assert(kind_info->fixed_amount);
1245 : Assert(kind_info->snapshot_cb != NULL);
1246 :
1247 8008 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1248 : {
1249 : /* rebuild every time */
1250 7284 : valid[idx] = false;
1251 : }
1252 724 : else if (valid[idx])
1253 : {
1254 : /* in snapshot mode we shouldn't get called again */
1255 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1256 12 : return;
1257 : }
1258 :
1259 : Assert(!valid[idx]);
1260 :
1261 7996 : kind_info->snapshot_cb();
1262 :
1263 : Assert(!valid[idx]);
1264 7996 : valid[idx] = true;
1265 : }
1266 :
1267 :
1268 : /* ------------------------------------------------------------
1269 : * Backend-local pending stats infrastructure
1270 : * ------------------------------------------------------------
1271 : */
1272 :
1273 : /*
1274 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1275 : * stats if not already done.
1276 : *
1277 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1278 : * created, false otherwise.
1279 : */
1280 : PgStat_EntryRef *
1281 3677344 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1282 : {
1283 : PgStat_EntryRef *entry_ref;
1284 :
1285 : /* need to be able to flush out */
1286 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1287 :
1288 3677344 : if (unlikely(!pgStatPendingContext))
1289 : {
1290 32714 : pgStatPendingContext =
1291 32714 : AllocSetContextCreate(TopMemoryContext,
1292 : "PgStat Pending",
1293 : ALLOCSET_SMALL_SIZES);
1294 : }
1295 :
1296 3677344 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1297 : true, created_entry);
1298 :
1299 3677344 : if (entry_ref->pending == NULL)
1300 : {
1301 1881100 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1302 :
1303 : Assert(entrysize != (size_t) -1);
1304 :
1305 1881100 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1306 1881100 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1307 : }
1308 :
1309 3677344 : return entry_ref;
1310 : }
1311 :
1312 : /*
1313 : * Return an existing stats entry, or NULL.
1314 : *
1315 : * This should only be used for helper function for pgstatfuncs.c - outside of
1316 : * that it shouldn't be needed.
1317 : */
1318 : PgStat_EntryRef *
1319 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1320 : {
1321 : PgStat_EntryRef *entry_ref;
1322 :
1323 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1324 :
1325 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1326 30 : return NULL;
1327 :
1328 54 : return entry_ref;
1329 : }
1330 :
1331 : void
1332 1881100 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1333 : {
1334 1881100 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1335 1881100 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1336 1881100 : void *pending_data = entry_ref->pending;
1337 :
1338 : Assert(pending_data != NULL);
1339 : /* !fixed_amount stats should be handled explicitly */
1340 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1341 :
1342 1881100 : if (kind_info->delete_pending_cb)
1343 1773390 : kind_info->delete_pending_cb(entry_ref);
1344 :
1345 1881100 : pfree(pending_data);
1346 1881100 : entry_ref->pending = NULL;
1347 :
1348 1881100 : dlist_delete(&entry_ref->pending_node);
1349 1881100 : }
1350 :
1351 : /*
1352 : * Flush out pending variable-numbered stats.
1353 : */
1354 : static bool
1355 65376 : pgstat_flush_pending_entries(bool nowait)
1356 : {
1357 65376 : bool have_pending = false;
1358 65376 : dlist_node *cur = NULL;
1359 :
1360 : /*
1361 : * Need to be a bit careful iterating over the list of pending entries.
1362 : * Processing a pending entry may queue further pending entries to the end
1363 : * of the list that we want to process, so a simple iteration won't do.
1364 : * Further complicating matters is that we want to delete the current
1365 : * entry in each iteration from the list if we flushed successfully.
1366 : *
1367 : * So we just keep track of the next pointer in each loop iteration.
1368 : */
1369 65376 : if (!dlist_is_empty(&pgStatPending))
1370 61510 : cur = dlist_head_node(&pgStatPending);
1371 :
1372 1882402 : while (cur)
1373 : {
1374 1817026 : PgStat_EntryRef *entry_ref =
1375 1817026 : dlist_container(PgStat_EntryRef, pending_node, cur);
1376 1817026 : PgStat_HashKey key = entry_ref->shared_entry->key;
1377 1817026 : PgStat_Kind kind = key.kind;
1378 1817026 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1379 : bool did_flush;
1380 : dlist_node *next;
1381 :
1382 : Assert(!kind_info->fixed_amount);
1383 : Assert(kind_info->flush_pending_cb != NULL);
1384 :
1385 : /* flush the stats, if possible */
1386 1817026 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1387 :
1388 : Assert(did_flush || nowait);
1389 :
1390 : /* determine next entry, before deleting the pending entry */
1391 1817026 : if (dlist_has_next(&pgStatPending, cur))
1392 1755516 : next = dlist_next_node(&pgStatPending, cur);
1393 : else
1394 61510 : next = NULL;
1395 :
1396 : /* if successfully flushed, remove entry */
1397 1817026 : if (did_flush)
1398 1816998 : pgstat_delete_pending_entry(entry_ref);
1399 : else
1400 28 : have_pending = true;
1401 :
1402 1817026 : cur = next;
1403 : }
1404 :
1405 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1406 :
1407 65376 : return have_pending;
1408 : }
1409 :
1410 :
1411 : /* ------------------------------------------------------------
1412 : * Helper / infrastructure functions
1413 : * ------------------------------------------------------------
1414 : */
1415 :
1416 : PgStat_Kind
1417 166 : pgstat_get_kind_from_str(char *kind_str)
1418 : {
1419 494 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1420 : {
1421 488 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1422 160 : return kind;
1423 : }
1424 :
1425 : /* Check the custom set of cumulative stats */
1426 6 : if (pgstat_kind_custom_infos)
1427 : {
1428 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1429 : {
1430 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1431 :
1432 0 : if (pgstat_kind_custom_infos[idx] &&
1433 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1434 0 : return kind;
1435 : }
1436 : }
1437 :
1438 6 : ereport(ERROR,
1439 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1440 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1441 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1442 : }
1443 :
1444 : static inline bool
1445 750876 : pgstat_is_kind_valid(PgStat_Kind kind)
1446 : {
1447 750876 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1448 : }
1449 :
1450 : const PgStat_KindInfo *
1451 47079046 : pgstat_get_kind_info(PgStat_Kind kind)
1452 : {
1453 47079046 : if (pgstat_is_kind_builtin(kind))
1454 10869326 : return &pgstat_kind_builtin_infos[kind];
1455 :
1456 36209720 : if (pgstat_is_kind_custom(kind))
1457 : {
1458 21992270 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1459 :
1460 21992270 : if (pgstat_kind_custom_infos == NULL ||
1461 64074 : pgstat_kind_custom_infos[idx] == NULL)
1462 21991188 : return NULL;
1463 1082 : return pgstat_kind_custom_infos[idx];
1464 : }
1465 :
1466 14217450 : return NULL;
1467 : }
1468 :
1469 : /*
1470 : * Register a new stats kind.
1471 : *
1472 : * PgStat_Kinds must be globally unique across all extensions. Refer
1473 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1474 : * unique ID for your extension, to avoid conflicts with other extension
1475 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1476 : * needlessly reserving a new ID.
1477 : */
1478 : void
1479 16 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1480 : {
1481 16 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1482 :
1483 16 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1484 0 : ereport(ERROR,
1485 : (errmsg("custom cumulative statistics name is invalid"),
1486 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1487 :
1488 16 : if (!pgstat_is_kind_custom(kind))
1489 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1490 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1491 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1492 :
1493 16 : if (!process_shared_preload_libraries_in_progress)
1494 0 : ereport(ERROR,
1495 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1496 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1497 :
1498 : /*
1499 : * Check some data for fixed-numbered stats.
1500 : */
1501 16 : if (kind_info->fixed_amount)
1502 : {
1503 8 : if (kind_info->shared_size == 0)
1504 0 : ereport(ERROR,
1505 : (errmsg("custom cumulative statistics property is invalid"),
1506 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1507 : }
1508 :
1509 : /*
1510 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1511 : */
1512 16 : if (pgstat_kind_custom_infos == NULL)
1513 : {
1514 8 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1515 8 : MemoryContextAllocZero(TopMemoryContext,
1516 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1517 : }
1518 :
1519 16 : if (pgstat_kind_custom_infos[idx] != NULL &&
1520 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1521 0 : ereport(ERROR,
1522 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1523 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1524 : pgstat_kind_custom_infos[idx]->name)));
1525 :
1526 : /* check for existing custom stats with the same name */
1527 2080 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1528 : {
1529 2064 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1530 :
1531 2064 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1532 2056 : continue;
1533 8 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1534 0 : ereport(ERROR,
1535 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1536 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1537 : }
1538 :
1539 : /* Register it */
1540 16 : pgstat_kind_custom_infos[idx] = kind_info;
1541 16 : ereport(LOG,
1542 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1543 : kind_info->name, kind)));
1544 16 : }
1545 :
/*
 * Reporting stats is only allowed between pgstat_initialize() and
 * pgstat_shutdown().  This assertion is called from a few central places so
 * that violations of that rule are noticed easily.
 *
 * The function is only compiled when assertion checking is enabled.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1558 :
1559 :
1560 : /* ------------------------------------------------------------
1561 : * reading and writing of on-disk stats file
1562 : * ------------------------------------------------------------
1563 : */
1564 :
1565 : /* helpers for pgstat_write_statsfile() */
/*
 * Write len bytes starting at ptr to fpout as a single item.
 *
 * Write errors are deliberately not reported here; the caller checks the
 * stream with ferror() once all output has been produced.
 */
static void
write_chunk(FILE *fpout, const void *ptr, size_t len)
{
	size_t		rc;

	rc = fwrite(ptr, len, 1, fpout);

	/*
	 * The result is stored in a variable rather than dropped with a plain
	 * void cast, as some compilers still warn about an unused fwrite()
	 * result in that case.  size_t matches fwrite()'s return type.
	 */
	(void) rc;
}

/* write the object that ptr points to, sized by the pointed-to type */
#define write_chunk_s(fpout, ptr) write_chunk((fpout), (ptr), sizeof(*(ptr)))
1578 :
1579 : /*
1580 : * This function is called in the last process that is accessing the shared
1581 : * stats so locking is not required.
1582 : */
1583 : static void
1584 1208 : pgstat_write_statsfile(void)
1585 : {
1586 : FILE *fpout;
1587 : int32 format_id;
1588 1208 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1589 1208 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1590 : dshash_seq_status hstat;
1591 : PgStatShared_HashEntry *ps;
1592 :
1593 : pgstat_assert_is_up();
1594 :
1595 : /* should be called only by the checkpointer or single user mode */
1596 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1597 :
1598 : /* we're shutting down, so it's ok to just override this */
1599 1208 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1600 :
1601 1208 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
1602 :
1603 : /*
1604 : * Open the statistics temp file to write out the current values.
1605 : */
1606 1208 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1607 1208 : if (fpout == NULL)
1608 : {
1609 0 : ereport(LOG,
1610 : (errcode_for_file_access(),
1611 : errmsg("could not open temporary statistics file \"%s\": %m",
1612 : tmpfile)));
1613 0 : return;
1614 : }
1615 :
1616 : /*
1617 : * Write the file header --- currently just a format ID.
1618 : */
1619 1208 : format_id = PGSTAT_FILE_FORMAT_ID;
1620 1208 : write_chunk_s(fpout, &format_id);
1621 :
1622 : /* Write various stats structs for fixed number of objects */
1623 310456 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1624 : {
1625 : char *ptr;
1626 309248 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1627 :
1628 309248 : if (!info || !info->fixed_amount)
1629 301994 : continue;
1630 :
1631 7254 : if (pgstat_is_kind_builtin(kind))
1632 : Assert(info->snapshot_ctl_off != 0);
1633 :
1634 : /* skip if no need to write to file */
1635 7254 : if (!info->write_to_file)
1636 0 : continue;
1637 :
1638 7254 : pgstat_build_snapshot_fixed(kind);
1639 7254 : if (pgstat_is_kind_builtin(kind))
1640 7248 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1641 : else
1642 6 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1643 :
1644 7254 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1645 7254 : write_chunk_s(fpout, &kind);
1646 7254 : write_chunk(fpout, ptr, info->shared_data_len);
1647 : }
1648 :
1649 : /*
1650 : * Walk through the stats entries
1651 : */
1652 1208 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1653 362752 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1654 : {
1655 : PgStatShared_Common *shstats;
1656 361544 : const PgStat_KindInfo *kind_info = NULL;
1657 :
1658 361544 : CHECK_FOR_INTERRUPTS();
1659 :
1660 : /*
1661 : * We should not see any "dropped" entries when writing the stats
1662 : * file, as all backends and auxiliary processes should have cleaned
1663 : * up their references before they terminated.
1664 : *
1665 : * However, since we are already shutting down, it is not worth
1666 : * crashing the server over any potential cleanup issues, so we simply
1667 : * skip such entries if encountered.
1668 : */
1669 : Assert(!ps->dropped);
1670 361544 : if (ps->dropped)
1671 0 : continue;
1672 :
1673 : /*
1674 : * This discards data related to custom stats kinds that are unknown
1675 : * to this process.
1676 : */
1677 361544 : if (!pgstat_is_kind_valid(ps->key.kind))
1678 : {
1679 0 : elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
1680 : ps->key.kind, ps->key.dboid,
1681 : ps->key.objid);
1682 0 : continue;
1683 : }
1684 :
1685 361544 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1686 :
1687 361544 : kind_info = pgstat_get_kind_info(ps->key.kind);
1688 :
1689 : /* if not dropped the valid-entry refcount should exist */
1690 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1691 :
1692 : /* skip if no need to write to file */
1693 361544 : if (!kind_info->write_to_file)
1694 210 : continue;
1695 :
1696 361334 : if (!kind_info->to_serialized_name)
1697 : {
1698 : /* normal stats entry, identified by PgStat_HashKey */
1699 361178 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1700 361178 : write_chunk_s(fpout, &ps->key);
1701 : }
1702 : else
1703 : {
1704 : /* stats entry identified by name on disk (e.g. slots) */
1705 : NameData name;
1706 :
1707 156 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1708 :
1709 156 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1710 156 : write_chunk_s(fpout, &ps->key.kind);
1711 156 : write_chunk_s(fpout, &name);
1712 : }
1713 :
1714 : /* Write except the header part of the entry */
1715 361334 : write_chunk(fpout,
1716 : pgstat_get_entry_data(ps->key.kind, shstats),
1717 : pgstat_get_entry_len(ps->key.kind));
1718 : }
1719 1208 : dshash_seq_term(&hstat);
1720 :
1721 : /*
1722 : * No more output to be done. Close the temp file and replace the old
1723 : * pgstat.stat with it. The ferror() check replaces testing for error
1724 : * after each individual fputc or fwrite (in write_chunk()) above.
1725 : */
1726 1208 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1727 :
1728 1208 : if (ferror(fpout))
1729 : {
1730 0 : ereport(LOG,
1731 : (errcode_for_file_access(),
1732 : errmsg("could not write temporary statistics file \"%s\": %m",
1733 : tmpfile)));
1734 0 : FreeFile(fpout);
1735 0 : unlink(tmpfile);
1736 : }
1737 1208 : else if (FreeFile(fpout) < 0)
1738 : {
1739 0 : ereport(LOG,
1740 : (errcode_for_file_access(),
1741 : errmsg("could not close temporary statistics file \"%s\": %m",
1742 : tmpfile)));
1743 0 : unlink(tmpfile);
1744 : }
1745 1208 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1746 : {
1747 : /* durable_rename already emitted log message */
1748 0 : unlink(tmpfile);
1749 : }
1750 : }
1751 :
1752 : /* helpers for pgstat_read_statsfile() */
/*
 * Read exactly len bytes from fpin into ptr.
 *
 * Returns true only if all len bytes could be read.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1760 :
1761 : /*
1762 : * Reads in existing statistics file into memory.
1763 : *
1764 : * This function is called in the only process that is accessing the shared
1765 : * stats so locking is not required.
1766 : */
1767 : static void
1768 1468 : pgstat_read_statsfile(void)
1769 : {
1770 : FILE *fpin;
1771 : int32 format_id;
1772 : bool found;
1773 1468 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1774 1468 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1775 :
1776 : /* shouldn't be called from postmaster */
1777 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1778 :
1779 1468 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1780 :
1781 : /*
	 * Try to open the stats file. If it doesn't exist, the backends simply
	 * return zero for anything and statistics simply start from scratch
	 * with empty counters.
1785 : *
1786 : * ENOENT is a possibility if stats collection was previously disabled or
1787 : * has not yet written the stats file for the first time. Any other
1788 : * failure condition is suspicious.
1789 : */
1790 1468 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1791 : {
1792 100 : if (errno != ENOENT)
1793 0 : ereport(LOG,
1794 : (errcode_for_file_access(),
1795 : errmsg("could not open statistics file \"%s\": %m",
1796 : statfile)));
1797 100 : pgstat_reset_after_failure();
1798 100 : return;
1799 : }
1800 :
1801 : /*
1802 : * Verify it's of the expected format.
1803 : */
1804 1368 : if (!read_chunk_s(fpin, &format_id))
1805 : {
1806 0 : elog(WARNING, "could not read format ID");
1807 0 : goto error;
1808 : }
1809 :
1810 1368 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1811 : {
1812 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1813 : format_id, PGSTAT_FILE_FORMAT_ID);
1814 2 : goto error;
1815 : }
1816 :
1817 : /*
1818 : * We found an existing statistics file. Read it and put all the stats
1819 : * data into place.
1820 : */
1821 : for (;;)
1822 389330 : {
1823 390696 : int t = fgetc(fpin);
1824 :
1825 390696 : switch (t)
1826 : {
1827 8200 : case PGSTAT_FILE_ENTRY_FIXED:
1828 : {
1829 : PgStat_Kind kind;
1830 : const PgStat_KindInfo *info;
1831 : char *ptr;
1832 :
1833 : /* entry for fixed-numbered stats */
1834 8200 : if (!read_chunk_s(fpin, &kind))
1835 : {
1836 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1837 2 : goto error;
1838 : }
1839 :
1840 8200 : if (!pgstat_is_kind_valid(kind))
1841 : {
1842 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1843 : kind, t);
1844 0 : goto error;
1845 : }
1846 :
1847 8200 : info = pgstat_get_kind_info(kind);
1848 8200 : if (!info)
1849 : {
1850 2 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1851 : kind, t);
1852 2 : goto error;
1853 : }
1854 :
1855 8198 : if (!info->fixed_amount)
1856 : {
1857 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1858 : kind, t);
1859 0 : goto error;
1860 : }
1861 :
1862 : /* Load back stats into shared memory */
1863 8198 : if (pgstat_is_kind_builtin(kind))
1864 8196 : ptr = ((char *) shmem) + info->shared_ctl_off +
1865 8196 : info->shared_data_off;
1866 : else
1867 : {
1868 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1869 :
1870 2 : ptr = ((char *) shmem->custom_data[idx]) +
1871 2 : info->shared_data_off;
1872 : }
1873 :
1874 8198 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1875 : {
1876 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1877 : kind, t, info->shared_data_len);
1878 0 : goto error;
1879 : }
1880 :
1881 8198 : break;
1882 : }
1883 381132 : case PGSTAT_FILE_ENTRY_HASH:
1884 : case PGSTAT_FILE_ENTRY_NAME:
1885 : {
1886 : PgStat_HashKey key;
1887 : PgStatShared_HashEntry *p;
1888 : PgStatShared_Common *header;
1889 :
1890 381132 : CHECK_FOR_INTERRUPTS();
1891 :
1892 381132 : if (t == PGSTAT_FILE_ENTRY_HASH)
1893 : {
1894 : /* normal stats entry, identified by PgStat_HashKey */
1895 381030 : if (!read_chunk_s(fpin, &key))
1896 : {
1897 0 : elog(WARNING, "could not read key for entry of type %c", t);
1898 0 : goto error;
1899 : }
1900 :
1901 381030 : if (!pgstat_is_kind_valid(key.kind))
1902 : {
1903 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
1904 : key.kind, key.dboid,
1905 : key.objid, t);
1906 0 : goto error;
1907 : }
1908 :
1909 381030 : if (!pgstat_get_kind_info(key.kind))
1910 : {
1911 0 : elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
1912 : key.kind, key.dboid,
1913 : key.objid, t);
1914 0 : goto error;
1915 : }
1916 : }
1917 : else
1918 : {
1919 : /* stats entry identified by name on disk (e.g. slots) */
1920 102 : const PgStat_KindInfo *kind_info = NULL;
1921 : PgStat_Kind kind;
1922 : NameData name;
1923 :
1924 102 : if (!read_chunk_s(fpin, &kind))
1925 : {
1926 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1927 0 : goto error;
1928 : }
1929 102 : if (!read_chunk_s(fpin, &name))
1930 : {
1931 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1932 : kind, t);
1933 0 : goto error;
1934 : }
1935 102 : if (!pgstat_is_kind_valid(kind))
1936 : {
1937 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1938 : kind, t);
1939 0 : goto error;
1940 : }
1941 :
1942 102 : kind_info = pgstat_get_kind_info(kind);
1943 102 : if (!kind_info)
1944 : {
1945 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1946 : kind, t);
1947 0 : goto error;
1948 : }
1949 :
1950 102 : if (!kind_info->from_serialized_name)
1951 : {
1952 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1953 : kind, t);
1954 0 : goto error;
1955 : }
1956 :
1957 102 : if (!kind_info->from_serialized_name(&name, &key))
1958 : {
1959 : /* skip over data for entry we don't care about */
1960 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1961 : {
1962 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1963 : NameStr(name), kind, t);
1964 0 : goto error;
1965 : }
1966 :
1967 2 : continue;
1968 : }
1969 :
1970 : Assert(key.kind == kind);
1971 : }
1972 :
1973 : /*
1974 : * This intentionally doesn't use pgstat_get_entry_ref() -
1975 : * putting all stats into checkpointer's
1976 : * pgStatEntryRefHash would be wasted effort and memory.
1977 : */
1978 381130 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1979 :
1980 : /* don't allow duplicate entries */
1981 381130 : if (found)
1982 : {
1983 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1984 0 : elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
1985 : key.kind, key.dboid,
1986 : key.objid, t);
1987 0 : goto error;
1988 : }
1989 :
1990 381130 : header = pgstat_init_entry(key.kind, p);
1991 381130 : dshash_release_lock(pgStatLocal.shared_hash, p);
1992 :
1993 381130 : if (!read_chunk(fpin,
1994 : pgstat_get_entry_data(key.kind, header),
1995 : pgstat_get_entry_len(key.kind)))
1996 : {
1997 0 : elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
1998 : key.kind, key.dboid,
1999 : key.objid, t);
2000 0 : goto error;
2001 : }
2002 :
2003 381130 : break;
2004 : }
2005 1364 : case PGSTAT_FILE_ENTRY_END:
2006 :
2007 : /*
2008 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
2009 : * file
2010 : */
2011 1364 : if (fgetc(fpin) != EOF)
2012 : {
2013 2 : elog(WARNING, "could not read end-of-file");
2014 2 : goto error;
2015 : }
2016 :
2017 1362 : goto done;
2018 :
2019 0 : default:
2020 0 : elog(WARNING, "could not read entry of type %c", t);
2021 0 : goto error;
2022 : }
2023 : }
2024 :
2025 1368 : done:
2026 1368 : FreeFile(fpin);
2027 :
2028 1368 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2029 1368 : unlink(statfile);
2030 :
2031 1368 : return;
2032 :
2033 6 : error:
2034 6 : ereport(LOG,
2035 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2036 :
2037 6 : pgstat_reset_after_failure();
2038 :
2039 6 : goto done;
2040 : }
2041 :
2042 : /*
2043 : * Helper to reset / drop stats after a crash or after restoring stats from
2044 : * disk failed, potentially after already loading parts.
2045 : */
2046 : static void
2047 450 : pgstat_reset_after_failure(void)
2048 : {
2049 450 : TimestampTz ts = GetCurrentTimestamp();
2050 :
2051 : /* reset fixed-numbered stats */
2052 115650 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2053 : {
2054 115200 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2055 :
2056 115200 : if (!kind_info || !kind_info->fixed_amount)
2057 112498 : continue;
2058 :
2059 2702 : kind_info->reset_all_cb(ts);
2060 : }
2061 :
2062 : /* and drop variable-numbered ones */
2063 450 : pgstat_drop_all_entries();
2064 450 : }
2065 :
2066 : /*
2067 : * GUC assign_hook for stats_fetch_consistency.
2068 : */
2069 : void
2070 5618 : assign_stats_fetch_consistency(int newval, void *extra)
2071 : {
2072 : /*
2073 : * Changing this value in a transaction may cause snapshot state
2074 : * inconsistencies, so force a clear of the current snapshot on the next
2075 : * snapshot build attempt.
2076 : */
2077 5618 : if (pgstat_fetch_consistency != newval)
2078 3236 : force_stats_snapshot_clear = true;
2079 5618 : }
|