Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered stats, pending data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_relation.c
87 : * - pgstat_replslot.c
88 : * - pgstat_slru.c
89 : * - pgstat_subscription.c
90 : * - pgstat_wal.c
91 : *
92 : * Whenever possible infrastructure files should not contain code related to
93 : * specific kinds of stats.
94 : *
95 : *
96 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97 : *
98 : * IDENTIFICATION
99 : * src/backend/utils/activity/pgstat.c
100 : * ----------
101 : */
102 : #include "postgres.h"
103 :
104 : #include <unistd.h>
105 :
106 : #include "access/xact.h"
107 : #include "lib/dshash.h"
108 : #include "pgstat.h"
109 : #include "storage/fd.h"
110 : #include "storage/ipc.h"
111 : #include "storage/lwlock.h"
112 : #include "utils/guc_hooks.h"
113 : #include "utils/memutils.h"
114 : #include "utils/pgstat_internal.h"
115 : #include "utils/timestamp.h"
116 :
117 :
118 : /* ----------
119 : * Timer definitions.
120 : *
121 : * In milliseconds.
122 : * ----------
123 : */
124 :
125 : /* minimum interval between non-forced stats flushes */
126 : #define PGSTAT_MIN_INTERVAL 1000
127 : /* how long updates may stay pending before a flush waits for locks */
128 : #define PGSTAT_MAX_INTERVAL 60000
129 : /* when to call pgstat_report_stat() again, even when idle */
130 : #define PGSTAT_IDLE_INTERVAL 10000
131 :
132 : /* ----------
133 : * Initial size hints for the hash tables used in statistics.
134 : * ----------
135 : */
136 : /* NOTE(review): presumably sizes the pgstat_snapshot hash; its use is not visible in this part of the file */
137 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
138 :
139 : /* ---------
140 : * Single-character identifiers tagging the entries in the stats file.
141 : * ---------
142 : */
143 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
144 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
145 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
146 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
147 : * PgStat_HashKey */
148 :
149 : /* entry of the hash table holding statistics snapshots */
150 : typedef struct PgStat_SnapshotEntry
151 : {
152 : PgStat_HashKey key; /* hash key (SH_KEY of pgstat_snapshot) */
153 : char status; /* for simplehash use */
154 : void *data; /* the stats data itself */
155 : } PgStat_SnapshotEntry;
156 :
157 :
158 : /* ----------
159 : * Backend-local Hash Table Definitions
160 : * ----------
161 : */
162 :
163 : /* simplehash instantiation for stats snapshot entries, keyed by PgStat_HashKey */
164 : #define SH_PREFIX pgstat_snapshot
165 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
166 : #define SH_KEY_TYPE PgStat_HashKey
167 : #define SH_KEY key
168 : #define SH_HASH_KEY(tb, key) \
169 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
170 : #define SH_EQUAL(tb, a, b) \
171 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
172 : #define SH_SCOPE static inline
173 : #define SH_DEFINE
174 : #define SH_DECLARE
175 : #include "lib/simplehash.h"
176 :
177 :
178 : /* ----------
179 : * Local function forward declarations
180 : * ----------
181 : */
182 :
183 : static void pgstat_write_statsfile(void);
184 : static void pgstat_read_statsfile(void);
185 :
186 : static void pgstat_init_snapshot_fixed(void);
187 :
188 : static void pgstat_reset_after_failure(void);
189 :
190 : static bool pgstat_flush_pending_entries(bool nowait);
191 :
192 : static void pgstat_prep_snapshot(void);
193 : static void pgstat_build_snapshot(void);
194 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195 :
196 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
197 :
198 :
199 : /* ----------
200 : * GUC parameters
201 : * ----------
202 : */
203 : /* pgstat_fetch_consistency backs the stats_fetch_consistency GUC */
204 : bool pgstat_track_counts = false;
205 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
206 :
207 :
208 : /* ----------
209 : * state shared with pgstat_*.c
210 : * ----------
211 : */
212 :
213 : PgStat_LocalState pgStatLocal;
214 :
215 : /*
216 : * Tracks whether fixed-numbered stats may have pending reports. Checked by
217 : * pgstat_report_stat(), which clears it once a flush completes fully.
218 : */
219 : bool pgstat_report_fixed = false;
220 :
221 : /* ----------
222 : * Local data
223 : *
224 : * NB: There should be only variables related to stats infrastructure here,
225 : * not for specific kinds of stats.
226 : * ----------
227 : */
228 :
229 : /*
230 : * Memory context containing pending stats data. Mostly to make it easier
231 : * to track / attribute memory usage. NOTE(review): no contexts for the
232 : * pgStatEntryRefHash table or its entries are defined in this section.
233 : */
234 :
235 : static MemoryContext pgStatPendingContext = NULL;
236 :
237 : /*
238 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
239 : *
240 : * Newly pending entries should only ever be added to the end of the list,
241 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
242 : */
243 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
244 :
245 :
246 : /*
247 : * Force the next stats flush to happen regardless of
248 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
249 : */
250 : static bool pgStatForceNextFlush = false;
251 :
252 : /*
253 : * Force-clear existing snapshot before next use when stats_fetch_consistency
254 : * is changed.
255 : */
256 : static bool force_stats_snapshot_clear = false;
257 :
258 :
259 : /*
260 : * For assertions that check pgstat is not used before initialization / after
261 : * shutdown.
262 : */
263 : #ifdef USE_ASSERT_CHECKING
264 : static bool pgstat_is_initialized = false;
265 : static bool pgstat_is_shutdown = false;
266 : #endif
267 :
268 :
269 : /*
270 : * The different kinds of built-in statistics, indexed by PGSTAT_KIND_* value.
271 : *
272 : * If reasonably possible, handling specific to one kind of stats should go
273 : * through this abstraction, rather than making more of pgstat.c aware.
274 : *
275 : * See comments for struct PgStat_KindInfo for details about the individual
276 : * fields.
277 : *
278 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
279 : * seem to be a great way of doing that, given the split across multiple
280 : * files.
281 : */
282 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
283 :
284 : /* stats kinds for variable-numbered objects (fixed_amount = false) */
285 :
286 : [PGSTAT_KIND_DATABASE] = {
287 : .name = "database",
288 :
289 : .fixed_amount = false,
290 : .write_to_file = true,
291 : /* so pg_stat_database entries can be seen in all databases */
292 : .accessed_across_databases = true,
293 :
294 : .shared_size = sizeof(PgStatShared_Database),
295 : .shared_data_off = offsetof(PgStatShared_Database, stats),
296 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
297 : .pending_size = sizeof(PgStat_StatDBEntry),
298 :
299 : .flush_pending_cb = pgstat_database_flush_cb,
300 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
301 : },
302 :
303 : [PGSTAT_KIND_RELATION] = {
304 : .name = "relation",
305 :
306 : .fixed_amount = false,
307 : .write_to_file = true,
308 :
309 : .shared_size = sizeof(PgStatShared_Relation),
310 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
311 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
312 : .pending_size = sizeof(PgStat_TableStatus),
313 :
314 : .flush_pending_cb = pgstat_relation_flush_cb,
315 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
316 : .reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
317 : },
318 :
319 : [PGSTAT_KIND_FUNCTION] = {
320 : .name = "function",
321 :
322 : .fixed_amount = false,
323 : .write_to_file = true,
324 :
325 : .shared_size = sizeof(PgStatShared_Function),
326 : .shared_data_off = offsetof(PgStatShared_Function, stats),
327 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
328 : .pending_size = sizeof(PgStat_FunctionCounts),
329 :
330 : .flush_pending_cb = pgstat_function_flush_cb,
331 : .reset_timestamp_cb = pgstat_function_reset_timestamp_cb,
332 : },
333 :
334 : [PGSTAT_KIND_REPLSLOT] = {
335 : .name = "replslot",
336 :
337 : .fixed_amount = false,
338 : .write_to_file = true,
339 :
340 : .accessed_across_databases = true,
341 :
342 : .shared_size = sizeof(PgStatShared_ReplSlot),
343 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
344 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
345 :
346 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
347 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
348 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
349 : },
350 :
351 : [PGSTAT_KIND_SUBSCRIPTION] = {
352 : .name = "subscription",
353 :
354 : .fixed_amount = false,
355 : .write_to_file = true,
356 : /* so pg_stat_subscription_stats entries can be seen in all databases */
357 : .accessed_across_databases = true,
358 :
359 : .shared_size = sizeof(PgStatShared_Subscription),
360 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
361 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
362 : .pending_size = sizeof(PgStat_BackendSubEntry),
363 :
364 : .flush_pending_cb = pgstat_subscription_flush_cb,
365 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
366 : },
367 :
368 : [PGSTAT_KIND_BACKEND] = {
369 : .name = "backend",
370 :
371 : .fixed_amount = false,
372 : .write_to_file = false, /* the only kind in this table not written to the stats file */
373 :
374 : .accessed_across_databases = true,
375 :
376 : .shared_size = sizeof(PgStatShared_Backend),
377 : .shared_data_off = offsetof(PgStatShared_Backend, stats),
378 : .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
379 :
380 : .flush_static_cb = pgstat_backend_flush_cb,
381 : .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
382 : },
383 :
384 : /* stats for fixed-numbered (mostly 1) objects (fixed_amount = true) */
385 :
386 : [PGSTAT_KIND_ARCHIVER] = {
387 : .name = "archiver",
388 :
389 : .fixed_amount = true,
390 : .write_to_file = true,
391 :
392 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
393 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
394 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
395 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
396 :
397 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
398 : .reset_all_cb = pgstat_archiver_reset_all_cb,
399 : .snapshot_cb = pgstat_archiver_snapshot_cb,
400 : },
401 :
402 : [PGSTAT_KIND_BGWRITER] = {
403 : .name = "bgwriter",
404 :
405 : .fixed_amount = true,
406 : .write_to_file = true,
407 :
408 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
409 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
410 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
411 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
412 :
413 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
414 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
415 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
416 : },
417 :
418 : [PGSTAT_KIND_CHECKPOINTER] = {
419 : .name = "checkpointer",
420 :
421 : .fixed_amount = true,
422 : .write_to_file = true,
423 :
424 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
425 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
426 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
427 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
428 :
429 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
430 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
431 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
432 : },
433 :
434 : [PGSTAT_KIND_IO] = {
435 : .name = "io",
436 :
437 : .fixed_amount = true,
438 : .write_to_file = true,
439 :
440 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
441 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
442 : .shared_data_off = offsetof(PgStatShared_IO, stats),
443 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
444 :
445 : .flush_static_cb = pgstat_io_flush_cb,
446 : .init_shmem_cb = pgstat_io_init_shmem_cb,
447 : .reset_all_cb = pgstat_io_reset_all_cb,
448 : .snapshot_cb = pgstat_io_snapshot_cb,
449 : },
450 :
451 : [PGSTAT_KIND_SLRU] = {
452 : .name = "slru",
453 :
454 : .fixed_amount = true,
455 : .write_to_file = true,
456 :
457 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
458 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
459 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
460 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
461 :
462 : .flush_static_cb = pgstat_slru_flush_cb,
463 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
464 : .reset_all_cb = pgstat_slru_reset_all_cb,
465 : .snapshot_cb = pgstat_slru_snapshot_cb,
466 : },
467 :
468 : [PGSTAT_KIND_WAL] = {
469 : .name = "wal",
470 :
471 : .fixed_amount = true,
472 : .write_to_file = true,
473 :
474 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
475 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
476 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
477 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
478 :
479 : .init_backend_cb = pgstat_wal_init_backend_cb,
480 : .flush_static_cb = pgstat_wal_flush_cb,
481 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
482 : .reset_all_cb = pgstat_wal_reset_all_cb,
483 : .snapshot_cb = pgstat_wal_snapshot_cb,
484 : },
485 : };
486 :
487 : /*
488 : * Information about custom statistics kinds.
489 : *
490 : * These are kept in a different array than the built-in kinds, to keep the
491 : * initialization of the built-in kinds clear.
492 : *
493 : * Indexed starting at PGSTAT_KIND_CUSTOM_MIN (presumably kind - PGSTAT_KIND_CUSTOM_MIN; TODO confirm), of size PGSTAT_KIND_CUSTOM_SIZE.
494 : */
495 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
496 :
497 : /* ------------------------------------------------------------
498 : * Functions managing the state of the stats system for all backends.
499 : * ------------------------------------------------------------
500 : */
501 :
/*
 * Load the on-disk statistics into memory at server start.
 *
 * This is a thin wrapper around pgstat_read_statsfile().
 *
 * Should only be called by the startup process or in single user mode.
 */
void
pgstat_restore_stats(void)
{
	pgstat_read_statsfile();
}
512 :
513 : /*
514 : * Remove the stats file. This is currently used only if WAL recovery is
515 : * needed after a crash.
516 : *
517 : * Should only be called by the startup process or in single user mode.
518 : */
519 : void
520 350 : pgstat_discard_stats(void)
521 : {
522 : int ret;
523 :
524 : /* NB: this needs to be done even in single user mode */
525 :
526 350 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
527 350 : if (ret != 0)
528 : {
529 348 : if (errno == ENOENT)
530 348 : elog(DEBUG2,
531 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
532 : PGSTAT_STAT_PERMANENT_FILENAME);
533 : else
534 0 : ereport(LOG,
535 : (errcode_for_file_access(),
536 : errmsg("could not unlink permanent statistics file \"%s\": %m",
537 : PGSTAT_STAT_PERMANENT_FILENAME)));
538 : }
539 : else
540 : {
541 2 : ereport(DEBUG2,
542 : (errcode_for_file_access(),
543 : errmsg_internal("unlinked permanent statistics file \"%s\"",
544 : PGSTAT_STAT_PERMANENT_FILENAME)));
545 : }
546 :
547 : /*
548 : * Reset stats contents. This will set reset timestamps of fixed-numbered
549 : * stats to the current time (no variable stats exist).
550 : */
551 350 : pgstat_reset_after_failure();
552 350 : }
553 :
554 : /*
555 : * pgstat_before_server_shutdown() needs to be called by exactly one process
556 : * during regular server shutdowns. Otherwise all stats will be lost.
557 : *
558 : * We currently only write out stats for proc_exit(0). We might want to change
559 : * that at some point... But right now pgstat_discard_stats() would be called
560 : * during the start after a disorderly shutdown, anyway.
561 : */
562 : void
563 1308 : pgstat_before_server_shutdown(int code, Datum arg)
564 : {
565 : Assert(pgStatLocal.shmem != NULL);
566 : Assert(!pgStatLocal.shmem->is_shutdown);
567 :
568 : /*
569 : * Stats should only be reported after pgstat_initialize() and before
570 : * pgstat_shutdown(). This is a convenient point to catch most violations
571 : * of this rule.
572 : */
573 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
574 :
575 : /* flush out our own pending changes before writing out */
576 1308 : pgstat_report_stat(true);
577 :
578 : /*
579 : * Only write out file during normal shutdown. Don't even signal that
580 : * we've shutdown during irregular shutdowns, because the shutdown
581 : * sequence isn't coordinated to ensure this backend shuts down last.
582 : */
583 1308 : if (code == 0)
584 : {
585 1296 : pgStatLocal.shmem->is_shutdown = true;
586 1296 : pgstat_write_statsfile();
587 : }
588 1308 : }
589 :
590 :
591 : /* ------------------------------------------------------------
592 : * Backend initialization / shutdown functions
593 : * ------------------------------------------------------------
594 : */
595 :
596 : /*
597 : * Shut down a single backend's statistics reporting at process exit.
598 : *
599 : * Flush out any remaining statistics counts. Without this, operations
600 : * triggered during backend exit (such as temp table deletions) won't be
601 : * counted.
602 : */
603 : static void
604 44370 : pgstat_shutdown_hook(int code, Datum arg)
605 : {
606 : Assert(!pgstat_is_shutdown);
607 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
608 :
609 : /*
610 : * If we got as far as discovering our own database ID, we can flush out
611 : * what we did so far. Otherwise, we'd be reporting an invalid database
612 : * ID, so forget it. (This means that accesses to pg_database during
613 : * failed backend starts might never get counted.)
614 : */
615 44370 : if (OidIsValid(MyDatabaseId))
616 33104 : pgstat_report_disconnect(MyDatabaseId);
617 :
618 44370 : pgstat_report_stat(true);
619 :
620 : /* there shouldn't be any pending changes left */
621 : Assert(dlist_is_empty(&pgStatPending));
622 44370 : dlist_init(&pgStatPending);
623 :
624 : /* drop the backend stats entry */
625 44370 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
626 0 : pgstat_request_entry_refs_gc();
627 :
628 44370 : pgstat_detach_shmem();
629 :
630 : #ifdef USE_ASSERT_CHECKING
631 : pgstat_is_shutdown = true;
632 : #endif
633 44370 : }
634 :
635 : /*
636 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
637 : * BaseInit().
638 : *
639 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
640 : */
641 : void
642 44370 : pgstat_initialize(void)
643 : {
644 : Assert(!pgstat_is_initialized);
645 :
646 44370 : pgstat_attach_shmem();
647 :
648 44370 : pgstat_init_snapshot_fixed();
649 :
650 : /* Backend initialization callbacks */
651 1464210 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
652 : {
653 1419840 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
654 :
655 1419840 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
656 1375470 : continue;
657 :
658 44370 : kind_info->init_backend_cb();
659 : }
660 :
661 : /* Set up a process-exit hook to clean up */
662 44370 : before_shmem_exit(pgstat_shutdown_hook, 0);
663 :
664 : #ifdef USE_ASSERT_CHECKING
665 : pgstat_is_initialized = true;
666 : #endif
667 44370 : }
668 :
669 :
670 : /* ------------------------------------------------------------
671 : * Public functions used by backends follow
672 : * ------------------------------------------------------------
673 : */
674 :
675 : /*
676 : * Must be called by processes that performs DML: tcop/postgres.c, logical
677 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
678 : * shared memory.
679 : *
680 : * Unless called with 'force', pending stats updates are flushed happen once
681 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
682 : * block on lock acquisition, except if stats updates have been pending for
683 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
684 : *
685 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
686 : * suggested idle timeout is returned. Currently this is always
687 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
688 : * a timeout after which to call pgstat_report_stat(true), but are not
689 : * required to do so.
690 : *
691 : * Note that this is called only when not within a transaction, so it is fair
692 : * to use transaction stop time as an approximation of current time.
693 : */
694 : long
695 613556 : pgstat_report_stat(bool force)
696 : {
697 : static TimestampTz pending_since = 0;
698 : static TimestampTz last_flush = 0;
699 : bool partial_flush;
700 : TimestampTz now;
701 : bool nowait;
702 :
703 : pgstat_assert_is_up();
704 : Assert(!IsTransactionOrTransactionBlock());
705 :
706 : /* "absorb" the forced flush even if there's nothing to flush */
707 613556 : if (pgStatForceNextFlush)
708 : {
709 484 : force = true;
710 484 : pgStatForceNextFlush = false;
711 : }
712 :
713 : /* Don't expend a clock check if nothing to do */
714 613556 : if (dlist_is_empty(&pgStatPending) &&
715 18428 : !pgstat_report_fixed)
716 : {
717 13256 : return 0;
718 : }
719 :
720 : /*
721 : * There should never be stats to report once stats are shut down. Can't
722 : * assert that before the checks above, as there is an unconditional
723 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
724 : * the process that ran pgstat_before_server_shutdown() will still call.
725 : */
726 : Assert(!pgStatLocal.shmem->is_shutdown);
727 :
728 600300 : if (force)
729 : {
730 : /*
731 : * Stats reports are forced either when it's been too long since stats
732 : * have been reported or in processes that force stats reporting to
733 : * happen at specific points (including shutdown). In the former case
734 : * the transaction stop time might be quite old, in the latter it
735 : * would never get cleared.
736 : */
737 42606 : now = GetCurrentTimestamp();
738 : }
739 : else
740 : {
741 557694 : now = GetCurrentTransactionStopTimestamp();
742 :
743 1061680 : if (pending_since > 0 &&
744 503986 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
745 : {
746 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
747 0 : force = true;
748 : }
749 557694 : else if (last_flush > 0 &&
750 531802 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
751 : {
752 : /* don't flush too frequently */
753 529672 : if (pending_since == 0)
754 27388 : pending_since = now;
755 :
756 529672 : return PGSTAT_IDLE_INTERVAL;
757 : }
758 : }
759 :
760 70628 : pgstat_update_dbstats(now);
761 :
762 : /* don't wait for lock acquisition when !force */
763 70628 : nowait = !force;
764 :
765 70628 : partial_flush = false;
766 :
767 : /* flush of variable-numbered stats tracked in pending entries list */
768 70628 : partial_flush |= pgstat_flush_pending_entries(nowait);
769 :
770 : /* flush of other stats kinds */
771 70628 : if (pgstat_report_fixed)
772 : {
773 2262216 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
774 : {
775 2193664 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
776 :
777 2193664 : if (!kind_info)
778 1370388 : continue;
779 823276 : if (!kind_info->flush_static_cb)
780 549068 : continue;
781 :
782 274208 : partial_flush |= kind_info->flush_static_cb(nowait);
783 : }
784 : }
785 :
786 70628 : last_flush = now;
787 :
788 : /*
789 : * If some of the pending stats could not be flushed due to lock
790 : * contention, let the caller know when to retry.
791 : */
792 70628 : if (partial_flush)
793 : {
794 : /* force should have prevented us from getting here */
795 : Assert(!force);
796 :
797 : /* remember since when stats have been pending */
798 16 : if (pending_since == 0)
799 16 : pending_since = now;
800 :
801 16 : return PGSTAT_IDLE_INTERVAL;
802 : }
803 :
804 70612 : pending_since = 0;
805 70612 : pgstat_report_fixed = false;
806 :
807 70612 : return 0;
808 : }
809 :
810 : /*
811 : * Force locally pending stats to be flushed during the next
812 : * pgstat_report_stat() call. This is useful for writing tests.
813 : */
814 : void
815 484 : pgstat_force_next_flush(void)
816 : {
817 484 : pgStatForceNextFlush = true;
818 484 : }
819 :
820 : /*
821 : * Only for use by pgstat_reset_counters()
822 : */
823 : static bool
824 23266 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
825 : {
826 23266 : return entry->key.dboid == MyDatabaseId;
827 : }
828 :
829 : /*
830 : * Reset counters for our database.
831 : *
832 : * Permission checking for this function is managed through the normal
833 : * GRANT system.
834 : */
835 : void
836 26 : pgstat_reset_counters(void)
837 : {
838 26 : TimestampTz ts = GetCurrentTimestamp();
839 :
840 26 : pgstat_reset_matching_entries(match_db_entries,
841 : ObjectIdGetDatum(MyDatabaseId),
842 : ts);
843 26 : }
844 :
845 : /*
846 : * Reset a single variable-numbered entry.
847 : *
848 : * If the stats kind is within a database, also reset the database's
849 : * stat_reset_timestamp.
850 : *
851 : * Permission checking for this function is managed through the normal
852 : * GRANT system.
853 : */
854 : void
855 50 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
856 : {
857 50 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
858 50 : TimestampTz ts = GetCurrentTimestamp();
859 :
860 : /* not needed atm, and doesn't make sense with the current signature */
861 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
862 :
863 : /* reset the "single counter" */
864 50 : pgstat_reset_entry(kind, dboid, objid, ts);
865 :
866 50 : if (!kind_info->accessed_across_databases)
867 22 : pgstat_reset_database_timestamp(dboid, ts);
868 50 : }
869 :
870 : /*
871 : * Reset stats for all entries of a kind.
872 : *
873 : * Permission checking for this function is managed through the normal
874 : * GRANT system.
875 : */
876 : void
877 58 : pgstat_reset_of_kind(PgStat_Kind kind)
878 : {
879 58 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
880 58 : TimestampTz ts = GetCurrentTimestamp();
881 :
882 58 : if (kind_info->fixed_amount)
883 50 : kind_info->reset_all_cb(ts);
884 : else
885 8 : pgstat_reset_entries_of_kind(kind, ts);
886 58 : }
887 :
888 :
889 : /* ------------------------------------------------------------
890 : * Fetching of stats
891 : * ------------------------------------------------------------
892 : */
893 :
894 : /*
895 : * Discard any data collected in the current transaction. Any subsequent
896 : * request will cause new snapshots to be read.
897 : *
898 : * This is also invoked during transaction commit or abort to discard
899 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
900 : * cause this routine to be called.
901 : */
902 : void
903 1097368 : pgstat_clear_snapshot(void)
904 : {
905 : pgstat_assert_is_up();
906 :
907 1097368 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
908 : sizeof(pgStatLocal.snapshot.fixed_valid));
909 1097368 : memset(&pgStatLocal.snapshot.custom_valid, 0,
910 : sizeof(pgStatLocal.snapshot.custom_valid));
911 1097368 : pgStatLocal.snapshot.stats = NULL;
912 1097368 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
913 :
914 : /* Release memory, if any was allocated */
915 1097368 : if (pgStatLocal.snapshot.context)
916 : {
917 1148 : MemoryContextDelete(pgStatLocal.snapshot.context);
918 :
919 : /* Reset variables */
920 1148 : pgStatLocal.snapshot.context = NULL;
921 : }
922 :
923 : /*
924 : * Historically the backend_status.c facilities lived in this file, and
925 : * were reset with the same function. For now keep it that way, and
926 : * forward the reset request.
927 : */
928 1097368 : pgstat_clear_backend_activity_snapshot();
929 :
930 : /* Reset this flag, as it may be possible that a cleanup was forced. */
931 1097368 : force_stats_snapshot_clear = false;
932 1097368 : }
933 :
934 : void *
935 700422 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
936 : {
937 700422 : PgStat_HashKey key = {0};
938 : PgStat_EntryRef *entry_ref;
939 : void *stats_data;
940 700422 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
941 :
942 : /* should be called from backends */
943 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
944 : Assert(!kind_info->fixed_amount);
945 :
946 700422 : pgstat_prep_snapshot();
947 :
948 700422 : key.kind = kind;
949 700422 : key.dboid = dboid;
950 700422 : key.objid = objid;
951 :
952 : /* if we need to build a full snapshot, do so */
953 700422 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
954 460 : pgstat_build_snapshot();
955 :
956 : /* if caching is desired, look up in cache */
957 700422 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
958 : {
959 10232 : PgStat_SnapshotEntry *entry = NULL;
960 :
961 10232 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
962 :
963 10232 : if (entry)
964 788 : return entry->data;
965 :
966 : /*
967 : * If we built a full snapshot and the key is not in
968 : * pgStatLocal.snapshot.stats, there are no matching stats.
969 : */
970 9444 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
971 28 : return NULL;
972 : }
973 :
974 699606 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
975 :
976 699606 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
977 :
978 699606 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
979 : {
980 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
981 11700 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
982 : {
983 1888 : PgStat_SnapshotEntry *entry = NULL;
984 : bool found;
985 :
986 1888 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
987 : Assert(!found);
988 1888 : entry->data = NULL;
989 : }
990 11700 : return NULL;
991 : }
992 :
993 : /*
994 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
995 : * otherwise we could quickly end up with a fair bit of memory used due to
996 : * repeated accesses.
997 : */
998 687906 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
999 680378 : stats_data = palloc(kind_info->shared_data_len);
1000 : else
1001 7528 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1002 7528 : kind_info->shared_data_len);
1003 :
1004 687906 : (void) pgstat_lock_entry_shared(entry_ref, false);
1005 1375812 : memcpy(stats_data,
1006 687906 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1007 687906 : kind_info->shared_data_len);
1008 687906 : pgstat_unlock_entry(entry_ref);
1009 :
1010 687906 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1011 : {
1012 7528 : PgStat_SnapshotEntry *entry = NULL;
1013 : bool found;
1014 :
1015 7528 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1016 7528 : entry->data = stats_data;
1017 : }
1018 :
1019 687906 : return stats_data;
1020 : }
1021 :
1022 : /*
1023 : * If a stats snapshot has been taken, return the timestamp at which that was
1024 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1025 : * false.
1026 : */
1027 : TimestampTz
1028 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1029 : {
1030 60 : if (force_stats_snapshot_clear)
1031 18 : pgstat_clear_snapshot();
1032 :
1033 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1034 : {
1035 24 : *have_snapshot = true;
1036 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1037 : }
1038 :
1039 36 : *have_snapshot = false;
1040 :
1041 36 : return 0;
1042 : }
1043 :
1044 : bool
1045 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1046 : {
1047 : /* fixed-numbered stats always exist */
1048 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1049 12 : return true;
1050 :
1051 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1052 : }
1053 :
1054 : /*
1055 : * Ensure snapshot for fixed-numbered 'kind' exists.
1056 : *
1057 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1058 : * massaging the data into the desired format.
1059 : */
1060 : void
1061 502 : pgstat_snapshot_fixed(PgStat_Kind kind)
1062 : {
1063 : Assert(pgstat_is_kind_valid(kind));
1064 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1065 :
1066 502 : if (force_stats_snapshot_clear)
1067 0 : pgstat_clear_snapshot();
1068 :
1069 502 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1070 24 : pgstat_build_snapshot();
1071 : else
1072 478 : pgstat_build_snapshot_fixed(kind);
1073 :
1074 502 : if (pgstat_is_kind_builtin(kind))
1075 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1076 6 : else if (pgstat_is_kind_custom(kind))
1077 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1078 502 : }
1079 :
1080 : static void
1081 44370 : pgstat_init_snapshot_fixed(void)
1082 : {
1083 : /*
1084 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1085 : * stats kinds.
1086 : */
1087 443700 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1088 : {
1089 399330 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1090 :
1091 399330 : if (!kind_info || !kind_info->fixed_amount)
1092 399118 : continue;
1093 :
1094 212 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1095 212 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1096 : }
1097 44370 : }
1098 :
1099 : static void
1100 700474 : pgstat_prep_snapshot(void)
1101 : {
1102 700474 : if (force_stats_snapshot_clear)
1103 18 : pgstat_clear_snapshot();
1104 :
1105 700474 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1106 10284 : pgStatLocal.snapshot.stats != NULL)
1107 699326 : return;
1108 :
1109 1148 : if (!pgStatLocal.snapshot.context)
1110 1148 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1111 : "PgStat Snapshot",
1112 : ALLOCSET_SMALL_SIZES);
1113 :
1114 1148 : pgStatLocal.snapshot.stats =
1115 1148 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1116 : PGSTAT_SNAPSHOT_HASH_SIZE,
1117 : NULL);
1118 : }
1119 :
1120 : static void
1121 484 : pgstat_build_snapshot(void)
1122 : {
1123 : dshash_seq_status hstat;
1124 : PgStatShared_HashEntry *p;
1125 :
1126 : /* should only be called when we need a snapshot */
1127 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1128 :
1129 : /* snapshot already built */
1130 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1131 432 : return;
1132 :
1133 52 : pgstat_prep_snapshot();
1134 :
1135 : Assert(pgStatLocal.snapshot.stats->members == 0);
1136 :
1137 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1138 :
1139 : /*
1140 : * Snapshot all variable stats.
1141 : */
1142 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1143 54802 : while ((p = dshash_seq_next(&hstat)) != NULL)
1144 : {
1145 54750 : PgStat_Kind kind = p->key.kind;
1146 54750 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1147 : bool found;
1148 : PgStat_SnapshotEntry *entry;
1149 : PgStatShared_Common *stats_data;
1150 :
1151 : /*
1152 : * Check if the stats object should be included in the snapshot.
1153 : * Unless the stats kind can be accessed from all databases (e.g.,
1154 : * database stats themselves), we only include stats for the current
1155 : * database or objects not associated with a database (e.g. shared
1156 : * relations).
1157 : */
1158 54750 : if (p->key.dboid != MyDatabaseId &&
1159 16520 : p->key.dboid != InvalidOid &&
1160 13620 : !kind_info->accessed_across_databases)
1161 13668 : continue;
1162 :
1163 41286 : if (p->dropped)
1164 204 : continue;
1165 :
1166 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1167 :
1168 41082 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1169 : Assert(stats_data);
1170 :
1171 41082 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1172 : Assert(!found);
1173 :
1174 41082 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1175 : pgstat_get_entry_len(kind));
1176 :
1177 : /*
1178 : * Acquire the LWLock directly instead of using
1179 : * pg_stat_lock_entry_shared() which requires a reference.
1180 : */
1181 41082 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1182 41082 : memcpy(entry->data,
1183 41082 : pgstat_get_entry_data(kind, stats_data),
1184 : pgstat_get_entry_len(kind));
1185 41082 : LWLockRelease(&stats_data->lock);
1186 : }
1187 52 : dshash_seq_term(&hstat);
1188 :
1189 : /*
1190 : * Build snapshot of all fixed-numbered stats.
1191 : */
1192 1716 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1193 : {
1194 1664 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1195 :
1196 1664 : if (!kind_info)
1197 1040 : continue;
1198 624 : if (!kind_info->fixed_amount)
1199 : {
1200 : Assert(kind_info->snapshot_cb == NULL);
1201 312 : continue;
1202 : }
1203 :
1204 312 : pgstat_build_snapshot_fixed(kind);
1205 : }
1206 :
1207 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1208 : }
1209 :
1210 : static void
1211 8572 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1212 : {
1213 8572 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1214 : int idx;
1215 : bool *valid;
1216 :
1217 : /* Position in fixed_valid or custom_valid */
1218 8572 : if (pgstat_is_kind_builtin(kind))
1219 : {
1220 8560 : idx = kind;
1221 8560 : valid = pgStatLocal.snapshot.fixed_valid;
1222 : }
1223 : else
1224 : {
1225 12 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1226 12 : valid = pgStatLocal.snapshot.custom_valid;
1227 : }
1228 :
1229 : Assert(kind_info->fixed_amount);
1230 : Assert(kind_info->snapshot_cb != NULL);
1231 :
1232 8572 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1233 : {
1234 : /* rebuild every time */
1235 7812 : valid[idx] = false;
1236 : }
1237 760 : else if (valid[idx])
1238 : {
1239 : /* in snapshot mode we shouldn't get called again */
1240 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1241 12 : return;
1242 : }
1243 :
1244 : Assert(!valid[idx]);
1245 :
1246 8560 : kind_info->snapshot_cb();
1247 :
1248 : Assert(!valid[idx]);
1249 8560 : valid[idx] = true;
1250 : }
1251 :
1252 :
1253 : /* ------------------------------------------------------------
1254 : * Backend-local pending stats infrastructure
1255 : * ------------------------------------------------------------
1256 : */
1257 :
1258 : /*
1259 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1260 : * stats if not already done.
1261 : *
1262 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1263 : * created, false otherwise.
1264 : */
1265 : PgStat_EntryRef *
1266 4015960 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1267 : {
1268 : PgStat_EntryRef *entry_ref;
1269 :
1270 : /* need to be able to flush out */
1271 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1272 :
1273 4015960 : if (unlikely(!pgStatPendingContext))
1274 : {
1275 35814 : pgStatPendingContext =
1276 35814 : AllocSetContextCreate(TopMemoryContext,
1277 : "PgStat Pending",
1278 : ALLOCSET_SMALL_SIZES);
1279 : }
1280 :
1281 4015960 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1282 : true, created_entry);
1283 :
1284 4015960 : if (entry_ref->pending == NULL)
1285 : {
1286 2053624 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1287 :
1288 : Assert(entrysize != (size_t) -1);
1289 :
1290 2053624 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1291 2053624 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1292 : }
1293 :
1294 4015960 : return entry_ref;
1295 : }
1296 :
1297 : /*
1298 : * Return an existing stats entry, or NULL.
1299 : *
1300 : * This should only be used for helper function for pgstatfuncs.c - outside of
1301 : * that it shouldn't be needed.
1302 : */
1303 : PgStat_EntryRef *
1304 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1305 : {
1306 : PgStat_EntryRef *entry_ref;
1307 :
1308 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1309 :
1310 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1311 30 : return NULL;
1312 :
1313 54 : return entry_ref;
1314 : }
1315 :
1316 : void
1317 2053624 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1318 : {
1319 2053624 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1320 2053624 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1321 2053624 : void *pending_data = entry_ref->pending;
1322 :
1323 : Assert(pending_data != NULL);
1324 : /* !fixed_amount stats should be handled explicitly */
1325 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1326 :
1327 2053624 : if (kind_info->delete_pending_cb)
1328 1938150 : kind_info->delete_pending_cb(entry_ref);
1329 :
1330 2053624 : pfree(pending_data);
1331 2053624 : entry_ref->pending = NULL;
1332 :
1333 2053624 : dlist_delete(&entry_ref->pending_node);
1334 2053624 : }
1335 :
1336 : /*
1337 : * Flush out pending variable-numbered stats.
1338 : */
1339 : static bool
1340 70628 : pgstat_flush_pending_entries(bool nowait)
1341 : {
1342 70628 : bool have_pending = false;
1343 70628 : dlist_node *cur = NULL;
1344 :
1345 : /*
1346 : * Need to be a bit careful iterating over the list of pending entries.
1347 : * Processing a pending entry may queue further pending entries to the end
1348 : * of the list that we want to process, so a simple iteration won't do.
1349 : * Further complicating matters is that we want to delete the current
1350 : * entry in each iteration from the list if we flushed successfully.
1351 : *
1352 : * So we just keep track of the next pointer in each loop iteration.
1353 : */
1354 70628 : if (!dlist_is_empty(&pgStatPending))
1355 65802 : cur = dlist_head_node(&pgStatPending);
1356 :
1357 2058672 : while (cur)
1358 : {
1359 1988044 : PgStat_EntryRef *entry_ref =
1360 1988044 : dlist_container(PgStat_EntryRef, pending_node, cur);
1361 1988044 : PgStat_HashKey key = entry_ref->shared_entry->key;
1362 1988044 : PgStat_Kind kind = key.kind;
1363 1988044 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1364 : bool did_flush;
1365 : dlist_node *next;
1366 :
1367 : Assert(!kind_info->fixed_amount);
1368 : Assert(kind_info->flush_pending_cb != NULL);
1369 :
1370 : /* flush the stats, if possible */
1371 1988044 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1372 :
1373 : Assert(did_flush || nowait);
1374 :
1375 : /* determine next entry, before deleting the pending entry */
1376 1988044 : if (dlist_has_next(&pgStatPending, cur))
1377 1922242 : next = dlist_next_node(&pgStatPending, cur);
1378 : else
1379 65802 : next = NULL;
1380 :
1381 : /* if successfully flushed, remove entry */
1382 1988044 : if (did_flush)
1383 1988028 : pgstat_delete_pending_entry(entry_ref);
1384 : else
1385 16 : have_pending = true;
1386 :
1387 1988044 : cur = next;
1388 : }
1389 :
1390 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1391 :
1392 70628 : return have_pending;
1393 : }
1394 :
1395 :
1396 : /* ------------------------------------------------------------
1397 : * Helper / infrastructure functions
1398 : * ------------------------------------------------------------
1399 : */
1400 :
1401 : PgStat_Kind
1402 166 : pgstat_get_kind_from_str(char *kind_str)
1403 : {
1404 494 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1405 : {
1406 488 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1407 160 : return kind;
1408 : }
1409 :
1410 : /* Check the custom set of cumulative stats */
1411 6 : if (pgstat_kind_custom_infos)
1412 : {
1413 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1414 : {
1415 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1416 :
1417 0 : if (pgstat_kind_custom_infos[idx] &&
1418 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1419 0 : return kind;
1420 : }
1421 : }
1422 :
1423 6 : ereport(ERROR,
1424 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1425 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1426 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1427 : }
1428 :
1429 : static inline bool
1430 801226 : pgstat_is_kind_valid(PgStat_Kind kind)
1431 : {
1432 801226 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1433 : }
1434 :
1435 : const PgStat_KindInfo *
1436 15059192 : pgstat_get_kind_info(PgStat_Kind kind)
1437 : {
1438 15059192 : if (pgstat_is_kind_builtin(kind))
1439 12265448 : return &pgstat_kind_builtin_infos[kind];
1440 :
1441 2793744 : if (pgstat_is_kind_custom(kind))
1442 : {
1443 1507756 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1444 :
1445 1507756 : if (pgstat_kind_custom_infos == NULL ||
1446 7274 : pgstat_kind_custom_infos[idx] == NULL)
1447 1506068 : return NULL;
1448 1688 : return pgstat_kind_custom_infos[idx];
1449 : }
1450 :
1451 1285988 : return NULL;
1452 : }
1453 :
1454 : /*
1455 : * Register a new stats kind.
1456 : *
1457 : * PgStat_Kinds must be globally unique across all extensions. Refer
1458 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1459 : * unique ID for your extension, to avoid conflicts with other extension
1460 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1461 : * needlessly reserving a new ID.
1462 : */
1463 : void
1464 20 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1465 : {
1466 20 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1467 :
1468 20 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1469 0 : ereport(ERROR,
1470 : (errmsg("custom cumulative statistics name is invalid"),
1471 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1472 :
1473 20 : if (!pgstat_is_kind_custom(kind))
1474 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1475 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1476 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1477 :
1478 20 : if (!process_shared_preload_libraries_in_progress)
1479 0 : ereport(ERROR,
1480 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1481 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1482 :
1483 : /*
1484 : * Check some data for fixed-numbered stats.
1485 : */
1486 20 : if (kind_info->fixed_amount)
1487 : {
1488 10 : if (kind_info->shared_size == 0)
1489 0 : ereport(ERROR,
1490 : (errmsg("custom cumulative statistics property is invalid"),
1491 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1492 10 : if (kind_info->track_entry_count)
1493 0 : ereport(ERROR,
1494 : (errmsg("custom cumulative statistics property is invalid"),
1495 : errhint("Custom cumulative statistics cannot use entry count tracking for fixed-numbered objects.")));
1496 : }
1497 :
1498 : /*
1499 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1500 : */
1501 20 : if (pgstat_kind_custom_infos == NULL)
1502 : {
1503 10 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1504 10 : MemoryContextAllocZero(TopMemoryContext,
1505 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1506 : }
1507 :
1508 20 : if (pgstat_kind_custom_infos[idx] != NULL &&
1509 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1510 0 : ereport(ERROR,
1511 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1512 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1513 : pgstat_kind_custom_infos[idx]->name)));
1514 :
1515 : /* check for existing custom stats with the same name */
1516 200 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1517 : {
1518 180 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1519 :
1520 180 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1521 170 : continue;
1522 10 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1523 0 : ereport(ERROR,
1524 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1525 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1526 : }
1527 :
1528 : /* Register it */
1529 20 : pgstat_kind_custom_infos[idx] = kind_info;
1530 20 : ereport(LOG,
1531 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1532 : kind_info->name, kind)));
1533 20 : }
1534 :
/*
 * Stats should only be reported after pgstat_initialize() and before
 * pgstat_shutdown(). This check is put in a few central places to catch
 * violations of this rule more easily.
 *
 * The function is only defined in assert-enabled builds
 * (USE_ASSERT_CHECKING). It asserts that pgstat_is_initialized is set and
 * pgstat_is_shutdown is not.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1547 :
1548 :
1549 : /* ------------------------------------------------------------
1550 : * reading and writing of on-disk stats file
1551 : * ------------------------------------------------------------
1552 : */
1553 :
/*
 * Helpers for pgstat_write_statsfile().
 *
 * write_chunk() writes len bytes starting at ptr to fpout as a single item.
 * The result of fwrite() is deliberately ignored: the caller checks the
 * stream with ferror() once at the end instead of after every write.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	/* we'll check for errors with ferror once at the end */
	(void) fwrite(ptr, len, 1, fpout);
}

/* Write the object ptr points to, using sizeof(*ptr) as the length. */
#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1567 :
/*
 * Write the current stats out to the permanent stats file.
 *
 * This function is called in the last process that is accessing the shared
 * stats so locking is not required.
 *
 * The data is first written to a temporary file, which is renamed over the
 * permanent file only if every write succeeded. The file consists of:
 * - the format ID (PGSTAT_FILE_FORMAT_ID);
 * - for each fixed-numbered kind with write_to_file set: a
 *   PGSTAT_FILE_ENTRY_FIXED byte, the kind, and shared_data_len bytes of
 *   data taken from a freshly built snapshot;
 * - for each entry of the shared hash table whose kind has write_to_file
 *   set: either a PGSTAT_FILE_ENTRY_HASH byte followed by the hash key, or a
 *   PGSTAT_FILE_ENTRY_NAME byte followed by the kind and a serialized name;
 *   then the entry's data without its header;
 * - a PGSTAT_FILE_ENTRY_END byte.
 *
 * Failures are reported at LOG level and leave no temporary file behind;
 * the function returns normally in that case.
 */
static void
pgstat_write_statsfile(void)
{
	FILE	   *fpout;
	int32		format_id;
	const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
	const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
	dshash_seq_status hstat;
	PgStatShared_HashEntry *ps;

	pgstat_assert_is_up();

	/* should be called only by the checkpointer or single user mode */
	Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);

	/* we're shutting down, so it's ok to just override this */
	pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;

	elog(DEBUG2, "writing stats file \"%s\"", statfile);

	/*
	 * Open the statistics temp file to write out the current values.
	 */
	fpout = AllocateFile(tmpfile, PG_BINARY_W);
	if (fpout == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not open temporary statistics file \"%s\": %m",
						tmpfile)));
		return;
	}

	/*
	 * Write the file header --- currently just a format ID.
	 */
	format_id = PGSTAT_FILE_FORMAT_ID;
	write_chunk_s(fpout, &format_id);

	/* Write various stats structs for fixed number of objects */
	for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
	{
		char	   *ptr;
		const PgStat_KindInfo *info = pgstat_get_kind_info(kind);

		/* nothing registered under this ID, or not a fixed-numbered kind */
		if (!info || !info->fixed_amount)
			continue;

		if (pgstat_is_kind_builtin(kind))
			Assert(info->snapshot_ctl_off != 0);

		/* skip if no need to write to file */
		if (!info->write_to_file)
			continue;

		/*
		 * Refresh the kind's snapshot, then locate its data: builtin kinds
		 * live at an offset inside pgStatLocal.snapshot, custom kinds in
		 * their own custom_data buffer.
		 */
		pgstat_build_snapshot_fixed(kind);
		if (pgstat_is_kind_builtin(kind))
			ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
		else
			ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];

		fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
		write_chunk_s(fpout, &kind);
		write_chunk(fpout, ptr, info->shared_data_len);
	}

	/*
	 * Walk through the stats entries
	 */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
	while ((ps = dshash_seq_next(&hstat)) != NULL)
	{
		PgStatShared_Common *shstats;
		const PgStat_KindInfo *kind_info = NULL;

		CHECK_FOR_INTERRUPTS();

		/*
		 * We should not see any "dropped" entries when writing the stats
		 * file, as all backends and auxiliary processes should have cleaned
		 * up their references before they terminated.
		 *
		 * However, since we are already shutting down, it is not worth
		 * crashing the server over any potential cleanup issues, so we simply
		 * skip such entries if encountered.
		 */
		Assert(!ps->dropped);
		if (ps->dropped)
			continue;

		/*
		 * This discards data related to custom stats kinds that are unknown
		 * to this process.
		 */
		if (!pgstat_is_kind_valid(ps->key.kind))
		{
			elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
				 ps->key.kind, ps->key.dboid,
				 ps->key.objid);
			continue;
		}

		shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);

		kind_info = pgstat_get_kind_info(ps->key.kind);

		/* if not dropped the valid-entry refcount should exist */
		Assert(pg_atomic_read_u32(&ps->refcount) > 0);

		/* skip if no need to write to file */
		if (!kind_info->write_to_file)
			continue;

		if (!kind_info->to_serialized_name)
		{
			/* normal stats entry, identified by PgStat_HashKey */
			fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
			write_chunk_s(fpout, &ps->key);
		}
		else
		{
			/* stats entry identified by name on disk (e.g. slots) */
			NameData	name;

			kind_info->to_serialized_name(&ps->key, shstats, &name);

			fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
			write_chunk_s(fpout, &ps->key.kind);
			write_chunk_s(fpout, &name);
		}

		/* Write except the header part of the entry */
		write_chunk(fpout,
					pgstat_get_entry_data(ps->key.kind, shstats),
					pgstat_get_entry_len(ps->key.kind));
	}
	dshash_seq_term(&hstat);

	/*
	 * No more output to be done. Close the temp file and replace the old
	 * pgstat.stat with it.  The ferror() check replaces testing for error
	 * after each individual fputc or fwrite (in write_chunk()) above.
	 */
	fputc(PGSTAT_FILE_ENTRY_END, fpout);

	if (ferror(fpout))
	{
		/* some write failed along the way: discard the temp file */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write temporary statistics file \"%s\": %m",
						tmpfile)));
		FreeFile(fpout);
		unlink(tmpfile);
	}
	else if (FreeFile(fpout) < 0)
	{
		/* closing failed: discard the temp file */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not close temporary statistics file \"%s\": %m",
						tmpfile)));
		unlink(tmpfile);
	}
	else if (durable_rename(tmpfile, statfile, LOG) < 0)
	{
		/* durable_rename already emitted log message */
		unlink(tmpfile);
	}
}
1740 :
/*
 * Helpers for pgstat_read_statsfile().
 *
 * read_chunk() reads len bytes from fpin into ptr. It returns true only if
 * all len bytes could be read.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

/* Read into the object ptr points to, using sizeof(*ptr) as the length. */
#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1749 :
1750 : /*
1751 : * Reads in existing statistics file into memory.
1752 : *
1753 : * This function is called in the only process that is accessing the shared
1754 : * stats so locking is not required.
1755 : */
1756 : static void
1757 1544 : pgstat_read_statsfile(void)
1758 : {
1759 : FILE *fpin;
1760 : int32 format_id;
1761 : bool found;
1762 1544 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1763 1544 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1764 :
1765 : /* shouldn't be called from postmaster */
1766 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1767 :
1768 1544 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1769 :
1770 : /*
1771 : * Try to open the stats file. If it doesn't exist, the backends simply
1772 : * returns zero for anything and statistics simply starts from scratch
1773 : * with empty counters.
1774 : *
1775 : * ENOENT is a possibility if stats collection was previously disabled or
1776 : * has not yet written the stats file for the first time. Any other
1777 : * failure condition is suspicious.
1778 : */
1779 1544 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1780 : {
1781 102 : if (errno != ENOENT)
1782 0 : ereport(LOG,
1783 : (errcode_for_file_access(),
1784 : errmsg("could not open statistics file \"%s\": %m",
1785 : statfile)));
1786 102 : pgstat_reset_after_failure();
1787 102 : return;
1788 : }
1789 :
1790 : /*
1791 : * Verify it's of the expected format.
1792 : */
1793 1442 : if (!read_chunk_s(fpin, &format_id))
1794 : {
1795 0 : elog(WARNING, "could not read format ID");
1796 0 : goto error;
1797 : }
1798 :
1799 1442 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1800 : {
1801 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1802 : format_id, PGSTAT_FILE_FORMAT_ID);
1803 2 : goto error;
1804 : }
1805 :
1806 : /*
1807 : * We found an existing statistics file. Read it and put all the stats
1808 : * data into place.
1809 : */
1810 : for (;;)
1811 412188 : {
1812 413628 : int t = fgetc(fpin);
1813 :
1814 413628 : switch (t)
1815 : {
1816 8644 : case PGSTAT_FILE_ENTRY_FIXED:
1817 : {
1818 : PgStat_Kind kind;
1819 : const PgStat_KindInfo *info;
1820 : char *ptr;
1821 :
1822 : /* entry for fixed-numbered stats */
1823 8644 : if (!read_chunk_s(fpin, &kind))
1824 : {
1825 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1826 2 : goto error;
1827 : }
1828 :
1829 8644 : if (!pgstat_is_kind_valid(kind))
1830 : {
1831 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1832 : kind, t);
1833 0 : goto error;
1834 : }
1835 :
1836 8644 : info = pgstat_get_kind_info(kind);
1837 8644 : if (!info)
1838 : {
1839 2 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1840 : kind, t);
1841 2 : goto error;
1842 : }
1843 :
1844 8642 : if (!info->fixed_amount)
1845 : {
1846 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1847 : kind, t);
1848 0 : goto error;
1849 : }
1850 :
1851 : /* Load back stats into shared memory */
1852 8642 : if (pgstat_is_kind_builtin(kind))
1853 8640 : ptr = ((char *) shmem) + info->shared_ctl_off +
1854 8640 : info->shared_data_off;
1855 : else
1856 : {
1857 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1858 :
1859 2 : ptr = ((char *) shmem->custom_data[idx]) +
1860 2 : info->shared_data_off;
1861 : }
1862 :
1863 8642 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1864 : {
1865 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1866 : kind, t, info->shared_data_len);
1867 0 : goto error;
1868 : }
1869 :
1870 8642 : break;
1871 : }
1872 403546 : case PGSTAT_FILE_ENTRY_HASH:
1873 : case PGSTAT_FILE_ENTRY_NAME:
1874 : {
1875 : PgStat_HashKey key;
1876 : PgStatShared_HashEntry *p;
1877 : PgStatShared_Common *header;
1878 :
1879 403546 : CHECK_FOR_INTERRUPTS();
1880 :
1881 403546 : if (t == PGSTAT_FILE_ENTRY_HASH)
1882 : {
1883 : /* normal stats entry, identified by PgStat_HashKey */
1884 403412 : if (!read_chunk_s(fpin, &key))
1885 : {
1886 0 : elog(WARNING, "could not read key for entry of type %c", t);
1887 0 : goto error;
1888 : }
1889 :
1890 403412 : if (!pgstat_is_kind_valid(key.kind))
1891 : {
1892 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
1893 : key.kind, key.dboid,
1894 : key.objid, t);
1895 0 : goto error;
1896 : }
1897 :
1898 403412 : if (!pgstat_get_kind_info(key.kind))
1899 : {
1900 0 : elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
1901 : key.kind, key.dboid,
1902 : key.objid, t);
1903 0 : goto error;
1904 : }
1905 : }
1906 : else
1907 : {
1908 : /* stats entry identified by name on disk (e.g. slots) */
1909 134 : const PgStat_KindInfo *kind_info = NULL;
1910 : PgStat_Kind kind;
1911 : NameData name;
1912 :
1913 134 : if (!read_chunk_s(fpin, &kind))
1914 : {
1915 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1916 0 : goto error;
1917 : }
1918 134 : if (!read_chunk_s(fpin, &name))
1919 : {
1920 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1921 : kind, t);
1922 0 : goto error;
1923 : }
1924 134 : if (!pgstat_is_kind_valid(kind))
1925 : {
1926 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1927 : kind, t);
1928 0 : goto error;
1929 : }
1930 :
1931 134 : kind_info = pgstat_get_kind_info(kind);
1932 134 : if (!kind_info)
1933 : {
1934 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1935 : kind, t);
1936 0 : goto error;
1937 : }
1938 :
1939 134 : if (!kind_info->from_serialized_name)
1940 : {
1941 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1942 : kind, t);
1943 0 : goto error;
1944 : }
1945 :
1946 134 : if (!kind_info->from_serialized_name(&name, &key))
1947 : {
1948 : /* skip over data for entry we don't care about */
1949 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1950 : {
1951 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1952 : NameStr(name), kind, t);
1953 0 : goto error;
1954 : }
1955 :
1956 2 : continue;
1957 : }
1958 :
1959 : Assert(key.kind == kind);
1960 : }
1961 :
1962 : /*
1963 : * This intentionally doesn't use pgstat_get_entry_ref() -
1964 : * putting all stats into checkpointer's
1965 : * pgStatEntryRefHash would be wasted effort and memory.
1966 : */
1967 403544 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1968 :
1969 : /* don't allow duplicate entries */
1970 403544 : if (found)
1971 : {
1972 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
1973 0 : elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
1974 : key.kind, key.dboid,
1975 : key.objid, t);
1976 0 : goto error;
1977 : }
1978 :
1979 403544 : header = pgstat_init_entry(key.kind, p);
1980 403544 : dshash_release_lock(pgStatLocal.shared_hash, p);
1981 403544 : if (header == NULL)
1982 : {
1983 : /*
1984 : * It would be tempting to switch this ERROR to a
1985 : * WARNING, but it would mean that all the statistics
1986 : * are discarded when the environment fails on OOM.
1987 : */
1988 0 : elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
1989 : key.kind, key.dboid,
1990 : key.objid, t);
1991 : }
1992 :
1993 403544 : if (!read_chunk(fpin,
1994 : pgstat_get_entry_data(key.kind, header),
1995 : pgstat_get_entry_len(key.kind)))
1996 : {
1997 0 : elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
1998 : key.kind, key.dboid,
1999 : key.objid, t);
2000 0 : goto error;
2001 : }
2002 :
2003 403544 : break;
2004 : }
2005 1438 : case PGSTAT_FILE_ENTRY_END:
2006 :
2007 : /*
2008 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
2009 : * file
2010 : */
2011 1438 : if (fgetc(fpin) != EOF)
2012 : {
2013 2 : elog(WARNING, "could not read end-of-file");
2014 2 : goto error;
2015 : }
2016 :
2017 1436 : goto done;
2018 :
2019 0 : default:
2020 0 : elog(WARNING, "could not read entry of type %c", t);
2021 0 : goto error;
2022 : }
2023 : }
2024 :
2025 1442 : done:
2026 1442 : FreeFile(fpin);
2027 :
2028 1442 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2029 1442 : unlink(statfile);
2030 :
2031 1442 : return;
2032 :
2033 6 : error:
2034 6 : ereport(LOG,
2035 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2036 :
2037 6 : pgstat_reset_after_failure();
2038 :
2039 6 : goto done;
2040 : }
2041 :
2042 : /*
2043 : * Helper to reset / drop stats after a crash or after restoring stats from
2044 : * disk failed, potentially after already loading parts.
2045 : */
2046 : static void
2047 458 : pgstat_reset_after_failure(void)
2048 : {
2049 458 : TimestampTz ts = GetCurrentTimestamp();
2050 :
2051 : /* reset fixed-numbered stats */
2052 15114 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2053 : {
2054 14656 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2055 :
2056 14656 : if (!kind_info || !kind_info->fixed_amount)
2057 11906 : continue;
2058 :
2059 2750 : kind_info->reset_all_cb(ts);
2060 : }
2061 :
2062 : /* and drop variable-numbered ones */
2063 458 : pgstat_drop_all_entries();
2064 458 : }
2065 :
2066 : /*
2067 : * GUC assign_hook for stats_fetch_consistency.
2068 : */
2069 : void
2070 7610 : assign_stats_fetch_consistency(int newval, void *extra)
2071 : {
2072 : /*
2073 : * Changing this value in a transaction may cause snapshot state
2074 : * inconsistencies, so force a clear of the current snapshot on the next
2075 : * snapshot build attempt.
2076 : */
2077 7610 : if (pgstat_fetch_consistency != newval)
2078 5142 : force_stats_snapshot_clear = true;
2079 7610 : }
|