Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered pending stats data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_relation.c
87 : * - pgstat_replslot.c
88 : * - pgstat_slru.c
89 : * - pgstat_subscription.c
90 : * - pgstat_wal.c
91 : *
92 : * Whenever possible infrastructure files should not contain code related to
93 : * specific kinds of stats.
94 : *
95 : *
96 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
97 : *
98 : * IDENTIFICATION
99 : * src/backend/utils/activity/pgstat.c
100 : * ----------
101 : */
102 : #include "postgres.h"
103 :
104 : #include <unistd.h>
105 :
106 : #include "access/xact.h"
107 : #include "access/xlog.h"
108 : #include "lib/dshash.h"
109 : #include "pgstat.h"
110 : #include "storage/fd.h"
111 : #include "storage/ipc.h"
112 : #include "storage/lwlock.h"
113 : #include "utils/guc_hooks.h"
114 : #include "utils/memutils.h"
115 : #include "utils/pgstat_internal.h"
116 : #include "utils/timestamp.h"
117 :
118 :
/* ----------
 * Timer definitions.
 *
 * In milliseconds.
 * ----------
 */

/* minimum interval between non-forced stats flushes */
#define PGSTAT_MIN_INTERVAL			1000
/*
 * how long stats updates may stay pending before pgstat_report_stat() forces
 * a flush, i.e. one that blocks on lock acquisition
 */
#define PGSTAT_MAX_INTERVAL			60000
/* when to call pgstat_report_stat() again, even when idle */
#define PGSTAT_IDLE_INTERVAL		10000

/* ----------
 * Initial size hints for the hash tables used in statistics.
 * ----------
 */

#define PGSTAT_SNAPSHOT_HASH_SIZE	512

/* ---------
 * Identifiers in stats file.
 *
 * Each is a single-character tag.
 * ---------
 */
#define PGSTAT_FILE_ENTRY_END	'E' /* end of file */
#define PGSTAT_FILE_ENTRY_FIXED	'F' /* fixed-numbered stats entry */
#define PGSTAT_FILE_ENTRY_NAME	'N' /* stats entry identified by name */
#define PGSTAT_FILE_ENTRY_HASH	'S' /* stats entry identified by
									 * PgStat_HashKey */
149 :
/*
 * One entry of the hash table holding a statistics snapshot.
 *
 * The table itself is instantiated from lib/simplehash.h with the
 * pgstat_snapshot prefix.
 */
typedef struct PgStat_SnapshotEntry
{
	PgStat_HashKey key;			/* hash key identifying the stats entry */
	char		status;			/* for simplehash use */
	void	   *data;			/* the stats data itself */
} PgStat_SnapshotEntry;
157 :
158 :
159 : /* ----------
160 : * Backend-local Hash Table Definitions
161 : * ----------
162 : */
163 :
164 : /* for stats snapshot entries */
165 : #define SH_PREFIX pgstat_snapshot
166 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
167 : #define SH_KEY_TYPE PgStat_HashKey
168 : #define SH_KEY key
169 : #define SH_HASH_KEY(tb, key) \
170 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
171 : #define SH_EQUAL(tb, a, b) \
172 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
173 : #define SH_SCOPE static inline
174 : #define SH_DEFINE
175 : #define SH_DECLARE
176 : #include "lib/simplehash.h"
177 :
178 :
179 : /* ----------
180 : * Local function forward declarations
181 : * ----------
182 : */
183 :
/* reading / writing of the permanent stats file */
static void pgstat_write_statsfile(XLogRecPtr redo);
static void pgstat_read_statsfile(XLogRecPtr redo);

/* called from pgstat_initialize() */
static void pgstat_init_snapshot_fixed(void);

/* called from pgstat_discard_stats() to reset stats contents */
static void pgstat_reset_after_failure(void);

/* flushes the entries on the pgStatPending list; see pgstat_report_stat() */
static bool pgstat_flush_pending_entries(bool nowait);

/* snapshot handling used when fetching stats */
static void pgstat_prep_snapshot(void);
static void pgstat_build_snapshot(void);
static void pgstat_build_snapshot_fixed(PgStat_Kind kind);

static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
198 :
199 :
/* ----------
 * GUC parameters
 * ----------
 */

bool		pgstat_track_counts = false;
int			pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;


/* ----------
 * state shared with pgstat_*.c
 * ----------
 */

PgStat_LocalState pgStatLocal;


/* ----------
 * Local data
 *
 * NB: There should be only variables related to stats infrastructure here,
 * not for specific kinds of stats.
 * ----------
 */

/*
 * Memory context for pending stats data. Mostly to make it easier to track /
 * attribute memory usage.
 */

static MemoryContext pgStatPendingContext = NULL;

/*
 * Backend local list of PgStat_EntryRef with unflushed pending stats.
 *
 * Newly pending entries should only ever be added to the end of the list,
 * otherwise pgstat_flush_pending_entries() might not see them immediately.
 */
static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);


/*
 * Force the next stats flush to happen regardless of
 * PGSTAT_MIN_INTERVAL. Useful in test scripts.
 *
 * Set by pgstat_force_next_flush(), cleared again by the next
 * pgstat_report_stat() call.
 */
static bool pgStatForceNextFlush = false;

/*
 * Force-clear existing snapshot before next use when stats_fetch_consistency
 * is changed.
 *
 * Reset to false by pgstat_clear_snapshot().
 */
static bool force_stats_snapshot_clear = false;


/*
 * For assertions that check pgstat is not used before initialization / after
 * shutdown.
 *
 * Set by pgstat_initialize() and pgstat_shutdown_hook() respectively; only
 * present in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
static bool pgstat_is_initialized = false;
static bool pgstat_is_shutdown = false;
#endif
263 :
264 :
/*
 * The different kinds of built-in statistics.
 *
 * If reasonably possible, handling specific to one kind of stats should go
 * through this abstraction, rather than making more of pgstat.c aware.
 *
 * See comments for struct PgStat_KindInfo for details about the individual
 * fields.
 *
 * The array is indexed by PgStat_Kind, using designated initializers; any
 * field not named in an entry is left zero / NULL.
 *
 * XXX: It'd be nicer to define this outside of this file. But there doesn't
 * seem to be a great way of doing that, given the split across multiple
 * files.
 */
static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {

	/* stats kinds for variable-numbered objects */

	[PGSTAT_KIND_DATABASE] = {
		.name = "database",

		.fixed_amount = false,
		.write_to_file = true,
		/* so pg_stat_database entries can be seen in all databases */
		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Database),
		.shared_data_off = offsetof(PgStatShared_Database, stats),
		.shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
		.pending_size = sizeof(PgStat_StatDBEntry),

		.flush_pending_cb = pgstat_database_flush_cb,
		.reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
	},

	[PGSTAT_KIND_RELATION] = {
		.name = "relation",

		.fixed_amount = false,
		.write_to_file = true,

		.shared_size = sizeof(PgStatShared_Relation),
		.shared_data_off = offsetof(PgStatShared_Relation, stats),
		.shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
		.pending_size = sizeof(PgStat_TableStatus),

		.flush_pending_cb = pgstat_relation_flush_cb,
		.delete_pending_cb = pgstat_relation_delete_pending_cb,
	},

	[PGSTAT_KIND_FUNCTION] = {
		.name = "function",

		.fixed_amount = false,
		.write_to_file = true,

		.shared_size = sizeof(PgStatShared_Function),
		.shared_data_off = offsetof(PgStatShared_Function, stats),
		.shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
		.pending_size = sizeof(PgStat_FunctionCounts),

		.flush_pending_cb = pgstat_function_flush_cb,
	},

	[PGSTAT_KIND_REPLSLOT] = {
		.name = "replslot",

		.fixed_amount = false,
		.write_to_file = true,

		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_ReplSlot),
		.shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
		.shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),

		.reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
		/* the only built-in kind providing name (de)serialization callbacks */
		.to_serialized_name = pgstat_replslot_to_serialized_name_cb,
		.from_serialized_name = pgstat_replslot_from_serialized_name_cb,
	},

	[PGSTAT_KIND_SUBSCRIPTION] = {
		.name = "subscription",

		.fixed_amount = false,
		.write_to_file = true,
		/* so pg_stat_subscription_stats entries can be seen in all databases */
		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Subscription),
		.shared_data_off = offsetof(PgStatShared_Subscription, stats),
		.shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
		.pending_size = sizeof(PgStat_BackendSubEntry),

		.flush_pending_cb = pgstat_subscription_flush_cb,
		.reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
	},

	[PGSTAT_KIND_BACKEND] = {
		.name = "backend",

		.fixed_amount = false,
		/* the only built-in kind that is not written to the stats file */
		.write_to_file = false,

		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Backend),
		.shared_data_off = offsetof(PgStatShared_Backend, stats),
		.shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),

		/*
		 * Variable-numbered, but flushed through the "static" callbacks
		 * instead of flush_pending_cb.
		 */
		.have_static_pending_cb = pgstat_backend_have_pending_cb,
		.flush_static_cb = pgstat_backend_flush_cb,
		.reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
	},

	/* stats for fixed-numbered (mostly 1) objects */

	[PGSTAT_KIND_ARCHIVER] = {
		.name = "archiver",

		.fixed_amount = true,
		.write_to_file = true,

		.snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
		.shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
		.shared_data_off = offsetof(PgStatShared_Archiver, stats),
		.shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),

		.init_shmem_cb = pgstat_archiver_init_shmem_cb,
		.reset_all_cb = pgstat_archiver_reset_all_cb,
		.snapshot_cb = pgstat_archiver_snapshot_cb,
	},

	[PGSTAT_KIND_BGWRITER] = {
		.name = "bgwriter",

		.fixed_amount = true,
		.write_to_file = true,

		.snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
		.shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
		.shared_data_off = offsetof(PgStatShared_BgWriter, stats),
		.shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),

		.init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
		.reset_all_cb = pgstat_bgwriter_reset_all_cb,
		.snapshot_cb = pgstat_bgwriter_snapshot_cb,
	},

	[PGSTAT_KIND_CHECKPOINTER] = {
		.name = "checkpointer",

		.fixed_amount = true,
		.write_to_file = true,

		.snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
		.shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
		.shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
		.shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),

		.init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
		.reset_all_cb = pgstat_checkpointer_reset_all_cb,
		.snapshot_cb = pgstat_checkpointer_snapshot_cb,
	},

	[PGSTAT_KIND_IO] = {
		.name = "io",

		.fixed_amount = true,
		.write_to_file = true,

		.snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
		.shared_ctl_off = offsetof(PgStat_ShmemControl, io),
		.shared_data_off = offsetof(PgStatShared_IO, stats),
		.shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),

		.flush_static_cb = pgstat_io_flush_cb,
		.have_static_pending_cb = pgstat_io_have_pending_cb,
		.init_shmem_cb = pgstat_io_init_shmem_cb,
		.reset_all_cb = pgstat_io_reset_all_cb,
		.snapshot_cb = pgstat_io_snapshot_cb,
	},

	[PGSTAT_KIND_SLRU] = {
		.name = "slru",

		.fixed_amount = true,
		.write_to_file = true,

		.snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
		.shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
		.shared_data_off = offsetof(PgStatShared_SLRU, stats),
		.shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),

		.flush_static_cb = pgstat_slru_flush_cb,
		.have_static_pending_cb = pgstat_slru_have_pending_cb,
		.init_shmem_cb = pgstat_slru_init_shmem_cb,
		.reset_all_cb = pgstat_slru_reset_all_cb,
		.snapshot_cb = pgstat_slru_snapshot_cb,
	},

	[PGSTAT_KIND_WAL] = {
		.name = "wal",

		.fixed_amount = true,
		.write_to_file = true,

		.snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
		.shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
		.shared_data_off = offsetof(PgStatShared_Wal, stats),
		.shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),

		/* the only built-in kind with a per-backend initialization callback */
		.init_backend_cb = pgstat_wal_init_backend_cb,
		.flush_static_cb = pgstat_wal_flush_cb,
		.have_static_pending_cb = pgstat_wal_have_pending_cb,
		.init_shmem_cb = pgstat_wal_init_shmem_cb,
		.reset_all_cb = pgstat_wal_reset_all_cb,
		.snapshot_cb = pgstat_wal_snapshot_cb,
	},
};
484 :
/*
 * Information about custom statistics kinds.
 *
 * These are kept in a separate array from the built-in kinds, so that the
 * initialization of the built-in array stays clear.
 *
 * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
 * NULL until allocated (NOTE(review): allocation is not visible in this part
 * of the file - presumably done by pgstat_register_kind(); confirm).
 */
static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
494 :
495 : /* ------------------------------------------------------------
496 : * Functions managing the state of the stats system for all backends.
497 : * ------------------------------------------------------------
498 : */
499 :
/*
 * Read on-disk stats into memory at server start.
 *
 * Thin wrapper: hands 'redo' unchanged to pgstat_read_statsfile().
 *
 * Should only be called by the startup process or in single user mode.
 */
void
pgstat_restore_stats(XLogRecPtr redo)
{
	pgstat_read_statsfile(redo);
}
510 :
511 : /*
512 : * Remove the stats file. This is currently used only if WAL recovery is
513 : * needed after a crash.
514 : *
515 : * Should only be called by the startup process or in single user mode.
516 : */
517 : void
518 342 : pgstat_discard_stats(void)
519 : {
520 : int ret;
521 :
522 : /* NB: this needs to be done even in single user mode */
523 :
524 342 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
525 342 : if (ret != 0)
526 : {
527 340 : if (errno == ENOENT)
528 340 : elog(DEBUG2,
529 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
530 : PGSTAT_STAT_PERMANENT_FILENAME);
531 : else
532 0 : ereport(LOG,
533 : (errcode_for_file_access(),
534 : errmsg("could not unlink permanent statistics file \"%s\": %m",
535 : PGSTAT_STAT_PERMANENT_FILENAME)));
536 : }
537 : else
538 : {
539 2 : ereport(DEBUG2,
540 : (errcode_for_file_access(),
541 : errmsg_internal("unlinked permanent statistics file \"%s\"",
542 : PGSTAT_STAT_PERMANENT_FILENAME)));
543 : }
544 :
545 : /*
546 : * Reset stats contents. This will set reset timestamps of fixed-numbered
547 : * stats to the current time (no variable stats exist).
548 : */
549 342 : pgstat_reset_after_failure();
550 342 : }
551 :
552 : /*
553 : * pgstat_before_server_shutdown() needs to be called by exactly one process
554 : * during regular server shutdowns. Otherwise all stats will be lost.
555 : *
556 : * We currently only write out stats for proc_exit(0). We might want to change
557 : * that at some point... But right now pgstat_discard_stats() would be called
558 : * during the start after a disorderly shutdown, anyway.
559 : */
560 : void
561 1108 : pgstat_before_server_shutdown(int code, Datum arg)
562 : {
563 : Assert(pgStatLocal.shmem != NULL);
564 : Assert(!pgStatLocal.shmem->is_shutdown);
565 :
566 : /*
567 : * Stats should only be reported after pgstat_initialize() and before
568 : * pgstat_shutdown(). This is a convenient point to catch most violations
569 : * of this rule.
570 : */
571 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
572 :
573 : /* flush out our own pending changes before writing out */
574 1108 : pgstat_report_stat(true);
575 :
576 : /*
577 : * Only write out file during normal shutdown. Don't even signal that
578 : * we've shutdown during irregular shutdowns, because the shutdown
579 : * sequence isn't coordinated to ensure this backend shuts down last.
580 : */
581 1108 : if (code == 0)
582 : {
583 1098 : pgStatLocal.shmem->is_shutdown = true;
584 1098 : pgstat_write_statsfile(GetRedoRecPtr());
585 : }
586 1108 : }
587 :
588 :
589 : /* ------------------------------------------------------------
590 : * Backend initialization / shutdown functions
591 : * ------------------------------------------------------------
592 : */
593 :
594 : /*
595 : * Shut down a single backend's statistics reporting at process exit.
596 : *
597 : * Flush out any remaining statistics counts. Without this, operations
598 : * triggered during backend exit (such as temp table deletions) won't be
599 : * counted.
600 : */
601 : static void
602 35032 : pgstat_shutdown_hook(int code, Datum arg)
603 : {
604 : Assert(!pgstat_is_shutdown);
605 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
606 :
607 : /*
608 : * If we got as far as discovering our own database ID, we can flush out
609 : * what we did so far. Otherwise, we'd be reporting an invalid database
610 : * ID, so forget it. (This means that accesses to pg_database during
611 : * failed backend starts might never get counted.)
612 : */
613 35032 : if (OidIsValid(MyDatabaseId))
614 27968 : pgstat_report_disconnect(MyDatabaseId);
615 :
616 35032 : pgstat_report_stat(true);
617 :
618 : /* there shouldn't be any pending changes left */
619 : Assert(dlist_is_empty(&pgStatPending));
620 35032 : dlist_init(&pgStatPending);
621 :
622 : /* drop the backend stats entry */
623 35032 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
624 0 : pgstat_request_entry_refs_gc();
625 :
626 35032 : pgstat_detach_shmem();
627 :
628 : #ifdef USE_ASSERT_CHECKING
629 : pgstat_is_shutdown = true;
630 : #endif
631 35032 : }
632 :
633 : /*
634 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
635 : * BaseInit().
636 : *
637 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
638 : */
639 : void
640 35032 : pgstat_initialize(void)
641 : {
642 : Assert(!pgstat_is_initialized);
643 :
644 35032 : pgstat_attach_shmem();
645 :
646 35032 : pgstat_init_snapshot_fixed();
647 :
648 : /* Backend initialization callbacks */
649 9003224 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
650 : {
651 8968192 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
652 :
653 8968192 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
654 8933160 : continue;
655 :
656 35032 : kind_info->init_backend_cb();
657 : }
658 :
659 : /* Set up a process-exit hook to clean up */
660 35032 : before_shmem_exit(pgstat_shutdown_hook, 0);
661 :
662 : #ifdef USE_ASSERT_CHECKING
663 : pgstat_is_initialized = true;
664 : #endif
665 35032 : }
666 :
667 :
668 : /* ------------------------------------------------------------
669 : * Public functions used by backends follow
670 : * ------------------------------------------------------------
671 : */
672 :
673 : /*
674 : * Must be called by processes that performs DML: tcop/postgres.c, logical
675 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
676 : * shared memory.
677 : *
678 : * Unless called with 'force', pending stats updates are flushed happen once
679 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
680 : * block on lock acquisition, except if stats updates have been pending for
681 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
682 : *
683 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
684 : * suggested idle timeout is returned. Currently this is always
685 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
686 : * a timeout after which to call pgstat_report_stat(true), but are not
687 : * required to do so.
688 : *
689 : * Note that this is called only when not within a transaction, so it is fair
690 : * to use transaction stop time as an approximation of current time.
691 : */
692 : long
693 566374 : pgstat_report_stat(bool force)
694 : {
695 : static TimestampTz pending_since = 0;
696 : static TimestampTz last_flush = 0;
697 : bool partial_flush;
698 : TimestampTz now;
699 : bool nowait;
700 :
701 : pgstat_assert_is_up();
702 : Assert(!IsTransactionOrTransactionBlock());
703 :
704 : /* "absorb" the forced flush even if there's nothing to flush */
705 566374 : if (pgStatForceNextFlush)
706 : {
707 422 : force = true;
708 422 : pgStatForceNextFlush = false;
709 : }
710 :
711 : /* Don't expend a clock check if nothing to do */
712 566374 : if (dlist_is_empty(&pgStatPending))
713 : {
714 13270 : bool do_flush = false;
715 :
716 : /* Check for pending stats */
717 2331032 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
718 : {
719 2322114 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
720 :
721 2322114 : if (!kind_info)
722 2175960 : continue;
723 146154 : if (!kind_info->have_static_pending_cb)
724 102226 : continue;
725 :
726 43928 : if (kind_info->have_static_pending_cb())
727 : {
728 4352 : do_flush = true;
729 4352 : break;
730 : }
731 : }
732 :
733 13270 : if (!do_flush)
734 : {
735 : Assert(pending_since == 0);
736 8918 : return 0;
737 : }
738 : }
739 :
740 : /*
741 : * There should never be stats to report once stats are shut down. Can't
742 : * assert that before the checks above, as there is an unconditional
743 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
744 : * the process that ran pgstat_before_server_shutdown() will still call.
745 : */
746 : Assert(!pgStatLocal.shmem->is_shutdown);
747 :
748 557456 : if (force)
749 : {
750 : /*
751 : * Stats reports are forced either when it's been too long since stats
752 : * have been reported or in processes that force stats reporting to
753 : * happen at specific points (including shutdown). In the former case
754 : * the transaction stop time might be quite old, in the latter it
755 : * would never get cleared.
756 : */
757 36238 : now = GetCurrentTimestamp();
758 : }
759 : else
760 : {
761 521218 : now = GetCurrentTransactionStopTimestamp();
762 :
763 994854 : if (pending_since > 0 &&
764 473636 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
765 : {
766 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
767 0 : force = true;
768 : }
769 521218 : else if (last_flush > 0 &&
770 498302 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
771 : {
772 : /* don't flush too frequently */
773 496510 : if (pending_since == 0)
774 24396 : pending_since = now;
775 :
776 496510 : return PGSTAT_IDLE_INTERVAL;
777 : }
778 : }
779 :
780 60946 : pgstat_update_dbstats(now);
781 :
782 : /* don't wait for lock acquisition when !force */
783 60946 : nowait = !force;
784 :
785 60946 : partial_flush = false;
786 :
787 : /* flush of variable-numbered stats tracked in pending entries list */
788 60946 : partial_flush |= pgstat_flush_pending_entries(nowait);
789 :
790 : /* flush of other stats kinds */
791 15663122 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
792 : {
793 15602176 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
794 :
795 15602176 : if (!kind_info)
796 14870476 : continue;
797 731700 : if (!kind_info->flush_static_cb)
798 487916 : continue;
799 :
800 243784 : partial_flush |= kind_info->flush_static_cb(nowait);
801 : }
802 :
803 60946 : last_flush = now;
804 :
805 : /*
806 : * If some of the pending stats could not be flushed due to lock
807 : * contention, let the caller know when to retry.
808 : */
809 60946 : if (partial_flush)
810 : {
811 : /* force should have prevented us from getting here */
812 : Assert(!force);
813 :
814 : /* remember since when stats have been pending */
815 6 : if (pending_since == 0)
816 6 : pending_since = now;
817 :
818 6 : return PGSTAT_IDLE_INTERVAL;
819 : }
820 :
821 60940 : pending_since = 0;
822 :
823 60940 : return 0;
824 : }
825 :
/*
 * Force locally pending stats to be flushed during the next
 * pgstat_report_stat() call. This is useful for writing tests.
 *
 * Only sets pgStatForceNextFlush; the flag is consumed (and cleared) by the
 * next pgstat_report_stat() call.
 */
void
pgstat_force_next_flush(void)
{
	pgStatForceNextFlush = true;
}
835 :
836 : /*
837 : * Only for use by pgstat_reset_counters()
838 : */
839 : static bool
840 22578 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
841 : {
842 22578 : return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
843 : }
844 :
845 : /*
846 : * Reset counters for our database.
847 : *
848 : * Permission checking for this function is managed through the normal
849 : * GRANT system.
850 : */
851 : void
852 26 : pgstat_reset_counters(void)
853 : {
854 26 : TimestampTz ts = GetCurrentTimestamp();
855 :
856 26 : pgstat_reset_matching_entries(match_db_entries,
857 : ObjectIdGetDatum(MyDatabaseId),
858 : ts);
859 26 : }
860 :
861 : /*
862 : * Reset a single variable-numbered entry.
863 : *
864 : * If the stats kind is within a database, also reset the database's
865 : * stat_reset_timestamp.
866 : *
867 : * Permission checking for this function is managed through the normal
868 : * GRANT system.
869 : */
870 : void
871 44 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
872 : {
873 44 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
874 44 : TimestampTz ts = GetCurrentTimestamp();
875 :
876 : /* not needed atm, and doesn't make sense with the current signature */
877 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
878 :
879 : /* reset the "single counter" */
880 44 : pgstat_reset_entry(kind, dboid, objid, ts);
881 :
882 44 : if (!kind_info->accessed_across_databases)
883 16 : pgstat_reset_database_timestamp(dboid, ts);
884 44 : }
885 :
886 : /*
887 : * Reset stats for all entries of a kind.
888 : *
889 : * Permission checking for this function is managed through the normal
890 : * GRANT system.
891 : */
892 : void
893 54 : pgstat_reset_of_kind(PgStat_Kind kind)
894 : {
895 54 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
896 54 : TimestampTz ts = GetCurrentTimestamp();
897 :
898 54 : if (kind_info->fixed_amount)
899 46 : kind_info->reset_all_cb(ts);
900 : else
901 8 : pgstat_reset_entries_of_kind(kind, ts);
902 54 : }
903 :
904 :
905 : /* ------------------------------------------------------------
906 : * Fetching of stats
907 : * ------------------------------------------------------------
908 : */
909 :
910 : /*
911 : * Discard any data collected in the current transaction. Any subsequent
912 : * request will cause new snapshots to be read.
913 : *
914 : * This is also invoked during transaction commit or abort to discard
915 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
916 : * cause this routine to be called.
917 : */
918 : void
919 802936 : pgstat_clear_snapshot(void)
920 : {
921 : pgstat_assert_is_up();
922 :
923 802936 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
924 : sizeof(pgStatLocal.snapshot.fixed_valid));
925 802936 : memset(&pgStatLocal.snapshot.custom_valid, 0,
926 : sizeof(pgStatLocal.snapshot.custom_valid));
927 802936 : pgStatLocal.snapshot.stats = NULL;
928 802936 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
929 :
930 : /* Release memory, if any was allocated */
931 802936 : if (pgStatLocal.snapshot.context)
932 : {
933 1002 : MemoryContextDelete(pgStatLocal.snapshot.context);
934 :
935 : /* Reset variables */
936 1002 : pgStatLocal.snapshot.context = NULL;
937 : }
938 :
939 : /*
940 : * Historically the backend_status.c facilities lived in this file, and
941 : * were reset with the same function. For now keep it that way, and
942 : * forward the reset request.
943 : */
944 802936 : pgstat_clear_backend_activity_snapshot();
945 :
946 : /* Reset this flag, as it may be possible that a cleanup was forced. */
947 802936 : force_stats_snapshot_clear = false;
948 802936 : }
949 :
950 : void *
951 371342 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
952 : {
953 : PgStat_HashKey key;
954 : PgStat_EntryRef *entry_ref;
955 : void *stats_data;
956 371342 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
957 :
958 : /* should be called from backends */
959 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
960 : Assert(!kind_info->fixed_amount);
961 :
962 371342 : pgstat_prep_snapshot();
963 :
964 : /* clear padding */
965 371342 : memset(&key, 0, sizeof(struct PgStat_HashKey));
966 :
967 371342 : key.kind = kind;
968 371342 : key.dboid = dboid;
969 371342 : key.objid = objid;
970 :
971 : /* if we need to build a full snapshot, do so */
972 371342 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
973 460 : pgstat_build_snapshot();
974 :
975 : /* if caching is desired, look up in cache */
976 371342 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
977 : {
978 9478 : PgStat_SnapshotEntry *entry = NULL;
979 :
980 9478 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
981 :
982 9478 : if (entry)
983 620 : return entry->data;
984 :
985 : /*
986 : * If we built a full snapshot and the key is not in
987 : * pgStatLocal.snapshot.stats, there are no matching stats.
988 : */
989 8858 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
990 28 : return NULL;
991 : }
992 :
993 370694 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
994 :
995 370694 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
996 :
997 370694 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
998 : {
999 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
1000 8654 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
1001 : {
1002 1816 : PgStat_SnapshotEntry *entry = NULL;
1003 : bool found;
1004 :
1005 1816 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1006 : Assert(!found);
1007 1816 : entry->data = NULL;
1008 : }
1009 8654 : return NULL;
1010 : }
1011 :
1012 : /*
1013 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
1014 : * otherwise we could quickly end up with a fair bit of memory used due to
1015 : * repeated accesses.
1016 : */
1017 362040 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1018 355026 : stats_data = palloc(kind_info->shared_data_len);
1019 : else
1020 7014 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1021 7014 : kind_info->shared_data_len);
1022 :
1023 362040 : pgstat_lock_entry_shared(entry_ref, false);
1024 724080 : memcpy(stats_data,
1025 362040 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1026 362040 : kind_info->shared_data_len);
1027 362040 : pgstat_unlock_entry(entry_ref);
1028 :
1029 362040 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1030 : {
1031 7014 : PgStat_SnapshotEntry *entry = NULL;
1032 : bool found;
1033 :
1034 7014 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1035 7014 : entry->data = stats_data;
1036 : }
1037 :
1038 362040 : return stats_data;
1039 : }
1040 :
1041 : /*
1042 : * If a stats snapshot has been taken, return the timestamp at which that was
1043 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1044 : * false.
1045 : */
1046 : TimestampTz
1047 60 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1048 : {
1049 60 : if (force_stats_snapshot_clear)
1050 18 : pgstat_clear_snapshot();
1051 :
1052 60 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1053 : {
1054 24 : *have_snapshot = true;
1055 24 : return pgStatLocal.snapshot.snapshot_timestamp;
1056 : }
1057 :
1058 36 : *have_snapshot = false;
1059 :
1060 36 : return 0;
1061 : }
1062 :
1063 : bool
1064 160 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1065 : {
1066 : /* fixed-numbered stats always exist */
1067 160 : if (pgstat_get_kind_info(kind)->fixed_amount)
1068 12 : return true;
1069 :
1070 148 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1071 : }
1072 :
1073 : /*
1074 : * Ensure snapshot for fixed-numbered 'kind' exists.
1075 : *
1076 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1077 : * massaging the data into the desired format.
1078 : */
1079 : void
1080 452 : pgstat_snapshot_fixed(PgStat_Kind kind)
1081 : {
1082 : Assert(pgstat_is_kind_valid(kind));
1083 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1084 :
1085 452 : if (force_stats_snapshot_clear)
1086 0 : pgstat_clear_snapshot();
1087 :
1088 452 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1089 24 : pgstat_build_snapshot();
1090 : else
1091 428 : pgstat_build_snapshot_fixed(kind);
1092 :
1093 452 : if (pgstat_is_kind_builtin(kind))
1094 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1095 8 : else if (pgstat_is_kind_custom(kind))
1096 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1097 452 : }
1098 :
1099 : static void
1100 35032 : pgstat_init_snapshot_fixed(void)
1101 : {
1102 : /*
1103 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1104 : * stats kinds.
1105 : */
1106 4554160 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1107 : {
1108 4519128 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1109 :
1110 4519128 : if (!kind_info || !kind_info->fixed_amount)
1111 4519018 : continue;
1112 :
1113 110 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1114 110 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1115 : }
1116 35032 : }
1117 :
1118 : static void
1119 371394 : pgstat_prep_snapshot(void)
1120 : {
1121 371394 : if (force_stats_snapshot_clear)
1122 18 : pgstat_clear_snapshot();
1123 :
1124 371394 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1125 9530 : pgStatLocal.snapshot.stats != NULL)
1126 370392 : return;
1127 :
1128 1002 : if (!pgStatLocal.snapshot.context)
1129 1002 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1130 : "PgStat Snapshot",
1131 : ALLOCSET_SMALL_SIZES);
1132 :
1133 1002 : pgStatLocal.snapshot.stats =
1134 1002 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1135 : PGSTAT_SNAPSHOT_HASH_SIZE,
1136 : NULL);
1137 : }
1138 :
1139 : static void
1140 484 : pgstat_build_snapshot(void)
1141 : {
1142 : dshash_seq_status hstat;
1143 : PgStatShared_HashEntry *p;
1144 :
1145 : /* should only be called when we need a snapshot */
1146 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1147 :
1148 : /* snapshot already built */
1149 484 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1150 432 : return;
1151 :
1152 52 : pgstat_prep_snapshot();
1153 :
1154 : Assert(pgStatLocal.snapshot.stats->members == 0);
1155 :
1156 52 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1157 :
1158 : /*
1159 : * Snapshot all variable stats.
1160 : */
1161 52 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1162 54048 : while ((p = dshash_seq_next(&hstat)) != NULL)
1163 : {
1164 53996 : PgStat_Kind kind = p->key.kind;
1165 53996 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1166 : bool found;
1167 : PgStat_SnapshotEntry *entry;
1168 : PgStatShared_Common *stats_data;
1169 :
1170 : /*
1171 : * Check if the stats object should be included in the snapshot.
1172 : * Unless the stats kind can be accessed from all databases (e.g.,
1173 : * database stats themselves), we only include stats for the current
1174 : * database or objects not associated with a database (e.g. shared
1175 : * relations).
1176 : */
1177 53996 : if (p->key.dboid != MyDatabaseId &&
1178 15820 : p->key.dboid != InvalidOid &&
1179 12872 : !kind_info->accessed_across_databases)
1180 12972 : continue;
1181 :
1182 41228 : if (p->dropped)
1183 204 : continue;
1184 :
1185 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1186 :
1187 41024 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1188 : Assert(stats_data);
1189 :
1190 41024 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1191 : Assert(!found);
1192 :
1193 82048 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1194 41024 : kind_info->shared_size);
1195 :
1196 : /*
1197 : * Acquire the LWLock directly instead of using
1198 : * pg_stat_lock_entry_shared() which requires a reference.
1199 : */
1200 41024 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1201 82048 : memcpy(entry->data,
1202 41024 : pgstat_get_entry_data(kind, stats_data),
1203 41024 : kind_info->shared_size);
1204 41024 : LWLockRelease(&stats_data->lock);
1205 : }
1206 52 : dshash_seq_term(&hstat);
1207 :
1208 : /*
1209 : * Build snapshot of all fixed-numbered stats.
1210 : */
1211 13364 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1212 : {
1213 13312 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1214 :
1215 13312 : if (!kind_info)
1216 12688 : continue;
1217 624 : if (!kind_info->fixed_amount)
1218 : {
1219 : Assert(kind_info->snapshot_cb == NULL);
1220 312 : continue;
1221 : }
1222 :
1223 312 : pgstat_build_snapshot_fixed(kind);
1224 : }
1225 :
1226 52 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1227 : }
1228 :
1229 : static void
1230 7334 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1231 : {
1232 7334 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1233 : int idx;
1234 : bool *valid;
1235 :
1236 : /* Position in fixed_valid or custom_valid */
1237 7334 : if (pgstat_is_kind_builtin(kind))
1238 : {
1239 7320 : idx = kind;
1240 7320 : valid = pgStatLocal.snapshot.fixed_valid;
1241 : }
1242 : else
1243 : {
1244 14 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1245 14 : valid = pgStatLocal.snapshot.custom_valid;
1246 : }
1247 :
1248 : Assert(kind_info->fixed_amount);
1249 : Assert(kind_info->snapshot_cb != NULL);
1250 :
1251 7334 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1252 : {
1253 : /* rebuild every time */
1254 6624 : valid[idx] = false;
1255 : }
1256 710 : else if (valid[idx])
1257 : {
1258 : /* in snapshot mode we shouldn't get called again */
1259 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1260 12 : return;
1261 : }
1262 :
1263 : Assert(!valid[idx]);
1264 :
1265 7322 : kind_info->snapshot_cb();
1266 :
1267 : Assert(!valid[idx]);
1268 7322 : valid[idx] = true;
1269 : }
1270 :
1271 :
1272 : /* ------------------------------------------------------------
1273 : * Backend-local pending stats infrastructure
1274 : * ------------------------------------------------------------
1275 : */
1276 :
1277 : /*
1278 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1279 : * stats if not already done.
1280 : *
1281 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1282 : * created, false otherwise.
1283 : */
1284 : PgStat_EntryRef *
1285 3306796 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1286 : {
1287 : PgStat_EntryRef *entry_ref;
1288 :
1289 : /* need to be able to flush out */
1290 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1291 :
1292 3306796 : if (unlikely(!pgStatPendingContext))
1293 : {
1294 30438 : pgStatPendingContext =
1295 30438 : AllocSetContextCreate(TopMemoryContext,
1296 : "PgStat Pending",
1297 : ALLOCSET_SMALL_SIZES);
1298 : }
1299 :
1300 3306796 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1301 : true, created_entry);
1302 :
1303 3306796 : if (entry_ref->pending == NULL)
1304 : {
1305 1691912 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1306 :
1307 : Assert(entrysize != (size_t) -1);
1308 :
1309 1691912 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1310 1691912 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1311 : }
1312 :
1313 3306796 : return entry_ref;
1314 : }
1315 :
1316 : /*
1317 : * Return an existing stats entry, or NULL.
1318 : *
1319 : * This should only be used for helper function for pgstatfuncs.c - outside of
1320 : * that it shouldn't be needed.
1321 : */
1322 : PgStat_EntryRef *
1323 84 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1324 : {
1325 : PgStat_EntryRef *entry_ref;
1326 :
1327 84 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1328 :
1329 84 : if (entry_ref == NULL || entry_ref->pending == NULL)
1330 30 : return NULL;
1331 :
1332 54 : return entry_ref;
1333 : }
1334 :
1335 : void
1336 1691912 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1337 : {
1338 1691912 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1339 1691912 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1340 1691912 : void *pending_data = entry_ref->pending;
1341 :
1342 : Assert(pending_data != NULL);
1343 : /* !fixed_amount stats should be handled explicitly */
1344 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1345 :
1346 1691912 : if (kind_info->delete_pending_cb)
1347 1592610 : kind_info->delete_pending_cb(entry_ref);
1348 :
1349 1691912 : pfree(pending_data);
1350 1691912 : entry_ref->pending = NULL;
1351 :
1352 1691912 : dlist_delete(&entry_ref->pending_node);
1353 1691912 : }
1354 :
1355 : /*
1356 : * Flush out pending variable-numbered stats.
1357 : */
1358 : static bool
1359 60946 : pgstat_flush_pending_entries(bool nowait)
1360 : {
1361 60946 : bool have_pending = false;
1362 60946 : dlist_node *cur = NULL;
1363 :
1364 : /*
1365 : * Need to be a bit careful iterating over the list of pending entries.
1366 : * Processing a pending entry may queue further pending entries to the end
1367 : * of the list that we want to process, so a simple iteration won't do.
1368 : * Further complicating matters is that we want to delete the current
1369 : * entry in each iteration from the list if we flushed successfully.
1370 : *
1371 : * So we just keep track of the next pointer in each loop iteration.
1372 : */
1373 60946 : if (!dlist_is_empty(&pgStatPending))
1374 56962 : cur = dlist_head_node(&pgStatPending);
1375 :
1376 1689784 : while (cur)
1377 : {
1378 1628838 : PgStat_EntryRef *entry_ref =
1379 1628838 : dlist_container(PgStat_EntryRef, pending_node, cur);
1380 1628838 : PgStat_HashKey key = entry_ref->shared_entry->key;
1381 1628838 : PgStat_Kind kind = key.kind;
1382 1628838 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1383 : bool did_flush;
1384 : dlist_node *next;
1385 :
1386 : Assert(!kind_info->fixed_amount);
1387 : Assert(kind_info->flush_pending_cb != NULL);
1388 :
1389 : /* flush the stats, if possible */
1390 1628838 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1391 :
1392 : Assert(did_flush || nowait);
1393 :
1394 : /* determine next entry, before deleting the pending entry */
1395 1628838 : if (dlist_has_next(&pgStatPending, cur))
1396 1571876 : next = dlist_next_node(&pgStatPending, cur);
1397 : else
1398 56962 : next = NULL;
1399 :
1400 : /* if successfully flushed, remove entry */
1401 1628838 : if (did_flush)
1402 1628826 : pgstat_delete_pending_entry(entry_ref);
1403 : else
1404 12 : have_pending = true;
1405 :
1406 1628838 : cur = next;
1407 : }
1408 :
1409 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1410 :
1411 60946 : return have_pending;
1412 : }
1413 :
1414 :
1415 : /* ------------------------------------------------------------
1416 : * Helper / infrastructure functions
1417 : * ------------------------------------------------------------
1418 : */
1419 :
1420 : PgStat_Kind
1421 166 : pgstat_get_kind_from_str(char *kind_str)
1422 : {
1423 494 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1424 : {
1425 488 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1426 160 : return kind;
1427 : }
1428 :
1429 : /* Check the custom set of cumulative stats */
1430 6 : if (pgstat_kind_custom_infos)
1431 : {
1432 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1433 : {
1434 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1435 :
1436 0 : if (pgstat_kind_custom_infos[idx] &&
1437 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1438 0 : return kind;
1439 : }
1440 : }
1441 :
1442 6 : ereport(ERROR,
1443 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1444 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1445 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1446 : }
1447 :
1448 : static inline bool
1449 656786 : pgstat_is_kind_valid(PgStat_Kind kind)
1450 : {
1451 656786 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1452 : }
1453 :
1454 : const PgStat_KindInfo *
1455 41109352 : pgstat_get_kind_info(PgStat_Kind kind)
1456 : {
1457 41109352 : if (pgstat_is_kind_builtin(kind))
1458 9413156 : return &pgstat_kind_builtin_infos[kind];
1459 :
1460 31696196 : if (pgstat_is_kind_custom(kind))
1461 : {
1462 19224446 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1463 :
1464 19224446 : if (pgstat_kind_custom_infos == NULL ||
1465 58140 : pgstat_kind_custom_infos[idx] == NULL)
1466 19223456 : return NULL;
1467 990 : return pgstat_kind_custom_infos[idx];
1468 : }
1469 :
1470 12471750 : return NULL;
1471 : }
1472 :
1473 : /*
1474 : * Register a new stats kind.
1475 : *
1476 : * PgStat_Kinds must be globally unique across all extensions. Refer
1477 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1478 : * unique ID for your extension, to avoid conflicts with other extension
1479 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1480 : * needlessly reserving a new ID.
1481 : */
1482 : void
1483 16 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1484 : {
1485 16 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1486 :
1487 16 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1488 0 : ereport(ERROR,
1489 : (errmsg("custom cumulative statistics name is invalid"),
1490 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1491 :
1492 16 : if (!pgstat_is_kind_custom(kind))
1493 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1494 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1495 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1496 :
1497 16 : if (!process_shared_preload_libraries_in_progress)
1498 0 : ereport(ERROR,
1499 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1500 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1501 :
1502 : /*
1503 : * Check some data for fixed-numbered stats.
1504 : */
1505 16 : if (kind_info->fixed_amount)
1506 : {
1507 8 : if (kind_info->shared_size == 0)
1508 0 : ereport(ERROR,
1509 : (errmsg("custom cumulative statistics property is invalid"),
1510 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1511 : }
1512 :
1513 : /*
1514 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1515 : */
1516 16 : if (pgstat_kind_custom_infos == NULL)
1517 : {
1518 8 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1519 8 : MemoryContextAllocZero(TopMemoryContext,
1520 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1521 : }
1522 :
1523 16 : if (pgstat_kind_custom_infos[idx] != NULL &&
1524 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1525 0 : ereport(ERROR,
1526 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1527 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1528 : pgstat_kind_custom_infos[idx]->name)));
1529 :
1530 : /* check for existing custom stats with the same name */
1531 2080 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1532 : {
1533 2064 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1534 :
1535 2064 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1536 2056 : continue;
1537 8 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1538 0 : ereport(ERROR,
1539 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1540 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1541 : }
1542 :
1543 : /* Register it */
1544 16 : pgstat_kind_custom_infos[idx] = kind_info;
1545 16 : ereport(LOG,
1546 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1547 : kind_info->name, kind)));
1548 16 : }
1549 :
/*
 * Assert that the stats system is usable, i.e. that we are past
 * pgstat_initialize() and not yet past pgstat_shutdown(); stats should only
 * be reported within that window.  The check is called from a few central
 * places so that violations of the rule are caught more easily.
 *
 * Only compiled in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
1562 :
1563 :
1564 : /* ------------------------------------------------------------
1565 : * reading and writing of on-disk stats file
1566 : * ------------------------------------------------------------
1567 : */
1568 :
/*
 * Helpers for pgstat_write_statsfile().
 *
 * write_chunk() writes 'len' bytes starting at 'ptr' to 'fpout' as a single
 * item.  The result of fwrite() is deliberately ignored: the caller checks
 * the stream with ferror() once, after all output has been produced.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	(void) fwrite(ptr, len, 1, fpout);
}

/* write the object 'ptr' points to, sized by its type */
#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1582 :
1583 : /*
1584 : * This function is called in the last process that is accessing the shared
1585 : * stats so locking is not required.
1586 : */
1587 : static void
1588 1098 : pgstat_write_statsfile(XLogRecPtr redo)
1589 : {
1590 : FILE *fpout;
1591 : int32 format_id;
1592 1098 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1593 1098 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1594 : dshash_seq_status hstat;
1595 : PgStatShared_HashEntry *ps;
1596 :
1597 : pgstat_assert_is_up();
1598 :
1599 : /* should be called only by the checkpointer or single user mode */
1600 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1601 :
1602 : /* we're shutting down, so it's ok to just override this */
1603 1098 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1604 :
1605 1098 : elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
1606 : LSN_FORMAT_ARGS(redo));
1607 :
1608 : /*
1609 : * Open the statistics temp file to write out the current values.
1610 : */
1611 1098 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1612 1098 : if (fpout == NULL)
1613 : {
1614 0 : ereport(LOG,
1615 : (errcode_for_file_access(),
1616 : errmsg("could not open temporary statistics file \"%s\": %m",
1617 : tmpfile)));
1618 0 : return;
1619 : }
1620 :
1621 : /*
1622 : * Write the file header --- currently just a format ID.
1623 : */
1624 1098 : format_id = PGSTAT_FILE_FORMAT_ID;
1625 1098 : write_chunk_s(fpout, &format_id);
1626 :
1627 : /* Write the redo LSN, used to cross check the file read */
1628 1098 : write_chunk_s(fpout, &redo);
1629 :
1630 : /* Write various stats structs for fixed number of objects */
1631 282186 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1632 : {
1633 : char *ptr;
1634 281088 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1635 :
1636 281088 : if (!info || !info->fixed_amount)
1637 274494 : continue;
1638 :
1639 6594 : if (pgstat_is_kind_builtin(kind))
1640 : Assert(info->snapshot_ctl_off != 0);
1641 :
1642 : /* skip if no need to write to file */
1643 6594 : if (!info->write_to_file)
1644 0 : continue;
1645 :
1646 6594 : pgstat_build_snapshot_fixed(kind);
1647 6594 : if (pgstat_is_kind_builtin(kind))
1648 6588 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1649 : else
1650 6 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1651 :
1652 6594 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1653 6594 : write_chunk_s(fpout, &kind);
1654 6594 : write_chunk(fpout, ptr, info->shared_data_len);
1655 : }
1656 :
1657 : /*
1658 : * Walk through the stats entries
1659 : */
1660 1098 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1661 320444 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1662 : {
1663 : PgStatShared_Common *shstats;
1664 319346 : const PgStat_KindInfo *kind_info = NULL;
1665 :
1666 319346 : CHECK_FOR_INTERRUPTS();
1667 :
1668 : /*
1669 : * We should not see any "dropped" entries when writing the stats
1670 : * file, as all backends and auxiliary processes should have cleaned
1671 : * up their references before they terminated.
1672 : *
1673 : * However, since we are already shutting down, it is not worth
1674 : * crashing the server over any potential cleanup issues, so we simply
1675 : * skip such entries if encountered.
1676 : */
1677 : Assert(!ps->dropped);
1678 319346 : if (ps->dropped)
1679 0 : continue;
1680 :
1681 : /*
1682 : * This discards data related to custom stats kinds that are unknown
1683 : * to this process.
1684 : */
1685 319346 : if (!pgstat_is_kind_valid(ps->key.kind))
1686 : {
1687 0 : elog(WARNING, "found unknown stats entry %u/%u/%llu",
1688 : ps->key.kind, ps->key.dboid,
1689 : (unsigned long long) ps->key.objid);
1690 0 : continue;
1691 : }
1692 :
1693 319346 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1694 :
1695 319346 : kind_info = pgstat_get_kind_info(ps->key.kind);
1696 :
1697 : /* if not dropped the valid-entry refcount should exist */
1698 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1699 :
1700 : /* skip if no need to write to file */
1701 319346 : if (!kind_info->write_to_file)
1702 194 : continue;
1703 :
1704 319152 : if (!kind_info->to_serialized_name)
1705 : {
1706 : /* normal stats entry, identified by PgStat_HashKey */
1707 319006 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1708 319006 : write_chunk_s(fpout, &ps->key);
1709 : }
1710 : else
1711 : {
1712 : /* stats entry identified by name on disk (e.g. slots) */
1713 : NameData name;
1714 :
1715 146 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1716 :
1717 146 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1718 146 : write_chunk_s(fpout, &ps->key.kind);
1719 146 : write_chunk_s(fpout, &name);
1720 : }
1721 :
1722 : /* Write except the header part of the entry */
1723 319152 : write_chunk(fpout,
1724 : pgstat_get_entry_data(ps->key.kind, shstats),
1725 : pgstat_get_entry_len(ps->key.kind));
1726 : }
1727 1098 : dshash_seq_term(&hstat);
1728 :
1729 : /*
1730 : * No more output to be done. Close the temp file and replace the old
1731 : * pgstat.stat with it. The ferror() check replaces testing for error
1732 : * after each individual fputc or fwrite (in write_chunk()) above.
1733 : */
1734 1098 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1735 :
1736 1098 : if (ferror(fpout))
1737 : {
1738 0 : ereport(LOG,
1739 : (errcode_for_file_access(),
1740 : errmsg("could not write temporary statistics file \"%s\": %m",
1741 : tmpfile)));
1742 0 : FreeFile(fpout);
1743 0 : unlink(tmpfile);
1744 : }
1745 1098 : else if (FreeFile(fpout) < 0)
1746 : {
1747 0 : ereport(LOG,
1748 : (errcode_for_file_access(),
1749 : errmsg("could not close temporary statistics file \"%s\": %m",
1750 : tmpfile)));
1751 0 : unlink(tmpfile);
1752 : }
1753 1098 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1754 : {
1755 : /* durable_rename already emitted log message */
1756 0 : unlink(tmpfile);
1757 : }
1758 : }
1759 :
/*
 * Helpers for pgstat_read_statsfile().
 *
 * read_chunk() reads exactly 'len' bytes from 'fpin' into 'ptr' and returns
 * true only if all of them could be read.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

/* read into the object 'ptr' points to, sized by its type */
#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1768 :
1769 : /*
1770 : * Reads in existing statistics file into memory.
1771 : *
1772 : * This function is called in the only process that is accessing the shared
1773 : * stats so locking is not required.
1774 : */
1775 : static void
1776 1326 : pgstat_read_statsfile(XLogRecPtr redo)
1777 : {
1778 : FILE *fpin;
1779 : int32 format_id;
1780 : bool found;
1781 1326 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1782 1326 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1783 : XLogRecPtr file_redo;
1784 :
1785 : /* shouldn't be called from postmaster */
1786 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1787 :
1788 1326 : elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
1789 : LSN_FORMAT_ARGS(redo));
1790 :
1791 : /*
1792 : * Try to open the stats file. If it doesn't exist, the backends simply
1793 : * returns zero for anything and statistics simply starts from scratch
1794 : * with empty counters.
1795 : *
1796 : * ENOENT is a possibility if stats collection was previously disabled or
1797 : * has not yet written the stats file for the first time. Any other
1798 : * failure condition is suspicious.
1799 : */
1800 1326 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1801 : {
1802 92 : if (errno != ENOENT)
1803 0 : ereport(LOG,
1804 : (errcode_for_file_access(),
1805 : errmsg("could not open statistics file \"%s\": %m",
1806 : statfile)));
1807 92 : pgstat_reset_after_failure();
1808 92 : return;
1809 : }
1810 :
1811 : /*
1812 : * Verify it's of the expected format.
1813 : */
1814 1234 : if (!read_chunk_s(fpin, &format_id))
1815 : {
1816 0 : elog(WARNING, "could not read format ID");
1817 0 : goto error;
1818 : }
1819 :
1820 1234 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1821 : {
1822 2 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1823 : format_id, PGSTAT_FILE_FORMAT_ID);
1824 2 : goto error;
1825 : }
1826 :
1827 : /*
1828 : * Read the redo LSN stored in the file.
1829 : */
1830 1232 : if (!read_chunk_s(fpin, &file_redo))
1831 : {
1832 0 : elog(WARNING, "could not read redo LSN");
1833 0 : goto error;
1834 : }
1835 :
1836 1232 : if (file_redo != redo)
1837 : {
1838 26 : elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)",
1839 : LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo));
1840 26 : goto error;
1841 : }
1842 :
1843 : /*
1844 : * We found an existing statistics file. Read it and put all the stats
1845 : * data into place.
1846 : */
1847 : for (;;)
1848 337438 : {
1849 338644 : int t = fgetc(fpin);
1850 :
1851 338644 : switch (t)
1852 : {
1853 7240 : case PGSTAT_FILE_ENTRY_FIXED:
1854 : {
1855 : PgStat_Kind kind;
1856 : const PgStat_KindInfo *info;
1857 : char *ptr;
1858 :
1859 : /* entry for fixed-numbered stats */
1860 7240 : if (!read_chunk_s(fpin, &kind))
1861 : {
1862 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1863 2 : goto error;
1864 : }
1865 :
1866 7240 : if (!pgstat_is_kind_valid(kind))
1867 : {
1868 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1869 : kind, t);
1870 0 : goto error;
1871 : }
1872 :
1873 7240 : info = pgstat_get_kind_info(kind);
1874 7240 : if (!info)
1875 : {
1876 2 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1877 : kind, t);
1878 2 : goto error;
1879 : }
1880 :
1881 7238 : if (!info->fixed_amount)
1882 : {
1883 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1884 : kind, t);
1885 0 : goto error;
1886 : }
1887 :
1888 : /* Load back stats into shared memory */
1889 7238 : if (pgstat_is_kind_builtin(kind))
1890 7236 : ptr = ((char *) shmem) + info->shared_ctl_off +
1891 7236 : info->shared_data_off;
1892 : else
1893 : {
1894 2 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1895 :
1896 2 : ptr = ((char *) shmem->custom_data[idx]) +
1897 2 : info->shared_data_off;
1898 : }
1899 :
1900 7238 : if (!read_chunk(fpin, ptr, info->shared_data_len))
1901 : {
1902 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1903 : kind, t, info->shared_data_len);
1904 0 : goto error;
1905 : }
1906 :
1907 7238 : break;
1908 : }
1909 330200 : case PGSTAT_FILE_ENTRY_HASH:
1910 : case PGSTAT_FILE_ENTRY_NAME:
1911 : {
1912 : PgStat_HashKey key;
1913 : PgStatShared_HashEntry *p;
1914 : PgStatShared_Common *header;
1915 :
1916 330200 : CHECK_FOR_INTERRUPTS();
1917 :
1918 330200 : if (t == PGSTAT_FILE_ENTRY_HASH)
1919 : {
1920 : /* normal stats entry, identified by PgStat_HashKey */
1921 330108 : if (!read_chunk_s(fpin, &key))
1922 : {
1923 0 : elog(WARNING, "could not read key for entry of type %c", t);
1924 0 : goto error;
1925 : }
1926 :
1927 330108 : if (!pgstat_is_kind_valid(key.kind))
1928 : {
1929 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%llu of type %c",
1930 : key.kind, key.dboid,
1931 : (unsigned long long) key.objid, t);
1932 0 : goto error;
1933 : }
1934 : }
1935 : else
1936 : {
1937 : /* stats entry identified by name on disk (e.g. slots) */
1938 92 : const PgStat_KindInfo *kind_info = NULL;
1939 : PgStat_Kind kind;
1940 : NameData name;
1941 :
1942 92 : if (!read_chunk_s(fpin, &kind))
1943 : {
1944 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1945 0 : goto error;
1946 : }
1947 92 : if (!read_chunk_s(fpin, &name))
1948 : {
1949 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1950 : kind, t);
1951 0 : goto error;
1952 : }
1953 92 : if (!pgstat_is_kind_valid(kind))
1954 : {
1955 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1956 : kind, t);
1957 0 : goto error;
1958 : }
1959 :
1960 92 : kind_info = pgstat_get_kind_info(kind);
1961 92 : if (!kind_info)
1962 : {
1963 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1964 : kind, t);
1965 0 : goto error;
1966 : }
1967 :
1968 92 : if (!kind_info->from_serialized_name)
1969 : {
1970 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1971 : kind, t);
1972 0 : goto error;
1973 : }
1974 :
1975 92 : if (!kind_info->from_serialized_name(&name, &key))
1976 : {
1977 : /* skip over data for entry we don't care about */
1978 2 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1979 : {
1980 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1981 : NameStr(name), kind, t);
1982 0 : goto error;
1983 : }
1984 :
1985 2 : continue;
1986 : }
1987 :
1988 : Assert(key.kind == kind);
1989 : }
1990 :
1991 : /*
1992 : * This intentionally doesn't use pgstat_get_entry_ref() -
1993 : * putting all stats into checkpointer's
1994 : * pgStatEntryRefHash would be wasted effort and memory.
1995 : */
1996 330198 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1997 :
1998 : /* don't allow duplicate entries */
1999 330198 : if (found)
2000 : {
2001 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
2002 0 : elog(WARNING, "found duplicate stats entry %u/%u/%llu of type %c",
2003 : key.kind, key.dboid,
2004 : (unsigned long long) key.objid, t);
2005 0 : goto error;
2006 : }
2007 :
2008 330198 : header = pgstat_init_entry(key.kind, p);
2009 330198 : dshash_release_lock(pgStatLocal.shared_hash, p);
2010 :
2011 330198 : if (!read_chunk(fpin,
2012 : pgstat_get_entry_data(key.kind, header),
2013 : pgstat_get_entry_len(key.kind)))
2014 : {
2015 0 : elog(WARNING, "could not read data for entry %u/%u/%llu of type %c",
2016 : key.kind, key.dboid,
2017 : (unsigned long long) key.objid, t);
2018 0 : goto error;
2019 : }
2020 :
2021 330198 : break;
2022 : }
2023 1204 : case PGSTAT_FILE_ENTRY_END:
2024 :
2025 : /*
2026 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
2027 : * file
2028 : */
2029 1204 : if (fgetc(fpin) != EOF)
2030 : {
2031 2 : elog(WARNING, "could not read end-of-file");
2032 2 : goto error;
2033 : }
2034 :
2035 1202 : goto done;
2036 :
2037 0 : default:
2038 0 : elog(WARNING, "could not read entry of type %c", t);
2039 0 : goto error;
2040 : }
2041 : }
2042 :
2043 1234 : done:
2044 1234 : FreeFile(fpin);
2045 :
2046 1234 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2047 1234 : unlink(statfile);
2048 :
2049 1234 : return;
2050 :
2051 32 : error:
2052 32 : ereport(LOG,
2053 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2054 :
2055 32 : pgstat_reset_after_failure();
2056 :
2057 32 : goto done;
2058 : }
2059 :
2060 : /*
2061 : * Helper to reset / drop stats after a crash or after restoring stats from
2062 : * disk failed, potentially after already loading parts.
2063 : */
2064 : static void
2065 466 : pgstat_reset_after_failure(void)
2066 : {
2067 466 : TimestampTz ts = GetCurrentTimestamp();
2068 :
2069 : /* reset fixed-numbered stats */
2070 119762 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2071 : {
2072 119296 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2073 :
2074 119296 : if (!kind_info || !kind_info->fixed_amount)
2075 116498 : continue;
2076 :
2077 2798 : kind_info->reset_all_cb(ts);
2078 : }
2079 :
2080 : /* and drop variable-numbered ones */
2081 466 : pgstat_drop_all_entries();
2082 466 : }
2083 :
2084 : /*
2085 : * GUC assign_hook for stats_fetch_consistency.
2086 : */
2087 : void
2088 5356 : assign_stats_fetch_consistency(int newval, void *extra)
2089 : {
2090 : /*
2091 : * Changing this value in a transaction may cause snapshot state
2092 : * inconsistencies, so force a clear of the current snapshot on the next
2093 : * snapshot build attempt.
2094 : */
2095 5356 : if (pgstat_fetch_consistency != newval)
2096 3138 : force_stats_snapshot_clear = true;
2097 5356 : }
|