Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : * Infrastructure for the cumulative statistics system.
4 : *
5 : * The cumulative statistics system accumulates statistics for different kinds
6 : * of objects. Some kinds of statistics are collected for a fixed number of
7 : * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 : * statistics are collected for a varying number of objects
9 : * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 : * statistics.
11 : *
12 : * Statistics are loaded from the filesystem during startup (by the startup
13 : * process), unless preceded by a crash, in which case all stats are
14 : * discarded. They are written out by the checkpointer process just before
15 : * shutting down (if the stats kind allows it), except when shutting down in
16 : * immediate mode.
17 : *
18 : * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 : *
20 : * Statistics for variable-numbered objects are stored in dynamic shared
21 : * memory and can be found via a dshash hashtable. The statistics counters are
22 : * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
23 : * separately allocated (PgStatShared_HashEntry->body). The separate
24 : * allocation allows different kinds of statistics to be stored in the same
25 : * hashtable without wasting space in PgStatShared_HashEntry.
26 : *
27 : * Variable-numbered stats are addressed by PgStat_HashKey while running. It
28 : * is not possible to have statistics for an object that cannot be addressed
29 : * that way at runtime. A wider identifier can be used when serializing to
30 : * disk (used for replication slot stats).
31 : *
32 : * To avoid contention on the shared hashtable, each backend has a
33 : * backend-local hashtable (pgStatEntryRefHash) in front of the shared
34 : * hashtable, containing references (PgStat_EntryRef) to shared hashtable
35 : * entries. The shared hashtable only needs to be accessed when no prior
36 : * reference is found in the local hashtable. Besides pointing to the
37 : * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
38 : * contains a pointer to the shared statistics data, as a process-local
39 : * address, to reduce access costs.
40 : *
41 : * The names for structs stored in shared memory are prefixed with
42 : * PgStatShared instead of PgStat. Each stats entry in shared memory is
43 : * protected by a dedicated lwlock.
44 : *
45 : * Most stats updates are first accumulated locally in each process as pending
46 : * entries, then later flushed to shared memory (just after commit, or by
47 : * idle-timeout). This practically eliminates contention on individual stats
48 : * entries. For most kinds of variable-numbered pending stats data is stored
49 : * in PgStat_EntryRef->pending. All entries with pending data are in the
50 : * pgStatPending list. Pending statistics updates are flushed out by
51 : * pgstat_report_stat().
52 : *
53 : * It is possible for external modules to define custom statistics kinds,
54 : * that can use the same properties as any built-in stats kinds. Each custom
55 : * stats kind needs to assign a unique ID to ensure that it does not overlap
56 : * with other extensions. In order to reserve a unique stats kind ID, refer
57 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 : *
59 : * The behavior of different kinds of statistics is determined by the kind's
60 : * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
61 : * defined, and pgstat_kind_custom_infos for custom kinds registered at
62 : * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 : *
64 : * The consistency of read accesses to statistics can be configured using the
65 : * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
66 : * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
67 : * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
68 : * pgStatLocal.snapshot.
69 : *
70 : * To keep things manageable, stats handling is split across several
71 : * files. Infrastructure pieces are in:
72 : * - pgstat.c - this file, to tie it all together
73 : * - pgstat_shmem.c - nearly everything dealing with shared memory, including
74 : * the maintenance of hashtable entries
75 : * - pgstat_xact.c - transactional integration, including the transactional
76 : * creation and dropping of stats entries
77 : *
78 : * Each statistics kind is handled in a dedicated file:
79 : * - pgstat_archiver.c
80 : * - pgstat_backend.c
81 : * - pgstat_bgwriter.c
82 : * - pgstat_checkpointer.c
83 : * - pgstat_database.c
84 : * - pgstat_function.c
85 : * - pgstat_io.c
86 : * - pgstat_lock.c
87 : * - pgstat_relation.c
88 : * - pgstat_replslot.c
89 : * - pgstat_slru.c
90 : * - pgstat_subscription.c
91 : * - pgstat_wal.c
92 : *
93 : * Whenever possible infrastructure files should not contain code related to
94 : * specific kinds of stats.
95 : *
96 : *
97 : * Copyright (c) 2001-2026, PostgreSQL Global Development Group
98 : *
99 : * IDENTIFICATION
100 : * src/backend/utils/activity/pgstat.c
101 : * ----------
102 : */
103 : #include "postgres.h"
104 :
105 : #include <unistd.h>
106 :
107 : #include "access/xact.h"
108 : #include "lib/dshash.h"
109 : #include "pgstat.h"
110 : #include "storage/fd.h"
111 : #include "storage/ipc.h"
112 : #include "storage/lwlock.h"
113 : #include "utils/guc_hooks.h"
114 : #include "utils/memutils.h"
115 : #include "utils/pgstat_internal.h"
116 : #include "utils/timestamp.h"
117 :
118 :
119 : /* ----------
120 : * Timer definitions.
121 : *
122 : * In milliseconds.
123 : * ----------
124 : */
125 :
126 : /* minimum interval between non-forced stats flushes */
127 : #define PGSTAT_MIN_INTERVAL 1000
128 : /* how long stats updates may stay pending before flushes block on locks */
129 : #define PGSTAT_MAX_INTERVAL 60000
130 : /* when to call pgstat_report_stat() again, even when idle */
131 : #define PGSTAT_IDLE_INTERVAL 10000
132 :
133 : /* ----------
134 : * Initial size hints for the hash tables used in statistics.
135 : * ----------
136 : */
137 :
138 : #define PGSTAT_SNAPSHOT_HASH_SIZE 512
139 :
140 : /* ---------
141 : * Identifiers in stats file.
142 : * ---------
143 : */
144 : #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
145 : #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
146 : #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
147 : #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
148 : * PgStat_HashKey */
149 :
150 : /* entry of the backend-local hash table used for statistics snapshots */
151 : typedef struct PgStat_SnapshotEntry
152 : {
153 : PgStat_HashKey key; /* hash key, see SH_KEY below */
154 : char status; /* for simplehash use */
155 : void *data; /* the stats data itself */
156 : } PgStat_SnapshotEntry;
157 :
158 :
159 : /* ----------
160 : * Backend-local Hash Table Definitions
161 : * ----------
162 : */
163 :
164 : /* for stats snapshot entries */
165 : #define SH_PREFIX pgstat_snapshot
166 : #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
167 : #define SH_KEY_TYPE PgStat_HashKey
168 : #define SH_KEY key
169 : #define SH_HASH_KEY(tb, key) \
170 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
171 : #define SH_EQUAL(tb, a, b) \
172 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
173 : #define SH_SCOPE static inline
174 : #define SH_DEFINE
175 : #define SH_DECLARE
176 : #include "lib/simplehash.h"
177 :
178 :
179 : /* ----------
180 : * Local function forward declarations
181 : * ----------
182 : */
183 :
184 : static void pgstat_write_statsfile(void);
185 : static void pgstat_read_statsfile(void);
186 :
187 : static void pgstat_init_snapshot_fixed(void);
188 :
189 : static void pgstat_reset_after_failure(void);
190 :
191 : static bool pgstat_flush_pending_entries(bool nowait);
192 :
193 : static void pgstat_prep_snapshot(void);
194 : static void pgstat_build_snapshot(void);
195 : static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
196 :
197 : static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
198 :
199 :
200 : /* ----------
201 : * GUC parameters
202 : * ----------
203 : */
204 :
205 : bool pgstat_track_counts = false;
206 : int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
207 :
208 :
209 : /* ----------
210 : * state shared with pgstat_*.c
211 : * ----------
212 : */
213 :
214 : PgStat_LocalState pgStatLocal;
215 :
216 : /*
217 : * Track pending reports for fixed-numbered stats, used by
218 : * pgstat_report_stat().
219 : */
220 : bool pgstat_report_fixed = false;
221 :
222 : /* ----------
223 : * Local data
224 : *
225 : * NB: There should be only variables related to stats infrastructure here,
226 : * not for specific kinds of stats.
227 : * ----------
228 : */
229 :
230 : /*
231 : * Memory context containing pending stats data. Kept as a dedicated
232 : * context mostly to make it easier to track / attribute the memory used
233 : * by pending entries.
234 : */
235 :
236 : static MemoryContext pgStatPendingContext = NULL;
237 :
238 : /*
239 : * Backend local list of PgStat_EntryRef with unflushed pending stats.
240 : *
241 : * Newly pending entries should only ever be added to the end of the list,
242 : * otherwise pgstat_flush_pending_entries() might not see them immediately.
243 : */
244 : static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
245 :
246 :
247 : /*
248 : * Force the next stats flush to happen regardless of
249 : * PGSTAT_MIN_INTERVAL. Useful in test scripts.
250 : */
251 : static bool pgStatForceNextFlush = false;
252 :
253 : /*
254 : * Force-clear existing snapshot before next use when stats_fetch_consistency
255 : * is changed.
256 : */
257 : static bool force_stats_snapshot_clear = false;
258 :
259 :
260 : /*
261 : * For assertions that check pgstat is not used before initialization / after
262 : * shutdown.
263 : */
264 : #ifdef USE_ASSERT_CHECKING
265 : static bool pgstat_is_initialized = false;
266 : static bool pgstat_is_shutdown = false;
267 : #endif
268 :
269 :
270 : /*
271 : * The different kinds of built-in statistics.
272 : *
273 : * If reasonably possible, handling specific to one kind of stats should go
274 : * through this abstraction, rather than making more of pgstat.c aware.
275 : *
276 : * See comments for struct PgStat_KindInfo for details about the individual
277 : * fields.
278 : *
279 : * XXX: It'd be nicer to define this outside of this file. But there doesn't
280 : * seem to be a great way of doing that, given the split across multiple
281 : * files.
282 : */
283 : static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
284 :
285 : /* stats kinds for variable-numbered objects */
286 :
287 : [PGSTAT_KIND_DATABASE] = {
288 : .name = "database",
289 :
290 : .fixed_amount = false,
291 : .write_to_file = true,
292 : /* so pg_stat_database entries can be seen in all databases */
293 : .accessed_across_databases = true,
294 :
295 : .shared_size = sizeof(PgStatShared_Database),
296 : .shared_data_off = offsetof(PgStatShared_Database, stats),
297 : .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
298 : .pending_size = sizeof(PgStat_StatDBEntry),
299 :
300 : .flush_pending_cb = pgstat_database_flush_cb,
301 : .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
302 : },
303 :
304 : [PGSTAT_KIND_RELATION] = {
305 : .name = "relation",
306 :
307 : .fixed_amount = false,
308 : .write_to_file = true,
309 :
310 : .shared_size = sizeof(PgStatShared_Relation),
311 : .shared_data_off = offsetof(PgStatShared_Relation, stats),
312 : .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
313 : .pending_size = sizeof(PgStat_TableStatus),
314 :
315 : .flush_pending_cb = pgstat_relation_flush_cb,
316 : .delete_pending_cb = pgstat_relation_delete_pending_cb,
317 : .reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
318 : },
319 :
320 : [PGSTAT_KIND_FUNCTION] = {
321 : .name = "function",
322 :
323 : .fixed_amount = false,
324 : .write_to_file = true,
325 :
326 : .shared_size = sizeof(PgStatShared_Function),
327 : .shared_data_off = offsetof(PgStatShared_Function, stats),
328 : .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
329 : .pending_size = sizeof(PgStat_FunctionCounts),
330 :
331 : .flush_pending_cb = pgstat_function_flush_cb,
332 : .reset_timestamp_cb = pgstat_function_reset_timestamp_cb,
333 : },
334 :
335 : [PGSTAT_KIND_REPLSLOT] = {
336 : .name = "replslot",
337 :
338 : .fixed_amount = false,
339 : .write_to_file = true,
340 :
341 : .accessed_across_databases = true,
342 :
343 : .shared_size = sizeof(PgStatShared_ReplSlot),
344 : .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
345 : .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
346 : /* the only kind here providing serialized-name callbacks */
347 : .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
348 : .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
349 : .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
350 : },
351 :
352 : [PGSTAT_KIND_SUBSCRIPTION] = {
353 : .name = "subscription",
354 :
355 : .fixed_amount = false,
356 : .write_to_file = true,
357 : /* so pg_stat_subscription_stats entries can be seen in all databases */
358 : .accessed_across_databases = true,
359 :
360 : .shared_size = sizeof(PgStatShared_Subscription),
361 : .shared_data_off = offsetof(PgStatShared_Subscription, stats),
362 : .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
363 : .pending_size = sizeof(PgStat_BackendSubEntry),
364 :
365 : .flush_pending_cb = pgstat_subscription_flush_cb,
366 : .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
367 : },
368 :
369 : [PGSTAT_KIND_BACKEND] = {
370 : .name = "backend",
371 :
372 : .fixed_amount = false,
373 : .write_to_file = false, /* the only kind here not written to file */
374 :
375 : .accessed_across_databases = true,
376 :
377 : .shared_size = sizeof(PgStatShared_Backend),
378 : .shared_data_off = offsetof(PgStatShared_Backend, stats),
379 : .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
380 : /* no pending_size / flush_pending_cb: uses flush_static_cb instead */
381 : .flush_static_cb = pgstat_backend_flush_cb,
382 : .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
383 : },
384 :
385 : /* stats for fixed-numbered (mostly 1) objects */
386 :
387 : [PGSTAT_KIND_ARCHIVER] = {
388 : .name = "archiver",
389 :
390 : .fixed_amount = true,
391 : .write_to_file = true,
392 :
393 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
394 : .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
395 : .shared_data_off = offsetof(PgStatShared_Archiver, stats),
396 : .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
397 :
398 : .init_shmem_cb = pgstat_archiver_init_shmem_cb,
399 : .reset_all_cb = pgstat_archiver_reset_all_cb,
400 : .snapshot_cb = pgstat_archiver_snapshot_cb,
401 : },
402 :
403 : [PGSTAT_KIND_BGWRITER] = {
404 : .name = "bgwriter",
405 :
406 : .fixed_amount = true,
407 : .write_to_file = true,
408 :
409 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
410 : .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
411 : .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
412 : .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
413 :
414 : .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
415 : .reset_all_cb = pgstat_bgwriter_reset_all_cb,
416 : .snapshot_cb = pgstat_bgwriter_snapshot_cb,
417 : },
418 :
419 : [PGSTAT_KIND_CHECKPOINTER] = {
420 : .name = "checkpointer",
421 :
422 : .fixed_amount = true,
423 : .write_to_file = true,
424 :
425 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
426 : .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
427 : .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
428 : .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
429 :
430 : .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
431 : .reset_all_cb = pgstat_checkpointer_reset_all_cb,
432 : .snapshot_cb = pgstat_checkpointer_snapshot_cb,
433 : },
434 :
435 : [PGSTAT_KIND_IO] = {
436 : .name = "io",
437 :
438 : .fixed_amount = true,
439 : .write_to_file = true,
440 :
441 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
442 : .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
443 : .shared_data_off = offsetof(PgStatShared_IO, stats),
444 : .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
445 :
446 : .flush_static_cb = pgstat_io_flush_cb,
447 : .init_shmem_cb = pgstat_io_init_shmem_cb,
448 : .reset_all_cb = pgstat_io_reset_all_cb,
449 : .snapshot_cb = pgstat_io_snapshot_cb,
450 : },
451 :
452 : [PGSTAT_KIND_LOCK] = {
453 : .name = "lock",
454 :
455 : .fixed_amount = true,
456 : .write_to_file = true,
457 :
458 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, lock),
459 : .shared_ctl_off = offsetof(PgStat_ShmemControl, lock),
460 : .shared_data_off = offsetof(PgStatShared_Lock, stats),
461 : .shared_data_len = sizeof(((PgStatShared_Lock *) 0)->stats),
462 :
463 : .flush_static_cb = pgstat_lock_flush_cb,
464 : .init_shmem_cb = pgstat_lock_init_shmem_cb,
465 : .reset_all_cb = pgstat_lock_reset_all_cb,
466 : .snapshot_cb = pgstat_lock_snapshot_cb,
467 : },
468 :
469 : [PGSTAT_KIND_SLRU] = {
470 : .name = "slru",
471 :
472 : .fixed_amount = true,
473 : .write_to_file = true,
474 :
475 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
476 : .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
477 : .shared_data_off = offsetof(PgStatShared_SLRU, stats),
478 : .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
479 :
480 : .flush_static_cb = pgstat_slru_flush_cb,
481 : .init_shmem_cb = pgstat_slru_init_shmem_cb,
482 : .reset_all_cb = pgstat_slru_reset_all_cb,
483 : .snapshot_cb = pgstat_slru_snapshot_cb,
484 : },
485 :
486 : [PGSTAT_KIND_WAL] = {
487 : .name = "wal",
488 :
489 : .fixed_amount = true,
490 : .write_to_file = true,
491 :
492 : .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
493 : .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
494 : .shared_data_off = offsetof(PgStatShared_Wal, stats),
495 : .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
496 : /* the only built-in kind with a backend initialization callback */
497 : .init_backend_cb = pgstat_wal_init_backend_cb,
498 : .flush_static_cb = pgstat_wal_flush_cb,
499 : .init_shmem_cb = pgstat_wal_init_shmem_cb,
500 : .reset_all_cb = pgstat_wal_reset_all_cb,
501 : .snapshot_cb = pgstat_wal_snapshot_cb,
502 : },
503 : };
504 :
505 : /*
506 : * Information about custom statistics kinds.
507 : *
508 : * These are kept in a different array than the built-in kinds, for the
509 : * sake of clarity of the initializations.
510 : *
511 : * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
512 : */
513 : static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
514 :
515 : /* ------------------------------------------------------------
516 : * Functions managing the state of the stats system for all backends.
517 : * ------------------------------------------------------------
518 : */
519 :
/*
 * Load the statistics saved on disk into memory at server start.
 *
 * Should only be called by the startup process or in single user mode.
 */
void
pgstat_restore_stats(void)
{
	/* all of the work is delegated to the stats file reader */
	pgstat_read_statsfile();
}
530 :
531 : /*
532 : * Remove the stats file. This is currently used only if WAL recovery is
533 : * needed after a crash.
534 : *
535 : * Should only be called by the startup process or in single user mode.
536 : */
537 : void
538 190 : pgstat_discard_stats(void)
539 : {
540 : int ret;
541 :
542 : /* NB: this needs to be done even in single user mode */
543 :
544 : /* First, cleanup the main pgstats file */
545 190 : ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
546 190 : if (ret != 0)
547 : {
548 189 : if (errno == ENOENT)
549 189 : elog(DEBUG2,
550 : "didn't need to unlink permanent stats file \"%s\" - didn't exist",
551 : PGSTAT_STAT_PERMANENT_FILENAME);
552 : else
553 0 : ereport(LOG,
554 : (errcode_for_file_access(),
555 : errmsg("could not unlink permanent statistics file \"%s\": %m",
556 : PGSTAT_STAT_PERMANENT_FILENAME)));
557 : }
558 : else
559 : {
560 1 : ereport(DEBUG2,
561 : (errcode_for_file_access(),
562 : errmsg_internal("unlinked permanent statistics file \"%s\"",
563 : PGSTAT_STAT_PERMANENT_FILENAME)));
564 : }
565 :
566 : /* Finish callbacks, if required */
567 6270 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
568 : {
569 6080 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
570 :
571 6080 : if (kind_info && kind_info->finish)
572 1 : kind_info->finish(STATS_DISCARD);
573 : }
574 :
575 : /*
576 : * Reset stats contents. This will set reset timestamps of fixed-numbered
577 : * stats to the current time (no variable stats exist).
578 : */
579 190 : pgstat_reset_after_failure();
580 190 : }
581 :
582 : /*
583 : * pgstat_before_server_shutdown() needs to be called by exactly one process
584 : * during regular server shutdowns. Otherwise all stats will be lost.
585 : *
586 : * We currently only write out stats for proc_exit(0). We might want to change
587 : * that at some point... But right now pgstat_discard_stats() would be called
588 : * during the start after a disorderly shutdown, anyway.
589 : */
590 : void
591 776 : pgstat_before_server_shutdown(int code, Datum arg)
592 : {
593 : Assert(pgStatLocal.shmem != NULL);
594 : Assert(!pgStatLocal.shmem->is_shutdown);
595 :
596 : /*
597 : * Stats should only be reported after pgstat_initialize() and before
598 : * pgstat_shutdown(). This is a convenient point to catch most violations
599 : * of this rule.
600 : */
601 : Assert(pgstat_is_initialized && !pgstat_is_shutdown);
602 :
603 : /* flush out our own pending changes before writing out */
604 776 : pgstat_report_stat(true);
605 :
606 : /*
607 : * Only write out file during normal shutdown. Don't even signal that
608 : * we've shutdown during irregular shutdowns, because the shutdown
609 : * sequence isn't coordinated to ensure this backend shuts down last.
610 : */
611 776 : if (code == 0)
612 : {
613 771 : pgStatLocal.shmem->is_shutdown = true;
614 771 : pgstat_write_statsfile();
615 : }
616 776 : }
617 :
618 :
619 : /* ------------------------------------------------------------
620 : * Backend initialization / shutdown functions
621 : * ------------------------------------------------------------
622 : */
623 :
624 : /*
625 : * Shut down a single backend's statistics reporting at process exit.
626 : *
627 : * Flush out any remaining statistics counts. Without this, operations
628 : * triggered during backend exit (such as temp table deletions) won't be
629 : * counted.
630 : */
631 : static void
632 24528 : pgstat_shutdown_hook(int code, Datum arg)
633 : {
634 : Assert(!pgstat_is_shutdown);
635 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
636 :
637 : /*
638 : * If we got as far as discovering our own database ID, we can flush out
639 : * what we did so far. Otherwise, we'd be reporting an invalid database
640 : * ID, so forget it. (This means that accesses to pg_database during
641 : * failed backend starts might never get counted.)
642 : */
643 24528 : if (OidIsValid(MyDatabaseId))
644 18451 : pgstat_report_disconnect(MyDatabaseId);
645 :
646 24528 : pgstat_report_stat(true);
647 :
648 : /* there shouldn't be any pending changes left */
649 : Assert(dlist_is_empty(&pgStatPending));
650 24528 : dlist_init(&pgStatPending);
651 :
652 : /* drop the backend stats entry */
653 24528 : if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
654 0 : pgstat_request_entry_refs_gc();
655 :
656 24528 : pgstat_detach_shmem();
657 :
658 : #ifdef USE_ASSERT_CHECKING
659 : pgstat_is_shutdown = true;
660 : #endif
661 24528 : }
662 :
663 : /*
664 : * Initialize pgstats state, and set up our on-proc-exit hook. Called from
665 : * BaseInit().
666 : *
667 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
668 : */
669 : void
670 24528 : pgstat_initialize(void)
671 : {
672 : Assert(!pgstat_is_initialized);
673 :
674 24528 : pgstat_attach_shmem();
675 :
676 24528 : pgstat_init_snapshot_fixed();
677 :
678 : /* Backend initialization callbacks */
679 809424 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
680 : {
681 784896 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
682 :
683 784896 : if (kind_info == NULL || kind_info->init_backend_cb == NULL)
684 760368 : continue;
685 :
686 24528 : kind_info->init_backend_cb();
687 : }
688 :
689 : /* Set up a process-exit hook to clean up */
690 24528 : before_shmem_exit(pgstat_shutdown_hook, 0);
691 :
692 : #ifdef USE_ASSERT_CHECKING
693 : pgstat_is_initialized = true;
694 : #endif
695 24528 : }
696 :
697 :
698 : /* ------------------------------------------------------------
699 : * Public functions used by backends follow
700 : * ------------------------------------------------------------
701 : */
702 :
703 : /*
704 : * Must be called by processes that performs DML: tcop/postgres.c, logical
705 : * receiver processes, SPI worker, etc. to flush pending statistics updates to
706 : * shared memory.
707 : *
708 : * Unless called with 'force', pending stats updates are flushed happen once
709 : * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
710 : * block on lock acquisition, except if stats updates have been pending for
711 : * longer than PGSTAT_MAX_INTERVAL (60000ms).
712 : *
713 : * Whenever pending stats updates remain at the end of pgstat_report_stat() a
714 : * suggested idle timeout is returned. Currently this is always
715 : * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
716 : * a timeout after which to call pgstat_report_stat(true), but are not
717 : * required to do so.
718 : *
719 : * Note that this is called only when not within a transaction, so it is fair
720 : * to use transaction stop time as an approximation of current time.
721 : */
722 : long
723 394480 : pgstat_report_stat(bool force)
724 : {
725 : static TimestampTz pending_since = 0;
726 : static TimestampTz last_flush = 0;
727 : bool partial_flush;
728 : TimestampTz now;
729 : bool nowait;
730 :
731 : pgstat_assert_is_up();
732 : Assert(!IsTransactionOrTransactionBlock());
733 :
734 : /* "absorb" the forced flush even if there's nothing to flush */
735 394480 : if (pgStatForceNextFlush)
736 : {
737 339 : force = true;
738 339 : pgStatForceNextFlush = false;
739 : }
740 :
741 : /* Don't expend a clock check if nothing to do */
742 394480 : if (dlist_is_empty(&pgStatPending) &&
743 10752 : !pgstat_report_fixed)
744 : {
745 7734 : return 0;
746 : }
747 :
748 : /*
749 : * There should never be stats to report once stats are shut down. Can't
750 : * assert that before the checks above, as there is an unconditional
751 : * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
752 : * the process that ran pgstat_before_server_shutdown() will still call.
753 : */
754 : Assert(!pgStatLocal.shmem->is_shutdown);
755 :
756 386746 : if (force)
757 : {
758 : /*
759 : * Stats reports are forced either when it's been too long since stats
760 : * have been reported or in processes that force stats reporting to
761 : * happen at specific points (including shutdown). In the former case
762 : * the transaction stop time might be quite old, in the latter it
763 : * would never get cleared.
764 : */
765 24305 : now = GetCurrentTimestamp();
766 : }
767 : else
768 : {
769 362441 : now = GetCurrentTransactionStopTimestamp();
770 :
771 693418 : if (pending_since > 0 &&
772 330977 : TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
773 : {
774 : /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
775 0 : force = true;
776 : }
777 362441 : else if (last_flush > 0 &&
778 347695 : !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
779 : {
780 : /* don't flush too frequently */
781 345692 : if (pending_since == 0)
782 16533 : pending_since = now;
783 :
784 345692 : return PGSTAT_IDLE_INTERVAL;
785 : }
786 : }
787 :
788 41054 : pgstat_update_dbstats(now);
789 :
790 : /* don't wait for lock acquisition when !force */
791 41054 : nowait = !force;
792 :
793 41054 : partial_flush = false;
794 :
795 : /* flush of variable-numbered stats tracked in pending entries list */
796 41054 : partial_flush |= pgstat_flush_pending_entries(nowait);
797 :
798 : /* flush of other stats kinds */
799 41054 : if (pgstat_report_fixed)
800 : {
801 1314456 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
802 : {
803 1274624 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
804 :
805 1274624 : if (!kind_info)
806 756638 : continue;
807 517986 : if (!kind_info->flush_static_cb)
808 318826 : continue;
809 :
810 199160 : partial_flush |= kind_info->flush_static_cb(nowait);
811 : }
812 : }
813 :
814 41054 : last_flush = now;
815 :
816 : /*
817 : * If some of the pending stats could not be flushed due to lock
818 : * contention, let the caller know when to retry.
819 : */
820 41054 : if (partial_flush)
821 : {
822 : /* force should have prevented us from getting here */
823 : Assert(!force);
824 :
825 : /* remember since when stats have been pending */
826 8 : if (pending_since == 0)
827 7 : pending_since = now;
828 :
829 8 : return PGSTAT_IDLE_INTERVAL;
830 : }
831 :
832 41046 : pending_since = 0;
833 41046 : pgstat_report_fixed = false;
834 :
835 41046 : return 0;
836 : }
837 :
838 : /*
839 : * Force locally pending stats to be flushed during the next
840 : * pgstat_report_stat() call. This is useful for writing tests.
841 : */
842 : void
843 339 : pgstat_force_next_flush(void)
844 : {
845 339 : pgStatForceNextFlush = true;
846 339 : }
847 :
848 : /*
849 : * Only for use by pgstat_reset_counters()
850 : */
851 : static bool
852 16199 : match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
853 : {
854 16199 : return entry->key.dboid == MyDatabaseId;
855 : }
856 :
857 : /*
858 : * Reset counters for our database.
859 : *
860 : * Permission checking for this function is managed through the normal
861 : * GRANT system.
862 : */
863 : void
864 15 : pgstat_reset_counters(void)
865 : {
866 15 : TimestampTz ts = GetCurrentTimestamp();
867 :
868 15 : pgstat_reset_matching_entries(match_db_entries,
869 : ObjectIdGetDatum(MyDatabaseId),
870 : ts);
871 15 : }
872 :
873 : /*
874 : * Reset a single variable-numbered entry.
875 : *
876 : * If the stats kind is within a database, also reset the database's
877 : * stat_reset_timestamp.
878 : *
879 : * Permission checking for this function is managed through the normal
880 : * GRANT system.
881 : */
882 : void
883 43 : pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
884 : {
885 43 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
886 43 : TimestampTz ts = GetCurrentTimestamp();
887 :
888 : /* not needed atm, and doesn't make sense with the current signature */
889 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
890 :
891 : /* reset the "single counter" */
892 43 : pgstat_reset_entry(kind, dboid, objid, ts);
893 :
894 43 : if (!kind_info->accessed_across_databases)
895 26 : pgstat_reset_database_timestamp(dboid, ts);
896 43 : }
897 :
898 : /*
899 : * Reset stats for all entries of a kind.
900 : *
901 : * Permission checking for this function is managed through the normal
902 : * GRANT system.
903 : */
904 : void
905 46 : pgstat_reset_of_kind(PgStat_Kind kind)
906 : {
907 46 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
908 46 : TimestampTz ts = GetCurrentTimestamp();
909 :
910 46 : if (kind_info->fixed_amount)
911 42 : kind_info->reset_all_cb(ts);
912 : else
913 4 : pgstat_reset_entries_of_kind(kind, ts);
914 46 : }
915 :
916 :
917 : /* ------------------------------------------------------------
918 : * Fetching of stats
919 : * ------------------------------------------------------------
920 : */
921 :
922 : /*
923 : * Discard any data collected in the current transaction. Any subsequent
924 : * request will cause new snapshots to be read.
925 : *
926 : * This is also invoked during transaction commit or abort to discard
927 : * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
928 : * cause this routine to be called.
929 : */
930 : void
931 654349 : pgstat_clear_snapshot(void)
932 : {
933 : pgstat_assert_is_up();
934 :
935 654349 : memset(&pgStatLocal.snapshot.fixed_valid, 0,
936 : sizeof(pgStatLocal.snapshot.fixed_valid));
937 654349 : memset(&pgStatLocal.snapshot.custom_valid, 0,
938 : sizeof(pgStatLocal.snapshot.custom_valid));
939 654349 : pgStatLocal.snapshot.stats = NULL;
940 654349 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
941 :
942 : /* Release memory, if any was allocated */
943 654349 : if (pgStatLocal.snapshot.context)
944 : {
945 810 : MemoryContextDelete(pgStatLocal.snapshot.context);
946 :
947 : /* Reset variables */
948 810 : pgStatLocal.snapshot.context = NULL;
949 : }
950 :
951 : /*
952 : * Historically the backend_status.c facilities lived in this file, and
953 : * were reset with the same function. For now keep it that way, and
954 : * forward the reset request.
955 : */
956 654349 : pgstat_clear_backend_activity_snapshot();
957 :
958 : /* Reset this flag, as it may be possible that a cleanup was forced. */
959 654349 : force_stats_snapshot_clear = false;
960 654349 : }
961 :
962 : void *
963 298582 : pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *may_free)
964 : {
965 298582 : PgStat_HashKey key = {0};
966 : PgStat_EntryRef *entry_ref;
967 : void *stats_data;
968 298582 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
969 :
970 : /* should be called from backends */
971 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
972 : Assert(!kind_info->fixed_amount);
973 :
974 : /*
975 : * Initialize *may_free to false. We'll change it to true later if we end
976 : * up allocating the result in the caller's context and not caching it.
977 : */
978 298582 : if (may_free)
979 286437 : *may_free = false;
980 :
981 298582 : pgstat_prep_snapshot();
982 :
983 298582 : key.kind = kind;
984 298582 : key.dboid = dboid;
985 298582 : key.objid = objid;
986 :
987 : /* if we need to build a full snapshot, do so */
988 298582 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
989 299 : pgstat_build_snapshot();
990 :
991 : /* if caching is desired, look up in cache */
992 298582 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
993 : {
994 7277 : PgStat_SnapshotEntry *entry = NULL;
995 :
996 7277 : entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
997 :
998 7277 : if (entry)
999 612 : return entry->data;
1000 :
1001 : /*
1002 : * If we built a full snapshot and the key is not in
1003 : * pgStatLocal.snapshot.stats, there are no matching stats.
1004 : */
1005 6665 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1006 16 : return NULL;
1007 : }
1008 :
1009 297954 : pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
1010 :
1011 297954 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1012 :
1013 297954 : if (entry_ref == NULL || entry_ref->shared_entry->dropped)
1014 : {
1015 : /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
1016 7394 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
1017 : {
1018 1353 : PgStat_SnapshotEntry *entry = NULL;
1019 : bool found;
1020 :
1021 1353 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1022 : Assert(!found);
1023 1353 : entry->data = NULL;
1024 : }
1025 7394 : return NULL;
1026 : }
1027 :
1028 : /*
1029 : * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
1030 : * otherwise we could quickly end up with a fair bit of memory used due to
1031 : * repeated accesses.
1032 : */
1033 290560 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1034 : {
1035 285264 : stats_data = palloc(kind_info->shared_data_len);
1036 :
1037 : /*
1038 : * Since we allocated the result in the caller's context and aren't
1039 : * caching it, the caller can safely pfree() it.
1040 : */
1041 285264 : if (may_free)
1042 281711 : *may_free = true;
1043 : }
1044 : else
1045 5296 : stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1046 5296 : kind_info->shared_data_len);
1047 :
1048 290560 : (void) pgstat_lock_entry_shared(entry_ref, false);
1049 581120 : memcpy(stats_data,
1050 290560 : pgstat_get_entry_data(kind, entry_ref->shared_stats),
1051 290560 : kind_info->shared_data_len);
1052 290560 : pgstat_unlock_entry(entry_ref);
1053 :
1054 290560 : if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1055 : {
1056 5296 : PgStat_SnapshotEntry *entry = NULL;
1057 : bool found;
1058 :
1059 5296 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1060 5296 : entry->data = stats_data;
1061 : }
1062 :
1063 290560 : return stats_data;
1064 : }
1065 :
1066 : /*
1067 : * If a stats snapshot has been taken, return the timestamp at which that was
1068 : * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1069 : * false.
1070 : */
1071 : TimestampTz
1072 40 : pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1073 : {
1074 40 : if (force_stats_snapshot_clear)
1075 12 : pgstat_clear_snapshot();
1076 :
1077 40 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1078 : {
1079 16 : *have_snapshot = true;
1080 16 : return pgStatLocal.snapshot.snapshot_timestamp;
1081 : }
1082 :
1083 24 : *have_snapshot = false;
1084 :
1085 24 : return 0;
1086 : }
1087 :
1088 : bool
1089 95 : pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1090 : {
1091 : /* fixed-numbered stats always exist */
1092 95 : if (pgstat_get_kind_info(kind)->fixed_amount)
1093 8 : return true;
1094 :
1095 87 : return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1096 : }
1097 :
1098 : /*
1099 : * Ensure snapshot for fixed-numbered 'kind' exists.
1100 : *
1101 : * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1102 : * massaging the data into the desired format.
1103 : */
1104 : void
1105 283 : pgstat_snapshot_fixed(PgStat_Kind kind)
1106 : {
1107 : Assert(pgstat_is_kind_valid(kind));
1108 : Assert(pgstat_get_kind_info(kind)->fixed_amount);
1109 :
1110 283 : if (force_stats_snapshot_clear)
1111 0 : pgstat_clear_snapshot();
1112 :
1113 283 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1114 12 : pgstat_build_snapshot();
1115 : else
1116 271 : pgstat_build_snapshot_fixed(kind);
1117 :
1118 283 : if (pgstat_is_kind_builtin(kind))
1119 : Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1120 5 : else if (pgstat_is_kind_custom(kind))
1121 : Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1122 283 : }
1123 :
1124 : static void
1125 24528 : pgstat_init_snapshot_fixed(void)
1126 : {
1127 : /*
1128 : * Initialize fixed-numbered statistics data in snapshots, only for custom
1129 : * stats kinds.
1130 : */
1131 245280 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1132 : {
1133 220752 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1134 :
1135 220752 : if (!kind_info || !kind_info->fixed_amount)
1136 220702 : continue;
1137 :
1138 50 : pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1139 50 : MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1140 : }
1141 24528 : }
1142 :
1143 : static void
1144 298612 : pgstat_prep_snapshot(void)
1145 : {
1146 298612 : if (force_stats_snapshot_clear)
1147 12 : pgstat_clear_snapshot();
1148 :
1149 298612 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1150 7307 : pgStatLocal.snapshot.stats != NULL)
1151 297802 : return;
1152 :
1153 810 : if (!pgStatLocal.snapshot.context)
1154 810 : pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1155 : "PgStat Snapshot",
1156 : ALLOCSET_SMALL_SIZES);
1157 :
1158 810 : pgStatLocal.snapshot.stats =
1159 810 : pgstat_snapshot_create(pgStatLocal.snapshot.context,
1160 : PGSTAT_SNAPSHOT_HASH_SIZE,
1161 : NULL);
1162 : }
1163 :
1164 : static void
1165 311 : pgstat_build_snapshot(void)
1166 : {
1167 : dshash_seq_status hstat;
1168 : PgStatShared_HashEntry *p;
1169 :
1170 : /* should only be called when we need a snapshot */
1171 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1172 :
1173 : /* snapshot already built */
1174 311 : if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1175 281 : return;
1176 :
1177 30 : pgstat_prep_snapshot();
1178 :
1179 : Assert(pgStatLocal.snapshot.stats->members == 0);
1180 :
1181 30 : pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1182 :
1183 : /*
1184 : * Snapshot all variable stats.
1185 : */
1186 30 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1187 36633 : while ((p = dshash_seq_next(&hstat)) != NULL)
1188 : {
1189 36603 : PgStat_Kind kind = p->key.kind;
1190 36603 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1191 : bool found;
1192 : PgStat_SnapshotEntry *entry;
1193 : PgStatShared_Common *stats_data;
1194 :
1195 : /*
1196 : * Check if the stats object should be included in the snapshot.
1197 : * Unless the stats kind can be accessed from all databases (e.g.,
1198 : * database stats themselves), we only include stats for the current
1199 : * database or objects not associated with a database (e.g. shared
1200 : * relations).
1201 : */
1202 36603 : if (p->key.dboid != MyDatabaseId &&
1203 9857 : p->key.dboid != InvalidOid &&
1204 8162 : !kind_info->accessed_across_databases)
1205 8174 : continue;
1206 :
1207 28531 : if (p->dropped)
1208 102 : continue;
1209 :
1210 : Assert(pg_atomic_read_u32(&p->refcount) > 0);
1211 :
1212 28429 : stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1213 : Assert(stats_data);
1214 :
1215 28429 : entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1216 : Assert(!found);
1217 :
1218 28429 : entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1219 : pgstat_get_entry_len(kind));
1220 :
1221 : /*
1222 : * Acquire the LWLock directly instead of using
1223 : * pg_stat_lock_entry_shared() which requires a reference.
1224 : */
1225 28429 : LWLockAcquire(&stats_data->lock, LW_SHARED);
1226 28429 : memcpy(entry->data,
1227 28429 : pgstat_get_entry_data(kind, stats_data),
1228 : pgstat_get_entry_len(kind));
1229 28429 : LWLockRelease(&stats_data->lock);
1230 : }
1231 30 : dshash_seq_term(&hstat);
1232 :
1233 : /*
1234 : * Build snapshot of all fixed-numbered stats.
1235 : */
1236 990 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1237 : {
1238 960 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1239 :
1240 960 : if (!kind_info)
1241 570 : continue;
1242 390 : if (!kind_info->fixed_amount)
1243 : {
1244 : Assert(kind_info->snapshot_cb == NULL);
1245 180 : continue;
1246 : }
1247 :
1248 210 : pgstat_build_snapshot_fixed(kind);
1249 : }
1250 :
1251 30 : pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1252 : }
1253 :
1254 : static void
1255 5879 : pgstat_build_snapshot_fixed(PgStat_Kind kind)
1256 : {
1257 5879 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1258 : int idx;
1259 : bool *valid;
1260 :
1261 : /* Position in fixed_valid or custom_valid */
1262 5879 : if (pgstat_is_kind_builtin(kind))
1263 : {
1264 5873 : idx = kind;
1265 5873 : valid = pgStatLocal.snapshot.fixed_valid;
1266 : }
1267 : else
1268 : {
1269 6 : idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1270 6 : valid = pgStatLocal.snapshot.custom_valid;
1271 : }
1272 :
1273 : Assert(kind_info->fixed_amount);
1274 : Assert(kind_info->snapshot_cb != NULL);
1275 :
1276 5879 : if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1277 : {
1278 : /* rebuild every time */
1279 5413 : valid[idx] = false;
1280 : }
1281 466 : else if (valid[idx])
1282 : {
1283 : /* in snapshot mode we shouldn't get called again */
1284 : Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1285 6 : return;
1286 : }
1287 :
1288 : Assert(!valid[idx]);
1289 :
1290 5873 : kind_info->snapshot_cb();
1291 :
1292 : Assert(!valid[idx]);
1293 5873 : valid[idx] = true;
1294 : }
1295 :
1296 :
1297 : /* ------------------------------------------------------------
1298 : * Backend-local pending stats infrastructure
1299 : * ------------------------------------------------------------
1300 : */
1301 :
1302 : /*
1303 : * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1304 : * stats if not already done.
1305 : *
1306 : * If created_entry is non-NULL, it'll be set to true if the entry is newly
1307 : * created, false otherwise.
1308 : */
1309 : PgStat_EntryRef *
1310 2401673 : pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1311 : {
1312 : PgStat_EntryRef *entry_ref;
1313 :
1314 : /* need to be able to flush out */
1315 : Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1316 :
1317 2401673 : if (unlikely(!pgStatPendingContext))
1318 : {
1319 20058 : pgStatPendingContext =
1320 20058 : AllocSetContextCreate(TopMemoryContext,
1321 : "PgStat Pending",
1322 : ALLOCSET_SMALL_SIZES);
1323 : }
1324 :
1325 2401673 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1326 : true, created_entry);
1327 :
1328 2401673 : if (entry_ref->pending == NULL)
1329 : {
1330 1226112 : size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1331 :
1332 : Assert(entrysize != (size_t) -1);
1333 :
1334 1226112 : entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1335 1226112 : dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1336 : }
1337 :
1338 2401673 : return entry_ref;
1339 : }
1340 :
1341 : /*
1342 : * Return an existing stats entry, or NULL.
1343 : *
1344 : * This should only be used for helper function for pgstatfuncs.c - outside of
1345 : * that it shouldn't be needed.
1346 : */
1347 : PgStat_EntryRef *
1348 56 : pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1349 : {
1350 : PgStat_EntryRef *entry_ref;
1351 :
1352 56 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1353 :
1354 56 : if (entry_ref == NULL || entry_ref->pending == NULL)
1355 20 : return NULL;
1356 :
1357 36 : return entry_ref;
1358 : }
1359 :
1360 : void
1361 1226112 : pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1362 : {
1363 1226112 : PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1364 1226112 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1365 1226112 : void *pending_data = entry_ref->pending;
1366 :
1367 : Assert(pending_data != NULL);
1368 : /* !fixed_amount stats should be handled explicitly */
1369 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1370 :
1371 1226112 : if (kind_info->delete_pending_cb)
1372 1159635 : kind_info->delete_pending_cb(entry_ref);
1373 :
1374 1226112 : pfree(pending_data);
1375 1226112 : entry_ref->pending = NULL;
1376 :
1377 1226112 : dlist_delete(&entry_ref->pending_node);
1378 1226112 : }
1379 :
1380 : /*
1381 : * Flush out pending variable-numbered stats.
1382 : */
1383 : static bool
1384 41054 : pgstat_flush_pending_entries(bool nowait)
1385 : {
1386 41054 : bool have_pending = false;
1387 41054 : dlist_node *cur = NULL;
1388 :
1389 : /*
1390 : * Need to be a bit careful iterating over the list of pending entries.
1391 : * Processing a pending entry may queue further pending entries to the end
1392 : * of the list that we want to process, so a simple iteration won't do.
1393 : * Further complicating matters is that we want to delete the current
1394 : * entry in each iteration from the list if we flushed successfully.
1395 : *
1396 : * So we just keep track of the next pointer in each loop iteration.
1397 : */
1398 41054 : if (!dlist_is_empty(&pgStatPending))
1399 38255 : cur = dlist_head_node(&pgStatPending);
1400 :
1401 1221887 : while (cur)
1402 : {
1403 1180833 : PgStat_EntryRef *entry_ref =
1404 : dlist_container(PgStat_EntryRef, pending_node, cur);
1405 1180833 : PgStat_HashKey key = entry_ref->shared_entry->key;
1406 1180833 : PgStat_Kind kind = key.kind;
1407 1180833 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1408 : bool did_flush;
1409 : dlist_node *next;
1410 :
1411 : Assert(!kind_info->fixed_amount);
1412 : Assert(kind_info->flush_pending_cb != NULL);
1413 :
1414 : /* flush the stats, if possible */
1415 1180833 : did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1416 :
1417 : Assert(did_flush || nowait);
1418 :
1419 : /* determine next entry, before deleting the pending entry */
1420 1180833 : if (dlist_has_next(&pgStatPending, cur))
1421 1142578 : next = dlist_next_node(&pgStatPending, cur);
1422 : else
1423 38255 : next = NULL;
1424 :
1425 : /* if successfully flushed, remove entry */
1426 1180833 : if (did_flush)
1427 1180821 : pgstat_delete_pending_entry(entry_ref);
1428 : else
1429 12 : have_pending = true;
1430 :
1431 1180833 : cur = next;
1432 : }
1433 :
1434 : Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1435 :
1436 41054 : return have_pending;
1437 : }
1438 :
1439 :
1440 : /* ------------------------------------------------------------
1441 : * Helper / infrastructure functions
1442 : * ------------------------------------------------------------
1443 : */
1444 :
1445 : PgStat_Kind
1446 99 : pgstat_get_kind_from_str(char *kind_str)
1447 : {
1448 307 : for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1449 : {
1450 303 : if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1451 95 : return kind;
1452 : }
1453 :
1454 : /* Check the custom set of cumulative stats */
1455 4 : if (pgstat_kind_custom_infos)
1456 : {
1457 0 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1458 : {
1459 0 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1460 :
1461 0 : if (pgstat_kind_custom_infos[idx] &&
1462 0 : pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1463 0 : return kind;
1464 : }
1465 : }
1466 :
1467 4 : ereport(ERROR,
1468 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1469 : errmsg("invalid statistics kind: \"%s\"", kind_str)));
1470 : return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1471 : }
1472 :
1473 : static inline bool
1474 497724 : pgstat_is_kind_valid(PgStat_Kind kind)
1475 : {
1476 497724 : return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1477 : }
1478 :
1479 : const PgStat_KindInfo *
1480 8760856 : pgstat_get_kind_info(PgStat_Kind kind)
1481 : {
1482 8760856 : if (pgstat_is_kind_builtin(kind))
1483 7228191 : return &pgstat_kind_builtin_infos[kind];
1484 :
1485 1532665 : if (pgstat_is_kind_custom(kind))
1486 : {
1487 848135 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1488 :
1489 848135 : if (pgstat_kind_custom_infos == NULL ||
1490 1847 : pgstat_kind_custom_infos[idx] == NULL)
1491 847667 : return NULL;
1492 468 : return pgstat_kind_custom_infos[idx];
1493 : }
1494 :
1495 684530 : return NULL;
1496 : }
1497 :
1498 : /*
1499 : * Register a new stats kind.
1500 : *
1501 : * PgStat_Kinds must be globally unique across all extensions. Refer
1502 : * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1503 : * unique ID for your extension, to avoid conflicts with other extension
1504 : * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1505 : * needlessly reserving a new ID.
1506 : */
1507 : void
1508 6 : pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1509 : {
1510 6 : uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1511 :
1512 6 : if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1513 0 : ereport(ERROR,
1514 : (errmsg("custom cumulative statistics name is invalid"),
1515 : errhint("Provide a non-empty name for the custom cumulative statistics.")));
1516 :
1517 6 : if (!pgstat_is_kind_custom(kind))
1518 0 : ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1519 : errhint("Provide a custom cumulative statistics ID between %u and %u.",
1520 : PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1521 :
1522 6 : if (!process_shared_preload_libraries_in_progress)
1523 0 : ereport(ERROR,
1524 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1525 : errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1526 :
1527 : /*
1528 : * Check some data for fixed-numbered stats.
1529 : */
1530 6 : if (kind_info->fixed_amount)
1531 : {
1532 3 : if (kind_info->shared_size == 0)
1533 0 : ereport(ERROR,
1534 : (errmsg("custom cumulative statistics property is invalid"),
1535 : errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1536 3 : if (kind_info->track_entry_count)
1537 0 : ereport(ERROR,
1538 : (errmsg("custom cumulative statistics property is invalid"),
1539 : errhint("Custom cumulative statistics cannot use entry count tracking for fixed-numbered objects.")));
1540 : }
1541 :
1542 : /*
1543 : * If pgstat_kind_custom_infos is not available yet, allocate it.
1544 : */
1545 6 : if (pgstat_kind_custom_infos == NULL)
1546 : {
1547 3 : pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1548 3 : MemoryContextAllocZero(TopMemoryContext,
1549 : sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1550 : }
1551 :
1552 6 : if (pgstat_kind_custom_infos[idx] != NULL &&
1553 0 : pgstat_kind_custom_infos[idx]->name != NULL)
1554 0 : ereport(ERROR,
1555 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1556 : errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1557 : pgstat_kind_custom_infos[idx]->name)));
1558 :
1559 : /* check for existing custom stats with the same name */
1560 60 : for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1561 : {
1562 54 : uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1563 :
1564 54 : if (pgstat_kind_custom_infos[existing_idx] == NULL)
1565 51 : continue;
1566 3 : if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1567 0 : ereport(ERROR,
1568 : (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1569 : errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1570 : }
1571 :
1572 : /* Register it */
1573 6 : pgstat_kind_custom_infos[idx] = kind_info;
1574 6 : ereport(LOG,
1575 : (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1576 : kind_info->name, kind)));
1577 6 : }
1578 :
/*
 * Assert that the stats system is usable: pgstat_initialize() has run and
 * pgstat_shutdown() has not.  Stats should only be reported inside that
 * window; this check is put in a few central places to catch violations of
 * the rule more easily.
 *
 * Only compiled in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized);
	Assert(!pgstat_is_shutdown);
}
#endif
1591 :
1592 :
1593 : /* ------------------------------------------------------------
1594 : * reading and writing of on-disk stats file
1595 : * ------------------------------------------------------------
1596 : */
1597 :
/*
 * Helper for pgstat_write_statsfile(): write the 'len' bytes at 'ptr' to
 * 'fpout' as one item.
 *
 * Write failures are not reported from here.  The caller checks the stream
 * with ferror() once it is done writing the stats.
 */
void
pgstat_write_chunk(FILE *fpout, void *ptr, size_t len)
{
	size_t		nitems = fwrite(ptr, len, 1, fpout);

	/* Intentionally ignored, see above. */
	(void) nitems;
}
1609 :
1610 : /*
1611 : * This function is called in the last process that is accessing the shared
1612 : * stats so locking is not required.
1613 : */
1614 : static void
1615 771 : pgstat_write_statsfile(void)
1616 : {
1617 : FILE *fpout;
1618 : int32 format_id;
1619 771 : const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1620 771 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1621 : dshash_seq_status hstat;
1622 : PgStatShared_HashEntry *ps;
1623 :
1624 : pgstat_assert_is_up();
1625 :
1626 : /* should be called only by the checkpointer or single user mode */
1627 : Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1628 :
1629 : /* we're shutting down, so it's ok to just override this */
1630 771 : pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1631 :
1632 771 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
1633 :
1634 : /*
1635 : * Open the statistics temp file to write out the current values.
1636 : */
1637 771 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
1638 771 : if (fpout == NULL)
1639 : {
1640 0 : ereport(LOG,
1641 : (errcode_for_file_access(),
1642 : errmsg("could not open temporary statistics file \"%s\": %m",
1643 : tmpfile)));
1644 0 : return;
1645 : }
1646 :
1647 : /*
1648 : * Write the file header --- currently just a format ID.
1649 : */
1650 771 : format_id = PGSTAT_FILE_FORMAT_ID;
1651 771 : pgstat_write_chunk_s(fpout, &format_id);
1652 :
1653 : /* Write various stats structs for fixed number of objects */
1654 25443 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1655 : {
1656 : char *ptr;
1657 24672 : const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1658 :
1659 24672 : if (!info || !info->fixed_amount)
1660 19274 : continue;
1661 :
1662 5398 : if (pgstat_is_kind_builtin(kind))
1663 : Assert(info->snapshot_ctl_off != 0);
1664 :
1665 : /* skip if no need to write to file */
1666 5398 : if (!info->write_to_file)
1667 0 : continue;
1668 :
1669 5398 : pgstat_build_snapshot_fixed(kind);
1670 5398 : if (pgstat_is_kind_builtin(kind))
1671 5397 : ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1672 : else
1673 1 : ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1674 :
1675 5398 : fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1676 5398 : pgstat_write_chunk_s(fpout, &kind);
1677 5398 : pgstat_write_chunk(fpout, ptr, info->shared_data_len);
1678 : }
1679 :
1680 : /*
1681 : * Walk through the stats entries
1682 : */
1683 771 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1684 243583 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1685 : {
1686 : PgStatShared_Common *shstats;
1687 242812 : const PgStat_KindInfo *kind_info = NULL;
1688 :
1689 242812 : CHECK_FOR_INTERRUPTS();
1690 :
1691 : /*
1692 : * We should not see any "dropped" entries when writing the stats
1693 : * file, as all backends and auxiliary processes should have cleaned
1694 : * up their references before they terminated.
1695 : *
1696 : * However, since we are already shutting down, it is not worth
1697 : * crashing the server over any potential cleanup issues, so we simply
1698 : * skip such entries if encountered.
1699 : */
1700 : Assert(!ps->dropped);
1701 242812 : if (ps->dropped)
1702 0 : continue;
1703 :
1704 : /*
1705 : * This discards data related to custom stats kinds that are unknown
1706 : * to this process.
1707 : */
1708 242812 : if (!pgstat_is_kind_valid(ps->key.kind))
1709 : {
1710 0 : elog(WARNING, "found unknown stats entry %u/%u/%" PRIu64,
1711 : ps->key.kind, ps->key.dboid,
1712 : ps->key.objid);
1713 0 : continue;
1714 : }
1715 :
1716 242812 : shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1717 :
1718 242812 : kind_info = pgstat_get_kind_info(ps->key.kind);
1719 :
1720 : /* if not dropped the valid-entry refcount should exist */
1721 : Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1722 :
1723 : /* skip if no need to write to file */
1724 242812 : if (!kind_info->write_to_file)
1725 131 : continue;
1726 :
1727 242681 : if (!kind_info->to_serialized_name)
1728 : {
1729 : /* normal stats entry, identified by PgStat_HashKey */
1730 242570 : fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1731 242570 : pgstat_write_chunk_s(fpout, &ps->key);
1732 : }
1733 : else
1734 : {
1735 : /* stats entry identified by name on disk (e.g. slots) */
1736 : NameData name;
1737 :
1738 111 : kind_info->to_serialized_name(&ps->key, shstats, &name);
1739 :
1740 111 : fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1741 111 : pgstat_write_chunk_s(fpout, &ps->key.kind);
1742 111 : pgstat_write_chunk_s(fpout, &name);
1743 : }
1744 :
1745 : /* Write except the header part of the entry */
1746 242681 : pgstat_write_chunk(fpout,
1747 : pgstat_get_entry_data(ps->key.kind, shstats),
1748 : pgstat_get_entry_len(ps->key.kind));
1749 :
1750 : /* Write more data for the entry, if required */
1751 242681 : if (kind_info->to_serialized_data)
1752 2 : kind_info->to_serialized_data(&ps->key, shstats, fpout);
1753 : }
1754 771 : dshash_seq_term(&hstat);
1755 :
1756 : /*
1757 : * No more output to be done. Close the temp file and replace the old
1758 : * pgstat.stat with it. The ferror() check replaces testing for error
1759 : * after each individual fputc or fwrite (in pgstat_write_chunk()) above.
1760 : */
1761 771 : fputc(PGSTAT_FILE_ENTRY_END, fpout);
1762 :
1763 771 : if (ferror(fpout))
1764 : {
1765 0 : ereport(LOG,
1766 : (errcode_for_file_access(),
1767 : errmsg("could not write temporary statistics file \"%s\": %m",
1768 : tmpfile)));
1769 0 : FreeFile(fpout);
1770 0 : unlink(tmpfile);
1771 : }
1772 771 : else if (FreeFile(fpout) < 0)
1773 : {
1774 0 : ereport(LOG,
1775 : (errcode_for_file_access(),
1776 : errmsg("could not close temporary statistics file \"%s\": %m",
1777 : tmpfile)));
1778 0 : unlink(tmpfile);
1779 : }
1780 771 : else if (durable_rename(tmpfile, statfile, LOG) < 0)
1781 : {
1782 : /* durable_rename already emitted log message */
1783 0 : unlink(tmpfile);
1784 : }
1785 :
1786 : /* Finish callbacks, if required */
1787 25443 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1788 : {
1789 24672 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1790 :
1791 24672 : if (kind_info && kind_info->finish)
1792 1 : kind_info->finish(STATS_WRITE);
1793 : }
1794 : }
1795 :
/*
 * Helper for pgstat_read_statsfile(): read exactly 'len' bytes from 'fpin'
 * into 'ptr'.
 *
 * Returns true if all 'len' bytes were read, false on a short read.
 */
bool
pgstat_read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}
1802 :
1803 : /*
1804 : * Reads in existing statistics file into memory.
1805 : *
1806 : * This function is called in the only process that is accessing the shared
1807 : * stats so locking is not required.
1808 : */
1809 : static void
1810 894 : pgstat_read_statsfile(void)
1811 : {
1812 : FILE *fpin;
1813 : int32 format_id;
1814 : bool found;
1815 894 : const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1816 894 : PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1817 :
1818 : /* shouldn't be called from postmaster */
1819 : Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1820 :
1821 894 : elog(DEBUG2, "reading stats file \"%s\"", statfile);
1822 :
1823 : /*
1824 : * Try to open the stats file. If it doesn't exist, the backends simply
1825 : * returns zero for anything and statistics simply starts from scratch
1826 : * with empty counters.
1827 : *
1828 : * ENOENT is a possibility if stats collection was previously disabled or
1829 : * has not yet written the stats file for the first time. Any other
1830 : * failure condition is suspicious.
1831 : */
1832 894 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1833 : {
1834 58 : if (errno != ENOENT)
1835 0 : ereport(LOG,
1836 : (errcode_for_file_access(),
1837 : errmsg("could not open statistics file \"%s\": %m",
1838 : statfile)));
1839 58 : pgstat_reset_after_failure();
1840 58 : return;
1841 : }
1842 :
1843 : /*
1844 : * Verify it's of the expected format.
1845 : */
1846 836 : if (!pgstat_read_chunk_s(fpin, &format_id))
1847 : {
1848 0 : elog(WARNING, "could not read format ID");
1849 0 : goto error;
1850 : }
1851 :
1852 836 : if (format_id != PGSTAT_FILE_FORMAT_ID)
1853 : {
1854 1 : elog(WARNING, "found incorrect format ID %d (expected %d)",
1855 : format_id, PGSTAT_FILE_FORMAT_ID);
1856 1 : goto error;
1857 : }
1858 :
1859 : /*
1860 : * We found an existing statistics file. Read it and put all the stats
1861 : * data into place.
1862 : */
1863 : for (;;)
1864 254912 : {
1865 255747 : int t = fgetc(fpin);
1866 :
1867 255747 : switch (t)
1868 : {
1869 5846 : case PGSTAT_FILE_ENTRY_FIXED:
1870 : {
1871 : PgStat_Kind kind;
1872 : const PgStat_KindInfo *info;
1873 : char *ptr;
1874 :
1875 : /* entry for fixed-numbered stats */
1876 5846 : if (!pgstat_read_chunk_s(fpin, &kind))
1877 : {
1878 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1879 0 : goto error;
1880 : }
1881 :
1882 5846 : if (!pgstat_is_kind_valid(kind))
1883 : {
1884 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1885 : kind, t);
1886 0 : goto error;
1887 : }
1888 :
1889 5846 : info = pgstat_get_kind_info(kind);
1890 5846 : if (!info)
1891 : {
1892 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1893 : kind, t);
1894 0 : goto error;
1895 : }
1896 :
1897 5846 : if (!info->fixed_amount)
1898 : {
1899 0 : elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1900 : kind, t);
1901 0 : goto error;
1902 : }
1903 :
1904 : /* Load back stats into shared memory */
1905 5846 : if (pgstat_is_kind_builtin(kind))
1906 5845 : ptr = ((char *) shmem) + info->shared_ctl_off +
1907 5845 : info->shared_data_off;
1908 : else
1909 : {
1910 1 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1911 :
1912 1 : ptr = ((char *) shmem->custom_data[idx]) +
1913 1 : info->shared_data_off;
1914 : }
1915 :
1916 5846 : if (!pgstat_read_chunk(fpin, ptr, info->shared_data_len))
1917 : {
1918 0 : elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1919 : kind, t, info->shared_data_len);
1920 0 : goto error;
1921 : }
1922 :
1923 5846 : break;
1924 : }
1925 249066 : case PGSTAT_FILE_ENTRY_HASH:
1926 : case PGSTAT_FILE_ENTRY_NAME:
1927 : {
1928 : PgStat_HashKey key;
1929 : PgStatShared_HashEntry *p;
1930 : PgStatShared_Common *header;
1931 249066 : const PgStat_KindInfo *kind_info = NULL;
1932 :
1933 249066 : CHECK_FOR_INTERRUPTS();
1934 :
1935 249066 : if (t == PGSTAT_FILE_ENTRY_HASH)
1936 : {
1937 : /* normal stats entry, identified by PgStat_HashKey */
1938 248988 : if (!pgstat_read_chunk_s(fpin, &key))
1939 : {
1940 0 : elog(WARNING, "could not read key for entry of type %c", t);
1941 0 : goto error;
1942 : }
1943 :
1944 248988 : if (!pgstat_is_kind_valid(key.kind))
1945 : {
1946 0 : elog(WARNING, "invalid stats kind for entry %u/%u/%" PRIu64 " of type %c",
1947 : key.kind, key.dboid,
1948 : key.objid, t);
1949 0 : goto error;
1950 : }
1951 :
1952 248988 : kind_info = pgstat_get_kind_info(key.kind);
1953 248988 : if (!kind_info)
1954 : {
1955 0 : elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
1956 : key.kind, key.dboid,
1957 : key.objid, t);
1958 0 : goto error;
1959 : }
1960 : }
1961 : else
1962 : {
1963 : /* stats entry identified by name on disk (e.g. slots) */
1964 : PgStat_Kind kind;
1965 : NameData name;
1966 :
1967 78 : if (!pgstat_read_chunk_s(fpin, &kind))
1968 : {
1969 0 : elog(WARNING, "could not read stats kind for entry of type %c", t);
1970 0 : goto error;
1971 : }
1972 78 : if (!pgstat_read_chunk_s(fpin, &name))
1973 : {
1974 0 : elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1975 : kind, t);
1976 0 : goto error;
1977 : }
1978 78 : if (!pgstat_is_kind_valid(kind))
1979 : {
1980 0 : elog(WARNING, "invalid stats kind %u for entry of type %c",
1981 : kind, t);
1982 0 : goto error;
1983 : }
1984 :
1985 78 : kind_info = pgstat_get_kind_info(kind);
1986 78 : if (!kind_info)
1987 : {
1988 0 : elog(WARNING, "could not find information of kind %u for entry of type %c",
1989 : kind, t);
1990 0 : goto error;
1991 : }
1992 :
1993 78 : if (!kind_info->from_serialized_name)
1994 : {
1995 0 : elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1996 : kind, t);
1997 0 : goto error;
1998 : }
1999 :
2000 78 : if (!kind_info->from_serialized_name(&name, &key))
2001 : {
2002 : /* skip over data for entry we don't care about */
2003 1 : if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
2004 : {
2005 0 : elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
2006 : NameStr(name), kind, t);
2007 0 : goto error;
2008 : }
2009 :
2010 1 : continue;
2011 : }
2012 :
2013 : Assert(key.kind == kind);
2014 : }
2015 :
2016 : /*
2017 : * This intentionally doesn't use pgstat_get_entry_ref() -
2018 : * putting all stats into checkpointer's
2019 : * pgStatEntryRefHash would be wasted effort and memory.
2020 : */
2021 249065 : p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
2022 :
2023 : /* don't allow duplicate entries */
2024 249065 : if (found)
2025 : {
2026 0 : dshash_release_lock(pgStatLocal.shared_hash, p);
2027 0 : elog(WARNING, "found duplicate stats entry %u/%u/%" PRIu64 " of type %c",
2028 : key.kind, key.dboid,
2029 : key.objid, t);
2030 0 : goto error;
2031 : }
2032 :
2033 249065 : header = pgstat_init_entry(key.kind, p);
2034 249065 : dshash_release_lock(pgStatLocal.shared_hash, p);
2035 249065 : if (header == NULL)
2036 : {
2037 : /*
2038 : * It would be tempting to switch this ERROR to a
2039 : * WARNING, but it would mean that all the statistics
2040 : * are discarded when the environment fails on OOM.
2041 : */
2042 0 : elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
2043 : key.kind, key.dboid,
2044 : key.objid, t);
2045 : }
2046 :
2047 249065 : if (!pgstat_read_chunk(fpin,
2048 : pgstat_get_entry_data(key.kind, header),
2049 : pgstat_get_entry_len(key.kind)))
2050 : {
2051 0 : elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c",
2052 : key.kind, key.dboid,
2053 : key.objid, t);
2054 0 : goto error;
2055 : }
2056 :
2057 : /* read more data for the entry, if required */
2058 249065 : if (kind_info->from_serialized_data)
2059 : {
2060 2 : if (!kind_info->from_serialized_data(&key, header, fpin))
2061 : {
2062 0 : elog(WARNING, "could not read auxiliary data for entry %u/%u/%" PRIu64 " of type %c",
2063 : key.kind, key.dboid,
2064 : key.objid, t);
2065 0 : goto error;
2066 : }
2067 : }
2068 :
2069 249065 : break;
2070 : }
2071 835 : case PGSTAT_FILE_ENTRY_END:
2072 :
2073 : /*
2074 : * check that PGSTAT_FILE_ENTRY_END actually signals end of
2075 : * file
2076 : */
2077 835 : if (fgetc(fpin) != EOF)
2078 : {
2079 1 : elog(WARNING, "could not read end-of-file");
2080 1 : goto error;
2081 : }
2082 :
2083 834 : goto done;
2084 :
2085 0 : default:
2086 0 : elog(WARNING, "could not read entry of type %c", t);
2087 0 : goto error;
2088 : }
2089 : }
2090 :
2091 836 : done:
2092 : /* First, cleanup the main stats file */
2093 836 : FreeFile(fpin);
2094 :
2095 836 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2096 836 : unlink(statfile);
2097 :
2098 : /* Finish callbacks, if required */
2099 27588 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2100 : {
2101 26752 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2102 :
2103 26752 : if (kind_info && kind_info->finish)
2104 2 : kind_info->finish(STATS_READ);
2105 : }
2106 :
2107 836 : return;
2108 :
2109 2 : error:
2110 2 : ereport(LOG,
2111 : (errmsg("corrupted statistics file \"%s\"", statfile)));
2112 :
2113 2 : pgstat_reset_after_failure();
2114 :
2115 2 : goto done;
2116 : }
2117 :
2118 : /*
2119 : * Helper to reset / drop stats after a crash or after restoring stats from
2120 : * disk failed, potentially after already loading parts.
2121 : */
2122 : static void
2123 250 : pgstat_reset_after_failure(void)
2124 : {
2125 250 : TimestampTz ts = GetCurrentTimestamp();
2126 :
2127 : /* reset fixed-numbered stats */
2128 8250 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2129 : {
2130 8000 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2131 :
2132 8000 : if (!kind_info || !kind_info->fixed_amount)
2133 6249 : continue;
2134 :
2135 1751 : kind_info->reset_all_cb(ts);
2136 : }
2137 :
2138 : /* and drop variable-numbered ones */
2139 250 : pgstat_drop_all_entries();
2140 250 : }
2141 :
2142 : /*
2143 : * GUC assign_hook for stats_fetch_consistency.
2144 : */
2145 : void
2146 3453 : assign_stats_fetch_consistency(int newval, void *extra)
2147 : {
2148 : /*
2149 : * Changing this value in a transaction may cause snapshot state
2150 : * inconsistencies, so force a clear of the current snapshot on the next
2151 : * snapshot build attempt.
2152 : */
2153 3453 : if (pgstat_fetch_consistency != newval)
2154 2056 : force_stats_snapshot_clear = true;
2155 3453 : }
|