Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "storage/subsystems.h"
18 : #include "utils/memutils.h"
19 : #include "utils/pgstat_internal.h"
20 :
21 :
/* Element count requested when the backend-local entry ref hash is created */
#define PGSTAT_ENTRY_REF_HASH_SIZE	128
23 :
24 : /* hash table entry for finding the PgStat_EntryRef for a key */
25 : typedef struct PgStat_EntryRefHashEntry
26 : {
27 : PgStat_HashKey key; /* hash key */
28 : char status; /* for simplehash use */
29 : PgStat_EntryRef *entry_ref;
30 : } PgStat_EntryRefHashEntry;
31 :
32 :
33 : /* for references to shared statistics entries */
34 : #define SH_PREFIX pgstat_entry_ref_hash
35 : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
36 : #define SH_KEY_TYPE PgStat_HashKey
37 : #define SH_KEY key
38 : #define SH_HASH_KEY(tb, key) \
39 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
40 : #define SH_EQUAL(tb, a, b) \
41 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
42 : #define SH_SCOPE static inline
43 : #define SH_DEFINE
44 : #define SH_DECLARE
45 : #include "lib/simplehash.h"
46 :
47 :
48 : static void pgstat_drop_database_and_contents(Oid dboid);
49 :
50 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
51 :
52 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
53 : static bool pgstat_need_entry_refs_gc(void);
54 : static void pgstat_gc_entry_refs(void);
55 : static void pgstat_release_all_entry_refs(bool discard_pending);
56 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
57 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
58 :
59 : static void pgstat_setup_memcxt(void);
60 :
61 : static void StatsShmemRequest(void *arg);
62 : static void StatsShmemInit(void *arg);
63 :
64 : const ShmemCallbacks StatsShmemCallbacks = {
65 : .request_fn = StatsShmemRequest,
66 : .init_fn = StatsShmemInit,
67 : };
68 :
69 : /* parameter for the shared hash */
70 : static const dshash_parameters dsh_params = {
71 : sizeof(PgStat_HashKey),
72 : sizeof(PgStatShared_HashEntry),
73 : pgstat_cmp_hash_key,
74 : pgstat_hash_hash_key,
75 : dshash_memcpy,
76 : LWTRANCHE_PGSTATS_HASH
77 : };
78 :
79 :
80 : /*
81 : * Backend local references to shared stats entries. If there are pending
82 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
83 : * list.
84 : *
85 : * When a stats entry is dropped each backend needs to release its reference
86 : * to it before the memory can be released. To trigger that
87 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
88 : * compares to their copy of pgStatSharedRefAge on a regular basis.
89 : */
90 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
91 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
92 :
93 : /*
94 : * Memory contexts containing the pgStatEntryRefHash table and the
95 : * pgStatSharedRef entries respectively. Kept separate to make it easier to
96 : * track / attribute memory usage.
97 : */
98 : static MemoryContext pgStatSharedRefContext = NULL;
99 : static MemoryContext pgStatEntryRefHashContext = NULL;
100 :
101 :
102 : /* ------------------------------------------------------------
103 : * Public functions called from postmaster follow
104 : * ------------------------------------------------------------
105 : */
106 :
107 : /*
108 : * The size of the shared memory allocation for stats stored in the shared
109 : * stats hash table. This allocation will be done as part of the main shared
110 : * memory, rather than dynamic shared memory, allowing it to be initialized in
111 : * postmaster.
112 : */
113 : static Size
114 4927 : pgstat_dsa_init_size(void)
115 : {
116 : Size sz;
117 :
118 : /*
119 : * The dshash header / initial buckets array needs to fit into "plain"
120 : * shared memory, but it's beneficial to not need dsm segments
121 : * immediately. A size of 256kB seems works well and is not
122 : * disproportional compared to other constant sized shared memory
123 : * allocations. NB: To avoid DSMs further, the user can configure
124 : * min_dynamic_shared_memory.
125 : */
126 4927 : sz = 256 * 1024;
127 : Assert(dsa_minimum_size() <= sz);
128 4927 : return MAXALIGN(sz);
129 : }
130 :
131 : /*
132 : * Compute shared memory space needed for cumulative statistics
133 : */
134 : static Size
135 1234 : StatsShmemSize(void)
136 : {
137 : Size sz;
138 :
139 1234 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
140 1234 : sz = add_size(sz, pgstat_dsa_init_size());
141 :
142 : /* Add shared memory for all the custom fixed-numbered statistics */
143 12340 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
144 : {
145 11106 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
146 :
147 11106 : if (!kind_info)
148 11100 : continue;
149 6 : if (!kind_info->fixed_amount)
150 3 : continue;
151 :
152 : Assert(kind_info->shared_size != 0);
153 3 : sz = add_size(sz, MAXALIGN(kind_info->shared_size));
154 : }
155 :
156 1234 : return sz;
157 : }
158 :
159 : /*
160 : * Register shared memory area for cumulative statistics
161 : */
162 : static void
163 1234 : StatsShmemRequest(void *arg)
164 : {
165 1234 : ShmemRequestStruct(.name = "Shared Memory Stats",
166 : .size = StatsShmemSize(),
167 : .ptr = (void **) &pgStatLocal.shmem,
168 : );
169 1234 : }
170 :
171 : /*
172 : * Initialize cumulative statistics system during startup
173 : */
174 : static void
175 1231 : StatsShmemInit(void *arg)
176 : {
177 : dsa_area *dsa;
178 : dshash_table *dsh;
179 1231 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
180 1231 : char *p = (char *) ctl;
181 :
182 : /* the allocation of pgStatLocal.shmem itself */
183 1231 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
184 :
185 : /*
186 : * Create a small dsa allocation in plain shared memory. This is required
187 : * because postmaster cannot use dsm segments. It also provides a small
188 : * efficiency win.
189 : */
190 1231 : ctl->raw_dsa_area = p;
191 1231 : p += pgstat_dsa_init_size();
192 1231 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
193 : pgstat_dsa_init_size(),
194 : LWTRANCHE_PGSTATS_DSA, NULL);
195 1231 : dsa_pin(dsa);
196 :
197 : /*
198 : * To ensure dshash is created in "plain" shared memory, temporarily limit
199 : * size of dsa to the initial size of the dsa.
200 : */
201 1231 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
202 :
203 : /*
204 : * With the limit in place, create the dshash table. XXX: It'd be nice if
205 : * there were dshash_create_in_place().
206 : */
207 1231 : dsh = dshash_create(dsa, &dsh_params, NULL);
208 1231 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
209 :
210 : /* lift limit set above */
211 1231 : dsa_set_size_limit(dsa, -1);
212 :
213 : /*
214 : * Postmaster will never access these again, thus free the local
215 : * dsa/dshash references.
216 : */
217 1231 : dshash_detach(dsh);
218 1231 : dsa_detach(dsa);
219 :
220 1231 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
221 :
222 : /* Do the per-kind initialization */
223 40623 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
224 : {
225 39392 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
226 : char *ptr;
227 :
228 39392 : if (!kind_info)
229 23383 : continue;
230 :
231 : /* initialize entry count tracking */
232 16009 : if (kind_info->track_entry_count)
233 3 : pg_atomic_init_u64(&ctl->entry_counts[kind - 1], 0);
234 :
235 : /* initialize fixed-numbered stats */
236 16009 : if (kind_info->fixed_amount)
237 : {
238 8620 : if (pgstat_is_kind_builtin(kind))
239 8617 : ptr = ((char *) ctl) + kind_info->shared_ctl_off;
240 : else
241 : {
242 3 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
243 :
244 : Assert(kind_info->shared_size != 0);
245 3 : ctl->custom_data[idx] = p;
246 3 : p += MAXALIGN(kind_info->shared_size);
247 3 : ptr = ctl->custom_data[idx];
248 : }
249 :
250 8620 : kind_info->init_shmem_cb(ptr);
251 : }
252 : }
253 1231 : }
254 :
255 : void
256 24786 : pgstat_attach_shmem(void)
257 : {
258 : MemoryContext oldcontext;
259 :
260 : Assert(pgStatLocal.dsa == NULL);
261 :
262 : /* stats shared memory persists for the backend lifetime */
263 24786 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
264 :
265 24786 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
266 : NULL);
267 24786 : dsa_pin_mapping(pgStatLocal.dsa);
268 :
269 49572 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
270 24786 : pgStatLocal.shmem->hash_handle,
271 : NULL);
272 :
273 24786 : MemoryContextSwitchTo(oldcontext);
274 24786 : }
275 :
276 : void
277 24786 : pgstat_detach_shmem(void)
278 : {
279 : Assert(pgStatLocal.dsa);
280 :
281 : /* we shouldn't leave references to shared stats */
282 24786 : pgstat_release_all_entry_refs(false);
283 :
284 24786 : dshash_detach(pgStatLocal.shared_hash);
285 24786 : pgStatLocal.shared_hash = NULL;
286 :
287 24786 : dsa_detach(pgStatLocal.dsa);
288 :
289 : /*
290 : * dsa_detach() does not decrement the DSA reference count as no segment
291 : * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
292 : * be registered. Hence, release it manually now.
293 : */
294 24786 : dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
295 :
296 24786 : pgStatLocal.dsa = NULL;
297 24786 : }
298 :
299 :
300 : /* ------------------------------------------------------------
301 : * Maintenance of shared memory stats entries
302 : * ------------------------------------------------------------
303 : */
304 :
305 : /*
306 : * Initialize entry newly-created.
307 : *
308 : * Returns NULL in the event of an allocation failure, so as callers can
309 : * take cleanup actions as the entry initialized is already inserted in the
310 : * shared hashtable.
311 : */
312 : PgStatShared_Common *
313 392543 : pgstat_init_entry(PgStat_Kind kind,
314 : PgStatShared_HashEntry *shhashent)
315 : {
316 : /* Create new stats entry. */
317 : dsa_pointer chunk;
318 : PgStatShared_Common *shheader;
319 392543 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
320 :
321 : /*
322 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
323 : * can't be freed before the initialization because it can't be found as
324 : * long as we hold the dshash partition lock. Caller needs to increase
325 : * further if a longer lived reference is needed.
326 : */
327 392543 : pg_atomic_init_u32(&shhashent->refcount, 1);
328 :
329 : /*
330 : * Initialize "generation" to 0, as freshly created.
331 : */
332 392543 : pg_atomic_init_u32(&shhashent->generation, 0);
333 392543 : shhashent->dropped = false;
334 :
335 392543 : chunk = dsa_allocate_extended(pgStatLocal.dsa,
336 392543 : kind_info->shared_size,
337 : DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM);
338 392543 : if (chunk == InvalidDsaPointer)
339 0 : return NULL;
340 :
341 392543 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
342 392543 : shheader->magic = 0xdeadbeef;
343 :
344 : /* Link the new entry from the hash entry. */
345 392543 : shhashent->body = chunk;
346 :
347 : /* Increment entry count, if required. */
348 392543 : if (kind_info->track_entry_count)
349 6 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->entry_counts[kind - 1], 1);
350 :
351 392543 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
352 :
353 392543 : return shheader;
354 : }
355 :
356 : static PgStatShared_Common *
357 29 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
358 : {
359 : PgStatShared_Common *shheader;
360 :
361 29 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
362 :
363 : /* mark as not dropped anymore */
364 29 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
365 :
366 : /*
367 : * Increment "generation", to let any backend with local references know
368 : * that what they point to is outdated.
369 : */
370 29 : pg_atomic_fetch_add_u32(&shhashent->generation, 1);
371 29 : shhashent->dropped = false;
372 :
373 : /* reinitialize content */
374 : Assert(shheader->magic == 0xdeadbeef);
375 29 : memset(pgstat_get_entry_data(kind, shheader), 0,
376 : pgstat_get_entry_len(kind));
377 :
378 29 : return shheader;
379 : }
380 :
381 : static void
382 4533450 : pgstat_setup_shared_refs(void)
383 : {
384 4533450 : if (likely(pgStatEntryRefHash != NULL))
385 4512734 : return;
386 :
387 20716 : pgStatEntryRefHash =
388 20716 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
389 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
390 20716 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
391 : Assert(pgStatSharedRefAge != 0);
392 : }
393 :
394 : /*
395 : * Helper function for pgstat_get_entry_ref().
396 : */
397 : static void
398 1131413 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
399 : PgStatShared_HashEntry *shhashent,
400 : PgStatShared_Common *shheader)
401 : {
402 : Assert(shheader->magic == 0xdeadbeef);
403 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
404 :
405 1131413 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
406 :
407 1131413 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
408 :
409 1131413 : entry_ref->shared_stats = shheader;
410 1131413 : entry_ref->shared_entry = shhashent;
411 1131413 : entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
412 1131413 : }
413 :
414 : /*
415 : * Helper function for pgstat_get_entry_ref().
416 : */
417 : static bool
418 4533450 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
419 : {
420 : bool found;
421 : PgStat_EntryRefHashEntry *cache_entry;
422 :
423 : /*
424 : * We immediately insert a cache entry, because it avoids 1) multiple
425 : * hashtable lookups in case of a cache miss 2) having to deal with
426 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
427 : */
428 :
429 4533450 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
430 :
431 4533450 : if (!found || !cache_entry->entry_ref)
432 1238989 : {
433 : PgStat_EntryRef *entry_ref;
434 :
435 1238989 : cache_entry->entry_ref = entry_ref =
436 1238989 : MemoryContextAlloc(pgStatSharedRefContext,
437 : sizeof(PgStat_EntryRef));
438 1238989 : entry_ref->shared_stats = NULL;
439 1238989 : entry_ref->shared_entry = NULL;
440 1238989 : entry_ref->pending = NULL;
441 :
442 1238989 : found = false;
443 : }
444 3294461 : else if (cache_entry->entry_ref->shared_stats == NULL)
445 : {
446 : Assert(cache_entry->entry_ref->pending == NULL);
447 0 : found = false;
448 : }
449 : else
450 : {
451 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
452 :
453 3294461 : entry_ref = cache_entry->entry_ref;
454 : Assert(entry_ref->shared_entry != NULL);
455 : Assert(entry_ref->shared_stats != NULL);
456 :
457 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
458 : /* should have at least our reference */
459 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
460 : }
461 :
462 4533450 : *entry_ref_p = cache_entry->entry_ref;
463 4533450 : return found;
464 : }
465 :
466 : /*
467 : * Get a shared stats reference. If create is true, the shared stats object is
468 : * created if it does not exist.
469 : *
470 : * When create is true, and created_entry is non-NULL, it'll be set to true
471 : * if the entry is newly created, false otherwise.
472 : */
473 : PgStat_EntryRef *
474 4533450 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
475 : bool *created_entry)
476 : {
477 4533450 : PgStat_HashKey key = {0};
478 : PgStatShared_HashEntry *shhashent;
479 4533450 : PgStatShared_Common *shheader = NULL;
480 : PgStat_EntryRef *entry_ref;
481 :
482 4533450 : key.kind = kind;
483 4533450 : key.dboid = dboid;
484 4533450 : key.objid = objid;
485 :
486 : /*
487 : * passing in created_entry only makes sense if we possibly could create
488 : * entry.
489 : */
490 : Assert(create || created_entry == NULL);
491 : pgstat_assert_is_up();
492 : Assert(pgStatLocal.shared_hash != NULL);
493 : Assert(!pgStatLocal.shmem->is_shutdown);
494 :
495 4533450 : pgstat_setup_memcxt();
496 4533450 : pgstat_setup_shared_refs();
497 :
498 4533450 : if (created_entry != NULL)
499 116 : *created_entry = false;
500 :
501 : /*
502 : * Check if other backends dropped stats that could not be deleted because
503 : * somebody held references to it. If so, check this backend's references.
504 : * This is not expected to happen often. The location of the check is a
505 : * bit random, but this is a relatively frequently called path, so better
506 : * than most.
507 : */
508 4533450 : if (pgstat_need_entry_refs_gc())
509 6652 : pgstat_gc_entry_refs();
510 :
511 : /*
512 : * First check the lookup cache hashtable in local memory. If we find a
513 : * match here we can avoid taking locks / causing contention.
514 : */
515 4533450 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
516 3294461 : return entry_ref;
517 :
518 : Assert(entry_ref != NULL);
519 :
520 : /*
521 : * Do a lookup in the hash table first - it's quite likely that the entry
522 : * already exists, and that way we only need a shared lock.
523 : */
524 1238989 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
525 :
526 1238989 : if (create && !shhashent)
527 : {
528 : bool shfound;
529 :
530 : /*
531 : * It's possible that somebody created the entry since the above
532 : * lookup. If so, fall through to the same path as if we'd have if it
533 : * already had been created before the dshash_find() calls.
534 : */
535 146058 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
536 146058 : if (!shfound)
537 : {
538 146058 : shheader = pgstat_init_entry(kind, shhashent);
539 146058 : if (shheader == NULL)
540 : {
541 : /*
542 : * Failed the allocation of a new entry, so clean up the
543 : * shared hashtable before giving up.
544 : */
545 0 : dshash_delete_entry(pgStatLocal.shared_hash, shhashent);
546 :
547 0 : ereport(ERROR,
548 : (errcode(ERRCODE_OUT_OF_MEMORY),
549 : errmsg("out of memory"),
550 : errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".",
551 : key.kind, key.dboid, key.objid)));
552 : }
553 146058 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
554 :
555 146058 : if (created_entry != NULL)
556 51 : *created_entry = true;
557 :
558 146058 : return entry_ref;
559 : }
560 : }
561 :
562 1092931 : if (!shhashent)
563 : {
564 : /*
565 : * If we're not creating, delete the reference again. In all
566 : * likelihood it's just a stats lookup - no point wasting memory for a
567 : * shared ref to nothing...
568 : */
569 107539 : pgstat_release_entry_ref(key, entry_ref, false);
570 :
571 107539 : return NULL;
572 : }
573 : else
574 : {
575 : /*
576 : * Can get here either because dshash_find() found a match, or if
577 : * dshash_find_or_insert() found a concurrently inserted entry.
578 : */
579 :
580 985392 : if (shhashent->dropped && create)
581 : {
582 : /*
583 : * There are legitimate cases where the old stats entry might not
584 : * yet have been dropped by the time it's reused. The most obvious
585 : * case are replication slot stats, where a new slot can be
586 : * created with the same index just after dropping. But oid
587 : * wraparound can lead to other cases as well. We just reset the
588 : * stats to their plain state, while incrementing its "generation"
589 : * in the shared entry for any remaining local references.
590 : */
591 29 : shheader = pgstat_reinit_entry(kind, shhashent);
592 29 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
593 :
594 29 : if (created_entry != NULL)
595 0 : *created_entry = true;
596 :
597 29 : return entry_ref;
598 : }
599 985363 : else if (shhashent->dropped)
600 : {
601 37 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
602 37 : pgstat_release_entry_ref(key, entry_ref, false);
603 :
604 37 : return NULL;
605 : }
606 : else
607 : {
608 985326 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
609 985326 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
610 :
611 985326 : return entry_ref;
612 : }
613 : }
614 : }
615 :
616 : static void
617 1238989 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
618 : bool discard_pending)
619 : {
620 1238989 : if (entry_ref && entry_ref->pending)
621 : {
622 44839 : if (discard_pending)
623 44839 : pgstat_delete_pending_entry(entry_ref);
624 : else
625 0 : elog(ERROR, "releasing ref with pending data");
626 : }
627 :
628 1238989 : if (entry_ref && entry_ref->shared_stats)
629 : {
630 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
631 : Assert(entry_ref->pending == NULL);
632 :
633 : /*
634 : * This can't race with another backend looking up the stats entry and
635 : * increasing the refcount because it is not "legal" to create
636 : * additional references to dropped entries.
637 : */
638 1131413 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
639 : {
640 : PgStatShared_HashEntry *shent;
641 :
642 : /*
643 : * We're the last referrer to this entry, try to drop the shared
644 : * entry.
645 : */
646 :
647 : /* only dropped entries can reach a 0 refcount */
648 : Assert(entry_ref->shared_entry->dropped);
649 :
650 5311 : shent = dshash_find(pgStatLocal.shared_hash,
651 5311 : &entry_ref->shared_entry->key,
652 : true);
653 5311 : if (!shent)
654 0 : elog(ERROR, "could not find just referenced shared stats entry");
655 :
656 : /*
657 : * This entry may have been reinitialized while trying to release
658 : * it, so double-check that it has not been reused while holding a
659 : * lock on its shared entry.
660 : */
661 5311 : if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
662 5311 : entry_ref->generation)
663 : {
664 : /* Same "generation", so we're OK with the removal */
665 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
666 : Assert(entry_ref->shared_entry == shent);
667 5311 : pgstat_free_entry(shent, NULL);
668 : }
669 : else
670 : {
671 : /*
672 : * Shared stats entry has been reinitialized, so do not drop
673 : * its shared entry, only release its lock.
674 : */
675 0 : dshash_release_lock(pgStatLocal.shared_hash, shent);
676 : }
677 : }
678 : }
679 :
680 1238989 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
681 0 : elog(ERROR, "entry ref vanished before deletion");
682 :
683 1238989 : if (entry_ref)
684 1238989 : pfree(entry_ref);
685 1238989 : }
686 :
687 : /*
688 : * Acquire exclusive lock on the entry.
689 : *
690 : * If nowait is true, it's just a conditional acquire, and the result
691 : * *must* be checked to verify success.
692 : * If nowait is false, waits as necessary, always returning true.
693 : */
694 : bool
695 1468104 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
696 : {
697 1468104 : LWLock *lock = &entry_ref->shared_stats->lock;
698 :
699 1468104 : if (nowait)
700 404477 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
701 :
702 1063627 : LWLockAcquire(lock, LW_EXCLUSIVE);
703 1063627 : return true;
704 : }
705 :
706 : /*
707 : * Acquire shared lock on the entry.
708 : *
709 : * Separate from pgstat_lock_entry() as most callers will need to lock
710 : * exclusively. The wait semantics are identical.
711 : */
712 : bool
713 269196 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
714 : {
715 269196 : LWLock *lock = &entry_ref->shared_stats->lock;
716 :
717 269196 : if (nowait)
718 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
719 :
720 269196 : LWLockAcquire(lock, LW_SHARED);
721 269196 : return true;
722 : }
723 :
724 : void
725 1737298 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
726 : {
727 1737298 : LWLockRelease(&entry_ref->shared_stats->lock);
728 1737298 : }
729 :
730 : /*
731 : * Helper function to fetch and lock shared stats.
732 : */
733 : PgStat_EntryRef *
734 310026 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
735 : bool nowait)
736 : {
737 : PgStat_EntryRef *entry_ref;
738 :
739 : /* find shared table stats entry corresponding to the local entry */
740 310026 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
741 :
742 : /* lock the shared entry to protect the content, skip if failed */
743 310026 : if (!pgstat_lock_entry(entry_ref, nowait))
744 0 : return NULL;
745 :
746 310026 : return entry_ref;
747 : }
748 :
749 : void
750 2130 : pgstat_request_entry_refs_gc(void)
751 : {
752 2130 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
753 2130 : }
754 :
755 : static bool
756 4533450 : pgstat_need_entry_refs_gc(void)
757 : {
758 : uint64 curage;
759 :
760 4533450 : if (!pgStatEntryRefHash)
761 0 : return false;
762 :
763 : /* should have been initialized when creating pgStatEntryRefHash */
764 : Assert(pgStatSharedRefAge != 0);
765 :
766 4533450 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
767 :
768 4533450 : return pgStatSharedRefAge != curage;
769 : }
770 :
771 : static void
772 6652 : pgstat_gc_entry_refs(void)
773 : {
774 : pgstat_entry_ref_hash_iterator i;
775 : PgStat_EntryRefHashEntry *ent;
776 : uint64 curage;
777 :
778 6652 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
779 : Assert(curage != 0);
780 :
781 : /*
782 : * Some entries have been dropped or reinitialized. Invalidate cache
783 : * pointer to them.
784 : */
785 6652 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
786 501729 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
787 : {
788 495077 : PgStat_EntryRef *entry_ref = ent->entry_ref;
789 :
790 : Assert(!entry_ref->shared_stats ||
791 : entry_ref->shared_stats->magic == 0xdeadbeef);
792 :
793 : /*
794 : * "generation" checks for the case of entries being reinitialized,
795 : * and "dropped" for the case where these are.. dropped.
796 : */
797 495077 : if (!entry_ref->shared_entry->dropped &&
798 357441 : pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
799 357441 : entry_ref->generation)
800 357412 : continue;
801 :
802 : /* cannot gc shared ref that has pending data */
803 137665 : if (entry_ref->pending != NULL)
804 132360 : continue;
805 :
806 5305 : pgstat_release_entry_ref(ent->key, entry_ref, false);
807 : }
808 :
809 6652 : pgStatSharedRefAge = curage;
810 6652 : }
811 :
812 : static void
813 20757 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
814 : Datum match_data)
815 : {
816 : pgstat_entry_ref_hash_iterator i;
817 : PgStat_EntryRefHashEntry *ent;
818 :
819 20757 : if (pgStatEntryRefHash == NULL)
820 1 : return;
821 :
822 20756 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
823 :
824 1082634 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
825 1082634 : != NULL)
826 : {
827 : Assert(ent->entry_ref != NULL);
828 :
829 1061878 : if (match && !match(ent, match_data))
830 1242 : continue;
831 :
832 1060636 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
833 : }
834 : }
835 :
836 : /*
837 : * Release all local references to shared stats entries.
838 : *
839 : * When a process exits it cannot do so while still holding references onto
840 : * stats entries, otherwise the shared stats entries could never be freed.
841 : */
842 : static void
843 24786 : pgstat_release_all_entry_refs(bool discard_pending)
844 : {
845 24786 : if (pgStatEntryRefHash == NULL)
846 4070 : return;
847 :
848 20716 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
849 : Assert(pgStatEntryRefHash->members == 0);
850 20716 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
851 20716 : pgStatEntryRefHash = NULL;
852 : }
853 :
854 : static bool
855 1242 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
856 : {
857 1242 : Oid dboid = DatumGetObjectId(match_data);
858 :
859 1242 : return ent->key.dboid == dboid;
860 : }
861 :
862 : static void
863 41 : pgstat_release_db_entry_refs(Oid dboid)
864 : {
865 41 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
866 : match_db,
867 : ObjectIdGetDatum(dboid));
868 41 : }
869 :
870 :
871 : /* ------------------------------------------------------------
872 : * Dropping and resetting of stats entries
873 : * ------------------------------------------------------------
874 : */
875 :
876 : static void
877 71356 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
878 : {
879 : dsa_pointer pdsa;
880 71356 : PgStat_Kind kind = shent->key.kind;
881 :
882 : /*
883 : * Fetch dsa pointer before deleting entry - that way we can free the
884 : * memory after releasing the lock.
885 : */
886 71356 : pdsa = shent->body;
887 :
888 71356 : if (!hstat)
889 65598 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
890 : else
891 5758 : dshash_delete_current(hstat);
892 :
893 71356 : dsa_free(pgStatLocal.dsa, pdsa);
894 :
895 : /* Decrement entry count, if required. */
896 71356 : if (pgstat_get_kind_info(kind)->track_entry_count)
897 2 : pg_atomic_sub_fetch_u64(&pgStatLocal.shmem->entry_counts[kind - 1], 1);
898 71356 : }
899 :
900 : /*
901 : * Helper for both pgstat_drop_database_and_contents() and
902 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
903 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
904 : * case the entry needs to be already locked.
905 : */
906 : static bool
907 71385 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
908 : dshash_seq_status *hstat)
909 : {
910 : Assert(shent->body != InvalidDsaPointer);
911 :
912 : /* should already have released local reference */
913 71385 : if (pgStatEntryRefHash)
914 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
915 :
916 : /*
917 : * Signal that the entry is dropped - this will eventually cause other
918 : * backends to release their references.
919 : */
920 71385 : if (shent->dropped)
921 0 : elog(ERROR,
922 : "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
923 : pgstat_get_kind_info(shent->key.kind)->name,
924 : shent->key.dboid,
925 : shent->key.objid,
926 : pg_atomic_read_u32(&shent->refcount),
927 : pg_atomic_read_u32(&shent->generation));
928 71385 : shent->dropped = true;
929 :
930 : /* release refcount marking entry as not dropped */
931 71385 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
932 : {
933 66045 : pgstat_free_entry(shent, hstat);
934 66045 : return true;
935 : }
936 : else
937 : {
938 5340 : if (!hstat)
939 5340 : dshash_release_lock(pgStatLocal.shared_hash, shent);
940 5340 : return false;
941 : }
942 : }
943 :
944 : /*
945 : * Drop stats for the database and all the objects inside that database.
946 : */
947 : static void
948 41 : pgstat_drop_database_and_contents(Oid dboid)
949 : {
950 : dshash_seq_status hstat;
951 : PgStatShared_HashEntry *p;
952 41 : uint64 not_freed_count = 0;
953 :
954 : Assert(OidIsValid(dboid));
955 :
956 : Assert(pgStatLocal.shared_hash != NULL);
957 :
958 : /*
959 : * This backend might very well be the only backend holding a reference to
960 : * about-to-be-dropped entries. Ensure that we're not preventing it from
961 : * being cleaned up till later.
962 : *
963 : * Doing this separately from the dshash iteration below avoids having to
964 : * do so while holding a partition lock on the shared hashtable.
965 : */
966 41 : pgstat_release_db_entry_refs(dboid);
967 :
968 : /* some of the dshash entries are to be removed, take exclusive lock. */
969 41 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
970 16454 : while ((p = dshash_seq_next(&hstat)) != NULL)
971 : {
972 16413 : if (p->dropped)
973 1 : continue;
974 :
975 16412 : if (p->key.dboid != dboid)
976 10710 : continue;
977 :
978 5702 : if (!pgstat_drop_entry_internal(p, &hstat))
979 : {
980 : /*
981 : * Even statistics for a dropped database might currently be
982 : * accessed (consider e.g. database stats for pg_stat_database).
983 : */
984 0 : not_freed_count++;
985 : }
986 : }
987 41 : dshash_seq_term(&hstat);
988 :
989 : /*
990 : * If some of the stats data could not be freed, signal the reference
991 : * holders to run garbage collection of their cached pgStatLocal.shmem.
992 : */
993 41 : if (not_freed_count > 0)
994 0 : pgstat_request_entry_refs_gc();
995 41 : }
996 :
997 : /*
998 : * Drop a single stats entry.
999 : *
1000 : * This routine returns false if the stats entry of the dropped object could
1001 : * not be freed, true otherwise.
1002 : *
1003 : * The callers of this function should call pgstat_request_entry_refs_gc()
1004 : * if the stats entry could not be freed, to ensure that this entry's memory
1005 : * can be reclaimed later by a different backend calling
1006 : * pgstat_gc_entry_refs().
1007 : */
1008 : bool
1009 92771 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1010 : {
1011 92771 : PgStat_HashKey key = {0};
1012 : PgStatShared_HashEntry *shent;
1013 92771 : bool freed = true;
1014 :
1015 92771 : key.kind = kind;
1016 92771 : key.dboid = dboid;
1017 92771 : key.objid = objid;
1018 :
1019 : /* delete local reference */
1020 92771 : if (pgStatEntryRefHash)
1021 : {
1022 : PgStat_EntryRefHashEntry *lohashent =
1023 88696 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
1024 :
1025 88696 : if (lohashent)
1026 65472 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1027 : true);
1028 : }
1029 :
1030 : /* mark entry in shared hashtable as deleted, drop if possible */
1031 92771 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
1032 92771 : if (shent)
1033 : {
1034 65627 : freed = pgstat_drop_entry_internal(shent, NULL);
1035 :
1036 : /*
1037 : * Database stats contain other stats. Drop those as well when
1038 : * dropping the database. XXX: Perhaps this should be done in a
1039 : * slightly more principled way? But not obvious what that'd look
1040 : * like, and so far this is the only case...
1041 : */
1042 65627 : if (key.kind == PGSTAT_KIND_DATABASE)
1043 41 : pgstat_drop_database_and_contents(key.dboid);
1044 : }
1045 :
1046 92771 : return freed;
1047 : }
1048 :
1049 : /*
1050 : * Scan through the shared hashtable of stats, dropping statistics if
1051 : * approved by the optional do_drop() function.
1052 : */
1053 : void
1054 246 : pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
1055 : Datum match_data)
1056 : {
1057 : dshash_seq_status hstat;
1058 : PgStatShared_HashEntry *ps;
1059 246 : uint64 not_freed_count = 0;
1060 :
1061 : /* entries are removed, take an exclusive lock */
1062 246 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
1063 302 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1064 : {
1065 56 : if (ps->dropped)
1066 0 : continue;
1067 :
1068 56 : if (do_drop != NULL && !do_drop(ps, match_data))
1069 0 : continue;
1070 :
1071 : /* delete local reference */
1072 56 : if (pgStatEntryRefHash)
1073 : {
1074 : PgStat_EntryRefHashEntry *lohashent =
1075 0 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);
1076 :
1077 0 : if (lohashent)
1078 0 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1079 : true);
1080 : }
1081 :
1082 56 : if (!pgstat_drop_entry_internal(ps, &hstat))
1083 0 : not_freed_count++;
1084 : }
1085 246 : dshash_seq_term(&hstat);
1086 :
1087 246 : if (not_freed_count > 0)
1088 0 : pgstat_request_entry_refs_gc();
1089 246 : }
1090 :
1091 : /*
1092 : * Scan through the shared hashtable of stats and drop all entries.
1093 : */
1094 : void
1095 246 : pgstat_drop_all_entries(void)
1096 : {
1097 246 : pgstat_drop_matching_entries(NULL, 0);
1098 246 : }
1099 :
1100 : static void
1101 13081 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1102 : TimestampTz ts)
1103 : {
1104 13081 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1105 :
1106 13081 : memset(pgstat_get_entry_data(kind, header), 0,
1107 : pgstat_get_entry_len(kind));
1108 :
1109 13081 : if (kind_info->reset_timestamp_cb)
1110 13081 : kind_info->reset_timestamp_cb(header, ts);
1111 13081 : }
1112 :
1113 : /*
1114 : * Reset one variable-numbered stats entry.
1115 : */
1116 : void
1117 229 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1118 : {
1119 : PgStat_EntryRef *entry_ref;
1120 :
1121 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1122 :
1123 229 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1124 229 : if (!entry_ref || entry_ref->shared_entry->dropped)
1125 1 : return;
1126 :
1127 228 : (void) pgstat_lock_entry(entry_ref, false);
1128 228 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1129 228 : pgstat_unlock_entry(entry_ref);
1130 : }
1131 :
1132 : /*
1133 : * Scan through the shared hashtable of stats, resetting statistics if
1134 : * approved by the provided do_reset() function.
1135 : */
1136 : void
1137 19 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1138 : Datum match_data, TimestampTz ts)
1139 : {
1140 : dshash_seq_status hstat;
1141 : PgStatShared_HashEntry *p;
1142 :
1143 : /* dshash entry is not modified, take shared lock */
1144 19 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1145 18047 : while ((p = dshash_seq_next(&hstat)) != NULL)
1146 : {
1147 : PgStatShared_Common *header;
1148 :
1149 18028 : if (p->dropped)
1150 1 : continue;
1151 :
1152 18027 : if (!do_reset(p, match_data))
1153 5174 : continue;
1154 :
1155 12853 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1156 :
1157 12853 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1158 :
1159 12853 : shared_stat_reset_contents(p->key.kind, header, ts);
1160 :
1161 12853 : LWLockRelease(&header->lock);
1162 : }
1163 19 : dshash_seq_term(&hstat);
1164 19 : }
1165 :
1166 : static bool
1167 1574 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1168 : {
1169 1574 : return p->key.kind == DatumGetInt32(match_data);
1170 : }
1171 :
1172 : void
1173 4 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1174 : {
1175 4 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1176 4 : }
1177 :
1178 : static void
1179 4533450 : pgstat_setup_memcxt(void)
1180 : {
1181 4533450 : if (unlikely(!pgStatSharedRefContext))
1182 20716 : pgStatSharedRefContext =
1183 20716 : AllocSetContextCreate(TopMemoryContext,
1184 : "PgStat Shared Ref",
1185 : ALLOCSET_SMALL_SIZES);
1186 4533450 : if (unlikely(!pgStatEntryRefHashContext))
1187 20716 : pgStatEntryRefHashContext =
1188 20716 : AllocSetContextCreate(TopMemoryContext,
1189 : "PgStat Shared Ref Hash",
1190 : ALLOCSET_SMALL_SIZES);
1191 4533450 : }
|