Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "storage/subsystems.h"
18 : #include "utils/memutils.h"
19 : #include "utils/pgstat_internal.h"
20 :
21 :
/* initial bucket count for the backend-local entry-ref cache */
#define PGSTAT_ENTRY_REF_HASH_SIZE	128

/* hash table entry for finding the PgStat_EntryRef for a key */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref; /* backend-local reference to the shared
								 * stats entry for 'key' */
} PgStat_EntryRefHashEntry;
31 :
/*
 * For references to shared statistics entries.
 *
 * Instantiates a backend-local simplehash over PgStat_HashKey, generating
 * the pgstat_entry_ref_hash_* functions used throughout this file.  Hashing
 * and comparison reuse the same routines as the shared dshash table.
 */
#define SH_PREFIX pgstat_entry_ref_hash
#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
46 :
47 :
48 : static void pgstat_drop_database_and_contents(Oid dboid);
49 :
50 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
51 :
52 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
53 : static bool pgstat_need_entry_refs_gc(void);
54 : static void pgstat_gc_entry_refs(void);
55 : static void pgstat_release_all_entry_refs(bool discard_pending);
56 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
57 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
58 :
59 : static void pgstat_setup_memcxt(void);
60 :
61 : static void StatsShmemRequest(void *arg);
62 : static void StatsShmemInit(void *arg);
63 :
64 : const ShmemCallbacks StatsShmemCallbacks = {
65 : .request_fn = StatsShmemRequest,
66 : .init_fn = StatsShmemInit,
67 : };
68 :
69 : /* parameter for the shared hash */
70 : static const dshash_parameters dsh_params = {
71 : sizeof(PgStat_HashKey),
72 : sizeof(PgStatShared_HashEntry),
73 : pgstat_cmp_hash_key,
74 : pgstat_hash_hash_key,
75 : dshash_memcpy,
76 : LWTRANCHE_PGSTATS_HASH
77 : };
78 :
79 :
80 : /*
81 : * Backend local references to shared stats entries. If there are pending
82 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
83 : * list.
84 : *
85 : * When a stats entry is dropped each backend needs to release its reference
86 : * to it before the memory can be released. To trigger that
87 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
88 : * compares to their copy of pgStatSharedRefAge on a regular basis.
89 : */
90 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
91 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
92 :
93 : /*
94 : * Memory contexts containing the pgStatEntryRefHash table and the
95 : * pgStatSharedRef entries respectively. Kept separate to make it easier to
96 : * track / attribute memory usage.
97 : */
98 : static MemoryContext pgStatSharedRefContext = NULL;
99 : static MemoryContext pgStatEntryRefHashContext = NULL;
100 :
101 :
102 : /* ------------------------------------------------------------
103 : * Public functions called from postmaster follow
104 : * ------------------------------------------------------------
105 : */
106 :
107 : /*
108 : * The size of the shared memory allocation for stats stored in the shared
109 : * stats hash table. This allocation will be done as part of the main shared
110 : * memory, rather than dynamic shared memory, allowing it to be initialized in
111 : * postmaster.
112 : */
113 : static Size
114 3693 : pgstat_dsa_init_size(void)
115 : {
116 : Size sz;
117 :
118 : /*
119 : * The dshash header / initial buckets array needs to fit into "plain"
120 : * shared memory, but it's beneficial to not need dsm segments
121 : * immediately. A size of 256kB seems works well and is not
122 : * disproportional compared to other constant sized shared memory
123 : * allocations. NB: To avoid DSMs further, the user can configure
124 : * min_dynamic_shared_memory.
125 : */
126 3693 : sz = 256 * 1024;
127 : Assert(dsa_minimum_size() <= sz);
128 3693 : return MAXALIGN(sz);
129 : }
130 :
131 : /*
132 : * Compute shared memory space needed for cumulative statistics
133 : */
134 : static Size
135 1233 : StatsShmemSize(void)
136 : {
137 : Size sz;
138 :
139 1233 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
140 1233 : sz = add_size(sz, pgstat_dsa_init_size());
141 :
142 : /* Add shared memory for all the custom fixed-numbered statistics */
143 12330 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
144 : {
145 11097 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
146 :
147 11097 : if (!kind_info)
148 11091 : continue;
149 6 : if (!kind_info->fixed_amount)
150 3 : continue;
151 :
152 : Assert(kind_info->shared_size != 0);
153 :
154 3 : sz += MAXALIGN(kind_info->shared_size);
155 : }
156 :
157 1233 : return sz;
158 : }
159 :
/*
 * Register shared memory area for cumulative statistics
 *
 * Invoked during postmaster startup via StatsShmemCallbacks.request_fn.
 * Reserves the space computed by StatsShmemSize() and arranges for
 * pgStatLocal.shmem to point at the resulting allocation.
 */
static void
StatsShmemRequest(void *arg)
{
	ShmemRequestStruct(.name = "Shared Memory Stats",
					   .size = StatsShmemSize(),
					   .ptr = (void **) &pgStatLocal.shmem,
					   );
}
171 :
/*
 * Initialize cumulative statistics system during startup
 *
 * Invoked in the postmaster via StatsShmemCallbacks.init_fn, after the
 * shared memory reserved by StatsShmemRequest() has been allocated.  Sets up
 * the in-place DSA, the shared dshash table for variable-numbered stats, and
 * the per-kind fixed-numbered stats areas.
 */
static void
StatsShmemInit(void *arg)
{
	dsa_area   *dsa;
	dshash_table *dsh;
	PgStat_ShmemControl *ctl = pgStatLocal.shmem;
	char	   *p = (char *) ctl;

	/* the allocation of pgStatLocal.shmem itself */
	p += MAXALIGN(sizeof(PgStat_ShmemControl));

	/*
	 * Create a small dsa allocation in plain shared memory. This is required
	 * because postmaster cannot use dsm segments. It also provides a small
	 * efficiency win.
	 */
	ctl->raw_dsa_area = p;
	dsa = dsa_create_in_place(ctl->raw_dsa_area,
							  pgstat_dsa_init_size(),
							  LWTRANCHE_PGSTATS_DSA, NULL);
	dsa_pin(dsa);

	/*
	 * To ensure dshash is created in "plain" shared memory, temporarily limit
	 * size of dsa to the initial size of the dsa.
	 */
	dsa_set_size_limit(dsa, pgstat_dsa_init_size());

	/*
	 * With the limit in place, create the dshash table. XXX: It'd be nice if
	 * there were dshash_create_in_place().
	 */
	dsh = dshash_create(dsa, &dsh_params, NULL);
	/* publish the handle so backends can attach in pgstat_attach_shmem() */
	ctl->hash_handle = dshash_get_hash_table_handle(dsh);

	/* lift limit set above */
	dsa_set_size_limit(dsa, -1);

	/*
	 * Postmaster will never access these again, thus free the local
	 * dsa/dshash references.
	 */
	dshash_detach(dsh);
	dsa_detach(dsa);

	/* start at 1 so backends can assert their cached age is nonzero */
	pg_atomic_init_u64(&ctl->gc_request_count, 1);

	/* Do the per-kind initialization */
	for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
	{
		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
		char	   *ptr;

		/* unregistered kind (gap in builtin range or unused custom slot) */
		if (!kind_info)
			continue;

		/* initialize entry count tracking */
		if (kind_info->track_entry_count)
			pg_atomic_init_u64(&ctl->entry_counts[kind - 1], 0);

		/* initialize fixed-numbered stats */
		if (kind_info->fixed_amount)
		{
			if (pgstat_is_kind_builtin(kind))
				ptr = ((char *) ctl) + kind_info->shared_ctl_off;
			else
			{
				/* custom kinds get their area from plain shared memory */
				int			idx = kind - PGSTAT_KIND_CUSTOM_MIN;

				Assert(kind_info->shared_size != 0);
				ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
				ptr = ctl->custom_data[idx];
			}

			kind_info->init_shmem_cb(ptr);
		}
	}
}
253 :
/*
 * Attach this backend to the shared stats state set up by StatsShmemInit():
 * the in-place DSA area and the shared dshash table.  Must be called before
 * any shared stats entry is accessed; pgstat_detach_shmem() undoes it.
 */
void
pgstat_attach_shmem(void)
{
	MemoryContext oldcontext;

	Assert(pgStatLocal.dsa == NULL);

	/* stats shared memory persists for the backend lifetime */
	oldcontext = MemoryContextSwitchTo(TopMemoryContext);

	pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
										  NULL);
	/* keep the mapping valid even across resource-owner cleanup */
	dsa_pin_mapping(pgStatLocal.dsa);

	pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
											pgStatLocal.shmem->hash_handle,
											NULL);

	MemoryContextSwitchTo(oldcontext);
}
274 :
/*
 * Detach this backend from shared stats state, releasing all local
 * references first so dropped shared entries can eventually be freed.
 */
void
pgstat_detach_shmem(void)
{
	Assert(pgStatLocal.dsa);

	/* we shouldn't leave references to shared stats */
	pgstat_release_all_entry_refs(false);

	dshash_detach(pgStatLocal.shared_hash);
	pgStatLocal.shared_hash = NULL;

	dsa_detach(pgStatLocal.dsa);

	/*
	 * dsa_detach() does not decrement the DSA reference count as no segment
	 * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
	 * be registered. Hence, release it manually now.
	 */
	dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);

	pgStatLocal.dsa = NULL;
}
297 :
298 :
299 : /* ------------------------------------------------------------
300 : * Maintenance of shared memory stats entries
301 : * ------------------------------------------------------------
302 : */
303 :
/*
 * Initialize entry newly-created.
 *
 * Allocates and zeroes the stats body in DSA, links it from 'shhashent', and
 * initializes refcount/generation/lock.
 *
 * Returns NULL in the event of an allocation failure, so as callers can
 * take cleanup actions as the entry initialized is already inserted in the
 * shared hashtable.
 */
PgStatShared_Common *
pgstat_init_entry(PgStat_Kind kind,
				  PgStatShared_HashEntry *shhashent)
{
	/* Create new stats entry. */
	dsa_pointer chunk;
	PgStatShared_Common *shheader;
	const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);

	/*
	 * Initialize refcount to 1, marking it as valid / not dropped. The entry
	 * can't be freed before the initialization because it can't be found as
	 * long as we hold the dshash partition lock. Caller needs to increase
	 * further if a longer lived reference is needed.
	 */
	pg_atomic_init_u32(&shhashent->refcount, 1);

	/*
	 * Initialize "generation" to 0, as freshly created.
	 */
	pg_atomic_init_u32(&shhashent->generation, 0);
	shhashent->dropped = false;

	/* NO_OOM: report failure to the caller instead of erroring out here */
	chunk = dsa_allocate_extended(pgStatLocal.dsa,
								  kind_info->shared_size,
								  DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM);
	if (chunk == InvalidDsaPointer)
		return NULL;

	shheader = dsa_get_address(pgStatLocal.dsa, chunk);
	/* magic value used by assertions to detect clobbered/stale entries */
	shheader->magic = 0xdeadbeef;

	/* Link the new entry from the hash entry. */
	shhashent->body = chunk;

	/* Increment entry count, if required. */
	if (kind_info->track_entry_count)
		pg_atomic_fetch_add_u64(&pgStatLocal.shmem->entry_counts[kind - 1], 1);

	LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);

	return shheader;
}
354 :
/*
 * Revive a dropped-but-not-yet-freed shared entry for reuse: re-acquire a
 * refcount, bump the generation so stale local references are detectable,
 * and zero the stats payload.  Caller must hold the dshash partition lock.
 */
static PgStatShared_Common *
pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
{
	PgStatShared_Common *shheader;

	shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);

	/* mark as not dropped anymore */
	pg_atomic_fetch_add_u32(&shhashent->refcount, 1);

	/*
	 * Increment "generation", to let any backend with local references know
	 * that what they point to is outdated.
	 */
	pg_atomic_fetch_add_u32(&shhashent->generation, 1);
	shhashent->dropped = false;

	/* reinitialize content */
	Assert(shheader->magic == 0xdeadbeef);
	memset(pgstat_get_entry_data(kind, shheader), 0,
		   pgstat_get_entry_len(kind));

	return shheader;
}
379 :
/*
 * Lazily create the backend-local entry-ref cache hashtable and seed the
 * cached GC age from the shared counter.  Cheap no-op once created.
 */
static void
pgstat_setup_shared_refs(void)
{
	if (likely(pgStatEntryRefHash != NULL))
		return;

	pgStatEntryRefHash =
		pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
									 PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
	/* gc_request_count starts at 1, so a valid age is never 0 */
	pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
	Assert(pgStatSharedRefAge != 0);
}
392 :
/*
 * Helper function for pgstat_get_entry_ref().
 *
 * Pins 'shhashent' by bumping its refcount, releases the dshash partition
 * lock (which the caller must hold), and fills in the local ref, snapshotting
 * the current generation so later reinitializations can be detected.
 */
static void
pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
						 PgStatShared_HashEntry *shhashent,
						 PgStatShared_Common *shheader)
{
	Assert(shheader->magic == 0xdeadbeef);
	Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);

	/* take our reference before dropping the partition lock */
	pg_atomic_fetch_add_u32(&shhashent->refcount, 1);

	dshash_release_lock(pgStatLocal.shared_hash, shhashent);

	entry_ref->shared_stats = shheader;
	entry_ref->shared_entry = shhashent;
	entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
}
412 :
/*
 * Helper function for pgstat_get_entry_ref().
 *
 * Looks up / creates the local cache slot for 'key'.  *entry_ref_p is always
 * set to the (possibly freshly allocated, not-yet-attached) PgStat_EntryRef.
 * Returns true if the cached ref is already attached to a shared entry and
 * can be used directly; false if the caller still has to resolve it against
 * the shared hashtable.
 */
static bool
pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
{
	bool		found;
	PgStat_EntryRefHashEntry *cache_entry;

	/*
	 * We immediately insert a cache entry, because it avoids 1) multiple
	 * hashtable lookups in case of a cache miss 2) having to deal with
	 * out-of-memory errors after incrementing PgStatShared_Common->refcount.
	 */

	cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);

	if (!found || !cache_entry->entry_ref)
	{
		PgStat_EntryRef *entry_ref;

		/* fresh slot: allocate an unattached ref for the caller to fill */
		cache_entry->entry_ref = entry_ref =
			MemoryContextAlloc(pgStatSharedRefContext,
							   sizeof(PgStat_EntryRef));
		entry_ref->shared_stats = NULL;
		entry_ref->shared_entry = NULL;
		entry_ref->pending = NULL;

		found = false;
	}
	else if (cache_entry->entry_ref->shared_stats == NULL)
	{
		/* slot exists but was never attached; treat as a miss */
		Assert(cache_entry->entry_ref->pending == NULL);
		found = false;
	}
	else
	{
		/* cache hit: sanity-check the ref before handing it out */
		PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;

		entry_ref = cache_entry->entry_ref;
		Assert(entry_ref->shared_entry != NULL);
		Assert(entry_ref->shared_stats != NULL);

		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		/* should have at least our reference */
		Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
	}

	*entry_ref_p = cache_entry->entry_ref;
	return found;
}
464 :
/*
 * Get a shared stats reference. If create is true, the shared stats object is
 * created if it does not exist.
 *
 * When create is true, and created_entry is non-NULL, it'll be set to true
 * if the entry is newly created, false otherwise.
 *
 * Returns NULL when the entry does not exist (and create is false), or when
 * the entry exists but has been dropped and cannot be revived.  May ERROR
 * out of memory when creating a new entry.
 */
PgStat_EntryRef *
pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
					 bool *created_entry)
{
	PgStat_HashKey key = {0};
	PgStatShared_HashEntry *shhashent;
	PgStatShared_Common *shheader = NULL;
	PgStat_EntryRef *entry_ref;

	key.kind = kind;
	key.dboid = dboid;
	key.objid = objid;

	/*
	 * passing in created_entry only makes sense if we possibly could create
	 * entry.
	 */
	Assert(create || created_entry == NULL);
	pgstat_assert_is_up();
	Assert(pgStatLocal.shared_hash != NULL);
	Assert(!pgStatLocal.shmem->is_shutdown);

	pgstat_setup_memcxt();
	pgstat_setup_shared_refs();

	if (created_entry != NULL)
		*created_entry = false;

	/*
	 * Check if other backends dropped stats that could not be deleted because
	 * somebody held references to it. If so, check this backend's references.
	 * This is not expected to happen often. The location of the check is a
	 * bit random, but this is a relatively frequently called path, so better
	 * than most.
	 */
	if (pgstat_need_entry_refs_gc())
		pgstat_gc_entry_refs();

	/*
	 * First check the lookup cache hashtable in local memory. If we find a
	 * match here we can avoid taking locks / causing contention.
	 */
	if (pgstat_get_entry_ref_cached(key, &entry_ref))
		return entry_ref;

	Assert(entry_ref != NULL);

	/*
	 * Do a lookup in the hash table first - it's quite likely that the entry
	 * already exists, and that way we only need a shared lock.
	 */
	shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);

	if (create && !shhashent)
	{
		bool		shfound;

		/*
		 * It's possible that somebody created the entry since the above
		 * lookup. If so, fall through to the same path as if we'd have if it
		 * already had been created before the dshash_find() calls.
		 */
		shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
		if (!shfound)
		{
			shheader = pgstat_init_entry(kind, shhashent);
			if (shheader == NULL)
			{
				/*
				 * Failed the allocation of a new entry, so clean up the
				 * shared hashtable before giving up.
				 */
				dshash_delete_entry(pgStatLocal.shared_hash, shhashent);

				ereport(ERROR,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of memory"),
						 errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".",
								   key.kind, key.dboid, key.objid)));
			}
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			if (created_entry != NULL)
				*created_entry = true;

			return entry_ref;
		}
	}

	if (!shhashent)
	{
		/*
		 * If we're not creating, delete the reference again. In all
		 * likelihood it's just a stats lookup - no point wasting memory for a
		 * shared ref to nothing...
		 */
		pgstat_release_entry_ref(key, entry_ref, false);

		return NULL;
	}
	else
	{
		/*
		 * Can get here either because dshash_find() found a match, or if
		 * dshash_find_or_insert() found a concurrently inserted entry.
		 */

		if (shhashent->dropped && create)
		{
			/*
			 * There are legitimate cases where the old stats entry might not
			 * yet have been dropped by the time it's reused. The most obvious
			 * case are replication slot stats, where a new slot can be
			 * created with the same index just after dropping. But oid
			 * wraparound can lead to other cases as well. We just reset the
			 * stats to their plain state, while incrementing its "generation"
			 * in the shared entry for any remaining local references.
			 */
			shheader = pgstat_reinit_entry(kind, shhashent);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			if (created_entry != NULL)
				*created_entry = false;

			return entry_ref;
		}
		else if (shhashent->dropped)
		{
			/* dropped and not creating: nothing usable here */
			dshash_release_lock(pgStatLocal.shared_hash, shhashent);
			pgstat_release_entry_ref(key, entry_ref, false);

			return NULL;
		}
		else
		{
			/* ordinary case: attach to the existing live entry */
			shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			return entry_ref;
		}
	}
}
614 :
/*
 * Release one backend-local reference to a shared stats entry, removing the
 * cache slot for 'key' and freeing the ref itself.  If this was the last
 * reference to a dropped entry, the shared entry is freed as well.
 *
 * discard_pending controls whether still-pending local stats data is thrown
 * away; releasing a ref that has pending data without discarding is an error.
 */
static void
pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
						 bool discard_pending)
{
	if (entry_ref && entry_ref->pending)
	{
		if (discard_pending)
			pgstat_delete_pending_entry(entry_ref);
		else
			elog(ERROR, "releasing ref with pending data");
	}

	if (entry_ref && entry_ref->shared_stats)
	{
		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		Assert(entry_ref->pending == NULL);

		/*
		 * This can't race with another backend looking up the stats entry and
		 * increasing the refcount because it is not "legal" to create
		 * additional references to dropped entries.
		 */
		if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
		{
			PgStatShared_HashEntry *shent;

			/*
			 * We're the last referrer to this entry, try to drop the shared
			 * entry.
			 */

			/* only dropped entries can reach a 0 refcount */
			Assert(entry_ref->shared_entry->dropped);

			/* re-find the entry to get the exclusive partition lock */
			shent = dshash_find(pgStatLocal.shared_hash,
								&entry_ref->shared_entry->key,
								true);
			if (!shent)
				elog(ERROR, "could not find just referenced shared stats entry");

			/*
			 * This entry may have been reinitialized while trying to release
			 * it, so double-check that it has not been reused while holding a
			 * lock on its shared entry.
			 */
			if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
				entry_ref->generation)
			{
				/* Same "generation", so we're OK with the removal */
				Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
				Assert(entry_ref->shared_entry == shent);
				pgstat_free_entry(shent, NULL);
			}
			else
			{
				/*
				 * Shared stats entry has been reinitialized, so do not drop
				 * its shared entry, only release its lock.
				 */
				dshash_release_lock(pgStatLocal.shared_hash, shent);
			}
		}
	}

	if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
		elog(ERROR, "entry ref vanished before deletion");

	if (entry_ref)
		pfree(entry_ref);
}
685 :
686 : /*
687 : * Acquire exclusive lock on the entry.
688 : *
689 : * If nowait is true, it's just a conditional acquire, and the result
690 : * *must* be checked to verify success.
691 : * If nowait is false, waits as necessary, always returning true.
692 : */
693 : bool
694 1473371 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
695 : {
696 1473371 : LWLock *lock = &entry_ref->shared_stats->lock;
697 :
698 1473371 : if (nowait)
699 403311 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
700 :
701 1070060 : LWLockAcquire(lock, LW_EXCLUSIVE);
702 1070060 : return true;
703 : }
704 :
705 : /*
706 : * Acquire shared lock on the entry.
707 : *
708 : * Separate from pgstat_lock_entry() as most callers will need to lock
709 : * exclusively. The wait semantics are identical.
710 : */
711 : bool
712 274250 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
713 : {
714 274250 : LWLock *lock = &entry_ref->shared_stats->lock;
715 :
716 274250 : if (nowait)
717 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
718 :
719 274250 : LWLockAcquire(lock, LW_SHARED);
720 274250 : return true;
721 : }
722 :
723 : void
724 1747614 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
725 : {
726 1747614 : LWLockRelease(&entry_ref->shared_stats->lock);
727 1747614 : }
728 :
729 : /*
730 : * Helper function to fetch and lock shared stats.
731 : */
732 : PgStat_EntryRef *
733 308978 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
734 : bool nowait)
735 : {
736 : PgStat_EntryRef *entry_ref;
737 :
738 : /* find shared table stats entry corresponding to the local entry */
739 308978 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
740 :
741 : /* lock the shared entry to protect the content, skip if failed */
742 308978 : if (!pgstat_lock_entry(entry_ref, nowait))
743 0 : return NULL;
744 :
745 308978 : return entry_ref;
746 : }
747 :
/*
 * Ask all backends to garbage-collect their cached entry refs, by bumping
 * the shared counter each backend compares against pgStatSharedRefAge.
 */
void
pgstat_request_entry_refs_gc(void)
{
	pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
}
753 :
754 : static bool
755 4561907 : pgstat_need_entry_refs_gc(void)
756 : {
757 : uint64 curage;
758 :
759 4561907 : if (!pgStatEntryRefHash)
760 0 : return false;
761 :
762 : /* should have been initialized when creating pgStatEntryRefHash */
763 : Assert(pgStatSharedRefAge != 0);
764 :
765 4561907 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
766 :
767 4561907 : return pgStatSharedRefAge != curage;
768 : }
769 :
/*
 * Walk the local entry-ref cache and release refs whose shared entries have
 * been dropped or reinitialized, then record the counter value we caught up
 * to in pgStatSharedRefAge.
 */
static void
pgstat_gc_entry_refs(void)
{
	pgstat_entry_ref_hash_iterator i;
	PgStat_EntryRefHashEntry *ent;
	uint64		curage;

	curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
	Assert(curage != 0);

	/*
	 * Some entries have been dropped or reinitialized. Invalidate cache
	 * pointer to them.
	 */
	pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
	while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
	{
		PgStat_EntryRef *entry_ref = ent->entry_ref;

		Assert(!entry_ref->shared_stats ||
			   entry_ref->shared_stats->magic == 0xdeadbeef);

		/*
		 * "generation" checks for the case of entries being reinitialized,
		 * and "dropped" for the case where these are.. dropped.
		 */
		if (!entry_ref->shared_entry->dropped &&
			pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
			entry_ref->generation)
			continue;

		/* cannot gc shared ref that has pending data */
		if (entry_ref->pending != NULL)
			continue;

		pgstat_release_entry_ref(ent->key, entry_ref, false);
	}

	/* remember how far we caught up */
	pgStatSharedRefAge = curage;
}
810 :
/*
 * Release all cached entry refs for which 'match' returns true (or all refs
 * when match is NULL), passing match_data through to the callback.  Pending
 * data is discarded or preserved per discard_pending; see
 * pgstat_release_entry_ref().
 */
static void
pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
								   Datum match_data)
{
	pgstat_entry_ref_hash_iterator i;
	PgStat_EntryRefHashEntry *ent;

	/* cache never created in this backend: nothing to release */
	if (pgStatEntryRefHash == NULL)
		return;

	pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);

	while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
		   != NULL)
	{
		Assert(ent->entry_ref != NULL);

		if (match && !match(ent, match_data))
			continue;

		pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
	}
}
834 :
/*
 * Release all local references to shared stats entries.
 *
 * When a process exits it cannot do so while still holding references onto
 * stats entries, otherwise the shared stats entries could never be freed.
 */
static void
pgstat_release_all_entry_refs(bool discard_pending)
{
	if (pgStatEntryRefHash == NULL)
		return;

	/* NULL match callback => release every cached ref */
	pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
	Assert(pgStatEntryRefHash->members == 0);
	pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
	pgStatEntryRefHash = NULL;
}
852 :
853 : static bool
854 1242 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
855 : {
856 1242 : Oid dboid = DatumGetObjectId(match_data);
857 :
858 1242 : return ent->key.dboid == dboid;
859 : }
860 :
861 : static void
862 41 : pgstat_release_db_entry_refs(Oid dboid)
863 : {
864 41 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
865 : match_db,
866 : ObjectIdGetDatum(dboid));
867 41 : }
868 :
869 :
870 : /* ------------------------------------------------------------
871 : * Dropping and resetting of stats entries
872 : * ------------------------------------------------------------
873 : */
874 :
/*
 * Remove 'shent' from the shared hashtable and free its DSA-allocated stats
 * body.  If hstat is non-NULL we are inside a dshash sequential scan and must
 * delete via dshash_delete_current(); otherwise the caller holds the entry
 * locked exclusively and dshash_delete_entry() is used.
 */
static void
pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
{
	dsa_pointer pdsa;
	PgStat_Kind kind = shent->key.kind;

	/*
	 * Fetch dsa pointer before deleting entry - that way we can free the
	 * memory after releasing the lock.
	 */
	pdsa = shent->body;

	if (!hstat)
		dshash_delete_entry(pgStatLocal.shared_hash, shent);
	else
		dshash_delete_current(hstat);

	dsa_free(pgStatLocal.dsa, pdsa);

	/* Decrement entry count, if required. */
	if (pgstat_get_kind_info(kind)->track_entry_count)
		pg_atomic_sub_fetch_u64(&pgStatLocal.shmem->entry_counts[kind - 1], 1);
}
898 :
/*
 * Helper for both pgstat_drop_database_and_contents() and
 * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
 * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
 * case the entry needs to be already locked.
 *
 * Returns true if the entry's memory could be freed immediately, false if
 * other backends still hold references and the entry was only marked
 * dropped (to be freed when the last reference goes away).
 */
static bool
pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
						   dshash_seq_status *hstat)
{
	Assert(shent->body != InvalidDsaPointer);

	/* should already have released local reference */
	if (pgStatEntryRefHash)
		Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));

	/*
	 * Signal that the entry is dropped - this will eventually cause other
	 * backends to release their references.
	 */
	if (shent->dropped)
		elog(ERROR,
			 "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
			 pgstat_get_kind_info(shent->key.kind)->name,
			 shent->key.dboid,
			 shent->key.objid,
			 pg_atomic_read_u32(&shent->refcount),
			 pg_atomic_read_u32(&shent->generation));
	shent->dropped = true;

	/* release refcount marking entry as not dropped */
	if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
	{
		pgstat_free_entry(shent, hstat);
		return true;
	}
	else
	{
		/* when scanning (hstat != NULL) the scan itself holds the lock */
		if (!hstat)
			dshash_release_lock(pgStatLocal.shared_hash, shent);
		return false;
	}
}
942 :
/*
 * Drop stats for the database and all the objects inside that database.
 */
static void
pgstat_drop_database_and_contents(Oid dboid)
{
	dshash_seq_status hstat;
	PgStatShared_HashEntry *p;
	uint64		not_freed_count = 0;

	Assert(OidIsValid(dboid));

	Assert(pgStatLocal.shared_hash != NULL);

	/*
	 * This backend might very well be the only backend holding a reference to
	 * about-to-be-dropped entries. Ensure that we're not preventing it from
	 * being cleaned up till later.
	 *
	 * Doing this separately from the dshash iteration below avoids having to
	 * do so while holding a partition lock on the shared hashtable.
	 */
	pgstat_release_db_entry_refs(dboid);

	/* some of the dshash entries are to be removed, take exclusive lock. */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
	while ((p = dshash_seq_next(&hstat)) != NULL)
	{
		/* already dropped by somebody else; nothing to do */
		if (p->dropped)
			continue;

		/* only entries belonging to the target database */
		if (p->key.dboid != dboid)
			continue;

		if (!pgstat_drop_entry_internal(p, &hstat))
		{
			/*
			 * Even statistics for a dropped database might currently be
			 * accessed (consider e.g. database stats for pg_stat_database).
			 */
			not_freed_count++;
		}
	}
	dshash_seq_term(&hstat);

	/*
	 * If some of the stats data could not be freed, signal the reference
	 * holders to run garbage collection of their cached pgStatLocal.shmem.
	 */
	if (not_freed_count > 0)
		pgstat_request_entry_refs_gc();
}
995 :
996 : /*
997 : * Drop a single stats entry.
998 : *
999 : * This routine returns false if the stats entry of the dropped object could
1000 : * not be freed, true otherwise.
1001 : *
1002 : * The callers of this function should call pgstat_request_entry_refs_gc()
1003 : * if the stats entry could not be freed, to ensure that this entry's memory
1004 : * can be reclaimed later by a different backend calling
1005 : * pgstat_gc_entry_refs().
1006 : */
1007 : bool
1008 92157 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1009 : {
1010 92157 : PgStat_HashKey key = {0};
1011 : PgStatShared_HashEntry *shent;
1012 92157 : bool freed = true;
1013 :
1014 92157 : key.kind = kind;
1015 92157 : key.dboid = dboid;
1016 92157 : key.objid = objid;
1017 :
1018 : /* delete local reference */
1019 92157 : if (pgStatEntryRefHash)
1020 : {
1021 : PgStat_EntryRefHashEntry *lohashent =
1022 88087 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
1023 :
1024 88087 : if (lohashent)
1025 64961 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1026 : true);
1027 : }
1028 :
1029 : /* mark entry in shared hashtable as deleted, drop if possible */
1030 92157 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
1031 92157 : if (shent)
1032 : {
1033 65116 : freed = pgstat_drop_entry_internal(shent, NULL);
1034 :
1035 : /*
1036 : * Database stats contain other stats. Drop those as well when
1037 : * dropping the database. XXX: Perhaps this should be done in a
1038 : * slightly more principled way? But not obvious what that'd look
1039 : * like, and so far this is the only case...
1040 : */
1041 65116 : if (key.kind == PGSTAT_KIND_DATABASE)
1042 41 : pgstat_drop_database_and_contents(key.dboid);
1043 : }
1044 :
1045 92157 : return freed;
1046 : }
1047 :
1048 : /*
1049 : * Scan through the shared hashtable of stats, dropping statistics if
1050 : * approved by the optional do_drop() function.
1051 : */
1052 : void
1053 246 : pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
1054 : Datum match_data)
1055 : {
1056 : dshash_seq_status hstat;
1057 : PgStatShared_HashEntry *ps;
1058 246 : uint64 not_freed_count = 0;
1059 :
1060 : /* entries are removed, take an exclusive lock */
1061 246 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
1062 302 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1063 : {
1064 56 : if (ps->dropped)
1065 0 : continue;
1066 :
1067 56 : if (do_drop != NULL && !do_drop(ps, match_data))
1068 0 : continue;
1069 :
1070 : /* delete local reference */
1071 56 : if (pgStatEntryRefHash)
1072 : {
1073 : PgStat_EntryRefHashEntry *lohashent =
1074 0 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);
1075 :
1076 0 : if (lohashent)
1077 0 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1078 : true);
1079 : }
1080 :
1081 56 : if (!pgstat_drop_entry_internal(ps, &hstat))
1082 0 : not_freed_count++;
1083 : }
1084 246 : dshash_seq_term(&hstat);
1085 :
1086 246 : if (not_freed_count > 0)
1087 0 : pgstat_request_entry_refs_gc();
1088 246 : }
1089 :
1090 : /*
1091 : * Scan through the shared hashtable of stats and drop all entries.
1092 : */
1093 : void
1094 246 : pgstat_drop_all_entries(void)
1095 : {
1096 246 : pgstat_drop_matching_entries(NULL, 0);
1097 246 : }
1098 :
1099 : static void
1100 13185 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1101 : TimestampTz ts)
1102 : {
1103 13185 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1104 :
1105 13185 : memset(pgstat_get_entry_data(kind, header), 0,
1106 : pgstat_get_entry_len(kind));
1107 :
1108 13185 : if (kind_info->reset_timestamp_cb)
1109 13185 : kind_info->reset_timestamp_cb(header, ts);
1110 13185 : }
1111 :
1112 : /*
1113 : * Reset one variable-numbered stats entry.
1114 : */
1115 : void
1116 229 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1117 : {
1118 : PgStat_EntryRef *entry_ref;
1119 :
1120 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1121 :
1122 229 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1123 229 : if (!entry_ref || entry_ref->shared_entry->dropped)
1124 1 : return;
1125 :
1126 228 : (void) pgstat_lock_entry(entry_ref, false);
1127 228 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1128 228 : pgstat_unlock_entry(entry_ref);
1129 : }
1130 :
1131 : /*
1132 : * Scan through the shared hashtable of stats, resetting statistics if
1133 : * approved by the provided do_reset() function.
1134 : */
1135 : void
1136 19 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1137 : Datum match_data, TimestampTz ts)
1138 : {
1139 : dshash_seq_status hstat;
1140 : PgStatShared_HashEntry *p;
1141 :
1142 : /* dshash entry is not modified, take shared lock */
1143 19 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1144 18157 : while ((p = dshash_seq_next(&hstat)) != NULL)
1145 : {
1146 : PgStatShared_Common *header;
1147 :
1148 18138 : if (p->dropped)
1149 1 : continue;
1150 :
1151 18137 : if (!do_reset(p, match_data))
1152 5180 : continue;
1153 :
1154 12957 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1155 :
1156 12957 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1157 :
1158 12957 : shared_stat_reset_contents(p->key.kind, header, ts);
1159 :
1160 12957 : LWLockRelease(&header->lock);
1161 : }
1162 19 : dshash_seq_term(&hstat);
1163 19 : }
1164 :
1165 : static bool
1166 1574 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1167 : {
1168 1574 : return p->key.kind == DatumGetInt32(match_data);
1169 : }
1170 :
1171 : void
1172 4 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1173 : {
1174 4 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1175 4 : }
1176 :
1177 : static void
1178 4561907 : pgstat_setup_memcxt(void)
1179 : {
1180 4561907 : if (unlikely(!pgStatSharedRefContext))
1181 20761 : pgStatSharedRefContext =
1182 20761 : AllocSetContextCreate(TopMemoryContext,
1183 : "PgStat Shared Ref",
1184 : ALLOCSET_SMALL_SIZES);
1185 4561907 : if (unlikely(!pgStatEntryRefHashContext))
1186 20761 : pgStatEntryRefHashContext =
1187 20761 : AllocSetContextCreate(TopMemoryContext,
1188 : "PgStat Shared Ref Hash",
1189 : ALLOCSET_SMALL_SIZES);
1190 4561907 : }
|