Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128
22 :
/*
 * Entry of the backend-local lookup table (pgStatEntryRefHash) that maps a
 * stats key to the PgStat_EntryRef this backend holds for it.
 */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref; /* the reference itself; allocated separately
								 * from the table, see
								 * pgstat_get_entry_ref_cached() */
} PgStat_EntryRefHashEntry;
30 :
31 :
32 : /* for references to shared statistics entries */
33 : #define SH_PREFIX pgstat_entry_ref_hash
34 : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
35 : #define SH_KEY_TYPE PgStat_HashKey
36 : #define SH_KEY key
37 : #define SH_HASH_KEY(tb, key) \
38 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
39 : #define SH_EQUAL(tb, a, b) \
40 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
41 : #define SH_SCOPE static inline
42 : #define SH_DEFINE
43 : #define SH_DECLARE
44 : #include "lib/simplehash.h"
45 :
46 :
/* dropping / freeing of shared entries */
static void pgstat_drop_database_and_contents(Oid dboid);

static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);

/* management of backend-local references to shared entries */
static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
static bool pgstat_need_entry_refs_gc(void);
static void pgstat_gc_entry_refs(void);
static void pgstat_release_all_entry_refs(bool discard_pending);

/*
 * Filter callback for pgstat_release_matching_entry_refs(): return true for
 * each local hash entry whose reference should be released.  "data" is the
 * match_data value passed through unchanged by the caller.
 */
typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);

static void pgstat_setup_memcxt(void);
59 :
60 :
/*
 * Parameters for the shared hash.  The same parameters are passed both when
 * the table is created (StatsShmemInit()) and when a backend attaches to it
 * (pgstat_attach_shmem()).
 */
static const dshash_parameters dsh_params = {
	sizeof(PgStat_HashKey),		/* entries are keyed by PgStat_HashKey */
	sizeof(PgStatShared_HashEntry), /* each entry is a PgStatShared_HashEntry */
	pgstat_cmp_hash_key,		/* key comparison function */
	pgstat_hash_hash_key,		/* key hash function */
	dshash_memcpy,				/* key copy function */
	LWTRANCHE_PGSTATS_HASH		/* LWLock tranche used by the table */
};
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
84 :
/*
 * Memory contexts for backend-local reference bookkeeping.  Kept separate to
 * make it easier to track / attribute memory usage.
 */

/* holds the individual PgStat_EntryRef allocations */
static MemoryContext pgStatSharedRefContext = NULL;

/* holds the pgStatEntryRefHash table itself */
static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 12468 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 12468 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 12468 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 6084 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 6084 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 6084 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 : /* Add shared memory for all the custom fixed-numbered statistics */
135 60840 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
136 : {
137 54756 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
138 :
139 54756 : if (!kind_info)
140 54708 : continue;
141 48 : if (!kind_info->fixed_amount)
142 24 : continue;
143 :
144 : Assert(kind_info->shared_size != 0);
145 :
146 24 : sz += MAXALIGN(kind_info->shared_size);
147 : }
148 :
149 6084 : return sz;
150 : }
151 :
152 : /*
153 : * Initialize cumulative statistics system during startup
154 : */
155 : void
156 2128 : StatsShmemInit(void)
157 : {
158 : bool found;
159 : Size sz;
160 :
161 2128 : sz = StatsShmemSize();
162 2128 : pgStatLocal.shmem = (PgStat_ShmemControl *)
163 2128 : ShmemInitStruct("Shared Memory Stats", sz, &found);
164 :
165 2128 : if (!IsUnderPostmaster)
166 : {
167 : dsa_area *dsa;
168 : dshash_table *dsh;
169 2128 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
170 2128 : char *p = (char *) ctl;
171 :
172 : Assert(!found);
173 :
174 : /* the allocation of pgStatLocal.shmem itself */
175 2128 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
176 :
177 : /*
178 : * Create a small dsa allocation in plain shared memory. This is
179 : * required because postmaster cannot use dsm segments. It also
180 : * provides a small efficiency win.
181 : */
182 2128 : ctl->raw_dsa_area = p;
183 2128 : p += MAXALIGN(pgstat_dsa_init_size());
184 2128 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
185 : pgstat_dsa_init_size(),
186 : LWTRANCHE_PGSTATS_DSA, NULL);
187 2128 : dsa_pin(dsa);
188 :
189 : /*
190 : * To ensure dshash is created in "plain" shared memory, temporarily
191 : * limit size of dsa to the initial size of the dsa.
192 : */
193 2128 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
194 :
195 : /*
196 : * With the limit in place, create the dshash table. XXX: It'd be nice
197 : * if there were dshash_create_in_place().
198 : */
199 2128 : dsh = dshash_create(dsa, &dsh_params, NULL);
200 2128 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
201 :
202 : /* lift limit set above */
203 2128 : dsa_set_size_limit(dsa, -1);
204 :
205 : /*
206 : * Postmaster will never access these again, thus free the local
207 : * dsa/dshash references.
208 : */
209 2128 : dshash_detach(dsh);
210 2128 : dsa_detach(dsa);
211 :
212 2128 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
213 :
214 : /* initialize fixed-numbered stats */
215 70224 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
216 : {
217 68096 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
218 : char *ptr;
219 :
220 68096 : if (!kind_info || !kind_info->fixed_amount)
221 55320 : continue;
222 :
223 12776 : if (pgstat_is_kind_builtin(kind))
224 12768 : ptr = ((char *) ctl) + kind_info->shared_ctl_off;
225 : else
226 : {
227 8 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
228 :
229 : Assert(kind_info->shared_size != 0);
230 8 : ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
231 8 : ptr = ctl->custom_data[idx];
232 : }
233 :
234 12776 : kind_info->init_shmem_cb(ptr);
235 : }
236 : }
237 : else
238 : {
239 : Assert(found);
240 : }
241 2128 : }
242 :
243 : void
244 43262 : pgstat_attach_shmem(void)
245 : {
246 : MemoryContext oldcontext;
247 :
248 : Assert(pgStatLocal.dsa == NULL);
249 :
250 : /* stats shared memory persists for the backend lifetime */
251 43262 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
252 :
253 43262 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
254 : NULL);
255 43262 : dsa_pin_mapping(pgStatLocal.dsa);
256 :
257 86524 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
258 43262 : pgStatLocal.shmem->hash_handle,
259 : NULL);
260 :
261 43262 : MemoryContextSwitchTo(oldcontext);
262 43262 : }
263 :
264 : void
265 43262 : pgstat_detach_shmem(void)
266 : {
267 : Assert(pgStatLocal.dsa);
268 :
269 : /* we shouldn't leave references to shared stats */
270 43262 : pgstat_release_all_entry_refs(false);
271 :
272 43262 : dshash_detach(pgStatLocal.shared_hash);
273 43262 : pgStatLocal.shared_hash = NULL;
274 :
275 43262 : dsa_detach(pgStatLocal.dsa);
276 :
277 : /*
278 : * dsa_detach() does not decrement the DSA reference count as no segment
279 : * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
280 : * be registered. Hence, release it manually now.
281 : */
282 43262 : dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
283 :
284 43262 : pgStatLocal.dsa = NULL;
285 43262 : }
286 :
287 :
288 : /* ------------------------------------------------------------
289 : * Maintenance of shared memory stats entries
290 : * ------------------------------------------------------------
291 : */
292 :
293 : PgStatShared_Common *
294 621640 : pgstat_init_entry(PgStat_Kind kind,
295 : PgStatShared_HashEntry *shhashent)
296 : {
297 : /* Create new stats entry. */
298 : dsa_pointer chunk;
299 : PgStatShared_Common *shheader;
300 :
301 : /*
302 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
303 : * can't be freed before the initialization because it can't be found as
304 : * long as we hold the dshash partition lock. Caller needs to increase
305 : * further if a longer lived reference is needed.
306 : */
307 621640 : pg_atomic_init_u32(&shhashent->refcount, 1);
308 :
309 : /*
310 : * Initialize "generation" to 0, as freshly created.
311 : */
312 621640 : pg_atomic_init_u32(&shhashent->generation, 0);
313 621640 : shhashent->dropped = false;
314 :
315 621640 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
316 621640 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
317 621640 : shheader->magic = 0xdeadbeef;
318 :
319 : /* Link the new entry from the hash entry. */
320 621640 : shhashent->body = chunk;
321 :
322 621640 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
323 :
324 621640 : return shheader;
325 : }
326 :
327 : static PgStatShared_Common *
328 60 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
329 : {
330 : PgStatShared_Common *shheader;
331 :
332 60 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
333 :
334 : /* mark as not dropped anymore */
335 60 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
336 :
337 : /*
338 : * Increment "generation", to let any backend with local references know
339 : * that what they point to is outdated.
340 : */
341 60 : pg_atomic_fetch_add_u32(&shhashent->generation, 1);
342 60 : shhashent->dropped = false;
343 :
344 : /* reinitialize content */
345 : Assert(shheader->magic == 0xdeadbeef);
346 60 : memset(pgstat_get_entry_data(kind, shheader), 0,
347 : pgstat_get_entry_len(kind));
348 :
349 60 : return shheader;
350 : }
351 :
352 : static void
353 7799706 : pgstat_setup_shared_refs(void)
354 : {
355 7799706 : if (likely(pgStatEntryRefHash != NULL))
356 7763184 : return;
357 :
358 36522 : pgStatEntryRefHash =
359 36522 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
360 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
361 36522 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
362 : Assert(pgStatSharedRefAge != 0);
363 : }
364 :
365 : /*
366 : * Helper function for pgstat_get_entry_ref().
367 : */
368 : static void
369 2038616 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
370 : PgStatShared_HashEntry *shhashent,
371 : PgStatShared_Common *shheader)
372 : {
373 : Assert(shheader->magic == 0xdeadbeef);
374 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
375 :
376 2038616 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
377 :
378 2038616 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
379 :
380 2038616 : entry_ref->shared_stats = shheader;
381 2038616 : entry_ref->shared_entry = shhashent;
382 2038616 : entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
383 2038616 : }
384 :
385 : /*
386 : * Helper function for pgstat_get_entry_ref().
387 : */
388 : static bool
389 7799706 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
390 : {
391 : bool found;
392 : PgStat_EntryRefHashEntry *cache_entry;
393 :
394 : /*
395 : * We immediately insert a cache entry, because it avoids 1) multiple
396 : * hashtable lookups in case of a cache miss 2) having to deal with
397 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
398 : */
399 :
400 7799706 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
401 :
402 7799706 : if (!found || !cache_entry->entry_ref)
403 2202572 : {
404 : PgStat_EntryRef *entry_ref;
405 :
406 2202572 : cache_entry->entry_ref = entry_ref =
407 2202572 : MemoryContextAlloc(pgStatSharedRefContext,
408 : sizeof(PgStat_EntryRef));
409 2202572 : entry_ref->shared_stats = NULL;
410 2202572 : entry_ref->shared_entry = NULL;
411 2202572 : entry_ref->pending = NULL;
412 :
413 2202572 : found = false;
414 : }
415 5597134 : else if (cache_entry->entry_ref->shared_stats == NULL)
416 : {
417 : Assert(cache_entry->entry_ref->pending == NULL);
418 0 : found = false;
419 : }
420 : else
421 : {
422 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
423 :
424 5597134 : entry_ref = cache_entry->entry_ref;
425 : Assert(entry_ref->shared_entry != NULL);
426 : Assert(entry_ref->shared_stats != NULL);
427 :
428 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
429 : /* should have at least our reference */
430 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
431 : }
432 :
433 7799706 : *entry_ref_p = cache_entry->entry_ref;
434 7799706 : return found;
435 : }
436 :
437 : /*
438 : * Get a shared stats reference. If create is true, the shared stats object is
439 : * created if it does not exist.
440 : *
441 : * When create is true, and created_entry is non-NULL, it'll be set to true
442 : * if the entry is newly created, false otherwise.
443 : */
444 : PgStat_EntryRef *
445 7799706 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
446 : bool *created_entry)
447 : {
448 : PgStat_HashKey key;
449 : PgStatShared_HashEntry *shhashent;
450 7799706 : PgStatShared_Common *shheader = NULL;
451 : PgStat_EntryRef *entry_ref;
452 :
453 : /* clear padding */
454 7799706 : memset(&key, 0, sizeof(struct PgStat_HashKey));
455 :
456 7799706 : key.kind = kind;
457 7799706 : key.dboid = dboid;
458 7799706 : key.objid = objid;
459 :
460 : /*
461 : * passing in created_entry only makes sense if we possibly could create
462 : * entry.
463 : */
464 : Assert(create || created_entry == NULL);
465 : pgstat_assert_is_up();
466 : Assert(pgStatLocal.shared_hash != NULL);
467 : Assert(!pgStatLocal.shmem->is_shutdown);
468 :
469 7799706 : pgstat_setup_memcxt();
470 7799706 : pgstat_setup_shared_refs();
471 :
472 7799706 : if (created_entry != NULL)
473 214 : *created_entry = false;
474 :
475 : /*
476 : * Check if other backends dropped stats that could not be deleted because
477 : * somebody held references to it. If so, check this backend's references.
478 : * This is not expected to happen often. The location of the check is a
479 : * bit random, but this is a relatively frequently called path, so better
480 : * than most.
481 : */
482 7799706 : if (pgstat_need_entry_refs_gc())
483 11654 : pgstat_gc_entry_refs();
484 :
485 : /*
486 : * First check the lookup cache hashtable in local memory. If we find a
487 : * match here we can avoid taking locks / causing contention.
488 : */
489 7799706 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
490 5597134 : return entry_ref;
491 :
492 : Assert(entry_ref != NULL);
493 :
494 : /*
495 : * Do a lookup in the hash table first - it's quite likely that the entry
496 : * already exists, and that way we only need a shared lock.
497 : */
498 2202572 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
499 :
500 2202572 : if (create && !shhashent)
501 : {
502 : bool shfound;
503 :
504 : /*
505 : * It's possible that somebody created the entry since the above
506 : * lookup. If so, fall through to the same path as if we'd have if it
507 : * already had been created before the dshash_find() calls.
508 : */
509 234626 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
510 234626 : if (!shfound)
511 : {
512 234624 : shheader = pgstat_init_entry(kind, shhashent);
513 234624 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
514 :
515 234624 : if (created_entry != NULL)
516 96 : *created_entry = true;
517 :
518 234624 : return entry_ref;
519 : }
520 : }
521 :
522 1967948 : if (!shhashent)
523 : {
524 : /*
525 : * If we're not creating, delete the reference again. In all
526 : * likelihood it's just a stats lookup - no point wasting memory for a
527 : * shared ref to nothing...
528 : */
529 163882 : pgstat_release_entry_ref(key, entry_ref, false);
530 :
531 163882 : return NULL;
532 : }
533 : else
534 : {
535 : /*
536 : * Can get here either because dshash_find() found a match, or if
537 : * dshash_find_or_insert() found a concurrently inserted entry.
538 : */
539 :
540 1804066 : if (shhashent->dropped && create)
541 : {
542 : /*
543 : * There are legitimate cases where the old stats entry might not
544 : * yet have been dropped by the time it's reused. The most obvious
545 : * case are replication slot stats, where a new slot can be
546 : * created with the same index just after dropping. But oid
547 : * wraparound can lead to other cases as well. We just reset the
548 : * stats to their plain state, while incrementing its "generation"
549 : * in the shared entry for any remaining local references.
550 : */
551 60 : shheader = pgstat_reinit_entry(kind, shhashent);
552 60 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
553 :
554 60 : if (created_entry != NULL)
555 0 : *created_entry = true;
556 :
557 60 : return entry_ref;
558 : }
559 1804006 : else if (shhashent->dropped)
560 : {
561 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
562 74 : pgstat_release_entry_ref(key, entry_ref, false);
563 :
564 74 : return NULL;
565 : }
566 : else
567 : {
568 1803932 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
569 1803932 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
570 :
571 1803932 : return entry_ref;
572 : }
573 : }
574 : }
575 :
576 : static void
577 2202572 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
578 : bool discard_pending)
579 : {
580 2202572 : if (entry_ref && entry_ref->pending)
581 : {
582 64452 : if (discard_pending)
583 64452 : pgstat_delete_pending_entry(entry_ref);
584 : else
585 0 : elog(ERROR, "releasing ref with pending data");
586 : }
587 :
588 2202572 : if (entry_ref && entry_ref->shared_stats)
589 : {
590 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
591 : Assert(entry_ref->pending == NULL);
592 :
593 : /*
594 : * This can't race with another backend looking up the stats entry and
595 : * increasing the refcount because it is not "legal" to create
596 : * additional references to dropped entries.
597 : */
598 2038616 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
599 : {
600 : PgStatShared_HashEntry *shent;
601 :
602 : /*
603 : * We're the last referrer to this entry, try to drop the shared
604 : * entry.
605 : */
606 :
607 : /* only dropped entries can reach a 0 refcount */
608 : Assert(entry_ref->shared_entry->dropped);
609 :
610 9366 : shent = dshash_find(pgStatLocal.shared_hash,
611 9366 : &entry_ref->shared_entry->key,
612 : true);
613 9366 : if (!shent)
614 0 : elog(ERROR, "could not find just referenced shared stats entry");
615 :
616 : /*
617 : * This entry may have been reinitialized while trying to release
618 : * it, so double-check that it has not been reused while holding a
619 : * lock on its shared entry.
620 : */
621 9366 : if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
622 9366 : entry_ref->generation)
623 : {
624 : /* Same "generation", so we're OK with the removal */
625 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
626 : Assert(entry_ref->shared_entry == shent);
627 9366 : pgstat_free_entry(shent, NULL);
628 : }
629 : else
630 : {
631 : /*
632 : * Shared stats entry has been reinitialized, so do not drop
633 : * its shared entry, only release its lock.
634 : */
635 0 : dshash_release_lock(pgStatLocal.shared_hash, shent);
636 : }
637 : }
638 : }
639 :
640 2202572 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
641 0 : elog(ERROR, "entry ref vanished before deletion");
642 :
643 2202572 : if (entry_ref)
644 2202572 : pfree(entry_ref);
645 2202572 : }
646 :
647 : /*
648 : * Acquire exclusive lock on the entry.
649 : *
650 : * If nowait is true, it's just a conditional acquire, and the result
651 : * *must* be checked to verify success.
652 : * If nowait is false, waits as necessary, always returning true.
653 : */
654 : bool
655 2526504 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
656 : {
657 2526504 : LWLock *lock = &entry_ref->shared_stats->lock;
658 :
659 2526504 : if (nowait)
660 664020 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
661 :
662 1862484 : LWLockAcquire(lock, LW_EXCLUSIVE);
663 1862484 : return true;
664 : }
665 :
666 : /*
667 : * Acquire shared lock on the entry.
668 : *
669 : * Separate from pgstat_lock_entry() as most callers will need to lock
670 : * exclusively. The wait semantics are identical.
671 : */
672 : bool
673 647114 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
674 : {
675 647114 : LWLock *lock = &entry_ref->shared_stats->lock;
676 :
677 647114 : if (nowait)
678 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
679 :
680 647114 : LWLockAcquire(lock, LW_SHARED);
681 647114 : return true;
682 : }
683 :
684 : void
685 3173576 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
686 : {
687 3173576 : LWLockRelease(&entry_ref->shared_stats->lock);
688 3173576 : }
689 :
690 : /*
691 : * Helper function to fetch and lock shared stats.
692 : */
693 : PgStat_EntryRef *
694 584982 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
695 : bool nowait)
696 : {
697 : PgStat_EntryRef *entry_ref;
698 :
699 : /* find shared table stats entry corresponding to the local entry */
700 584982 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
701 :
702 : /* lock the shared entry to protect the content, skip if failed */
703 584982 : if (!pgstat_lock_entry(entry_ref, nowait))
704 0 : return NULL;
705 :
706 584982 : return entry_ref;
707 : }
708 :
709 : void
710 3730 : pgstat_request_entry_refs_gc(void)
711 : {
712 3730 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
713 3730 : }
714 :
715 : static bool
716 7799706 : pgstat_need_entry_refs_gc(void)
717 : {
718 : uint64 curage;
719 :
720 7799706 : if (!pgStatEntryRefHash)
721 0 : return false;
722 :
723 : /* should have been initialized when creating pgStatEntryRefHash */
724 : Assert(pgStatSharedRefAge != 0);
725 :
726 7799706 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
727 :
728 7799706 : return pgStatSharedRefAge != curage;
729 : }
730 :
731 : static void
732 11654 : pgstat_gc_entry_refs(void)
733 : {
734 : pgstat_entry_ref_hash_iterator i;
735 : PgStat_EntryRefHashEntry *ent;
736 : uint64 curage;
737 :
738 11654 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
739 : Assert(curage != 0);
740 :
741 : /*
742 : * Some entries have been dropped or reinitialized. Invalidate cache
743 : * pointer to them.
744 : */
745 11654 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
746 879944 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
747 : {
748 868290 : PgStat_EntryRef *entry_ref = ent->entry_ref;
749 :
750 : Assert(!entry_ref->shared_stats ||
751 : entry_ref->shared_stats->magic == 0xdeadbeef);
752 :
753 : /*
754 : * "generation" checks for the case of entries being reinitialized,
755 : * and "dropped" for the case where these are.. dropped.
756 : */
757 868290 : if (!entry_ref->shared_entry->dropped &&
758 625072 : pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
759 625072 : entry_ref->generation)
760 625012 : continue;
761 :
762 : /* cannot gc shared ref that has pending data */
763 243278 : if (entry_ref->pending != NULL)
764 234118 : continue;
765 :
766 9160 : pgstat_release_entry_ref(ent->key, entry_ref, false);
767 : }
768 :
769 11654 : pgStatSharedRefAge = curage;
770 11654 : }
771 :
772 : static void
773 36592 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
774 : Datum match_data)
775 : {
776 : pgstat_entry_ref_hash_iterator i;
777 : PgStat_EntryRefHashEntry *ent;
778 :
779 36592 : if (pgStatEntryRefHash == NULL)
780 2 : return;
781 :
782 36590 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
783 :
784 1967160 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
785 1967160 : != NULL)
786 : {
787 : Assert(ent->entry_ref != NULL);
788 :
789 1930570 : if (match && !match(ent, match_data))
790 2048 : continue;
791 :
792 1928522 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
793 : }
794 : }
795 :
796 : /*
797 : * Release all local references to shared stats entries.
798 : *
799 : * When a process exits it cannot do so while still holding references onto
800 : * stats entries, otherwise the shared stats entries could never be freed.
801 : */
802 : static void
803 43262 : pgstat_release_all_entry_refs(bool discard_pending)
804 : {
805 43262 : if (pgStatEntryRefHash == NULL)
806 6740 : return;
807 :
808 36522 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
809 : Assert(pgStatEntryRefHash->members == 0);
810 36522 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
811 36522 : pgStatEntryRefHash = NULL;
812 : }
813 :
814 : static bool
815 2048 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
816 : {
817 2048 : Oid dboid = DatumGetObjectId(match_data);
818 :
819 2048 : return ent->key.dboid == dboid;
820 : }
821 :
822 : static void
823 70 : pgstat_release_db_entry_refs(Oid dboid)
824 : {
825 70 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
826 : match_db,
827 : ObjectIdGetDatum(dboid));
828 70 : }
829 :
830 :
831 : /* ------------------------------------------------------------
832 : * Dropping and resetting of stats entries
833 : * ------------------------------------------------------------
834 : */
835 :
836 : static void
837 109918 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
838 : {
839 : dsa_pointer pdsa;
840 :
841 : /*
842 : * Fetch dsa pointer before deleting entry - that way we can free the
843 : * memory after releasing the lock.
844 : */
845 109918 : pdsa = shent->body;
846 :
847 109918 : if (!hstat)
848 101152 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
849 : else
850 8766 : dshash_delete_current(hstat);
851 :
852 109918 : dsa_free(pgStatLocal.dsa, pdsa);
853 109918 : }
854 :
855 : /*
856 : * Helper for both pgstat_drop_database_and_contents() and
857 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
858 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
859 : * case the entry needs to be already locked.
860 : */
861 : static bool
862 109980 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
863 : dshash_seq_status *hstat)
864 : {
865 : Assert(shent->body != InvalidDsaPointer);
866 :
867 : /* should already have released local reference */
868 109980 : if (pgStatEntryRefHash)
869 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
870 :
871 : /*
872 : * Signal that the entry is dropped - this will eventually cause other
873 : * backends to release their references.
874 : */
875 109980 : if (shent->dropped)
876 0 : elog(ERROR,
877 : "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
878 : pgstat_get_kind_info(shent->key.kind)->name,
879 : shent->key.dboid,
880 : shent->key.objid,
881 : pg_atomic_read_u32(&shent->refcount),
882 : pg_atomic_read_u32(&shent->generation));
883 109980 : shent->dropped = true;
884 :
885 : /* release refcount marking entry as not dropped */
886 109980 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
887 : {
888 100552 : pgstat_free_entry(shent, hstat);
889 100552 : return true;
890 : }
891 : else
892 : {
893 9428 : if (!hstat)
894 9428 : dshash_release_lock(pgStatLocal.shared_hash, shent);
895 9428 : return false;
896 : }
897 : }
898 :
899 : /*
900 : * Drop stats for the database and all the objects inside that database.
901 : */
902 : static void
903 70 : pgstat_drop_database_and_contents(Oid dboid)
904 : {
905 : dshash_seq_status hstat;
906 : PgStatShared_HashEntry *p;
907 70 : uint64 not_freed_count = 0;
908 :
909 : Assert(OidIsValid(dboid));
910 :
911 : Assert(pgStatLocal.shared_hash != NULL);
912 :
913 : /*
914 : * This backend might very well be the only backend holding a reference to
915 : * about-to-be-dropped entries. Ensure that we're not preventing it from
916 : * being cleaned up till later.
917 : *
918 : * Doing this separately from the dshash iteration below avoids having to
919 : * do so while holding a partition lock on the shared hashtable.
920 : */
921 70 : pgstat_release_db_entry_refs(dboid);
922 :
923 : /* some of the dshash entries are to be removed, take exclusive lock. */
924 70 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
925 25994 : while ((p = dshash_seq_next(&hstat)) != NULL)
926 : {
927 25924 : if (p->dropped)
928 2 : continue;
929 :
930 25922 : if (p->key.dboid != dboid)
931 17270 : continue;
932 :
933 8652 : if (!pgstat_drop_entry_internal(p, &hstat))
934 : {
935 : /*
936 : * Even statistics for a dropped database might currently be
937 : * accessed (consider e.g. database stats for pg_stat_database).
938 : */
939 0 : not_freed_count++;
940 : }
941 : }
942 70 : dshash_seq_term(&hstat);
943 :
944 : /*
945 : * If some of the stats data could not be freed, signal the reference
946 : * holders to run garbage collection of their cached pgStatLocal.shmem.
947 : */
948 70 : if (not_freed_count > 0)
949 0 : pgstat_request_entry_refs_gc();
950 70 : }
951 :
952 : /*
953 : * Drop a single stats entry.
954 : *
955 : * This routine returns false if the stats entry of the dropped object could
956 : * not be freed, true otherwise.
957 : *
958 : * The callers of this function should call pgstat_request_entry_refs_gc()
959 : * if the stats entry could not be freed, to ensure that this entry's memory
960 : * can be reclaimed later by a different backend calling
961 : * pgstat_gc_entry_refs().
962 : */
963 : bool
964 146562 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
965 : {
966 : PgStat_HashKey key;
967 : PgStatShared_HashEntry *shent;
968 146562 : bool freed = true;
969 :
970 : /* clear padding */
971 146562 : memset(&key, 0, sizeof(struct PgStat_HashKey));
972 :
973 146562 : key.kind = kind;
974 146562 : key.dboid = dboid;
975 146562 : key.objid = objid;
976 :
977 : /* delete local reference */
978 146562 : if (pgStatEntryRefHash)
979 : {
980 : PgStat_EntryRefHashEntry *lohashent =
981 139812 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
982 :
983 139812 : if (lohashent)
984 100934 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
985 : true);
986 : }
987 :
988 : /* mark entry in shared hashtable as deleted, drop if possible */
989 146562 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
990 146562 : if (shent)
991 : {
992 101214 : freed = pgstat_drop_entry_internal(shent, NULL);
993 :
994 : /*
995 : * Database stats contain other stats. Drop those as well when
996 : * dropping the database. XXX: Perhaps this should be done in a
997 : * slightly more principled way? But not obvious what that'd look
998 : * like, and so far this is the only case...
999 : */
1000 101214 : if (key.kind == PGSTAT_KIND_DATABASE)
1001 70 : pgstat_drop_database_and_contents(key.dboid);
1002 : }
1003 :
1004 146562 : return freed;
1005 : }
1006 :
1007 : /*
1008 : * Scan through the shared hashtable of stats, dropping statistics if
1009 : * approved by the optional do_drop() function.
1010 : */
1011 : void
1012 460 : pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
1013 : Datum match_data)
1014 : {
1015 : dshash_seq_status hstat;
1016 : PgStatShared_HashEntry *ps;
1017 460 : uint64 not_freed_count = 0;
1018 :
1019 : /* entries are removed, take an exclusive lock */
1020 460 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
1021 638 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1022 : {
1023 178 : if (ps->dropped)
1024 0 : continue;
1025 :
1026 178 : if (do_drop != NULL && !do_drop(ps, match_data))
1027 64 : continue;
1028 :
1029 : /* delete local reference */
1030 114 : if (pgStatEntryRefHash)
1031 : {
1032 : PgStat_EntryRefHashEntry *lohashent =
1033 2 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);
1034 :
1035 2 : if (lohashent)
1036 0 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1037 : true);
1038 : }
1039 :
1040 114 : if (!pgstat_drop_entry_internal(ps, &hstat))
1041 0 : not_freed_count++;
1042 : }
1043 460 : dshash_seq_term(&hstat);
1044 :
1045 460 : if (not_freed_count > 0)
1046 0 : pgstat_request_entry_refs_gc();
1047 460 : }
1048 :
1049 : /*
1050 : * Scan through the shared hashtable of stats and drop all entries.
1051 : */
1052 : void
1053 458 : pgstat_drop_all_entries(void)
1054 : {
1055 458 : pgstat_drop_matching_entries(NULL, 0);
1056 458 : }
1057 :
1058 : static void
1059 17856 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1060 : TimestampTz ts)
1061 : {
1062 17856 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1063 :
1064 17856 : memset(pgstat_get_entry_data(kind, header), 0,
1065 : pgstat_get_entry_len(kind));
1066 :
1067 17856 : if (kind_info->reset_timestamp_cb)
1068 392 : kind_info->reset_timestamp_cb(header, ts);
1069 17856 : }
1070 :
1071 : /*
1072 : * Reset one variable-numbered stats entry.
1073 : */
1074 : void
1075 364 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1076 : {
1077 : PgStat_EntryRef *entry_ref;
1078 :
1079 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1080 :
1081 364 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1082 364 : if (!entry_ref || entry_ref->shared_entry->dropped)
1083 2 : return;
1084 :
1085 362 : (void) pgstat_lock_entry(entry_ref, false);
1086 362 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1087 362 : pgstat_unlock_entry(entry_ref);
1088 : }
1089 :
1090 : /*
1091 : * Scan through the shared hashtable of stats, resetting statistics if
1092 : * approved by the provided do_reset() function.
1093 : */
1094 : void
1095 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1096 : Datum match_data, TimestampTz ts)
1097 : {
1098 : dshash_seq_status hstat;
1099 : PgStatShared_HashEntry *p;
1100 :
1101 : /* dshash entry is not modified, take shared lock */
1102 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1103 26220 : while ((p = dshash_seq_next(&hstat)) != NULL)
1104 : {
1105 : PgStatShared_Common *header;
1106 :
1107 26186 : if (p->dropped)
1108 2 : continue;
1109 :
1110 26184 : if (!do_reset(p, match_data))
1111 8690 : continue;
1112 :
1113 17494 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1114 :
1115 17494 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1116 :
1117 17494 : shared_stat_reset_contents(p->key.kind, header, ts);
1118 :
1119 17494 : LWLockRelease(&header->lock);
1120 : }
1121 34 : dshash_seq_term(&hstat);
1122 34 : }
1123 :
1124 : static bool
1125 2964 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1126 : {
1127 2964 : return p->key.kind == DatumGetInt32(match_data);
1128 : }
1129 :
1130 : void
1131 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1132 : {
1133 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1134 8 : }
1135 :
1136 : static void
1137 7799706 : pgstat_setup_memcxt(void)
1138 : {
1139 7799706 : if (unlikely(!pgStatSharedRefContext))
1140 36522 : pgStatSharedRefContext =
1141 36522 : AllocSetContextCreate(TopMemoryContext,
1142 : "PgStat Shared Ref",
1143 : ALLOCSET_SMALL_SIZES);
1144 7799706 : if (unlikely(!pgStatEntryRefHashContext))
1145 36522 : pgStatEntryRefHashContext =
1146 36522 : AllocSetContextCreate(TopMemoryContext,
1147 : "PgStat Shared Ref Hash",
1148 : ALLOCSET_SMALL_SIZES);
1149 7799706 : }
|