Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128
22 :
23 : /* hash table entry for finding the PgStat_EntryRef for a key */
24 : typedef struct PgStat_EntryRefHashEntry
25 : {
26 : PgStat_HashKey key; /* hash key */
27 : char status; /* for simplehash use */
28 : PgStat_EntryRef *entry_ref;
29 : } PgStat_EntryRefHashEntry;
30 :
31 :
32 : /* for references to shared statistics entries */
33 : #define SH_PREFIX pgstat_entry_ref_hash
34 : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
35 : #define SH_KEY_TYPE PgStat_HashKey
36 : #define SH_KEY key
37 : #define SH_HASH_KEY(tb, key) \
38 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
39 : #define SH_EQUAL(tb, a, b) \
40 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
41 : #define SH_SCOPE static inline
42 : #define SH_DEFINE
43 : #define SH_DECLARE
44 : #include "lib/simplehash.h"
45 :
46 :
47 : static void pgstat_drop_database_and_contents(Oid dboid);
48 :
49 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
50 :
51 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
52 : static bool pgstat_need_entry_refs_gc(void);
53 : static void pgstat_gc_entry_refs(void);
54 : static void pgstat_release_all_entry_refs(bool discard_pending);
55 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
56 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
57 :
58 : static void pgstat_setup_memcxt(void);
59 :
60 :
61 : /* parameter for the shared hash */
62 : static const dshash_parameters dsh_params = {
63 : sizeof(PgStat_HashKey),
64 : sizeof(PgStatShared_HashEntry),
65 : pgstat_cmp_hash_key,
66 : pgstat_hash_hash_key,
67 : dshash_memcpy,
68 : LWTRANCHE_PGSTATS_HASH
69 : };
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatShmLookupCache */
84 :
85 : /*
86 : * Memory contexts containing the pgStatEntryRefHash table and the
87 : * pgStatSharedRef entries respectively. Kept separate to make it easier to
88 : * track / attribute memory usage.
89 : */
90 : static MemoryContext pgStatSharedRefContext = NULL;
91 : static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 10502 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 10502 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 10502 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 5132 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 5132 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 5132 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 5132 : return sz;
135 : }
136 :
137 : /*
138 : * Initialize cumulative statistics system during startup
139 : */
140 : void
141 1790 : StatsShmemInit(void)
142 : {
143 : bool found;
144 : Size sz;
145 :
146 1790 : sz = StatsShmemSize();
147 1790 : pgStatLocal.shmem = (PgStat_ShmemControl *)
148 1790 : ShmemInitStruct("Shared Memory Stats", sz, &found);
149 :
150 1790 : if (!IsUnderPostmaster)
151 : {
152 : dsa_area *dsa;
153 : dshash_table *dsh;
154 1790 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
155 1790 : char *p = (char *) ctl;
156 :
157 : Assert(!found);
158 :
159 : /* the allocation of pgStatLocal.shmem itself */
160 1790 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
161 :
162 : /*
163 : * Create a small dsa allocation in plain shared memory. This is
164 : * required because postmaster cannot use dsm segments. It also
165 : * provides a small efficiency win.
166 : */
167 1790 : ctl->raw_dsa_area = p;
168 1790 : p += MAXALIGN(pgstat_dsa_init_size());
169 1790 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
170 : pgstat_dsa_init_size(),
171 : LWTRANCHE_PGSTATS_DSA, 0);
172 1790 : dsa_pin(dsa);
173 :
174 : /*
175 : * To ensure dshash is created in "plain" shared memory, temporarily
176 : * limit size of dsa to the initial size of the dsa.
177 : */
178 1790 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
179 :
180 : /*
181 : * With the limit in place, create the dshash table. XXX: It'd be nice
182 : * if there were dshash_create_in_place().
183 : */
184 1790 : dsh = dshash_create(dsa, &dsh_params, NULL);
185 1790 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
186 :
187 : /* lift limit set above */
188 1790 : dsa_set_size_limit(dsa, -1);
189 :
190 : /*
191 : * Postmaster will never access these again, thus free the local
192 : * dsa/dshash references.
193 : */
194 1790 : dshash_detach(dsh);
195 1790 : dsa_detach(dsa);
196 :
197 1790 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
198 :
199 : /* initialize fixed-numbered stats */
200 21480 : for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
201 : {
202 19690 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
203 : char *ptr;
204 :
205 19690 : if (!kind_info->fixed_amount)
206 8950 : continue;
207 :
208 10740 : ptr = ((char *) ctl) + kind_info->shared_ctl_off;
209 10740 : kind_info->init_shmem_cb(ptr);
210 : }
211 : }
212 : else
213 : {
214 : Assert(found);
215 : }
216 1790 : }
217 :
218 : void
219 30132 : pgstat_attach_shmem(void)
220 : {
221 : MemoryContext oldcontext;
222 :
223 : Assert(pgStatLocal.dsa == NULL);
224 :
225 : /* stats shared memory persists for the backend lifetime */
226 30132 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
227 :
228 30132 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
229 : NULL);
230 30132 : dsa_pin_mapping(pgStatLocal.dsa);
231 :
232 60264 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
233 30132 : pgStatLocal.shmem->hash_handle, 0);
234 :
235 30132 : MemoryContextSwitchTo(oldcontext);
236 30132 : }
237 :
238 : void
239 30132 : pgstat_detach_shmem(void)
240 : {
241 : Assert(pgStatLocal.dsa);
242 :
243 : /* we shouldn't leave references to shared stats */
244 30132 : pgstat_release_all_entry_refs(false);
245 :
246 30132 : dshash_detach(pgStatLocal.shared_hash);
247 30132 : pgStatLocal.shared_hash = NULL;
248 :
249 30132 : dsa_detach(pgStatLocal.dsa);
250 :
251 : /*
252 : * dsa_detach() does not decrement the DSA reference count as no segment
253 : * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
254 : * be registered. Hence, release it manually now.
255 : */
256 30132 : dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
257 :
258 30132 : pgStatLocal.dsa = NULL;
259 30132 : }
260 :
261 :
262 : /* ------------------------------------------------------------
263 : * Maintenance of shared memory stats entries
264 : * ------------------------------------------------------------
265 : */
266 :
267 : PgStatShared_Common *
268 476456 : pgstat_init_entry(PgStat_Kind kind,
269 : PgStatShared_HashEntry *shhashent)
270 : {
271 : /* Create new stats entry. */
272 : dsa_pointer chunk;
273 : PgStatShared_Common *shheader;
274 :
275 : /*
276 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
277 : * can't be freed before the initialization because it can't be found as
278 : * long as we hold the dshash partition lock. Caller needs to increase
279 : * further if a longer lived reference is needed.
280 : */
281 476456 : pg_atomic_init_u32(&shhashent->refcount, 1);
282 476456 : shhashent->dropped = false;
283 :
284 476456 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
285 476456 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
286 476456 : shheader->magic = 0xdeadbeef;
287 :
288 : /* Link the new entry from the hash entry. */
289 476456 : shhashent->body = chunk;
290 :
291 476456 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
292 :
293 476456 : return shheader;
294 : }
295 :
296 : static PgStatShared_Common *
297 54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
298 : {
299 : PgStatShared_Common *shheader;
300 :
301 54 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
302 :
303 : /* mark as not dropped anymore */
304 54 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
305 54 : shhashent->dropped = false;
306 :
307 : /* reinitialize content */
308 : Assert(shheader->magic == 0xdeadbeef);
309 54 : memset(pgstat_get_entry_data(kind, shheader), 0,
310 : pgstat_get_entry_len(kind));
311 :
312 54 : return shheader;
313 : }
314 :
315 : static void
316 2898072 : pgstat_setup_shared_refs(void)
317 : {
318 2898072 : if (likely(pgStatEntryRefHash != NULL))
319 2872184 : return;
320 :
321 25888 : pgStatEntryRefHash =
322 25888 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
323 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
324 25888 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
325 : Assert(pgStatSharedRefAge != 0);
326 : }
327 :
328 : /*
329 : * Helper function for pgstat_get_entry_ref().
330 : */
331 : static void
332 1132472 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
333 : PgStatShared_HashEntry *shhashent,
334 : PgStatShared_Common *shheader)
335 : {
336 : Assert(shheader->magic == 0xdeadbeef);
337 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
338 :
339 1132472 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
340 :
341 1132472 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
342 :
343 1132472 : entry_ref->shared_stats = shheader;
344 1132472 : entry_ref->shared_entry = shhashent;
345 1132472 : }
346 :
347 : /*
348 : * Helper function for pgstat_get_entry_ref().
349 : */
350 : static bool
351 2898072 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
352 : {
353 : bool found;
354 : PgStat_EntryRefHashEntry *cache_entry;
355 :
356 : /*
357 : * We immediately insert a cache entry, because it avoids 1) multiple
358 : * hashtable lookups in case of a cache miss 2) having to deal with
359 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
360 : */
361 :
362 2898072 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
363 :
364 2898072 : if (!found || !cache_entry->entry_ref)
365 1269714 : {
366 : PgStat_EntryRef *entry_ref;
367 :
368 1269714 : cache_entry->entry_ref = entry_ref =
369 1269714 : MemoryContextAlloc(pgStatSharedRefContext,
370 : sizeof(PgStat_EntryRef));
371 1269714 : entry_ref->shared_stats = NULL;
372 1269714 : entry_ref->shared_entry = NULL;
373 1269714 : entry_ref->pending = NULL;
374 :
375 1269714 : found = false;
376 : }
377 1628358 : else if (cache_entry->entry_ref->shared_stats == NULL)
378 : {
379 : Assert(cache_entry->entry_ref->pending == NULL);
380 0 : found = false;
381 : }
382 : else
383 : {
384 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
385 :
386 1628358 : entry_ref = cache_entry->entry_ref;
387 : Assert(entry_ref->shared_entry != NULL);
388 : Assert(entry_ref->shared_stats != NULL);
389 :
390 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
391 : /* should have at least our reference */
392 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
393 : }
394 :
395 2898072 : *entry_ref_p = cache_entry->entry_ref;
396 2898072 : return found;
397 : }
398 :
399 : /*
400 : * Get a shared stats reference. If create is true, the shared stats object is
401 : * created if it does not exist.
402 : *
403 : * When create is true, and created_entry is non-NULL, it'll be set to true
404 : * if the entry is newly created, false otherwise.
405 : */
406 : PgStat_EntryRef *
407 2898072 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
408 : bool *created_entry)
409 : {
410 2898072 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
411 : PgStatShared_HashEntry *shhashent;
412 2898072 : PgStatShared_Common *shheader = NULL;
413 : PgStat_EntryRef *entry_ref;
414 :
415 : /*
416 : * passing in created_entry only makes sense if we possibly could create
417 : * entry.
418 : */
419 : Assert(create || created_entry == NULL);
420 : pgstat_assert_is_up();
421 : Assert(pgStatLocal.shared_hash != NULL);
422 : Assert(!pgStatLocal.shmem->is_shutdown);
423 :
424 2898072 : pgstat_setup_memcxt();
425 2898072 : pgstat_setup_shared_refs();
426 :
427 2898072 : if (created_entry != NULL)
428 214 : *created_entry = false;
429 :
430 : /*
431 : * Check if other backends dropped stats that could not be deleted because
432 : * somebody held references to it. If so, check this backend's references.
433 : * This is not expected to happen often. The location of the check is a
434 : * bit random, but this is a relatively frequently called path, so better
435 : * than most.
436 : */
437 2898072 : if (pgstat_need_entry_refs_gc())
438 11304 : pgstat_gc_entry_refs();
439 :
440 : /*
441 : * First check the lookup cache hashtable in local memory. If we find a
442 : * match here we can avoid taking locks / causing contention.
443 : */
444 2898072 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
445 1628358 : return entry_ref;
446 :
447 : Assert(entry_ref != NULL);
448 :
449 : /*
450 : * Do a lookup in the hash table first - it's quite likely that the entry
451 : * already exists, and that way we only need a shared lock.
452 : */
453 1269714 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
454 :
455 1269714 : if (create && !shhashent)
456 : {
457 : bool shfound;
458 :
459 : /*
460 : * It's possible that somebody created the entry since the above
461 : * lookup. If so, fall through to the same path as if we'd have if it
462 : * already had been created before the dshash_find() calls.
463 : */
464 167224 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
465 167224 : if (!shfound)
466 : {
467 167222 : shheader = pgstat_init_entry(kind, shhashent);
468 167222 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
469 :
470 167222 : if (created_entry != NULL)
471 96 : *created_entry = true;
472 :
473 167222 : return entry_ref;
474 : }
475 : }
476 :
477 1102492 : if (!shhashent)
478 : {
479 : /*
480 : * If we're not creating, delete the reference again. In all
481 : * likelihood it's just a stats lookup - no point wasting memory for a
482 : * shared ref to nothing...
483 : */
484 137168 : pgstat_release_entry_ref(key, entry_ref, false);
485 :
486 137168 : return NULL;
487 : }
488 : else
489 : {
490 : /*
491 : * Can get here either because dshash_find() found a match, or if
492 : * dshash_find_or_insert() found a concurrently inserted entry.
493 : */
494 :
495 965324 : if (shhashent->dropped && create)
496 : {
497 : /*
498 : * There are legitimate cases where the old stats entry might not
499 : * yet have been dropped by the time it's reused. The most obvious
500 : * case are replication slot stats, where a new slot can be
501 : * created with the same index just after dropping. But oid
502 : * wraparound can lead to other cases as well. We just reset the
503 : * stats to their plain state.
504 : */
505 54 : shheader = pgstat_reinit_entry(kind, shhashent);
506 54 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
507 :
508 54 : if (created_entry != NULL)
509 0 : *created_entry = true;
510 :
511 54 : return entry_ref;
512 : }
513 965270 : else if (shhashent->dropped)
514 : {
515 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
516 74 : pgstat_release_entry_ref(key, entry_ref, false);
517 :
518 74 : return NULL;
519 : }
520 : else
521 : {
522 965196 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
523 965196 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
524 :
525 965196 : return entry_ref;
526 : }
527 : }
528 : }
529 :
530 : static void
531 1269714 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
532 : bool discard_pending)
533 : {
534 1269714 : if (entry_ref && entry_ref->pending)
535 : {
536 61428 : if (discard_pending)
537 61428 : pgstat_delete_pending_entry(entry_ref);
538 : else
539 0 : elog(ERROR, "releasing ref with pending data");
540 : }
541 :
542 1269714 : if (entry_ref && entry_ref->shared_stats)
543 : {
544 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
545 : Assert(entry_ref->pending == NULL);
546 :
547 : /*
548 : * This can't race with another backend looking up the stats entry and
549 : * increasing the refcount because it is not "legal" to create
550 : * additional references to dropped entries.
551 : */
552 1132472 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
553 : {
554 : PgStatShared_HashEntry *shent;
555 :
556 : /*
557 : * We're the last referrer to this entry, try to drop the shared
558 : * entry.
559 : */
560 :
561 : /* only dropped entries can reach a 0 refcount */
562 : Assert(entry_ref->shared_entry->dropped);
563 :
564 9548 : shent = dshash_find(pgStatLocal.shared_hash,
565 9548 : &entry_ref->shared_entry->key,
566 : true);
567 9548 : if (!shent)
568 0 : elog(ERROR, "could not find just referenced shared stats entry");
569 :
570 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
571 : Assert(entry_ref->shared_entry == shent);
572 :
573 9548 : pgstat_free_entry(shent, NULL);
574 : }
575 : }
576 :
577 1269714 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
578 0 : elog(ERROR, "entry ref vanished before deletion");
579 :
580 1269714 : if (entry_ref)
581 1269714 : pfree(entry_ref);
582 1269714 : }
583 :
584 : bool
585 1362592 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
586 : {
587 1362592 : LWLock *lock = &entry_ref->shared_stats->lock;
588 :
589 1362592 : if (nowait)
590 501160 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
591 :
592 861432 : LWLockAcquire(lock, LW_EXCLUSIVE);
593 861432 : return true;
594 : }
595 :
596 : /*
597 : * Separate from pgstat_lock_entry() as most callers will need to lock
598 : * exclusively.
599 : */
600 : bool
601 11308 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
602 : {
603 11308 : LWLock *lock = &entry_ref->shared_stats->lock;
604 :
605 11308 : if (nowait)
606 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
607 :
608 11308 : LWLockAcquire(lock, LW_SHARED);
609 11308 : return true;
610 : }
611 :
612 : void
613 1373900 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
614 : {
615 1373900 : LWLockRelease(&entry_ref->shared_stats->lock);
616 1373900 : }
617 :
618 : /*
619 : * Helper function to fetch and lock shared stats.
620 : */
621 : PgStat_EntryRef *
622 44084 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
623 : bool nowait)
624 : {
625 : PgStat_EntryRef *entry_ref;
626 :
627 : /* find shared table stats entry corresponding to the local entry */
628 44084 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL);
629 :
630 : /* lock the shared entry to protect the content, skip if failed */
631 44084 : if (!pgstat_lock_entry(entry_ref, nowait))
632 0 : return NULL;
633 :
634 44084 : return entry_ref;
635 : }
636 :
637 : void
638 3726 : pgstat_request_entry_refs_gc(void)
639 : {
640 3726 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
641 3726 : }
642 :
643 : static bool
644 2898072 : pgstat_need_entry_refs_gc(void)
645 : {
646 : uint64 curage;
647 :
648 2898072 : if (!pgStatEntryRefHash)
649 0 : return false;
650 :
651 : /* should have been initialized when creating pgStatEntryRefHash */
652 : Assert(pgStatSharedRefAge != 0);
653 :
654 2898072 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
655 :
656 2898072 : return pgStatSharedRefAge != curage;
657 : }
658 :
659 : static void
660 11304 : pgstat_gc_entry_refs(void)
661 : {
662 : pgstat_entry_ref_hash_iterator i;
663 : PgStat_EntryRefHashEntry *ent;
664 : uint64 curage;
665 :
666 11304 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
667 : Assert(curage != 0);
668 :
669 : /*
670 : * Some entries have been dropped. Invalidate cache pointer to them.
671 : */
672 11304 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
673 869876 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
674 : {
675 858572 : PgStat_EntryRef *entry_ref = ent->entry_ref;
676 :
677 : Assert(!entry_ref->shared_stats ||
678 : entry_ref->shared_stats->magic == 0xdeadbeef);
679 :
680 858572 : if (!entry_ref->shared_entry->dropped)
681 589846 : continue;
682 :
683 : /* cannot gc shared ref that has pending data */
684 268726 : if (entry_ref->pending != NULL)
685 260310 : continue;
686 :
687 8416 : pgstat_release_entry_ref(ent->key, entry_ref, false);
688 : }
689 :
690 11304 : pgStatSharedRefAge = curage;
691 11304 : }
692 :
693 : static void
694 25932 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
695 : Datum match_data)
696 : {
697 : pgstat_entry_ref_hash_iterator i;
698 : PgStat_EntryRefHashEntry *ent;
699 :
700 25932 : if (pgStatEntryRefHash == NULL)
701 12 : return;
702 :
703 25920 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
704 :
705 1088794 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
706 : != NULL)
707 : {
708 : Assert(ent->entry_ref != NULL);
709 :
710 1062874 : if (match && !match(ent, match_data))
711 1020 : continue;
712 :
713 1061854 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
714 : }
715 : }
716 :
717 : /*
718 : * Release all local references to shared stats entries.
719 : *
720 : * When a process exits it cannot do so while still holding references onto
721 : * stats entries, otherwise the shared stats entries could never be freed.
722 : */
723 : static void
724 30132 : pgstat_release_all_entry_refs(bool discard_pending)
725 : {
726 30132 : if (pgStatEntryRefHash == NULL)
727 4244 : return;
728 :
729 25888 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
730 : Assert(pgStatEntryRefHash->members == 0);
731 25888 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
732 25888 : pgStatEntryRefHash = NULL;
733 : }
734 :
735 : static bool
736 1020 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
737 : {
738 1020 : Oid dboid = DatumGetObjectId(match_data);
739 :
740 1020 : return ent->key.dboid == dboid;
741 : }
742 :
743 : static void
744 44 : pgstat_release_db_entry_refs(Oid dboid)
745 : {
746 44 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
747 : match_db,
748 : ObjectIdGetDatum(dboid));
749 44 : }
750 :
751 :
752 : /* ------------------------------------------------------------
753 : * Dropping and resetting of stats entries
754 : * ------------------------------------------------------------
755 : */
756 :
757 : static void
758 66462 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
759 : {
760 : dsa_pointer pdsa;
761 :
762 : /*
763 : * Fetch dsa pointer before deleting entry - that way we can free the
764 : * memory after releasing the lock.
765 : */
766 66462 : pdsa = shent->body;
767 :
768 66462 : if (!hstat)
769 62374 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
770 : else
771 4088 : dshash_delete_current(hstat);
772 :
773 66462 : dsa_free(pgStatLocal.dsa, pdsa);
774 66462 : }
775 :
776 : /*
777 : * Helper for both pgstat_drop_database_and_contents() and
778 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
779 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
780 : * case the entry needs to be already locked.
781 : */
782 : static bool
783 66518 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
784 : dshash_seq_status *hstat)
785 : {
786 : Assert(shent->body != InvalidDsaPointer);
787 :
788 : /* should already have released local reference */
789 66518 : if (pgStatEntryRefHash)
790 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
791 :
792 : /*
793 : * Signal that the entry is dropped - this will eventually cause other
794 : * backends to release their references.
795 : */
796 66518 : if (shent->dropped)
797 0 : elog(ERROR,
798 : "trying to drop stats entry already dropped: kind=%s dboid=%u objoid=%u refcount=%u",
799 : pgstat_get_kind_info(shent->key.kind)->name,
800 : shent->key.dboid, shent->key.objoid,
801 : pg_atomic_read_u32(&shent->refcount));
802 66518 : shent->dropped = true;
803 :
804 : /* release refcount marking entry as not dropped */
805 66518 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
806 : {
807 56914 : pgstat_free_entry(shent, hstat);
808 56914 : return true;
809 : }
810 : else
811 : {
812 9604 : if (!hstat)
813 9604 : dshash_release_lock(pgStatLocal.shared_hash, shent);
814 9604 : return false;
815 : }
816 : }
817 :
818 : /*
819 : * Drop stats for the database and all the objects inside that database.
820 : */
821 : static void
822 44 : pgstat_drop_database_and_contents(Oid dboid)
823 : {
824 : dshash_seq_status hstat;
825 : PgStatShared_HashEntry *p;
826 44 : uint64 not_freed_count = 0;
827 :
828 : Assert(OidIsValid(dboid));
829 :
830 : Assert(pgStatLocal.shared_hash != NULL);
831 :
832 : /*
833 : * This backend might very well be the only backend holding a reference to
834 : * about-to-be-dropped entries. Ensure that we're not preventing it from
835 : * being cleaned up till later.
836 : *
837 : * Doing this separately from the dshash iteration below avoids having to
838 : * do so while holding a partition lock on the shared hashtable.
839 : */
840 44 : pgstat_release_db_entry_refs(dboid);
841 :
842 : /* some of the dshash entries are to be removed, take exclusive lock. */
843 44 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
844 15116 : while ((p = dshash_seq_next(&hstat)) != NULL)
845 : {
846 15072 : if (p->dropped)
847 2 : continue;
848 :
849 15070 : if (p->key.dboid != dboid)
850 11094 : continue;
851 :
852 3976 : if (!pgstat_drop_entry_internal(p, &hstat))
853 : {
854 : /*
855 : * Even statistics for a dropped database might currently be
856 : * accessed (consider e.g. database stats for pg_stat_database).
857 : */
858 0 : not_freed_count++;
859 : }
860 : }
861 44 : dshash_seq_term(&hstat);
862 :
863 : /*
864 : * If some of the stats data could not be freed, signal the reference
865 : * holders to run garbage collection of their cached pgStatShmLookupCache.
866 : */
867 44 : if (not_freed_count > 0)
868 0 : pgstat_request_entry_refs_gc();
869 44 : }
870 :
871 : /*
872 : * Drop a single stats entry.
873 : *
874 : * This routine returns false if the stats entry of the dropped object could
875 : * not be freed, true otherwise.
876 : *
877 : * The callers of this function should call pgstat_request_entry_refs_gc()
878 : * if the stats entry could not be freed, to ensure that this entry's memory
879 : * can be reclaimed later by a different backend calling
880 : * pgstat_gc_entry_refs().
881 : */
882 : bool
883 92526 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
884 : {
885 92526 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
886 : PgStatShared_HashEntry *shent;
887 92526 : bool freed = true;
888 :
889 : /* delete local reference */
890 92526 : if (pgStatEntryRefHash)
891 : {
892 : PgStat_EntryRefHashEntry *lohashent =
893 75470 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
894 :
895 75470 : if (lohashent)
896 62202 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
897 : true);
898 : }
899 :
900 : /* mark entry in shared hashtable as deleted, drop if possible */
901 92526 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
902 92526 : if (shent)
903 : {
904 62430 : freed = pgstat_drop_entry_internal(shent, NULL);
905 :
906 : /*
907 : * Database stats contain other stats. Drop those as well when
908 : * dropping the database. XXX: Perhaps this should be done in a
909 : * slightly more principled way? But not obvious what that'd look
910 : * like, and so far this is the only case...
911 : */
912 62430 : if (key.kind == PGSTAT_KIND_DATABASE)
913 44 : pgstat_drop_database_and_contents(key.dboid);
914 : }
915 :
916 92526 : return freed;
917 : }
918 :
919 : void
920 416 : pgstat_drop_all_entries(void)
921 : {
922 : dshash_seq_status hstat;
923 : PgStatShared_HashEntry *ps;
924 416 : uint64 not_freed_count = 0;
925 :
926 416 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
927 528 : while ((ps = dshash_seq_next(&hstat)) != NULL)
928 : {
929 112 : if (ps->dropped)
930 0 : continue;
931 :
932 112 : if (!pgstat_drop_entry_internal(ps, &hstat))
933 0 : not_freed_count++;
934 : }
935 416 : dshash_seq_term(&hstat);
936 :
937 416 : if (not_freed_count > 0)
938 0 : pgstat_request_entry_refs_gc();
939 416 : }
940 :
941 : static void
942 16476 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
943 : TimestampTz ts)
944 : {
945 16476 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
946 :
947 16476 : memset(pgstat_get_entry_data(kind, header), 0,
948 : pgstat_get_entry_len(kind));
949 :
950 16476 : if (kind_info->reset_timestamp_cb)
951 344 : kind_info->reset_timestamp_cb(header, ts);
952 16476 : }
953 :
954 : /*
955 : * Reset one variable-numbered stats entry.
956 : */
957 : void
958 316 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
959 : {
960 : PgStat_EntryRef *entry_ref;
961 :
962 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
963 :
964 316 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
965 316 : if (!entry_ref || entry_ref->shared_entry->dropped)
966 2 : return;
967 :
968 314 : (void) pgstat_lock_entry(entry_ref, false);
969 314 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
970 314 : pgstat_unlock_entry(entry_ref);
971 : }
972 :
973 : /*
974 : * Scan through the shared hashtable of stats, resetting statistics if
975 : * approved by the provided do_reset() function.
976 : */
977 : void
978 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
979 : Datum match_data, TimestampTz ts)
980 : {
981 : dshash_seq_status hstat;
982 : PgStatShared_HashEntry *p;
983 :
984 : /* dshash entry is not modified, take shared lock */
985 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
986 24322 : while ((p = dshash_seq_next(&hstat)) != NULL)
987 : {
988 : PgStatShared_Common *header;
989 :
990 24288 : if (p->dropped)
991 2 : continue;
992 :
993 24286 : if (!do_reset(p, match_data))
994 8124 : continue;
995 :
996 16162 : header = dsa_get_address(pgStatLocal.dsa, p->body);
997 :
998 16162 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
999 :
1000 16162 : shared_stat_reset_contents(p->key.kind, header, ts);
1001 :
1002 16162 : LWLockRelease(&header->lock);
1003 : }
1004 34 : dshash_seq_term(&hstat);
1005 34 : }
1006 :
1007 : static bool
1008 2916 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1009 : {
1010 2916 : return p->key.kind == DatumGetInt32(match_data);
1011 : }
1012 :
1013 : void
1014 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1015 : {
1016 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1017 8 : }
1018 :
1019 : static void
1020 2898072 : pgstat_setup_memcxt(void)
1021 : {
1022 2898072 : if (unlikely(!pgStatSharedRefContext))
1023 25888 : pgStatSharedRefContext =
1024 25888 : AllocSetContextCreate(TopMemoryContext,
1025 : "PgStat Shared Ref",
1026 : ALLOCSET_SMALL_SIZES);
1027 2898072 : if (unlikely(!pgStatEntryRefHashContext))
1028 25888 : pgStatEntryRefHashContext =
1029 25888 : AllocSetContextCreate(TopMemoryContext,
1030 : "PgStat Shared Ref Hash",
1031 : ALLOCSET_SMALL_SIZES);
1032 2898072 : }
|