Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128 /* element count handed to pgstat_entry_ref_hash_create() */
22 :
23 : /* hash table entry for finding the PgStat_EntryRef for a key; element type of the backend-local pgStatEntryRefHash */
24 : typedef struct PgStat_EntryRefHashEntry
25 : {
26 : PgStat_HashKey key; /* hash key */
27 : char status; /* for simplehash use */
28 : PgStat_EntryRef *entry_ref; /* reference for this key, allocated in pgStatSharedRefContext */
29 : } PgStat_EntryRefHashEntry;
30 :
31 :
32 : /* for references to shared statistics entries: instantiates the pgstat_entry_ref_hash_* functions (create, insert, lookup, delete, iterate, destroy) used in this file */
33 : #define SH_PREFIX pgstat_entry_ref_hash
34 : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
35 : #define SH_KEY_TYPE PgStat_HashKey
36 : #define SH_KEY key
37 : #define SH_HASH_KEY(tb, key) \
38 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
39 : #define SH_EQUAL(tb, a, b) \
40 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
41 : #define SH_SCOPE static inline
42 : #define SH_DEFINE
43 : #define SH_DECLARE
44 : #include "lib/simplehash.h"
45 :
46 :
47 : static void pgstat_drop_database_and_contents(Oid dboid); /* drops every shared entry whose key.dboid matches */
48 :
49 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat); /* removes the hash entry and frees its body */
50 :
51 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending); /* drops one backend-local reference */
52 : static bool pgstat_need_entry_refs_gc(void); /* has gc_request_count moved since we last looked? */
53 : static void pgstat_gc_entry_refs(void); /* releases local references to dropped entries */
54 : static void pgstat_release_all_entry_refs(bool discard_pending);
55 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data); /* predicate selecting which local references to release */
56 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
57 :
58 : static void pgstat_setup_memcxt(void); /* lazily creates the two memory contexts declared below */
59 :
60 :
61 : /* parameter for the shared hash; passed to both dshash_create() and dshash_attach() so all processes agree on them */
62 : static const dshash_parameters dsh_params = {
63 : sizeof(PgStat_HashKey), /* size of a key */
64 : sizeof(PgStatShared_HashEntry), /* size of a hash entry */
65 : pgstat_cmp_hash_key, /* key comparison callback */
66 : pgstat_hash_hash_key, /* key hashing callback */
67 : dshash_memcpy, /* key copy callback */
68 : LWTRANCHE_PGSTATS_HASH /* LWLock tranche used by the hash table's locks */
69 : };
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatShmLookupCache */
84 :
85 : /*
86 : * Memory contexts containing the PgStat_EntryRef entries
87 : * (pgStatSharedRefContext) and the pgStatEntryRefHash table
88 : * (pgStatEntryRefHashContext). Kept separate to make it easier to track /
89 : * attribute memory usage. Both are created lazily by pgstat_setup_memcxt(). */
90 : static MemoryContext pgStatSharedRefContext = NULL;
91 : static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 10370 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 10370 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 10370 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 5066 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 5066 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 5066 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 5066 : return sz;
135 : }
136 :
137 : /*
138 : * Initialize cumulative statistics system during startup
139 : */
140 : void
141 1768 : StatsShmemInit(void)
142 : {
143 : bool found;
144 : Size sz;
145 :
146 1768 : sz = StatsShmemSize();
147 1768 : pgStatLocal.shmem = (PgStat_ShmemControl *)
148 1768 : ShmemInitStruct("Shared Memory Stats", sz, &found);
149 :
150 1768 : if (!IsUnderPostmaster)
151 : {
152 : dsa_area *dsa;
153 : dshash_table *dsh;
154 1768 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
155 1768 : char *p = (char *) ctl;
156 :
157 : Assert(!found);
158 :
159 : /* the allocation of pgStatLocal.shmem itself */
160 1768 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
161 :
162 : /*
163 : * Create a small dsa allocation in plain shared memory. This is
164 : * required because postmaster cannot use dsm segments. It also
165 : * provides a small efficiency win.
166 : */
167 1768 : ctl->raw_dsa_area = p;
168 1768 : p += MAXALIGN(pgstat_dsa_init_size());
169 1768 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
170 : pgstat_dsa_init_size(),
171 : LWTRANCHE_PGSTATS_DSA, 0);
172 1768 : dsa_pin(dsa);
173 :
174 : /*
175 : * To ensure dshash is created in "plain" shared memory, temporarily
176 : * limit size of dsa to the initial size of the dsa.
177 : */
178 1768 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
179 :
180 : /*
181 : * With the limit in place, create the dshash table. XXX: It'd be nice
182 : * if there were dshash_create_in_place().
183 : */
184 1768 : dsh = dshash_create(dsa, &dsh_params, NULL);
185 1768 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
186 :
187 : /* lift limit set above */
188 1768 : dsa_set_size_limit(dsa, -1);
189 :
190 : /*
191 : * Postmaster will never access these again, thus free the local
192 : * dsa/dshash references.
193 : */
194 1768 : dshash_detach(dsh);
195 1768 : dsa_detach(dsa);
196 :
197 1768 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
198 :
199 :
200 : /* initialize fixed-numbered stats */
201 1768 : LWLockInitialize(&ctl->archiver.lock, LWTRANCHE_PGSTATS_DATA);
202 1768 : LWLockInitialize(&ctl->bgwriter.lock, LWTRANCHE_PGSTATS_DATA);
203 1768 : LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA);
204 1768 : LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA);
205 1768 : LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA);
206 :
207 30056 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
208 28288 : LWLockInitialize(&ctl->io.locks[i],
209 : LWTRANCHE_PGSTATS_DATA);
210 : }
211 : else
212 : {
213 : Assert(found);
214 : }
215 1768 : }
216 :
217 : void
218 29874 : pgstat_attach_shmem(void)
219 : {
220 : MemoryContext oldcontext;
221 :
222 : Assert(pgStatLocal.dsa == NULL);
223 :
224 : /* stats shared memory persists for the backend lifetime */
225 29874 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
226 :
227 29874 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
228 : NULL);
229 29874 : dsa_pin_mapping(pgStatLocal.dsa);
230 :
231 59748 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
232 29874 : pgStatLocal.shmem->hash_handle, 0);
233 :
234 29874 : MemoryContextSwitchTo(oldcontext);
235 29874 : }
236 :
237 : void
238 29874 : pgstat_detach_shmem(void)
239 : {
240 : Assert(pgStatLocal.dsa);
241 :
242 : /* we shouldn't leave references to shared stats */
243 29874 : pgstat_release_all_entry_refs(false);
244 :
245 29874 : dshash_detach(pgStatLocal.shared_hash);
246 29874 : pgStatLocal.shared_hash = NULL;
247 :
248 29874 : dsa_detach(pgStatLocal.dsa);
249 29874 : pgStatLocal.dsa = NULL;
250 29874 : }
251 :
252 :
253 : /* ------------------------------------------------------------
254 : * Maintenance of shared memory stats entries
255 : * ------------------------------------------------------------
256 : */
257 :
258 : PgStatShared_Common *
259 470484 : pgstat_init_entry(PgStat_Kind kind,
260 : PgStatShared_HashEntry *shhashent)
261 : {
262 : /* Create new stats entry. */
263 : dsa_pointer chunk;
264 : PgStatShared_Common *shheader;
265 :
266 : /*
267 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
268 : * can't be freed before the initialization because it can't be found as
269 : * long as we hold the dshash partition lock. Caller needs to increase
270 : * further if a longer lived reference is needed.
271 : */
272 470484 : pg_atomic_init_u32(&shhashent->refcount, 1);
273 470484 : shhashent->dropped = false;
274 :
275 470484 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
276 470484 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
277 470484 : shheader->magic = 0xdeadbeef;
278 :
279 : /* Link the new entry from the hash entry. */
280 470484 : shhashent->body = chunk;
281 :
282 470484 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
283 :
284 470484 : return shheader;
285 : }
286 :
287 : static PgStatShared_Common *
288 54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
289 : {
290 : PgStatShared_Common *shheader;
291 :
292 54 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
293 :
294 : /* mark as not dropped anymore */
295 54 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
296 54 : shhashent->dropped = false;
297 :
298 : /* reinitialize content */
299 : Assert(shheader->magic == 0xdeadbeef);
300 54 : memset(pgstat_get_entry_data(kind, shheader), 0,
301 : pgstat_get_entry_len(kind));
302 :
303 54 : return shheader;
304 : }
305 :
306 : static void
307 2908548 : pgstat_setup_shared_refs(void)
308 : {
309 2908548 : if (likely(pgStatEntryRefHash != NULL))
310 2882850 : return;
311 :
312 25698 : pgStatEntryRefHash =
313 25698 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
314 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
315 25698 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
316 : Assert(pgStatSharedRefAge != 0);
317 : }
318 :
319 : /*
320 : * Helper function for pgstat_get_entry_ref().
321 : */
322 : static void
323 1133924 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
324 : PgStatShared_HashEntry *shhashent,
325 : PgStatShared_Common *shheader)
326 : {
327 : Assert(shheader->magic == 0xdeadbeef);
328 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
329 :
330 1133924 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
331 :
332 1133924 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
333 :
334 1133924 : entry_ref->shared_stats = shheader;
335 1133924 : entry_ref->shared_entry = shhashent;
336 1133924 : }
337 :
338 : /*
339 : * Helper function for pgstat_get_entry_ref().
340 : */
341 : static bool
342 2908548 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
343 : {
344 : bool found;
345 : PgStat_EntryRefHashEntry *cache_entry;
346 :
347 : /*
348 : * We immediately insert a cache entry, because it avoids 1) multiple
349 : * hashtable lookups in case of a cache miss 2) having to deal with
350 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
351 : */
352 :
353 2908548 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
354 :
355 2908548 : if (!found || !cache_entry->entry_ref)
356 1273500 : {
357 : PgStat_EntryRef *entry_ref;
358 :
359 1273500 : cache_entry->entry_ref = entry_ref =
360 1273500 : MemoryContextAlloc(pgStatSharedRefContext,
361 : sizeof(PgStat_EntryRef));
362 1273500 : entry_ref->shared_stats = NULL;
363 1273500 : entry_ref->shared_entry = NULL;
364 1273500 : entry_ref->pending = NULL;
365 :
366 1273500 : found = false;
367 : }
368 1635048 : else if (cache_entry->entry_ref->shared_stats == NULL)
369 : {
370 : Assert(cache_entry->entry_ref->pending == NULL);
371 0 : found = false;
372 : }
373 : else
374 : {
375 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
376 :
377 1635048 : entry_ref = cache_entry->entry_ref;
378 : Assert(entry_ref->shared_entry != NULL);
379 : Assert(entry_ref->shared_stats != NULL);
380 :
381 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
382 : /* should have at least our reference */
383 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
384 : }
385 :
386 2908548 : *entry_ref_p = cache_entry->entry_ref;
387 2908548 : return found;
388 : }
389 :
390 : /*
391 : * Get a shared stats reference. If create is true, the shared stats object is
392 : * created if it does not exist.
393 : *
394 : * When create is true, and created_entry is non-NULL, it'll be set to true
395 : * if the entry is newly created, false otherwise.
396 : */
397 : PgStat_EntryRef *
398 2908548 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
399 : bool *created_entry)
400 : {
401 2908548 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
402 : PgStatShared_HashEntry *shhashent;
403 2908548 : PgStatShared_Common *shheader = NULL;
404 : PgStat_EntryRef *entry_ref;
405 :
406 : /*
407 : * passing in created_entry only makes sense if we possibly could create
408 : * entry.
409 : */
410 : Assert(create || created_entry == NULL);
411 : pgstat_assert_is_up();
412 : Assert(pgStatLocal.shared_hash != NULL);
413 : Assert(!pgStatLocal.shmem->is_shutdown);
414 :
415 2908548 : pgstat_setup_memcxt();
416 2908548 : pgstat_setup_shared_refs();
417 :
418 2908548 : if (created_entry != NULL)
419 214 : *created_entry = false;
420 :
421 : /*
422 : * Check if other backends dropped stats that could not be deleted because
423 : * somebody held references to it. If so, check this backend's references.
424 : * This is not expected to happen often. The location of the check is a
425 : * bit random, but this is a relatively frequently called path, so better
426 : * than most.
427 : */
428 2908548 : if (pgstat_need_entry_refs_gc())
429 11266 : pgstat_gc_entry_refs();
430 :
431 : /*
432 : * First check the lookup cache hashtable in local memory. If we find a
433 : * match here we can avoid taking locks / causing contention.
434 : */
435 2908548 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
436 1635048 : return entry_ref;
437 :
438 : Assert(entry_ref != NULL);
439 :
440 : /*
441 : * Do a lookup in the hash table first - it's quite likely that the entry
442 : * already exists, and that way we only need a shared lock.
443 : */
444 1273500 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
445 :
446 1273500 : if (create && !shhashent)
447 : {
448 : bool shfound;
449 :
450 : /*
451 : * It's possible that somebody created the entry since the above
452 : * lookup. If so, fall through to the same path as if we'd have if it
453 : * already had been created before the dshash_find() calls.
454 : */
455 167718 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
456 167718 : if (!shfound)
457 : {
458 167718 : shheader = pgstat_init_entry(kind, shhashent);
459 167718 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
460 :
461 167718 : if (created_entry != NULL)
462 96 : *created_entry = true;
463 :
464 167718 : return entry_ref;
465 : }
466 : }
467 :
468 1105782 : if (!shhashent)
469 : {
470 : /*
471 : * If we're not creating, delete the reference again. In all
472 : * likelihood it's just a stats lookup - no point wasting memory for a
473 : * shared ref to nothing...
474 : */
475 139502 : pgstat_release_entry_ref(key, entry_ref, false);
476 :
477 139502 : return NULL;
478 : }
479 : else
480 : {
481 : /*
482 : * Can get here either because dshash_find() found a match, or if
483 : * dshash_find_or_insert() found a concurrently inserted entry.
484 : */
485 :
486 966280 : if (shhashent->dropped && create)
487 : {
488 : /*
489 : * There are legitimate cases where the old stats entry might not
490 : * yet have been dropped by the time it's reused. The most obvious
491 : * case are replication slot stats, where a new slot can be
492 : * created with the same index just after dropping. But oid
493 : * wraparound can lead to other cases as well. We just reset the
494 : * stats to their plain state.
495 : */
496 54 : shheader = pgstat_reinit_entry(kind, shhashent);
497 54 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
498 :
499 54 : if (created_entry != NULL)
500 0 : *created_entry = true;
501 :
502 54 : return entry_ref;
503 : }
504 966226 : else if (shhashent->dropped)
505 : {
506 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
507 74 : pgstat_release_entry_ref(key, entry_ref, false);
508 :
509 74 : return NULL;
510 : }
511 : else
512 : {
513 966152 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
514 966152 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
515 :
516 966152 : return entry_ref;
517 : }
518 : }
519 : }
520 :
521 : static void
522 1273500 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
523 : bool discard_pending)
524 : {
525 1273500 : if (entry_ref && entry_ref->pending)
526 : {
527 61718 : if (discard_pending)
528 61718 : pgstat_delete_pending_entry(entry_ref);
529 : else
530 0 : elog(ERROR, "releasing ref with pending data");
531 : }
532 :
533 1273500 : if (entry_ref && entry_ref->shared_stats)
534 : {
535 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
536 : Assert(entry_ref->pending == NULL);
537 :
538 : /*
539 : * This can't race with another backend looking up the stats entry and
540 : * increasing the refcount because it is not "legal" to create
541 : * additional references to dropped entries.
542 : */
543 1133924 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
544 : {
545 : PgStatShared_HashEntry *shent;
546 :
547 : /*
548 : * We're the last referrer to this entry, try to drop the shared
549 : * entry.
550 : */
551 :
552 : /* only dropped entries can reach a 0 refcount */
553 : Assert(entry_ref->shared_entry->dropped);
554 :
555 9554 : shent = dshash_find(pgStatLocal.shared_hash,
556 9554 : &entry_ref->shared_entry->key,
557 : true);
558 9554 : if (!shent)
559 0 : elog(ERROR, "could not find just referenced shared stats entry");
560 :
561 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
562 : Assert(entry_ref->shared_entry == shent);
563 :
564 9554 : pgstat_free_entry(shent, NULL);
565 : }
566 : }
567 :
568 1273500 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
569 0 : elog(ERROR, "entry ref vanished before deletion");
570 :
571 1273500 : if (entry_ref)
572 1273500 : pfree(entry_ref);
573 1273500 : }
574 :
575 : bool
576 1359824 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
577 : {
578 1359824 : LWLock *lock = &entry_ref->shared_stats->lock;
579 :
580 1359824 : if (nowait)
581 500810 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
582 :
583 859014 : LWLockAcquire(lock, LW_EXCLUSIVE);
584 859014 : return true;
585 : }
586 :
587 : /*
588 : * Separate from pgstat_lock_entry() as most callers will need to lock
589 : * exclusively.
590 : */
591 : bool
592 22282 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
593 : {
594 22282 : LWLock *lock = &entry_ref->shared_stats->lock;
595 :
596 22282 : if (nowait)
597 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
598 :
599 22282 : LWLockAcquire(lock, LW_SHARED);
600 22282 : return true;
601 : }
602 :
603 : void
604 1382106 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
605 : {
606 1382106 : LWLockRelease(&entry_ref->shared_stats->lock);
607 1382106 : }
608 :
609 : /*
610 : * Helper function to fetch and lock shared stats.
611 : */
612 : PgStat_EntryRef *
613 43410 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
614 : bool nowait)
615 : {
616 : PgStat_EntryRef *entry_ref;
617 :
618 : /* find shared table stats entry corresponding to the local entry */
619 43410 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL);
620 :
621 : /* lock the shared entry to protect the content, skip if failed */
622 43410 : if (!pgstat_lock_entry(entry_ref, nowait))
623 0 : return NULL;
624 :
625 43410 : return entry_ref;
626 : }
627 :
628 : void
629 3622 : pgstat_request_entry_refs_gc(void)
630 : {
631 3622 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
632 3622 : }
633 :
634 : static bool
635 2908548 : pgstat_need_entry_refs_gc(void)
636 : {
637 : uint64 curage;
638 :
639 2908548 : if (!pgStatEntryRefHash)
640 0 : return false;
641 :
642 : /* should have been initialized when creating pgStatEntryRefHash */
643 : Assert(pgStatSharedRefAge != 0);
644 :
645 2908548 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
646 :
647 2908548 : return pgStatSharedRefAge != curage;
648 : }
649 :
650 : static void
651 11266 : pgstat_gc_entry_refs(void)
652 : {
653 : pgstat_entry_ref_hash_iterator i;
654 : PgStat_EntryRefHashEntry *ent;
655 : uint64 curage;
656 :
657 11266 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
658 : Assert(curage != 0);
659 :
660 : /*
661 : * Some entries have been dropped. Invalidate cache pointer to them.
662 : */
663 11266 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
664 870402 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
665 : {
666 859136 : PgStat_EntryRef *entry_ref = ent->entry_ref;
667 :
668 : Assert(!entry_ref->shared_stats ||
669 : entry_ref->shared_stats->magic == 0xdeadbeef);
670 :
671 859136 : if (!entry_ref->shared_entry->dropped)
672 609342 : continue;
673 :
674 : /* cannot gc shared ref that has pending data */
675 249794 : if (entry_ref->pending != NULL)
676 240428 : continue;
677 :
678 9366 : pgstat_release_entry_ref(ent->key, entry_ref, false);
679 : }
680 :
681 11266 : pgStatSharedRefAge = curage;
682 11266 : }
683 :
684 : static void
685 25742 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
686 : Datum match_data)
687 : {
688 : pgstat_entry_ref_hash_iterator i;
689 : PgStat_EntryRefHashEntry *ent;
690 :
691 25742 : if (pgStatEntryRefHash == NULL)
692 12 : return;
693 :
694 25730 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
695 :
696 1088826 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
697 : != NULL)
698 : {
699 : Assert(ent->entry_ref != NULL);
700 :
701 1063096 : if (match && !match(ent, match_data))
702 1020 : continue;
703 :
704 1062076 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
705 : }
706 : }
707 :
708 : /*
709 : * Release all local references to shared stats entries.
710 : *
711 : * When a process exits it cannot do so while still holding references onto
712 : * stats entries, otherwise the shared stats entries could never be freed.
713 : */
714 : static void
715 29874 : pgstat_release_all_entry_refs(bool discard_pending)
716 : {
717 29874 : if (pgStatEntryRefHash == NULL)
718 4176 : return;
719 :
720 25698 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
721 : Assert(pgStatEntryRefHash->members == 0);
722 25698 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
723 25698 : pgStatEntryRefHash = NULL;
724 : }
725 :
726 : static bool
727 1020 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
728 : {
729 1020 : Oid dboid = DatumGetObjectId(match_data);
730 :
731 1020 : return ent->key.dboid == dboid;
732 : }
733 :
734 : static void
735 44 : pgstat_release_db_entry_refs(Oid dboid)
736 : {
737 44 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
738 : match_db,
739 : ObjectIdGetDatum(dboid));
740 44 : }
741 :
742 :
743 : /* ------------------------------------------------------------
744 : * Dropping and resetting of stats entries
745 : * ------------------------------------------------------------
746 : */
747 :
748 : static void
749 66740 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
750 : {
751 : dsa_pointer pdsa;
752 :
753 : /*
754 : * Fetch dsa pointer before deleting entry - that way we can free the
755 : * memory after releasing the lock.
756 : */
757 66740 : pdsa = shent->body;
758 :
759 66740 : if (!hstat)
760 62652 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
761 : else
762 4088 : dshash_delete_current(hstat);
763 :
764 66740 : dsa_free(pgStatLocal.dsa, pdsa);
765 66740 : }
766 :
767 : /*
768 : * Helper for both pgstat_drop_database_and_contents() and
769 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
770 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
771 : * case the entry needs to be already locked.
772 : */
773 : static bool
774 66796 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
775 : dshash_seq_status *hstat)
776 : {
777 : Assert(shent->body != InvalidDsaPointer);
778 :
779 : /* should already have released local reference */
780 66796 : if (pgStatEntryRefHash)
781 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
782 :
783 : /*
784 : * Signal that the entry is dropped - this will eventually cause other
785 : * backends to release their references.
786 : */
787 66796 : if (shent->dropped)
788 0 : elog(ERROR, "can only drop stats once");
789 66796 : shent->dropped = true;
790 :
791 : /* release refcount marking entry as not dropped */
792 66796 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
793 : {
794 57186 : pgstat_free_entry(shent, hstat);
795 57186 : return true;
796 : }
797 : else
798 : {
799 9610 : if (!hstat)
800 9610 : dshash_release_lock(pgStatLocal.shared_hash, shent);
801 9610 : return false;
802 : }
803 : }
804 :
805 : /*
806 : * Drop stats for the database and all the objects inside that database.
807 : */
808 : static void
809 44 : pgstat_drop_database_and_contents(Oid dboid)
810 : {
811 : dshash_seq_status hstat;
812 : PgStatShared_HashEntry *p;
813 44 : uint64 not_freed_count = 0;
814 :
815 : Assert(OidIsValid(dboid));
816 :
817 : Assert(pgStatLocal.shared_hash != NULL);
818 :
819 : /*
820 : * This backend might very well be the only backend holding a reference to
821 : * about-to-be-dropped entries. Ensure that we're not preventing it from
822 : * being cleaned up till later.
823 : *
824 : * Doing this separately from the dshash iteration below avoids having to
825 : * do so while holding a partition lock on the shared hashtable.
826 : */
827 44 : pgstat_release_db_entry_refs(dboid);
828 :
829 : /* some of the dshash entries are to be removed, take exclusive lock. */
830 44 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
831 15116 : while ((p = dshash_seq_next(&hstat)) != NULL)
832 : {
833 15072 : if (p->dropped)
834 2 : continue;
835 :
836 15070 : if (p->key.dboid != dboid)
837 11094 : continue;
838 :
839 3976 : if (!pgstat_drop_entry_internal(p, &hstat))
840 : {
841 : /*
842 : * Even statistics for a dropped database might currently be
843 : * accessed (consider e.g. database stats for pg_stat_database).
844 : */
845 0 : not_freed_count++;
846 : }
847 : }
848 44 : dshash_seq_term(&hstat);
849 :
850 : /*
851 : * If some of the stats data could not be freed, signal the reference
852 : * holders to run garbage collection of their cached pgStatShmLookupCache.
853 : */
854 44 : if (not_freed_count > 0)
855 0 : pgstat_request_entry_refs_gc();
856 44 : }
857 :
858 : bool
859 93384 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
860 : {
861 93384 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
862 : PgStatShared_HashEntry *shent;
863 93384 : bool freed = true;
864 :
865 : /* delete local reference */
866 93384 : if (pgStatEntryRefHash)
867 : {
868 : PgStat_EntryRefHashEntry *lohashent =
869 75984 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
870 :
871 75984 : if (lohashent)
872 62482 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
873 : true);
874 : }
875 :
876 : /* mark entry in shared hashtable as deleted, drop if possible */
877 93384 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
878 93384 : if (shent)
879 : {
880 62708 : freed = pgstat_drop_entry_internal(shent, NULL);
881 :
882 : /*
883 : * Database stats contain other stats. Drop those as well when
884 : * dropping the database. XXX: Perhaps this should be done in a
885 : * slightly more principled way? But not obvious what that'd look
886 : * like, and so far this is the only case...
887 : */
888 62708 : if (key.kind == PGSTAT_KIND_DATABASE)
889 44 : pgstat_drop_database_and_contents(key.dboid);
890 : }
891 :
892 93384 : return freed;
893 : }
894 :
895 : void
896 412 : pgstat_drop_all_entries(void)
897 : {
898 : dshash_seq_status hstat;
899 : PgStatShared_HashEntry *ps;
900 412 : uint64 not_freed_count = 0;
901 :
902 412 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
903 524 : while ((ps = dshash_seq_next(&hstat)) != NULL)
904 : {
905 112 : if (ps->dropped)
906 0 : continue;
907 :
908 112 : if (!pgstat_drop_entry_internal(ps, &hstat))
909 0 : not_freed_count++;
910 : }
911 412 : dshash_seq_term(&hstat);
912 :
913 412 : if (not_freed_count > 0)
914 0 : pgstat_request_entry_refs_gc();
915 412 : }
916 :
917 : static void
918 16652 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
919 : TimestampTz ts)
920 : {
921 16652 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
922 :
923 16652 : memset(pgstat_get_entry_data(kind, header), 0,
924 : pgstat_get_entry_len(kind));
925 :
926 16652 : if (kind_info->reset_timestamp_cb)
927 342 : kind_info->reset_timestamp_cb(header, ts);
928 16652 : }
929 :
930 : /*
931 : * Reset one variable-numbered stats entry.
932 : */
933 : void
934 314 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
935 : {
936 : PgStat_EntryRef *entry_ref;
937 :
938 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
939 :
940 314 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
941 314 : if (!entry_ref || entry_ref->shared_entry->dropped)
942 2 : return;
943 :
944 312 : (void) pgstat_lock_entry(entry_ref, false);
945 312 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
946 312 : pgstat_unlock_entry(entry_ref);
947 : }
948 :
949 : /*
950 : * Scan through the shared hashtable of stats, resetting statistics if
951 : * approved by the provided do_reset() function.
952 : */
953 : void
954 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
955 : Datum match_data, TimestampTz ts)
956 : {
957 : dshash_seq_status hstat;
958 : PgStatShared_HashEntry *p;
959 :
960 : /* dshash entry is not modified, take shared lock */
961 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
962 24488 : while ((p = dshash_seq_next(&hstat)) != NULL)
963 : {
964 : PgStatShared_Common *header;
965 :
966 24454 : if (p->dropped)
967 2 : continue;
968 :
969 24452 : if (!do_reset(p, match_data))
970 8112 : continue;
971 :
972 16340 : header = dsa_get_address(pgStatLocal.dsa, p->body);
973 :
974 16340 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
975 :
976 16340 : shared_stat_reset_contents(p->key.kind, header, ts);
977 :
978 16340 : LWLockRelease(&header->lock);
979 : }
980 34 : dshash_seq_term(&hstat);
981 34 : }
982 :
983 : static bool
984 2904 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
985 : {
986 2904 : return p->key.kind == DatumGetInt32(match_data);
987 : }
988 :
989 : void
990 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
991 : {
992 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
993 8 : }
994 :
995 : static void
996 2908548 : pgstat_setup_memcxt(void)
997 : {
998 2908548 : if (unlikely(!pgStatSharedRefContext))
999 25698 : pgStatSharedRefContext =
1000 25698 : AllocSetContextCreate(TopMemoryContext,
1001 : "PgStat Shared Ref",
1002 : ALLOCSET_SMALL_SIZES);
1003 2908548 : if (unlikely(!pgStatEntryRefHashContext))
1004 25698 : pgStatEntryRefHashContext =
1005 25698 : AllocSetContextCreate(TopMemoryContext,
1006 : "PgStat Shared Ref Hash",
1007 : ALLOCSET_SMALL_SIZES);
1008 2908548 : }
|