Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128
22 :
/*
 * Hash table entry for finding the PgStat_EntryRef for a key.
 *
 * One such entry exists per key in the backend-local pgStatEntryRefHash
 * table; it maps the stats key to this backend's reference.
 */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref; /* this backend's reference for "key" */
} PgStat_EntryRefHashEntry;
30 :
31 :
/*
 * For references to shared statistics entries.
 *
 * Instantiates a simplehash table with the function/type prefix
 * "pgstat_entry_ref_hash", keyed by PgStat_HashKey and storing
 * PgStat_EntryRefHashEntry elements.  Hashing and equality are delegated to
 * pgstat_hash_hash_key() / pgstat_cmp_hash_key() over the whole key struct.
 */
#define SH_PREFIX pgstat_entry_ref_hash
#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
45 :
46 :
/* dropping of shared entries */
static void pgstat_drop_database_and_contents(Oid dboid);

static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);

/* management of backend-local entry references */
static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
static bool pgstat_need_entry_refs_gc(void);
static void pgstat_gc_entry_refs(void);
static void pgstat_release_all_entry_refs(bool discard_pending);
/* predicate deciding whether a local reference should be released */
typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);

static void pgstat_setup_memcxt(void);
59 :
60 :
/*
 * Parameters for the shared hash: entries are PgStatShared_HashEntry structs
 * keyed by PgStat_HashKey.  The same parameters are used when creating the
 * table (StatsShmemInit) and when attaching to it (pgstat_attach_shmem).
 */
static const dshash_parameters dsh_params = {
	sizeof(PgStat_HashKey),
	sizeof(PgStatShared_HashEntry),
	pgstat_cmp_hash_key,
	pgstat_hash_hash_key,
	dshash_memcpy,
	LWTRANCHE_PGSTATS_HASH
};
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
84 :
/*
 * Memory contexts used for backend-local reference tracking:
 * pgStatSharedRefContext holds the individual PgStat_EntryRef allocations,
 * pgStatEntryRefHashContext holds the pgStatEntryRefHash table itself.  Kept
 * separate to make it easier to track / attribute memory usage.
 */
static MemoryContext pgStatSharedRefContext = NULL;
static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 12282 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 12282 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 12282 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 5994 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 5994 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 5994 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 : /* Add shared memory for all the custom fixed-numbered statistics */
135 779220 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
136 : {
137 773226 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
138 :
139 773226 : if (!kind_info)
140 773178 : continue;
141 48 : if (!kind_info->fixed_amount)
142 24 : continue;
143 :
144 : Assert(kind_info->shared_size != 0);
145 :
146 24 : sz += MAXALIGN(kind_info->shared_size);
147 : }
148 :
149 5994 : return sz;
150 : }
151 :
/*
 * Initialize cumulative statistics system during startup.
 *
 * Allocates (or looks up) the "Shared Memory Stats" struct and stores its
 * address in pgStatLocal.shmem.  When not running under postmaster, the
 * struct must be new; this function then creates the in-place DSA area and
 * the shared dshash table, initializes gc_request_count, and runs the
 * init_shmem_cb callback of every fixed-numbered stats kind.  Otherwise the
 * struct is expected to exist already and nothing else is done.
 */
void
StatsShmemInit(void)
{
	bool		found;
	Size		sz;

	sz = StatsShmemSize();
	pgStatLocal.shmem = (PgStat_ShmemControl *)
		ShmemInitStruct("Shared Memory Stats", sz, &found);

	if (!IsUnderPostmaster)
	{
		dsa_area   *dsa;
		dshash_table *dsh;
		PgStat_ShmemControl *ctl = pgStatLocal.shmem;
		char	   *p = (char *) ctl;

		Assert(!found);

		/* the allocation of pgStatLocal.shmem itself */
		p += MAXALIGN(sizeof(PgStat_ShmemControl));

		/*
		 * Create a small dsa allocation in plain shared memory. This is
		 * required because postmaster cannot use dsm segments. It also
		 * provides a small efficiency win.
		 */
		ctl->raw_dsa_area = p;
		/* "p" is not read again below; the area starts at raw_dsa_area */
		p += MAXALIGN(pgstat_dsa_init_size());
		dsa = dsa_create_in_place(ctl->raw_dsa_area,
								  pgstat_dsa_init_size(),
								  LWTRANCHE_PGSTATS_DSA, NULL);
		dsa_pin(dsa);

		/*
		 * To ensure dshash is created in "plain" shared memory, temporarily
		 * limit size of dsa to the initial size of the dsa.
		 */
		dsa_set_size_limit(dsa, pgstat_dsa_init_size());

		/*
		 * With the limit in place, create the dshash table. XXX: It'd be nice
		 * if there were dshash_create_in_place().
		 */
		dsh = dshash_create(dsa, &dsh_params, NULL);
		ctl->hash_handle = dshash_get_hash_table_handle(dsh);

		/* lift limit set above */
		dsa_set_size_limit(dsa, -1);

		/*
		 * Postmaster will never access these again, thus free the local
		 * dsa/dshash references.
		 */
		dshash_detach(dsh);
		dsa_detach(dsa);

		/*
		 * Start the GC request counter at 1; backends assert that the value
		 * they cache from it is never 0.
		 */
		pg_atomic_init_u64(&ctl->gc_request_count, 1);

		/* initialize fixed-numbered stats */
		for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
		{
			const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
			char	   *ptr;

			if (!kind_info || !kind_info->fixed_amount)
				continue;

			/*
			 * Builtin kinds live at a fixed offset inside the control
			 * struct; custom kinds get a separately allocated chunk that is
			 * remembered in ctl->custom_data[].
			 *
			 * NOTE(review): StatsShmemSize() also adds shared_size for
			 * custom kinds to the size of the control allocation, while the
			 * memory used here comes from ShmemAlloc() - confirm that the
			 * extra reservation is intended.
			 */
			if (pgstat_is_kind_builtin(kind))
				ptr = ((char *) ctl) + kind_info->shared_ctl_off;
			else
			{
				int			idx = kind - PGSTAT_KIND_CUSTOM_MIN;

				Assert(kind_info->shared_size != 0);
				ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
				ptr = ctl->custom_data[idx];
			}

			kind_info->init_shmem_cb(ptr);
		}
	}
	else
	{
		Assert(found);
	}
}
242 :
243 : void
244 41882 : pgstat_attach_shmem(void)
245 : {
246 : MemoryContext oldcontext;
247 :
248 : Assert(pgStatLocal.dsa == NULL);
249 :
250 : /* stats shared memory persists for the backend lifetime */
251 41882 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
252 :
253 41882 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
254 : NULL);
255 41882 : dsa_pin_mapping(pgStatLocal.dsa);
256 :
257 83764 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
258 41882 : pgStatLocal.shmem->hash_handle,
259 : NULL);
260 :
261 41882 : MemoryContextSwitchTo(oldcontext);
262 41882 : }
263 :
264 : void
265 41882 : pgstat_detach_shmem(void)
266 : {
267 : Assert(pgStatLocal.dsa);
268 :
269 : /* we shouldn't leave references to shared stats */
270 41882 : pgstat_release_all_entry_refs(false);
271 :
272 41882 : dshash_detach(pgStatLocal.shared_hash);
273 41882 : pgStatLocal.shared_hash = NULL;
274 :
275 41882 : dsa_detach(pgStatLocal.dsa);
276 :
277 : /*
278 : * dsa_detach() does not decrement the DSA reference count as no segment
279 : * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
280 : * be registered. Hence, release it manually now.
281 : */
282 41882 : dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
283 :
284 41882 : pgStatLocal.dsa = NULL;
285 41882 : }
286 :
287 :
288 : /* ------------------------------------------------------------
289 : * Maintenance of shared memory stats entries
290 : * ------------------------------------------------------------
291 : */
292 :
293 : PgStatShared_Common *
294 613592 : pgstat_init_entry(PgStat_Kind kind,
295 : PgStatShared_HashEntry *shhashent)
296 : {
297 : /* Create new stats entry. */
298 : dsa_pointer chunk;
299 : PgStatShared_Common *shheader;
300 :
301 : /*
302 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
303 : * can't be freed before the initialization because it can't be found as
304 : * long as we hold the dshash partition lock. Caller needs to increase
305 : * further if a longer lived reference is needed.
306 : */
307 613592 : pg_atomic_init_u32(&shhashent->refcount, 1);
308 :
309 : /*
310 : * Initialize "generation" to 0, as freshly created.
311 : */
312 613592 : pg_atomic_init_u32(&shhashent->generation, 0);
313 613592 : shhashent->dropped = false;
314 :
315 613592 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
316 613592 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
317 613592 : shheader->magic = 0xdeadbeef;
318 :
319 : /* Link the new entry from the hash entry. */
320 613592 : shhashent->body = chunk;
321 :
322 613592 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
323 :
324 613592 : return shheader;
325 : }
326 :
327 : static PgStatShared_Common *
328 58 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
329 : {
330 : PgStatShared_Common *shheader;
331 :
332 58 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
333 :
334 : /* mark as not dropped anymore */
335 58 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
336 :
337 : /*
338 : * Increment "generation", to let any backend with local references know
339 : * that what they point to is outdated.
340 : */
341 58 : pg_atomic_fetch_add_u32(&shhashent->generation, 1);
342 58 : shhashent->dropped = false;
343 :
344 : /* reinitialize content */
345 : Assert(shheader->magic == 0xdeadbeef);
346 58 : memset(pgstat_get_entry_data(kind, shheader), 0,
347 : pgstat_get_entry_len(kind));
348 :
349 58 : return shheader;
350 : }
351 :
352 : static void
353 7689666 : pgstat_setup_shared_refs(void)
354 : {
355 7689666 : if (likely(pgStatEntryRefHash != NULL))
356 7654354 : return;
357 :
358 35312 : pgStatEntryRefHash =
359 35312 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
360 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
361 35312 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
362 : Assert(pgStatSharedRefAge != 0);
363 : }
364 :
/*
 * Helper function for pgstat_get_entry_ref().
 *
 * Takes a reference on the shared entry on behalf of entry_ref: increments
 * the shared refcount, releases the dshash lock held on shhashent, and then
 * fills in entry_ref's pointers and its copy of the entry's generation.
 *
 * The caller must hold the dshash lock on shhashent, and the entry must
 * already have a nonzero refcount.
 */
static void
pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
						 PgStatShared_HashEntry *shhashent,
						 PgStatShared_Common *shheader)
{
	Assert(shheader->magic == 0xdeadbeef);
	Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);

	pg_atomic_fetch_add_u32(&shhashent->refcount, 1);

	/* the refcount taken above keeps the entry alive from here on */
	dshash_release_lock(pgStatLocal.shared_hash, shhashent);

	entry_ref->shared_stats = shheader;
	entry_ref->shared_entry = shhashent;
	/* NOTE(review): generation is read after the lock was released */
	entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
}
384 :
/*
 * Helper function for pgstat_get_entry_ref().
 *
 * Looks up (inserting if necessary) the local cache entry for key and stores
 * the associated PgStat_EntryRef in *entry_ref_p.
 *
 * Returns true if the cached reference already points to shared stats and
 * can be used as is.  Returns false if the reference was newly allocated, or
 * exists but is not linked to shared stats yet; the caller then has to link
 * it up or release it.
 */
static bool
pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
{
	bool		found;
	PgStat_EntryRefHashEntry *cache_entry;

	/*
	 * We immediately insert a cache entry, because it avoids 1) multiple
	 * hashtable lookups in case of a cache miss 2) having to deal with
	 * out-of-memory errors after incrementing PgStatShared_Common->refcount.
	 */

	cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);

	if (!found || !cache_entry->entry_ref)
	{
		PgStat_EntryRef *entry_ref;

		/* new cache entry (or one without a ref): allocate an unlinked ref */
		cache_entry->entry_ref = entry_ref =
			MemoryContextAlloc(pgStatSharedRefContext,
							   sizeof(PgStat_EntryRef));
		entry_ref->shared_stats = NULL;
		entry_ref->shared_entry = NULL;
		entry_ref->pending = NULL;

		found = false;
	}
	else if (cache_entry->entry_ref->shared_stats == NULL)
	{
		/* a ref exists but was never linked to shared stats; reuse it */
		Assert(cache_entry->entry_ref->pending == NULL);
		found = false;
	}
	else
	{
		PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;

		/* fully set up reference: only sanity checks are needed */
		entry_ref = cache_entry->entry_ref;
		Assert(entry_ref->shared_entry != NULL);
		Assert(entry_ref->shared_stats != NULL);

		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		/* should have at least our reference */
		Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
	}

	*entry_ref_p = cache_entry->entry_ref;
	return found;
}
436 :
/*
 * Get a shared stats reference. If create is true, the shared stats object is
 * created if it does not exist.
 *
 * When create is true, and created_entry is non-NULL, it'll be set to true
 * if the entry is newly created, false otherwise.
 *
 * Returns NULL if create is false and the entry either does not exist or has
 * been dropped; in that case no local reference is kept.
 */
PgStat_EntryRef *
pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
					 bool *created_entry)
{
	PgStat_HashKey key;
	PgStatShared_HashEntry *shhashent;
	PgStatShared_Common *shheader = NULL;
	PgStat_EntryRef *entry_ref;

	/* clear padding */
	memset(&key, 0, sizeof(struct PgStat_HashKey));

	key.kind = kind;
	key.dboid = dboid;
	key.objid = objid;

	/*
	 * passing in created_entry only makes sense if we possibly could create
	 * entry.
	 */
	Assert(create || created_entry == NULL);
	pgstat_assert_is_up();
	Assert(pgStatLocal.shared_hash != NULL);
	Assert(!pgStatLocal.shmem->is_shutdown);

	pgstat_setup_memcxt();
	pgstat_setup_shared_refs();

	if (created_entry != NULL)
		*created_entry = false;

	/*
	 * Check if other backends dropped stats that could not be deleted because
	 * somebody held references to it. If so, check this backend's references.
	 * This is not expected to happen often. The location of the check is a
	 * bit random, but this is a relatively frequently called path, so better
	 * than most.
	 */
	if (pgstat_need_entry_refs_gc())
		pgstat_gc_entry_refs();

	/*
	 * First check the lookup cache hashtable in local memory. If we find a
	 * match here we can avoid taking locks / causing contention.
	 */
	if (pgstat_get_entry_ref_cached(key, &entry_ref))
		return entry_ref;

	/* cache miss: entry_ref is an unlinked ref already present in the cache */
	Assert(entry_ref != NULL);

	/*
	 * Do a lookup in the hash table first - it's quite likely that the entry
	 * already exists, and that way we only need a shared lock.
	 */
	shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);

	if (create && !shhashent)
	{
		bool		shfound;

		/*
		 * It's possible that somebody created the entry since the above
		 * lookup. If so, fall through to the same path as if we'd have if it
		 * already had been created before the dshash_find() calls.
		 */
		shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
		if (!shfound)
		{
			/* pgstat_acquire_entry_ref() releases the dshash lock */
			shheader = pgstat_init_entry(kind, shhashent);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			if (created_entry != NULL)
				*created_entry = true;

			return entry_ref;
		}
	}

	if (!shhashent)
	{
		/*
		 * If we're not creating, delete the reference again. In all
		 * likelihood it's just a stats lookup - no point wasting memory for a
		 * shared ref to nothing...
		 */
		pgstat_release_entry_ref(key, entry_ref, false);

		return NULL;
	}
	else
	{
		/*
		 * Can get here either because dshash_find() found a match, or if
		 * dshash_find_or_insert() found a concurrently inserted entry.
		 */

		if (shhashent->dropped && create)
		{
			/*
			 * There are legitimate cases where the old stats entry might not
			 * yet have been dropped by the time it's reused. The most obvious
			 * case are replication slot stats, where a new slot can be
			 * created with the same index just after dropping. But oid
			 * wraparound can lead to other cases as well. We just reset the
			 * stats to their plain state, while incrementing its "generation"
			 * in the shared entry for any remaining local references.
			 */
			shheader = pgstat_reinit_entry(kind, shhashent);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			if (created_entry != NULL)
				*created_entry = true;

			return entry_ref;
		}
		else if (shhashent->dropped)
		{
			/* dropped and not creating: behave as if there were no entry */
			dshash_release_lock(pgStatLocal.shared_hash, shhashent);
			pgstat_release_entry_ref(key, entry_ref, false);

			return NULL;
		}
		else
		{
			/* live entry: just take a reference to its existing body */
			shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			return entry_ref;
		}
	}
}
575 :
/*
 * Release this backend's reference for key: drop the reference on the shared
 * entry (freeing the shared entry if this was the last reference and the
 * entry has not been reinitialized since), remove the local cache entry, and
 * free entry_ref.
 *
 * If entry_ref still has pending data, the data is deleted when
 * discard_pending is true; otherwise an ERROR is raised.  An ERROR is also
 * raised if key is not present in the local cache.
 */
static void
pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
						 bool discard_pending)
{
	if (entry_ref && entry_ref->pending)
	{
		if (discard_pending)
			pgstat_delete_pending_entry(entry_ref);
		else
			elog(ERROR, "releasing ref with pending data");
	}

	/* refs that were never linked to shared stats skip the shared part */
	if (entry_ref && entry_ref->shared_stats)
	{
		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		Assert(entry_ref->pending == NULL);

		/*
		 * This can't race with another backend looking up the stats entry and
		 * increasing the refcount because it is not "legal" to create
		 * additional references to dropped entries.
		 */
		if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
		{
			PgStatShared_HashEntry *shent;

			/*
			 * We're the last referrer to this entry, try to drop the shared
			 * entry.
			 */

			/* only dropped entries can reach a 0 refcount */
			Assert(entry_ref->shared_entry->dropped);

			/* re-find the entry with an exclusive lock */
			shent = dshash_find(pgStatLocal.shared_hash,
								&entry_ref->shared_entry->key,
								true);
			if (!shent)
				elog(ERROR, "could not find just referenced shared stats entry");

			/*
			 * This entry may have been reinitialized while trying to release
			 * it, so double-check that it has not been reused while holding a
			 * lock on its shared entry.
			 */
			if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
				entry_ref->generation)
			{
				/* Same "generation", so we're OK with the removal */
				Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
				Assert(entry_ref->shared_entry == shent);
				pgstat_free_entry(shent, NULL);
			}
			else
			{
				/*
				 * Shared stats entry has been reinitialized, so do not drop
				 * its shared entry, only release its lock.
				 */
				dshash_release_lock(pgStatLocal.shared_hash, shent);
			}
		}
	}

	/* forget the local cache entry, then the reference itself */
	if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
		elog(ERROR, "entry ref vanished before deletion");

	if (entry_ref)
		pfree(entry_ref);
}
646 :
647 : /*
648 : * Acquire exclusive lock on the entry.
649 : *
650 : * If nowait is true, it's just a conditional acquire, and the result
651 : * *must* be checked to verify success.
652 : * If nowait is false, waits as necessary, always returning true.
653 : */
654 : bool
655 2524560 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
656 : {
657 2524560 : LWLock *lock = &entry_ref->shared_stats->lock;
658 :
659 2524560 : if (nowait)
660 645096 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
661 :
662 1879464 : LWLockAcquire(lock, LW_EXCLUSIVE);
663 1879464 : return true;
664 : }
665 :
666 : /*
667 : * Acquire shared lock on the entry.
668 : *
669 : * Separate from pgstat_lock_entry() as most callers will need to lock
670 : * exclusively. The wait semantics are identical.
671 : */
672 : bool
673 584362 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
674 : {
675 584362 : LWLock *lock = &entry_ref->shared_stats->lock;
676 :
677 584362 : if (nowait)
678 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
679 :
680 584362 : LWLockAcquire(lock, LW_SHARED);
681 584362 : return true;
682 : }
683 :
684 : void
685 3108888 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
686 : {
687 3108888 : LWLockRelease(&entry_ref->shared_stats->lock);
688 3108888 : }
689 :
690 : /*
691 : * Helper function to fetch and lock shared stats.
692 : */
693 : PgStat_EntryRef *
694 580870 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
695 : bool nowait)
696 : {
697 : PgStat_EntryRef *entry_ref;
698 :
699 : /* find shared table stats entry corresponding to the local entry */
700 580870 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
701 :
702 : /* lock the shared entry to protect the content, skip if failed */
703 580870 : if (!pgstat_lock_entry(entry_ref, nowait))
704 0 : return NULL;
705 :
706 580870 : return entry_ref;
707 : }
708 :
709 : void
710 3708 : pgstat_request_entry_refs_gc(void)
711 : {
712 3708 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
713 3708 : }
714 :
715 : static bool
716 7689666 : pgstat_need_entry_refs_gc(void)
717 : {
718 : uint64 curage;
719 :
720 7689666 : if (!pgStatEntryRefHash)
721 0 : return false;
722 :
723 : /* should have been initialized when creating pgStatEntryRefHash */
724 : Assert(pgStatSharedRefAge != 0);
725 :
726 7689666 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
727 :
728 7689666 : return pgStatSharedRefAge != curage;
729 : }
730 :
731 : static void
732 11554 : pgstat_gc_entry_refs(void)
733 : {
734 : pgstat_entry_ref_hash_iterator i;
735 : PgStat_EntryRefHashEntry *ent;
736 : uint64 curage;
737 :
738 11554 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
739 : Assert(curage != 0);
740 :
741 : /*
742 : * Some entries have been dropped or reinitialized. Invalidate cache
743 : * pointer to them.
744 : */
745 11554 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
746 904084 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
747 : {
748 892530 : PgStat_EntryRef *entry_ref = ent->entry_ref;
749 :
750 : Assert(!entry_ref->shared_stats ||
751 : entry_ref->shared_stats->magic == 0xdeadbeef);
752 :
753 : /*
754 : * "generation" checks for the case of entries being reinitialized,
755 : * and "dropped" for the case where these are.. dropped.
756 : */
757 892530 : if (!entry_ref->shared_entry->dropped &&
758 618768 : pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
759 618768 : entry_ref->generation)
760 618710 : continue;
761 :
762 : /* cannot gc shared ref that has pending data */
763 273820 : if (entry_ref->pending != NULL)
764 265302 : continue;
765 :
766 8518 : pgstat_release_entry_ref(ent->key, entry_ref, false);
767 : }
768 :
769 11554 : pgStatSharedRefAge = curage;
770 11554 : }
771 :
772 : static void
773 35382 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
774 : Datum match_data)
775 : {
776 : pgstat_entry_ref_hash_iterator i;
777 : PgStat_EntryRefHashEntry *ent;
778 :
779 35382 : if (pgStatEntryRefHash == NULL)
780 2 : return;
781 :
782 35380 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
783 :
784 1921360 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
785 1921360 : != NULL)
786 : {
787 : Assert(ent->entry_ref != NULL);
788 :
789 1885980 : if (match && !match(ent, match_data))
790 2048 : continue;
791 :
792 1883932 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
793 : }
794 : }
795 :
796 : /*
797 : * Release all local references to shared stats entries.
798 : *
799 : * When a process exits it cannot do so while still holding references onto
800 : * stats entries, otherwise the shared stats entries could never be freed.
801 : */
802 : static void
803 41882 : pgstat_release_all_entry_refs(bool discard_pending)
804 : {
805 41882 : if (pgStatEntryRefHash == NULL)
806 6570 : return;
807 :
808 35312 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
809 : Assert(pgStatEntryRefHash->members == 0);
810 35312 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
811 35312 : pgStatEntryRefHash = NULL;
812 : }
813 :
814 : static bool
815 2048 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
816 : {
817 2048 : Oid dboid = DatumGetObjectId(match_data);
818 :
819 2048 : return ent->key.dboid == dboid;
820 : }
821 :
822 : static void
823 70 : pgstat_release_db_entry_refs(Oid dboid)
824 : {
825 70 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
826 : match_db,
827 : ObjectIdGetDatum(dboid));
828 70 : }
829 :
830 :
831 : /* ------------------------------------------------------------
832 : * Dropping and resetting of stats entries
833 : * ------------------------------------------------------------
834 : */
835 :
836 : static void
837 108418 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
838 : {
839 : dsa_pointer pdsa;
840 :
841 : /*
842 : * Fetch dsa pointer before deleting entry - that way we can free the
843 : * memory after releasing the lock.
844 : */
845 108418 : pdsa = shent->body;
846 :
847 108418 : if (!hstat)
848 99652 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
849 : else
850 8766 : dshash_delete_current(hstat);
851 :
852 108418 : dsa_free(pgStatLocal.dsa, pdsa);
853 108418 : }
854 :
/*
 * Helper for both pgstat_drop_database_and_contents() and
 * pgstat_drop_entry().  If hstat is non-null delete the shared entry using
 * dshash_delete_current(), otherwise use dshash_delete_entry().  In either
 * case the entry needs to be already locked.
 *
 * Marks the entry as dropped and gives up the refcount that marked it as
 * live.  Returns true if that was the last reference and the entry was freed,
 * false if other references remain.  Raises an ERROR if the entry is already
 * marked as dropped.
 */
static bool
pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
						   dshash_seq_status *hstat)
{
	Assert(shent->body != InvalidDsaPointer);

	/* should already have released local reference */
	if (pgStatEntryRefHash)
		Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));

	/*
	 * Signal that the entry is dropped - this will eventually cause other
	 * backends to release their references.
	 */
	if (shent->dropped)
		elog(ERROR,
			 "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u",
			 pgstat_get_kind_info(shent->key.kind)->name,
			 shent->key.dboid,
			 shent->key.objid,
			 pg_atomic_read_u32(&shent->refcount));
	shent->dropped = true;

	/* release refcount marking entry as not dropped */
	if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
	{
		pgstat_free_entry(shent, hstat);
		return true;
	}
	else
	{
		/*
		 * Still referenced elsewhere.  Without a scan, release the entry's
		 * lock here; with a scan no lock is released (presumably the scan
		 * keeps managing it - confirm against dshash).
		 */
		if (!hstat)
			dshash_release_lock(pgStatLocal.shared_hash, shent);
		return false;
	}
}
897 :
/*
 * Drop stats for the database and all the objects inside that database.
 *
 * First releases this backend's own references to entries of that database,
 * then scans the whole shared hash table and drops every not-yet-dropped
 * entry whose key carries dboid.  If any such entry could not be freed
 * because it is still referenced, a GC of entry references is requested.
 */
static void
pgstat_drop_database_and_contents(Oid dboid)
{
	dshash_seq_status hstat;
	PgStatShared_HashEntry *p;
	uint64		not_freed_count = 0;

	Assert(OidIsValid(dboid));

	Assert(pgStatLocal.shared_hash != NULL);

	/*
	 * This backend might very well be the only backend holding a reference to
	 * about-to-be-dropped entries. Ensure that we're not preventing it from
	 * being cleaned up till later.
	 *
	 * Doing this separately from the dshash iteration below avoids having to
	 * do so while holding a partition lock on the shared hashtable.
	 */
	pgstat_release_db_entry_refs(dboid);

	/* some of the dshash entries are to be removed, take exclusive lock. */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
	while ((p = dshash_seq_next(&hstat)) != NULL)
	{
		/* already dropped, only waiting for its last reference to go away */
		if (p->dropped)
			continue;

		if (p->key.dboid != dboid)
			continue;

		if (!pgstat_drop_entry_internal(p, &hstat))
		{
			/*
			 * Even statistics for a dropped database might currently be
			 * accessed (consider e.g. database stats for pg_stat_database).
			 */
			not_freed_count++;
		}
	}
	dshash_seq_term(&hstat);

	/*
	 * If some of the stats data could not be freed, signal the reference
	 * holders to run garbage collection of their cached pgStatLocal.shmem.
	 */
	if (not_freed_count > 0)
		pgstat_request_entry_refs_gc();
}
950 :
/*
 * Drop a single stats entry.
 *
 * This routine returns false if the stats entry of the dropped object could
 * not be freed, true otherwise.  True is also returned when no shared entry
 * exists for the given key.
 *
 * The callers of this function should call pgstat_request_entry_refs_gc()
 * if the stats entry could not be freed, to ensure that this entry's memory
 * can be reclaimed later by a different backend calling
 * pgstat_gc_entry_refs().
 */
bool
pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
{
	PgStat_HashKey key;
	PgStatShared_HashEntry *shent;
	bool		freed = true;

	/* clear padding */
	memset(&key, 0, sizeof(struct PgStat_HashKey));

	key.kind = kind;
	key.dboid = dboid;
	key.objid = objid;

	/* delete local reference */
	if (pgStatEntryRefHash)
	{
		PgStat_EntryRefHashEntry *lohashent =
			pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);

		/* pending data of the dropped object is discarded */
		if (lohashent)
			pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
									 true);
	}

	/* mark entry in shared hashtable as deleted, drop if possible */
	shent = dshash_find(pgStatLocal.shared_hash, &key, true);
	if (shent)
	{
		freed = pgstat_drop_entry_internal(shent, NULL);

		/*
		 * Database stats contain other stats. Drop those as well when
		 * dropping the database. XXX: Perhaps this should be done in a
		 * slightly more principled way? But not obvious what that'd look
		 * like, and so far this is the only case...
		 *
		 * Note that the result of dropping the contained stats does not
		 * influence the return value; pgstat_drop_database_and_contents()
		 * requests a GC by itself if needed.
		 */
		if (key.kind == PGSTAT_KIND_DATABASE)
			pgstat_drop_database_and_contents(key.dboid);
	}

	return freed;
}
1005 :
1006 : /*
1007 : * Scan through the shared hashtable of stats, dropping statistics if
1008 : * approved by the optional do_drop() function.
1009 : */
1010 : void
1011 452 : pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
1012 : Datum match_data)
1013 : {
1014 : dshash_seq_status hstat;
1015 : PgStatShared_HashEntry *ps;
1016 452 : uint64 not_freed_count = 0;
1017 :
1018 : /* entries are removed, take an exclusive lock */
1019 452 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
1020 630 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1021 : {
1022 178 : if (ps->dropped)
1023 0 : continue;
1024 :
1025 178 : if (do_drop != NULL && !do_drop(ps, match_data))
1026 64 : continue;
1027 :
1028 : /* delete local reference */
1029 114 : if (pgStatEntryRefHash)
1030 : {
1031 : PgStat_EntryRefHashEntry *lohashent =
1032 2 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);
1033 :
1034 2 : if (lohashent)
1035 0 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1036 : true);
1037 : }
1038 :
1039 114 : if (!pgstat_drop_entry_internal(ps, &hstat))
1040 0 : not_freed_count++;
1041 : }
1042 452 : dshash_seq_term(&hstat);
1043 :
1044 452 : if (not_freed_count > 0)
1045 0 : pgstat_request_entry_refs_gc();
1046 452 : }
1047 :
1048 : /*
1049 : * Scan through the shared hashtable of stats and drop all entries.
1050 : */
1051 : void
1052 450 : pgstat_drop_all_entries(void)
1053 : {
1054 450 : pgstat_drop_matching_entries(NULL, 0);
1055 450 : }
1056 :
1057 : static void
1058 17612 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1059 : TimestampTz ts)
1060 : {
1061 17612 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1062 :
1063 17612 : memset(pgstat_get_entry_data(kind, header), 0,
1064 : pgstat_get_entry_len(kind));
1065 :
1066 17612 : if (kind_info->reset_timestamp_cb)
1067 380 : kind_info->reset_timestamp_cb(header, ts);
1068 17612 : }
1069 :
1070 : /*
1071 : * Reset one variable-numbered stats entry.
1072 : */
1073 : void
1074 352 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1075 : {
1076 : PgStat_EntryRef *entry_ref;
1077 :
1078 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1079 :
1080 352 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1081 352 : if (!entry_ref || entry_ref->shared_entry->dropped)
1082 2 : return;
1083 :
1084 350 : (void) pgstat_lock_entry(entry_ref, false);
1085 350 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1086 350 : pgstat_unlock_entry(entry_ref);
1087 : }
1088 :
1089 : /*
1090 : * Scan through the shared hashtable of stats, resetting statistics if
1091 : * approved by the provided do_reset() function.
1092 : */
1093 : void
1094 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1095 : Datum match_data, TimestampTz ts)
1096 : {
1097 : dshash_seq_status hstat;
1098 : PgStatShared_HashEntry *p;
1099 :
1100 : /* dshash entry is not modified, take shared lock */
1101 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1102 26078 : while ((p = dshash_seq_next(&hstat)) != NULL)
1103 : {
1104 : PgStatShared_Common *header;
1105 :
1106 26044 : if (p->dropped)
1107 2 : continue;
1108 :
1109 26042 : if (!do_reset(p, match_data))
1110 8780 : continue;
1111 :
1112 17262 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1113 :
1114 17262 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1115 :
1116 17262 : shared_stat_reset_contents(p->key.kind, header, ts);
1117 :
1118 17262 : LWLockRelease(&header->lock);
1119 : }
1120 34 : dshash_seq_term(&hstat);
1121 34 : }
1122 :
1123 : static bool
1124 2964 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1125 : {
1126 2964 : return p->key.kind == DatumGetInt32(match_data);
1127 : }
1128 :
1129 : void
1130 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1131 : {
1132 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1133 8 : }
1134 :
1135 : static void
1136 7689666 : pgstat_setup_memcxt(void)
1137 : {
1138 7689666 : if (unlikely(!pgStatSharedRefContext))
1139 35312 : pgStatSharedRefContext =
1140 35312 : AllocSetContextCreate(TopMemoryContext,
1141 : "PgStat Shared Ref",
1142 : ALLOCSET_SMALL_SIZES);
1143 7689666 : if (unlikely(!pgStatEntryRefHashContext))
1144 35312 : pgStatEntryRefHashContext =
1145 35312 : AllocSetContextCreate(TopMemoryContext,
1146 : "PgStat Shared Ref Hash",
1147 : ALLOCSET_SMALL_SIZES);
1148 7689666 : }
|