Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
/* initial bucket count of the backend-local entry-ref cache hashtable */
#define PGSTAT_ENTRY_REF_HASH_SIZE	128

/* hash table entry for finding the PgStat_EntryRef for a key */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref; /* the cached reference, or NULL */
} PgStat_EntryRefHashEntry;


/*
 * Instantiate a simplehash hashtable keyed by PgStat_HashKey, used for the
 * backend-local cache of references to shared statistics entries.  The key
 * hash/compare callbacks reuse the same functions as the shared dshash table
 * (see dsh_params below), so both tables agree on key identity.
 */
/* for references to shared statistics entries */
#define SH_PREFIX pgstat_entry_ref_hash
#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
45 :
46 :
47 : static void pgstat_drop_database_and_contents(Oid dboid);
48 :
49 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
50 :
51 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
52 : static bool pgstat_need_entry_refs_gc(void);
53 : static void pgstat_gc_entry_refs(void);
54 : static void pgstat_release_all_entry_refs(bool discard_pending);
55 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
56 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
57 :
58 : static void pgstat_setup_memcxt(void);
59 :
60 :
/* parameter for the shared hash */
static const dshash_parameters dsh_params = {
	sizeof(PgStat_HashKey),		/* key size */
	sizeof(PgStatShared_HashEntry), /* full entry size */
	pgstat_cmp_hash_key,		/* key comparison function */
	pgstat_hash_hash_key,		/* key hash function */
	dshash_memcpy,				/* key copy function */
	LWTRANCHE_PGSTATS_HASH		/* LWLock tranche for partition locks */
};
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
84 :
85 : /*
86 : * Memory contexts containing the pgStatEntryRefHash table and the
87 : * pgStatSharedRef entries respectively. Kept separate to make it easier to
88 : * track / attribute memory usage.
89 : */
90 : static MemoryContext pgStatSharedRefContext = NULL;
91 : static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 10590 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 10590 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 10590 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 6234 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 6234 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 6234 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 : /* Add shared memory for all the custom fixed-numbered statistics */
135 62340 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
136 : {
137 56106 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
138 :
139 56106 : if (!kind_info)
140 56046 : continue;
141 60 : if (!kind_info->fixed_amount)
142 30 : continue;
143 :
144 : Assert(kind_info->shared_size != 0);
145 :
146 30 : sz += MAXALIGN(kind_info->shared_size);
147 : }
148 :
149 6234 : return sz;
150 : }
151 :
152 : /*
153 : * Initialize cumulative statistics system during startup
154 : */
155 : void
156 2178 : StatsShmemInit(void)
157 : {
158 : bool found;
159 : Size sz;
160 :
161 2178 : sz = StatsShmemSize();
162 2178 : pgStatLocal.shmem = (PgStat_ShmemControl *)
163 2178 : ShmemInitStruct("Shared Memory Stats", sz, &found);
164 :
165 2178 : if (!IsUnderPostmaster)
166 : {
167 : dsa_area *dsa;
168 : dshash_table *dsh;
169 2178 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
170 2178 : char *p = (char *) ctl;
171 :
172 : Assert(!found);
173 :
174 : /* the allocation of pgStatLocal.shmem itself */
175 2178 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
176 :
177 : /*
178 : * Create a small dsa allocation in plain shared memory. This is
179 : * required because postmaster cannot use dsm segments. It also
180 : * provides a small efficiency win.
181 : */
182 2178 : ctl->raw_dsa_area = p;
183 2178 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
184 : pgstat_dsa_init_size(),
185 : LWTRANCHE_PGSTATS_DSA, NULL);
186 2178 : dsa_pin(dsa);
187 :
188 : /*
189 : * To ensure dshash is created in "plain" shared memory, temporarily
190 : * limit size of dsa to the initial size of the dsa.
191 : */
192 2178 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
193 :
194 : /*
195 : * With the limit in place, create the dshash table. XXX: It'd be nice
196 : * if there were dshash_create_in_place().
197 : */
198 2178 : dsh = dshash_create(dsa, &dsh_params, NULL);
199 2178 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
200 :
201 : /* lift limit set above */
202 2178 : dsa_set_size_limit(dsa, -1);
203 :
204 : /*
205 : * Postmaster will never access these again, thus free the local
206 : * dsa/dshash references.
207 : */
208 2178 : dshash_detach(dsh);
209 2178 : dsa_detach(dsa);
210 :
211 2178 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
212 :
213 : /* Do the per-kind initialization */
214 71874 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
215 : {
216 69696 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
217 : char *ptr;
218 :
219 69696 : if (!kind_info)
220 43540 : continue;
221 :
222 : /* initialize entry count tracking */
223 26156 : if (kind_info->track_entry_count)
224 10 : pg_atomic_init_u64(&ctl->entry_counts[kind - 1], 0);
225 :
226 : /* initialize fixed-numbered stats */
227 26156 : if (kind_info->fixed_amount)
228 : {
229 13078 : if (pgstat_is_kind_builtin(kind))
230 13068 : ptr = ((char *) ctl) + kind_info->shared_ctl_off;
231 : else
232 : {
233 10 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
234 :
235 : Assert(kind_info->shared_size != 0);
236 10 : ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
237 10 : ptr = ctl->custom_data[idx];
238 : }
239 :
240 13078 : kind_info->init_shmem_cb(ptr);
241 : }
242 : }
243 : }
244 : else
245 : {
246 : Assert(found);
247 : }
248 2178 : }
249 :
/*
 * Attach this backend to the shared stats DSA and its dshash table, storing
 * the resulting references in pgStatLocal.  Must run after StatsShmemInit()
 * has set up pgStatLocal.shmem.
 */
void
pgstat_attach_shmem(void)
{
	MemoryContext oldcontext;

	Assert(pgStatLocal.dsa == NULL);

	/* stats shared memory persists for the backend lifetime */
	oldcontext = MemoryContextSwitchTo(TopMemoryContext);

	pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
										  NULL);
	/* keep the mapping valid for the rest of the backend's life */
	dsa_pin_mapping(pgStatLocal.dsa);

	pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
											pgStatLocal.shmem->hash_handle,
											NULL);

	MemoryContextSwitchTo(oldcontext);
}
270 :
/*
 * Detach from the shared stats DSA / dshash table, first releasing all
 * backend-local entry references so dropped shared entries can be freed.
 */
void
pgstat_detach_shmem(void)
{
	Assert(pgStatLocal.dsa);

	/* we shouldn't leave references to shared stats */
	pgstat_release_all_entry_refs(false);

	dshash_detach(pgStatLocal.shared_hash);
	pgStatLocal.shared_hash = NULL;

	dsa_detach(pgStatLocal.dsa);

	/*
	 * dsa_detach() does not decrement the DSA reference count as no segment
	 * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
	 * be registered. Hence, release it manually now.
	 */
	dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);

	pgStatLocal.dsa = NULL;
}
293 :
294 :
295 : /* ------------------------------------------------------------
296 : * Maintenance of shared memory stats entries
297 : * ------------------------------------------------------------
298 : */
299 :
300 : /*
301 : * Initialize entry newly-created.
302 : *
303 : * Returns NULL in the event of an allocation failure, so as callers can
304 : * take cleanup actions as the entry initialized is already inserted in the
305 : * shared hashtable.
306 : */
307 : PgStatShared_Common *
308 640266 : pgstat_init_entry(PgStat_Kind kind,
309 : PgStatShared_HashEntry *shhashent)
310 : {
311 : /* Create new stats entry. */
312 : dsa_pointer chunk;
313 : PgStatShared_Common *shheader;
314 640266 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
315 :
316 : /*
317 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
318 : * can't be freed before the initialization because it can't be found as
319 : * long as we hold the dshash partition lock. Caller needs to increase
320 : * further if a longer lived reference is needed.
321 : */
322 640266 : pg_atomic_init_u32(&shhashent->refcount, 1);
323 :
324 : /*
325 : * Initialize "generation" to 0, as freshly created.
326 : */
327 640266 : pg_atomic_init_u32(&shhashent->generation, 0);
328 640266 : shhashent->dropped = false;
329 :
330 640266 : chunk = dsa_allocate_extended(pgStatLocal.dsa,
331 640266 : kind_info->shared_size,
332 : DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM);
333 640266 : if (chunk == InvalidDsaPointer)
334 0 : return NULL;
335 :
336 640266 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
337 640266 : shheader->magic = 0xdeadbeef;
338 :
339 : /* Link the new entry from the hash entry. */
340 640266 : shhashent->body = chunk;
341 :
342 : /* Increment entry count, if required. */
343 640266 : if (kind_info->track_entry_count)
344 6 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->entry_counts[kind - 1], 1);
345 :
346 640266 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
347 :
348 640266 : return shheader;
349 : }
350 :
/*
 * Revive a dropped-but-not-yet-freed shared entry for reuse, zeroing its
 * stats payload.  Used when a new object reuses the hash key of a dropped
 * one (e.g. replication slot index reuse, OID wraparound).
 */
static PgStatShared_Common *
pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
{
	PgStatShared_Common *shheader;

	shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);

	/* mark as not dropped anymore */
	pg_atomic_fetch_add_u32(&shhashent->refcount, 1);

	/*
	 * Increment "generation", to let any backend with local references know
	 * that what they point to is outdated.
	 */
	pg_atomic_fetch_add_u32(&shhashent->generation, 1);
	shhashent->dropped = false;

	/* reinitialize content, but keep the header (magic, lock) intact */
	Assert(shheader->magic == 0xdeadbeef);
	memset(pgstat_get_entry_data(kind, shheader), 0,
		   pgstat_get_entry_len(kind));

	return shheader;
}
375 :
/*
 * Lazily create the backend-local entry-ref cache hashtable and snapshot the
 * current shared gc_request_count as our cache age.
 */
static void
pgstat_setup_shared_refs(void)
{
	if (likely(pgStatEntryRefHash != NULL))
		return;

	pgStatEntryRefHash =
		pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
									 PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
	pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
	/* gc_request_count starts at 1, so a valid age is never 0 */
	Assert(pgStatSharedRefAge != 0);
}
388 :
389 : /*
390 : * Helper function for pgstat_get_entry_ref().
391 : */
392 : static void
393 2154956 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
394 : PgStatShared_HashEntry *shhashent,
395 : PgStatShared_Common *shheader)
396 : {
397 : Assert(shheader->magic == 0xdeadbeef);
398 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
399 :
400 2154956 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
401 :
402 2154956 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
403 :
404 2154956 : entry_ref->shared_stats = shheader;
405 2154956 : entry_ref->shared_entry = shhashent;
406 2154956 : entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
407 2154956 : }
408 :
409 : /*
410 : * Helper function for pgstat_get_entry_ref().
411 : */
412 : static bool
413 8134714 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
414 : {
415 : bool found;
416 : PgStat_EntryRefHashEntry *cache_entry;
417 :
418 : /*
419 : * We immediately insert a cache entry, because it avoids 1) multiple
420 : * hashtable lookups in case of a cache miss 2) having to deal with
421 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
422 : */
423 :
424 8134714 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
425 :
426 8134714 : if (!found || !cache_entry->entry_ref)
427 2323614 : {
428 : PgStat_EntryRef *entry_ref;
429 :
430 2323614 : cache_entry->entry_ref = entry_ref =
431 2323614 : MemoryContextAlloc(pgStatSharedRefContext,
432 : sizeof(PgStat_EntryRef));
433 2323614 : entry_ref->shared_stats = NULL;
434 2323614 : entry_ref->shared_entry = NULL;
435 2323614 : entry_ref->pending = NULL;
436 :
437 2323614 : found = false;
438 : }
439 5811100 : else if (cache_entry->entry_ref->shared_stats == NULL)
440 : {
441 : Assert(cache_entry->entry_ref->pending == NULL);
442 0 : found = false;
443 : }
444 : else
445 : {
446 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
447 :
448 5811100 : entry_ref = cache_entry->entry_ref;
449 : Assert(entry_ref->shared_entry != NULL);
450 : Assert(entry_ref->shared_stats != NULL);
451 :
452 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
453 : /* should have at least our reference */
454 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
455 : }
456 :
457 8134714 : *entry_ref_p = cache_entry->entry_ref;
458 8134714 : return found;
459 : }
460 :
461 : /*
462 : * Get a shared stats reference. If create is true, the shared stats object is
463 : * created if it does not exist.
464 : *
465 : * When create is true, and created_entry is non-NULL, it'll be set to true
466 : * if the entry is newly created, false otherwise.
467 : */
468 : PgStat_EntryRef *
469 8134714 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
470 : bool *created_entry)
471 : {
472 8134714 : PgStat_HashKey key = {0};
473 : PgStatShared_HashEntry *shhashent;
474 8134714 : PgStatShared_Common *shheader = NULL;
475 : PgStat_EntryRef *entry_ref;
476 :
477 8134714 : key.kind = kind;
478 8134714 : key.dboid = dboid;
479 8134714 : key.objid = objid;
480 :
481 : /*
482 : * passing in created_entry only makes sense if we possibly could create
483 : * entry.
484 : */
485 : Assert(create || created_entry == NULL);
486 : pgstat_assert_is_up();
487 : Assert(pgStatLocal.shared_hash != NULL);
488 : Assert(!pgStatLocal.shmem->is_shutdown);
489 :
490 8134714 : pgstat_setup_memcxt();
491 8134714 : pgstat_setup_shared_refs();
492 :
493 8134714 : if (created_entry != NULL)
494 214 : *created_entry = false;
495 :
496 : /*
497 : * Check if other backends dropped stats that could not be deleted because
498 : * somebody held references to it. If so, check this backend's references.
499 : * This is not expected to happen often. The location of the check is a
500 : * bit random, but this is a relatively frequently called path, so better
501 : * than most.
502 : */
503 8134714 : if (pgstat_need_entry_refs_gc())
504 11906 : pgstat_gc_entry_refs();
505 :
506 : /*
507 : * First check the lookup cache hashtable in local memory. If we find a
508 : * match here we can avoid taking locks / causing contention.
509 : */
510 8134714 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
511 5811100 : return entry_ref;
512 :
513 : Assert(entry_ref != NULL);
514 :
515 : /*
516 : * Do a lookup in the hash table first - it's quite likely that the entry
517 : * already exists, and that way we only need a shared lock.
518 : */
519 2323614 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
520 :
521 2323614 : if (create && !shhashent)
522 : {
523 : bool shfound;
524 :
525 : /*
526 : * It's possible that somebody created the entry since the above
527 : * lookup. If so, fall through to the same path as if we'd have if it
528 : * already had been created before the dshash_find() calls.
529 : */
530 237268 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
531 237268 : if (!shfound)
532 : {
533 237268 : shheader = pgstat_init_entry(kind, shhashent);
534 237268 : if (shheader == NULL)
535 : {
536 : /*
537 : * Failed the allocation of a new entry, so clean up the
538 : * shared hashtable before giving up.
539 : */
540 0 : dshash_delete_entry(pgStatLocal.shared_hash, shhashent);
541 :
542 0 : ereport(ERROR,
543 : (errcode(ERRCODE_OUT_OF_MEMORY),
544 : errmsg("out of memory"),
545 : errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".",
546 : key.kind, key.dboid, key.objid)));
547 : }
548 237268 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
549 :
550 237268 : if (created_entry != NULL)
551 96 : *created_entry = true;
552 :
553 237268 : return entry_ref;
554 : }
555 : }
556 :
557 2086346 : if (!shhashent)
558 : {
559 : /*
560 : * If we're not creating, delete the reference again. In all
561 : * likelihood it's just a stats lookup - no point wasting memory for a
562 : * shared ref to nothing...
563 : */
564 168584 : pgstat_release_entry_ref(key, entry_ref, false);
565 :
566 168584 : return NULL;
567 : }
568 : else
569 : {
570 : /*
571 : * Can get here either because dshash_find() found a match, or if
572 : * dshash_find_or_insert() found a concurrently inserted entry.
573 : */
574 :
575 1917762 : if (shhashent->dropped && create)
576 : {
577 : /*
578 : * There are legitimate cases where the old stats entry might not
579 : * yet have been dropped by the time it's reused. The most obvious
580 : * case are replication slot stats, where a new slot can be
581 : * created with the same index just after dropping. But oid
582 : * wraparound can lead to other cases as well. We just reset the
583 : * stats to their plain state, while incrementing its "generation"
584 : * in the shared entry for any remaining local references.
585 : */
586 60 : shheader = pgstat_reinit_entry(kind, shhashent);
587 60 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
588 :
589 60 : if (created_entry != NULL)
590 0 : *created_entry = true;
591 :
592 60 : return entry_ref;
593 : }
594 1917702 : else if (shhashent->dropped)
595 : {
596 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
597 74 : pgstat_release_entry_ref(key, entry_ref, false);
598 :
599 74 : return NULL;
600 : }
601 : else
602 : {
603 1917628 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
604 1917628 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
605 :
606 1917628 : return entry_ref;
607 : }
608 : }
609 : }
610 :
/*
 * Release one backend-local reference, removing it from the local cache and
 * freeing it.  If this was the last reference to an already-dropped shared
 * entry, the shared entry itself is freed as well.
 *
 * If discard_pending is true, any pending (not yet flushed) stats attached
 * to the reference are thrown away; otherwise pending data is an ERROR.
 */
static void
pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
						 bool discard_pending)
{
	if (entry_ref && entry_ref->pending)
	{
		if (discard_pending)
			pgstat_delete_pending_entry(entry_ref);
		else
			elog(ERROR, "releasing ref with pending data");
	}

	if (entry_ref && entry_ref->shared_stats)
	{
		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		Assert(entry_ref->pending == NULL);

		/*
		 * This can't race with another backend looking up the stats entry and
		 * increasing the refcount because it is not "legal" to create
		 * additional references to dropped entries.
		 */
		if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
		{
			PgStatShared_HashEntry *shent;

			/*
			 * We're the last referrer to this entry, try to drop the shared
			 * entry.
			 */

			/* only dropped entries can reach a 0 refcount */
			Assert(entry_ref->shared_entry->dropped);

			/* re-find the entry to get an exclusive dshash lock on it */
			shent = dshash_find(pgStatLocal.shared_hash,
								&entry_ref->shared_entry->key,
								true);
			if (!shent)
				elog(ERROR, "could not find just referenced shared stats entry");

			/*
			 * This entry may have been reinitialized while trying to release
			 * it, so double-check that it has not been reused while holding a
			 * lock on its shared entry.
			 */
			if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
				entry_ref->generation)
			{
				/* Same "generation", so we're OK with the removal */
				Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
				Assert(entry_ref->shared_entry == shent);
				pgstat_free_entry(shent, NULL);
			}
			else
			{
				/*
				 * Shared stats entry has been reinitialized, so do not drop
				 * its shared entry, only release its lock.
				 */
				dshash_release_lock(pgStatLocal.shared_hash, shent);
			}
		}
	}

	/* drop the local cache slot; it must still exist at this point */
	if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
		elog(ERROR, "entry ref vanished before deletion");

	if (entry_ref)
		pfree(entry_ref);
}
681 :
682 : /*
683 : * Acquire exclusive lock on the entry.
684 : *
685 : * If nowait is true, it's just a conditional acquire, and the result
686 : * *must* be checked to verify success.
687 : * If nowait is false, waits as necessary, always returning true.
688 : */
689 : bool
690 2679552 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
691 : {
692 2679552 : LWLock *lock = &entry_ref->shared_stats->lock;
693 :
694 2679552 : if (nowait)
695 674684 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
696 :
697 2004868 : LWLockAcquire(lock, LW_EXCLUSIVE);
698 2004868 : return true;
699 : }
700 :
701 : /*
702 : * Acquire shared lock on the entry.
703 : *
704 : * Separate from pgstat_lock_entry() as most callers will need to lock
705 : * exclusively. The wait semantics are identical.
706 : */
707 : bool
708 712080 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
709 : {
710 712080 : LWLock *lock = &entry_ref->shared_stats->lock;
711 :
712 712080 : if (nowait)
713 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
714 :
715 712080 : LWLockAcquire(lock, LW_SHARED);
716 712080 : return true;
717 : }
718 :
719 : void
720 3391620 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
721 : {
722 3391620 : LWLockRelease(&entry_ref->shared_stats->lock);
723 3391620 : }
724 :
725 : /*
726 : * Helper function to fetch and lock shared stats.
727 : */
728 : PgStat_EntryRef *
729 642910 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
730 : bool nowait)
731 : {
732 : PgStat_EntryRef *entry_ref;
733 :
734 : /* find shared table stats entry corresponding to the local entry */
735 642910 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
736 :
737 : /* lock the shared entry to protect the content, skip if failed */
738 642910 : if (!pgstat_lock_entry(entry_ref, nowait))
739 0 : return NULL;
740 :
741 642910 : return entry_ref;
742 : }
743 :
744 : void
745 3808 : pgstat_request_entry_refs_gc(void)
746 : {
747 3808 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
748 3808 : }
749 :
750 : static bool
751 8134714 : pgstat_need_entry_refs_gc(void)
752 : {
753 : uint64 curage;
754 :
755 8134714 : if (!pgStatEntryRefHash)
756 0 : return false;
757 :
758 : /* should have been initialized when creating pgStatEntryRefHash */
759 : Assert(pgStatSharedRefAge != 0);
760 :
761 8134714 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
762 :
763 8134714 : return pgStatSharedRefAge != curage;
764 : }
765 :
/*
 * Garbage-collect the backend-local entry-ref cache: release references to
 * shared entries that have been dropped or reinitialized since we cached
 * them, then record the gc_request_count we caught up to.
 */
static void
pgstat_gc_entry_refs(void)
{
	pgstat_entry_ref_hash_iterator i;
	PgStat_EntryRefHashEntry *ent;
	uint64		curage;

	curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
	Assert(curage != 0);

	/*
	 * Some entries have been dropped or reinitialized. Invalidate cache
	 * pointer to them.
	 */
	pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
	while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
	{
		PgStat_EntryRef *entry_ref = ent->entry_ref;

		Assert(!entry_ref->shared_stats ||
			   entry_ref->shared_stats->magic == 0xdeadbeef);

		/*
		 * "generation" checks for the case of entries being reinitialized,
		 * and "dropped" for the case where these are.. dropped.
		 */
		if (!entry_ref->shared_entry->dropped &&
			pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
			entry_ref->generation)
			continue;

		/* cannot gc shared ref that has pending data */
		if (entry_ref->pending != NULL)
			continue;

		/* releasing deletes ent from the hashtable we are iterating over */
		pgstat_release_entry_ref(ent->key, entry_ref, false);
	}

	pgStatSharedRefAge = curage;
}
806 :
/*
 * Release every cached entry reference accepted by 'match' (a NULL match
 * callback releases all of them), optionally discarding pending stats.
 */
static void
pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
								   Datum match_data)
{
	pgstat_entry_ref_hash_iterator i;
	PgStat_EntryRefHashEntry *ent;

	if (pgStatEntryRefHash == NULL)
		return;

	pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);

	while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
		   != NULL)
	{
		Assert(ent->entry_ref != NULL);

		if (match && !match(ent, match_data))
			continue;

		/* releasing deletes ent from the hashtable we are iterating over */
		pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
	}
}
830 :
831 : /*
832 : * Release all local references to shared stats entries.
833 : *
834 : * When a process exits it cannot do so while still holding references onto
835 : * stats entries, otherwise the shared stats entries could never be freed.
836 : */
837 : static void
838 44534 : pgstat_release_all_entry_refs(bool discard_pending)
839 : {
840 44534 : if (pgStatEntryRefHash == NULL)
841 6926 : return;
842 :
843 37608 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
844 : Assert(pgStatEntryRefHash->members == 0);
845 37608 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
846 37608 : pgStatEntryRefHash = NULL;
847 : }
848 :
849 : static bool
850 2102 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
851 : {
852 2102 : Oid dboid = DatumGetObjectId(match_data);
853 :
854 2102 : return ent->key.dboid == dboid;
855 : }
856 :
857 : static void
858 72 : pgstat_release_db_entry_refs(Oid dboid)
859 : {
860 72 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
861 : match_db,
862 : ObjectIdGetDatum(dboid));
863 72 : }
864 :
865 :
866 : /* ------------------------------------------------------------
867 : * Dropping and resetting of stats entries
868 : * ------------------------------------------------------------
869 : */
870 :
/*
 * Remove the shared hashtable entry and free its DSA-allocated stats data.
 * If hstat is non-NULL we are inside a dshash sequential scan and must use
 * dshash_delete_current(); otherwise the caller holds an exclusive lock on
 * the entry and dshash_delete_entry() is used.
 */
static void
pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
{
	dsa_pointer pdsa;
	PgStat_Kind kind = shent->key.kind;

	/*
	 * Fetch dsa pointer before deleting entry - that way we can free the
	 * memory after releasing the lock.
	 */
	pdsa = shent->body;

	if (!hstat)
		dshash_delete_entry(pgStatLocal.shared_hash, shent);
	else
		dshash_delete_current(hstat);

	dsa_free(pgStatLocal.dsa, pdsa);

	/* Decrement entry count, if required; counters are indexed by kind-1. */
	if (pgstat_get_kind_info(kind)->track_entry_count)
		pg_atomic_sub_fetch_u64(&pgStatLocal.shmem->entry_counts[kind - 1], 1);
}
894 :
895 : /*
896 : * Helper for both pgstat_drop_database_and_contents() and
897 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
898 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
899 : * case the entry needs to be already locked.
900 : */
901 : static bool
902 112030 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
903 : dshash_seq_status *hstat)
904 : {
905 : Assert(shent->body != InvalidDsaPointer);
906 :
907 : /* should already have released local reference */
908 112030 : if (pgStatEntryRefHash)
909 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
910 :
911 : /*
912 : * Signal that the entry is dropped - this will eventually cause other
913 : * backends to release their references.
914 : */
915 112030 : if (shent->dropped)
916 0 : elog(ERROR,
917 : "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
918 : pgstat_get_kind_info(shent->key.kind)->name,
919 : shent->key.dboid,
920 : shent->key.objid,
921 : pg_atomic_read_u32(&shent->refcount),
922 : pg_atomic_read_u32(&shent->generation));
923 112030 : shent->dropped = true;
924 :
925 : /* release refcount marking entry as not dropped */
926 112030 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
927 : {
928 102384 : pgstat_free_entry(shent, hstat);
929 102384 : return true;
930 : }
931 : else
932 : {
933 9646 : if (!hstat)
934 9646 : dshash_release_lock(pgStatLocal.shared_hash, shent);
935 9646 : return false;
936 : }
937 : }
938 :
939 : /*
940 : * Drop stats for the database and all the objects inside that database.
941 : */
942 : static void
943 72 : pgstat_drop_database_and_contents(Oid dboid)
944 : {
945 : dshash_seq_status hstat;
946 : PgStatShared_HashEntry *p;
947 72 : uint64 not_freed_count = 0;
948 :
949 : Assert(OidIsValid(dboid));
950 :
951 : Assert(pgStatLocal.shared_hash != NULL);
952 :
953 : /*
954 : * This backend might very well be the only backend holding a reference to
955 : * about-to-be-dropped entries. Ensure that we're not preventing it from
956 : * being cleaned up till later.
957 : *
958 : * Doing this separately from the dshash iteration below avoids having to
959 : * do so while holding a partition lock on the shared hashtable.
960 : */
961 72 : pgstat_release_db_entry_refs(dboid);
962 :
963 : /* some of the dshash entries are to be removed, take exclusive lock. */
964 72 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
965 26776 : while ((p = dshash_seq_next(&hstat)) != NULL)
966 : {
967 26704 : if (p->dropped)
968 2 : continue;
969 :
970 26702 : if (p->key.dboid != dboid)
971 18014 : continue;
972 :
973 8688 : if (!pgstat_drop_entry_internal(p, &hstat))
974 : {
975 : /*
976 : * Even statistics for a dropped database might currently be
977 : * accessed (consider e.g. database stats for pg_stat_database).
978 : */
979 0 : not_freed_count++;
980 : }
981 : }
982 72 : dshash_seq_term(&hstat);
983 :
984 : /*
985 : * If some of the stats data could not be freed, signal the reference
986 : * holders to run garbage collection of their cached pgStatLocal.shmem.
987 : */
988 72 : if (not_freed_count > 0)
989 0 : pgstat_request_entry_refs_gc();
990 72 : }
991 :
992 : /*
993 : * Drop a single stats entry.
994 : *
995 : * This routine returns false if the stats entry of the dropped object could
996 : * not be freed, true otherwise.
997 : *
998 : * The callers of this function should call pgstat_request_entry_refs_gc()
999 : * if the stats entry could not be freed, to ensure that this entry's memory
1000 : * can be reclaimed later by a different backend calling
1001 : * pgstat_gc_entry_refs().
1002 : */
1003 : bool
1004 149066 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1005 : {
1006 149066 : PgStat_HashKey key = {0};
1007 : PgStatShared_HashEntry *shent;
1008 149066 : bool freed = true;
1009 :
1010 149066 : key.kind = kind;
1011 149066 : key.dboid = dboid;
1012 149066 : key.objid = objid;
1013 :
1014 : /* delete local reference */
1015 149066 : if (pgStatEntryRefHash)
1016 : {
1017 : PgStat_EntryRefHashEntry *lohashent =
1018 142130 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
1019 :
1020 142130 : if (lohashent)
1021 102946 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1022 : true);
1023 : }
1024 :
1025 : /* mark entry in shared hashtable as deleted, drop if possible */
1026 149066 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
1027 149066 : if (shent)
1028 : {
1029 103228 : freed = pgstat_drop_entry_internal(shent, NULL);
1030 :
1031 : /*
1032 : * Database stats contain other stats. Drop those as well when
1033 : * dropping the database. XXX: Perhaps this should be done in a
1034 : * slightly more principled way? But not obvious what that'd look
1035 : * like, and so far this is the only case...
1036 : */
1037 103228 : if (key.kind == PGSTAT_KIND_DATABASE)
1038 72 : pgstat_drop_database_and_contents(key.dboid);
1039 : }
1040 :
1041 149066 : return freed;
1042 : }
1043 :
1044 : /*
1045 : * Scan through the shared hashtable of stats, dropping statistics if
1046 : * approved by the optional do_drop() function.
1047 : */
1048 : void
1049 460 : pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
1050 : Datum match_data)
1051 : {
1052 : dshash_seq_status hstat;
1053 : PgStatShared_HashEntry *ps;
1054 460 : uint64 not_freed_count = 0;
1055 :
1056 : /* entries are removed, take an exclusive lock */
1057 460 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
1058 638 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1059 : {
1060 178 : if (ps->dropped)
1061 0 : continue;
1062 :
1063 178 : if (do_drop != NULL && !do_drop(ps, match_data))
1064 64 : continue;
1065 :
1066 : /* delete local reference */
1067 114 : if (pgStatEntryRefHash)
1068 : {
1069 : PgStat_EntryRefHashEntry *lohashent =
1070 2 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);
1071 :
1072 2 : if (lohashent)
1073 0 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1074 : true);
1075 : }
1076 :
1077 114 : if (!pgstat_drop_entry_internal(ps, &hstat))
1078 0 : not_freed_count++;
1079 : }
1080 460 : dshash_seq_term(&hstat);
1081 :
1082 460 : if (not_freed_count > 0)
1083 0 : pgstat_request_entry_refs_gc();
1084 460 : }
1085 :
1086 : /*
1087 : * Scan through the shared hashtable of stats and drop all entries.
1088 : */
1089 : void
1090 458 : pgstat_drop_all_entries(void)
1091 : {
1092 458 : pgstat_drop_matching_entries(NULL, 0);
1093 458 : }
1094 :
1095 : static void
1096 17474 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1097 : TimestampTz ts)
1098 : {
1099 17474 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1100 :
1101 17474 : memset(pgstat_get_entry_data(kind, header), 0,
1102 : pgstat_get_entry_len(kind));
1103 :
1104 17474 : if (kind_info->reset_timestamp_cb)
1105 400 : kind_info->reset_timestamp_cb(header, ts);
1106 17474 : }
1107 :
1108 : /*
1109 : * Reset one variable-numbered stats entry.
1110 : */
1111 : void
1112 372 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1113 : {
1114 : PgStat_EntryRef *entry_ref;
1115 :
1116 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1117 :
1118 372 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1119 372 : if (!entry_ref || entry_ref->shared_entry->dropped)
1120 2 : return;
1121 :
1122 370 : (void) pgstat_lock_entry(entry_ref, false);
1123 370 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1124 370 : pgstat_unlock_entry(entry_ref);
1125 : }
1126 :
1127 : /*
1128 : * Scan through the shared hashtable of stats, resetting statistics if
1129 : * approved by the provided do_reset() function.
1130 : */
1131 : void
1132 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1133 : Datum match_data, TimestampTz ts)
1134 : {
1135 : dshash_seq_status hstat;
1136 : PgStatShared_HashEntry *p;
1137 :
1138 : /* dshash entry is not modified, take shared lock */
1139 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1140 25832 : while ((p = dshash_seq_next(&hstat)) != NULL)
1141 : {
1142 : PgStatShared_Common *header;
1143 :
1144 25798 : if (p->dropped)
1145 2 : continue;
1146 :
1147 25796 : if (!do_reset(p, match_data))
1148 8692 : continue;
1149 :
1150 17104 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1151 :
1152 17104 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1153 :
1154 17104 : shared_stat_reset_contents(p->key.kind, header, ts);
1155 :
1156 17104 : LWLockRelease(&header->lock);
1157 : }
1158 34 : dshash_seq_term(&hstat);
1159 34 : }
1160 :
1161 : static bool
1162 2964 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1163 : {
1164 2964 : return p->key.kind == DatumGetInt32(match_data);
1165 : }
1166 :
1167 : void
1168 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1169 : {
1170 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1171 8 : }
1172 :
1173 : static void
1174 8134714 : pgstat_setup_memcxt(void)
1175 : {
1176 8134714 : if (unlikely(!pgStatSharedRefContext))
1177 37608 : pgStatSharedRefContext =
1178 37608 : AllocSetContextCreate(TopMemoryContext,
1179 : "PgStat Shared Ref",
1180 : ALLOCSET_SMALL_SIZES);
1181 8134714 : if (unlikely(!pgStatEntryRefHashContext))
1182 37608 : pgStatEntryRefHashContext =
1183 37608 : AllocSetContextCreate(TopMemoryContext,
1184 : "PgStat Shared Ref Hash",
1185 : ALLOCSET_SMALL_SIZES);
1186 8134714 : }
|