Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
/* initial size of the backend-local entry-ref cache hashtable */
#define PGSTAT_ENTRY_REF_HASH_SIZE	128

/*
 * Hash table entry for finding the PgStat_EntryRef for a key.
 *
 * Element type of the simplehash table instantiated below; entry_ref is
 * owned by this cache and is freed when the reference is released.
 */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref;	/* NULL until fully set up in
								 * pgstat_get_entry_ref_cached() */
} PgStat_EntryRefHashEntry;
30 :
31 :
/*
 * For references to shared statistics entries.
 *
 * Instantiates a backend-local simplehash table ("pgstat_entry_ref_hash")
 * keyed by PgStat_HashKey, reusing the same hash / compare functions as the
 * shared dshash table so both hash the key identically.
 */
#define SH_PREFIX pgstat_entry_ref_hash
#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
45 :
46 :
/* forward declarations for file-local helpers */
static void pgstat_drop_database_and_contents(Oid dboid);

static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);

static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
static bool pgstat_need_entry_refs_gc(void);
static void pgstat_gc_entry_refs(void);
static void pgstat_release_all_entry_refs(bool discard_pending);

/* callback deciding whether a cached entry ref matches; see match_db() */
typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);

static void pgstat_setup_memcxt(void);
59 :
60 :
/*
 * Parameters for the shared hash (dshash) over PgStatShared_HashEntry,
 * keyed by PgStat_HashKey.  Shares its hash/compare functions with the
 * local entry-ref cache above.
 */
static const dshash_parameters dsh_params = {
	sizeof(PgStat_HashKey),
	sizeof(PgStatShared_HashEntry),
	pgstat_cmp_hash_key,
	pgstat_hash_hash_key,
	dshash_memcpy,
	LWTRANCHE_PGSTATS_HASH
};
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
84 :
85 : /*
86 : * Memory contexts containing the pgStatEntryRefHash table and the
87 : * pgStatSharedRef entries respectively. Kept separate to make it easier to
88 : * track / attribute memory usage.
89 : */
90 : static MemoryContext pgStatSharedRefContext = NULL;
91 : static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 10570 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 10570 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 10570 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 6222 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 6222 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 6222 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 : /* Add shared memory for all the custom fixed-numbered statistics */
135 62220 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
136 : {
137 55998 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
138 :
139 55998 : if (!kind_info)
140 55938 : continue;
141 60 : if (!kind_info->fixed_amount)
142 30 : continue;
143 :
144 : Assert(kind_info->shared_size != 0);
145 :
146 30 : sz += MAXALIGN(kind_info->shared_size);
147 : }
148 :
149 6222 : return sz;
150 : }
151 :
152 : /*
153 : * Initialize cumulative statistics system during startup
154 : */
155 : void
/*
 * Initialize cumulative statistics system during startup.
 *
 * In the postmaster (or a standalone backend) this carves the control
 * struct, the in-place dsa area, the shared dshash table and the shared
 * data for all fixed-numbered stats kinds out of the allocation sized by
 * StatsShmemSize().  In EXEC_BACKEND-style children the structures already
 * exist and only the ShmemInitStruct() lookup is needed.
 */
void
StatsShmemInit(void)
{
	bool		found;
	Size		sz;

	sz = StatsShmemSize();
	pgStatLocal.shmem = (PgStat_ShmemControl *)
		ShmemInitStruct("Shared Memory Stats", sz, &found);

	if (!IsUnderPostmaster)
	{
		dsa_area   *dsa;
		dshash_table *dsh;
		PgStat_ShmemControl *ctl = pgStatLocal.shmem;
		char	   *p = (char *) ctl;

		Assert(!found);

		/* the allocation of pgStatLocal.shmem itself */
		p += MAXALIGN(sizeof(PgStat_ShmemControl));

		/*
		 * Create a small dsa allocation in plain shared memory. This is
		 * required because postmaster cannot use dsm segments. It also
		 * provides a small efficiency win.
		 */
		ctl->raw_dsa_area = p;
		dsa = dsa_create_in_place(ctl->raw_dsa_area,
								  pgstat_dsa_init_size(),
								  LWTRANCHE_PGSTATS_DSA, NULL);
		dsa_pin(dsa);

		/*
		 * To ensure dshash is created in "plain" shared memory, temporarily
		 * limit size of dsa to the initial size of the dsa.
		 */
		dsa_set_size_limit(dsa, pgstat_dsa_init_size());

		/*
		 * With the limit in place, create the dshash table. XXX: It'd be nice
		 * if there were dshash_create_in_place().
		 */
		dsh = dshash_create(dsa, &dsh_params, NULL);
		ctl->hash_handle = dshash_get_hash_table_handle(dsh);

		/* lift limit set above */
		dsa_set_size_limit(dsa, -1);

		/*
		 * Postmaster will never access these again, thus free the local
		 * dsa/dshash references.
		 */
		dshash_detach(dsh);
		dsa_detach(dsa);

		/* start at 1 so a zero-initialized backend-local age reads as stale */
		pg_atomic_init_u64(&ctl->gc_request_count, 1);

		/* initialize fixed-numbered stats */
		for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
		{
			const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
			char	   *ptr;

			if (!kind_info || !kind_info->fixed_amount)
				continue;

			/*
			 * Builtin fixed stats live at a known offset inside the control
			 * struct; custom ones get their own ShmemAlloc'd chunk tracked
			 * via ctl->custom_data[].
			 */
			if (pgstat_is_kind_builtin(kind))
				ptr = ((char *) ctl) + kind_info->shared_ctl_off;
			else
			{
				int			idx = kind - PGSTAT_KIND_CUSTOM_MIN;

				Assert(kind_info->shared_size != 0);
				ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
				ptr = ctl->custom_data[idx];
			}

			kind_info->init_shmem_cb(ptr);
		}
	}
	else
	{
		Assert(found);
	}
}
241 :
/*
 * Attach this backend to the shared stats dsa area and dshash table.
 *
 * Called once per backend; the resulting mappings are pinned and kept in
 * TopMemoryContext for the backend's whole lifetime (released again in
 * pgstat_detach_shmem()).
 */
void
pgstat_attach_shmem(void)
{
	MemoryContext oldcontext;

	Assert(pgStatLocal.dsa == NULL);

	/* stats shared memory persists for the backend lifetime */
	oldcontext = MemoryContextSwitchTo(TopMemoryContext);

	pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
										  NULL);
	dsa_pin_mapping(pgStatLocal.dsa);

	pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
											pgStatLocal.shmem->hash_handle,
											NULL);

	MemoryContextSwitchTo(oldcontext);
}
262 :
/*
 * Detach from the shared stats dsa/dshash, first releasing all local entry
 * references so dropped shared entries can eventually be freed.
 */
void
pgstat_detach_shmem(void)
{
	Assert(pgStatLocal.dsa);

	/* we shouldn't leave references to shared stats */
	pgstat_release_all_entry_refs(false);

	dshash_detach(pgStatLocal.shared_hash);
	pgStatLocal.shared_hash = NULL;

	dsa_detach(pgStatLocal.dsa);

	/*
	 * dsa_detach() does not decrement the DSA reference count as no segment
	 * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
	 * be registered. Hence, release it manually now.
	 */
	dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);

	pgStatLocal.dsa = NULL;
}
285 :
286 :
287 : /* ------------------------------------------------------------
288 : * Maintenance of shared memory stats entries
289 : * ------------------------------------------------------------
290 : */
291 :
292 : /*
293 : * Initialize entry newly-created.
294 : *
295 : * Returns NULL in the event of an allocation failure, so as callers can
296 : * take cleanup actions as the entry initialized is already inserted in the
297 : * shared hashtable.
298 : */
/*
 * Initialize entry newly-created.
 *
 * Returns NULL in the event of an allocation failure, so as callers can
 * take cleanup actions as the entry initialized is already inserted in the
 * shared hashtable.
 *
 * Caller must hold the dshash partition lock for shhashent (that is what
 * makes the refcount/generation initialization below race-free).
 */
PgStatShared_Common *
pgstat_init_entry(PgStat_Kind kind,
				  PgStatShared_HashEntry *shhashent)
{
	/* Create new stats entry. */
	dsa_pointer chunk;
	PgStatShared_Common *shheader;

	/*
	 * Initialize refcount to 1, marking it as valid / not dropped. The entry
	 * can't be freed before the initialization because it can't be found as
	 * long as we hold the dshash partition lock. Caller needs to increase
	 * further if a longer lived reference is needed.
	 */
	pg_atomic_init_u32(&shhashent->refcount, 1);

	/*
	 * Initialize "generation" to 0, as freshly created.
	 */
	pg_atomic_init_u32(&shhashent->generation, 0);
	shhashent->dropped = false;

	/* allocate the kind-specific stats body; NO_OOM lets caller clean up */
	chunk = dsa_allocate_extended(pgStatLocal.dsa,
								  pgstat_get_kind_info(kind)->shared_size,
								  DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM);
	if (chunk == InvalidDsaPointer)
		return NULL;

	shheader = dsa_get_address(pgStatLocal.dsa, chunk);
	shheader->magic = 0xdeadbeef;	/* validity marker, checked in Asserts */

	/* Link the new entry from the hash entry. */
	shhashent->body = chunk;

	LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);

	return shheader;
}
337 :
/*
 * Reuse a dropped-but-not-yet-freed shared entry for a new object with the
 * same key: revive it (refcount +1), bump its generation so stale local
 * references can detect the reuse, and zero the kind-specific stats data.
 *
 * Caller must hold the dshash partition lock for shhashent.
 */
static PgStatShared_Common *
pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
{
	PgStatShared_Common *shheader;

	shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);

	/* mark as not dropped anymore */
	pg_atomic_fetch_add_u32(&shhashent->refcount, 1);

	/*
	 * Increment "generation", to let any backend with local references know
	 * that what they point to is outdated.
	 */
	pg_atomic_fetch_add_u32(&shhashent->generation, 1);
	shhashent->dropped = false;

	/* reinitialize content */
	Assert(shheader->magic == 0xdeadbeef);
	memset(pgstat_get_entry_data(kind, shheader), 0,
		   pgstat_get_entry_len(kind));

	return shheader;
}
362 :
/*
 * Lazily create the backend-local entry-ref cache hashtable, and remember
 * the current gc_request_count so pgstat_need_entry_refs_gc() can detect
 * later GC requests.
 */
static void
pgstat_setup_shared_refs(void)
{
	if (likely(pgStatEntryRefHash != NULL))
		return;

	pgStatEntryRefHash =
		pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
									 PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
	pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
	/* gc_request_count starts at 1 (see StatsShmemInit()), so never 0 here */
	Assert(pgStatSharedRefAge != 0);
}
375 :
376 : /*
377 : * Helper function for pgstat_get_entry_ref().
378 : */
/*
 * Helper function for pgstat_get_entry_ref().
 *
 * Takes a reference on the shared entry (refcount +1), releases the dshash
 * partition lock the caller acquired, and fills in the local entry_ref,
 * including a generation snapshot used later to detect entry reuse.
 */
static void
pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
						 PgStatShared_HashEntry *shhashent,
						 PgStatShared_Common *shheader)
{
	Assert(shheader->magic == 0xdeadbeef);
	Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);

	pg_atomic_fetch_add_u32(&shhashent->refcount, 1);

	/* safe to release now that our refcount pins the entry */
	dshash_release_lock(pgStatLocal.shared_hash, shhashent);

	entry_ref->shared_stats = shheader;
	entry_ref->shared_entry = shhashent;
	entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
}
395 :
396 : /*
397 : * Helper function for pgstat_get_entry_ref().
398 : */
/*
 * Helper function for pgstat_get_entry_ref().
 *
 * Look up / insert the key in the local cache.  Returns true iff a fully
 * set up cached reference was found; on false the caller must finish
 * setting up *entry_ref_p (a freshly allocated, zeroed-out ref).
 */
static bool
pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
{
	bool		found;
	PgStat_EntryRefHashEntry *cache_entry;

	/*
	 * We immediately insert a cache entry, because it avoids 1) multiple
	 * hashtable lookups in case of a cache miss 2) having to deal with
	 * out-of-memory errors after incrementing PgStatShared_Common->refcount.
	 */

	cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);

	if (!found || !cache_entry->entry_ref)
	{
		PgStat_EntryRef *entry_ref;

		/* brand-new cache entry: allocate and blank out the ref */
		cache_entry->entry_ref = entry_ref =
			MemoryContextAlloc(pgStatSharedRefContext,
							   sizeof(PgStat_EntryRef));
		entry_ref->shared_stats = NULL;
		entry_ref->shared_entry = NULL;
		entry_ref->pending = NULL;

		found = false;
	}
	else if (cache_entry->entry_ref->shared_stats == NULL)
	{
		/* half-initialized ref left behind (e.g. after an error) - redo it */
		Assert(cache_entry->entry_ref->pending == NULL);
		found = false;
	}
	else
	{
		PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;

		entry_ref = cache_entry->entry_ref;
		Assert(entry_ref->shared_entry != NULL);
		Assert(entry_ref->shared_stats != NULL);

		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		/* should have at least our reference */
		Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
	}

	*entry_ref_p = cache_entry->entry_ref;
	return found;
}
447 :
448 : /*
449 : * Get a shared stats reference. If create is true, the shared stats object is
450 : * created if it does not exist.
451 : *
452 : * When create is true, and created_entry is non-NULL, it'll be set to true
453 : * if the entry is newly created, false otherwise.
454 : */
/*
 * Get a shared stats reference. If create is true, the shared stats object is
 * created if it does not exist.
 *
 * When create is true, and created_entry is non-NULL, it'll be set to true
 * if the entry is newly created, false otherwise.
 *
 * Returns NULL only when the entry does not exist (or is dropped) and
 * create is false.  May perform GC of the local ref cache as a side effect.
 */
PgStat_EntryRef *
pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
					 bool *created_entry)
{
	PgStat_HashKey key;
	PgStatShared_HashEntry *shhashent;
	PgStatShared_Common *shheader = NULL;
	PgStat_EntryRef *entry_ref;

	/* clear padding */
	memset(&key, 0, sizeof(struct PgStat_HashKey));

	key.kind = kind;
	key.dboid = dboid;
	key.objid = objid;

	/*
	 * passing in created_entry only makes sense if we possibly could create
	 * entry.
	 */
	Assert(create || created_entry == NULL);
	pgstat_assert_is_up();
	Assert(pgStatLocal.shared_hash != NULL);
	Assert(!pgStatLocal.shmem->is_shutdown);

	pgstat_setup_memcxt();
	pgstat_setup_shared_refs();

	if (created_entry != NULL)
		*created_entry = false;

	/*
	 * Check if other backends dropped stats that could not be deleted because
	 * somebody held references to it. If so, check this backend's references.
	 * This is not expected to happen often. The location of the check is a
	 * bit random, but this is a relatively frequently called path, so better
	 * than most.
	 */
	if (pgstat_need_entry_refs_gc())
		pgstat_gc_entry_refs();

	/*
	 * First check the lookup cache hashtable in local memory. If we find a
	 * match here we can avoid taking locks / causing contention.
	 */
	if (pgstat_get_entry_ref_cached(key, &entry_ref))
		return entry_ref;

	Assert(entry_ref != NULL);

	/*
	 * Do a lookup in the hash table first - it's quite likely that the entry
	 * already exists, and that way we only need a shared lock.
	 */
	shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);

	if (create && !shhashent)
	{
		bool		shfound;

		/*
		 * It's possible that somebody created the entry since the above
		 * lookup. If so, fall through to the same path as if we'd have if it
		 * already had been created before the dshash_find() calls.
		 */
		shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
		if (!shfound)
		{
			shheader = pgstat_init_entry(kind, shhashent);
			if (shheader == NULL)
			{
				/*
				 * Failed the allocation of a new entry, so clean up the
				 * shared hashtable before giving up.
				 */
				dshash_delete_entry(pgStatLocal.shared_hash, shhashent);

				ereport(ERROR,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of memory"),
						 errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".",
								   key.kind, key.dboid, key.objid)));
			}
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			if (created_entry != NULL)
				*created_entry = true;

			return entry_ref;
		}
	}

	if (!shhashent)
	{
		/*
		 * If we're not creating, delete the reference again. In all
		 * likelihood it's just a stats lookup - no point wasting memory for a
		 * shared ref to nothing...
		 */
		pgstat_release_entry_ref(key, entry_ref, false);

		return NULL;
	}
	else
	{
		/*
		 * Can get here either because dshash_find() found a match, or if
		 * dshash_find_or_insert() found a concurrently inserted entry.
		 */

		if (shhashent->dropped && create)
		{
			/*
			 * There are legitimate cases where the old stats entry might not
			 * yet have been dropped by the time it's reused. The most obvious
			 * case are replication slot stats, where a new slot can be
			 * created with the same index just after dropping. But oid
			 * wraparound can lead to other cases as well. We just reset the
			 * stats to their plain state, while incrementing its "generation"
			 * in the shared entry for any remaining local references.
			 */
			shheader = pgstat_reinit_entry(kind, shhashent);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			if (created_entry != NULL)
				*created_entry = true;

			return entry_ref;
		}
		else if (shhashent->dropped)
		{
			/* dropped and we may not revive it: give up the lookup */
			dshash_release_lock(pgStatLocal.shared_hash, shhashent);
			pgstat_release_entry_ref(key, entry_ref, false);

			return NULL;
		}
		else
		{
			/* normal case: live entry found, take a reference */
			shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
			pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);

			return entry_ref;
		}
	}
}
600 :
/*
 * Release one local reference to a shared stats entry and remove it from
 * the local cache.
 *
 * If discard_pending is true any pending (not yet flushed) data attached to
 * the ref is thrown away; otherwise releasing a ref with pending data is an
 * error.  If we drop the last reference to an already-dropped shared entry,
 * the shared entry itself is freed here.
 */
static void
pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
						 bool discard_pending)
{
	if (entry_ref && entry_ref->pending)
	{
		if (discard_pending)
			pgstat_delete_pending_entry(entry_ref);
		else
			elog(ERROR, "releasing ref with pending data");
	}

	if (entry_ref && entry_ref->shared_stats)
	{
		Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
		Assert(entry_ref->pending == NULL);

		/*
		 * This can't race with another backend looking up the stats entry and
		 * increasing the refcount because it is not "legal" to create
		 * additional references to dropped entries.
		 */
		if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
		{
			PgStatShared_HashEntry *shent;

			/*
			 * We're the last referrer to this entry, try to drop the shared
			 * entry.
			 */

			/* only dropped entries can reach a 0 refcount */
			Assert(entry_ref->shared_entry->dropped);

			/* re-find the entry to get the dshash partition lock */
			shent = dshash_find(pgStatLocal.shared_hash,
								&entry_ref->shared_entry->key,
								true);
			if (!shent)
				elog(ERROR, "could not find just referenced shared stats entry");

			/*
			 * This entry may have been reinitialized while trying to release
			 * it, so double-check that it has not been reused while holding a
			 * lock on its shared entry.
			 */
			if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
				entry_ref->generation)
			{
				/* Same "generation", so we're OK with the removal */
				Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
				Assert(entry_ref->shared_entry == shent);
				pgstat_free_entry(shent, NULL);
			}
			else
			{
				/*
				 * Shared stats entry has been reinitialized, so do not drop
				 * its shared entry, only release its lock.
				 */
				dshash_release_lock(pgStatLocal.shared_hash, shent);
			}
		}
	}

	/* drop the cache entry; it must still be present */
	if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
		elog(ERROR, "entry ref vanished before deletion");

	if (entry_ref)
		pfree(entry_ref);
}
671 :
672 : /*
673 : * Acquire exclusive lock on the entry.
674 : *
675 : * If nowait is true, it's just a conditional acquire, and the result
676 : * *must* be checked to verify success.
677 : * If nowait is false, waits as necessary, always returning true.
678 : */
679 : bool
680 2732600 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
681 : {
682 2732600 : LWLock *lock = &entry_ref->shared_stats->lock;
683 :
684 2732600 : if (nowait)
685 670012 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
686 :
687 2062588 : LWLockAcquire(lock, LW_EXCLUSIVE);
688 2062588 : return true;
689 : }
690 :
691 : /*
692 : * Acquire shared lock on the entry.
693 : *
694 : * Separate from pgstat_lock_entry() as most callers will need to lock
695 : * exclusively. The wait semantics are identical.
696 : */
697 : bool
698 740578 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
699 : {
700 740578 : LWLock *lock = &entry_ref->shared_stats->lock;
701 :
702 740578 : if (nowait)
703 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
704 :
705 740578 : LWLockAcquire(lock, LW_SHARED);
706 740578 : return true;
707 : }
708 :
709 : void
710 3473172 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
711 : {
712 3473172 : LWLockRelease(&entry_ref->shared_stats->lock);
713 3473172 : }
714 :
715 : /*
716 : * Helper function to fetch and lock shared stats.
717 : */
718 : PgStat_EntryRef *
719 667872 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
720 : bool nowait)
721 : {
722 : PgStat_EntryRef *entry_ref;
723 :
724 : /* find shared table stats entry corresponding to the local entry */
725 667872 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
726 :
727 : /* lock the shared entry to protect the content, skip if failed */
728 667872 : if (!pgstat_lock_entry(entry_ref, nowait))
729 0 : return NULL;
730 :
731 667872 : return entry_ref;
732 : }
733 :
/*
 * Ask all backends to garbage-collect their cached entry refs, by bumping
 * the shared gc_request_count that pgstat_need_entry_refs_gc() compares
 * against.
 */
void
pgstat_request_entry_refs_gc(void)
{
	pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
}
739 :
/*
 * Does this backend's entry-ref cache need garbage collection?  True when
 * the shared gc_request_count has moved past our cached age.
 */
static bool
pgstat_need_entry_refs_gc(void)
{
	uint64		curage;

	/* no cache yet, nothing to collect */
	if (!pgStatEntryRefHash)
		return false;

	/* should have been initialized when creating pgStatEntryRefHash */
	Assert(pgStatSharedRefAge != 0);

	curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);

	return pgStatSharedRefAge != curage;
}
755 :
/*
 * Garbage-collect the local entry-ref cache: release references to shared
 * entries that have been dropped or reinitialized (detected via the
 * "dropped" flag and a generation mismatch), then record the request count
 * we caught up to.
 */
static void
pgstat_gc_entry_refs(void)
{
	pgstat_entry_ref_hash_iterator i;
	PgStat_EntryRefHashEntry *ent;
	uint64		curage;

	/*
	 * Read the target age first; requests arriving during the scan will be
	 * handled by a later GC cycle.
	 */
	curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
	Assert(curage != 0);

	/*
	 * Some entries have been dropped or reinitialized. Invalidate cache
	 * pointer to them.
	 */
	pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
	while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
	{
		PgStat_EntryRef *entry_ref = ent->entry_ref;

		Assert(!entry_ref->shared_stats ||
			   entry_ref->shared_stats->magic == 0xdeadbeef);

		/*
		 * "generation" checks for the case of entries being reinitialized,
		 * and "dropped" for the case where these are.. dropped.
		 */
		if (!entry_ref->shared_entry->dropped &&
			pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
			entry_ref->generation)
			continue;

		/* cannot gc shared ref that has pending data */
		if (entry_ref->pending != NULL)
			continue;

		pgstat_release_entry_ref(ent->key, entry_ref, false);
	}

	pgStatSharedRefAge = curage;
}
796 :
/*
 * Release all cached entry refs accepted by the optional match callback
 * (all of them when match is NULL), discarding pending data if requested.
 *
 * NOTE(review): pgstat_release_entry_ref() deletes the current element from
 * pgStatEntryRefHash while we iterate; this appears to rely on simplehash
 * iteration tolerating deletion of the current entry.
 */
static void
pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
								   Datum match_data)
{
	pgstat_entry_ref_hash_iterator i;
	PgStat_EntryRefHashEntry *ent;

	/* nothing cached, nothing to release */
	if (pgStatEntryRefHash == NULL)
		return;

	pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);

	while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
		   != NULL)
	{
		Assert(ent->entry_ref != NULL);

		if (match && !match(ent, match_data))
			continue;

		pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
	}
}
820 :
821 : /*
822 : * Release all local references to shared stats entries.
823 : *
824 : * When a process exits it cannot do so while still holding references onto
825 : * stats entries, otherwise the shared stats entries could never be freed.
826 : */
/*
 * Release all local references to shared stats entries.
 *
 * When a process exits it cannot do so while still holding references onto
 * stats entries, otherwise the shared stats entries could never be freed.
 *
 * Also destroys the (then empty) local cache hashtable.
 */
static void
pgstat_release_all_entry_refs(bool discard_pending)
{
	if (pgStatEntryRefHash == NULL)
		return;

	pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
	Assert(pgStatEntryRefHash->members == 0);
	pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
	pgStatEntryRefHash = NULL;
}
838 :
839 : static bool
840 2102 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
841 : {
842 2102 : Oid dboid = DatumGetObjectId(match_data);
843 :
844 2102 : return ent->key.dboid == dboid;
845 : }
846 :
/*
 * Release (discarding pending data) all cached refs to stats entries of
 * the given database.
 */
static void
pgstat_release_db_entry_refs(Oid dboid)
{
	pgstat_release_matching_entry_refs( /* discard pending = */ true,
									   match_db,
									   ObjectIdGetDatum(dboid));
}
854 :
855 :
856 : /* ------------------------------------------------------------
857 : * Dropping and resetting of stats entries
858 : * ------------------------------------------------------------
859 : */
860 :
/*
 * Remove a shared hash entry and free its stats body.  If hstat is
 * non-NULL we are inside a dshash sequential scan and must delete via the
 * scan; otherwise the caller holds the partition lock and we delete the
 * entry directly (which releases the lock).
 */
static void
pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
{
	dsa_pointer pdsa;

	/*
	 * Fetch dsa pointer before deleting entry - that way we can free the
	 * memory after releasing the lock.
	 */
	pdsa = shent->body;

	if (!hstat)
		dshash_delete_entry(pgStatLocal.shared_hash, shent);
	else
		dshash_delete_current(hstat);

	dsa_free(pgStatLocal.dsa, pdsa);
}
879 :
880 : /*
881 : * Helper for both pgstat_drop_database_and_contents() and
882 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
883 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
884 : * case the entry needs to be already locked.
885 : */
/*
 * Helper for both pgstat_drop_database_and_contents() and
 * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
 * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
 * case the entry needs to be already locked.
 *
 * Returns true if the entry's memory could be freed immediately, false if
 * other backends still hold references (the entry is then only marked
 * dropped and freed later by the last releaser).
 */
static bool
pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
						   dshash_seq_status *hstat)
{
	Assert(shent->body != InvalidDsaPointer);

	/* should already have released local reference */
	if (pgStatEntryRefHash)
		Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));

	/*
	 * Signal that the entry is dropped - this will eventually cause other
	 * backends to release their references.
	 */
	if (shent->dropped)
		elog(ERROR,
			 "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
			 pgstat_get_kind_info(shent->key.kind)->name,
			 shent->key.dboid,
			 shent->key.objid,
			 pg_atomic_read_u32(&shent->refcount),
			 pg_atomic_read_u32(&shent->generation));
	shent->dropped = true;

	/* release refcount marking entry as not dropped */
	if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
	{
		/* nobody else holds a reference - free immediately */
		pgstat_free_entry(shent, hstat);
		return true;
	}
	else
	{
		/* keep the entry around; when scanning, the scan keeps the lock */
		if (!hstat)
			dshash_release_lock(pgStatLocal.shared_hash, shent);
		return false;
	}
}
923 :
924 : /*
925 : * Drop stats for the database and all the objects inside that database.
926 : */
/*
 * Drop stats for the database and all the objects inside that database.
 */
static void
pgstat_drop_database_and_contents(Oid dboid)
{
	dshash_seq_status hstat;
	PgStatShared_HashEntry *p;
	uint64		not_freed_count = 0;

	Assert(OidIsValid(dboid));

	Assert(pgStatLocal.shared_hash != NULL);

	/*
	 * This backend might very well be the only backend holding a reference to
	 * about-to-be-dropped entries. Ensure that we're not preventing it from
	 * being cleaned up till later.
	 *
	 * Doing this separately from the dshash iteration below avoids having to
	 * do so while holding a partition lock on the shared hashtable.
	 */
	pgstat_release_db_entry_refs(dboid);

	/* some of the dshash entries are to be removed, take exclusive lock. */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
	while ((p = dshash_seq_next(&hstat)) != NULL)
	{
		/* skip entries already dropped or belonging to other databases */
		if (p->dropped)
			continue;

		if (p->key.dboid != dboid)
			continue;

		if (!pgstat_drop_entry_internal(p, &hstat))
		{
			/*
			 * Even statistics for a dropped database might currently be
			 * accessed (consider e.g. database stats for pg_stat_database).
			 */
			not_freed_count++;
		}
	}
	dshash_seq_term(&hstat);

	/*
	 * If some of the stats data could not be freed, signal the reference
	 * holders to run garbage collection of their cached pgStatLocal.shmem.
	 */
	if (not_freed_count > 0)
		pgstat_request_entry_refs_gc();
}
976 :
/*
 * Drop a single stats entry.
 *
 * This routine returns false if the stats entry of the dropped object could
 * not be freed, true otherwise.
 *
 * The callers of this function should call pgstat_request_entry_refs_gc()
 * if the stats entry could not be freed, to ensure that this entry's memory
 * can be reclaimed later by a different backend calling
 * pgstat_gc_entry_refs().
 */
bool
pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
{
	PgStat_HashKey key;
	PgStatShared_HashEntry *shent;
	bool		freed = true;

	/* clear padding */
	memset(&key, 0, sizeof(struct PgStat_HashKey));

	key.kind = kind;
	key.dboid = dboid;
	key.objid = objid;

	/* delete local reference */
	if (pgStatEntryRefHash)
	{
		PgStat_EntryRefHashEntry *lohashent =
			pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);

		/* discard any pending (not yet flushed) stats for the dropped object */
		if (lohashent)
			pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
									 true);
	}

	/* mark entry in shared hashtable as deleted, drop if possible */
	shent = dshash_find(pgStatLocal.shared_hash, &key, true);
	if (shent)
	{
		freed = pgstat_drop_entry_internal(shent, NULL);

		/*
		 * Database stats contain other stats. Drop those as well when
		 * dropping the database. XXX: Perhaps this should be done in a
		 * slightly more principled way? But not obvious what that'd look
		 * like, and so far this is the only case...
		 */
		if (key.kind == PGSTAT_KIND_DATABASE)
			pgstat_drop_database_and_contents(key.dboid);
	}

	return freed;
}
1031 :
/*
 * Scan through the shared hashtable of stats, dropping statistics if
 * approved by the optional do_drop() function.
 *
 * A NULL do_drop drops every entry.  match_data is passed through to
 * do_drop unchanged.
 */
void
pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
							 Datum match_data)
{
	dshash_seq_status hstat;
	PgStatShared_HashEntry *ps;
	uint64		not_freed_count = 0;

	/* entries are removed, take an exclusive lock */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
	while ((ps = dshash_seq_next(&hstat)) != NULL)
	{
		/* already dropped by someone else; nothing to do */
		if (ps->dropped)
			continue;

		if (do_drop != NULL && !do_drop(ps, match_data))
			continue;

		/* delete local reference */
		if (pgStatEntryRefHash)
		{
			PgStat_EntryRefHashEntry *lohashent =
				pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);

			/* also discard this backend's pending stats for the entry */
			if (lohashent)
				pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
										 true);
		}

		if (!pgstat_drop_entry_internal(ps, &hstat))
			not_freed_count++;
	}
	dshash_seq_term(&hstat);

	/* ask other backends to release their references so memory can be freed */
	if (not_freed_count > 0)
		pgstat_request_entry_refs_gc();
}
1073 :
1074 : /*
1075 : * Scan through the shared hashtable of stats and drop all entries.
1076 : */
1077 : void
1078 458 : pgstat_drop_all_entries(void)
1079 : {
1080 458 : pgstat_drop_matching_entries(NULL, 0);
1081 458 : }
1082 :
1083 : static void
1084 17722 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1085 : TimestampTz ts)
1086 : {
1087 17722 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1088 :
1089 17722 : memset(pgstat_get_entry_data(kind, header), 0,
1090 : pgstat_get_entry_len(kind));
1091 :
1092 17722 : if (kind_info->reset_timestamp_cb)
1093 400 : kind_info->reset_timestamp_cb(header, ts);
1094 17722 : }
1095 :
1096 : /*
1097 : * Reset one variable-numbered stats entry.
1098 : */
1099 : void
1100 372 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1101 : {
1102 : PgStat_EntryRef *entry_ref;
1103 :
1104 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1105 :
1106 372 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1107 372 : if (!entry_ref || entry_ref->shared_entry->dropped)
1108 2 : return;
1109 :
1110 370 : (void) pgstat_lock_entry(entry_ref, false);
1111 370 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1112 370 : pgstat_unlock_entry(entry_ref);
1113 : }
1114 :
/*
 * Scan through the shared hashtable of stats, resetting statistics if
 * approved by the provided do_reset() function.
 *
 * Entries are only modified in-place (never removed), so a shared lock on
 * the hashtable suffices; each entry's own LWLock protects its contents.
 */
void
pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
							  Datum match_data, TimestampTz ts)
{
	dshash_seq_status hstat;
	PgStatShared_HashEntry *p;

	/* dshash entry is not modified, take shared lock */
	dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
	while ((p = dshash_seq_next(&hstat)) != NULL)
	{
		PgStatShared_Common *header;

		/* skip entries pending removal */
		if (p->dropped)
			continue;

		if (!do_reset(p, match_data))
			continue;

		header = dsa_get_address(pgStatLocal.dsa, p->body);

		LWLockAcquire(&header->lock, LW_EXCLUSIVE);

		shared_stat_reset_contents(p->key.kind, header, ts);

		LWLockRelease(&header->lock);
	}
	dshash_seq_term(&hstat);
}
1148 :
1149 : static bool
1150 2964 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1151 : {
1152 2964 : return p->key.kind == DatumGetInt32(match_data);
1153 : }
1154 :
1155 : void
1156 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1157 : {
1158 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1159 8 : }
1160 :
1161 : static void
1162 8233380 : pgstat_setup_memcxt(void)
1163 : {
1164 8233380 : if (unlikely(!pgStatSharedRefContext))
1165 37648 : pgStatSharedRefContext =
1166 37648 : AllocSetContextCreate(TopMemoryContext,
1167 : "PgStat Shared Ref",
1168 : ALLOCSET_SMALL_SIZES);
1169 8233380 : if (unlikely(!pgStatEntryRefHashContext))
1170 37648 : pgStatEntryRefHashContext =
1171 37648 : AllocSetContextCreate(TopMemoryContext,
1172 : "PgStat Shared Ref Hash",
1173 : ALLOCSET_SMALL_SIZES);
1174 8233380 : }
|