Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128
22 :
/*
 * Entry of the backend-local hash table used to find the PgStat_EntryRef
 * belonging to a stats key.
 */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref; /* reference cached for "key" */
} PgStat_EntryRefHashEntry;
30 :
31 :
/*
 * Instantiate a simplehash table for references to shared statistics
 * entries.  All generated functions and types carry the
 * pgstat_entry_ref_hash prefix.
 *
 * Hashing and equality both operate on the full sizeof(PgStat_HashKey)
 * bytes of the key, which is why keys built in this file are memset to zero
 * before their fields are filled in.
 */
#define SH_PREFIX pgstat_entry_ref_hash
#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
45 :
46 :
/* dropping of shared stats entries */
static void pgstat_drop_database_and_contents(Oid dboid);

static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);

/* management of backend-local references to shared stats entries */
static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
static bool pgstat_need_entry_refs_gc(void);
static void pgstat_gc_entry_refs(void);
static void pgstat_release_all_entry_refs(bool discard_pending);
/* filter deciding whether a cached reference should be released */
typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);

/* NOTE(review): defined outside the visible part of this file */
static void pgstat_setup_memcxt(void);
59 :
60 :
/*
 * Parameters for the shared hash.  Passed both when creating the table in
 * StatsShmemInit() and when attaching to it in pgstat_attach_shmem().
 */
static const dshash_parameters dsh_params = {
	sizeof(PgStat_HashKey),		/* size of a key */
	sizeof(PgStatShared_HashEntry), /* size of a whole entry */
	pgstat_cmp_hash_key,		/* key comparison */
	pgstat_hash_hash_key,		/* key hashing */
	dshash_memcpy,				/* key copying */
	LWTRANCHE_PGSTATS_HASH		/* LWLock tranche */
};
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to their copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
84 :
/*
 * Memory contexts for the backend-local reference cache.  The hash table
 * and the entries it points to are kept in separate contexts to make it
 * easier to track / attribute memory usage.
 */
/* holds the individual PgStat_EntryRef allocations */
static MemoryContext pgStatSharedRefContext = NULL;
/* holds the pgStatEntryRefHash table itself */
static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 12306 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial to not need dsm segments
113 : * immediately. A size of 256kB seems works well and is not
114 : * disproportional compared to other constant sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 12306 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 12306 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 6006 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 6006 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 6006 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 : /* Add shared memory for all the custom fixed-numbered statistics */
135 780780 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
136 : {
137 774774 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
138 :
139 774774 : if (!kind_info)
140 774726 : continue;
141 48 : if (!kind_info->fixed_amount)
142 24 : continue;
143 :
144 : Assert(kind_info->shared_size != 0);
145 :
146 24 : sz += MAXALIGN(kind_info->shared_size);
147 : }
148 :
149 6006 : return sz;
150 : }
151 :
152 : /*
153 : * Initialize cumulative statistics system during startup
154 : */
155 : void
156 2100 : StatsShmemInit(void)
157 : {
158 : bool found;
159 : Size sz;
160 :
161 2100 : sz = StatsShmemSize();
162 2100 : pgStatLocal.shmem = (PgStat_ShmemControl *)
163 2100 : ShmemInitStruct("Shared Memory Stats", sz, &found);
164 :
165 2100 : if (!IsUnderPostmaster)
166 : {
167 : dsa_area *dsa;
168 : dshash_table *dsh;
169 2100 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
170 2100 : char *p = (char *) ctl;
171 :
172 : Assert(!found);
173 :
174 : /* the allocation of pgStatLocal.shmem itself */
175 2100 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
176 :
177 : /*
178 : * Create a small dsa allocation in plain shared memory. This is
179 : * required because postmaster cannot use dsm segments. It also
180 : * provides a small efficiency win.
181 : */
182 2100 : ctl->raw_dsa_area = p;
183 2100 : p += MAXALIGN(pgstat_dsa_init_size());
184 2100 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
185 : pgstat_dsa_init_size(),
186 : LWTRANCHE_PGSTATS_DSA, 0);
187 2100 : dsa_pin(dsa);
188 :
189 : /*
190 : * To ensure dshash is created in "plain" shared memory, temporarily
191 : * limit size of dsa to the initial size of the dsa.
192 : */
193 2100 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
194 :
195 : /*
196 : * With the limit in place, create the dshash table. XXX: It'd be nice
197 : * if there were dshash_create_in_place().
198 : */
199 2100 : dsh = dshash_create(dsa, &dsh_params, NULL);
200 2100 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
201 :
202 : /* lift limit set above */
203 2100 : dsa_set_size_limit(dsa, -1);
204 :
205 : /*
206 : * Postmaster will never access these again, thus free the local
207 : * dsa/dshash references.
208 : */
209 2100 : dshash_detach(dsh);
210 2100 : dsa_detach(dsa);
211 :
212 2100 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
213 :
214 : /* initialize fixed-numbered stats */
215 539700 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
216 : {
217 537600 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
218 : char *ptr;
219 :
220 537600 : if (!kind_info || !kind_info->fixed_amount)
221 524992 : continue;
222 :
223 12608 : if (pgstat_is_kind_builtin(kind))
224 12600 : ptr = ((char *) ctl) + kind_info->shared_ctl_off;
225 : else
226 : {
227 8 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
228 :
229 : Assert(kind_info->shared_size != 0);
230 8 : ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
231 8 : ptr = ctl->custom_data[idx];
232 : }
233 :
234 12608 : kind_info->init_shmem_cb(ptr);
235 : }
236 : }
237 : else
238 : {
239 : Assert(found);
240 : }
241 2100 : }
242 :
243 : void
244 40852 : pgstat_attach_shmem(void)
245 : {
246 : MemoryContext oldcontext;
247 :
248 : Assert(pgStatLocal.dsa == NULL);
249 :
250 : /* stats shared memory persists for the backend lifetime */
251 40852 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
252 :
253 40852 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
254 : NULL);
255 40852 : dsa_pin_mapping(pgStatLocal.dsa);
256 :
257 81704 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
258 40852 : pgStatLocal.shmem->hash_handle, 0);
259 :
260 40852 : MemoryContextSwitchTo(oldcontext);
261 40852 : }
262 :
263 : void
264 40852 : pgstat_detach_shmem(void)
265 : {
266 : Assert(pgStatLocal.dsa);
267 :
268 : /* we shouldn't leave references to shared stats */
269 40852 : pgstat_release_all_entry_refs(false);
270 :
271 40852 : dshash_detach(pgStatLocal.shared_hash);
272 40852 : pgStatLocal.shared_hash = NULL;
273 :
274 40852 : dsa_detach(pgStatLocal.dsa);
275 :
276 : /*
277 : * dsa_detach() does not decrement the DSA reference count as no segment
278 : * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
279 : * be registered. Hence, release it manually now.
280 : */
281 40852 : dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
282 :
283 40852 : pgStatLocal.dsa = NULL;
284 40852 : }
285 :
286 :
287 : /* ------------------------------------------------------------
288 : * Maintenance of shared memory stats entries
289 : * ------------------------------------------------------------
290 : */
291 :
292 : PgStatShared_Common *
293 614520 : pgstat_init_entry(PgStat_Kind kind,
294 : PgStatShared_HashEntry *shhashent)
295 : {
296 : /* Create new stats entry. */
297 : dsa_pointer chunk;
298 : PgStatShared_Common *shheader;
299 :
300 : /*
301 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
302 : * can't be freed before the initialization because it can't be found as
303 : * long as we hold the dshash partition lock. Caller needs to increase
304 : * further if a longer lived reference is needed.
305 : */
306 614520 : pg_atomic_init_u32(&shhashent->refcount, 1);
307 :
308 : /*
309 : * Initialize "generation" to 0, as freshly created.
310 : */
311 614520 : pg_atomic_init_u32(&shhashent->generation, 0);
312 614520 : shhashent->dropped = false;
313 :
314 614520 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
315 614520 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
316 614520 : shheader->magic = 0xdeadbeef;
317 :
318 : /* Link the new entry from the hash entry. */
319 614520 : shhashent->body = chunk;
320 :
321 614520 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
322 :
323 614520 : return shheader;
324 : }
325 :
326 : static PgStatShared_Common *
327 54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
328 : {
329 : PgStatShared_Common *shheader;
330 :
331 54 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
332 :
333 : /* mark as not dropped anymore */
334 54 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
335 :
336 : /*
337 : * Increment "generation", to let any backend with local references know
338 : * that what they point to is outdated.
339 : */
340 54 : pg_atomic_fetch_add_u32(&shhashent->generation, 1);
341 54 : shhashent->dropped = false;
342 :
343 : /* reinitialize content */
344 : Assert(shheader->magic == 0xdeadbeef);
345 54 : memset(pgstat_get_entry_data(kind, shheader), 0,
346 : pgstat_get_entry_len(kind));
347 :
348 54 : return shheader;
349 : }
350 :
351 : static void
352 7099652 : pgstat_setup_shared_refs(void)
353 : {
354 7099652 : if (likely(pgStatEntryRefHash != NULL))
355 7065360 : return;
356 :
357 34292 : pgStatEntryRefHash =
358 34292 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
359 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
360 34292 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
361 : Assert(pgStatSharedRefAge != 0);
362 : }
363 :
364 : /*
365 : * Helper function for pgstat_get_entry_ref().
366 : */
367 : static void
368 1797828 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
369 : PgStatShared_HashEntry *shhashent,
370 : PgStatShared_Common *shheader)
371 : {
372 : Assert(shheader->magic == 0xdeadbeef);
373 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
374 :
375 1797828 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
376 :
377 1797828 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
378 :
379 1797828 : entry_ref->shared_stats = shheader;
380 1797828 : entry_ref->shared_entry = shhashent;
381 1797828 : entry_ref->generation = pg_atomic_read_u32(&shhashent->generation);
382 1797828 : }
383 :
384 : /*
385 : * Helper function for pgstat_get_entry_ref().
386 : */
387 : static bool
388 7099652 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
389 : {
390 : bool found;
391 : PgStat_EntryRefHashEntry *cache_entry;
392 :
393 : /*
394 : * We immediately insert a cache entry, because it avoids 1) multiple
395 : * hashtable lookups in case of a cache miss 2) having to deal with
396 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
397 : */
398 :
399 7099652 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
400 :
401 7099652 : if (!found || !cache_entry->entry_ref)
402 1961378 : {
403 : PgStat_EntryRef *entry_ref;
404 :
405 1961378 : cache_entry->entry_ref = entry_ref =
406 1961378 : MemoryContextAlloc(pgStatSharedRefContext,
407 : sizeof(PgStat_EntryRef));
408 1961378 : entry_ref->shared_stats = NULL;
409 1961378 : entry_ref->shared_entry = NULL;
410 1961378 : entry_ref->pending = NULL;
411 :
412 1961378 : found = false;
413 : }
414 5138274 : else if (cache_entry->entry_ref->shared_stats == NULL)
415 : {
416 : Assert(cache_entry->entry_ref->pending == NULL);
417 0 : found = false;
418 : }
419 : else
420 : {
421 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
422 :
423 5138274 : entry_ref = cache_entry->entry_ref;
424 : Assert(entry_ref->shared_entry != NULL);
425 : Assert(entry_ref->shared_stats != NULL);
426 :
427 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
428 : /* should have at least our reference */
429 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
430 : }
431 :
432 7099652 : *entry_ref_p = cache_entry->entry_ref;
433 7099652 : return found;
434 : }
435 :
436 : /*
437 : * Get a shared stats reference. If create is true, the shared stats object is
438 : * created if it does not exist.
439 : *
440 : * When create is true, and created_entry is non-NULL, it'll be set to true
441 : * if the entry is newly created, false otherwise.
442 : */
443 : PgStat_EntryRef *
444 7099652 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
445 : bool *created_entry)
446 : {
447 : PgStat_HashKey key;
448 : PgStatShared_HashEntry *shhashent;
449 7099652 : PgStatShared_Common *shheader = NULL;
450 : PgStat_EntryRef *entry_ref;
451 :
452 : /* clear padding */
453 7099652 : memset(&key, 0, sizeof(struct PgStat_HashKey));
454 :
455 7099652 : key.kind = kind;
456 7099652 : key.dboid = dboid;
457 7099652 : key.objid = objid;
458 :
459 : /*
460 : * passing in created_entry only makes sense if we possibly could create
461 : * entry.
462 : */
463 : Assert(create || created_entry == NULL);
464 : pgstat_assert_is_up();
465 : Assert(pgStatLocal.shared_hash != NULL);
466 : Assert(!pgStatLocal.shmem->is_shutdown);
467 :
468 7099652 : pgstat_setup_memcxt();
469 7099652 : pgstat_setup_shared_refs();
470 :
471 7099652 : if (created_entry != NULL)
472 214 : *created_entry = false;
473 :
474 : /*
475 : * Check if other backends dropped stats that could not be deleted because
476 : * somebody held references to it. If so, check this backend's references.
477 : * This is not expected to happen often. The location of the check is a
478 : * bit random, but this is a relatively frequently called path, so better
479 : * than most.
480 : */
481 7099652 : if (pgstat_need_entry_refs_gc())
482 11608 : pgstat_gc_entry_refs();
483 :
484 : /*
485 : * First check the lookup cache hashtable in local memory. If we find a
486 : * match here we can avoid taking locks / causing contention.
487 : */
488 7099652 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
489 5138274 : return entry_ref;
490 :
491 : Assert(entry_ref != NULL);
492 :
493 : /*
494 : * Do a lookup in the hash table first - it's quite likely that the entry
495 : * already exists, and that way we only need a shared lock.
496 : */
497 1961378 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
498 :
499 1961378 : if (create && !shhashent)
500 : {
501 : bool shfound;
502 :
503 : /*
504 : * It's possible that somebody created the entry since the above
505 : * lookup. If so, fall through to the same path as if we'd have if it
506 : * already had been created before the dshash_find() calls.
507 : */
508 233390 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
509 233390 : if (!shfound)
510 : {
511 233390 : shheader = pgstat_init_entry(kind, shhashent);
512 233390 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
513 :
514 233390 : if (created_entry != NULL)
515 96 : *created_entry = true;
516 :
517 233390 : return entry_ref;
518 : }
519 : }
520 :
521 1727988 : if (!shhashent)
522 : {
523 : /*
524 : * If we're not creating, delete the reference again. In all
525 : * likelihood it's just a stats lookup - no point wasting memory for a
526 : * shared ref to nothing...
527 : */
528 163476 : pgstat_release_entry_ref(key, entry_ref, false);
529 :
530 163476 : return NULL;
531 : }
532 : else
533 : {
534 : /*
535 : * Can get here either because dshash_find() found a match, or if
536 : * dshash_find_or_insert() found a concurrently inserted entry.
537 : */
538 :
539 1564512 : if (shhashent->dropped && create)
540 : {
541 : /*
542 : * There are legitimate cases where the old stats entry might not
543 : * yet have been dropped by the time it's reused. The most obvious
544 : * case are replication slot stats, where a new slot can be
545 : * created with the same index just after dropping. But oid
546 : * wraparound can lead to other cases as well. We just reset the
547 : * stats to their plain state, while incrementing its "generation"
548 : * in the shared entry for any remaining local references.
549 : */
550 54 : shheader = pgstat_reinit_entry(kind, shhashent);
551 54 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
552 :
553 54 : if (created_entry != NULL)
554 0 : *created_entry = true;
555 :
556 54 : return entry_ref;
557 : }
558 1564458 : else if (shhashent->dropped)
559 : {
560 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
561 74 : pgstat_release_entry_ref(key, entry_ref, false);
562 :
563 74 : return NULL;
564 : }
565 : else
566 : {
567 1564384 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
568 1564384 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
569 :
570 1564384 : return entry_ref;
571 : }
572 : }
573 : }
574 :
575 : static void
576 1961378 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
577 : bool discard_pending)
578 : {
579 1961378 : if (entry_ref && entry_ref->pending)
580 : {
581 64102 : if (discard_pending)
582 64102 : pgstat_delete_pending_entry(entry_ref);
583 : else
584 0 : elog(ERROR, "releasing ref with pending data");
585 : }
586 :
587 1961378 : if (entry_ref && entry_ref->shared_stats)
588 : {
589 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
590 : Assert(entry_ref->pending == NULL);
591 :
592 : /*
593 : * This can't race with another backend looking up the stats entry and
594 : * increasing the refcount because it is not "legal" to create
595 : * additional references to dropped entries.
596 : */
597 1797828 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
598 : {
599 : PgStatShared_HashEntry *shent;
600 :
601 : /*
602 : * We're the last referrer to this entry, try to drop the shared
603 : * entry.
604 : */
605 :
606 : /* only dropped entries can reach a 0 refcount */
607 : Assert(entry_ref->shared_entry->dropped);
608 :
609 9344 : shent = dshash_find(pgStatLocal.shared_hash,
610 9344 : &entry_ref->shared_entry->key,
611 : true);
612 9344 : if (!shent)
613 0 : elog(ERROR, "could not find just referenced shared stats entry");
614 :
615 : /*
616 : * This entry may have been reinitialized while trying to release
617 : * it, so double-check that it has not been reused while holding a
618 : * lock on its shared entry.
619 : */
620 9344 : if (pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
621 9344 : entry_ref->generation)
622 : {
623 : /* Same "generation", so we're OK with the removal */
624 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
625 : Assert(entry_ref->shared_entry == shent);
626 9344 : pgstat_free_entry(shent, NULL);
627 : }
628 : else
629 : {
630 : /*
631 : * Shared stats entry has been reinitialized, so do not drop
632 : * its shared entry, only release its lock.
633 : */
634 0 : dshash_release_lock(pgStatLocal.shared_hash, shent);
635 : }
636 : }
637 : }
638 :
639 1961378 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
640 0 : elog(ERROR, "entry ref vanished before deletion");
641 :
642 1961378 : if (entry_ref)
643 1961378 : pfree(entry_ref);
644 1961378 : }
645 :
646 : /*
647 : * Acquire exclusive lock on the entry.
648 : *
649 : * If nowait is true, it's just a conditional acquire, and the result
650 : * *must* be checked to verify success.
651 : * If nowait is false, waits as necessary, always returning true.
652 : */
653 : bool
654 2196566 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
655 : {
656 2196566 : LWLock *lock = &entry_ref->shared_stats->lock;
657 :
658 2196566 : if (nowait)
659 636114 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
660 :
661 1560452 : LWLockAcquire(lock, LW_EXCLUSIVE);
662 1560452 : return true;
663 : }
664 :
665 : /*
666 : * Acquire shared lock on the entry.
667 : *
668 : * Separate from pgstat_lock_entry() as most callers will need to lock
669 : * exclusively. The wait semantics are identical.
670 : */
671 : bool
672 374428 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
673 : {
674 374428 : LWLock *lock = &entry_ref->shared_stats->lock;
675 :
676 374428 : if (nowait)
677 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
678 :
679 374428 : LWLockAcquire(lock, LW_SHARED);
680 374428 : return true;
681 : }
682 :
683 : void
684 2570966 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
685 : {
686 2570966 : LWLockRelease(&entry_ref->shared_stats->lock);
687 2570966 : }
688 :
689 : /*
690 : * Helper function to fetch and lock shared stats.
691 : */
692 : PgStat_EntryRef *
693 384154 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
694 : bool nowait)
695 : {
696 : PgStat_EntryRef *entry_ref;
697 :
698 : /* find shared table stats entry corresponding to the local entry */
699 384154 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
700 :
701 : /* lock the shared entry to protect the content, skip if failed */
702 384154 : if (!pgstat_lock_entry(entry_ref, nowait))
703 0 : return NULL;
704 :
705 384154 : return entry_ref;
706 : }
707 :
708 : void
709 3702 : pgstat_request_entry_refs_gc(void)
710 : {
711 3702 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
712 3702 : }
713 :
714 : static bool
715 7099652 : pgstat_need_entry_refs_gc(void)
716 : {
717 : uint64 curage;
718 :
719 7099652 : if (!pgStatEntryRefHash)
720 0 : return false;
721 :
722 : /* should have been initialized when creating pgStatEntryRefHash */
723 : Assert(pgStatSharedRefAge != 0);
724 :
725 7099652 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
726 :
727 7099652 : return pgStatSharedRefAge != curage;
728 : }
729 :
730 : static void
731 11608 : pgstat_gc_entry_refs(void)
732 : {
733 : pgstat_entry_ref_hash_iterator i;
734 : PgStat_EntryRefHashEntry *ent;
735 : uint64 curage;
736 :
737 11608 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
738 : Assert(curage != 0);
739 :
740 : /*
741 : * Some entries have been dropped or reinitialized. Invalidate cache
742 : * pointer to them.
743 : */
744 11608 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
745 870844 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
746 : {
747 859236 : PgStat_EntryRef *entry_ref = ent->entry_ref;
748 :
749 : Assert(!entry_ref->shared_stats ||
750 : entry_ref->shared_stats->magic == 0xdeadbeef);
751 :
752 : /*
753 : * "generation" checks for the case of entries being reinitialized,
754 : * and "dropped" for the case where these are.. dropped.
755 : */
756 859236 : if (!entry_ref->shared_entry->dropped &&
757 616538 : pg_atomic_read_u32(&entry_ref->shared_entry->generation) ==
758 616538 : entry_ref->generation)
759 616484 : continue;
760 :
761 : /* cannot gc shared ref that has pending data */
762 242752 : if (entry_ref->pending != NULL)
763 234202 : continue;
764 :
765 8550 : pgstat_release_entry_ref(ent->key, entry_ref, false);
766 : }
767 :
768 11608 : pgStatSharedRefAge = curage;
769 11608 : }
770 :
771 : static void
772 34362 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
773 : Datum match_data)
774 : {
775 : pgstat_entry_ref_hash_iterator i;
776 : PgStat_EntryRefHashEntry *ent;
777 :
778 34362 : if (pgStatEntryRefHash == NULL)
779 2 : return;
780 :
781 34360 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
782 :
783 1727336 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
784 : != NULL)
785 : {
786 : Assert(ent->entry_ref != NULL);
787 :
788 1692976 : if (match && !match(ent, match_data))
789 2048 : continue;
790 :
791 1690928 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
792 : }
793 : }
794 :
795 : /*
796 : * Release all local references to shared stats entries.
797 : *
798 : * When a process exits it cannot do so while still holding references onto
799 : * stats entries, otherwise the shared stats entries could never be freed.
800 : */
801 : static void
802 40852 : pgstat_release_all_entry_refs(bool discard_pending)
803 : {
804 40852 : if (pgStatEntryRefHash == NULL)
805 6560 : return;
806 :
807 34292 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
808 : Assert(pgStatEntryRefHash->members == 0);
809 34292 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
810 34292 : pgStatEntryRefHash = NULL;
811 : }
812 :
813 : static bool
814 2048 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
815 : {
816 2048 : Oid dboid = DatumGetObjectId(match_data);
817 :
818 2048 : return ent->key.dboid == dboid;
819 : }
820 :
821 : static void
822 70 : pgstat_release_db_entry_refs(Oid dboid)
823 : {
824 70 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
825 : match_db,
826 : ObjectIdGetDatum(dboid));
827 70 : }
828 :
829 :
830 : /* ------------------------------------------------------------
831 : * Dropping and resetting of stats entries
832 : * ------------------------------------------------------------
833 : */
834 :
835 : static void
836 107334 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
837 : {
838 : dsa_pointer pdsa;
839 :
840 : /*
841 : * Fetch dsa pointer before deleting entry - that way we can free the
842 : * memory after releasing the lock.
843 : */
844 107334 : pdsa = shent->body;
845 :
846 107334 : if (!hstat)
847 98568 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
848 : else
849 8766 : dshash_delete_current(hstat);
850 :
851 107334 : dsa_free(pgStatLocal.dsa, pdsa);
852 107334 : }
853 :
854 : /*
855 : * Helper for both pgstat_drop_database_and_contents() and
856 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
857 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
858 : * case the entry needs to be already locked.
859 : */
860 : static bool
861 107390 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
862 : dshash_seq_status *hstat)
863 : {
864 : Assert(shent->body != InvalidDsaPointer);
865 :
866 : /* should already have released local reference */
867 107390 : if (pgStatEntryRefHash)
868 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
869 :
870 : /*
871 : * Signal that the entry is dropped - this will eventually cause other
872 : * backends to release their references.
873 : */
874 107390 : if (shent->dropped)
875 0 : elog(ERROR,
876 : "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u",
877 : pgstat_get_kind_info(shent->key.kind)->name,
878 : shent->key.dboid,
879 : shent->key.objid,
880 : pg_atomic_read_u32(&shent->refcount));
881 107390 : shent->dropped = true;
882 :
883 : /* release refcount marking entry as not dropped */
884 107390 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
885 : {
886 97990 : pgstat_free_entry(shent, hstat);
887 97990 : return true;
888 : }
889 : else
890 : {
891 9400 : if (!hstat)
892 9400 : dshash_release_lock(pgStatLocal.shared_hash, shent);
893 9400 : return false;
894 : }
895 : }
896 :
897 : /*
898 : * Drop stats for the database and all the objects inside that database.
899 : */
900 : static void
901 70 : pgstat_drop_database_and_contents(Oid dboid)
902 : {
903 : dshash_seq_status hstat;
904 : PgStatShared_HashEntry *p;
905 70 : uint64 not_freed_count = 0;
906 :
907 : Assert(OidIsValid(dboid));
908 :
909 : Assert(pgStatLocal.shared_hash != NULL);
910 :
911 : /*
912 : * This backend might very well be the only backend holding a reference to
913 : * about-to-be-dropped entries. Ensure that we're not preventing it from
914 : * being cleaned up till later.
915 : *
916 : * Doing this separately from the dshash iteration below avoids having to
917 : * do so while holding a partition lock on the shared hashtable.
918 : */
919 70 : pgstat_release_db_entry_refs(dboid);
920 :
921 : /* some of the dshash entries are to be removed, take exclusive lock. */
922 70 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
923 26114 : while ((p = dshash_seq_next(&hstat)) != NULL)
924 : {
925 26044 : if (p->dropped)
926 2 : continue;
927 :
928 26042 : if (p->key.dboid != dboid)
929 17390 : continue;
930 :
931 8652 : if (!pgstat_drop_entry_internal(p, &hstat))
932 : {
933 : /*
934 : * Even statistics for a dropped database might currently be
935 : * accessed (consider e.g. database stats for pg_stat_database).
936 : */
937 0 : not_freed_count++;
938 : }
939 : }
940 70 : dshash_seq_term(&hstat);
941 :
942 : /*
943 : * If some of the stats data could not be freed, signal the reference
944 : * holders to run garbage collection of their cached pgStatLocal.shmem.
945 : */
946 70 : if (not_freed_count > 0)
947 0 : pgstat_request_entry_refs_gc();
948 70 : }
949 :
950 : /*
951 : * Drop a single stats entry.
952 : *
953 : * This routine returns false if the stats entry of the dropped object could
954 : * not be freed, true otherwise.
955 : *
956 : * The callers of this function should call pgstat_request_entry_refs_gc()
957 : * if the stats entry could not be freed, to ensure that this entry's memory
958 : * can be reclaimed later by a different backend calling
959 : * pgstat_gc_entry_refs().
960 : */
961 : bool
962 142472 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
963 : {
964 : PgStat_HashKey key;
965 : PgStatShared_HashEntry *shent;
966 142472 : bool freed = true;
967 :
968 : /* clear padding */
969 142472 : memset(&key, 0, sizeof(struct PgStat_HashKey));
970 :
971 142472 : key.kind = kind;
972 142472 : key.dboid = dboid;
973 142472 : key.objid = objid;
974 :
975 : /* delete local reference */
976 142472 : if (pgStatEntryRefHash)
977 : {
978 : PgStat_EntryRefHashEntry *lohashent =
979 135906 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
980 :
981 135906 : if (lohashent)
982 98350 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
983 : true);
984 : }
985 :
986 : /* mark entry in shared hashtable as deleted, drop if possible */
987 142472 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
988 142472 : if (shent)
989 : {
990 98624 : freed = pgstat_drop_entry_internal(shent, NULL);
991 :
992 : /*
993 : * Database stats contain other stats. Drop those as well when
994 : * dropping the database. XXX: Perhaps this should be done in a
995 : * slightly more principled way? But not obvious what that'd look
996 : * like, and so far this is the only case...
997 : */
998 98624 : if (key.kind == PGSTAT_KIND_DATABASE)
999 70 : pgstat_drop_database_and_contents(key.dboid);
1000 : }
1001 :
1002 142472 : return freed;
1003 : }
1004 :
1005 : /*
1006 : * Scan through the shared hashtable of stats, dropping statistics if
1007 : * approved by the optional do_drop() function.
1008 : */
1009 : void
1010 452 : pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
1011 : Datum match_data)
1012 : {
1013 : dshash_seq_status hstat;
1014 : PgStatShared_HashEntry *ps;
1015 452 : uint64 not_freed_count = 0;
1016 :
1017 : /* entries are removed, take an exclusive lock */
1018 452 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
1019 630 : while ((ps = dshash_seq_next(&hstat)) != NULL)
1020 : {
1021 178 : if (ps->dropped)
1022 0 : continue;
1023 :
1024 178 : if (do_drop != NULL && !do_drop(ps, match_data))
1025 64 : continue;
1026 :
1027 : /* delete local reference */
1028 114 : if (pgStatEntryRefHash)
1029 : {
1030 : PgStat_EntryRefHashEntry *lohashent =
1031 2 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, ps->key);
1032 :
1033 2 : if (lohashent)
1034 0 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
1035 : true);
1036 : }
1037 :
1038 114 : if (!pgstat_drop_entry_internal(ps, &hstat))
1039 0 : not_freed_count++;
1040 : }
1041 452 : dshash_seq_term(&hstat);
1042 :
1043 452 : if (not_freed_count > 0)
1044 0 : pgstat_request_entry_refs_gc();
1045 452 : }
1046 :
1047 : /*
1048 : * Scan through the shared hashtable of stats and drop all entries.
1049 : */
1050 : void
1051 450 : pgstat_drop_all_entries(void)
1052 : {
1053 450 : pgstat_drop_matching_entries(NULL, 0);
1054 450 : }
1055 :
1056 : static void
1057 18000 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
1058 : TimestampTz ts)
1059 : {
1060 18000 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1061 :
1062 18000 : memset(pgstat_get_entry_data(kind, header), 0,
1063 : pgstat_get_entry_len(kind));
1064 :
1065 18000 : if (kind_info->reset_timestamp_cb)
1066 380 : kind_info->reset_timestamp_cb(header, ts);
1067 18000 : }
1068 :
1069 : /*
1070 : * Reset one variable-numbered stats entry.
1071 : */
1072 : void
1073 352 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
1074 : {
1075 : PgStat_EntryRef *entry_ref;
1076 :
1077 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1078 :
1079 352 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1080 352 : if (!entry_ref || entry_ref->shared_entry->dropped)
1081 2 : return;
1082 :
1083 350 : (void) pgstat_lock_entry(entry_ref, false);
1084 350 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
1085 350 : pgstat_unlock_entry(entry_ref);
1086 : }
1087 :
1088 : /*
1089 : * Scan through the shared hashtable of stats, resetting statistics if
1090 : * approved by the provided do_reset() function.
1091 : */
1092 : void
1093 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1094 : Datum match_data, TimestampTz ts)
1095 : {
1096 : dshash_seq_status hstat;
1097 : PgStatShared_HashEntry *p;
1098 :
1099 : /* dshash entry is not modified, take shared lock */
1100 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1101 26310 : while ((p = dshash_seq_next(&hstat)) != NULL)
1102 : {
1103 : PgStatShared_Common *header;
1104 :
1105 26276 : if (p->dropped)
1106 2 : continue;
1107 :
1108 26274 : if (!do_reset(p, match_data))
1109 8624 : continue;
1110 :
1111 17650 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1112 :
1113 17650 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1114 :
1115 17650 : shared_stat_reset_contents(p->key.kind, header, ts);
1116 :
1117 17650 : LWLockRelease(&header->lock);
1118 : }
1119 34 : dshash_seq_term(&hstat);
1120 34 : }
1121 :
1122 : static bool
1123 2980 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1124 : {
1125 2980 : return p->key.kind == DatumGetInt32(match_data);
1126 : }
1127 :
1128 : void
1129 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1130 : {
1131 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1132 8 : }
1133 :
1134 : static void
1135 7099652 : pgstat_setup_memcxt(void)
1136 : {
1137 7099652 : if (unlikely(!pgStatSharedRefContext))
1138 34292 : pgStatSharedRefContext =
1139 34292 : AllocSetContextCreate(TopMemoryContext,
1140 : "PgStat Shared Ref",
1141 : ALLOCSET_SMALL_SIZES);
1142 7099652 : if (unlikely(!pgStatEntryRefHashContext))
1143 34292 : pgStatEntryRefHashContext =
1144 34292 : AllocSetContextCreate(TopMemoryContext,
1145 : "PgStat Shared Ref Hash",
1146 : ALLOCSET_SMALL_SIZES);
1147 7099652 : }
|