Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128
22 :
23 : /* hash table entry for finding the PgStat_EntryRef for a key */
24 : typedef struct PgStat_EntryRefHashEntry
25 : {
26 : PgStat_HashKey key; /* hash key */
27 : char status; /* for simplehash use */
28 : PgStat_EntryRef *entry_ref;
29 : } PgStat_EntryRefHashEntry;
30 :
31 :
32 : /* for references to shared statistics entries */
33 : #define SH_PREFIX pgstat_entry_ref_hash
34 : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
35 : #define SH_KEY_TYPE PgStat_HashKey
36 : #define SH_KEY key
37 : #define SH_HASH_KEY(tb, key) \
38 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
39 : #define SH_EQUAL(tb, a, b) \
40 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
41 : #define SH_SCOPE static inline
42 : #define SH_DEFINE
43 : #define SH_DECLARE
44 : #include "lib/simplehash.h"
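: /*
:  * The simplehash include above generates a backend-local hash table API
:  * prefixed with pgstat_entry_ref_hash_ (create, insert, lookup, delete,
:  * iterate), which is used throughout this file for the reference cache.
:  */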
45 :
46 :
47 : static void pgstat_drop_database_and_contents(Oid dboid);
48 :
49 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
50 :
51 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
52 : static bool pgstat_need_entry_refs_gc(void);
53 : static void pgstat_gc_entry_refs(void);
54 : static void pgstat_release_all_entry_refs(bool discard_pending);
55 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
56 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
57 :
58 : static void pgstat_setup_memcxt(void);
59 :
60 :
61 : /* parameters for the shared hash */
62 : static const dshash_parameters dsh_params = {
63 : sizeof(PgStat_HashKey),
64 : sizeof(PgStatShared_HashEntry),
65 : pgstat_cmp_hash_key,
66 : pgstat_hash_hash_key,
67 : dshash_memcpy,
68 : LWTRANCHE_PGSTATS_HASH
69 : };
70 :
71 :
72 : /*
73 : * Backend local references to shared stats entries. If there are pending
74 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 : * list.
76 : *
77 : * When a stats entry is dropped, each backend needs to release its reference
78 : * to it before the memory can be released. To trigger that,
79 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 : * compares to its copy of pgStatSharedRefAge on a regular basis.
81 : */
82 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 : static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
84 :
85 : /*
86 : * Memory contexts containing the PgStat_EntryRef entries and the
87 : * pgStatEntryRefHash table, respectively. Kept separate to make it easier to
88 : * track / attribute memory usage.
89 : */
90 : static MemoryContext pgStatSharedRefContext = NULL;
91 : static MemoryContext pgStatEntryRefHashContext = NULL;
92 :
93 :
94 : /* ------------------------------------------------------------
95 : * Public functions called from postmaster follow
96 : * ------------------------------------------------------------
97 : */
98 :
99 : /*
100 : * The size of the shared memory allocation for stats stored in the shared
101 : * stats hash table. This allocation will be done as part of the main shared
102 : * memory, rather than dynamic shared memory, allowing it to be initialized in
103 : * postmaster.
104 : */
105 : static Size
106 10724 : pgstat_dsa_init_size(void)
107 : {
108 : Size sz;
109 :
110 : /*
111 : * The dshash header / initial buckets array needs to fit into "plain"
112 : * shared memory, but it's beneficial not to need dsm segments
113 : * immediately. A size of 256kB seems to work well and is not
114 : * disproportionate compared to other constant-sized shared memory
115 : * allocations. NB: To avoid DSMs further, the user can configure
116 : * min_dynamic_shared_memory.
117 : */
118 10724 : sz = 256 * 1024;
119 : Assert(dsa_minimum_size() <= sz);
120 10724 : return MAXALIGN(sz);
121 : }
122 :
123 : /*
124 : * Compute shared memory space needed for cumulative statistics
125 : */
126 : Size
127 5240 : StatsShmemSize(void)
128 : {
129 : Size sz;
130 :
131 5240 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 5240 : sz = add_size(sz, pgstat_dsa_init_size());
133 :
134 : /* Add shared memory for all the custom fixed-numbered statistics */
135 681200 : for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
136 : {
137 675960 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
138 :
139 675960 : if (!kind_info)
140 675960 : continue;
141 0 : if (!kind_info->fixed_amount)
142 0 : continue;
143 :
144 : Assert(kind_info->shared_size != 0);
145 :
146 0 : sz += MAXALIGN(kind_info->shared_size);
147 : }
148 :
149 5240 : return sz;
150 : }
151 :
152 : /*
153 : * Initialize cumulative statistics system during startup
154 : */
155 : void
156 1828 : StatsShmemInit(void)
157 : {
158 : bool found;
159 : Size sz;
160 :
161 1828 : sz = StatsShmemSize();
162 1828 : pgStatLocal.shmem = (PgStat_ShmemControl *)
163 1828 : ShmemInitStruct("Shared Memory Stats", sz, &found);
164 :
165 1828 : if (!IsUnderPostmaster)
166 : {
167 : dsa_area *dsa;
168 : dshash_table *dsh;
169 1828 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
170 1828 : char *p = (char *) ctl;
171 :
172 : Assert(!found);
173 :
174 : /* the allocation of pgStatLocal.shmem itself */
175 1828 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
176 :
177 : /*
178 : * Create a small dsa allocation in plain shared memory. This is
179 : * required because postmaster cannot use dsm segments. It also
180 : * provides a small efficiency win.
181 : */
182 1828 : ctl->raw_dsa_area = p;
183 1828 : p += MAXALIGN(pgstat_dsa_init_size());
184 1828 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
185 : pgstat_dsa_init_size(),
186 : LWTRANCHE_PGSTATS_DSA, 0);
187 1828 : dsa_pin(dsa);
188 :
189 : /*
190 : * To ensure the dshash table is created in "plain" shared memory,
191 : * temporarily limit the dsa's size to its initial size.
192 : */
193 1828 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
194 :
195 : /*
196 : * With the limit in place, create the dshash table. XXX: It'd be nice
197 : * if there were dshash_create_in_place().
198 : */
199 1828 : dsh = dshash_create(dsa, &dsh_params, NULL);
200 1828 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
201 :
202 : /* lift limit set above */
203 1828 : dsa_set_size_limit(dsa, -1);
204 :
205 : /*
206 : * Postmaster will never access these again, thus free the local
207 : * dsa/dshash references.
208 : */
209 1828 : dshash_detach(dsh);
210 1828 : dsa_detach(dsa);
211 :
212 1828 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
213 :
214 : /* initialize fixed-numbered stats */
215 469796 : for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
216 : {
217 467968 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
218 : char *ptr;
219 :
220 467968 : if (!kind_info || !kind_info->fixed_amount)
221 457000 : continue;
222 :
223 10968 : if (pgstat_is_kind_builtin(kind))
224 10968 : ptr = ((char *) ctl) + kind_info->shared_ctl_off;
225 : else
226 : {
227 0 : int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
228 :
229 : Assert(kind_info->shared_size != 0);
230 0 : ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
231 0 : ptr = ctl->custom_data[idx];
232 : }
233 :
234 10968 : kind_info->init_shmem_cb(ptr);
235 : }
236 : }
237 : else
238 : {
239 : Assert(found);
240 : }
241 1828 : }
242 :
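: /*
:  * Attach this backend to the shared stats DSA and dshash table created by
:  * StatsShmemInit(), keeping the mappings pinned for the backend's lifetime.
:  */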
243 : void
244 33572 : pgstat_attach_shmem(void)
245 : {
246 : MemoryContext oldcontext;
247 :
248 : Assert(pgStatLocal.dsa == NULL);
249 :
250 : /* stats shared memory persists for the backend lifetime */
251 33572 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
252 :
253 33572 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
254 : NULL);
255 33572 : dsa_pin_mapping(pgStatLocal.dsa);
256 :
257 67144 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
258 33572 : pgStatLocal.shmem->hash_handle, 0);
259 :
260 33572 : MemoryContextSwitchTo(oldcontext);
261 33572 : }
262 :
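: /*
:  * Detach from the shared stats DSA and hash table, releasing all local
:  * entry references first so that dropped entries can eventually be freed.
:  */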
263 : void
264 33572 : pgstat_detach_shmem(void)
265 : {
266 : Assert(pgStatLocal.dsa);
267 :
268 : /* we shouldn't leave references to shared stats */
269 33572 : pgstat_release_all_entry_refs(false);
270 :
271 33572 : dshash_detach(pgStatLocal.shared_hash);
272 33572 : pgStatLocal.shared_hash = NULL;
273 :
274 33572 : dsa_detach(pgStatLocal.dsa);
275 :
276 : /*
277 : * dsa_detach() does not decrement the DSA reference count, because no
278 : * segment was provided to dsa_attach_in_place() and thus no cleanup
279 : * callbacks were registered. Hence, release the reference manually now.
280 : */
281 33572 : dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
282 :
283 33572 : pgStatLocal.dsa = NULL;
284 33572 : }
285 :
286 :
287 : /* ------------------------------------------------------------
288 : * Maintenance of shared memory stats entries
289 : * ------------------------------------------------------------
290 : */
291 :
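: /*
:  * Allocate and initialize the shared stats data for a newly inserted hash
:  * entry. The caller must hold the dshash partition lock for shhashent.
:  */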
292 : PgStatShared_Common *
293 481990 : pgstat_init_entry(PgStat_Kind kind,
294 : PgStatShared_HashEntry *shhashent)
295 : {
296 : /* Create new stats entry. */
297 : dsa_pointer chunk;
298 : PgStatShared_Common *shheader;
299 :
300 : /*
301 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
302 : * can't be freed before the initialization because it can't be found as
303 : * long as we hold the dshash partition lock. The caller needs to increment
304 : * it further if a longer-lived reference is needed.
305 : */
306 481990 : pg_atomic_init_u32(&shhashent->refcount, 1);
307 481990 : shhashent->dropped = false;
308 :
309 481990 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
310 481990 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
311 481990 : shheader->magic = 0xdeadbeef;
312 :
313 : /* Link the new entry from the hash entry. */
314 481990 : shhashent->body = chunk;
315 :
316 481990 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
317 :
318 481990 : return shheader;
319 : }
320 :
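: /*
:  * Reuse a dropped-but-not-yet-freed entry: take a new reference, clear the
:  * dropped flag and zero the stats payload.
:  */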
321 : static PgStatShared_Common *
322 54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
323 : {
324 : PgStatShared_Common *shheader;
325 :
326 54 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
327 :
328 : /* mark as not dropped anymore */
329 54 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
330 54 : shhashent->dropped = false;
331 :
332 : /* reinitialize content */
333 : Assert(shheader->magic == 0xdeadbeef);
334 54 : memset(pgstat_get_entry_data(kind, shheader), 0,
335 : pgstat_get_entry_len(kind));
336 :
337 54 : return shheader;
338 : }
339 :
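: /*
:  * Lazily create the local hash table of entry references, remembering the
:  * gc_request_count it is consistent with.
:  */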
340 : static void
341 3792410 : pgstat_setup_shared_refs(void)
342 : {
343 3792410 : if (likely(pgStatEntryRefHash != NULL))
344 3763210 : return;
345 :
346 29200 : pgStatEntryRefHash =
347 29200 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
348 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
349 29200 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
350 : Assert(pgStatSharedRefAge != 0);
351 : }
352 :
353 : /*
354 : * Helper function for pgstat_get_entry_ref().
355 : */
356 : static void
357 1544664 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
358 : PgStatShared_HashEntry *shhashent,
359 : PgStatShared_Common *shheader)
360 : {
361 : Assert(shheader->magic == 0xdeadbeef);
362 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
363 :
364 1544664 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
365 :
366 1544664 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
367 :
368 1544664 : entry_ref->shared_stats = shheader;
369 1544664 : entry_ref->shared_entry = shhashent;
370 1544664 : }
371 :
372 : /*
373 : * Helper function for pgstat_get_entry_ref().
374 : */
375 : static bool
376 3792410 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
377 : {
378 : bool found;
379 : PgStat_EntryRefHashEntry *cache_entry;
380 :
381 : /*
382 : * We immediately insert a cache entry, because it avoids 1) multiple
383 : * hashtable lookups in case of a cache miss, and 2) having to deal with
384 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
385 : */
386 :
387 3792410 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
388 :
389 3792410 : if (!found || !cache_entry->entry_ref)
390 1686078 : {
391 : PgStat_EntryRef *entry_ref;
392 :
393 1686078 : cache_entry->entry_ref = entry_ref =
394 1686078 : MemoryContextAlloc(pgStatSharedRefContext,
395 : sizeof(PgStat_EntryRef));
396 1686078 : entry_ref->shared_stats = NULL;
397 1686078 : entry_ref->shared_entry = NULL;
398 1686078 : entry_ref->pending = NULL;
399 :
400 1686078 : found = false;
401 : }
402 2106332 : else if (cache_entry->entry_ref->shared_stats == NULL)
403 : {
404 : Assert(cache_entry->entry_ref->pending == NULL);
405 0 : found = false;
406 : }
407 : else
408 : {
409 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
410 :
411 2106332 : entry_ref = cache_entry->entry_ref;
412 : Assert(entry_ref->shared_entry != NULL);
413 : Assert(entry_ref->shared_stats != NULL);
414 :
415 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
416 : /* should have at least our reference */
417 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
418 : }
419 :
420 3792410 : *entry_ref_p = cache_entry->entry_ref;
421 3792410 : return found;
422 : }
423 :
424 : /*
425 : * Get a shared stats reference. If create is true, the shared stats object is
426 : * created if it does not exist.
427 : *
428 : * When create is true, and created_entry is non-NULL, it'll be set to true
429 : * if the entry is newly created, false otherwise.
430 : */
431 : PgStat_EntryRef *
432 3792410 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
433 : bool *created_entry)
434 : {
435 3792410 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objid = objid};
436 : PgStatShared_HashEntry *shhashent;
437 3792410 : PgStatShared_Common *shheader = NULL;
438 : PgStat_EntryRef *entry_ref;
439 :
440 : /*
441 : * Passing in created_entry only makes sense if we could possibly create
442 : * the entry.
443 : */
444 : Assert(create || created_entry == NULL);
445 : pgstat_assert_is_up();
446 : Assert(pgStatLocal.shared_hash != NULL);
447 : Assert(!pgStatLocal.shmem->is_shutdown);
448 :
449 3792410 : pgstat_setup_memcxt();
450 3792410 : pgstat_setup_shared_refs();
451 :
452 3792410 : if (created_entry != NULL)
453 214 : *created_entry = false;
454 :
455 : /*
456 : * Check if other backends dropped stats that could not be deleted because
457 : * somebody held references to them. If so, check this backend's references.
458 : * This is not expected to happen often. The location of the check is a
459 : * bit arbitrary, but this is a relatively frequently called path, so it is
460 : * a better place than most.
461 : */
462 3792410 : if (pgstat_need_entry_refs_gc())
463 11148 : pgstat_gc_entry_refs();
464 :
465 : /*
466 : * First check the lookup cache hashtable in local memory. If we find a
467 : * match here we can avoid taking locks / causing contention.
468 : */
469 3792410 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
470 2106332 : return entry_ref;
471 :
472 : Assert(entry_ref != NULL);
473 :
474 : /*
475 : * Do a lookup in the hash table first - it's quite likely that the entry
476 : * already exists, and that way we only need a shared lock.
477 : */
478 1686078 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
479 :
480 1686078 : if (create && !shhashent)
481 : {
482 : bool shfound;
483 :
484 : /*
485 : * It's possible that somebody created the entry since the above
486 : * lookup. If so, fall through to the same path we would have taken if
487 : * it had already been created before the dshash_find() call above.
488 : */
489 173788 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
490 173788 : if (!shfound)
491 : {
492 173786 : shheader = pgstat_init_entry(kind, shhashent);
493 173786 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
494 :
495 173786 : if (created_entry != NULL)
496 96 : *created_entry = true;
497 :
498 173786 : return entry_ref;
499 : }
500 : }
501 :
502 1512292 : if (!shhashent)
503 : {
504 : /*
505 : * If we're not creating, delete the reference again. In all
506 : * likelihood it's just a stats lookup - no point wasting memory for a
507 : * shared ref to nothing...
508 : */
509 141340 : pgstat_release_entry_ref(key, entry_ref, false);
510 :
511 141340 : return NULL;
512 : }
513 : else
514 : {
515 : /*
516 : * Can get here either because dshash_find() found a match, or if
517 : * dshash_find_or_insert() found a concurrently inserted entry.
518 : */
519 :
520 1370952 : if (shhashent->dropped && create)
521 : {
522 : /*
523 : * There are legitimate cases where the old stats entry might not
524 : * yet have been dropped by the time it's reused. The most obvious
525 : * case is replication slot stats, where a new slot can be
526 : * created with the same index just after dropping. But OID
527 : * wraparound can lead to other cases as well. We just reset the
528 : * stats to their plain state.
529 : */
530 54 : shheader = pgstat_reinit_entry(kind, shhashent);
531 54 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
532 :
533 54 : if (created_entry != NULL)
534 0 : *created_entry = true;
535 :
536 54 : return entry_ref;
537 : }
538 1370898 : else if (shhashent->dropped)
539 : {
540 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
541 74 : pgstat_release_entry_ref(key, entry_ref, false);
542 :
543 74 : return NULL;
544 : }
545 : else
546 : {
547 1370824 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
548 1370824 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
549 :
550 1370824 : return entry_ref;
551 : }
552 : }
553 : }
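: /*
:  * Illustrative use of the function above (hypothetical caller, not part
:  * of this file): acquire a reference, lock the entry, update the shared
:  * data, then unlock.
:  *
:  *     PgStat_EntryRef *ref;
:  *
:  *     ref = pgstat_get_entry_ref(PGSTAT_KIND_DATABASE, dboid, 0,
:  *                                true, NULL);
:  *     if (ref != NULL && pgstat_lock_entry(ref, false))
:  *     {
:  *         ... update the stats behind ref->shared_stats ...
:  *         pgstat_unlock_entry(ref);
:  *     }
:  */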
554 :
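: /*
:  * Release a local reference to a shared entry, optionally discarding any
:  * pending data. If this was the last reference to an already-dropped
:  * entry, free the shared entry as well.
:  */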
555 : static void
556 1686078 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
557 : bool discard_pending)
558 : {
559 1686078 : if (entry_ref && entry_ref->pending)
560 : {
561 60420 : if (discard_pending)
562 60420 : pgstat_delete_pending_entry(entry_ref);
563 : else
564 0 : elog(ERROR, "releasing ref with pending data");
565 : }
566 :
567 1686078 : if (entry_ref && entry_ref->shared_stats)
568 : {
569 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
570 : Assert(entry_ref->pending == NULL);
571 :
572 : /*
573 : * This can't race with another backend looking up the stats entry and
574 : * increasing the refcount because it is not "legal" to create
575 : * additional references to dropped entries.
576 : */
577 1544664 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
578 : {
579 : PgStatShared_HashEntry *shent;
580 :
581 : /*
582 : * We're the last referrer to this entry, try to drop the shared
583 : * entry.
584 : */
585 :
586 : /* only dropped entries can reach a 0 refcount */
587 : Assert(entry_ref->shared_entry->dropped);
588 :
589 9222 : shent = dshash_find(pgStatLocal.shared_hash,
590 9222 : &entry_ref->shared_entry->key,
591 : true);
592 9222 : if (!shent)
593 0 : elog(ERROR, "could not find just referenced shared stats entry");
594 :
595 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
596 : Assert(entry_ref->shared_entry == shent);
597 :
598 9222 : pgstat_free_entry(shent, NULL);
599 : }
600 : }
601 :
602 1686078 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
603 0 : elog(ERROR, "entry ref vanished before deletion");
604 :
605 1686078 : if (entry_ref)
606 1686078 : pfree(entry_ref);
607 1686078 : }
608 :
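: /*
:  * Acquire the entry's lock exclusively. With nowait set, return false
:  * instead of blocking if the lock cannot be taken immediately.
:  */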
609 : bool
610 1687996 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
611 : {
612 1687996 : LWLock *lock = &entry_ref->shared_stats->lock;
613 :
614 1687996 : if (nowait)
615 534328 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
616 :
617 1153668 : LWLockAcquire(lock, LW_EXCLUSIVE);
618 1153668 : return true;
619 : }
620 :
621 : /*
622 : * Separate from pgstat_lock_entry() as most callers will need to lock
623 : * exclusively.
624 : */
625 : bool
626 355196 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
627 : {
628 355196 : LWLock *lock = &entry_ref->shared_stats->lock;
629 :
630 355196 : if (nowait)
631 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
632 :
633 355196 : LWLockAcquire(lock, LW_SHARED);
634 355196 : return true;
635 : }
636 :
637 : void
638 2043190 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
639 : {
640 2043190 : LWLockRelease(&entry_ref->shared_stats->lock);
641 2043190 : }
642 :
643 : /*
644 : * Helper function to fetch and lock shared stats.
645 : */
646 : PgStat_EntryRef *
647 140382 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
648 : bool nowait)
649 : {
650 : PgStat_EntryRef *entry_ref;
651 :
652 : /* find shared table stats entry corresponding to the local entry */
653 140382 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
654 :
655 : /* lock the shared entry to protect the content, skip if failed */
656 140382 : if (!pgstat_lock_entry(entry_ref, nowait))
657 0 : return NULL;
658 :
659 140382 : return entry_ref;
660 : }
661 :
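: /*
:  * Ask all backends to garbage collect their local entry references by
:  * incrementing the shared gc_request_count.
:  */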
662 : void
663 3646 : pgstat_request_entry_refs_gc(void)
664 : {
665 3646 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
666 3646 : }
667 :
668 : static bool
669 3792410 : pgstat_need_entry_refs_gc(void)
670 : {
671 : uint64 curage;
672 :
673 3792410 : if (!pgStatEntryRefHash)
674 0 : return false;
675 :
676 : /* should have been initialized when creating pgStatEntryRefHash */
677 : Assert(pgStatSharedRefAge != 0);
678 :
679 3792410 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
680 :
681 3792410 : return pgStatSharedRefAge != curage;
682 : }
683 :
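: /*
:  * Release local references to entries dropped by other backends, then
:  * record the gc_request_count we have caught up to.
:  */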
684 : static void
685 11148 : pgstat_gc_entry_refs(void)
686 : {
687 : pgstat_entry_ref_hash_iterator i;
688 : PgStat_EntryRefHashEntry *ent;
689 : uint64 curage;
690 :
691 11148 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
692 : Assert(curage != 0);
693 :
694 : /*
695 : * Some entries have been dropped. Invalidate the cached pointers to them.
696 : */
697 11148 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
698 868398 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
699 : {
700 857250 : PgStat_EntryRef *entry_ref = ent->entry_ref;
701 :
702 : Assert(!entry_ref->shared_stats ||
703 : entry_ref->shared_stats->magic == 0xdeadbeef);
704 :
705 857250 : if (!entry_ref->shared_entry->dropped)
706 580434 : continue;
707 :
708 : /* cannot gc shared ref that has pending data */
709 276816 : if (entry_ref->pending != NULL)
710 268012 : continue;
711 :
712 8804 : pgstat_release_entry_ref(ent->key, entry_ref, false);
713 : }
714 :
715 11148 : pgStatSharedRefAge = curage;
716 11148 : }
717 :
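: /*
:  * Release all local entry references accepted by the match callback, or
:  * all of them if match is NULL.
:  */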
718 : static void
719 29244 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
720 : Datum match_data)
721 : {
722 : pgstat_entry_ref_hash_iterator i;
723 : PgStat_EntryRefHashEntry *ent;
724 :
725 29244 : if (pgStatEntryRefHash == NULL)
726 12 : return;
727 :
728 29232 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
729 :
730 1504918 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
731 : != NULL)
732 : {
733 : Assert(ent->entry_ref != NULL);
734 :
735 1475686 : if (match && !match(ent, match_data))
736 1020 : continue;
737 :
738 1474666 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
739 : }
740 : }
741 :
742 : /*
743 : * Release all local references to shared stats entries.
744 : *
745 : * A process may not exit while still holding references to stats entries;
746 : * otherwise the shared stats entries could never be freed.
747 : */
748 : static void
749 33572 : pgstat_release_all_entry_refs(bool discard_pending)
750 : {
751 33572 : if (pgStatEntryRefHash == NULL)
752 4372 : return;
753 :
754 29200 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
755 : Assert(pgStatEntryRefHash->members == 0);
756 29200 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
757 29200 : pgStatEntryRefHash = NULL;
758 : }
759 :
760 : static bool
761 1020 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
762 : {
763 1020 : Oid dboid = DatumGetObjectId(match_data);
764 :
765 1020 : return ent->key.dboid == dboid;
766 : }
767 :
768 : static void
769 44 : pgstat_release_db_entry_refs(Oid dboid)
770 : {
771 44 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
772 : match_db,
773 : ObjectIdGetDatum(dboid));
774 44 : }
775 :
776 :
777 : /* ------------------------------------------------------------
778 : * Dropping and resetting of stats entries
779 : * ------------------------------------------------------------
780 : */
781 :
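: /*
:  * Delete the shared hash entry and free its stats data. The entry must be
:  * locked, either via the passed-in dshash sequence scan or via a prior
:  * dshash_find().
:  */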
782 : static void
783 63200 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
784 : {
785 : dsa_pointer pdsa;
786 :
787 : /*
788 : * Fetch dsa pointer before deleting entry - that way we can free the
789 : * memory after releasing the lock.
790 : */
791 63200 : pdsa = shent->body;
792 :
793 63200 : if (!hstat)
794 61366 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
795 : else
796 1834 : dshash_delete_current(hstat);
797 :
798 63200 : dsa_free(pgStatLocal.dsa, pdsa);
799 63200 : }
800 :
801 : /*
802 : * Helper for both pgstat_drop_database_and_contents() and
803 : * pgstat_drop_entry(). If hstat is non-null, delete the shared entry using
804 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
805 : * case the entry must already be locked.
806 : */
807 : static bool
808 63256 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
809 : dshash_seq_status *hstat)
810 : {
811 : Assert(shent->body != InvalidDsaPointer);
812 :
813 : /* should already have released local reference */
814 63256 : if (pgStatEntryRefHash)
815 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
816 :
817 : /*
818 : * Signal that the entry is dropped - this will eventually cause other
819 : * backends to release their references.
820 : */
821 63256 : if (shent->dropped)
822 0 : elog(ERROR,
823 : "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%llu refcount=%u",
824 : pgstat_get_kind_info(shent->key.kind)->name,
825 : shent->key.dboid,
826 : (unsigned long long) shent->key.objid,
827 : pg_atomic_read_u32(&shent->refcount));
828 63256 : shent->dropped = true;
829 :
830 : /* release the refcount that marked the entry as not dropped */
831 63256 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
832 : {
833 53978 : pgstat_free_entry(shent, hstat);
834 53978 : return true;
835 : }
836 : else
837 : {
838 9278 : if (!hstat)
839 9278 : dshash_release_lock(pgStatLocal.shared_hash, shent);
840 9278 : return false;
841 : }
842 : }
843 :
844 : /*
845 : * Drop stats for the database and all the objects inside that database.
846 : */
847 : static void
848 44 : pgstat_drop_database_and_contents(Oid dboid)
849 : {
850 : dshash_seq_status hstat;
851 : PgStatShared_HashEntry *p;
852 44 : uint64 not_freed_count = 0;
853 :
854 : Assert(OidIsValid(dboid));
855 :
856 : Assert(pgStatLocal.shared_hash != NULL);
857 :
858 : /*
859 : * This backend might very well be the only backend holding references to
860 : * the about-to-be-dropped entries. Ensure that we're not preventing them
861 : * from being cleaned up until later.
862 : *
863 : * Doing this separately from the dshash iteration below avoids having to
864 : * do so while holding a partition lock on the shared hashtable.
865 : */
866 44 : pgstat_release_db_entry_refs(dboid);
867 :
868 : /* some of the dshash entries are to be removed, take exclusive lock. */
869 44 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
870 11316 : while ((p = dshash_seq_next(&hstat)) != NULL)
871 : {
872 11272 : if (p->dropped)
873 2 : continue;
874 :
875 11270 : if (p->key.dboid != dboid)
876 9548 : continue;
877 :
878 1722 : if (!pgstat_drop_entry_internal(p, &hstat))
879 : {
880 : /*
881 : * Even statistics for a dropped database might currently be
882 : * accessed (consider e.g. database stats for pg_stat_database).
883 : */
884 0 : not_freed_count++;
885 : }
886 : }
887 44 : dshash_seq_term(&hstat);
888 :
889 : /*
890 : * If some of the stats data could not be freed, signal the reference
891 : * holders to run garbage collection of their cached pgStatLocal.shmem.
892 : */
893 44 : if (not_freed_count > 0)
894 0 : pgstat_request_entry_refs_gc();
895 44 : }
896 :
897 : /*
898 : * Drop a single stats entry.
899 : *
900 : * This routine returns false if the stats entry of the dropped object could
901 : * not be freed, true otherwise.
902 : *
903 : * The callers of this function should call pgstat_request_entry_refs_gc()
904 : * if the stats entry could not be freed, to ensure that this entry's memory
905 : * can be reclaimed later by a different backend calling
906 : * pgstat_gc_entry_refs().
907 : */
908 : bool
909 91804 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
910 : {
911 91804 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objid = objid};
912 : PgStatShared_HashEntry *shent;
913 91804 : bool freed = true;
914 :
915 : /* delete local reference */
916 91804 : if (pgStatEntryRefHash)
917 : {
918 : PgStat_EntryRefHashEntry *lohashent =
919 74778 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
920 :
921 74778 : if (lohashent)
922 61194 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
923 : true);
924 : }
925 :
926 : /* mark entry in shared hashtable as deleted, drop if possible */
927 91804 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
928 91804 : if (shent)
929 : {
930 61422 : freed = pgstat_drop_entry_internal(shent, NULL);
931 :
932 : /*
933 : * Database stats contain other stats. Drop those as well when
934 : * dropping the database. XXX: Perhaps this should be done in a
935 : * slightly more principled way? But not obvious what that'd look
936 : * like, and so far this is the only case...
937 : */
938 61422 : if (key.kind == PGSTAT_KIND_DATABASE)
939 44 : pgstat_drop_database_and_contents(key.dboid);
940 : }
941 :
942 91804 : return freed;
943 : }
944 :
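: /*
:  * Drop all entries in the shared hash table, requesting garbage collection
:  * if some could not be freed because of remaining references.
:  */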
945 : void
946 442 : pgstat_drop_all_entries(void)
947 : {
948 : dshash_seq_status hstat;
949 : PgStatShared_HashEntry *ps;
950 442 : uint64 not_freed_count = 0;
951 :
952 442 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
953 554 : while ((ps = dshash_seq_next(&hstat)) != NULL)
954 : {
955 112 : if (ps->dropped)
956 0 : continue;
957 :
958 112 : if (!pgstat_drop_entry_internal(ps, &hstat))
959 0 : not_freed_count++;
960 : }
961 442 : dshash_seq_term(&hstat);
962 :
963 442 : if (not_freed_count > 0)
964 0 : pgstat_request_entry_refs_gc();
965 442 : }
966 :
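: /*
:  * Zero a shared entry's stats payload and, if the kind provides one, call
:  * its reset_timestamp_cb with the given timestamp.
:  */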
967 : static void
968 16448 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
969 : TimestampTz ts)
970 : {
971 16448 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
972 :
973 16448 : memset(pgstat_get_entry_data(kind, header), 0,
974 : pgstat_get_entry_len(kind));
975 :
976 16448 : if (kind_info->reset_timestamp_cb)
977 344 : kind_info->reset_timestamp_cb(header, ts);
978 16448 : }
979 :
980 : /*
981 : * Reset one variable-numbered stats entry.
982 : */
983 : void
984 316 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
985 : {
986 : PgStat_EntryRef *entry_ref;
987 :
988 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
989 :
990 316 : entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
991 316 : if (!entry_ref || entry_ref->shared_entry->dropped)
992 2 : return;
993 :
994 314 : (void) pgstat_lock_entry(entry_ref, false);
995 314 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
996 314 : pgstat_unlock_entry(entry_ref);
997 : }
998 :
999 : /*
1000 : * Scan through the shared hashtable of stats, resetting statistics if
1001 : * approved by the provided do_reset() function.
1002 : */
1003 : void
1004 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1005 : Datum match_data, TimestampTz ts)
1006 : {
1007 : dshash_seq_status hstat;
1008 : PgStatShared_HashEntry *p;
1009 :
1010 : /* dshash entry is not modified, take shared lock */
1011 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1012 24386 : while ((p = dshash_seq_next(&hstat)) != NULL)
1013 : {
1014 : PgStatShared_Common *header;
1015 :
1016 24352 : if (p->dropped)
1017 2 : continue;
1018 :
1019 24350 : if (!do_reset(p, match_data))
1020 8216 : continue;
1021 :
1022 16134 : header = dsa_get_address(pgStatLocal.dsa, p->body);
1023 :
1024 16134 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1025 :
1026 16134 : shared_stat_reset_contents(p->key.kind, header, ts);
1027 :
1028 16134 : LWLockRelease(&header->lock);
1029 : }
1030 34 : dshash_seq_term(&hstat);
1031 34 : }
1032 :
1033 : static bool
1034 2936 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
1035 : {
1036 2936 : return p->key.kind == DatumGetInt32(match_data);
1037 : }
1038 :
1039 : void
1040 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1041 : {
1042 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1043 8 : }
1044 :
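: /*
:  * Lazily create the memory contexts used for the entry references and the
:  * local reference hash table.
:  */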
1045 : static void
1046 3792410 : pgstat_setup_memcxt(void)
1047 : {
1048 3792410 : if (unlikely(!pgStatSharedRefContext))
1049 29200 : pgStatSharedRefContext =
1050 29200 : AllocSetContextCreate(TopMemoryContext,
1051 : "PgStat Shared Ref",
1052 : ALLOCSET_SMALL_SIZES);
1053 3792410 : if (unlikely(!pgStatEntryRefHashContext))
1054 29200 : pgStatEntryRefHashContext =
1055 29200 : AllocSetContextCreate(TopMemoryContext,
1056 : "PgStat Shared Ref Hash",
1057 : ALLOCSET_SMALL_SIZES);
1058 3792410 : }
|