Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2023, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
/*
 * Size argument passed to pgstat_entry_ref_hash_create() when the backend
 * local entry reference hash table is first set up.
 */
#define PGSTAT_ENTRY_REF_HASH_SIZE	128
22 :
/*
 * Hash table entry for finding the PgStat_EntryRef for a key.
 *
 * This is the element type of the backend local simplehash table
 * (pgStatEntryRefHash) that maps a stats key to the locally allocated
 * reference for the corresponding shared stats entry.
 */
typedef struct PgStat_EntryRefHashEntry
{
	PgStat_HashKey key;			/* hash key */
	char		status;			/* for simplehash use */
	PgStat_EntryRef *entry_ref; /* reference cached for this key */
} PgStat_EntryRefHashEntry;
30 :
31 :
/*
 * Instantiate a simplehash table for references to shared statistics
 * entries.
 *
 * The generated functions carry the pgstat_entry_ref_hash prefix, store
 * PgStat_EntryRefHashEntry elements keyed by PgStat_HashKey, and hash /
 * compare keys with the same pgstat_hash_hash_key() / pgstat_cmp_hash_key()
 * callbacks that are used for the shared dshash table.
 */
#define SH_PREFIX pgstat_entry_ref_hash
#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
45 :
46 :
/* dropping of shared stats entries */
static void pgstat_drop_database_and_contents(Oid dboid);

static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);

/* management of backend local references to shared stats entries */
static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
static bool pgstat_need_entry_refs_gc(void);
static void pgstat_gc_entry_refs(void);
static void pgstat_release_all_entry_refs(bool discard_pending);
/* callback deciding whether a local reference should be released */
typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);

/* lazy creation of the backend local memory contexts */
static void pgstat_setup_memcxt(void);
59 :
60 :
/*
 * Parameters for the shared hash.
 *
 * Used both when creating the dshash table in StatsShmemInit() and when
 * attaching to it in pgstat_attach_shmem(). The initializer is positional;
 * the per-line notes describe the values supplied, the field names are not
 * visible here.
 */
static const dshash_parameters dsh_params = {
	sizeof(PgStat_HashKey),		/* size of the lookup key */
	sizeof(PgStatShared_HashEntry), /* size of each hash entry */
	pgstat_cmp_hash_key,		/* key comparison callback */
	pgstat_hash_hash_key,		/* key hashing callback */
	LWTRANCHE_PGSTATS_HASH		/* LWLock tranche identifier */
};
69 :
70 :
71 : /*
72 : * Backend local references to shared stats entries. If there are pending
73 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
74 : * list.
75 : *
76 : * When a stats entry is dropped each backend needs to release its reference
77 : * to it before the memory can be released. To trigger that
78 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
79 : * compares to their copy of pgStatSharedRefAge on a regular basis.
80 : */
81 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
82 : static int pgStatSharedRefAge = 0; /* cache age of pgStatShmLookupCache */
83 :
84 : /*
85 : * Memory contexts containing the pgStatEntryRefHash table and the
86 : * pgStatSharedRef entries respectively. Kept separate to make it easier to
87 : * track / attribute memory usage.
88 : */
89 : static MemoryContext pgStatSharedRefContext = NULL;
90 : static MemoryContext pgStatEntryRefHashContext = NULL;
91 :
92 :
93 : /* ------------------------------------------------------------
94 : * Public functions called from postmaster follow
95 : * ------------------------------------------------------------
96 : */
97 :
98 : /*
99 : * The size of the shared memory allocation for stats stored in the shared
100 : * stats hash table. This allocation will be done as part of the main shared
101 : * memory, rather than dynamic shared memory, allowing it to be initialized in
102 : * postmaster.
103 : */
104 : static Size
105 20000 : pgstat_dsa_init_size(void)
106 : {
107 : Size sz;
108 :
109 : /*
110 : * The dshash header / initial buckets array needs to fit into "plain"
111 : * shared memory, but it's beneficial to not need dsm segments
112 : * immediately. A size of 256kB seems works well and is not
113 : * disproportional compared to other constant sized shared memory
114 : * allocations. NB: To avoid DSMs further, the user can configure
115 : * min_dynamic_shared_memory.
116 : */
117 20000 : sz = 256 * 1024;
118 : Assert(dsa_minimum_size() <= sz);
119 20000 : return MAXALIGN(sz);
120 : }
121 :
122 : /*
123 : * Compute shared memory space needed for cumulative statistics
124 : */
125 : Size
126 9092 : StatsShmemSize(void)
127 : {
128 : Size sz;
129 :
130 9092 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
131 9092 : sz = add_size(sz, pgstat_dsa_init_size());
132 :
133 9092 : return sz;
134 : }
135 :
136 : /*
137 : * Initialize cumulative statistics system during startup
138 : */
139 : void
140 3636 : StatsShmemInit(void)
141 : {
142 : bool found;
143 : Size sz;
144 :
145 3636 : sz = StatsShmemSize();
146 3636 : pgStatLocal.shmem = (PgStat_ShmemControl *)
147 3636 : ShmemInitStruct("Shared Memory Stats", sz, &found);
148 :
149 3636 : if (!IsUnderPostmaster)
150 : {
151 : dsa_area *dsa;
152 : dshash_table *dsh;
153 3636 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
154 3636 : char *p = (char *) ctl;
155 :
156 : Assert(!found);
157 :
158 : /* the allocation of pgStatLocal.shmem itself */
159 3636 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
160 :
161 : /*
162 : * Create a small dsa allocation in plain shared memory. This is
163 : * required because postmaster cannot use dsm segments. It also
164 : * provides a small efficiency win.
165 : */
166 3636 : ctl->raw_dsa_area = p;
167 3636 : p += MAXALIGN(pgstat_dsa_init_size());
168 3636 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
169 : pgstat_dsa_init_size(),
170 : LWTRANCHE_PGSTATS_DSA, 0);
171 3636 : dsa_pin(dsa);
172 :
173 : /*
174 : * To ensure dshash is created in "plain" shared memory, temporarily
175 : * limit size of dsa to the initial size of the dsa.
176 : */
177 3636 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
178 :
179 : /*
180 : * With the limit in place, create the dshash table. XXX: It'd be nice
181 : * if there were dshash_create_in_place().
182 : */
183 3636 : dsh = dshash_create(dsa, &dsh_params, 0);
184 3636 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
185 :
186 : /* lift limit set above */
187 3636 : dsa_set_size_limit(dsa, -1);
188 :
189 : /*
190 : * Postmaster will never access these again, thus free the local
191 : * dsa/dshash references.
192 : */
193 3636 : dshash_detach(dsh);
194 3636 : dsa_detach(dsa);
195 :
196 3636 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
197 :
198 :
199 : /* initialize fixed-numbered stats */
200 3636 : LWLockInitialize(&ctl->archiver.lock, LWTRANCHE_PGSTATS_DATA);
201 3636 : LWLockInitialize(&ctl->bgwriter.lock, LWTRANCHE_PGSTATS_DATA);
202 3636 : LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA);
203 3636 : LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA);
204 3636 : LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA);
205 :
206 54540 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
207 50904 : LWLockInitialize(&ctl->io.locks[i],
208 : LWTRANCHE_PGSTATS_DATA);
209 : }
210 : else
211 : {
212 : Assert(found);
213 : }
214 3636 : }
215 :
216 : void
217 30036 : pgstat_attach_shmem(void)
218 : {
219 : MemoryContext oldcontext;
220 :
221 : Assert(pgStatLocal.dsa == NULL);
222 :
223 : /* stats shared memory persists for the backend lifetime */
224 30036 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
225 :
226 30036 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
227 : NULL);
228 30036 : dsa_pin_mapping(pgStatLocal.dsa);
229 :
230 60072 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
231 30036 : pgStatLocal.shmem->hash_handle, 0);
232 :
233 30036 : MemoryContextSwitchTo(oldcontext);
234 30036 : }
235 :
236 : void
237 30036 : pgstat_detach_shmem(void)
238 : {
239 : Assert(pgStatLocal.dsa);
240 :
241 : /* we shouldn't leave references to shared stats */
242 30036 : pgstat_release_all_entry_refs(false);
243 :
244 30036 : dshash_detach(pgStatLocal.shared_hash);
245 30036 : pgStatLocal.shared_hash = NULL;
246 :
247 30036 : dsa_detach(pgStatLocal.dsa);
248 30036 : pgStatLocal.dsa = NULL;
249 30036 : }
250 :
251 :
252 : /* ------------------------------------------------------------
253 : * Maintenance of shared memory stats entries
254 : * ------------------------------------------------------------
255 : */
256 :
257 : PgStatShared_Common *
258 596678 : pgstat_init_entry(PgStat_Kind kind,
259 : PgStatShared_HashEntry *shhashent)
260 : {
261 : /* Create new stats entry. */
262 : dsa_pointer chunk;
263 : PgStatShared_Common *shheader;
264 :
265 : /*
266 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
267 : * can't be freed before the initialization because it can't be found as
268 : * long as we hold the dshash partition lock. Caller needs to increase
269 : * further if a longer lived reference is needed.
270 : */
271 596678 : pg_atomic_init_u32(&shhashent->refcount, 1);
272 596678 : shhashent->dropped = false;
273 :
274 596678 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
275 596678 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
276 596678 : shheader->magic = 0xdeadbeef;
277 :
278 : /* Link the new entry from the hash entry. */
279 596678 : shhashent->body = chunk;
280 :
281 596678 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
282 :
283 596678 : return shheader;
284 : }
285 :
286 : static PgStatShared_Common *
287 54 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
288 : {
289 : PgStatShared_Common *shheader;
290 :
291 54 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
292 :
293 : /* mark as not dropped anymore */
294 54 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
295 54 : shhashent->dropped = false;
296 :
297 : /* reinitialize content */
298 : Assert(shheader->magic == 0xdeadbeef);
299 54 : memset(pgstat_get_entry_data(kind, shheader), 0,
300 : pgstat_get_entry_len(kind));
301 :
302 54 : return shheader;
303 : }
304 :
305 : static void
306 3626574 : pgstat_setup_shared_refs(void)
307 : {
308 3626574 : if (likely(pgStatEntryRefHash != NULL))
309 3598230 : return;
310 :
311 28344 : pgStatEntryRefHash =
312 28344 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
313 : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
314 28344 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
315 : Assert(pgStatSharedRefAge != 0);
316 : }
317 :
318 : /*
319 : * Helper function for pgstat_get_entry_ref().
320 : */
321 : static void
322 1305714 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
323 : PgStatShared_HashEntry *shhashent,
324 : PgStatShared_Common *shheader)
325 : {
326 : Assert(shheader->magic == 0xdeadbeef);
327 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
328 :
329 1305714 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
330 :
331 1305714 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
332 :
333 1305714 : entry_ref->shared_stats = shheader;
334 1305714 : entry_ref->shared_entry = shhashent;
335 1305714 : }
336 :
337 : /*
338 : * Helper function for pgstat_get_entry_ref().
339 : */
340 : static bool
341 3626574 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
342 : {
343 : bool found;
344 : PgStat_EntryRefHashEntry *cache_entry;
345 :
346 : /*
347 : * We immediately insert a cache entry, because it avoids 1) multiple
348 : * hashtable lookups in case of a cache miss 2) having to deal with
349 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
350 : */
351 :
352 3626574 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
353 :
354 3626574 : if (!found || !cache_entry->entry_ref)
355 1660790 : {
356 : PgStat_EntryRef *entry_ref;
357 :
358 1660790 : cache_entry->entry_ref = entry_ref =
359 1660790 : MemoryContextAlloc(pgStatSharedRefContext,
360 : sizeof(PgStat_EntryRef));
361 1660790 : entry_ref->shared_stats = NULL;
362 1660790 : entry_ref->shared_entry = NULL;
363 1660790 : entry_ref->pending = NULL;
364 :
365 1660790 : found = false;
366 : }
367 1965784 : else if (cache_entry->entry_ref->shared_stats == NULL)
368 : {
369 : Assert(cache_entry->entry_ref->pending == NULL);
370 0 : found = false;
371 : }
372 : else
373 : {
374 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
375 :
376 1965784 : entry_ref = cache_entry->entry_ref;
377 : Assert(entry_ref->shared_entry != NULL);
378 : Assert(entry_ref->shared_stats != NULL);
379 :
380 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
381 : /* should have at least our reference */
382 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
383 : }
384 :
385 3626574 : *entry_ref_p = cache_entry->entry_ref;
386 3626574 : return found;
387 : }
388 :
389 : /*
390 : * Get a shared stats reference. If create is true, the shared stats object is
391 : * created if it does not exist.
392 : *
393 : * When create is true, and created_entry is non-NULL, it'll be set to true
394 : * if the entry is newly created, false otherwise.
395 : */
396 : PgStat_EntryRef *
397 3626574 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
398 : bool *created_entry)
399 : {
400 3626574 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
401 : PgStatShared_HashEntry *shhashent;
402 3626574 : PgStatShared_Common *shheader = NULL;
403 : PgStat_EntryRef *entry_ref;
404 :
405 : /*
406 : * passing in created_entry only makes sense if we possibly could create
407 : * entry.
408 : */
409 : Assert(create || created_entry == NULL);
410 : pgstat_assert_is_up();
411 : Assert(pgStatLocal.shared_hash != NULL);
412 : Assert(!pgStatLocal.shmem->is_shutdown);
413 :
414 3626574 : pgstat_setup_memcxt();
415 3626574 : pgstat_setup_shared_refs();
416 :
417 3626574 : if (created_entry != NULL)
418 208 : *created_entry = false;
419 :
420 : /*
421 : * Check if other backends dropped stats that could not be deleted because
422 : * somebody held references to it. If so, check this backend's references.
423 : * This is not expected to happen often. The location of the check is a
424 : * bit random, but this is a relatively frequently called path, so better
425 : * than most.
426 : */
427 3626574 : if (pgstat_need_entry_refs_gc())
428 11094 : pgstat_gc_entry_refs();
429 :
430 : /*
431 : * First check the lookup cache hashtable in local memory. If we find a
432 : * match here we can avoid taking locks / causing contention.
433 : */
434 3626574 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
435 1965784 : return entry_ref;
436 :
437 : Assert(entry_ref != NULL);
438 :
439 : /*
440 : * Do a lookup in the hash table first - it's quite likely that the entry
441 : * already exists, and that way we only need a shared lock.
442 : */
443 1660790 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
444 :
445 1660790 : if (create && !shhashent)
446 : {
447 : bool shfound;
448 :
449 : /*
450 : * It's possible that somebody created the entry since the above
451 : * lookup. If so, fall through to the same path as if we'd have if it
452 : * already had been created before the dshash_find() calls.
453 : */
454 293866 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
455 293866 : if (!shfound)
456 : {
457 293866 : shheader = pgstat_init_entry(kind, shhashent);
458 293866 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
459 :
460 293866 : if (created_entry != NULL)
461 90 : *created_entry = true;
462 :
463 293866 : return entry_ref;
464 : }
465 : }
466 :
467 1366924 : if (!shhashent)
468 : {
469 : /*
470 : * If we're not creating, delete the reference again. In all
471 : * likelihood it's just a stats lookup - no point wasting memory for a
472 : * shared ref to nothing...
473 : */
474 355002 : pgstat_release_entry_ref(key, entry_ref, false);
475 :
476 355002 : return NULL;
477 : }
478 : else
479 : {
480 : /*
481 : * Can get here either because dshash_find() found a match, or if
482 : * dshash_find_or_insert() found a concurrently inserted entry.
483 : */
484 :
485 1011922 : if (shhashent->dropped && create)
486 : {
487 : /*
488 : * There are legitimate cases where the old stats entry might not
489 : * yet have been dropped by the time it's reused. The most obvious
490 : * case are replication slot stats, where a new slot can be
491 : * created with the same index just after dropping. But oid
492 : * wraparound can lead to other cases as well. We just reset the
493 : * stats to their plain state.
494 : */
495 54 : shheader = pgstat_reinit_entry(kind, shhashent);
496 54 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
497 :
498 54 : if (created_entry != NULL)
499 0 : *created_entry = true;
500 :
501 54 : return entry_ref;
502 : }
503 1011868 : else if (shhashent->dropped)
504 : {
505 74 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
506 74 : pgstat_release_entry_ref(key, entry_ref, false);
507 :
508 74 : return NULL;
509 : }
510 : else
511 : {
512 1011794 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
513 1011794 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
514 :
515 1011794 : return entry_ref;
516 : }
517 : }
518 : }
519 :
520 : static void
521 1660790 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
522 : bool discard_pending)
523 : {
524 1660790 : if (entry_ref && entry_ref->pending)
525 : {
526 55434 : if (discard_pending)
527 55434 : pgstat_delete_pending_entry(entry_ref);
528 : else
529 0 : elog(ERROR, "releasing ref with pending data");
530 : }
531 :
532 1660790 : if (entry_ref && entry_ref->shared_stats)
533 : {
534 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
535 : Assert(entry_ref->pending == NULL);
536 :
537 : /*
538 : * This can't race with another backend looking up the stats entry and
539 : * increasing the refcount because it is not "legal" to create
540 : * additional references to dropped entries.
541 : */
542 1305714 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
543 : {
544 : PgStatShared_HashEntry *shent;
545 :
546 : /*
547 : * We're the last referrer to this entry, try to drop the shared
548 : * entry.
549 : */
550 :
551 : /* only dropped entries can reach a 0 refcount */
552 : Assert(entry_ref->shared_entry->dropped);
553 :
554 9226 : shent = dshash_find(pgStatLocal.shared_hash,
555 9226 : &entry_ref->shared_entry->key,
556 : true);
557 9226 : if (!shent)
558 0 : elog(ERROR, "could not find just referenced shared stats entry");
559 :
560 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
561 : Assert(entry_ref->shared_entry == shent);
562 :
563 9226 : pgstat_free_entry(shent, NULL);
564 : }
565 : }
566 :
567 1660790 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
568 0 : elog(ERROR, "entry ref vanished before deletion");
569 :
570 1660790 : if (entry_ref)
571 1660790 : pfree(entry_ref);
572 1660790 : }
573 :
574 : bool
575 1641442 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
576 : {
577 1641442 : LWLock *lock = &entry_ref->shared_stats->lock;
578 :
579 1641442 : if (nowait)
580 537940 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
581 :
582 1103502 : LWLockAcquire(lock, LW_EXCLUSIVE);
583 1103502 : return true;
584 : }
585 :
586 : /*
587 : * Separate from pgstat_lock_entry() as most callers will need to lock
588 : * exclusively.
589 : */
590 : bool
591 12448 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
592 : {
593 12448 : LWLock *lock = &entry_ref->shared_stats->lock;
594 :
595 12448 : if (nowait)
596 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
597 :
598 12448 : LWLockAcquire(lock, LW_SHARED);
599 12448 : return true;
600 : }
601 :
602 : void
603 1653876 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
604 : {
605 1653876 : LWLockRelease(&entry_ref->shared_stats->lock);
606 1653876 : }
607 :
608 : /*
609 : * Helper function to fetch and lock shared stats.
610 : */
611 : PgStat_EntryRef *
612 131118 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
613 : bool nowait)
614 : {
615 : PgStat_EntryRef *entry_ref;
616 :
617 : /* find shared table stats entry corresponding to the local entry */
618 131118 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL);
619 :
620 : /* lock the shared entry to protect the content, skip if failed */
621 131118 : if (!pgstat_lock_entry(entry_ref, nowait))
622 0 : return NULL;
623 :
624 131118 : return entry_ref;
625 : }
626 :
627 : void
628 3552 : pgstat_request_entry_refs_gc(void)
629 : {
630 3552 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
631 3552 : }
632 :
633 : static bool
634 3626574 : pgstat_need_entry_refs_gc(void)
635 : {
636 : uint64 curage;
637 :
638 3626574 : if (!pgStatEntryRefHash)
639 0 : return false;
640 :
641 : /* should have been initialized when creating pgStatEntryRefHash */
642 : Assert(pgStatSharedRefAge != 0);
643 :
644 3626574 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
645 :
646 3626574 : return pgStatSharedRefAge != curage;
647 : }
648 :
649 : static void
650 11094 : pgstat_gc_entry_refs(void)
651 : {
652 : pgstat_entry_ref_hash_iterator i;
653 : PgStat_EntryRefHashEntry *ent;
654 : uint64 curage;
655 :
656 11094 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
657 : Assert(curage != 0);
658 :
659 : /*
660 : * Some entries have been dropped. Invalidate cache pointer to them.
661 : */
662 11094 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
663 881476 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
664 : {
665 870382 : PgStat_EntryRef *entry_ref = ent->entry_ref;
666 :
667 : Assert(!entry_ref->shared_stats ||
668 : entry_ref->shared_stats->magic == 0xdeadbeef);
669 :
670 870382 : if (!entry_ref->shared_entry->dropped)
671 570958 : continue;
672 :
673 : /* cannot gc shared ref that has pending data */
674 299424 : if (entry_ref->pending != NULL)
675 291704 : continue;
676 :
677 7720 : pgstat_release_entry_ref(ent->key, entry_ref, false);
678 : }
679 :
680 11094 : pgStatSharedRefAge = curage;
681 11094 : }
682 :
683 : static void
684 28374 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
685 : Datum match_data)
686 : {
687 : pgstat_entry_ref_hash_iterator i;
688 : PgStat_EntryRefHashEntry *ent;
689 :
690 28374 : if (pgStatEntryRefHash == NULL)
691 10 : return;
692 :
693 28364 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
694 :
695 1270902 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
696 : != NULL)
697 : {
698 : Assert(ent->entry_ref != NULL);
699 :
700 1242538 : if (match && !match(ent, match_data))
701 594 : continue;
702 :
703 1241944 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
704 : }
705 : }
706 :
707 : /*
708 : * Release all local references to shared stats entries.
709 : *
710 : * When a process exits it cannot do so while still holding references onto
711 : * stats entries, otherwise the shared stats entries could never be freed.
712 : */
713 : static void
714 30036 : pgstat_release_all_entry_refs(bool discard_pending)
715 : {
716 30036 : if (pgStatEntryRefHash == NULL)
717 1692 : return;
718 :
719 28344 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
720 : Assert(pgStatEntryRefHash->members == 0);
721 28344 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
722 28344 : pgStatEntryRefHash = NULL;
723 : }
724 :
725 : static bool
726 594 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
727 : {
728 594 : Oid dboid = DatumGetObjectId(match_data);
729 :
730 594 : return ent->key.dboid == dboid;
731 : }
732 :
733 : static void
734 30 : pgstat_release_db_entry_refs(Oid dboid)
735 : {
736 30 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
737 : match_db,
738 : ObjectIdGetDatum(dboid));
739 30 : }
740 :
741 :
742 : /* ------------------------------------------------------------
743 : * Dropping and resetting of stats entries
744 : * ------------------------------------------------------------
745 : */
746 :
747 : static void
748 58308 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
749 : {
750 : dsa_pointer pdsa;
751 :
752 : /*
753 : * Fetch dsa pointer before deleting entry - that way we can free the
754 : * memory after releasing the lock.
755 : */
756 58308 : pdsa = shent->body;
757 :
758 58308 : if (!hstat)
759 56188 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
760 : else
761 2120 : dshash_delete_current(hstat);
762 :
763 58308 : dsa_free(pgStatLocal.dsa, pdsa);
764 58308 : }
765 :
766 : /*
767 : * Helper for both pgstat_drop_database_and_contents() and
768 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
769 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
770 : * case the entry needs to be already locked.
771 : */
772 : static bool
773 58366 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
774 : dshash_seq_status *hstat)
775 : {
776 : Assert(shent->body != InvalidDsaPointer);
777 :
778 : /* should already have released local reference */
779 58366 : if (pgStatEntryRefHash)
780 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
781 :
782 : /*
783 : * Signal that the entry is dropped - this will eventually cause other
784 : * backends to release their references.
785 : */
786 58366 : if (shent->dropped)
787 0 : elog(ERROR, "can only drop stats once");
788 58366 : shent->dropped = true;
789 :
790 : /* release refcount marking entry as not dropped */
791 58366 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
792 : {
793 49082 : pgstat_free_entry(shent, hstat);
794 49082 : return true;
795 : }
796 : else
797 : {
798 9284 : if (!hstat)
799 9284 : dshash_release_lock(pgStatLocal.shared_hash, shent);
800 9284 : return false;
801 : }
802 : }
803 :
804 : /*
805 : * Drop stats for the database and all the objects inside that database.
806 : */
807 : static void
808 30 : pgstat_drop_database_and_contents(Oid dboid)
809 : {
810 : dshash_seq_status hstat;
811 : PgStatShared_HashEntry *p;
812 30 : uint64 not_freed_count = 0;
813 :
814 : Assert(OidIsValid(dboid));
815 :
816 : Assert(pgStatLocal.shared_hash != NULL);
817 :
818 : /*
819 : * This backend might very well be the only backend holding a reference to
820 : * about-to-be-dropped entries. Ensure that we're not preventing it from
821 : * being cleaned up till later.
822 : *
823 : * Doing this separately from the dshash iteration below avoids having to
824 : * do so while holding a partition lock on the shared hashtable.
825 : */
826 30 : pgstat_release_db_entry_refs(dboid);
827 :
828 : /* some of the dshash entries are to be removed, take exclusive lock. */
829 30 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
830 10098 : while ((p = dshash_seq_next(&hstat)) != NULL)
831 : {
832 10068 : if (p->dropped)
833 4 : continue;
834 :
835 10064 : if (p->key.dboid != dboid)
836 8056 : continue;
837 :
838 2008 : if (!pgstat_drop_entry_internal(p, &hstat))
839 : {
840 : /*
841 : * Even statistics for a dropped database might currently be
842 : * accessed (consider e.g. database stats for pg_stat_database).
843 : */
844 0 : not_freed_count++;
845 : }
846 : }
847 30 : dshash_seq_term(&hstat);
848 :
849 : /*
850 : * If some of the stats data could not be freed, signal the reference
851 : * holders to run garbage collection of their cached pgStatShmLookupCache.
852 : */
853 30 : if (not_freed_count > 0)
854 0 : pgstat_request_entry_refs_gc();
855 30 : }
856 :
857 : bool
858 83698 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
859 : {
860 83698 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
861 : PgStatShared_HashEntry *shent;
862 83698 : bool freed = true;
863 :
864 : /* delete local reference */
865 83698 : if (pgStatEntryRefHash)
866 : {
867 : PgStat_EntryRefHashEntry *lohashent =
868 68414 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
869 :
870 68414 : if (lohashent)
871 56050 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
872 : true);
873 : }
874 :
875 : /* mark entry in shared hashtable as deleted, drop if possible */
876 83698 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
877 83698 : if (shent)
878 : {
879 56246 : freed = pgstat_drop_entry_internal(shent, NULL);
880 :
881 : /*
882 : * Database stats contain other stats. Drop those as well when
883 : * dropping the database. XXX: Perhaps this should be done in a
884 : * slightly more principled way? But not obvious what that'd look
885 : * like, and so far this is the only case...
886 : */
887 56246 : if (key.kind == PGSTAT_KIND_DATABASE)
888 30 : pgstat_drop_database_and_contents(key.dboid);
889 : }
890 :
891 83698 : return freed;
892 : }
893 :
894 : void
895 882 : pgstat_drop_all_entries(void)
896 : {
897 : dshash_seq_status hstat;
898 : PgStatShared_HashEntry *ps;
899 882 : uint64 not_freed_count = 0;
900 :
901 882 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
902 994 : while ((ps = dshash_seq_next(&hstat)) != NULL)
903 : {
904 112 : if (ps->dropped)
905 0 : continue;
906 :
907 112 : if (!pgstat_drop_entry_internal(ps, &hstat))
908 0 : not_freed_count++;
909 : }
910 882 : dshash_seq_term(&hstat);
911 :
912 882 : if (not_freed_count > 0)
913 0 : pgstat_request_entry_refs_gc();
914 882 : }
915 :
916 : static void
917 15944 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
918 : TimestampTz ts)
919 : {
920 15944 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
921 :
922 15944 : memset(pgstat_get_entry_data(kind, header), 0,
923 : pgstat_get_entry_len(kind));
924 :
925 15944 : if (kind_info->reset_timestamp_cb)
926 282 : kind_info->reset_timestamp_cb(header, ts);
927 15944 : }
928 :
929 : /*
930 : * Reset one variable-numbered stats entry.
931 : */
932 : void
933 248 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
934 : {
935 : PgStat_EntryRef *entry_ref;
936 :
937 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
938 :
939 248 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
940 248 : if (!entry_ref || entry_ref->shared_entry->dropped)
941 2 : return;
942 :
943 246 : (void) pgstat_lock_entry(entry_ref, false);
944 246 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
945 246 : pgstat_unlock_entry(entry_ref);
946 : }
947 :
948 : /*
949 : * Scan through the shared hashtable of stats, resetting statistics if
950 : * approved by the provided do_reset() function.
951 : */
952 : void
953 34 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
954 : Datum match_data, TimestampTz ts)
955 : {
956 : dshash_seq_status hstat;
957 : PgStatShared_HashEntry *p;
958 :
959 : /* dshash entry is not modified, take shared lock */
960 34 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
961 23976 : while ((p = dshash_seq_next(&hstat)) != NULL)
962 : {
963 : PgStatShared_Common *header;
964 :
965 23942 : if (p->dropped)
966 2 : continue;
967 :
968 23940 : if (!do_reset(p, match_data))
969 8242 : continue;
970 :
971 15698 : header = dsa_get_address(pgStatLocal.dsa, p->body);
972 :
973 15698 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
974 :
975 15698 : shared_stat_reset_contents(p->key.kind, header, ts);
976 :
977 15698 : LWLockRelease(&header->lock);
978 : }
979 34 : dshash_seq_term(&hstat);
980 34 : }
981 :
982 : static bool
983 2902 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
984 : {
985 2902 : return p->key.kind == DatumGetInt32(match_data);
986 : }
987 :
988 : void
989 8 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
990 : {
991 8 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
992 8 : }
993 :
994 : static void
995 3626574 : pgstat_setup_memcxt(void)
996 : {
997 3626574 : if (unlikely(!pgStatSharedRefContext))
998 28344 : pgStatSharedRefContext =
999 28344 : AllocSetContextCreate(TopMemoryContext,
1000 : "PgStat Shared Ref",
1001 : ALLOCSET_SMALL_SIZES);
1002 3626574 : if (unlikely(!pgStatEntryRefHashContext))
1003 28344 : pgStatEntryRefHashContext =
1004 28344 : AllocSetContextCreate(TopMemoryContext,
1005 : "PgStat Shared Ref Hash",
1006 : ALLOCSET_SMALL_SIZES);
1007 3626574 : }
|