Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * shmem.c
4 : * create shared memory and initialize shared memory data structures.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/shmem.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * POSTGRES processes share one or more regions of shared memory.
17 : * The shared memory is created by a postmaster and is inherited
18 : * by each backend via fork() (or, in some ports, via other OS-specific
19 : * methods). The routines in this file are used for allocating and
20 : * binding to shared memory data structures.
21 : *
22 : * This module provides facilities to allocate fixed-size structures in shared
23 : * memory, for things like variables shared between all backend processes.
24 : * Each such structure has a string name to identify it, specified when it is
25 : * requested. shmem_hash.c provides a shared hash table implementation on top
26 : * of that.
27 : *
28 : * Shared memory areas should usually not be allocated after postmaster
29 : * startup, although we do allow small allocations later for the benefit of
30 : * extension modules that are loaded after startup. Despite that allowance,
31 : * extensions that need shared memory should be added in
32 : * shared_preload_libraries, because the allowance is quite small and there is
33 : * no guarantee that any memory is available after startup.
34 : *
35 : * Nowadays, there is also another way to allocate shared memory called
36 : * Dynamic Shared Memory. See dsm.c for that facility. One big difference
37 : * between traditional shared memory handled by shmem.c and dynamic shared
38 : * memory is that traditional shared memory areas are mapped to the same
39 : * address in all processes, so you can use normal pointers in shared memory
40 : * structs. With Dynamic Shared Memory, you must use offsets or DSA pointers
41 : * instead.
42 : *
43 : * Shared memory managed by shmem.c can never be freed, once allocated. Each
44 : * hash table has its own free list, so hash buckets can be reused when an
45 : * item is deleted.
46 : *
47 : * Usage
48 : * -----
49 : *
50 : * To allocate shared memory, you need to register a set of callback functions
51 : * which handle the lifecycle of the allocation. In the request_fn callback,
52 : * call ShmemRequestStruct() with the desired name and size. When the area is
53 : * later allocated or attached to, the global variable pointed to by the .ptr
54 : * option is set to the shared memory location of the allocation. The init_fn
55 : * callback can perform additional initialization.
56 : *
57 : * typedef struct MyShmemData {
58 : * ...
59 : * } MyShmemData;
60 : *
61 : * static MyShmemData *MyShmem;
62 : *
63 : * static void my_shmem_request(void *arg);
64 : * static void my_shmem_init(void *arg);
65 : *
66 : * const ShmemCallbacks MyShmemCallbacks = {
67 : * .request_fn = my_shmem_request,
68 : * .init_fn = my_shmem_init,
69 : * };
70 : *
71 : * static void
72 : * my_shmem_request(void *arg)
73 : * {
74 : * ShmemRequestStruct(.name = "My shmem area",
75 : * .size = sizeof(MyShmemData),
76 : * .ptr = (void **) &MyShmem,
77 : * );
78 : * }
79 : *
 * In built-in PostgreSQL code, add the callbacks to the list in
81 : * src/include/storage/subsystemlist.h. In an add-in module, you can register
82 : * the callbacks by calling RegisterShmemCallbacks(&MyShmemCallbacks) in the
83 : * extension's _PG_init() function.
84 : *
85 : * Lifecycle
86 : * ---------
87 : *
88 : * Initializing shared memory happens in multiple phases. In the first phase,
89 : * during postmaster startup, all the request_fn callbacks are called. Only
90 : * after all the request_fn callbacks have been called and all the shmem areas
 * have been requested by the ShmemRequestStruct() calls do we know how much
 * shared memory we need in total. After that, the postmaster allocates the
 * global shared memory segment, and calls all the init_fn callbacks to initialize
94 : * all the requested shmem areas.
95 : *
96 : * In standard Unix-ish environments, individual backends do not need to
97 : * re-establish their local pointers into shared memory, because they inherit
98 : * correct values of those variables via fork() from the postmaster. However,
99 : * this does not work in the EXEC_BACKEND case. In ports using EXEC_BACKEND,
100 : * backend startup also calls the shmem_request callbacks to re-establish the
101 : * knowledge about each shared memory area, sets the pointer variables
102 : * (*options->ptr), and calls the attach_fn callback, if any, for additional
103 : * per-backend setup.
104 : *
105 : * Legacy ShmemInitStruct()/ShmemInitHash() functions
106 : * --------------------------------------------------
107 : *
108 : * ShmemInitStruct()/ShmemInitHash() is another way of registering shmem
109 : * areas. It pre-dates the ShmemRequestStruct()/ShmemRequestHash() functions,
110 : * and should not be used in new code, but as of this writing it is still
111 : * widely used in extensions.
112 : *
113 : * To allocate a shmem area with ShmemInitStruct(), you need to separately
114 : * register the size needed for the area by calling RequestAddinShmemSpace()
115 : * from the extension's shmem_request_hook, and allocate the area by calling
116 : * ShmemInitStruct() from the extension's shmem_startup_hook. There are no
117 : * init/attach callbacks. Instead, the caller of ShmemInitStruct() must check
118 : * the return status of ShmemInitStruct() and initialize the struct if it was
119 : * not previously initialized.
120 : *
121 : * Calling ShmemAlloc() directly
122 : * -----------------------------
123 : *
124 : * There's a more low-level way of allocating shared memory too: you can call
125 : * ShmemAlloc() directly. It's used to implement the higher level mechanisms,
126 : * and should generally not be called directly.
127 : */
128 :
129 : #include "postgres.h"
130 :
131 : #include <unistd.h>
132 :
133 : #include "access/slru.h"
134 : #include "common/int.h"
135 : #include "fmgr.h"
136 : #include "funcapi.h"
137 : #include "miscadmin.h"
138 : #include "port/pg_bitutils.h"
139 : #include "port/pg_numa.h"
140 : #include "storage/lwlock.h"
141 : #include "storage/pg_shmem.h"
142 : #include "storage/shmem.h"
143 : #include "storage/shmem_internal.h"
144 : #include "storage/spin.h"
145 : #include "utils/builtins.h"
146 : #include "utils/tuplestore.h"
147 :
/*
 * Registered callbacks.
 *
 * During postmaster startup, we accumulate the callbacks from all subsystems
 * in this list.
 *
 * This is in process private memory, although on Unix-like systems, we expect
 * all the registrations to happen at postmaster startup time and be inherited
 * by all the child processes via fork().
 */
static List *registered_shmem_callbacks;

/*
 * In the shmem request phase, all the shmem areas requested with the
 * ShmemRequest*() functions are accumulated here.
 */
typedef struct
{
	ShmemStructOpts *options;	/* caller's request; allocated in
								 * TopMemoryContext, freed after init/attach */
	ShmemRequestKind kind;		/* SHMEM_KIND_STRUCT / _HASH / _SLRU */
} ShmemRequest;

/* List of ShmemRequest *, consumed by ShmemInitRequested()/ShmemAttachRequested() */
static List *pending_shmem_requests;
171 :
172 : /*
173 : * Per-process state machine, for sanity checking that we do things in the
174 : * right order.
175 : *
176 : * Postmaster:
177 : * INITIAL -> REQUESTING -> INITIALIZING -> DONE
178 : *
179 : * Backends in EXEC_BACKEND mode:
180 : * INITIAL -> REQUESTING -> ATTACHING -> DONE
181 : *
182 : * Late request:
183 : * DONE -> REQUESTING -> AFTER_STARTUP_ATTACH_OR_INIT -> DONE
184 : */
enum shmem_request_state
{
	/* Initial state */
	SRS_INITIAL,

	/*
	 * When we start calling the shmem_request callbacks, we enter the
	 * SRS_REQUESTING phase.  All ShmemRequestStruct calls happen in this
	 * state.
	 */
	SRS_REQUESTING,

	/*
	 * Postmaster has finished all shmem requests, and is now initializing the
	 * shared memory segment.  init_fn callbacks are called in this state.
	 */
	SRS_INITIALIZING,

	/*
	 * A postmaster child process is starting up.  attach_fn callbacks are
	 * called in this state.
	 */
	SRS_ATTACHING,

	/* An after-startup allocation or attachment is in progress */
	SRS_AFTER_STARTUP_ATTACH_OR_INIT,

	/* Normal state after shmem initialization / attachment */
	SRS_DONE,
};

/* Current phase of this process's shared memory setup */
static enum shmem_request_state shmem_request_state = SRS_INITIAL;
216 :
/*
 * This is the first data structure stored in the shared memory segment, at
 * the offset that PGShmemHeader->content_offset points to.  Allocations by
 * ShmemAlloc() are carved out of the space after this.
 *
 * For the base pointer and the total size of the shmem segment, we rely on
 * the PGShmemHeader.
 */
typedef struct ShmemAllocatorData
{
	Size		free_offset;	/* offset to first free space from ShmemBase */

	/* protects 'free_offset' */
	slock_t		shmem_lock;

	HASHHDR    *index;			/* location of ShmemIndex */
	size_t		index_size;		/* size of shmem region holding ShmemIndex */
	LWLock		index_lock;		/* protects ShmemIndex */
} ShmemAllocatorData;

/* Convenience alias for the LWLock protecting the shmem index */
#define ShmemIndexLock (&ShmemAllocator->index_lock)

static void *ShmemAllocRaw(Size size, Size alignment, Size *allocated_size);
240 :
/* shared memory global variables */

static PGShmemHeader *ShmemSegHdr;	/* shared mem segment header */
static void *ShmemBase;			/* start address of shared memory */
static void *ShmemEnd;			/* end+1 address of shared memory */

/* allocator bookkeeping; points into the shared memory segment itself */
static ShmemAllocatorData *ShmemAllocator;

/*
 * ShmemIndex is a global directory of shmem areas, itself also stored in the
 * shared memory.
 */
static HTAB *ShmemIndex;

/* max size of data structure string name */
#define SHMEM_INDEX_KEYSIZE (48)

/*
 * # of additional entries to reserve in the shmem index table, for
 * allocations after postmaster startup.  (This is not a hard limit, the hash
 * table can grow larger than that if there is shared memory available)
 */
#define SHMEM_INDEX_ADDITIONAL_SIZE (128)

/* this is a hash bucket in the shmem index table */
typedef struct
{
	char		key[SHMEM_INDEX_KEYSIZE];	/* string name */
	void	   *location;		/* location in shared mem */
	Size		size;			/* # bytes requested for the structure */
	Size		allocated_size; /* # bytes actually allocated */
} ShmemIndexEnt;

/* To get reliable results for NUMA inquiry we need to "touch pages" once */
static bool firstNumaTouch = true;

static void CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks);
static void InitShmemIndexEntry(ShmemRequest *request);
static bool AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok);

Datum		pg_numa_available(PG_FUNCTION_ARGS);
282 :
283 : /*
284 : * ShmemRequestStruct() --- request a named shared memory area
285 : *
286 : * Subsystems call this to register their shared memory needs. This is
287 : * usually done early in postmaster startup, before the shared memory segment
288 : * has been created, so that the size can be included in the estimate for
289 : * total amount of shared memory needed. We set aside a small amount of
290 : * memory for allocations that happen later, for the benefit of non-preloaded
291 : * extensions, but that should not be relied upon.
292 : *
293 : * This does not yet allocate the memory, but merely registers the need for
294 : * it. The actual allocation happens later in the postmaster startup
295 : * sequence.
296 : *
297 : * This must be called from a shmem_request callback function, registered with
298 : * RegisterShmemCallbacks(). This enforces a coding pattern that works the
299 : * same in normal Unix systems and with EXEC_BACKEND. On Unix systems, the
300 : * shmem_request callbacks are called once, early in postmaster startup, and
301 : * the child processes inherit the struct descriptors and any other
302 : * per-process state from the postmaster. In EXEC_BACKEND mode, shmem_request
303 : * callbacks are *also* called in each backend, at backend startup, to
304 : * re-establish the struct descriptors. By calling the same function in both
305 : * cases, we ensure that all the shmem areas are registered the same way in
306 : * all processes.
307 : *
308 : * 'options' defines the name and size of the area, and any other optional
309 : * features. Leave unused options as zeros. The options are copied to
310 : * longer-lived memory, so it doesn't need to live after the
311 : * ShmemRequestStruct() call and can point to a local variable in the calling
312 : * function. The 'name' must point to a long-lived string though, only the
313 : * pointer to it is copied.
314 : */
315 : void
316 77676 : ShmemRequestStructWithOpts(const ShmemStructOpts *options)
317 : {
318 : ShmemStructOpts *options_copy;
319 :
320 77676 : options_copy = MemoryContextAlloc(TopMemoryContext,
321 : sizeof(ShmemStructOpts));
322 77676 : memcpy(options_copy, options, sizeof(ShmemStructOpts));
323 :
324 77676 : ShmemRequestInternal(options_copy, SHMEM_KIND_STRUCT);
325 77676 : }
326 :
327 : /*
328 : * Internal workhorse of ShmemRequestStruct() and ShmemRequestHash().
329 : *
330 : * Note: Unlike in the public ShmemRequestStruct() and ShmemRequestHash()
331 : * functions, 'options' is *not* copied. It must be allocated in
332 : * TopMemoryContext by the caller, and will be freed after the init/attach
333 : * callbacks have been called. This allows ShmemRequestHash() to pass a
334 : * pointer to the extended ShmemHashOpts struct instead.
335 : */
void
ShmemRequestInternal(ShmemStructOpts *options, ShmemRequestKind kind)
{
	ShmemRequest *request;

	/* Check the options */
	if (options->name == NULL)
		elog(ERROR, "shared memory request is missing 'name' option");

	if (IsUnderPostmaster)
	{
		/*
		 * In a child process, SHMEM_ATTACH_UNKNOWN_SIZE is also accepted;
		 * AttachShmemIndexEntry() then skips the size cross-check against
		 * the existing index entry.
		 */
		if (options->size <= 0 && options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
			elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
				 options->size, options->name);
	}
	else
	{
		/* At startup the requester must know the exact size */
		if (options->size == SHMEM_ATTACH_UNKNOWN_SIZE)
			elog(ERROR, "SHMEM_ATTACH_UNKNOWN_SIZE cannot be used during startup");
		if (options->size <= 0)
			elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
				 options->size, options->name);
	}

	/* Alignment, if given, must be a power of two */
	if (options->alignment != 0 && pg_nextpower2_size_t(options->alignment) != options->alignment)
		elog(ERROR, "invalid alignment %zu for shared memory request for \"%s\"",
			 options->alignment, options->name);

	/* Check that we're in the right state */
	if (shmem_request_state != SRS_REQUESTING)
		elog(ERROR, "ShmemRequestStruct can only be called from a shmem_request callback");

	/* Check that it's not already registered in this process */
	foreach_ptr(ShmemRequest, existing, pending_shmem_requests)
	{
		if (strcmp(existing->options->name, options->name) == 0)
			ereport(ERROR,
					(errmsg("shared memory struct \"%s\" is already registered",
							options->name)));
	}

	/* Request looks valid, remember it */
	request = palloc(sizeof(ShmemRequest));
	request->options = options;	/* not copied; must live in TopMemoryContext */
	request->kind = kind;
	pending_shmem_requests = lappend(pending_shmem_requests, request);
}
383 :
384 : /*
385 : * ShmemGetRequestedSize() --- estimate the total size of all registered shared
386 : * memory structures.
387 : *
388 : * This is called at postmaster startup, before the shared memory segment has
389 : * been created.
390 : */
391 : size_t
392 2289 : ShmemGetRequestedSize(void)
393 : {
394 : size_t size;
395 :
396 : /* memory needed for the ShmemIndex */
397 2289 : size = hash_estimate_size(list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE,
398 : sizeof(ShmemIndexEnt));
399 2289 : size = CACHELINEALIGN(size);
400 :
401 : /* memory needed for all the requested areas */
402 183140 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
403 : {
404 178562 : size_t alignment = request->options->alignment;
405 :
406 : /* pad the start address for alignment like ShmemAllocRaw() does */
407 178562 : if (alignment < PG_CACHE_LINE_SIZE)
408 171695 : alignment = PG_CACHE_LINE_SIZE;
409 178562 : size = TYPEALIGN(alignment, size);
410 :
411 178562 : size = add_size(size, request->options->size);
412 : }
413 :
414 2289 : return size;
415 : }
416 :
417 : /*
418 : * ShmemInitRequested() --- allocate and initialize requested shared memory
419 : * structures.
420 : *
421 : * This is called once at postmaster startup, after the shared memory segment
422 : * has been created.
423 : */
424 : void
425 1230 : ShmemInitRequested(void)
426 : {
427 : /* should be called only by the postmaster or a standalone backend */
428 : Assert(!IsUnderPostmaster);
429 : Assert(shmem_request_state == SRS_INITIALIZING);
430 :
431 : /*
432 : * Initialize the ShmemIndex entries and perform basic initialization of
433 : * all the requested memory areas. There are no concurrent processes yet,
434 : * so no need for locking.
435 : */
436 98409 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
437 : {
438 95949 : InitShmemIndexEntry(request);
439 95949 : pfree(request->options);
440 : }
441 1230 : list_free_deep(pending_shmem_requests);
442 1230 : pending_shmem_requests = NIL;
443 :
444 : /*
445 : * Call the subsystem-specific init callbacks to finish initialization of
446 : * all the areas.
447 : */
448 56606 : foreach_ptr(const ShmemCallbacks, callbacks, registered_shmem_callbacks)
449 : {
450 54146 : if (callbacks->init_fn)
451 49222 : callbacks->init_fn(callbacks->opaque_arg);
452 : }
453 :
454 1230 : shmem_request_state = SRS_DONE;
455 1230 : }
456 :
457 : /*
458 : * Re-establish process private state related to shmem areas.
459 : *
460 : * This is called at backend startup in EXEC_BACKEND mode, in every backend.
461 : */
462 : #ifdef EXEC_BACKEND
463 : void
464 : ShmemAttachRequested(void)
465 : {
466 : ListCell *lc;
467 :
468 : /* Must be initializing a (non-standalone) backend */
469 : Assert(IsUnderPostmaster);
470 : Assert(ShmemAllocator->index != NULL);
471 : Assert(shmem_request_state == SRS_REQUESTING);
472 : shmem_request_state = SRS_ATTACHING;
473 :
474 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
475 :
476 : /*
477 : * Attach to all the requested memory areas.
478 : */
479 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
480 : {
481 : AttachShmemIndexEntry(request, false);
482 : pfree(request->options);
483 : }
484 : list_free_deep(pending_shmem_requests);
485 : pending_shmem_requests = NIL;
486 :
487 : /* Call attach callbacks */
488 : foreach(lc, registered_shmem_callbacks)
489 : {
490 : const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
491 :
492 : if (callbacks->attach_fn)
493 : callbacks->attach_fn(callbacks->opaque_arg);
494 : }
495 :
496 : LWLockRelease(ShmemIndexLock);
497 :
498 : shmem_request_state = SRS_DONE;
499 : }
500 : #endif
501 :
502 : /*
503 : * Insert requested shmem area into the shared memory index and initialize it.
504 : *
505 : * Note that this only does performs basic initialization depending on
506 : * ShmemRequestKind, like setting the global pointer variable to the area for
507 : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
508 : * This does *not* call the subsystem-specific init callbacks. That's done
509 : * later after all the shmem areas have been initialized or attached to.
510 : */
static void
InitShmemIndexEntry(ShmemRequest *request)
{
	const char *name = request->options->name;
	ShmemIndexEnt *index_entry;
	bool		found;
	size_t		allocated_size;
	void	   *structPtr;

	/* look it up in the shmem index */
	index_entry = (ShmemIndexEnt *)
		hash_search(ShmemIndex, name, HASH_ENTER_NULL, &found);
	if (found)
		elog(ERROR, "shared memory struct \"%s\" is already initialized", name);
	if (!index_entry)
	{
		/* tried to add it to the hash table, but there was no space */
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("could not create ShmemIndex entry for data structure \"%s\"",
						name)));
	}

	/*
	 * We inserted the entry to the shared memory index.  Allocate requested
	 * amount of shared memory for it, and initialize the index entry.
	 */
	structPtr = ShmemAllocRaw(request->options->size,
							  request->options->alignment,
							  &allocated_size);
	if (structPtr == NULL)
	{
		/* out of memory; remove the failed ShmemIndex entry */
		hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("not enough shared memory for data structure"
						" \"%s\" (%zu bytes requested)",
						name, request->options->size)));
	}
	/* allocated_size may exceed size because of alignment padding */
	index_entry->size = request->options->size;
	index_entry->allocated_size = allocated_size;
	index_entry->location = structPtr;

	/* Initialize depending on the kind of shmem area it is */
	switch (request->kind)
	{
		case SHMEM_KIND_STRUCT:
			/* publish the area's address through the caller's variable */
			if (request->options->ptr)
				*(request->options->ptr) = index_entry->location;
			break;
		case SHMEM_KIND_HASH:
			shmem_hash_init(structPtr, request->options);
			break;
		case SHMEM_KIND_SLRU:
			shmem_slru_init(structPtr, request->options);
			break;
	}
}
570 :
571 : /*
572 : * Look up a named shmem area in the shared memory index and attach to it.
573 : *
574 : * Note that this only performs the basic attachment actions depending on
575 : * ShmemRequestKind, like setting the global pointer variable to the area for
576 : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
577 : * This does *not* call the subsystem-specific attach callbacks. That's done
578 : * later after all the shmem areas have been initialized or attached to.
579 : */
static bool
AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok)
{
	const char *name = request->options->name;
	ShmemIndexEnt *index_entry;

	/* Look it up in the shmem index */
	index_entry = (ShmemIndexEnt *)
		hash_search(ShmemIndex, name, HASH_FIND, NULL);
	if (!index_entry)
	{
		/* returns false (rather than erroring) only when missing_ok */
		if (!missing_ok)
			ereport(ERROR,
					(errmsg("could not find ShmemIndex entry for data structure \"%s\"",
							request->options->name)));
		return false;
	}

	/*
	 * Check that the size in the index matches the request.  A request made
	 * with SHMEM_ATTACH_UNKNOWN_SIZE accepts whatever size the creator used.
	 */
	if (index_entry->size != request->options->size &&
		request->options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
	{
		ereport(ERROR,
				(errmsg("shared memory struct \"%s\" was created with"
						" different size: existing %zu, requested %zu",
						name, index_entry->size, request->options->size)));
	}

	/*
	 * Re-establish the caller's pointer variable, or do other actions to
	 * attach depending on the kind of shmem area it is.
	 */
	switch (request->kind)
	{
		case SHMEM_KIND_STRUCT:
			if (request->options->ptr)
				*(request->options->ptr) = index_entry->location;
			break;
		case SHMEM_KIND_HASH:
			shmem_hash_attach(index_entry->location, request->options);
			break;
		case SHMEM_KIND_SLRU:
			shmem_slru_attach(index_entry->location, request->options);
			break;
	}

	return true;
}
628 :
629 : /*
630 : * InitShmemAllocator() --- set up basic pointers to shared memory.
631 : *
632 : * Called at postmaster or stand-alone backend startup, to initialize the
633 : * allocator's data structure in the shared memory segment. In EXEC_BACKEND,
634 : * this is also called at backend startup, to set up pointers to the
635 : * already-initialized data structure.
636 : */
void
InitShmemAllocator(PGShmemHeader *seghdr)
{
	Size		offset;
	int64		hash_nelems;
	HASHCTL		info;
	int			hash_flags;

#ifndef EXEC_BACKEND
	/* Without EXEC_BACKEND, only postmaster/standalone ever call this */
	Assert(!IsUnderPostmaster);
#endif
	Assert(seghdr != NULL);

	if (IsUnderPostmaster)
	{
		/* EXEC_BACKEND child: runs before its shmem_request callbacks */
		Assert(shmem_request_state == SRS_INITIAL);
	}
	else
	{
		Assert(shmem_request_state == SRS_REQUESTING);
		shmem_request_state = SRS_INITIALIZING;
	}

	/*
	 * We assume the pointer and offset are MAXALIGN.  Not a hard requirement,
	 * but it's true today and keeps the math below simpler.
	 */
	Assert(seghdr == (void *) MAXALIGN(seghdr));
	Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));

	/*
	 * Allocations after this point should go through ShmemAlloc, which
	 * expects to allocate everything on cache line boundaries.  Make sure the
	 * first allocation begins on a cache line boundary.
	 */
	offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
	if (offset > seghdr->totalsize)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory (%zu bytes requested)",
						offset)));

	/*
	 * In postmaster or stand-alone backend, initialize the shared memory
	 * allocator so that we can allocate shared memory for ShmemIndex using
	 * ShmemAlloc().  In a regular backend just set up the pointers required
	 * by ShmemAlloc().
	 */
	ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
	if (!IsUnderPostmaster)
	{
		SpinLockInit(&ShmemAllocator->shmem_lock);
		ShmemAllocator->free_offset = offset;
		LWLockInitialize(&ShmemAllocator->index_lock, LWTRANCHE_SHMEM_INDEX);
	}

	ShmemSegHdr = seghdr;
	ShmemBase = seghdr;
	ShmemEnd = (char *) ShmemBase + seghdr->totalsize;

	/*
	 * Create (or attach to) the shared memory index of shmem areas.
	 *
	 * This is the same initialization as ShmemInitHash() does, but we cannot
	 * use ShmemInitHash() here because it relies on ShmemIndex being already
	 * initialized.
	 */
	hash_nelems = list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE;

	info.keysize = SHMEM_INDEX_KEYSIZE;
	info.entrysize = sizeof(ShmemIndexEnt);
	hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE;

	if (!IsUnderPostmaster)
	{
		/* Carve out the index's space; EXEC_BACKEND children reuse it */
		ShmemAllocator->index_size = hash_estimate_size(hash_nelems, info.entrysize);
		ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size);
	}
	ShmemIndex = shmem_hash_create(ShmemAllocator->index,
								   ShmemAllocator->index_size,
								   IsUnderPostmaster,
								   "ShmemIndex", hash_nelems,
								   &info, hash_flags);
	Assert(ShmemIndex != NULL);

	/*
	 * Add an entry for ShmemIndex itself into ShmemIndex, so that it's
	 * visible in the pg_shmem_allocations view
	 */
	if (!IsUnderPostmaster)
	{
		bool		found;
		ShmemIndexEnt *result = (ShmemIndexEnt *)
			hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found);

		Assert(!found);
		result->size = ShmemAllocator->index_size;
		result->allocated_size = ShmemAllocator->index_size;
		result->location = ShmemAllocator->index;
	}
}
738 :
739 : /*
740 : * Reset state on postmaster crash restart.
741 : */
742 : void
743 5 : ResetShmemAllocator(void)
744 : {
745 : Assert(!IsUnderPostmaster);
746 5 : shmem_request_state = SRS_INITIAL;
747 :
748 5 : pending_shmem_requests = NIL;
749 :
750 : /*
751 : * Note that we don't clear the registered callbacks. We will need to
752 : * call them again as we restart
753 : */
754 5 : }
755 :
756 : /*
757 : * ShmemAlloc -- allocate max-aligned chunk from shared memory
758 : *
759 : * Throws error if request cannot be satisfied.
760 : *
761 : * Assumes ShmemSegHdr is initialized.
762 : */
763 : void *
764 1233 : ShmemAlloc(Size size)
765 : {
766 : void *newSpace;
767 : Size allocated_size;
768 :
769 1233 : newSpace = ShmemAllocRaw(size, 0, &allocated_size);
770 1233 : if (!newSpace)
771 0 : ereport(ERROR,
772 : (errcode(ERRCODE_OUT_OF_MEMORY),
773 : errmsg("out of shared memory (%zu bytes requested)",
774 : size)));
775 1233 : return newSpace;
776 : }
777 :
778 : /*
779 : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
780 : *
781 : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
782 : */
783 : void *
784 0 : ShmemAllocNoError(Size size)
785 : {
786 : Size allocated_size;
787 :
788 0 : return ShmemAllocRaw(size, 0, &allocated_size);
789 : }
790 :
791 : /*
792 : * ShmemAllocRaw -- allocate align chunk and return allocated size
793 : *
794 : * Also sets *allocated_size to the number of bytes allocated, which will
795 : * be equal to the number requested plus any padding we choose to add.
796 : */
static void *
ShmemAllocRaw(Size size, Size alignment, Size *allocated_size)
{
	Size		rawStart;
	Size		newStart;
	Size		newFree;
	void	   *newSpace;

	/*
	 * Ensure all space is adequately aligned.  We used to only MAXALIGN this
	 * space but experience has proved that on modern systems that is not good
	 * enough.  Many parts of the system are very sensitive to critical data
	 * structures getting split across cache line boundaries.  To avoid that,
	 * attempt to align the beginning of the allocation to a cache line
	 * boundary.  The calling code will still need to be careful about how it
	 * uses the allocated space - e.g. by padding each element in an array of
	 * structures out to a power-of-two size - but without this, even that
	 * won't be sufficient.
	 */
	if (alignment < PG_CACHE_LINE_SIZE)
		alignment = PG_CACHE_LINE_SIZE;

	Assert(ShmemSegHdr != NULL);

	/* free_offset is shared; bump it only while holding the spinlock */
	SpinLockAcquire(&ShmemAllocator->shmem_lock);

	rawStart = ShmemAllocator->free_offset;
	newStart = TYPEALIGN(alignment, rawStart);

	/*
	 * NOTE(review): newStart + size is assumed not to overflow here;
	 * presumably request sizes are sanity-checked upstream -- confirm.
	 */
	newFree = newStart + size;
	if (newFree <= ShmemSegHdr->totalsize)
	{
		newSpace = (char *) ShmemBase + newStart;
		ShmemAllocator->free_offset = newFree;
	}
	else
		newSpace = NULL;		/* out of space; callers decide how to react */

	SpinLockRelease(&ShmemAllocator->shmem_lock);

	/* note this assert is okay with newSpace == NULL */
	Assert(newSpace == (void *) TYPEALIGN(alignment, newSpace));

	/* reported size includes any alignment padding consumed */
	*allocated_size = newFree - rawStart;
	return newSpace;
}
843 :
844 : /*
845 : * ShmemAddrIsValid -- test if an address refers to shared memory
846 : *
847 : * Returns true if the pointer points within the shared memory segment.
848 : */
849 : bool
850 0 : ShmemAddrIsValid(const void *addr)
851 : {
852 0 : return (addr >= ShmemBase) && (addr < ShmemEnd);
853 : }
854 :
855 : /*
856 : * Register callbacks that define a shared memory area (or multiple areas).
857 : *
858 : * The system will call the callbacks at different stages of postmaster or
859 : * backend startup, to allocate and initialize the area.
860 : *
861 : * This is normally called early during postmaster startup, but if the
 * SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP flag is set, this can also be used after
863 : * startup, although after startup there's no guarantee that there's enough
864 : * shared memory available. When called after startup, this immediately calls
865 : * the right callbacks depending on whether another backend had already
866 : * initialized the area.
867 : *
868 : * Note: In EXEC_BACKEND mode, this needs to be called in every backend
869 : * process. That's needed because we cannot pass down the callback function
870 : * pointers from the postmaster process, because different processes may have
871 : * loaded libraries to different addresses.
872 : */
873 : void
874 54631 : RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
875 : {
876 54631 : if (shmem_request_state == SRS_DONE && IsUnderPostmaster)
877 : {
878 : /*
879 : * After-startup initialization or attachment. Call the appropriate
880 : * callbacks immediately.
881 : */
882 3 : if ((callbacks->flags & SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP) == 0)
883 0 : elog(ERROR, "cannot request shared memory at this time");
884 :
885 3 : CallShmemCallbacksAfterStartup(callbacks);
886 : }
887 : else
888 : {
889 : /* Remember the callbacks for later */
890 54628 : registered_shmem_callbacks = lappend(registered_shmem_callbacks,
891 : (void *) callbacks);
892 : }
893 54631 : }
894 :
895 : /*
896 : * Register a shmem area (or multiple areas) after startup.
897 : */
898 : static void
899 3 : CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks)
900 : {
901 : bool found_any;
902 : bool notfound_any;
903 :
904 : Assert(shmem_request_state == SRS_DONE);
905 3 : shmem_request_state = SRS_REQUESTING;
906 :
907 : /*
908 : * Call the request callback first. The callback makes ShmemRequest*()
909 : * calls for each shmem area, adding them to pending_shmem_requests.
910 : */
911 : Assert(pending_shmem_requests == NIL);
912 3 : if (callbacks->request_fn)
913 3 : callbacks->request_fn(callbacks->opaque_arg);
914 3 : shmem_request_state = SRS_AFTER_STARTUP_ATTACH_OR_INIT;
915 :
916 3 : if (pending_shmem_requests == NIL)
917 : {
918 0 : shmem_request_state = SRS_DONE;
919 0 : return;
920 : }
921 :
922 : /* Hold ShmemIndexLock while we allocate all the shmem entries */
923 3 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
924 :
925 : /*
926 : * Check if the requested shared memory areas have already been
927 : * initialized. We assume all the areas requested by the request callback
928 : * to form a coherent unit such that they're all already initialized or
929 : * none. Otherwise it would be ambiguous which callback, init or attach,
930 : * to callback afterwards.
931 : */
932 3 : found_any = notfound_any = false;
933 9 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
934 : {
935 3 : if (hash_search(ShmemIndex, request->options->name, HASH_FIND, NULL))
936 2 : found_any = true;
937 : else
938 1 : notfound_any = true;
939 : }
940 3 : if (found_any && notfound_any)
941 0 : elog(ERROR, "found some but not all");
942 :
943 : /*
944 : * Allocate or attach all the shmem areas requested by the request_fn
945 : * callback.
946 : */
947 9 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
948 : {
949 3 : if (found_any)
950 2 : AttachShmemIndexEntry(request, false);
951 : else
952 1 : InitShmemIndexEntry(request);
953 :
954 3 : pfree(request->options);
955 : }
956 3 : list_free_deep(pending_shmem_requests);
957 3 : pending_shmem_requests = NIL;
958 :
959 : /* Finish by calling the appropriate subsystem-specific callback */
960 3 : if (found_any)
961 : {
962 2 : if (callbacks->attach_fn)
963 2 : callbacks->attach_fn(callbacks->opaque_arg);
964 : }
965 : else
966 : {
967 1 : if (callbacks->init_fn)
968 1 : callbacks->init_fn(callbacks->opaque_arg);
969 : }
970 :
971 3 : LWLockRelease(ShmemIndexLock);
972 3 : shmem_request_state = SRS_DONE;
973 : }
974 :
975 : /*
976 : * Call all shmem request callbacks.
977 : */
978 : void
979 1233 : ShmemCallRequestCallbacks(void)
980 : {
981 : ListCell *lc;
982 :
983 : Assert(shmem_request_state == SRS_INITIAL);
984 1233 : shmem_request_state = SRS_REQUESTING;
985 :
986 55511 : foreach(lc, registered_shmem_callbacks)
987 : {
988 54278 : const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
989 :
990 54278 : if (callbacks->request_fn)
991 54278 : callbacks->request_fn(callbacks->opaque_arg);
992 : }
993 1233 : }
994 :
995 : /*
996 : * ShmemInitStruct -- Create/attach to a structure in shared memory.
997 : *
998 : * This is called during initialization to find or allocate
999 : * a data structure in shared memory. If no other process
1000 : * has created the structure, this routine allocates space
1001 : * for it. If it exists already, a pointer to the existing
1002 : * structure is returned.
1003 : *
1004 : * Returns: pointer to the object. *foundPtr is set true if the object was
1005 : * already in the shmem index (hence, already initialized).
1006 : *
1007 : * Note: This is a legacy interface, kept for backwards compatibility with
1008 : * extensions. Use ShmemRequestStruct() in new code!
1009 : */
1010 : void *
1011 0 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
1012 : {
1013 0 : void *ptr = NULL;
1014 0 : ShmemStructOpts options = {
1015 : .name = name,
1016 : .size = size,
1017 : .ptr = &ptr,
1018 : };
1019 0 : ShmemRequest request = {&options, SHMEM_KIND_STRUCT};
1020 :
1021 : Assert(shmem_request_state == SRS_DONE ||
1022 : shmem_request_state == SRS_INITIALIZING ||
1023 : shmem_request_state == SRS_REQUESTING);
1024 :
1025 0 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
1026 :
1027 : /*
1028 : * During postmaster startup, look up the existing entry if any.
1029 : */
1030 0 : *foundPtr = false;
1031 0 : if (IsUnderPostmaster)
1032 0 : *foundPtr = AttachShmemIndexEntry(&request, true);
1033 :
1034 : /* Initialize it if not found */
1035 0 : if (!*foundPtr)
1036 0 : InitShmemIndexEntry(&request);
1037 :
1038 0 : LWLockRelease(ShmemIndexLock);
1039 :
1040 : Assert(ptr != NULL);
1041 0 : return ptr;
1042 : }
1043 :
1044 : /*
1045 : * Add two Size values, checking for overflow
1046 : */
1047 : Size
1048 499320 : add_size(Size s1, Size s2)
1049 : {
1050 : Size result;
1051 :
1052 499320 : if (pg_add_size_overflow(s1, s2, &result))
1053 0 : ereport(ERROR,
1054 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1055 : errmsg("requested shared memory size overflows size_t")));
1056 499320 : return result;
1057 : }
1058 :
1059 : /*
1060 : * Multiply two Size values, checking for overflow
1061 : */
1062 : Size
1063 124946 : mul_size(Size s1, Size s2)
1064 : {
1065 : Size result;
1066 :
1067 124946 : if (pg_mul_size_overflow(s1, s2, &result))
1068 0 : ereport(ERROR,
1069 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1070 : errmsg("requested shared memory size overflows size_t")));
1071 124946 : return result;
1072 : }
1073 :
1074 : /* SQL SRF showing allocated shared memory */
1075 : Datum
1076 4 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
1077 : {
1078 : #define PG_GET_SHMEM_SIZES_COLS 4
1079 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1080 : HASH_SEQ_STATUS hstat;
1081 : ShmemIndexEnt *ent;
1082 4 : Size named_allocated = 0;
1083 : Datum values[PG_GET_SHMEM_SIZES_COLS];
1084 : bool nulls[PG_GET_SHMEM_SIZES_COLS];
1085 :
1086 4 : InitMaterializedSRF(fcinfo, 0);
1087 :
1088 4 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
1089 :
1090 4 : hash_seq_init(&hstat, ShmemIndex);
1091 :
1092 : /* output all allocated entries */
1093 4 : memset(nulls, 0, sizeof(nulls));
1094 322 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
1095 : {
1096 318 : values[0] = CStringGetTextDatum(ent->key);
1097 318 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
1098 318 : values[2] = Int64GetDatum(ent->size);
1099 318 : values[3] = Int64GetDatum(ent->allocated_size);
1100 318 : named_allocated += ent->allocated_size;
1101 :
1102 318 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1103 : values, nulls);
1104 : }
1105 :
1106 : /* output shared memory allocated but not counted via the shmem index */
1107 4 : values[0] = CStringGetTextDatum("<anonymous>");
1108 4 : nulls[1] = true;
1109 4 : values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
1110 4 : values[3] = values[2];
1111 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1112 :
1113 : /* output as-of-yet unused shared memory */
1114 4 : nulls[0] = true;
1115 4 : values[1] = Int64GetDatum(ShmemAllocator->free_offset);
1116 4 : nulls[1] = false;
1117 4 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
1118 4 : values[3] = values[2];
1119 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1120 :
1121 4 : LWLockRelease(ShmemIndexLock);
1122 :
1123 4 : return (Datum) 0;
1124 : }
1125 :
1126 : /*
1127 : * SQL SRF showing NUMA memory nodes for allocated shared memory
1128 : *
1129 : * Compared to pg_get_shmem_allocations(), this function does not return
1130 : * information about shared anonymous allocations and unused shared memory.
1131 : */
1132 : Datum
1133 4 : pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
1134 : {
1135 : #define PG_GET_SHMEM_NUMA_SIZES_COLS 3
1136 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1137 : HASH_SEQ_STATUS hstat;
1138 : ShmemIndexEnt *ent;
1139 : Datum values[PG_GET_SHMEM_NUMA_SIZES_COLS];
1140 : bool nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
1141 : Size os_page_size;
1142 : void **page_ptrs;
1143 : int *pages_status;
1144 : uint64 shm_total_page_count,
1145 : shm_ent_page_count,
1146 : max_nodes;
1147 : Size *nodes;
1148 :
1149 4 : if (pg_numa_init() == -1)
1150 4 : elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
1151 :
1152 0 : InitMaterializedSRF(fcinfo, 0);
1153 :
1154 0 : max_nodes = pg_numa_get_max_node();
1155 0 : nodes = palloc_array(Size, max_nodes + 2);
1156 :
1157 : /*
1158 : * Shared memory allocations can vary in size and may not align with OS
1159 : * memory page boundaries, while NUMA queries work on pages.
1160 : *
1161 : * To correctly map each allocation to NUMA nodes, we need to: 1.
1162 : * Determine the OS memory page size. 2. Align each allocation's start/end
1163 : * addresses to page boundaries. 3. Query NUMA node information for all
1164 : * pages spanning the allocation.
1165 : */
1166 0 : os_page_size = pg_get_shmem_pagesize();
1167 :
1168 : /*
1169 : * Allocate memory for page pointers and status based on total shared
1170 : * memory size. This simplified approach allocates enough space for all
1171 : * pages in shared memory rather than calculating the exact requirements
1172 : * for each segment.
1173 : *
1174 : * Add 1, because we don't know how exactly the segments align to OS
1175 : * pages, so the allocation might use one more memory page. In practice
1176 : * this is not very likely, and moreover we have more entries, each of
1177 : * them using only fraction of the total pages.
1178 : */
1179 0 : shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
1180 0 : page_ptrs = palloc0_array(void *, shm_total_page_count);
1181 0 : pages_status = palloc_array(int, shm_total_page_count);
1182 :
1183 0 : if (firstNumaTouch)
1184 0 : elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");
1185 :
1186 0 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
1187 :
1188 0 : hash_seq_init(&hstat, ShmemIndex);
1189 :
1190 : /* output all allocated entries */
1191 0 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
1192 : {
1193 : int i;
1194 : char *startptr,
1195 : *endptr;
1196 : Size total_len;
1197 :
1198 : /*
1199 : * Calculate the range of OS pages used by this segment. The segment
1200 : * may start / end half-way through a page, we want to count these
1201 : * pages too. So we align the start/end pointers down/up, and then
1202 : * calculate the number of pages from that.
1203 : */
1204 0 : startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
1205 0 : endptr = (char *) TYPEALIGN(os_page_size,
1206 : (char *) ent->location + ent->allocated_size);
1207 0 : total_len = (endptr - startptr);
1208 :
1209 0 : shm_ent_page_count = total_len / os_page_size;
1210 :
1211 : /*
1212 : * If we ever get 0xff (-1) back from kernel inquiry, then we probably
1213 : * have a bug in mapping buffers to OS pages.
1214 : */
1215 0 : memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);
1216 :
1217 : /*
1218 : * Setup page_ptrs[] with pointers to all OS pages for this segment,
1219 : * and get the NUMA status using pg_numa_query_pages.
1220 : *
1221 : * In order to get reliable results we also need to touch memory
1222 : * pages, so that inquiry about NUMA memory node doesn't return -2
1223 : * (ENOENT, which indicates unmapped/unallocated pages).
1224 : */
1225 0 : for (i = 0; i < shm_ent_page_count; i++)
1226 : {
1227 0 : page_ptrs[i] = startptr + (i * os_page_size);
1228 :
1229 0 : if (firstNumaTouch)
1230 : pg_numa_touch_mem_if_required(page_ptrs[i]);
1231 :
1232 0 : CHECK_FOR_INTERRUPTS();
1233 : }
1234 :
1235 0 : if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
1236 0 : elog(ERROR, "failed NUMA pages inquiry status: %m");
1237 :
1238 : /* Count number of NUMA nodes used for this shared memory entry */
1239 0 : memset(nodes, 0, sizeof(Size) * (max_nodes + 2));
1240 :
1241 0 : for (i = 0; i < shm_ent_page_count; i++)
1242 : {
1243 0 : int s = pages_status[i];
1244 :
1245 : /* Ensure we are adding only valid index to the array */
1246 0 : if (s >= 0 && s <= max_nodes)
1247 : {
1248 : /* valid NUMA node */
1249 0 : nodes[s]++;
1250 0 : continue;
1251 : }
1252 0 : else if (s == -2)
1253 : {
1254 : /* -2 means ENOENT (e.g. page was moved to swap) */
1255 0 : nodes[max_nodes + 1]++;
1256 0 : continue;
1257 : }
1258 :
1259 0 : elog(ERROR, "invalid NUMA node id outside of allowed range "
1260 : "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
1261 : }
1262 :
1263 : /* no NULLs for regular nodes */
1264 0 : memset(nulls, 0, sizeof(nulls));
1265 :
1266 : /*
1267 : * Add one entry for each NUMA node, including those without allocated
1268 : * memory for this segment.
1269 : */
1270 0 : for (i = 0; i <= max_nodes; i++)
1271 : {
1272 0 : values[0] = CStringGetTextDatum(ent->key);
1273 0 : values[1] = Int32GetDatum(i);
1274 0 : values[2] = Int64GetDatum(nodes[i] * os_page_size);
1275 :
1276 0 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1277 : values, nulls);
1278 : }
1279 :
1280 : /* The last entry is used for pages without a NUMA node. */
1281 0 : nulls[1] = true;
1282 0 : values[0] = CStringGetTextDatum(ent->key);
1283 0 : values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);
1284 :
1285 0 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1286 : values, nulls);
1287 : }
1288 :
1289 0 : LWLockRelease(ShmemIndexLock);
1290 0 : firstNumaTouch = false;
1291 :
1292 0 : return (Datum) 0;
1293 : }
1294 :
1295 : /*
1296 : * Determine the memory page size used for the shared memory segment.
1297 : *
1298 : * If the shared segment was allocated using huge pages, returns the size of
1299 : * a huge page. Otherwise returns the size of regular memory page.
1300 : *
1301 : * This should be used only after the server is started.
1302 : */
1303 : Size
1304 2 : pg_get_shmem_pagesize(void)
1305 : {
1306 : Size os_page_size;
1307 : #ifdef WIN32
1308 : SYSTEM_INFO sysinfo;
1309 :
1310 : GetSystemInfo(&sysinfo);
1311 : os_page_size = sysinfo.dwPageSize;
1312 : #else
1313 2 : os_page_size = sysconf(_SC_PAGESIZE);
1314 : #endif
1315 :
1316 : Assert(IsUnderPostmaster);
1317 : Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
1318 :
1319 2 : if (huge_pages_status == HUGE_PAGES_ON)
1320 0 : GetHugePageSize(&os_page_size, NULL);
1321 :
1322 2 : return os_page_size;
1323 : }
1324 :
1325 : Datum
1326 5 : pg_numa_available(PG_FUNCTION_ARGS)
1327 : {
1328 5 : PG_RETURN_BOOL(pg_numa_init() != -1);
1329 : }
|