Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * shmem.c
4 : * create shared memory and initialize shared memory data structures.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/shmem.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * POSTGRES processes share one or more regions of shared memory.
17 : * The shared memory is created by a postmaster and is inherited
18 : * by each backend via fork() (or, in some ports, via other OS-specific
19 : * methods). The routines in this file are used for allocating and
20 : * binding to shared memory data structures.
21 : *
22 : * This module provides facilities to allocate fixed-size structures in shared
23 : * memory, for things like variables shared between all backend processes.
24 : * Each such structure has a string name to identify it, specified when it is
25 : * requested. shmem_hash.c provides a shared hash table implementation on top
26 : * of that.
27 : *
28 : * Shared memory areas should usually not be allocated after postmaster
29 : * startup, although we do allow small allocations later for the benefit of
30 : * extension modules that are loaded after startup. Despite that allowance,
31 : * extensions that need shared memory should be added in
32 : * shared_preload_libraries, because the allowance is quite small and there is
33 : * no guarantee that any memory is available after startup.
34 : *
35 : * Nowadays, there is also another way to allocate shared memory called
36 : * Dynamic Shared Memory. See dsm.c for that facility. One big difference
37 : * between traditional shared memory handled by shmem.c and dynamic shared
38 : * memory is that traditional shared memory areas are mapped to the same
39 : * address in all processes, so you can use normal pointers in shared memory
40 : * structs. With Dynamic Shared Memory, you must use offsets or DSA pointers
41 : * instead.
42 : *
43 : * Shared memory managed by shmem.c can never be freed, once allocated. Each
44 : * hash table has its own free list, so hash buckets can be reused when an
45 : * item is deleted.
46 : *
47 : * Usage
48 : * -----
49 : *
50 : * To allocate shared memory, you need to register a set of callback functions
51 : * which handle the lifecycle of the allocation. In the request_fn callback,
52 : * call ShmemRequestStruct() with the desired name and size. When the area is
53 : * later allocated or attached to, the global variable pointed to by the .ptr
54 : * option is set to the shared memory location of the allocation. The init_fn
55 : * callback can perform additional initialization.
56 : *
57 : * typedef struct MyShmemData {
58 : * ...
59 : * } MyShmemData;
60 : *
61 : * static MyShmemData *MyShmem;
62 : *
63 : * static void my_shmem_request(void *arg);
64 : * static void my_shmem_init(void *arg);
65 : *
66 : * const ShmemCallbacks MyShmemCallbacks = {
67 : * .request_fn = my_shmem_request,
68 : * .init_fn = my_shmem_init,
69 : * };
70 : *
71 : * static void
72 : * my_shmem_request(void *arg)
73 : * {
74 : * ShmemRequestStruct(.name = "My shmem area",
75 : * .size = sizeof(MyShmemData),
76 : * .ptr = (void **) &MyShmem,
77 : * );
78 : * }
79 : *
80 : * In builtin PostgreSQL code, add the callbacks to the list in
81 : * src/include/storage/subsystemlist.h. In an add-in module, you can register
82 : * the callbacks by calling RegisterShmemCallbacks(&MyShmemCallbacks) in the
83 : * extension's _PG_init() function.
84 : *
85 : * Lifecycle
86 : * ---------
87 : *
88 : * Initializing shared memory happens in multiple phases. In the first phase,
89 : * during postmaster startup, all the request_fn callbacks are called. Only
90 : * after all the request_fn callbacks have been called and all the shmem areas
91 : * have been requested by the ShmemRequestStruct() calls we know how much
92 : * shared memory we need in total. After that, postmaster allocates global
93 : * shared memory segment, and calls all the init_fn callbacks to initialize
94 : * all the requested shmem areas.
95 : *
96 : * In standard Unix-ish environments, individual backends do not need to
97 : * re-establish their local pointers into shared memory, because they inherit
98 : * correct values of those variables via fork() from the postmaster. However,
99 : * this does not work in the EXEC_BACKEND case. In ports using EXEC_BACKEND,
100 : * backend startup also calls the shmem_request callbacks to re-establish the
101 : * knowledge about each shared memory area, sets the pointer variables
102 : * (*options->ptr), and calls the attach_fn callback, if any, for additional
103 : * per-backend setup.
104 : *
105 : * Legacy ShmemInitStruct()/ShmemInitHash() functions
106 : * --------------------------------------------------
107 : *
108 : * ShmemInitStruct()/ShmemInitHash() is another way of registering shmem
109 : * areas. It pre-dates the ShmemRequestStruct()/ShmemRequestHash() functions,
110 : * and should not be used in new code, but as of this writing it is still
111 : * widely used in extensions.
112 : *
113 : * To allocate a shmem area with ShmemInitStruct(), you need to separately
114 : * register the size needed for the area by calling RequestAddinShmemSpace()
115 : * from the extension's shmem_request_hook, and allocate the area by calling
116 : * ShmemInitStruct() from the extension's shmem_startup_hook. There are no
117 : * init/attach callbacks. Instead, the caller of ShmemInitStruct() must check
118 : * the return status of ShmemInitStruct() and initialize the struct if it was
119 : * not previously initialized.
120 : *
121 : * Calling ShmemAlloc() directly
122 : * -----------------------------
123 : *
124 : * There's a more low-level way of allocating shared memory too: you can call
125 : * ShmemAlloc() directly. It's used to implement the higher level mechanisms,
126 : * and should generally not be called directly.
127 : */
128 :
129 : #include "postgres.h"
130 :
131 : #include <unistd.h>
132 :
133 : #include "access/slru.h"
134 : #include "fmgr.h"
135 : #include "funcapi.h"
136 : #include "miscadmin.h"
137 : #include "port/pg_bitutils.h"
138 : #include "port/pg_numa.h"
139 : #include "storage/lwlock.h"
140 : #include "storage/pg_shmem.h"
141 : #include "storage/shmem.h"
142 : #include "storage/shmem_internal.h"
143 : #include "storage/spin.h"
144 : #include "utils/builtins.h"
145 : #include "utils/tuplestore.h"
146 :
147 : /*
148 : * Registered callbacks.
149 : *
150 : * During postmaster startup, we accumulate the callbacks from all subsystems
151 : * in this list.
152 : *
153 : * This is in process private memory, although on Unix-like systems, we expect
154 : * all the registrations to happen at postmaster startup time and be inherited
155 : * by all the child processes via fork().
156 : */
157 : static List *registered_shmem_callbacks;
158 :
159 : /*
160 : * In the shmem request phase, all the shmem areas requested with the
161 : * ShmemRequest*() functions are accumulated here.
162 : */
163 : typedef struct
164 : {
165 : ShmemStructOpts *options;
166 : ShmemRequestKind kind;
167 : } ShmemRequest;
168 :
169 : static List *pending_shmem_requests;
170 :
171 : /*
172 : * Per-process state machine, for sanity checking that we do things in the
173 : * right order.
174 : *
175 : * Postmaster:
176 : * INITIAL -> REQUESTING -> INITIALIZING -> DONE
177 : *
178 : * Backends in EXEC_BACKEND mode:
179 : * INITIAL -> REQUESTING -> ATTACHING -> DONE
180 : *
181 : * Late request:
182 : * DONE -> REQUESTING -> AFTER_STARTUP_ATTACH_OR_INIT -> DONE
183 : */
184 : enum shmem_request_state
185 : {
186 : /* Initial state */
187 : SRS_INITIAL,
188 :
189 : /*
190 : * When we start calling the shmem_request callbacks, we enter the
191 : * SRS_REQUESTING phase. All ShmemRequestStruct calls happen in this
192 : * state.
193 : */
194 : SRS_REQUESTING,
195 :
196 : /*
197 : * Postmaster has finished all shmem requests, and is now initializing the
198 : * shared memory segment. init_fn callbacks are called in this state.
199 : */
200 : SRS_INITIALIZING,
201 :
202 : /*
203 : * A postmaster child process is starting up. attach_fn callbacks are
204 : * called in this state.
205 : */
206 : SRS_ATTACHING,
207 :
208 : /* An after-startup allocation or attachment is in progress */
209 : SRS_AFTER_STARTUP_ATTACH_OR_INIT,
210 :
211 : /* Normal state after shmem initialization / attachment */
212 : SRS_DONE,
213 : };
214 : static enum shmem_request_state shmem_request_state = SRS_INITIAL;
215 :
216 : /*
217 : * This is the first data structure stored in the shared memory segment, at
218 : * the offset that PGShmemHeader->content_offset points to. Allocations by
219 : * ShmemAlloc() are carved out of the space after this.
220 : *
221 : * For the base pointer and the total size of the shmem segment, we rely on
222 : * the PGShmemHeader.
223 : */
224 : typedef struct ShmemAllocatorData
225 : {
226 : Size free_offset; /* offset to first free space from ShmemBase */
227 :
228 : /* protects 'free_offset' */
229 : slock_t shmem_lock;
230 :
231 : HASHHDR *index; /* location of ShmemIndex */
232 : size_t index_size; /* size of shmem region holding ShmemIndex */
233 : LWLock index_lock; /* protects ShmemIndex */
234 : } ShmemAllocatorData;
235 :
236 : #define ShmemIndexLock (&ShmemAllocator->index_lock)
237 :
238 : static void *ShmemAllocRaw(Size size, Size alignment, Size *allocated_size);
239 :
240 : /* shared memory global variables */
241 :
242 : static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
243 : static void *ShmemBase; /* start address of shared memory */
244 : static void *ShmemEnd; /* end+1 address of shared memory */
245 :
246 : static ShmemAllocatorData *ShmemAllocator;
247 :
248 : /*
249 : * ShmemIndex is a global directory of shmem areas, itself also stored in the
250 : * shared memory.
251 : */
252 : static HTAB *ShmemIndex;
253 :
254 : /* max size of data structure string name */
255 : #define SHMEM_INDEX_KEYSIZE (48)
256 :
257 : /*
258 : * # of additional entries to reserve in the shmem index table, for
259 : * allocations after postmaster startup. (This is not a hard limit, the hash
260 : * table can grow larger than that if there is shared memory available)
261 : */
262 : #define SHMEM_INDEX_ADDITIONAL_SIZE (128)
263 :
264 : /* this is a hash bucket in the shmem index table */
265 : typedef struct
266 : {
267 : char key[SHMEM_INDEX_KEYSIZE]; /* string name */
268 : void *location; /* location in shared mem */
269 : Size size; /* # bytes requested for the structure */
270 : Size allocated_size; /* # bytes actually allocated */
271 : } ShmemIndexEnt;
272 :
273 : /* To get reliable results for NUMA inquiry we need to "touch pages" once */
274 : static bool firstNumaTouch = true;
275 :
276 : static void CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks);
277 : static void InitShmemIndexEntry(ShmemRequest *request);
278 : static bool AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok);
279 :
280 : Datum pg_numa_available(PG_FUNCTION_ARGS);
281 :
282 : /*
283 : * ShmemRequestStruct() --- request a named shared memory area
284 : *
285 : * Subsystems call this to register their shared memory needs. This is
286 : * usually done early in postmaster startup, before the shared memory segment
287 : * has been created, so that the size can be included in the estimate for
288 : * total amount of shared memory needed. We set aside a small amount of
289 : * memory for allocations that happen later, for the benefit of non-preloaded
290 : * extensions, but that should not be relied upon.
291 : *
292 : * This does not yet allocate the memory, but merely registers the need for
293 : * it. The actual allocation happens later in the postmaster startup
294 : * sequence.
295 : *
296 : * This must be called from a shmem_request callback function, registered with
297 : * RegisterShmemCallbacks(). This enforces a coding pattern that works the
298 : * same in normal Unix systems and with EXEC_BACKEND. On Unix systems, the
299 : * shmem_request callbacks are called once, early in postmaster startup, and
300 : * the child processes inherit the struct descriptors and any other
301 : * per-process state from the postmaster. In EXEC_BACKEND mode, shmem_request
302 : * callbacks are *also* called in each backend, at backend startup, to
303 : * re-establish the struct descriptors. By calling the same function in both
304 : * cases, we ensure that all the shmem areas are registered the same way in
305 : * all processes.
306 : *
307 : * 'options' defines the name and size of the area, and any other optional
308 : * features. Leave unused options as zeros. The options are copied to
309 : * longer-lived memory, so it doesn't need to live after the
310 : * ShmemRequestStruct() call and can point to a local variable in the calling
311 : * function. The 'name' must point to a long-lived string though, only the
312 : * pointer to it is copied.
313 : */
314 : void
315 78748 : ShmemRequestStructWithOpts(const ShmemStructOpts *options)
316 : {
317 : ShmemStructOpts *options_copy;
318 :
319 78748 : options_copy = MemoryContextAlloc(TopMemoryContext,
320 : sizeof(ShmemStructOpts));
321 78748 : memcpy(options_copy, options, sizeof(ShmemStructOpts));
322 :
323 78748 : ShmemRequestInternal(options_copy, SHMEM_KIND_STRUCT);
324 78748 : }
325 :
326 : /*
327 : * Internal workhorse of ShmemRequestStruct() and ShmemRequestHash().
328 : *
329 : * Note: Unlike in the public ShmemRequestStruct() and ShmemRequestHash()
330 : * functions, 'options' is *not* copied. It must be allocated in
331 : * TopMemoryContext by the caller, and will be freed after the init/attach
332 : * callbacks have been called. This allows ShmemRequestHash() to pass a
333 : * pointer to the extended ShmemHashOpts struct instead.
334 : */
335 : void
336 97513 : ShmemRequestInternal(ShmemStructOpts *options, ShmemRequestKind kind)
337 : {
338 : ShmemRequest *request;
339 :
340 : /* Check the options */
341 97513 : if (options->name == NULL)
342 0 : elog(ERROR, "shared memory request is missing 'name' option");
343 :
344 97513 : if (IsUnderPostmaster)
345 : {
346 3 : if (options->size <= 0 && options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
347 0 : elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
348 : options->size, options->name);
349 : }
350 : else
351 : {
352 97510 : if (options->size == SHMEM_ATTACH_UNKNOWN_SIZE)
353 0 : elog(ERROR, "SHMEM_ATTACH_UNKNOWN_SIZE cannot be used during startup");
354 97510 : if (options->size <= 0)
355 0 : elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
356 : options->size, options->name);
357 : }
358 :
359 97513 : if (options->alignment != 0 && pg_nextpower2_size_t(options->alignment) != options->alignment)
360 0 : elog(ERROR, "invalid alignment %zu for shared memory request for \"%s\"",
361 : options->alignment, options->name);
362 :
363 : /* Check that we're in the right state */
364 97513 : if (shmem_request_state != SRS_REQUESTING)
365 0 : elog(ERROR, "ShmemRequestStruct can only be called from a shmem_request callback");
366 :
367 : /* Check that it's not already registered in this process */
368 3949603 : foreach_ptr(ShmemRequest, existing, pending_shmem_requests)
369 : {
370 3754577 : if (strcmp(existing->options->name, options->name) == 0)
371 0 : ereport(ERROR,
372 : (errmsg("shared memory struct \"%s\" is already registered",
373 : options->name)));
374 : }
375 :
376 : /* Request looks valid, remember it */
377 97513 : request = palloc(sizeof(ShmemRequest));
378 97513 : request->options = options;
379 97513 : request->kind = kind;
380 97513 : pending_shmem_requests = lappend(pending_shmem_requests, request);
381 97513 : }
382 :
383 : /*
384 : * ShmemGetRequestedSize() --- estimate the total size of all registered shared
385 : * memory structures.
386 : *
387 : * This is called at postmaster startup, before the shared memory segment has
388 : * been created.
389 : */
390 : size_t
391 2323 : ShmemGetRequestedSize(void)
392 : {
393 : size_t size;
394 :
395 : /* memory needed for the ShmemIndex */
396 2323 : size = hash_estimate_size(list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE,
397 : sizeof(ShmemIndexEnt));
398 2323 : size = CACHELINEALIGN(size);
399 :
400 : /* memory needed for all the requested areas */
401 185862 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
402 : {
403 181216 : size_t alignment = request->options->alignment;
404 :
405 : /* pad the start address for alignment like ShmemAllocRaw() does */
406 181216 : if (alignment < PG_CACHE_LINE_SIZE)
407 174247 : alignment = PG_CACHE_LINE_SIZE;
408 181216 : size = TYPEALIGN(alignment, size);
409 :
410 181216 : size = add_size(size, request->options->size);
411 : }
412 :
413 2323 : return size;
414 : }
415 :
416 : /*
417 : * ShmemInitRequested() --- allocate and initialize requested shared memory
418 : * structures.
419 : *
420 : * This is called once at postmaster startup, after the shared memory segment
421 : * has been created.
422 : */
423 : void
424 1247 : ShmemInitRequested(void)
425 : {
426 : /* should be called only by the postmaster or a standalone backend */
427 : Assert(!IsUnderPostmaster);
428 : Assert(shmem_request_state == SRS_INITIALIZING);
429 :
430 : /*
431 : * Initialize the ShmemIndex entries and perform basic initialization of
432 : * all the requested memory areas. There are no concurrent processes yet,
433 : * so no need for locking.
434 : */
435 99770 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
436 : {
437 97276 : InitShmemIndexEntry(request);
438 97276 : pfree(request->options);
439 : }
440 1247 : list_free_deep(pending_shmem_requests);
441 1247 : pending_shmem_requests = NIL;
442 :
443 : /*
444 : * Call the subsystem-specific init callbacks to finish initialization of
445 : * all the areas.
446 : */
447 57388 : foreach_ptr(const ShmemCallbacks, callbacks, registered_shmem_callbacks)
448 : {
449 54894 : if (callbacks->init_fn)
450 49902 : callbacks->init_fn(callbacks->opaque_arg);
451 : }
452 :
453 1247 : shmem_request_state = SRS_DONE;
454 1247 : }
455 :
456 : /*
457 : * Re-establish process private state related to shmem areas.
458 : *
459 : * This is called at backend startup in EXEC_BACKEND mode, in every backend.
460 : */
461 : #ifdef EXEC_BACKEND
462 : void
463 : ShmemAttachRequested(void)
464 : {
465 : ListCell *lc;
466 :
467 : /* Must be initializing a (non-standalone) backend */
468 : Assert(IsUnderPostmaster);
469 : Assert(ShmemAllocator->index != NULL);
470 : Assert(shmem_request_state == SRS_REQUESTING);
471 : shmem_request_state = SRS_ATTACHING;
472 :
473 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
474 :
475 : /*
476 : * Attach to all the requested memory areas.
477 : */
478 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
479 : {
480 : AttachShmemIndexEntry(request, false);
481 : pfree(request->options);
482 : }
483 : list_free_deep(pending_shmem_requests);
484 : pending_shmem_requests = NIL;
485 :
486 : /* Call attach callbacks */
487 : foreach(lc, registered_shmem_callbacks)
488 : {
489 : const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
490 :
491 : if (callbacks->attach_fn)
492 : callbacks->attach_fn(callbacks->opaque_arg);
493 : }
494 :
495 : LWLockRelease(ShmemIndexLock);
496 :
497 : shmem_request_state = SRS_DONE;
498 : }
499 : #endif
500 :
501 : /*
502 : * Insert requested shmem area into the shared memory index and initialize it.
503 : *
504 : * Note that this only does performs basic initialization depending on
505 : * ShmemRequestKind, like setting the global pointer variable to the area for
506 : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
507 : * This does *not* call the subsystem-specific init callbacks. That's done
508 : * later after all the shmem areas have been initialized or attached to.
509 : */
510 : static void
511 97277 : InitShmemIndexEntry(ShmemRequest *request)
512 : {
513 97277 : const char *name = request->options->name;
514 : ShmemIndexEnt *index_entry;
515 : bool found;
516 : size_t allocated_size;
517 : void *structPtr;
518 :
519 : /* look it up in the shmem index */
520 : index_entry = (ShmemIndexEnt *)
521 97277 : hash_search(ShmemIndex, name, HASH_ENTER_NULL, &found);
522 97277 : if (found)
523 0 : elog(ERROR, "shared memory struct \"%s\" is already initialized", name);
524 97277 : if (!index_entry)
525 : {
526 : /* tried to add it to the hash table, but there was no space */
527 0 : ereport(ERROR,
528 : (errcode(ERRCODE_OUT_OF_MEMORY),
529 : errmsg("could not create ShmemIndex entry for data structure \"%s\"",
530 : name)));
531 : }
532 :
533 : /*
534 : * We inserted the entry to the shared memory index. Allocate requested
535 : * amount of shared memory for it, and initialize the index entry.
536 : */
537 97277 : structPtr = ShmemAllocRaw(request->options->size,
538 97277 : request->options->alignment,
539 : &allocated_size);
540 97277 : if (structPtr == NULL)
541 : {
542 : /* out of memory; remove the failed ShmemIndex entry */
543 0 : hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
544 0 : ereport(ERROR,
545 : (errcode(ERRCODE_OUT_OF_MEMORY),
546 : errmsg("not enough shared memory for data structure"
547 : " \"%s\" (%zd bytes requested)",
548 : name, request->options->size)));
549 : }
550 97277 : index_entry->size = request->options->size;
551 97277 : index_entry->allocated_size = allocated_size;
552 97277 : index_entry->location = structPtr;
553 :
554 : /* Initialize depending on the kind of shmem area it is */
555 97277 : switch (request->kind)
556 : {
557 78557 : case SHMEM_KIND_STRUCT:
558 78557 : if (request->options->ptr)
559 78557 : *(request->options->ptr) = index_entry->location;
560 78557 : break;
561 9987 : case SHMEM_KIND_HASH:
562 9987 : shmem_hash_init(structPtr, request->options);
563 9987 : break;
564 8733 : case SHMEM_KIND_SLRU:
565 8733 : shmem_slru_init(structPtr, request->options);
566 8733 : break;
567 : }
568 97277 : }
569 :
570 : /*
571 : * Look up a named shmem area in the shared memory index and attach to it.
572 : *
573 : * Note that this only performs the basic attachment actions depending on
574 : * ShmemRequestKind, like setting the global pointer variable to the area for
575 : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
576 : * This does *not* call the subsystem-specific attach callbacks. That's done
577 : * later after all the shmem areas have been initialized or attached to.
578 : */
579 : static bool
580 2 : AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok)
581 : {
582 2 : const char *name = request->options->name;
583 : ShmemIndexEnt *index_entry;
584 :
585 : /* Look it up in the shmem index */
586 : index_entry = (ShmemIndexEnt *)
587 2 : hash_search(ShmemIndex, name, HASH_FIND, NULL);
588 2 : if (!index_entry)
589 : {
590 0 : if (!missing_ok)
591 0 : ereport(ERROR,
592 : (errmsg("could not find ShmemIndex entry for data structure \"%s\"",
593 : request->options->name)));
594 0 : return false;
595 : }
596 :
597 : /* Check that the size in the index matches the request */
598 2 : if (index_entry->size != request->options->size &&
599 0 : request->options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
600 : {
601 0 : ereport(ERROR,
602 : (errmsg("shared memory struct \"%s\" was created with"
603 : " different size: existing %zu, requested %zd",
604 : name, index_entry->size, request->options->size)));
605 : }
606 :
607 : /*
608 : * Re-establish the caller's pointer variable, or do other actions to
609 : * attach depending on the kind of shmem area it is.
610 : */
611 2 : switch (request->kind)
612 : {
613 2 : case SHMEM_KIND_STRUCT:
614 2 : if (request->options->ptr)
615 2 : *(request->options->ptr) = index_entry->location;
616 2 : break;
617 0 : case SHMEM_KIND_HASH:
618 0 : shmem_hash_attach(index_entry->location, request->options);
619 0 : break;
620 0 : case SHMEM_KIND_SLRU:
621 0 : shmem_slru_attach(index_entry->location, request->options);
622 0 : break;
623 : }
624 :
625 2 : return true;
626 : }
627 :
628 : /*
629 : * InitShmemAllocator() --- set up basic pointers to shared memory.
630 : *
631 : * Called at postmaster or stand-alone backend startup, to initialize the
632 : * allocator's data structure in the shared memory segment. In EXEC_BACKEND,
633 : * this is also called at backend startup, to set up pointers to the
634 : * already-initialized data structure.
635 : */
636 : void
637 1247 : InitShmemAllocator(PGShmemHeader *seghdr)
638 : {
639 : Size offset;
640 : int64 hash_nelems;
641 : HASHCTL info;
642 : int hash_flags;
643 :
644 : #ifndef EXEC_BACKEND
645 : Assert(!IsUnderPostmaster);
646 : #endif
647 : Assert(seghdr != NULL);
648 :
649 1247 : if (IsUnderPostmaster)
650 : {
651 : Assert(shmem_request_state == SRS_INITIAL);
652 : }
653 : else
654 : {
655 : Assert(shmem_request_state == SRS_REQUESTING);
656 1247 : shmem_request_state = SRS_INITIALIZING;
657 : }
658 :
659 : /*
660 : * We assume the pointer and offset are MAXALIGN. Not a hard requirement,
661 : * but it's true today and keeps the math below simpler.
662 : */
663 : Assert(seghdr == (void *) MAXALIGN(seghdr));
664 : Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));
665 :
666 : /*
667 : * Allocations after this point should go through ShmemAlloc, which
668 : * expects to allocate everything on cache line boundaries. Make sure the
669 : * first allocation begins on a cache line boundary.
670 : */
671 1247 : offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
672 1247 : if (offset > seghdr->totalsize)
673 0 : ereport(ERROR,
674 : (errcode(ERRCODE_OUT_OF_MEMORY),
675 : errmsg("out of shared memory (%zu bytes requested)",
676 : offset)));
677 :
678 : /*
679 : * In postmaster or stand-alone backend, initialize the shared memory
680 : * allocator so that we can allocate shared memory for ShmemIndex using
681 : * ShmemAlloc(). In a regular backend just set up the pointers required
682 : * by ShmemAlloc().
683 : */
684 1247 : ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
685 1247 : if (!IsUnderPostmaster)
686 : {
687 1247 : SpinLockInit(&ShmemAllocator->shmem_lock);
688 1247 : ShmemAllocator->free_offset = offset;
689 1247 : LWLockInitialize(&ShmemAllocator->index_lock, LWTRANCHE_SHMEM_INDEX);
690 : }
691 :
692 1247 : ShmemSegHdr = seghdr;
693 1247 : ShmemBase = seghdr;
694 1247 : ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
695 :
696 : /*
697 : * Create (or attach to) the shared memory index of shmem areas.
698 : *
699 : * This is the same initialization as ShmemInitHash() does, but we cannot
700 : * use ShmemInitHash() here because it relies on ShmemIndex being already
701 : * initialized.
702 : */
703 1247 : hash_nelems = list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE;
704 :
705 1247 : info.keysize = SHMEM_INDEX_KEYSIZE;
706 1247 : info.entrysize = sizeof(ShmemIndexEnt);
707 1247 : hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE;
708 :
709 1247 : if (!IsUnderPostmaster)
710 : {
711 1247 : ShmemAllocator->index_size = hash_estimate_size(hash_nelems, info.entrysize);
712 1247 : ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size);
713 : }
714 2494 : ShmemIndex = shmem_hash_create(ShmemAllocator->index,
715 1247 : ShmemAllocator->index_size,
716 : IsUnderPostmaster,
717 : "ShmemIndex", hash_nelems,
718 : &info, hash_flags);
719 : Assert(ShmemIndex != NULL);
720 :
721 : /*
722 : * Add an entry for ShmemIndex itself into ShmemIndex, so that it's
723 : * visible in the pg_shmem_allocations view
724 : */
725 1247 : if (!IsUnderPostmaster)
726 : {
727 : bool found;
728 : ShmemIndexEnt *result = (ShmemIndexEnt *)
729 1247 : hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found);
730 :
731 : Assert(!found);
732 1247 : result->size = ShmemAllocator->index_size;
733 1247 : result->allocated_size = ShmemAllocator->index_size;
734 1247 : result->location = ShmemAllocator->index;
735 : }
736 1247 : }
737 :
738 : /*
739 : * Reset state on postmaster crash restart.
740 : */
741 : void
742 5 : ResetShmemAllocator(void)
743 : {
744 : Assert(!IsUnderPostmaster);
745 5 : shmem_request_state = SRS_INITIAL;
746 :
747 5 : pending_shmem_requests = NIL;
748 :
749 : /*
750 : * Note that we don't clear the registered callbacks. We will need to
751 : * call them again as we restart
752 : */
753 5 : }
754 :
755 : /*
756 : * ShmemAlloc -- allocate max-aligned chunk from shared memory
757 : *
758 : * Throws error if request cannot be satisfied.
759 : *
760 : * Assumes ShmemSegHdr is initialized.
761 : */
762 : void *
763 1247 : ShmemAlloc(Size size)
764 : {
765 : void *newSpace;
766 : Size allocated_size;
767 :
768 1247 : newSpace = ShmemAllocRaw(size, 0, &allocated_size);
769 1247 : if (!newSpace)
770 0 : ereport(ERROR,
771 : (errcode(ERRCODE_OUT_OF_MEMORY),
772 : errmsg("out of shared memory (%zu bytes requested)",
773 : size)));
774 1247 : return newSpace;
775 : }
776 :
777 : /*
778 : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
779 : *
780 : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
781 : */
782 : void *
783 0 : ShmemAllocNoError(Size size)
784 : {
785 : Size allocated_size;
786 :
787 0 : return ShmemAllocRaw(size, 0, &allocated_size);
788 : }
789 :
790 : /*
791 : * ShmemAllocRaw -- allocate align chunk and return allocated size
792 : *
793 : * Also sets *allocated_size to the number of bytes allocated, which will
794 : * be equal to the number requested plus any padding we choose to add.
795 : */
796 : static void *
797 98524 : ShmemAllocRaw(Size size, Size alignment, Size *allocated_size)
798 : {
799 : Size rawStart;
800 : Size newStart;
801 : Size newFree;
802 : void *newSpace;
803 :
804 : /*
805 : * Ensure all space is adequately aligned. We used to only MAXALIGN this
806 : * space but experience has proved that on modern systems that is not good
807 : * enough. Many parts of the system are very sensitive to critical data
808 : * structures getting split across cache line boundaries. To avoid that,
809 : * attempt to align the beginning of the allocation to a cache line
810 : * boundary. The calling code will still need to be careful about how it
811 : * uses the allocated space - e.g. by padding each element in an array of
812 : * structures out to a power-of-two size - but without this, even that
813 : * won't be sufficient.
814 : */
815 98524 : if (alignment < PG_CACHE_LINE_SIZE)
816 94783 : alignment = PG_CACHE_LINE_SIZE;
817 :
818 : Assert(ShmemSegHdr != NULL);
819 :
820 98524 : SpinLockAcquire(&ShmemAllocator->shmem_lock);
821 :
822 98524 : rawStart = ShmemAllocator->free_offset;
823 98524 : newStart = TYPEALIGN(alignment, rawStart);
824 :
825 98524 : newFree = newStart + size;
826 98524 : if (newFree <= ShmemSegHdr->totalsize)
827 : {
828 98524 : newSpace = (char *) ShmemBase + newStart;
829 98524 : ShmemAllocator->free_offset = newFree;
830 : }
831 : else
832 0 : newSpace = NULL;
833 :
834 98524 : SpinLockRelease(&ShmemAllocator->shmem_lock);
835 :
836 : /* note this assert is okay with newSpace == NULL */
837 : Assert(newSpace == (void *) TYPEALIGN(alignment, newSpace));
838 :
839 98524 : *allocated_size = newFree - rawStart;
840 98524 : return newSpace;
841 : }
842 :
843 : /*
844 : * ShmemAddrIsValid -- test if an address refers to shared memory
845 : *
846 : * Returns true if the pointer points within the shared memory segment.
847 : */
848 : bool
849 0 : ShmemAddrIsValid(const void *addr)
850 : {
851 0 : return (addr >= ShmemBase) && (addr < ShmemEnd);
852 : }
853 :
854 : /*
855 : * Register callbacks that define a shared memory area (or multiple areas).
856 : *
857 : * The system will call the callbacks at different stages of postmaster or
858 : * backend startup, to allocate and initialize the area.
859 : *
860 : * This is normally called early during postmaster startup, but if the
 * SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP flag is set, this can also be used after
862 : * startup, although after startup there's no guarantee that there's enough
863 : * shared memory available. When called after startup, this immediately calls
864 : * the right callbacks depending on whether another backend had already
865 : * initialized the area.
866 : *
867 : * Note: In EXEC_BACKEND mode, this needs to be called in every backend
868 : * process. That's needed because we cannot pass down the callback function
869 : * pointers from the postmaster process, because different processes may have
870 : * loaded libraries to different addresses.
871 : */
872 : void
873 55379 : RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
874 : {
875 55379 : if (shmem_request_state == SRS_DONE && IsUnderPostmaster)
876 : {
877 : /*
878 : * After-startup initialization or attachment. Call the appropriate
879 : * callbacks immediately.
880 : */
881 3 : if ((callbacks->flags & SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP) == 0)
882 0 : elog(ERROR, "cannot request shared memory at this time");
883 :
884 3 : CallShmemCallbacksAfterStartup(callbacks);
885 : }
886 : else
887 : {
888 : /* Remember the callbacks for later */
889 55376 : registered_shmem_callbacks = lappend(registered_shmem_callbacks,
890 : (void *) callbacks);
891 : }
892 55379 : }
893 :
894 : /*
895 : * Register a shmem area (or multiple areas) after startup.
896 : */
static void
CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks)
{
	bool		found_any;
	bool		notfound_any;

	Assert(shmem_request_state == SRS_DONE);
	shmem_request_state = SRS_REQUESTING;

	/*
	 * Call the request callback first.  The callback makes ShmemRequest*()
	 * calls for each shmem area, adding them to pending_shmem_requests.
	 */
	Assert(pending_shmem_requests == NIL);
	if (callbacks->request_fn)
		callbacks->request_fn(callbacks->opaque_arg);
	shmem_request_state = SRS_AFTER_STARTUP_ATTACH_OR_INIT;

	/* If the callback requested nothing, there is nothing more to do. */
	if (pending_shmem_requests == NIL)
	{
		shmem_request_state = SRS_DONE;
		return;
	}

	/* Hold ShmemIndexLock while we allocate all the shmem entries */
	LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);

	/*
	 * Check if the requested shared memory areas have already been
	 * initialized.  We assume that all the areas requested by the request
	 * callback form a coherent unit, so that either all of them are already
	 * initialized or none are.  Otherwise it would be ambiguous which
	 * callback, init or attach, to call afterwards.
	 */
	found_any = notfound_any = false;
	foreach_ptr(ShmemRequest, request, pending_shmem_requests)
	{
		if (hash_search(ShmemIndex, request->options->name, HASH_FIND, NULL))
			found_any = true;
		else
			notfound_any = true;
	}
	if (found_any && notfound_any)
		elog(ERROR, "found some but not all");

	/*
	 * Allocate or attach all the shmem areas requested by the request_fn
	 * callback.
	 */
	foreach_ptr(ShmemRequest, request, pending_shmem_requests)
	{
		if (found_any)
			AttachShmemIndexEntry(request, false);
		else
			InitShmemIndexEntry(request);

		/*
		 * NOTE(review): assumes request->options was palloc'd by the
		 * ShmemRequest*() call, separately from the request itself (which is
		 * released by list_free_deep below) -- confirm against the
		 * ShmemRequest*() implementations.
		 */
		pfree(request->options);
	}
	list_free_deep(pending_shmem_requests);
	pending_shmem_requests = NIL;

	/* Finish by calling the appropriate subsystem-specific callback */
	if (found_any)
	{
		if (callbacks->attach_fn)
			callbacks->attach_fn(callbacks->opaque_arg);
	}
	else
	{
		if (callbacks->init_fn)
			callbacks->init_fn(callbacks->opaque_arg);
	}

	LWLockRelease(ShmemIndexLock);
	shmem_request_state = SRS_DONE;
}
973 :
974 : /*
975 : * Call all shmem request callbacks.
976 : */
977 : void
978 1250 : ShmemCallRequestCallbacks(void)
979 : {
980 : ListCell *lc;
981 :
982 : Assert(shmem_request_state == SRS_INITIAL);
983 1250 : shmem_request_state = SRS_REQUESTING;
984 :
985 56276 : foreach(lc, registered_shmem_callbacks)
986 : {
987 55026 : const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
988 :
989 55026 : if (callbacks->request_fn)
990 55026 : callbacks->request_fn(callbacks->opaque_arg);
991 : }
992 1250 : }
993 :
994 : /*
995 : * ShmemInitStruct -- Create/attach to a structure in shared memory.
996 : *
997 : * This is called during initialization to find or allocate
998 : * a data structure in shared memory. If no other process
999 : * has created the structure, this routine allocates space
1000 : * for it. If it exists already, a pointer to the existing
1001 : * structure is returned.
1002 : *
1003 : * Returns: pointer to the object. *foundPtr is set true if the object was
1004 : * already in the shmem index (hence, already initialized).
1005 : *
1006 : * Note: This is a legacy interface, kept for backwards compatibility with
1007 : * extensions. Use ShmemRequestStruct() in new code!
1008 : */
1009 : void *
1010 0 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
1011 : {
1012 0 : void *ptr = NULL;
1013 0 : ShmemStructOpts options = {
1014 : .name = name,
1015 : .size = size,
1016 : .ptr = &ptr,
1017 : };
1018 0 : ShmemRequest request = {&options, SHMEM_KIND_STRUCT};
1019 :
1020 : Assert(shmem_request_state == SRS_DONE ||
1021 : shmem_request_state == SRS_INITIALIZING ||
1022 : shmem_request_state == SRS_REQUESTING);
1023 :
1024 0 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
1025 :
1026 : /*
1027 : * During postmaster startup, look up the existing entry if any.
1028 : */
1029 0 : *foundPtr = false;
1030 0 : if (IsUnderPostmaster)
1031 0 : *foundPtr = AttachShmemIndexEntry(&request, true);
1032 :
1033 : /* Initialize it if not found */
1034 0 : if (!*foundPtr)
1035 0 : InitShmemIndexEntry(&request);
1036 :
1037 0 : LWLockRelease(ShmemIndexLock);
1038 :
1039 : Assert(ptr != NULL);
1040 0 : return ptr;
1041 : }
1042 :
1043 : /* SQL SRF showing allocated shared memory */
1044 : Datum
1045 4 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
1046 : {
1047 : #define PG_GET_SHMEM_SIZES_COLS 4
1048 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1049 : HASH_SEQ_STATUS hstat;
1050 : ShmemIndexEnt *ent;
1051 4 : Size named_allocated = 0;
1052 : Datum values[PG_GET_SHMEM_SIZES_COLS];
1053 : bool nulls[PG_GET_SHMEM_SIZES_COLS];
1054 :
1055 4 : InitMaterializedSRF(fcinfo, 0);
1056 :
1057 4 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
1058 :
1059 4 : hash_seq_init(&hstat, ShmemIndex);
1060 :
1061 : /* output all allocated entries */
1062 4 : memset(nulls, 0, sizeof(nulls));
1063 322 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
1064 : {
1065 318 : values[0] = CStringGetTextDatum(ent->key);
1066 318 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
1067 318 : values[2] = Int64GetDatum(ent->size);
1068 318 : values[3] = Int64GetDatum(ent->allocated_size);
1069 318 : named_allocated += ent->allocated_size;
1070 :
1071 318 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1072 : values, nulls);
1073 : }
1074 :
1075 : /* output shared memory allocated but not counted via the shmem index */
1076 4 : values[0] = CStringGetTextDatum("<anonymous>");
1077 4 : nulls[1] = true;
1078 4 : values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
1079 4 : values[3] = values[2];
1080 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1081 :
1082 : /* output as-of-yet unused shared memory */
1083 4 : nulls[0] = true;
1084 4 : values[1] = Int64GetDatum(ShmemAllocator->free_offset);
1085 4 : nulls[1] = false;
1086 4 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
1087 4 : values[3] = values[2];
1088 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1089 :
1090 4 : LWLockRelease(ShmemIndexLock);
1091 :
1092 4 : return (Datum) 0;
1093 : }
1094 :
1095 : /*
1096 : * SQL SRF showing NUMA memory nodes for allocated shared memory
1097 : *
1098 : * Compared to pg_get_shmem_allocations(), this function does not return
1099 : * information about shared anonymous allocations and unused shared memory.
1100 : */
Datum
pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
{
#define PG_GET_SHMEM_NUMA_SIZES_COLS 3
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	HASH_SEQ_STATUS hstat;
	ShmemIndexEnt *ent;
	Datum		values[PG_GET_SHMEM_NUMA_SIZES_COLS];
	bool		nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
	Size		os_page_size;
	void	  **page_ptrs;
	int		   *pages_status;
	uint64		shm_total_page_count,
				shm_ent_page_count,
				max_nodes;
	Size	   *nodes;

	if (pg_numa_init() == -1)
		elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");

	InitMaterializedSRF(fcinfo, 0);

	/*
	 * nodes[0 .. max_nodes] count pages on each NUMA node; the extra slot at
	 * nodes[max_nodes + 1] counts pages for which the kernel reported -2
	 * (ENOENT), see below.
	 */
	max_nodes = pg_numa_get_max_node();
	nodes = palloc_array(Size, max_nodes + 2);

	/*
	 * Shared memory allocations can vary in size and may not align with OS
	 * memory page boundaries, while NUMA queries work on pages.
	 *
	 * To correctly map each allocation to NUMA nodes, we need to: 1.
	 * Determine the OS memory page size. 2. Align each allocation's start/end
	 * addresses to page boundaries. 3. Query NUMA node information for all
	 * pages spanning the allocation.
	 */
	os_page_size = pg_get_shmem_pagesize();

	/*
	 * Allocate memory for page pointers and status based on total shared
	 * memory size. This simplified approach allocates enough space for all
	 * pages in shared memory rather than calculating the exact requirements
	 * for each segment.
	 *
	 * Add 1, because we don't know how exactly the segments align to OS
	 * pages, so the allocation might use one more memory page. In practice
	 * this is not very likely, and moreover we have more entries, each of
	 * them using only fraction of the total pages.
	 */
	shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
	page_ptrs = palloc0_array(void *, shm_total_page_count);
	pages_status = palloc_array(int, shm_total_page_count);

	if (firstNumaTouch)
		elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");

	LWLockAcquire(ShmemIndexLock, LW_SHARED);

	hash_seq_init(&hstat, ShmemIndex);

	/* output all allocated entries */
	while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
	{
		int			i;
		char	   *startptr,
				   *endptr;
		Size		total_len;

		/*
		 * Calculate the range of OS pages used by this segment. The segment
		 * may start / end half-way through a page, we want to count these
		 * pages too. So we align the start/end pointers down/up, and then
		 * calculate the number of pages from that.
		 */
		startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
		endptr = (char *) TYPEALIGN(os_page_size,
									(char *) ent->location + ent->allocated_size);
		total_len = (endptr - startptr);

		shm_ent_page_count = total_len / os_page_size;

		/*
		 * If we ever get 0xff (-1) back from kernel inquiry, then we probably
		 * have a bug in mapping buffers to OS pages.
		 */
		memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);

		/*
		 * Setup page_ptrs[] with pointers to all OS pages for this segment,
		 * and get the NUMA status using pg_numa_query_pages.
		 *
		 * In order to get reliable results we also need to touch memory
		 * pages, so that inquiry about NUMA memory node doesn't return -2
		 * (ENOENT, which indicates unmapped/unallocated pages).
		 */
		for (i = 0; i < shm_ent_page_count; i++)
		{
			page_ptrs[i] = startptr + (i * os_page_size);

			/* touching is only needed once per backend lifetime */
			if (firstNumaTouch)
				pg_numa_touch_mem_if_required(page_ptrs[i]);

			CHECK_FOR_INTERRUPTS();
		}

		if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
			elog(ERROR, "failed NUMA pages inquiry status: %m");

		/* Count number of NUMA nodes used for this shared memory entry */
		memset(nodes, 0, sizeof(Size) * (max_nodes + 2));

		for (i = 0; i < shm_ent_page_count; i++)
		{
			int			s = pages_status[i];

			/* Ensure we are adding only valid index to the array */
			if (s >= 0 && s <= max_nodes)
			{
				/* valid NUMA node */
				nodes[s]++;
				continue;
			}
			else if (s == -2)
			{
				/* -2 means ENOENT (e.g. page was moved to swap) */
				nodes[max_nodes + 1]++;
				continue;
			}

			/* anything else indicates a bug in our page mapping, bail out */
			elog(ERROR, "invalid NUMA node id outside of allowed range "
				 "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
		}

		/* no NULLs for regular nodes */
		memset(nulls, 0, sizeof(nulls));

		/*
		 * Add one entry for each NUMA node, including those without allocated
		 * memory for this segment.
		 */
		for (i = 0; i <= max_nodes; i++)
		{
			values[0] = CStringGetTextDatum(ent->key);
			values[1] = Int32GetDatum(i);
			values[2] = Int64GetDatum(nodes[i] * os_page_size);

			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
								 values, nulls);
		}

		/* The last entry is used for pages without a NUMA node. */
		nulls[1] = true;
		values[0] = CStringGetTextDatum(ent->key);
		values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	LWLockRelease(ShmemIndexLock);
	/* subsequent calls can skip the touch pass */
	firstNumaTouch = false;

	return (Datum) 0;
}
1263 :
1264 : /*
1265 : * Determine the memory page size used for the shared memory segment.
1266 : *
1267 : * If the shared segment was allocated using huge pages, returns the size of
1268 : * a huge page. Otherwise returns the size of regular memory page.
1269 : *
1270 : * This should be used only after the server is started.
1271 : */
1272 : Size
1273 2 : pg_get_shmem_pagesize(void)
1274 : {
1275 : Size os_page_size;
1276 : #ifdef WIN32
1277 : SYSTEM_INFO sysinfo;
1278 :
1279 : GetSystemInfo(&sysinfo);
1280 : os_page_size = sysinfo.dwPageSize;
1281 : #else
1282 2 : os_page_size = sysconf(_SC_PAGESIZE);
1283 : #endif
1284 :
1285 : Assert(IsUnderPostmaster);
1286 : Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
1287 :
1288 2 : if (huge_pages_status == HUGE_PAGES_ON)
1289 0 : GetHugePageSize(&os_page_size, NULL);
1290 :
1291 2 : return os_page_size;
1292 : }
1293 :
1294 : Datum
1295 5 : pg_numa_available(PG_FUNCTION_ARGS)
1296 : {
1297 5 : PG_RETURN_BOOL(pg_numa_init() != -1);
1298 : }
|