LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - shmem.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 68.8 % 301 207
Test Date: 2026-04-06 14:16:21 Functions: 86.4 % 22 19
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * shmem.c
       4              :  *    create shared memory and initialize shared memory data structures.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/storage/ipc/shmem.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : /*
      16              :  * POSTGRES processes share one or more regions of shared memory.
      17              :  * The shared memory is created by a postmaster and is inherited
      18              :  * by each backend via fork() (or, in some ports, via other OS-specific
      19              :  * methods).  The routines in this file are used for allocating and
      20              :  * binding to shared memory data structures.
      21              :  *
      22              :  * This module provides facilities to allocate fixed-size structures in shared
      23              :  * memory, for things like variables shared between all backend processes.
      24              :  * Each such structure has a string name to identify it, specified when it is
      25              :  * requested.  shmem_hash.c provides a shared hash table implementation on top
      26              :  * of that.
      27              :  *
      28              :  * Shared memory areas should usually not be allocated after postmaster
      29              :  * startup, although we do allow small allocations later for the benefit of
      30              :  * extension modules that are loaded after startup.  Despite that allowance,
      31              :  * extensions that need shared memory should be added in
      32              :  * shared_preload_libraries, because the allowance is quite small and there is
      33              :  * no guarantee that any memory is available after startup.
      34              :  *
      35              :  * Nowadays, there is also another way to allocate shared memory called
      36              :  * Dynamic Shared Memory.  See dsm.c for that facility.  One big difference
      37              :  * between traditional shared memory handled by shmem.c and dynamic shared
      38              :  * memory is that traditional shared memory areas are mapped to the same
      39              :  * address in all processes, so you can use normal pointers in shared memory
      40              :  * structs.  With Dynamic Shared Memory, you must use offsets or DSA pointers
      41              :  * instead.
      42              :  *
      43              :  * Shared memory managed by shmem.c can never be freed, once allocated.  Each
      44              :  * hash table has its own free list, so hash buckets can be reused when an
      45              :  * item is deleted.
      46              :  *
      47              :  * Usage
      48              :  * -----
      49              :  *
      50              :  * To allocate shared memory, you need to register a set of callback functions
      51              :  * which handle the lifecycle of the allocation.  In the request_fn callback,
      52              :  * call ShmemRequestStruct() with the desired name and size.  When the area is
      53              :  * later allocated or attached to, the global variable pointed to by the .ptr
      54              :  * option is set to the shared memory location of the allocation.  The init_fn
      55              :  * callback can perform additional initialization.
      56              :  *
      57              :  *  typedef struct MyShmemData {
      58              :  *      ...
      59              :  *  } MyShmemData;
      60              :  *
      61              :  *  static MyShmemData *MyShmem;
      62              :  *
      63              :  *  static void my_shmem_request(void *arg);
      64              :  *  static void my_shmem_init(void *arg);
      65              :  *
      66              :  *  const ShmemCallbacks MyShmemCallbacks = {
      67              :  *      .request_fn = my_shmem_request,
      68              :  *      .init_fn = my_shmem_init,
      69              :  *  };
      70              :  *
      71              :  *  static void
      72              :  *  my_shmem_request(void *arg)
      73              :  *  {
      74              :  *      ShmemRequestStruct(.name = "My shmem area",
      75              :  *                         .size = sizeof(MyShmemData),
      76              :  *                         .ptr = (void **) &MyShmem,
      77              :  *          );
      78              :  *  }
      79              :  *
      80              :  * In builtin PostgreSQL code, add the callbacks to the list in
      81              :  * src/include/storage/subsystemlist.h.  In an add-in module, you can register
      82              :  * the callbacks by calling RegisterShmemCallbacks(&MyShmemCallbacks) in the
      83              :  * extension's _PG_init() function.
      84              :  *
      85              :  * Lifecycle
      86              :  * ---------
      87              :  *
      88              :  * Initializing shared memory happens in multiple phases.  In the first phase,
      89              :  * during postmaster startup, all the request_fn callbacks are called.  Only
      90              :  * after all the request_fn callbacks have been called and all the shmem areas
      91              :  * have been requested by the ShmemRequestStruct() calls do we know how much
      92              :  * shared memory we need in total.  After that, the postmaster allocates the
      93              :  * global shared memory segment, and calls all the init_fn callbacks to
      94              :  * initialize all the requested shmem areas.
      95              :  *
      96              :  * In standard Unix-ish environments, individual backends do not need to
      97              :  * re-establish their local pointers into shared memory, because they inherit
      98              :  * correct values of those variables via fork() from the postmaster.  However,
      99              :  * this does not work in the EXEC_BACKEND case.  In ports using EXEC_BACKEND,
     100              :  * backend startup also calls the shmem_request callbacks to re-establish the
     101              :  * knowledge about each shared memory area, sets the pointer variables
     102              :  * (*options->ptr), and calls the attach_fn callback, if any, for additional
     103              :  * per-backend setup.
     104              :  *
     105              :  * Legacy ShmemInitStruct()/ShmemInitHash() functions
     106              :  * --------------------------------------------------
     107              :  *
     108              :  * ShmemInitStruct()/ShmemInitHash() is another way of registering shmem
     109              :  * areas.  It pre-dates the ShmemRequestStruct()/ShmemRequestHash() functions,
     110              :  * and should not be used in new code, but as of this writing it is still
     111              :  * widely used in extensions.
     112              :  *
     113              :  * To allocate a shmem area with ShmemInitStruct(), you need to separately
     114              :  * register the size needed for the area by calling RequestAddinShmemSpace()
     115              :  * from the extension's shmem_request_hook, and allocate the area by calling
     116              :  * ShmemInitStruct() from the extension's shmem_startup_hook.  There are no
     117              :  * init/attach callbacks.  Instead, the caller of ShmemInitStruct() must check
     118              :  * the return status of ShmemInitStruct() and initialize the struct if it was
     119              :  * not previously initialized.
     120              :  *
     121              :  * Calling ShmemAlloc() directly
     122              :  * -----------------------------
     123              :  *
     124              :  * There's a more low-level way of allocating shared memory too: you can call
     125              :  * ShmemAlloc() directly.  It's used to implement the higher level mechanisms,
     126              :  * and should generally not be called directly.
     127              :  */
     128              : 
     129              : #include "postgres.h"
     130              : 
     131              : #include <unistd.h>
     132              : 
     133              : #include "access/slru.h"
     134              : #include "common/int.h"
     135              : #include "fmgr.h"
     136              : #include "funcapi.h"
     137              : #include "miscadmin.h"
     138              : #include "port/pg_bitutils.h"
     139              : #include "port/pg_numa.h"
     140              : #include "storage/lwlock.h"
     141              : #include "storage/pg_shmem.h"
     142              : #include "storage/shmem.h"
     143              : #include "storage/shmem_internal.h"
     144              : #include "storage/spin.h"
     145              : #include "utils/builtins.h"
     146              : #include "utils/tuplestore.h"
     147              : 
     148              : /*
     149              :  * Registered callbacks.
     150              :  *
     151              :  * During postmaster startup, we accumulate the callbacks from all subsystems
     152              :  * in this list.
     153              :  *
     154              :  * This is in process private memory, although on Unix-like systems, we expect
     155              :  * all the registrations to happen at postmaster startup time and be inherited
     156              :  * by all the child processes via fork().
     157              :  */
     158              : static List *registered_shmem_callbacks;
     159              : 
     160              : /*
     161              :  * In the shmem request phase, all the shmem areas requested with the
     162              :  * ShmemRequest*() functions are accumulated here.
     163              :  */
     164              : typedef struct
     165              : {
     166              :     ShmemStructOpts *options;
     167              :     ShmemRequestKind kind;
     168              : } ShmemRequest;
     169              : 
     170              : static List *pending_shmem_requests;
     171              : 
     172              : /*
     173              :  * Per-process state machine, for sanity checking that we do things in the
     174              :  * right order.
     175              :  *
     176              :  * Postmaster:
     177              :  *   INITIAL -> REQUESTING -> INITIALIZING -> DONE
     178              :  *
     179              :  * Backends in EXEC_BACKEND mode:
     180              :  *   INITIAL -> REQUESTING -> ATTACHING -> DONE
     181              :  *
     182              :  * Late request:
     183              :  *   DONE -> REQUESTING -> AFTER_STARTUP_ATTACH_OR_INIT -> DONE
     184              :  */
     185              : enum shmem_request_state
     186              : {
     187              :     /* Initial state */
     188              :     SRS_INITIAL,
     189              : 
     190              :     /*
     191              :      * When we start calling the shmem_request callbacks, we enter the
     192              :      * SRS_REQUESTING phase.  All ShmemRequestStruct calls happen in this
     193              :      * state.
     194              :      */
     195              :     SRS_REQUESTING,
     196              : 
     197              :     /*
     198              :      * Postmaster has finished all shmem requests, and is now initializing the
     199              :      * shared memory segment.  init_fn callbacks are called in this state.
     200              :      */
     201              :     SRS_INITIALIZING,
     202              : 
     203              :     /*
     204              :      * A postmaster child process is starting up.  attach_fn callbacks are
     205              :      * called in this state.
     206              :      */
     207              :     SRS_ATTACHING,
     208              : 
     209              :     /* An after-startup allocation or attachment is in progress */
     210              :     SRS_AFTER_STARTUP_ATTACH_OR_INIT,
     211              : 
     212              :     /* Normal state after shmem initialization / attachment */
     213              :     SRS_DONE,
     214              : };
     215              : static enum shmem_request_state shmem_request_state = SRS_INITIAL;
     216              : 
     217              : /*
     218              :  * This is the first data structure stored in the shared memory segment, at
     219              :  * the offset that PGShmemHeader->content_offset points to.  Allocations by
     220              :  * ShmemAlloc() are carved out of the space after this.
     221              :  *
     222              :  * For the base pointer and the total size of the shmem segment, we rely on
     223              :  * the PGShmemHeader.
     224              :  */
     225              : typedef struct ShmemAllocatorData
     226              : {
     227              :     Size        free_offset;    /* offset to first free space from ShmemBase */
     228              : 
     229              :     /* protects 'free_offset' */
     230              :     slock_t     shmem_lock;
     231              : 
     232              :     HASHHDR    *index;          /* location of ShmemIndex */
     233              :     size_t      index_size;     /* size of shmem region holding ShmemIndex */
     234              :     LWLock      index_lock;     /* protects ShmemIndex */
     235              : } ShmemAllocatorData;
     236              : 
     237              : #define ShmemIndexLock (&ShmemAllocator->index_lock)
     238              : 
     239              : static void *ShmemAllocRaw(Size size, Size alignment, Size *allocated_size);
     240              : 
     241              : /* shared memory global variables */
     242              : 
     243              : static PGShmemHeader *ShmemSegHdr;  /* shared mem segment header */
     244              : static void *ShmemBase;         /* start address of shared memory */
     245              : static void *ShmemEnd;          /* end+1 address of shared memory */
     246              : 
     247              : static ShmemAllocatorData *ShmemAllocator;
     248              : 
     249              : /*
     250              :  * ShmemIndex is a global directory of shmem areas, itself also stored in the
     251              :  * shared memory.
     252              :  */
     253              : static HTAB *ShmemIndex;
     254              : 
     255              :  /* max size of data structure string name */
     256              : #define SHMEM_INDEX_KEYSIZE      (48)
     257              : 
     258              : /*
     259              :  * # of additional entries to reserve in the shmem index table, for
     260              :  * allocations after postmaster startup.  (This is not a hard limit; the hash
     261              :  * table can grow larger than that if there is shared memory available.)
     262              :  */
     263              : #define SHMEM_INDEX_ADDITIONAL_SIZE      (128)
     264              : 
     265              : /* this is a hash bucket in the shmem index table */
     266              : typedef struct
     267              : {
     268              :     char        key[SHMEM_INDEX_KEYSIZE];   /* string name */
     269              :     void       *location;       /* location in shared mem */
     270              :     Size        size;           /* # bytes requested for the structure */
     271              :     Size        allocated_size; /* # bytes actually allocated */
     272              : } ShmemIndexEnt;
     273              : 
     274              : /* To get reliable results for NUMA inquiry we need to "touch pages" once */
     275              : static bool firstNumaTouch = true;
     276              : 
     277              : static void CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks);
     278              : static void InitShmemIndexEntry(ShmemRequest *request);
     279              : static bool AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok);
     280              : 
     281              : Datum       pg_numa_available(PG_FUNCTION_ARGS);
     282              : 
     283              : /*
     284              :  *  ShmemRequestStruct() --- request a named shared memory area
     285              :  *
     286              :  * Subsystems call this to register their shared memory needs.  This is
     287              :  * usually done early in postmaster startup, before the shared memory segment
     288              :  * has been created, so that the size can be included in the estimate for
     289              :  * total amount of shared memory needed.  We set aside a small amount of
     290              :  * memory for allocations that happen later, for the benefit of non-preloaded
     291              :  * extensions, but that should not be relied upon.
     292              :  *
     293              :  * This does not yet allocate the memory, but merely registers the need for
     294              :  * it.  The actual allocation happens later in the postmaster startup
     295              :  * sequence.
     296              :  *
     297              :  * This must be called from a shmem_request callback function, registered with
     298              :  * RegisterShmemCallbacks().  This enforces a coding pattern that works the
     299              :  * same in normal Unix systems and with EXEC_BACKEND.  On Unix systems, the
     300              :  * shmem_request callbacks are called once, early in postmaster startup, and
     301              :  * the child processes inherit the struct descriptors and any other
     302              :  * per-process state from the postmaster.  In EXEC_BACKEND mode, shmem_request
     303              :  * callbacks are *also* called in each backend, at backend startup, to
     304              :  * re-establish the struct descriptors.  By calling the same function in both
     305              :  * cases, we ensure that all the shmem areas are registered the same way in
     306              :  * all processes.
     307              :  *
     308              :  * 'options' defines the name and size of the area, and any other optional
     309              :  * features.  Leave unused options as zeros.  The options are copied to
     310              :  * longer-lived memory, so the options struct doesn't need to live after the
     311              :  * ShmemRequestStruct() call and can be a local variable in the calling
     312              :  * function.  The 'name' must point to a long-lived string, though, because
     313              :  * only the pointer to it is copied.
     314              :  */
     315              : void
     316        77676 : ShmemRequestStructWithOpts(const ShmemStructOpts *options)
     317              : {
     318              :     ShmemStructOpts *options_copy;
     319              : 
     320        77676 :     options_copy = MemoryContextAlloc(TopMemoryContext,
     321              :                                       sizeof(ShmemStructOpts));
     322        77676 :     memcpy(options_copy, options, sizeof(ShmemStructOpts));
     323              : 
     324        77676 :     ShmemRequestInternal(options_copy, SHMEM_KIND_STRUCT);
     325        77676 : }
     326              : 
     327              : /*
     328              :  * Internal workhorse of ShmemRequestStruct() and ShmemRequestHash().
     329              :  *
     330              :  * Note: Unlike in the public ShmemRequestStruct() and ShmemRequestHash()
     331              :  * functions, 'options' is *not* copied.  It must be allocated in
     332              :  * TopMemoryContext by the caller, and will be freed after the init/attach
     333              :  * callbacks have been called.  This allows ShmemRequestHash() to pass a
     334              :  * pointer to the extended ShmemHashOpts struct instead.
     335              :  */
     336              : void
     337        96186 : ShmemRequestInternal(ShmemStructOpts *options, ShmemRequestKind kind)
     338              : {
     339              :     ShmemRequest *request;
     340              : 
     341              :     /* Check the options */
     342        96186 :     if (options->name == NULL)
     343            0 :         elog(ERROR, "shared memory request is missing 'name' option");
     344              : 
     345        96186 :     if (IsUnderPostmaster)
     346              :     {
     347            3 :         if (options->size <= 0 && options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
     348            0 :             elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
     349              :                  options->size, options->name);
     350              :     }
     351              :     else
     352              :     {
     353        96183 :         if (options->size == SHMEM_ATTACH_UNKNOWN_SIZE)
     354            0 :             elog(ERROR, "SHMEM_ATTACH_UNKNOWN_SIZE cannot be used during startup");
     355        96183 :         if (options->size <= 0)
     356            0 :             elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
     357              :                  options->size, options->name);
     358              :     }
     359              : 
     360        96186 :     if (options->alignment != 0 && pg_nextpower2_size_t(options->alignment) != options->alignment)
     361            0 :         elog(ERROR, "invalid alignment %zu for shared memory request for \"%s\"",
     362              :              options->alignment, options->name);
     363              : 
     364              :     /* Check that we're in the right state */
     365        96186 :     if (shmem_request_state != SRS_REQUESTING)
     366            0 :         elog(ERROR, "ShmemRequestStruct can only be called from a shmem_request callback");
     367              : 
     368              :     /* Check that it's not already registered in this process */
     369      3895821 :     foreach_ptr(ShmemRequest, existing, pending_shmem_requests)
     370              :     {
     371      3703449 :         if (strcmp(existing->options->name, options->name) == 0)
     372            0 :             ereport(ERROR,
     373              :                     (errmsg("shared memory struct \"%s\" is already registered",
     374              :                             options->name)));
     375              :     }
     376              : 
     377              :     /* Request looks valid, remember it */
     378        96186 :     request = palloc(sizeof(ShmemRequest));
     379        96186 :     request->options = options;
     380        96186 :     request->kind = kind;
     381        96186 :     pending_shmem_requests = lappend(pending_shmem_requests, request);
     382        96186 : }
     383              : 
     384              : /*
     385              :  *  ShmemGetRequestedSize() --- estimate the total size of all registered shared
     386              :  *                              memory structures.
     387              :  *
     388              :  * This is called at postmaster startup, before the shared memory segment has
     389              :  * been created.
     390              :  */
     391              : size_t
     392         2289 : ShmemGetRequestedSize(void)
     393              : {
     394              :     size_t      size;
     395              : 
     396              :     /* memory needed for the ShmemIndex */
     397         2289 :     size = hash_estimate_size(list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE,
     398              :                               sizeof(ShmemIndexEnt));
     399         2289 :     size = CACHELINEALIGN(size);
     400              : 
     401              :     /* memory needed for all the requested areas */
     402       183140 :     foreach_ptr(ShmemRequest, request, pending_shmem_requests)
     403              :     {
     404       178562 :         size_t      alignment = request->options->alignment;
     405              : 
     406              :         /* pad the start address for alignment like ShmemAllocRaw() does */
     407       178562 :         if (alignment < PG_CACHE_LINE_SIZE)
     408       171695 :             alignment = PG_CACHE_LINE_SIZE;
     409       178562 :         size = TYPEALIGN(alignment, size);
     410              : 
     411       178562 :         size = add_size(size, request->options->size);
     412              :     }
     413              : 
     414         2289 :     return size;
     415              : }
     416              : 
     417              : /*
     418              :  *  ShmemInitRequested() --- allocate and initialize requested shared memory
     419              :  *                            structures.
     420              :  *
     421              :  * This is called once at postmaster startup, after the shared memory segment
     422              :  * has been created.
     423              :  */
     424              : void
     425         1230 : ShmemInitRequested(void)
     426              : {
     427              :     /* should be called only by the postmaster or a standalone backend */
     428              :     Assert(!IsUnderPostmaster);
     429              :     Assert(shmem_request_state == SRS_INITIALIZING);
     430              : 
     431              :     /*
     432              :      * Initialize the ShmemIndex entries and perform basic initialization of
     433              :      * all the requested memory areas.  There are no concurrent processes yet,
     434              :      * so no need for locking.
     435              :      */
     436        98409 :     foreach_ptr(ShmemRequest, request, pending_shmem_requests)
     437              :     {
     438        95949 :         InitShmemIndexEntry(request);
     439        95949 :         pfree(request->options);
     440              :     }
     441         1230 :     list_free_deep(pending_shmem_requests);
     442         1230 :     pending_shmem_requests = NIL;
     443              : 
     444              :     /*
     445              :      * Call the subsystem-specific init callbacks to finish initialization of
     446              :      * all the areas.
     447              :      */
     448        56606 :     foreach_ptr(const ShmemCallbacks, callbacks, registered_shmem_callbacks)
     449              :     {
     450        54146 :         if (callbacks->init_fn)
     451        49222 :             callbacks->init_fn(callbacks->opaque_arg);
     452              :     }
     453              : 
     454         1230 :     shmem_request_state = SRS_DONE;
     455         1230 : }
     456              : 
     457              : /*
     458              :  * Re-establish process private state related to shmem areas.
     459              :  *
     460              :  * This is called at backend startup in EXEC_BACKEND mode, in every backend.
     461              :  */
     462              : #ifdef EXEC_BACKEND
     463              : void
     464              : ShmemAttachRequested(void)
     465              : {
     466              :     ListCell   *lc;
     467              : 
     468              :     /* Must be initializing a (non-standalone) backend */
     469              :     Assert(IsUnderPostmaster);
     470              :     Assert(ShmemAllocator->index != NULL);
     471              :     Assert(shmem_request_state == SRS_REQUESTING);
     472              :     shmem_request_state = SRS_ATTACHING;
     473              : 
     474              :     LWLockAcquire(ShmemIndexLock, LW_SHARED);
     475              : 
     476              :     /*
     477              :      * Attach to all the requested memory areas.
     478              :      */
     479              :     foreach_ptr(ShmemRequest, request, pending_shmem_requests)
     480              :     {
     481              :         AttachShmemIndexEntry(request, false);
     482              :         pfree(request->options);
     483              :     }
     484              :     list_free_deep(pending_shmem_requests);
     485              :     pending_shmem_requests = NIL;
     486              : 
     487              :     /* Call attach callbacks */
     488              :     foreach(lc, registered_shmem_callbacks)
     489              :     {
     490              :         const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
     491              : 
     492              :         if (callbacks->attach_fn)
     493              :             callbacks->attach_fn(callbacks->opaque_arg);
     494              :     }
     495              : 
     496              :     LWLockRelease(ShmemIndexLock);
     497              : 
     498              :     shmem_request_state = SRS_DONE;
     499              : }
     500              : #endif
     501              : 
     502              : /*
     503              :  * Insert requested shmem area into the shared memory index and initialize it.
     504              :  *
     505              :  * Note that this only performs basic initialization depending on
     506              :  * ShmemRequestKind, like setting the global pointer variable to the area for
     507              :  * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
     508              :  * This does *not* call the subsystem-specific init callbacks.  That's done
     509              :  * later after all the shmem areas have been initialized or attached to.
     510              :  */
     511              : static void
     512        95950 : InitShmemIndexEntry(ShmemRequest *request)
     513              : {
     514        95950 :     const char *name = request->options->name;
     515              :     ShmemIndexEnt *index_entry;
     516              :     bool        found;
     517              :     size_t      allocated_size;
     518              :     void       *structPtr;
     519              : 
     520              :     /* look it up in the shmem index */
     521              :     index_entry = (ShmemIndexEnt *)
     522        95950 :         hash_search(ShmemIndex, name, HASH_ENTER_NULL, &found);
     523        95950 :     if (found)
     524            0 :         elog(ERROR, "shared memory struct \"%s\" is already initialized", name);
     525        95950 :     if (!index_entry)
     526              :     {
     527              :         /* tried to add it to the hash table, but there was no space */
     528            0 :         ereport(ERROR,
     529              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     530              :                  errmsg("could not create ShmemIndex entry for data structure \"%s\"",
     531              :                         name)));
     532              :     }
     533              : 
     534              :     /*
     535              :      * We inserted the entry to the shared memory index.  Allocate requested
     536              :      * amount of shared memory for it, and initialize the index entry.
     537              :      */
     538        95950 :     structPtr = ShmemAllocRaw(request->options->size,
     539        95950 :                               request->options->alignment,
     540              :                               &allocated_size);
     541        95950 :     if (structPtr == NULL)
     542              :     {
     543              :         /* out of memory; remove the failed ShmemIndex entry */
     544            0 :         hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
     545            0 :         ereport(ERROR,
     546              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     547              :                  errmsg("not enough shared memory for data structure"
     548              :                         " \"%s\" (%zu bytes requested)",
     549              :                         name, request->options->size)));
     550              :     }
     551        95950 :     index_entry->size = request->options->size;
     552        95950 :     index_entry->allocated_size = allocated_size;
     553        95950 :     index_entry->location = structPtr;
     554              : 
     555              :     /* Initialize depending on the kind of shmem area it is */
     556        95950 :     switch (request->kind)
     557              :     {
     558        77485 :         case SHMEM_KIND_STRUCT:
     559        77485 :             if (request->options->ptr)
     560        77485 :                 *(request->options->ptr) = index_entry->location;
     561        77485 :             break;
     562         9851 :         case SHMEM_KIND_HASH:
     563         9851 :             shmem_hash_init(structPtr, request->options);
     564         9851 :             break;
     565         8614 :         case SHMEM_KIND_SLRU:
     566         8614 :             shmem_slru_init(structPtr, request->options);
     567         8614 :             break;
     568              :     }
     569        95950 : }
     570              : 
     571              : /*
     572              :  * Look up a named shmem area in the shared memory index and attach to it.
     573              :  *
     574              :  * Note that this only performs the basic attachment actions depending on
     575              :  * ShmemRequestKind, like setting the global pointer variable to the area for
     576              :  * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
     577              :  * This does *not* call the subsystem-specific attach callbacks.  That's done
     578              :  * later after all the shmem areas have been initialized or attached to.
     579              :  */
     580              : static bool
     581            2 : AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok)
     582              : {
     583            2 :     const char *name = request->options->name;
     584              :     ShmemIndexEnt *index_entry;
     585              : 
     586              :     /* Look it up in the shmem index */
     587              :     index_entry = (ShmemIndexEnt *)
     588            2 :         hash_search(ShmemIndex, name, HASH_FIND, NULL);
     589            2 :     if (!index_entry)
     590              :     {
     591            0 :         if (!missing_ok)
     592            0 :             ereport(ERROR,
     593              :                     (errmsg("could not find ShmemIndex entry for data structure \"%s\"",
     594              :                             request->options->name)));
     595            0 :         return false;
     596              :     }
     597              : 
     598              :     /* Check that the size in the index matches the request */
     599            2 :     if (index_entry->size != request->options->size &&
     600            0 :         request->options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
     601              :     {
     602            0 :         ereport(ERROR,
     603              :                 (errmsg("shared memory struct \"%s\" was created with"
     604              :                         " different size: existing %zu, requested %zu",
     605              :                         name, index_entry->size, request->options->size)));
     606              :     }
     607              : 
     608              :     /*
     609              :      * Re-establish the caller's pointer variable, or do other actions to
     610              :      * attach depending on the kind of shmem area it is.
     611              :      */
     612            2 :     switch (request->kind)
     613              :     {
     614            2 :         case SHMEM_KIND_STRUCT:
     615            2 :             if (request->options->ptr)
     616            2 :                 *(request->options->ptr) = index_entry->location;
     617            2 :             break;
     618            0 :         case SHMEM_KIND_HASH:
     619            0 :             shmem_hash_attach(index_entry->location, request->options);
     620            0 :             break;
     621            0 :         case SHMEM_KIND_SLRU:
     622            0 :             shmem_slru_attach(index_entry->location, request->options);
     623            0 :             break;
     624              :     }
     625              : 
     626            2 :     return true;
     627              : }
     628              : 
     629              : /*
     630              :  *  InitShmemAllocator() --- set up basic pointers to shared memory.
     631              :  *
     632              :  * Called at postmaster or stand-alone backend startup, to initialize the
     633              :  * allocator's data structure in the shared memory segment.  In EXEC_BACKEND,
     634              :  * this is also called at backend startup, to set up pointers to the
     635              :  * already-initialized data structure.
     636              :  */
     637              : void
     638         1230 : InitShmemAllocator(PGShmemHeader *seghdr)
     639              : {
     640              :     Size        offset;
     641              :     int64       hash_nelems;
     642              :     HASHCTL     info;
     643              :     int         hash_flags;
     644              : 
     645              : #ifndef EXEC_BACKEND
     646              :     Assert(!IsUnderPostmaster);
     647              : #endif
     648              :     Assert(seghdr != NULL);
     649              : 
     650         1230 :     if (IsUnderPostmaster)
     651              :     {
     652              :         Assert(shmem_request_state == SRS_INITIAL);
     653              :     }
     654              :     else
     655              :     {
     656              :         Assert(shmem_request_state == SRS_REQUESTING);
     657         1230 :         shmem_request_state = SRS_INITIALIZING;
     658              :     }
     659              : 
     660              :     /*
     661              :      * We assume the pointer and offset are MAXALIGN.  Not a hard requirement,
     662              :      * but it's true today and keeps the math below simpler.
     663              :      */
     664              :     Assert(seghdr == (void *) MAXALIGN(seghdr));
     665              :     Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));
     666              : 
     667              :     /*
     668              :      * Allocations after this point should go through ShmemAlloc, which
     669              :      * expects to allocate everything on cache line boundaries.  Make sure the
     670              :      * first allocation begins on a cache line boundary.
     671              :      */
     672         1230 :     offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
     673         1230 :     if (offset > seghdr->totalsize)
     674            0 :         ereport(ERROR,
     675              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     676              :                  errmsg("out of shared memory (%zu bytes requested)",
     677              :                         offset)));
     678              : 
     679              :     /*
     680              :      * In postmaster or stand-alone backend, initialize the shared memory
     681              :      * allocator so that we can allocate shared memory for ShmemIndex using
     682              :      * ShmemAlloc().  In a regular backend just set up the pointers required
     683              :      * by ShmemAlloc().
     684              :      */
     685         1230 :     ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
     686         1230 :     if (!IsUnderPostmaster)
     687              :     {
     688         1230 :         SpinLockInit(&ShmemAllocator->shmem_lock);
     689         1230 :         ShmemAllocator->free_offset = offset;
     690         1230 :         LWLockInitialize(&ShmemAllocator->index_lock, LWTRANCHE_SHMEM_INDEX);
     691              :     }
     692              : 
     693         1230 :     ShmemSegHdr = seghdr;
     694         1230 :     ShmemBase = seghdr;
     695         1230 :     ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
     696              : 
     697              :     /*
     698              :      * Create (or attach to) the shared memory index of shmem areas.
     699              :      *
     700              :      * This is the same initialization as ShmemInitHash() does, but we cannot
     701              :      * use ShmemInitHash() here because it relies on ShmemIndex being already
     702              :      * initialized.
     703              :      */
     704         1230 :     hash_nelems = list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE;
     705              : 
     706         1230 :     info.keysize = SHMEM_INDEX_KEYSIZE;
     707         1230 :     info.entrysize = sizeof(ShmemIndexEnt);
     708         1230 :     hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE;
     709              : 
     710         1230 :     if (!IsUnderPostmaster)
     711              :     {
     712         1230 :         ShmemAllocator->index_size = hash_estimate_size(hash_nelems, info.entrysize);
     713         1230 :         ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size);
     714              :     }
     715         2460 :     ShmemIndex = shmem_hash_create(ShmemAllocator->index,
     716         1230 :                                    ShmemAllocator->index_size,
     717              :                                    IsUnderPostmaster,
     718              :                                    "ShmemIndex", hash_nelems,
     719              :                                    &info, hash_flags);
     720              :     Assert(ShmemIndex != NULL);
     721              : 
     722              :     /*
     723              :      * Add an entry for ShmemIndex itself into ShmemIndex, so that it's
     724              :      * visible in the pg_shmem_allocations view
     725              :      */
     726         1230 :     if (!IsUnderPostmaster)
     727              :     {
     728              :         bool        found;
     729              :         ShmemIndexEnt *result = (ShmemIndexEnt *)
     730         1230 :             hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found);
     731              : 
     732              :         Assert(!found);
     733         1230 :         result->size = ShmemAllocator->index_size;
     734         1230 :         result->allocated_size = ShmemAllocator->index_size;
     735         1230 :         result->location = ShmemAllocator->index;
     736              :     }
     737         1230 : }
     738              : 
     739              : /*
     740              :  * Reset state on postmaster crash restart.
     741              :  */
     742              : void
     743            5 : ResetShmemAllocator(void)
     744              : {
     745              :     Assert(!IsUnderPostmaster);
     746            5 :     shmem_request_state = SRS_INITIAL;
     747              : 
     748            5 :     pending_shmem_requests = NIL;
     749              : 
     750              :     /*
     751              :      * Note that we don't clear the registered callbacks.  We will need to
     752              :      * call them again as we restart.
     753              :      */
     754            5 : }
     755              : 
     756              : /*
     757              :  * ShmemAlloc -- allocate max-aligned chunk from shared memory
     758              :  *
     759              :  * Throws error if request cannot be satisfied.
     760              :  *
     761              :  * Assumes ShmemSegHdr is initialized.
     762              :  */
     763              : void *
     764         1233 : ShmemAlloc(Size size)
     765              : {
     766              :     void       *newSpace;
     767              :     Size        allocated_size;
     768              : 
     769         1233 :     newSpace = ShmemAllocRaw(size, 0, &allocated_size);
     770         1233 :     if (!newSpace)
     771            0 :         ereport(ERROR,
     772              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     773              :                  errmsg("out of shared memory (%zu bytes requested)",
     774              :                         size)));
     775         1233 :     return newSpace;
     776              : }
     777              : 
     778              : /*
     779              :  * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
     780              :  *
     781              :  * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
     782              :  */
     783              : void *
     784            0 : ShmemAllocNoError(Size size)
     785              : {
     786              :     Size        allocated_size;
     787              : 
     788            0 :     return ShmemAllocRaw(size, 0, &allocated_size);
     789              : }
     790              : 
     791              : /*
     792              :  * ShmemAllocRaw -- allocate aligned chunk and return allocated size
     793              :  *
     794              :  * Also sets *allocated_size to the number of bytes allocated, which will
     795              :  * be equal to the number requested plus any padding we choose to add.
     796              :  */
     797              : static void *
     798        97183 : ShmemAllocRaw(Size size, Size alignment, Size *allocated_size)
     799              : {
     800              :     Size        rawStart;
     801              :     Size        newStart;
     802              :     Size        newFree;
     803              :     void       *newSpace;
     804              : 
     805              :     /*
     806              :      * Ensure all space is adequately aligned.  We used to only MAXALIGN this
     807              :      * space but experience has proved that on modern systems that is not good
     808              :      * enough.  Many parts of the system are very sensitive to critical data
     809              :      * structures getting split across cache line boundaries.  To avoid that,
     810              :      * attempt to align the beginning of the allocation to a cache line
     811              :      * boundary.  The calling code will still need to be careful about how it
     812              :      * uses the allocated space - e.g. by padding each element in an array of
     813              :      * structures out to a power-of-two size - but without this, even that
     814              :      * won't be sufficient.
     815              :      */
     816        97183 :     if (alignment < PG_CACHE_LINE_SIZE)
     817        93493 :         alignment = PG_CACHE_LINE_SIZE;
     818              : 
     819              :     Assert(ShmemSegHdr != NULL);
     820              : 
     821        97183 :     SpinLockAcquire(&ShmemAllocator->shmem_lock);
     822              : 
     823        97183 :     rawStart = ShmemAllocator->free_offset;
     824        97183 :     newStart = TYPEALIGN(alignment, rawStart);
     825              : 
     826        97183 :     newFree = newStart + size;
     827        97183 :     if (newFree <= ShmemSegHdr->totalsize)
     828              :     {
     829        97183 :         newSpace = (char *) ShmemBase + newStart;
     830        97183 :         ShmemAllocator->free_offset = newFree;
     831              :     }
     832              :     else
     833            0 :         newSpace = NULL;
     834              : 
     835        97183 :     SpinLockRelease(&ShmemAllocator->shmem_lock);
     836              : 
     837              :     /* note this assert is okay with newSpace == NULL */
     838              :     Assert(newSpace == (void *) TYPEALIGN(alignment, newSpace));
     839              : 
     840        97183 :     *allocated_size = newFree - rawStart;
     841        97183 :     return newSpace;
     842              : }
     843              : 
     844              : /*
     845              :  * ShmemAddrIsValid -- test if an address refers to shared memory
     846              :  *
     847              :  * Returns true if the pointer points within the shared memory segment.
     848              :  */
     849              : bool
     850            0 : ShmemAddrIsValid(const void *addr)
     851              : {
     852            0 :     return (addr >= ShmemBase) && (addr < ShmemEnd);
     853              : }
     854              : 
     855              : /*
     856              :  * Register callbacks that define a shared memory area (or multiple areas).
     857              :  *
     858              :  * The system will call the callbacks at different stages of postmaster or
     859              :  * backend startup, to allocate and initialize the area.
     860              :  *
     861              :  * This is normally called early during postmaster startup, but if the
     862              :  * SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP flag is set, this can also be used after
     863              :  * startup, although after startup there's no guarantee that there's enough
     864              :  * shared memory available.  When called after startup, this immediately calls
     865              :  * the right callbacks depending on whether another backend had already
     866              :  * initialized the area.
     867              :  *
     868              :  * Note: In EXEC_BACKEND mode, this needs to be called in every backend
     869              :  * process.  That's needed because we cannot pass down the callback function
     870              :  * pointers from the postmaster process, because different processes may have
     871              :  * loaded libraries to different addresses.
     872              :  */
     873              : void
     874        54631 : RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
     875              : {
     876        54631 :     if (shmem_request_state == SRS_DONE && IsUnderPostmaster)
     877              :     {
     878              :         /*
     879              :          * After-startup initialization or attachment.  Call the appropriate
     880              :          * callbacks immediately.
     881              :          */
     882            3 :         if ((callbacks->flags & SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP) == 0)
     883            0 :             elog(ERROR, "cannot request shared memory at this time");
     884              : 
     885            3 :         CallShmemCallbacksAfterStartup(callbacks);
     886              :     }
     887              :     else
     888              :     {
     889              :         /* Remember the callbacks for later */
     890        54628 :         registered_shmem_callbacks = lappend(registered_shmem_callbacks,
     891              :                                              (void *) callbacks);
     892              :     }
     893        54631 : }
     894              : 
     895              : /*
     896              :  * Register a shmem area (or multiple areas) after startup.
     897              :  */
     898              : static void
     899            3 : CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks)
     900              : {
     901              :     bool        found_any;
     902              :     bool        notfound_any;
     903              : 
     904              :     Assert(shmem_request_state == SRS_DONE);
     905            3 :     shmem_request_state = SRS_REQUESTING;
     906              : 
     907              :     /*
     908              :      * Call the request callback first.  The callback makes ShmemRequest*()
     909              :      * calls for each shmem area, adding them to pending_shmem_requests.
     910              :      */
     911              :     Assert(pending_shmem_requests == NIL);
     912            3 :     if (callbacks->request_fn)
     913            3 :         callbacks->request_fn(callbacks->opaque_arg);
     914            3 :     shmem_request_state = SRS_AFTER_STARTUP_ATTACH_OR_INIT;
     915              : 
     916            3 :     if (pending_shmem_requests == NIL)
     917              :     {
     918            0 :         shmem_request_state = SRS_DONE;
     919            0 :         return;
     920              :     }
     921              : 
     922              :     /* Hold ShmemIndexLock while we allocate all the shmem entries */
     923            3 :     LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
     924              : 
     925              :     /*
     926              :      * Check if the requested shared memory areas have already been
     927              :      * initialized.  We assume all the areas requested by the request callback
     928              :      * to form a coherent unit such that they're all already initialized or
     929              :      * none.  Otherwise it would be ambiguous which callback, init or attach,
     930              :      * to call afterwards.
     931              :      */
     932            3 :     found_any = notfound_any = false;
     933            9 :     foreach_ptr(ShmemRequest, request, pending_shmem_requests)
     934              :     {
     935            3 :         if (hash_search(ShmemIndex, request->options->name, HASH_FIND, NULL))
     936            2 :             found_any = true;
     937              :         else
     938            1 :             notfound_any = true;
     939              :     }
     940            3 :     if (found_any && notfound_any)
     941            0 :         elog(ERROR, "found some but not all");
     942              : 
     943              :     /*
     944              :      * Allocate or attach all the shmem areas requested by the request_fn
     945              :      * callback.
     946              :      */
     947            9 :     foreach_ptr(ShmemRequest, request, pending_shmem_requests)
     948              :     {
     949            3 :         if (found_any)
     950            2 :             AttachShmemIndexEntry(request, false);
     951              :         else
     952            1 :             InitShmemIndexEntry(request);
     953              : 
     954            3 :         pfree(request->options);
     955              :     }
     956            3 :     list_free_deep(pending_shmem_requests);
     957            3 :     pending_shmem_requests = NIL;
     958              : 
     959              :     /* Finish by calling the appropriate subsystem-specific callback */
     960            3 :     if (found_any)
     961              :     {
     962            2 :         if (callbacks->attach_fn)
     963            2 :             callbacks->attach_fn(callbacks->opaque_arg);
     964              :     }
     965              :     else
     966              :     {
     967            1 :         if (callbacks->init_fn)
     968            1 :             callbacks->init_fn(callbacks->opaque_arg);
     969              :     }
     970              : 
     971            3 :     LWLockRelease(ShmemIndexLock);
     972            3 :     shmem_request_state = SRS_DONE;
     973              : }
     974              : 
     975              : /*
     976              :  * Call all shmem request callbacks.
     977              :  */
     978              : void
     979         1233 : ShmemCallRequestCallbacks(void)
     980              : {
     981              :     ListCell   *lc;
     982              : 
     983              :     Assert(shmem_request_state == SRS_INITIAL);
     984         1233 :     shmem_request_state = SRS_REQUESTING;
     985              : 
     986        55511 :     foreach(lc, registered_shmem_callbacks)
     987              :     {
     988        54278 :         const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
     989              : 
     990        54278 :         if (callbacks->request_fn)
     991        54278 :             callbacks->request_fn(callbacks->opaque_arg);
     992              :     }
     993         1233 : }
     994              : 
     995              : /*
     996              :  * ShmemInitStruct -- Create/attach to a structure in shared memory.
     997              :  *
     998              :  *      This is called during initialization to find or allocate
     999              :  *      a data structure in shared memory.  If no other process
    1000              :  *      has created the structure, this routine allocates space
    1001              :  *      for it.  If it exists already, a pointer to the existing
    1002              :  *      structure is returned.
    1003              :  *
    1004              :  *  Returns: pointer to the object.  *foundPtr is set true if the object was
    1005              :  *      already in the shmem index (hence, already initialized).
    1006              :  *
    1007              :  * Note: This is a legacy interface, kept for backwards compatibility with
    1008              :  * extensions.  Use ShmemRequestStruct() in new code!
    1009              :  */
    1010              : void *
    1011            0 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
    1012              : {
    1013            0 :     void       *ptr = NULL;
    1014            0 :     ShmemStructOpts options = {
    1015              :         .name = name,
    1016              :         .size = size,
    1017              :         .ptr = &ptr,
    1018              :     };
    1019            0 :     ShmemRequest request = {&options, SHMEM_KIND_STRUCT};
    1020              : 
    1021              :     Assert(shmem_request_state == SRS_DONE ||
    1022              :            shmem_request_state == SRS_INITIALIZING ||
    1023              :            shmem_request_state == SRS_REQUESTING);
    1024              : 
    1025            0 :     LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
    1026              : 
    1027              :     /*
    1028              :      * If running under the postmaster, look up the existing entry, if any.
    1029              :      */
    1030            0 :     *foundPtr = false;
    1031            0 :     if (IsUnderPostmaster)
    1032            0 :         *foundPtr = AttachShmemIndexEntry(&request, true);
    1033              : 
    1034              :     /* Initialize it if not found */
    1035            0 :     if (!*foundPtr)
    1036            0 :         InitShmemIndexEntry(&request);
    1037              : 
    1038            0 :     LWLockRelease(ShmemIndexLock);
    1039              : 
    1040              :     Assert(ptr != NULL);
    1041            0 :     return ptr;
    1042              : }
    1043              : 
    1044              : /*
    1045              :  * Add two Size values, checking for overflow
    1046              :  */
    1047              : Size
    1048       499320 : add_size(Size s1, Size s2)
    1049              : {
    1050              :     Size        result;
    1051              : 
    1052       499320 :     if (pg_add_size_overflow(s1, s2, &result))
    1053            0 :         ereport(ERROR,
    1054              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1055              :                  errmsg("requested shared memory size overflows size_t")));
    1056       499320 :     return result;
    1057              : }
    1058              : 
    1059              : /*
    1060              :  * Multiply two Size values, checking for overflow
    1061              :  */
    1062              : Size
    1063       124946 : mul_size(Size s1, Size s2)
    1064              : {
    1065              :     Size        result;
    1066              : 
    1067       124946 :     if (pg_mul_size_overflow(s1, s2, &result))
    1068            0 :         ereport(ERROR,
    1069              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1070              :                  errmsg("requested shared memory size overflows size_t")));
    1071       124946 :     return result;
    1072              : }
    1073              : 
    1074              : /* SQL SRF showing allocated shared memory: one row per ShmemIndex entry, then an <anonymous> row and an unused-space row */
    1075              : Datum
    1076            4 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
    1077              : {
    1078              : #define PG_GET_SHMEM_SIZES_COLS 4 /* entry key, offset, size, allocated size */
    1079            4 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1080              :     HASH_SEQ_STATUS hstat;
    1081              :     ShmemIndexEnt *ent;
    1082            4 :     Size        named_allocated = 0; /* sum of allocated_size over all index entries */
    1083              :     Datum       values[PG_GET_SHMEM_SIZES_COLS];
    1084              :     bool        nulls[PG_GET_SHMEM_SIZES_COLS];
    1085              : 
    1086            4 :     InitMaterializedSRF(fcinfo, 0);
    1087              : 
    1088            4 :     LWLockAcquire(ShmemIndexLock, LW_SHARED); /* held until all rows, including the two summary rows, are stored */
    1089              : 
    1090            4 :     hash_seq_init(&hstat, ShmemIndex);
    1091              : 
    1092              :     /* output all allocated entries; no column is NULL for these rows */
    1093            4 :     memset(nulls, 0, sizeof(nulls));
    1094          322 :     while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
    1095              :     {
    1096          318 :         values[0] = CStringGetTextDatum(ent->key);
    1097          318 :         values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr); /* byte distance from ShmemSegHdr */
    1098          318 :         values[2] = Int64GetDatum(ent->size);
    1099          318 :         values[3] = Int64GetDatum(ent->allocated_size);
    1100          318 :         named_allocated += ent->allocated_size;
    1101              : 
    1102          318 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
    1103              :                              values, nulls);
    1104              :     }
    1105              : 
    1106              :     /* output shared memory allocated but not counted via the shmem index: free_offset minus the indexed total, offset is NULL */
    1107            4 :     values[0] = CStringGetTextDatum("<anonymous>");
    1108            4 :     nulls[1] = true;
    1109            4 :     values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
    1110            4 :     values[3] = values[2];
    1111            4 :     tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    1112              : 
    1113              :     /* output as-of-yet unused shared memory: key is NULL, offset is free_offset, size is totalsize minus free_offset */
    1114            4 :     nulls[0] = true;
    1115            4 :     values[1] = Int64GetDatum(ShmemAllocator->free_offset);
    1116            4 :     nulls[1] = false;
    1117            4 :     values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
    1118            4 :     values[3] = values[2];
    1119            4 :     tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    1120              : 
    1121            4 :     LWLockRelease(ShmemIndexLock);
    1122              : 
    1123            4 :     return (Datum) 0; /* the rows themselves were stored in rsinfo->setResult above */
    1124              : }
    1125              : 
    1126              : /*
    1127              :  * SQL SRF showing NUMA memory nodes for allocated shared memory.  For each
    1128              :  * ShmemIndex entry it emits one row per node plus one row with a NULL node.
    1129              :  * Unlike pg_get_shmem_allocations(), this function does not return
    1130              :  * information about shared anonymous allocations and unused shared memory.
    1131              :  */
    1132              : Datum
    1133            4 : pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
    1134              : {
    1135              : #define PG_GET_SHMEM_NUMA_SIZES_COLS 3 /* entry key, NUMA node, size in bytes */
    1136            4 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1137              :     HASH_SEQ_STATUS hstat;
    1138              :     ShmemIndexEnt *ent;
    1139              :     Datum       values[PG_GET_SHMEM_NUMA_SIZES_COLS];
    1140              :     bool        nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
    1141              :     Size        os_page_size;
    1142              :     void      **page_ptrs;
    1143              :     int        *pages_status;
    1144              :     uint64      shm_total_page_count,
    1145              :                 shm_ent_page_count,
    1146              :                 max_nodes;
    1147              :     Size       *nodes; /* per-node page counters; the last slot counts pages with status -2 */
    1148              : 
    1149            4 :     if (pg_numa_init() == -1)
    1150            4 :         elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
    1151              : 
    1152            0 :     InitMaterializedSRF(fcinfo, 0);
    1153              : 
    1154            0 :     max_nodes = pg_numa_get_max_node();
    1155            0 :     nodes = palloc_array(Size, max_nodes + 2); /* slots 0..max_nodes, plus one for status -2 */
    1156              : 
    1157              :     /*
    1158              :      * Shared memory allocations can vary in size and may not align with OS
    1159              :      * memory page boundaries, while NUMA queries work on pages.
    1160              :      *
    1161              :      * To correctly map each allocation to NUMA nodes, we need to: (1)
    1162              :      * determine the OS memory page size; (2) align each allocation's start/end
    1163              :      * addresses to page boundaries; (3) query NUMA node information for all
    1164              :      * pages spanning the allocation.
    1165              :      */
    1166            0 :     os_page_size = pg_get_shmem_pagesize();
    1167              : 
    1168              :     /*
    1169              :      * Allocate memory for page pointers and status based on total shared
    1170              :      * memory size. This simplified approach allocates enough space for all
    1171              :      * pages in shared memory rather than calculating the exact requirements
    1172              :      * for each segment. Both arrays are reused for every entry in the loop.
    1173              :      *
    1174              :      * Add 1, because we don't know exactly how the segments align to OS
    1175              :      * pages, so the allocation might use one more memory page. In practice
    1176              :      * this is not very likely, and moreover we have more entries, each of
    1177              :      * them using only a fraction of the total pages.
    1178              :      */
    1179            0 :     shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
    1180            0 :     page_ptrs = palloc0_array(void *, shm_total_page_count);
    1181            0 :     pages_status = palloc_array(int, shm_total_page_count);
    1182              : 
    1183            0 :     if (firstNumaTouch)
    1184            0 :         elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");
    1185              : 
    1186            0 :     LWLockAcquire(ShmemIndexLock, LW_SHARED); /* held across the whole scan below */
    1187              : 
    1188            0 :     hash_seq_init(&hstat, ShmemIndex);
    1189              : 
    1190              :     /* emit the rows for every ShmemIndex entry, one entry per iteration */
    1191            0 :     while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
    1192              :     {
    1193              :         int         i;
    1194              :         char       *startptr,
    1195              :                    *endptr;
    1196              :         Size        total_len;
    1197              : 
    1198              :         /*
    1199              :          * Calculate the range of OS pages used by this segment. The segment
    1200              :          * may start / end half-way through a page, and we want to count these
    1201              :          * pages too. So we align the start/end pointers down/up, and then
    1202              :          * calculate the number of pages from that.
    1203              :          */
    1204            0 :         startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
    1205            0 :         endptr = (char *) TYPEALIGN(os_page_size,
    1206              :                                     (char *) ent->location + ent->allocated_size);
    1207            0 :         total_len = (endptr - startptr);
    1208              : 
    1209            0 :         shm_ent_page_count = total_len / os_page_size;
    1210              : 
    1211              :         /*
    1212              :          * Pre-fill the statuses with 0xff bytes (-1 as int); seeing -1 after
    1213              :          * the inquiry probably means a bug in mapping buffers to OS pages.
    1214              :          */
    1215            0 :         memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);
    1216              : 
    1217              :         /*
    1218              :          * Setup page_ptrs[] with pointers to all OS pages for this segment,
    1219              :          * and get the NUMA status using pg_numa_query_pages.
    1220              :          *
    1221              :          * In order to get reliable results we also need to touch memory
    1222              :          * pages, so that inquiry about NUMA memory node doesn't return -2
    1223              :          * (ENOENT, which indicates unmapped/unallocated pages).
    1224              :          */
    1225            0 :         for (i = 0; i < shm_ent_page_count; i++)
    1226              :         {
    1227            0 :             page_ptrs[i] = startptr + (i * os_page_size);
    1228              : 
    1229            0 :             if (firstNumaTouch)
    1230              :                 pg_numa_touch_mem_if_required(page_ptrs[i]);
    1231              : 
    1232            0 :             CHECK_FOR_INTERRUPTS();
    1233              :         }
    1234              : 
    1235            0 :         if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
    1236            0 :             elog(ERROR, "failed NUMA pages inquiry status: %m");
    1237              : 
    1238              :         /* Count the pages found on each NUMA node for this shared memory entry */
    1239            0 :         memset(nodes, 0, sizeof(Size) * (max_nodes + 2));
    1240              : 
    1241            0 :         for (i = 0; i < shm_ent_page_count; i++)
    1242              :         {
    1243            0 :             int         s = pages_status[i];
    1244              : 
    1245              :             /* Ensure we only index the array with a valid node id */
    1246            0 :             if (s >= 0 && s <= max_nodes)
    1247              :             {
    1248              :                 /* valid NUMA node */
    1249            0 :                 nodes[s]++;
    1250            0 :                 continue;
    1251              :             }
    1252            0 :             else if (s == -2)
    1253              :             {
    1254              :                 /* -2 means ENOENT (e.g. page was moved to swap) */
    1255            0 :                 nodes[max_nodes + 1]++;
    1256            0 :                 continue;
    1257              :             }
    1258              :             /* any other status, including the -1 pre-fill, ends up here */
    1259            0 :             elog(ERROR, "invalid NUMA node id outside of allowed range "
    1260              :                  "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
    1261              :         }
    1262              : 
    1263              :         /* no NULLs for regular nodes */
    1264            0 :         memset(nulls, 0, sizeof(nulls));
    1265              : 
    1266              :         /*
    1267              :          * Add one entry for each NUMA node, including those without allocated
    1268              :          * memory for this segment. Sizes are page counts times os_page_size.
    1269              :          */
    1270            0 :         for (i = 0; i <= max_nodes; i++)
    1271              :         {
    1272            0 :             values[0] = CStringGetTextDatum(ent->key);
    1273            0 :             values[1] = Int32GetDatum(i);
    1274            0 :             values[2] = Int64GetDatum(nodes[i] * os_page_size);
    1275              : 
    1276            0 :             tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
    1277              :                                  values, nulls);
    1278              :         }
    1279              : 
    1280              :         /* The last row has a NULL node and reports the pages whose status was -2. */
    1281            0 :         nulls[1] = true;
    1282            0 :         values[0] = CStringGetTextDatum(ent->key);
    1283            0 :         values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);
    1284              : 
    1285            0 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
    1286              :                              values, nulls);
    1287              :     }
    1288              : 
    1289            0 :     LWLockRelease(ShmemIndexLock);
    1290            0 :     firstNumaTouch = false; /* the page-touching step above is skipped while this flag is false */
    1291              : 
    1292            0 :     return (Datum) 0; /* the rows themselves were stored in rsinfo->setResult above */
    1293              : }
    1294              : 
    1295              : /*
    1296              :  * Determine the memory page size used for the shared memory segment.
    1297              :  *
    1298              :  * If the shared segment was allocated using huge pages, returns the size of
    1299              :  * a huge page. Otherwise returns the size of a regular memory page.
    1300              :  *
    1301              :  * This should be used only after the server is started; see the Asserts below.
    1302              :  */
    1303              : Size
    1304            2 : pg_get_shmem_pagesize(void)
    1305              : {
    1306              :     Size        os_page_size;
    1307              : #ifdef WIN32
    1308              :     SYSTEM_INFO sysinfo;
    1309              : 
    1310              :     GetSystemInfo(&sysinfo);
    1311              :     os_page_size = sysinfo.dwPageSize; /* regular page size on Windows */
    1312              : #else
    1313            2 :     os_page_size = sysconf(_SC_PAGESIZE); /* regular page size elsewhere */
    1314              : #endif
    1315              : 
    1316              :     Assert(IsUnderPostmaster);
    1317              :     Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
    1318              : 
    1319            2 :     if (huge_pages_status == HUGE_PAGES_ON)
    1320            0 :         GetHugePageSize(&os_page_size, NULL); /* presumably overwrites os_page_size with the huge page size */
    1321              : 
    1322            2 :     return os_page_size;
    1323              : }
    1324              : /* SQL-callable function: returns true unless pg_numa_init() reports failure (-1) */
    1325              : Datum
    1326            5 : pg_numa_available(PG_FUNCTION_ARGS)
    1327              : {
    1328            5 :     PG_RETURN_BOOL(pg_numa_init() != -1);
    1329              : }
        

Generated by: LCOV version 2.0-1