LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - shmem.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 60.6 % 175 106
Test Date: 2026-03-17 07:15:15 Functions: 92.9 % 14 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * shmem.c
       4              :  *    create shared memory and initialize shared memory data structures.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/storage/ipc/shmem.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : /*
      16              :  * POSTGRES processes share one or more regions of shared memory.
      17              :  * The shared memory is created by a postmaster and is inherited
      18              :  * by each backend via fork() (or, in some ports, via other OS-specific
      19              :  * methods).  The routines in this file are used for allocating and
      20              :  * binding to shared memory data structures.
      21              :  *
      22              :  * NOTES:
      23              :  *      (a) There are three kinds of shared memory data structures
      24              :  *  available to POSTGRES: fixed-size structures, queues and hash
      25              :  *  tables.  Fixed-size structures contain things like global variables
      26              :  *  for a module and should never be allocated after the shared memory
      27              :  *  initialization phase.  Hash tables have a fixed maximum size, but
      28              :  *  their actual size can vary dynamically.  When entries are added
      29              :  *  to the table, more space is allocated.  Queues link data structures
      30              :  *  that have been allocated either within fixed-size structures or as hash
      31              :  *  buckets.  Each shared data structure has a string name to identify
      32              :  *  it (assigned in the module that declares it).
      33              :  *
      34              :  *      (b) During initialization, each module looks for its
      35              :  *  shared data structures in a hash table called the "Shmem Index".
      36              :  *  If the data structure is not present, the caller can allocate
      37              :  *  a new one and initialize it.  If the data structure is present,
      38              :  *  the caller "attaches" to the structure by initializing a pointer
      39              :  *  in the local address space.
      40              :  *      The shmem index has two purposes: first, it gives us
      41              :  *  a simple model of how the world looks when a backend process
      42              :  *  initializes.  If something is present in the shmem index,
      43              :  *  it is initialized.  If it is not, it is uninitialized.  Second,
      44              :  *  the shmem index allows us to allocate shared memory on demand
      45              :  *  instead of trying to preallocate structures and hard-wire the
      46              :  *  sizes and locations in header files.  If you are using a lot
      47              :  *  of shared memory in a lot of different places (and changing
      48              :  *  things during development), this is important.
      49              :  *
      50              :  *      (c) In standard Unix-ish environments, individual backends do not
      51              :  *  need to re-establish their local pointers into shared memory, because
      52              :  *  they inherit correct values of those variables via fork() from the
      53              :  *  postmaster.  However, this does not work in the EXEC_BACKEND case.
      54              :  *  In ports using EXEC_BACKEND, new backends have to set up their local
      55              :  *  pointers using the method described in (b) above.
      56              :  *
      57              :  *      (d) memory allocation model: shared memory can never be
      58              :  *  freed, once allocated.   Each hash table has its own free list,
      59              :  *  so hash buckets can be reused when an item is deleted.  However,
      60              :  *  if one hash table grows very large and then shrinks, its space
      61              :  *  cannot be redistributed to other tables.  We could build a simple
      62              :  *  hash bucket garbage collector if need be.  Right now, it seems
      63              :  *  unnecessary.
      64              :  */
      65              : 
      66              : #include "postgres.h"
      67              : 
      68              : #include <unistd.h>
      69              : 
      70              : #include "common/int.h"
      71              : #include "fmgr.h"
      72              : #include "funcapi.h"
      73              : #include "miscadmin.h"
      74              : #include "port/pg_numa.h"
      75              : #include "storage/lwlock.h"
      76              : #include "storage/pg_shmem.h"
      77              : #include "storage/shmem.h"
      78              : #include "storage/spin.h"
      79              : #include "utils/builtins.h"
      80              : #include "utils/tuplestore.h"
      81              : 
/*
 * This is the first data structure stored in the shared memory segment, at
 * the offset that PGShmemHeader->content_offset points to.  Allocations by
 * ShmemAlloc() are carved out of the space after this.
 *
 * For the base pointer and the total size of the shmem segment, we rely on
 * the PGShmemHeader.
 */
typedef struct ShmemAllocatorData
{
    Size        free_offset;    /* offset to first free space from ShmemBase */
    HASHHDR    *index;          /* location of ShmemIndex */

    /* protects shared memory and LWLock allocation */
    slock_t     shmem_lock;
} ShmemAllocatorData;

/* internal allocator; returns NULL (not error) when out of space */
static void *ShmemAllocRaw(Size size, Size *allocated_size);
     100              : 
/* shared memory global variables */

static PGShmemHeader *ShmemSegHdr;  /* shared mem segment header */
static void *ShmemBase;         /* start address of shared memory */
static void *ShmemEnd;          /* end+1 address of shared memory */

/* allocator bookkeeping, stored inside the segment itself */
static ShmemAllocatorData *ShmemAllocator;
slock_t    *ShmemLock;          /* points to ShmemAllocator->shmem_lock */
static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */

/* To get reliable results for NUMA inquiry we need to "touch pages" once */
static bool firstNumaTouch = true;

/* forward declaration for the SQL-callable NUMA probe */
Datum       pg_numa_available(PG_FUNCTION_ARGS);
     115              : 
/*
 *  InitShmemAllocator() --- set up basic pointers to shared memory.
 *
 * Called at postmaster or stand-alone backend startup, to initialize the
 * allocator's data structure in the shared memory segment.  In EXEC_BACKEND,
 * this is also called at backend startup, to set up pointers to the shared
 * memory areas.
 */
void
InitShmemAllocator(PGShmemHeader *seghdr)
{
    Assert(seghdr != NULL);

    /*
     * We assume the pointer and offset are MAXALIGN.  Not a hard requirement,
     * but it's true today and keeps the math below simpler.
     */
    Assert(seghdr == (void *) MAXALIGN(seghdr));
    Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));

    /* Establish the per-process view of the segment's bounds. */
    ShmemSegHdr = seghdr;
    ShmemBase = seghdr;
    ShmemEnd = (char *) ShmemBase + seghdr->totalsize;

#ifndef EXEC_BACKEND
    Assert(!IsUnderPostmaster);
#endif
    if (IsUnderPostmaster)
    {
        /*
         * EXEC_BACKEND child: the postmaster already initialized the
         * allocator inside the segment; just attach local pointers to it.
         */
        PGShmemHeader *shmhdr = ShmemSegHdr;

        ShmemAllocator = (ShmemAllocatorData *) ((char *) shmhdr + shmhdr->content_offset);
        ShmemLock = &ShmemAllocator->shmem_lock;
    }
    else
    {
        Size        offset;

        /*
         * Allocations after this point should go through ShmemAlloc, which
         * expects to allocate everything on cache line boundaries.  Make sure
         * the first allocation begins on a cache line boundary.
         */
        offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
        if (offset > seghdr->totalsize)
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of shared memory (%zu bytes requested)",
                            offset)));

        /* Place the allocator header at the segment's content offset. */
        ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);

        SpinLockInit(&ShmemAllocator->shmem_lock);
        ShmemLock = &ShmemAllocator->shmem_lock;
        ShmemAllocator->free_offset = offset;
        /* ShmemIndex can't be set up yet (need LWLocks first) */
        ShmemAllocator->index = NULL;
        ShmemIndex = (HTAB *) NULL;
    }
}
     176              : 
     177              : /*
     178              :  * ShmemAlloc -- allocate max-aligned chunk from shared memory
     179              :  *
     180              :  * Throws error if request cannot be satisfied.
     181              :  *
     182              :  * Assumes ShmemLock and ShmemSegHdr are initialized.
     183              :  */
     184              : void *
     185         3498 : ShmemAlloc(Size size)
     186              : {
     187              :     void       *newSpace;
     188              :     Size        allocated_size;
     189              : 
     190         3498 :     newSpace = ShmemAllocRaw(size, &allocated_size);
     191         3498 :     if (!newSpace)
     192            0 :         ereport(ERROR,
     193              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     194              :                  errmsg("out of shared memory (%zu bytes requested)",
     195              :                         size)));
     196         3498 :     return newSpace;
     197              : }
     198              : 
     199              : /*
     200              :  * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
     201              :  *
     202              :  * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
     203              :  */
     204              : void *
     205       471241 : ShmemAllocNoError(Size size)
     206              : {
     207              :     Size        allocated_size;
     208              : 
     209       471241 :     return ShmemAllocRaw(size, &allocated_size);
     210              : }
     211              : 
     212              : /*
     213              :  * ShmemAllocRaw -- allocate align chunk and return allocated size
     214              :  *
     215              :  * Also sets *allocated_size to the number of bytes allocated, which will
     216              :  * be equal to the number requested plus any padding we choose to add.
     217              :  */
     218              : static void *
     219       560948 : ShmemAllocRaw(Size size, Size *allocated_size)
     220              : {
     221              :     Size        newStart;
     222              :     Size        newFree;
     223              :     void       *newSpace;
     224              : 
     225              :     /*
     226              :      * Ensure all space is adequately aligned.  We used to only MAXALIGN this
     227              :      * space but experience has proved that on modern systems that is not good
     228              :      * enough.  Many parts of the system are very sensitive to critical data
     229              :      * structures getting split across cache line boundaries.  To avoid that,
     230              :      * attempt to align the beginning of the allocation to a cache line
     231              :      * boundary.  The calling code will still need to be careful about how it
     232              :      * uses the allocated space - e.g. by padding each element in an array of
     233              :      * structures out to a power-of-two size - but without this, even that
     234              :      * won't be sufficient.
     235              :      */
     236       560948 :     size = CACHELINEALIGN(size);
     237       560948 :     *allocated_size = size;
     238              : 
     239              :     Assert(ShmemSegHdr != NULL);
     240              : 
     241       560948 :     SpinLockAcquire(ShmemLock);
     242              : 
     243       560948 :     newStart = ShmemAllocator->free_offset;
     244              : 
     245       560948 :     newFree = newStart + size;
     246       560948 :     if (newFree <= ShmemSegHdr->totalsize)
     247              :     {
     248       560948 :         newSpace = (char *) ShmemBase + newStart;
     249       560948 :         ShmemAllocator->free_offset = newFree;
     250              :     }
     251              :     else
     252            0 :         newSpace = NULL;
     253              : 
     254       560948 :     SpinLockRelease(ShmemLock);
     255              : 
     256              :     /* note this assert is okay with newSpace == NULL */
     257              :     Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
     258              : 
     259       560948 :     return newSpace;
     260              : }
     261              : 
     262              : /*
     263              :  * ShmemAddrIsValid -- test if an address refers to shared memory
     264              :  *
     265              :  * Returns true if the pointer points within the shared memory segment.
     266              :  */
     267              : bool
     268            0 : ShmemAddrIsValid(const void *addr)
     269              : {
     270            0 :     return (addr >= ShmemBase) && (addr < ShmemEnd);
     271              : }
     272              : 
     273              : /*
     274              :  *  InitShmemIndex() --- set up or attach to shmem index table.
     275              :  */
     276              : void
     277         1165 : InitShmemIndex(void)
     278              : {
     279              :     HASHCTL     info;
     280              : 
     281              :     /*
     282              :      * Create the shared memory shmem index.
     283              :      *
     284              :      * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
     285              :      * hashtable to exist already, we have a bit of a circularity problem in
     286              :      * initializing the ShmemIndex itself.  The special "ShmemIndex" hash
     287              :      * table name will tell ShmemInitStruct to fake it.
     288              :      */
     289         1165 :     info.keysize = SHMEM_INDEX_KEYSIZE;
     290         1165 :     info.entrysize = sizeof(ShmemIndexEnt);
     291              : 
     292         1165 :     ShmemIndex = ShmemInitHash("ShmemIndex",
     293              :                                SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
     294              :                                &info,
     295              :                                HASH_ELEM | HASH_STRINGS);
     296         1165 : }
     297              : 
     298              : /*
     299              :  * ShmemInitHash -- Create and initialize, or attach to, a
     300              :  *      shared memory hash table.
     301              :  *
     302              :  * We assume caller is doing some kind of synchronization
     303              :  * so that two processes don't try to create/initialize the same
     304              :  * table at once.  (In practice, all creations are done in the postmaster
     305              :  * process; child processes should always be attaching to existing tables.)
     306              :  *
     307              :  * max_size is the estimated maximum number of hashtable entries.  This is
     308              :  * not a hard limit, but the access efficiency will degrade if it is
     309              :  * exceeded substantially (since it's used to compute directory size and
     310              :  * the hash table buckets will get overfull).
     311              :  *
     312              :  * init_size is the number of hashtable entries to preallocate.  For a table
     313              :  * whose maximum size is certain, this should be equal to max_size; that
     314              :  * ensures that no run-time out-of-shared-memory failures can occur.
     315              :  *
     316              :  * *infoP and hash_flags must specify at least the entry sizes and key
     317              :  * comparison semantics (see hash_create()).  Flag bits and values specific
     318              :  * to shared-memory hash tables are added here, except that callers may
     319              :  * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
     320              :  *
     321              :  * Note: before Postgres 9.0, this function returned NULL for some failure
     322              :  * cases.  Now, it always throws error instead, so callers need not check
     323              :  * for NULL.
     324              :  */
     325              : HTAB *
     326        10492 : ShmemInitHash(const char *name,     /* table string name for shmem index */
     327              :               int64 init_size,  /* initial table size */
     328              :               int64 max_size,   /* max size of the table */
     329              :               HASHCTL *infoP,   /* info about key and bucket size */
     330              :               int hash_flags)   /* info about infoP */
     331              : {
     332              :     bool        found;
     333              :     void       *location;
     334              : 
     335              :     /*
     336              :      * Hash tables allocated in shared memory have a fixed directory; it can't
     337              :      * grow or other backends wouldn't be able to find it. So, make sure we
     338              :      * make it big enough to start with.
     339              :      *
     340              :      * The shared memory allocator must be specified too.
     341              :      */
     342        10492 :     infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
     343        10492 :     infoP->alloc = ShmemAllocNoError;
     344        10492 :     hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
     345              : 
     346              :     /* look it up in the shmem index */
     347        10492 :     location = ShmemInitStruct(name,
     348              :                                hash_get_shared_size(infoP, hash_flags),
     349              :                                &found);
     350              : 
     351              :     /*
     352              :      * if it already exists, attach to it rather than allocate and initialize
     353              :      * new space
     354              :      */
     355        10492 :     if (found)
     356            0 :         hash_flags |= HASH_ATTACH;
     357              : 
     358              :     /* Pass location of hashtable header to hash_create */
     359        10492 :     infoP->hctl = (HASHHDR *) location;
     360              : 
     361        10492 :     return hash_create(name, init_size, infoP, hash_flags);
     362              : }
     363              : 
/*
 * ShmemInitStruct -- Create/attach to a structure in shared memory.
 *
 *      This is called during initialization to find or allocate
 *      a data structure in shared memory.  If no other process
 *      has created the structure, this routine allocates space
 *      for it.  If it exists already, a pointer to the existing
 *      structure is returned.
 *
 *  Returns: pointer to the object.  *foundPtr is set true if the object was
 *      already in the shmem index (hence, already initialized).
 *
 *  Note: before Postgres 9.0, this function returned NULL for some failure
 *  cases.  Now, it always throws error instead, so callers need not check
 *  for NULL.
 */
void *
ShmemInitStruct(const char *name, Size size, bool *foundPtr)
{
    ShmemIndexEnt *result;
    void       *structPtr;

    LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);

    if (!ShmemIndex)
    {
        /* Must be trying to create/attach to ShmemIndex itself */
        Assert(strcmp(name, "ShmemIndex") == 0);

        if (IsUnderPostmaster)
        {
            /* Must be initializing a (non-standalone) backend */
            Assert(ShmemAllocator->index != NULL);
            /* Attach to the index space the postmaster already allocated. */
            structPtr = ShmemAllocator->index;
            *foundPtr = true;
        }
        else
        {
            /*
             * If the shmem index doesn't exist, we are bootstrapping: we must
             * be trying to init the shmem index itself.
             *
             * Notice that the ShmemIndexLock is released before the shmem
             * index has been initialized.  This should be OK because no other
             * process can be accessing shared memory yet.
             */
            Assert(ShmemAllocator->index == NULL);
            structPtr = ShmemAlloc(size);
            /* Record the index's location so EXEC_BACKEND children find it. */
            ShmemAllocator->index = structPtr;
            *foundPtr = false;
        }
        LWLockRelease(ShmemIndexLock);
        return structPtr;
    }

    /* look it up in the shmem index */
    result = (ShmemIndexEnt *)
        hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);

    /* HASH_ENTER_NULL returns NULL (rather than erroring) on OOM */
    if (!result)
    {
        /* release the lock before erroring out, to avoid self-deadlock */
        LWLockRelease(ShmemIndexLock);
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("could not create ShmemIndex entry for data structure \"%s\"",
                        name)));
    }

    if (*foundPtr)
    {
        /*
         * Structure is in the shmem index so someone else has allocated it
         * already.  The size better be the same as the size we are trying to
         * initialize to, or there is a name conflict (or worse).
         */
        if (result->size != size)
        {
            LWLockRelease(ShmemIndexLock);
            ereport(ERROR,
                    (errmsg("ShmemIndex entry size is wrong for data structure"
                            " \"%s\": expected %zu, actual %zu",
                            name, size, result->size)));
        }
        structPtr = result->location;
    }
    else
    {
        Size        allocated_size;

        /* It isn't in the table yet. allocate and initialize it */
        structPtr = ShmemAllocRaw(size, &allocated_size);
        if (structPtr == NULL)
        {
            /* out of memory; remove the failed ShmemIndex entry */
            hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
            LWLockRelease(ShmemIndexLock);
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("not enough shared memory for data structure"
                            " \"%s\" (%zu bytes requested)",
                            name, size)));
        }
        /* fill in the new index entry (requested vs. padded size both kept) */
        result->size = size;
        result->allocated_size = allocated_size;
        result->location = structPtr;
    }

    LWLockRelease(ShmemIndexLock);

    Assert(ShmemAddrIsValid(structPtr));

    Assert(structPtr == (void *) CACHELINEALIGN(structPtr));

    return structPtr;
}
     479              : 
     480              : 
     481              : /*
     482              :  * Add two Size values, checking for overflow
     483              :  */
     484              : Size
     485       587417 : add_size(Size s1, Size s2)
     486              : {
     487              :     Size        result;
     488              : 
     489       587417 :     if (pg_add_size_overflow(s1, s2, &result))
     490            0 :         ereport(ERROR,
     491              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     492              :                  errmsg("requested shared memory size overflows size_t")));
     493       587417 :     return result;
     494              : }
     495              : 
     496              : /*
     497              :  * Multiply two Size values, checking for overflow
     498              :  */
     499              : Size
     500       277784 : mul_size(Size s1, Size s2)
     501              : {
     502              :     Size        result;
     503              : 
     504       277784 :     if (pg_mul_size_overflow(s1, s2, &result))
     505            0 :         ereport(ERROR,
     506              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     507              :                  errmsg("requested shared memory size overflows size_t")));
     508       277784 :     return result;
     509              : }
     510              : 
/* SQL SRF showing allocated shared memory */
Datum
pg_get_shmem_allocations(PG_FUNCTION_ARGS)
{
#define PG_GET_SHMEM_SIZES_COLS 4
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    HASH_SEQ_STATUS hstat;
    ShmemIndexEnt *ent;
    Size        named_allocated = 0;
    Datum       values[PG_GET_SHMEM_SIZES_COLS];
    bool        nulls[PG_GET_SHMEM_SIZES_COLS];

    InitMaterializedSRF(fcinfo, 0);

    /* shared lock suffices: we only read the index and allocator state */
    LWLockAcquire(ShmemIndexLock, LW_SHARED);

    hash_seq_init(&hstat, ShmemIndex);

    /* output all allocated entries */
    memset(nulls, 0, sizeof(nulls));
    while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
    {
        /* columns: name, offset from segment start, requested size, padded size */
        values[0] = CStringGetTextDatum(ent->key);
        values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
        values[2] = Int64GetDatum(ent->size);
        values[3] = Int64GetDatum(ent->allocated_size);
        named_allocated += ent->allocated_size;

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                             values, nulls);
    }

    /* output shared memory allocated but not counted via the shmem index */
    values[0] = CStringGetTextDatum("<anonymous>");
    /* anonymous space is scattered, so no single offset applies */
    nulls[1] = true;
    values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
    values[3] = values[2];
    tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);

    /* output as-of-yet unused shared memory */
    nulls[0] = true;            /* the free region has no name */
    values[1] = Int64GetDatum(ShmemAllocator->free_offset);
    nulls[1] = false;           /* re-enable offset column from previous row */
    values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
    values[3] = values[2];
    tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);

    LWLockRelease(ShmemIndexLock);

    return (Datum) 0;
}
     562              : 
/*
 * SQL SRF showing NUMA memory nodes for allocated shared memory
 *
 * Compared to pg_get_shmem_allocations(), this function does not return
 * information about shared anonymous allocations and unused shared memory.
 *
 * For each shmem index entry, emits one row per NUMA node (name, node id,
 * bytes on that node) plus one final row with a NULL node id counting pages
 * the kernel reported as having no node (status -2 / ENOENT).
 */
Datum
pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
{
#define PG_GET_SHMEM_NUMA_SIZES_COLS 3
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	HASH_SEQ_STATUS hstat;
	ShmemIndexEnt *ent;
	Datum		values[PG_GET_SHMEM_NUMA_SIZES_COLS];
	bool		nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
	Size		os_page_size;
	void	  **page_ptrs;		/* one pointer per OS page of current entry */
	int		   *pages_status;	/* per-page node id or negative errno code */
	uint64		shm_total_page_count,
				shm_ent_page_count,
				max_nodes;
	Size	   *nodes;			/* page counts: [0..max_nodes] per node,
								 * [max_nodes + 1] for pages without a node */

	if (pg_numa_init() == -1)
		elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");

	InitMaterializedSRF(fcinfo, 0);

	/* max_nodes is the highest node id, hence +1 slots, +1 more for ENOENT */
	max_nodes = pg_numa_get_max_node();
	nodes = palloc_array(Size, max_nodes + 2);

	/*
	 * Shared memory allocations can vary in size and may not align with OS
	 * memory page boundaries, while NUMA queries work on pages.
	 *
	 * To correctly map each allocation to NUMA nodes, we need to: 1.
	 * Determine the OS memory page size. 2. Align each allocation's start/end
	 * addresses to page boundaries. 3. Query NUMA node information for all
	 * pages spanning the allocation.
	 */
	os_page_size = pg_get_shmem_pagesize();

	/*
	 * Allocate memory for page pointers and status based on total shared
	 * memory size. This simplified approach allocates enough space for all
	 * pages in shared memory rather than calculating the exact requirements
	 * for each segment.
	 *
	 * Add 1, because we don't know how exactly the segments align to OS
	 * pages, so the allocation might use one more memory page. In practice
	 * this is not very likely, and moreover we have more entries, each of
	 * them using only fraction of the total pages.
	 */
	shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
	page_ptrs = palloc0_array(void *, shm_total_page_count);
	pages_status = palloc_array(int, shm_total_page_count);

	if (firstNumaTouch)
		elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");

	LWLockAcquire(ShmemIndexLock, LW_SHARED);

	hash_seq_init(&hstat, ShmemIndex);

	/* output all allocated entries */
	while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
	{
		int			i;
		char	   *startptr,
				   *endptr;
		Size		total_len;

		/*
		 * Calculate the range of OS pages used by this segment. The segment
		 * may start / end half-way through a page, we want to count these
		 * pages too. So we align the start/end pointers down/up, and then
		 * calculate the number of pages from that.
		 */
		startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
		endptr = (char *) TYPEALIGN(os_page_size,
									(char *) ent->location + ent->allocated_size);
		total_len = (endptr - startptr);

		/* fits within pages_status[]: each entry spans a subset of the segment */
		shm_ent_page_count = total_len / os_page_size;

		/*
		 * If we ever get 0xff (-1) back from kernel inquiry, then we probably
		 * have a bug in mapping buffers to OS pages.
		 */
		memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);

		/*
		 * Setup page_ptrs[] with pointers to all OS pages for this segment,
		 * and get the NUMA status using pg_numa_query_pages.
		 *
		 * In order to get reliable results we also need to touch memory
		 * pages, so that inquiry about NUMA memory node doesn't return -2
		 * (ENOENT, which indicates unmapped/unallocated pages).
		 */
		for (i = 0; i < shm_ent_page_count; i++)
		{
			page_ptrs[i] = startptr + (i * os_page_size);

			/* only needed on the first call in this backend */
			if (firstNumaTouch)
				pg_numa_touch_mem_if_required(page_ptrs[i]);

			CHECK_FOR_INTERRUPTS();
		}

		if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
			elog(ERROR, "failed NUMA pages inquiry status: %m");

		/* Count number of NUMA nodes used for this shared memory entry */
		memset(nodes, 0, sizeof(Size) * (max_nodes + 2));

		for (i = 0; i < shm_ent_page_count; i++)
		{
			int			s = pages_status[i];

			/* Ensure we are adding only valid index to the array */
			if (s >= 0 && s <= max_nodes)
			{
				/* valid NUMA node */
				nodes[s]++;
				continue;
			}
			else if (s == -2)
			{
				/* -2 means ENOENT (e.g. page was moved to swap) */
				nodes[max_nodes + 1]++;
				continue;
			}

			/* any other negative status indicates a mapping bug on our side */
			elog(ERROR, "invalid NUMA node id outside of allowed range "
				 "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
		}

		/* no NULLs for regular nodes */
		memset(nulls, 0, sizeof(nulls));

		/*
		 * Add one entry for each NUMA node, including those without allocated
		 * memory for this segment.
		 */
		for (i = 0; i <= max_nodes; i++)
		{
			values[0] = CStringGetTextDatum(ent->key);
			values[1] = Int32GetDatum(i);
			values[2] = Int64GetDatum(nodes[i] * os_page_size);

			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
								 values, nulls);
		}

		/* The last entry is used for pages without a NUMA node. */
		nulls[1] = true;
		values[0] = CStringGetTextDatum(ent->key);
		values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	LWLockRelease(ShmemIndexLock);
	/* subsequent calls in this backend can skip the touch pass */
	firstNumaTouch = false;

	return (Datum) 0;
}
     731              : 
     732              : /*
     733              :  * Determine the memory page size used for the shared memory segment.
     734              :  *
     735              :  * If the shared segment was allocated using huge pages, returns the size of
     736              :  * a huge page. Otherwise returns the size of regular memory page.
     737              :  *
     738              :  * This should be used only after the server is started.
     739              :  */
     740              : Size
     741            2 : pg_get_shmem_pagesize(void)
     742              : {
     743              :     Size        os_page_size;
     744              : #ifdef WIN32
     745              :     SYSTEM_INFO sysinfo;
     746              : 
     747              :     GetSystemInfo(&sysinfo);
     748              :     os_page_size = sysinfo.dwPageSize;
     749              : #else
     750            2 :     os_page_size = sysconf(_SC_PAGESIZE);
     751              : #endif
     752              : 
     753              :     Assert(IsUnderPostmaster);
     754              :     Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
     755              : 
     756            2 :     if (huge_pages_status == HUGE_PAGES_ON)
     757            0 :         GetHugePageSize(&os_page_size, NULL);
     758              : 
     759            2 :     return os_page_size;
     760              : }
     761              : 
     762              : Datum
     763            4 : pg_numa_available(PG_FUNCTION_ARGS)
     764              : {
     765            4 :     PG_RETURN_BOOL(pg_numa_init() != -1);
     766              : }
        

Generated by: LCOV version 2.0-1