LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 106 175 60.6 %
Date: 2026-02-07 08:18:04 Functions: 13 14 92.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * shmem.c
       4             :  *    create shared memory and initialize shared memory data structures.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/ipc/shmem.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : /*
      16             :  * POSTGRES processes share one or more regions of shared memory.
      17             :  * The shared memory is created by a postmaster and is inherited
      18             :  * by each backend via fork() (or, in some ports, via other OS-specific
      19             :  * methods).  The routines in this file are used for allocating and
      20             :  * binding to shared memory data structures.
      21             :  *
      22             :  * NOTES:
      23             :  *      (a) There are three kinds of shared memory data structures
      24             :  *  available to POSTGRES: fixed-size structures, queues and hash
      25             :  *  tables.  Fixed-size structures contain things like global variables
      26             :  *  for a module and should never be allocated after the shared memory
      27             :  *  initialization phase.  Hash tables have a fixed maximum size, but
      28             :  *  their actual size can vary dynamically.  When entries are added
      29             :  *  to the table, more space is allocated.  Queues link data structures
      30             :  *  that have been allocated either within fixed-size structures or as hash
      31             :  *  buckets.  Each shared data structure has a string name to identify
      32             :  *  it (assigned in the module that declares it).
      33             :  *
      34             :  *      (b) During initialization, each module looks for its
      35             :  *  shared data structures in a hash table called the "Shmem Index".
      36             :  *  If the data structure is not present, the caller can allocate
      37             :  *  a new one and initialize it.  If the data structure is present,
      38             :  *  the caller "attaches" to the structure by initializing a pointer
      39             :  *  in the local address space.
      40             :  *      The shmem index has two purposes: first, it gives us
      41             :  *  a simple model of how the world looks when a backend process
      42             :  *  initializes.  If something is present in the shmem index,
      43             :  *  it is initialized.  If it is not, it is uninitialized.  Second,
      44             :  *  the shmem index allows us to allocate shared memory on demand
      45             :  *  instead of trying to preallocate structures and hard-wire the
      46             :  *  sizes and locations in header files.  If you are using a lot
      47             :  *  of shared memory in a lot of different places (and changing
      48             :  *  things during development), this is important.
      49             :  *
      50             :  *      (c) In standard Unix-ish environments, individual backends do not
      51             :  *  need to re-establish their local pointers into shared memory, because
      52             :  *  they inherit correct values of those variables via fork() from the
      53             :  *  postmaster.  However, this does not work in the EXEC_BACKEND case.
      54             :  *  In ports using EXEC_BACKEND, new backends have to set up their local
      55             :  *  pointers using the method described in (b) above.
      56             :  *
      57             :  *      (d) memory allocation model: shared memory can never be
      58             :  *  freed, once allocated.   Each hash table has its own free list,
      59             :  *  so hash buckets can be reused when an item is deleted.  However,
      60             :  *  if one hash table grows very large and then shrinks, its space
      61             :  *  cannot be redistributed to other tables.  We could build a simple
      62             :  *  hash bucket garbage collector if need be.  Right now, it seems
      63             :  *  unnecessary.
      64             :  */
      65             : 
      66             : #include "postgres.h"
      67             : 
      68             : #include "common/int.h"
      69             : #include "fmgr.h"
      70             : #include "funcapi.h"
      71             : #include "miscadmin.h"
      72             : #include "port/pg_numa.h"
      73             : #include "storage/lwlock.h"
      74             : #include "storage/pg_shmem.h"
      75             : #include "storage/shmem.h"
      76             : #include "storage/spin.h"
      77             : #include "utils/builtins.h"
      78             : 
      79             : /*
      80             :  * This is the first data structure stored in the shared memory segment, at
      81             :  * the offset that PGShmemHeader->content_offset points to.  Allocations by
      82             :  * ShmemAlloc() are carved out of the space after this.
      83             :  *
      84             :  * For the base pointer and the total size of the shmem segment, we rely on
      85             :  * the PGShmemHeader.
      86             :  */
      87             : typedef struct ShmemAllocatorData
      88             : {
      89             :     Size        free_offset;    /* offset to first free space from ShmemBase */
      90             :     HTAB       *index;          /* copy of ShmemIndex */
      91             : 
      92             :     /* protects shared memory and LWLock allocation */
      93             :     slock_t     shmem_lock;
      94             : } ShmemAllocatorData;
      95             : 
      96             : static void *ShmemAllocRaw(Size size, Size *allocated_size);
      97             : 
      98             : /* shared memory global variables */
      99             : 
     100             : static PGShmemHeader *ShmemSegHdr;  /* shared mem segment header */
     101             : static void *ShmemBase;         /* start address of shared memory */
     102             : static void *ShmemEnd;          /* end+1 address of shared memory */
     103             : 
     104             : static ShmemAllocatorData *ShmemAllocator;
     105             : slock_t    *ShmemLock;          /* points to ShmemAllocator->shmem_lock */
     106             : static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
     107             : 
     108             : /* To get reliable results for NUMA inquiry we need to "touch pages" once */
     109             : static bool firstNumaTouch = true;
     110             : 
     111             : Datum       pg_numa_available(PG_FUNCTION_ARGS);
     112             : 
     113             : /*
     114             :  *  InitShmemAllocator() --- set up basic pointers to shared memory.
     115             :  *
     116             :  * Called at postmaster or stand-alone backend startup, to initialize the
     117             :  * allocator's data structure in the shared memory segment.  In EXEC_BACKEND,
     118             :  * this is also called at backend startup, to set up pointers to the shared
     119             :  * memory areas.
     120             :  */
     121             : void
     122        2280 : InitShmemAllocator(PGShmemHeader *seghdr)
     123             : {
     124             :     Assert(seghdr != NULL);
     125             : 
     126             :     /*
     127             :      * We assume the pointer and offset are MAXALIGN.  Not a hard requirement,
     128             :      * but it's true today and keeps the math below simpler.
     129             :      */
     130             :     Assert(seghdr == (void *) MAXALIGN(seghdr));
     131             :     Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));
     132             : 
     133        2280 :     ShmemSegHdr = seghdr;
     134        2280 :     ShmemBase = seghdr;
     135        2280 :     ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
     136             : 
     137             : #ifndef EXEC_BACKEND
     138             :     Assert(!IsUnderPostmaster);
     139             : #endif
     140        2280 :     if (IsUnderPostmaster)
     141             :     {
     142           0 :         PGShmemHeader *shmhdr = ShmemSegHdr;
     143             : 
     144           0 :         ShmemAllocator = (ShmemAllocatorData *) ((char *) shmhdr + shmhdr->content_offset);
     145           0 :         ShmemLock = &ShmemAllocator->shmem_lock;
     146             :     }
     147             :     else
     148             :     {
     149             :         Size        offset;
     150             : 
     151             :         /*
     152             :          * Allocations after this point should go through ShmemAlloc, which
     153             :          * expects to allocate everything on cache line boundaries.  Make sure
     154             :          * the first allocation begins on a cache line boundary.
     155             :          */
     156        2280 :         offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
     157        2280 :         if (offset > seghdr->totalsize)
     158           0 :             ereport(ERROR,
     159             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     160             :                      errmsg("out of shared memory (%zu bytes requested)",
     161             :                             offset)));
     162             : 
     163        2280 :         ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
     164             : 
     165        2280 :         SpinLockInit(&ShmemAllocator->shmem_lock);
     166        2280 :         ShmemLock = &ShmemAllocator->shmem_lock;
     167        2280 :         ShmemAllocator->free_offset = offset;
     168             :         /* ShmemIndex can't be set up yet (need LWLocks first) */
     169        2280 :         ShmemAllocator->index = NULL;
     170        2280 :         ShmemIndex = (HTAB *) NULL;
     171             :     }
     172        2280 : }
     173             : 
     174             : /*
     175             :  * ShmemAlloc -- allocate cache-line-aligned chunk from shared memory
     176             :  *
     177             :  * Throws error if request cannot be satisfied.
     178             :  *
     179             :  * Assumes ShmemLock and ShmemSegHdr are initialized.
     180             :  */
     181             : void *
     182        6846 : ShmemAlloc(Size size)
     183             : {
     184             :     void       *newSpace;
     185             :     Size        allocated_size;
     186             : 
     187        6846 :     newSpace = ShmemAllocRaw(size, &allocated_size);
     188        6846 :     if (!newSpace)
     189           0 :         ereport(ERROR,
     190             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     191             :                  errmsg("out of shared memory (%zu bytes requested)",
     192             :                         size)));
     193        6846 :     return newSpace;
     194             : }
     195             : 
     196             : /*
     197             :  * ShmemAllocNoError -- allocate cache-line-aligned chunk from shared memory
     198             :  *
     199             :  * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
     200             :  */
     201             : void *
     202      920326 : ShmemAllocNoError(Size size)
     203             : {
     204             :     Size        allocated_size;
     205             : 
     206      920326 :     return ShmemAllocRaw(size, &allocated_size);
     207             : }
     208             : 
     209             : /*
     210             :  * ShmemAllocRaw -- allocate cache-line-aligned chunk and return allocated size
     211             :  *
     212             :  * Also sets *allocated_size to the number of bytes allocated, which will
     213             :  * be equal to the number requested plus any padding we choose to add.
     214             :  */
     215             : static void *
     216     1098170 : ShmemAllocRaw(Size size, Size *allocated_size)
     217             : {
     218             :     Size        newStart;
     219             :     Size        newFree;
     220             :     void       *newSpace;
     221             : 
     222             :     /*
     223             :      * Ensure all space is adequately aligned.  We used to only MAXALIGN this
     224             :      * space but experience has proved that on modern systems that is not good
     225             :      * enough.  Many parts of the system are very sensitive to critical data
     226             :      * structures getting split across cache line boundaries.  To avoid that,
     227             :      * attempt to align the beginning of the allocation to a cache line
     228             :      * boundary.  The calling code will still need to be careful about how it
     229             :      * uses the allocated space - e.g. by padding each element in an array of
     230             :      * structures out to a power-of-two size - but without this, even that
     231             :      * won't be sufficient.
     232             :      */
     233     1098170 :     size = CACHELINEALIGN(size);
     234     1098170 :     *allocated_size = size;
     235             : 
     236             :     Assert(ShmemSegHdr != NULL);
     237             : 
     238     1098170 :     SpinLockAcquire(ShmemLock);
     239             : 
     240     1098170 :     newStart = ShmemAllocator->free_offset;
     241             : 
     242     1098170 :     newFree = newStart + size;
     243     1098170 :     if (newFree <= ShmemSegHdr->totalsize)
     244             :     {
     245     1098170 :         newSpace = (char *) ShmemBase + newStart;
     246     1098170 :         ShmemAllocator->free_offset = newFree;
     247             :     }
     248             :     else
     249           0 :         newSpace = NULL;
     250             : 
     251     1098170 :     SpinLockRelease(ShmemLock);
     252             : 
     253             :     /* note this assert is okay with newSpace == NULL */
     254             :     Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
     255             : 
     256     1098170 :     return newSpace;
     257             : }
     258             : 
     259             : /*
     260             :  * ShmemAddrIsValid -- test if an address refers to shared memory
     261             :  *
     262             :  * Returns true if the pointer points within the shared memory segment.
     263             :  */
     264             : bool
     265           0 : ShmemAddrIsValid(const void *addr)
     266             : {
     267           0 :     return (addr >= ShmemBase) && (addr < ShmemEnd);
     268             : }
     269             : 
     270             : /*
     271             :  *  InitShmemIndex() --- set up or attach to shmem index table.
     272             :  */
     273             : void
     274        2280 : InitShmemIndex(void)
     275             : {
     276             :     HASHCTL     info;
     277             : 
     278             :     /*
     279             :      * Create the shared memory shmem index.
     280             :      *
     281             :      * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
     282             :      * hashtable to exist already, we have a bit of a circularity problem in
     283             :      * initializing the ShmemIndex itself.  The special "ShmemIndex" hash
     284             :      * table name will tell ShmemInitStruct to fake it.
     285             :      */
     286        2280 :     info.keysize = SHMEM_INDEX_KEYSIZE;
     287        2280 :     info.entrysize = sizeof(ShmemIndexEnt);
     288             : 
     289        2280 :     ShmemIndex = ShmemInitHash("ShmemIndex",
     290             :                                SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
     291             :                                &info,
     292             :                                HASH_ELEM | HASH_STRINGS);
     293        2280 : }
     294             : 
     295             : /*
     296             :  * ShmemInitHash -- Create and initialize, or attach to, a
     297             :  *      shared memory hash table.
     298             :  *
     299             :  * We assume caller is doing some kind of synchronization
     300             :  * so that two processes don't try to create/initialize the same
     301             :  * table at once.  (In practice, all creations are done in the postmaster
     302             :  * process; child processes should always be attaching to existing tables.)
     303             :  *
     304             :  * max_size is the estimated maximum number of hashtable entries.  This is
     305             :  * not a hard limit, but the access efficiency will degrade if it is
     306             :  * exceeded substantially (since it's used to compute directory size and
     307             :  * the hash table buckets will get overfull).
     308             :  *
     309             :  * init_size is the number of hashtable entries to preallocate.  For a table
     310             :  * whose maximum size is certain, this should be equal to max_size; that
     311             :  * ensures that no run-time out-of-shared-memory failures can occur.
     312             :  *
     313             :  * *infoP and hash_flags must specify at least the entry sizes and key
     314             :  * comparison semantics (see hash_create()).  Flag bits and values specific
     315             :  * to shared-memory hash tables are added here, except that callers may
     316             :  * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
     317             :  *
     318             :  * Note: before Postgres 9.0, this function returned NULL for some failure
     319             :  * cases.  Now, it always throws error instead, so callers need not check
     320             :  * for NULL.
     321             :  */
     322             : HTAB *
     323       20534 : ShmemInitHash(const char *name,     /* table string name for shmem index */
     324             :               int64 init_size,  /* initial table size */
     325             :               int64 max_size,   /* max size of the table */
     326             :               HASHCTL *infoP,   /* info about key and bucket size */
     327             :               int hash_flags)   /* info about infoP */
     328             : {
     329             :     bool        found;
     330             :     void       *location;
     331             : 
     332             :     /*
     333             :      * Hash tables allocated in shared memory have a fixed directory; it can't
     334             :      * grow or other backends wouldn't be able to find it. So, make sure we
     335             :      * make it big enough to start with.
     336             :      *
     337             :      * The shared memory allocator must be specified too.
     338             :      */
     339       20534 :     infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
     340       20534 :     infoP->alloc = ShmemAllocNoError;
     341       20534 :     hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
     342             : 
     343             :     /* look it up in the shmem index */
     344       20534 :     location = ShmemInitStruct(name,
     345             :                                hash_get_shared_size(infoP, hash_flags),
     346             :                                &found);
     347             : 
     348             :     /*
     349             :      * if it already exists, attach to it rather than allocate and initialize
     350             :      * new space
     351             :      */
     352       20534 :     if (found)
     353           0 :         hash_flags |= HASH_ATTACH;
     354             : 
     355             :     /* Pass location of hashtable header to hash_create */
     356       20534 :     infoP->hctl = (HASHHDR *) location;
     357             : 
     358       20534 :     return hash_create(name, init_size, infoP, hash_flags);
     359             : }
     360             : 
     361             : /*
     362             :  * ShmemInitStruct -- Create/attach to a structure in shared memory.
     363             :  *
     364             :  *      This is called during initialization to find or allocate
     365             :  *      a data structure in shared memory.  If no other process
     366             :  *      has created the structure, this routine allocates space
     367             :  *      for it.  If it exists already, a pointer to the existing
     368             :  *      structure is returned.
     369             :  *
     370             :  *  Returns: pointer to the object.  *foundPtr is set true if the object was
     371             :  *      already in the shmem index (hence, already initialized).
     372             :  *
     373             :  *  Note: before Postgres 9.0, this function returned NULL for some failure
     374             :  *  cases.  Now, it always throws error instead, so callers need not check
     375             :  *  for NULL.
     376             :  */
     377             : void *
     378      173278 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
     379             : {
     380             :     ShmemIndexEnt *result;
     381             :     void       *structPtr;
     382             : 
     383      173278 :     LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
     384             : 
     385      173278 :     if (!ShmemIndex)
     386             :     {
     387             :         /* Must be trying to create/attach to ShmemIndex itself */
     388             :         Assert(strcmp(name, "ShmemIndex") == 0);
     389             : 
     390        2280 :         if (IsUnderPostmaster)
     391             :         {
     392             :             /* Must be initializing a (non-standalone) backend */
     393             :             Assert(ShmemAllocator->index != NULL);
     394           0 :             structPtr = ShmemAllocator->index;
     395           0 :             *foundPtr = true;
     396             :         }
     397             :         else
     398             :         {
     399             :             /*
     400             :              * If the shmem index doesn't exist, we are bootstrapping: we must
     401             :              * be trying to init the shmem index itself.
     402             :              *
     403             :              * Notice that the ShmemIndexLock is released before the shmem
     404             :              * index has been initialized.  This should be OK because no other
     405             :              * process can be accessing shared memory yet.
     406             :              */
     407             :             Assert(ShmemAllocator->index == NULL);
     408        2280 :             structPtr = ShmemAlloc(size);
     409        2280 :             ShmemAllocator->index = structPtr;
     410        2280 :             *foundPtr = false;
     411             :         }
     412        2280 :         LWLockRelease(ShmemIndexLock);
     413        2280 :         return structPtr;
     414             :     }
     415             : 
     416             :     /* look it up in the shmem index */
     417             :     result = (ShmemIndexEnt *)
     418      170998 :         hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
     419             : 
     420      170998 :     if (!result)
     421             :     {
     422           0 :         LWLockRelease(ShmemIndexLock);
     423           0 :         ereport(ERROR,
     424             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     425             :                  errmsg("could not create ShmemIndex entry for data structure \"%s\"",
     426             :                         name)));
     427             :     }
     428             : 
     429      170998 :     if (*foundPtr)
     430             :     {
     431             :         /*
     432             :          * Structure is in the shmem index so someone else has allocated it
     433             :          * already.  The size better be the same as the size we are trying to
     434             :          * initialize to, or there is a name conflict (or worse).
     435             :          */
     436           0 :         if (result->size != size)
     437             :         {
     438           0 :             LWLockRelease(ShmemIndexLock);
     439           0 :             ereport(ERROR,
     440             :                     (errmsg("ShmemIndex entry size is wrong for data structure"
     441             :                             " \"%s\": expected %zu, actual %zu",
     442             :                             name, size, result->size)));
     443             :         }
     444           0 :         structPtr = result->location;
     445             :     }
     446             :     else
     447             :     {
     448             :         Size        allocated_size;
     449             : 
     450             :         /* It isn't in the table yet; allocate and initialize it */
     451      170998 :         structPtr = ShmemAllocRaw(size, &allocated_size);
     452      170998 :         if (structPtr == NULL)
     453             :         {
     454             :             /* out of memory; remove the failed ShmemIndex entry */
     455           0 :             hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
     456           0 :             LWLockRelease(ShmemIndexLock);
     457           0 :             ereport(ERROR,
     458             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     459             :                      errmsg("not enough shared memory for data structure"
     460             :                             " \"%s\" (%zu bytes requested)",
     461             :                             name, size)));
     462             :         }
     463      170998 :         result->size = size;
     464      170998 :         result->allocated_size = allocated_size;
     465      170998 :         result->location = structPtr;
     466             :     }
     467             : 
     468      170998 :     LWLockRelease(ShmemIndexLock);
     469             : 
     470             :     Assert(ShmemAddrIsValid(structPtr));
     471             : 
     472             :     Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
     473             : 
     474      170998 :     return structPtr;
     475             : }
     476             : 
     477             : 
     478             : /*
     479             :  * Add two Size values, checking for overflow
     480             :  */
     481             : Size
     482     1139824 : add_size(Size s1, Size s2)
     483             : {
     484             :     Size        result;
     485             : 
     486     1139824 :     if (pg_add_size_overflow(s1, s2, &result))
     487           0 :         ereport(ERROR,
     488             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     489             :                  errmsg("requested shared memory size overflows size_t")));
     490     1139824 :     return result;
     491             : }
     492             : 
     493             : /*
     494             :  * Multiply two Size values, checking for overflow
     495             :  */
     496             : Size
     497      534232 : mul_size(Size s1, Size s2)
     498             : {
     499             :     Size        result;
     500             : 
     501      534232 :     if (pg_mul_size_overflow(s1, s2, &result))
     502           0 :         ereport(ERROR,
     503             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     504             :                  errmsg("requested shared memory size overflows size_t")));
     505      534232 :     return result;
     506             : }
     507             : 
     508             : /* SQL SRF showing allocated shared memory */
     509             : Datum
     510           6 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
     511             : {
     512             : #define PG_GET_SHMEM_SIZES_COLS 4
     513           6 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     514             :     HASH_SEQ_STATUS hstat;
     515             :     ShmemIndexEnt *ent;
     516           6 :     Size        named_allocated = 0;
     517             :     Datum       values[PG_GET_SHMEM_SIZES_COLS];
     518             :     bool        nulls[PG_GET_SHMEM_SIZES_COLS];
     519             : 
     520           6 :     InitMaterializedSRF(fcinfo, 0);
     521             : 
     522           6 :     LWLockAcquire(ShmemIndexLock, LW_SHARED);
     523             : 
     524           6 :     hash_seq_init(&hstat, ShmemIndex);
     525             : 
     526             :     /* output all allocated entries */
     527           6 :     memset(nulls, 0, sizeof(nulls));
     528         460 :     while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
     529             :     {
     530         454 :         values[0] = CStringGetTextDatum(ent->key);
     531         454 :         values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
     532         454 :         values[2] = Int64GetDatum(ent->size);
     533         454 :         values[3] = Int64GetDatum(ent->allocated_size);
     534         454 :         named_allocated += ent->allocated_size;
     535             : 
     536         454 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
     537             :                              values, nulls);
     538             :     }
     539             : 
     540             :     /* output shared memory allocated but not counted via the shmem index */
     541           6 :     values[0] = CStringGetTextDatum("<anonymous>");
     542           6 :     nulls[1] = true;
     543           6 :     values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
     544           6 :     values[3] = values[2];
     545           6 :     tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     546             : 
     547             :     /* output as-of-yet unused shared memory */
     548           6 :     nulls[0] = true;
     549           6 :     values[1] = Int64GetDatum(ShmemAllocator->free_offset);
     550           6 :     nulls[1] = false;
     551           6 :     values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
     552           6 :     values[3] = values[2];
     553           6 :     tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     554             : 
     555           6 :     LWLockRelease(ShmemIndexLock);
     556             : 
     557           6 :     return (Datum) 0;
     558             : }
     559             : 
     560             : /*
     561             :  * SQL SRF showing NUMA memory nodes for allocated shared memory
     562             :  *
     563             :  * Compared to pg_get_shmem_allocations(), this function does not return
     564             :  * information about shared anonymous allocations and unused shared memory.
     565             :  */
     566             : Datum
     567           6 : pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
     568             : {
     569             : #define PG_GET_SHMEM_NUMA_SIZES_COLS 3
     570           6 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     571             :     HASH_SEQ_STATUS hstat;
     572             :     ShmemIndexEnt *ent;
     573             :     Datum       values[PG_GET_SHMEM_NUMA_SIZES_COLS];
     574             :     bool        nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
     575             :     Size        os_page_size;
     576             :     void      **page_ptrs;
     577             :     int        *pages_status;
     578             :     uint64      shm_total_page_count,
     579             :                 shm_ent_page_count,
     580             :                 max_nodes;
     581             :     Size       *nodes;
     582             : 
     583           6 :     if (pg_numa_init() == -1)
     584           6 :         elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
     585             : 
     586           0 :     InitMaterializedSRF(fcinfo, 0);
     587             : 
     588           0 :     max_nodes = pg_numa_get_max_node();
     589           0 :     nodes = palloc_array(Size, max_nodes + 2);
     590             : 
     591             :     /*
     592             :      * Shared memory allocations can vary in size and may not align with OS
     593             :      * memory page boundaries, while NUMA queries work on pages.
     594             :      *
     595             :      * To correctly map each allocation to NUMA nodes, we need to: 1.
     596             :      * Determine the OS memory page size. 2. Align each allocation's start/end
     597             :      * addresses to page boundaries. 3. Query NUMA node information for all
     598             :      * pages spanning the allocation.
     599             :      */
     600           0 :     os_page_size = pg_get_shmem_pagesize();
     601             : 
     602             :     /*
     603             :      * Allocate memory for page pointers and status based on total shared
     604             :      * memory size. This simplified approach allocates enough space for all
     605             :      * pages in shared memory rather than calculating the exact requirements
     606             :      * for each segment.
     607             :      *
     608             :      * Add 1, because we don't know how exactly the segments align to OS
     609             :      * pages, so the allocation might use one more memory page. In practice
     610             :      * this is not very likely, and moreover we have more entries, each of
     611             :      * them using only fraction of the total pages.
     612             :      */
     613           0 :     shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
     614           0 :     page_ptrs = palloc0_array(void *, shm_total_page_count);
     615           0 :     pages_status = palloc_array(int, shm_total_page_count);
     616             : 
     617           0 :     if (firstNumaTouch)
     618           0 :         elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");
     619             : 
     620           0 :     LWLockAcquire(ShmemIndexLock, LW_SHARED);
     621             : 
     622           0 :     hash_seq_init(&hstat, ShmemIndex);
     623             : 
     624             :     /* output all allocated entries */
     625           0 :     while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
     626             :     {
     627             :         int         i;
     628             :         char       *startptr,
     629             :                    *endptr;
     630             :         Size        total_len;
     631             : 
     632             :         /*
     633             :          * Calculate the range of OS pages used by this segment. The segment
     634             :          * may start / end half-way through a page, we want to count these
     635             :          * pages too. So we align the start/end pointers down/up, and then
     636             :          * calculate the number of pages from that.
     637             :          */
     638           0 :         startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
     639           0 :         endptr = (char *) TYPEALIGN(os_page_size,
     640             :                                     (char *) ent->location + ent->allocated_size);
     641           0 :         total_len = (endptr - startptr);
     642             : 
     643           0 :         shm_ent_page_count = total_len / os_page_size;
     644             : 
     645             :         /*
     646             :          * If we ever get 0xff (-1) back from kernel inquiry, then we probably
     647             :          * have a bug in mapping buffers to OS pages.
     648             :          */
     649           0 :         memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);
     650             : 
     651             :         /*
     652             :          * Setup page_ptrs[] with pointers to all OS pages for this segment,
     653             :          * and get the NUMA status using pg_numa_query_pages.
     654             :          *
     655             :          * In order to get reliable results we also need to touch memory
     656             :          * pages, so that inquiry about NUMA memory node doesn't return -2
     657             :          * (ENOENT, which indicates unmapped/unallocated pages).
     658             :          */
     659           0 :         for (i = 0; i < shm_ent_page_count; i++)
     660             :         {
     661           0 :             page_ptrs[i] = startptr + (i * os_page_size);
     662             : 
     663           0 :             if (firstNumaTouch)
     664             :                 pg_numa_touch_mem_if_required(page_ptrs[i]);
     665             : 
     666           0 :             CHECK_FOR_INTERRUPTS();
     667             :         }
     668             : 
     669           0 :         if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
     670           0 :             elog(ERROR, "failed NUMA pages inquiry status: %m");
     671             : 
     672             :         /* Count number of NUMA nodes used for this shared memory entry */
     673           0 :         memset(nodes, 0, sizeof(Size) * (max_nodes + 2));
     674             : 
     675           0 :         for (i = 0; i < shm_ent_page_count; i++)
     676             :         {
     677           0 :             int         s = pages_status[i];
     678             : 
     679             :             /* Ensure we are adding only valid index to the array */
     680           0 :             if (s >= 0 && s <= max_nodes)
     681             :             {
     682             :                 /* valid NUMA node */
     683           0 :                 nodes[s]++;
     684           0 :                 continue;
     685             :             }
     686           0 :             else if (s == -2)
     687             :             {
     688             :                 /* -2 means ENOENT (e.g. page was moved to swap) */
     689           0 :                 nodes[max_nodes + 1]++;
     690           0 :                 continue;
     691             :             }
     692             : 
     693           0 :             elog(ERROR, "invalid NUMA node id outside of allowed range "
     694             :                  "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
     695             :         }
     696             : 
     697             :         /* no NULLs for regular nodes */
     698           0 :         memset(nulls, 0, sizeof(nulls));
     699             : 
     700             :         /*
     701             :          * Add one entry for each NUMA node, including those without allocated
     702             :          * memory for this segment.
     703             :          */
     704           0 :         for (i = 0; i <= max_nodes; i++)
     705             :         {
     706           0 :             values[0] = CStringGetTextDatum(ent->key);
     707           0 :             values[1] = Int32GetDatum(i);
     708           0 :             values[2] = Int64GetDatum(nodes[i] * os_page_size);
     709             : 
     710           0 :             tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
     711             :                                  values, nulls);
     712             :         }
     713             : 
     714             :         /* The last entry is used for pages without a NUMA node. */
     715           0 :         nulls[1] = true;
     716           0 :         values[0] = CStringGetTextDatum(ent->key);
     717           0 :         values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);
     718             : 
     719           0 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
     720             :                              values, nulls);
     721             :     }
     722             : 
     723           0 :     LWLockRelease(ShmemIndexLock);
     724           0 :     firstNumaTouch = false;
     725             : 
     726           0 :     return (Datum) 0;
     727             : }
     728             : 
     729             : /*
     730             :  * Determine the memory page size used for the shared memory segment.
     731             :  *
     732             :  * If the shared segment was allocated using huge pages, returns the size of
     733             :  * a huge page. Otherwise returns the size of regular memory page.
     734             :  *
     735             :  * This should be used only after the server is started.
     736             :  */
     737             : Size
     738           4 : pg_get_shmem_pagesize(void)
     739             : {
     740             :     Size        os_page_size;
     741             : #ifdef WIN32
     742             :     SYSTEM_INFO sysinfo;
     743             : 
     744             :     GetSystemInfo(&sysinfo);
     745             :     os_page_size = sysinfo.dwPageSize;
     746             : #else
     747           4 :     os_page_size = sysconf(_SC_PAGESIZE);
     748             : #endif
     749             : 
     750             :     Assert(IsUnderPostmaster);
     751             :     Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
     752             : 
     753           4 :     if (huge_pages_status == HUGE_PAGES_ON)
     754           0 :         GetHugePageSize(&os_page_size, NULL);
     755             : 
     756           4 :     return os_page_size;
     757             : }
     758             : 
     759             : Datum
     760           8 : pg_numa_available(PG_FUNCTION_ARGS)
     761             : {
     762           8 :     PG_RETURN_BOOL(pg_numa_init() != -1);
     763             : }

Generated by: LCOV version 1.16