LCOV - PostgreSQL 19devel - src/backend/storage/ipc/shmem.c

LCOV - code coverage report

Current view:	top level - src/backend/storage/ipc - shmem.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	116	178	65.2 %
Date:	2025-07-29 04:18:44	Functions:	14	16	87.5 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * shmem.c
       4             :  *    create shared memory and initialize shared memory data structures.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/ipc/shmem.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : /*
      16             :  * POSTGRES processes share one or more regions of shared memory.
      17             :  * The shared memory is created by a postmaster and is inherited
      18             :  * by each backend via fork() (or, in some ports, via other OS-specific
      19             :  * methods).  The routines in this file are used for allocating and
      20             :  * binding to shared memory data structures.
      21             :  *
      22             :  * NOTES:
      23             :  *      (a) There are three kinds of shared memory data structures
      24             :  *  available to POSTGRES: fixed-size structures, queues and hash
      25             :  *  tables.  Fixed-size structures contain things like global variables
      26             :  *  for a module and should never be allocated after the shared memory
      27             :  *  initialization phase.  Hash tables have a fixed maximum size, but
      28             :  *  their actual size can vary dynamically.  When entries are added
      29             :  *  to the table, more space is allocated.  Queues link data structures
      30             :  *  that have been allocated either within fixed-size structures or as hash
      31             :  *  buckets.  Each shared data structure has a string name to identify
      32             :  *  it (assigned in the module that declares it).
      33             :  *
      34             :  *      (b) During initialization, each module looks for its
      35             :  *  shared data structures in a hash table called the "Shmem Index".
      36             :  *  If the data structure is not present, the caller can allocate
      37             :  *  a new one and initialize it.  If the data structure is present,
      38             :  *  the caller "attaches" to the structure by initializing a pointer
      39             :  *  in the local address space.
      40             :  *      The shmem index has two purposes: first, it gives us
      41             :  *  a simple model of how the world looks when a backend process
      42             :  *  initializes.  If something is present in the shmem index,
      43             :  *  it is initialized.  If it is not, it is uninitialized.  Second,
      44             :  *  the shmem index allows us to allocate shared memory on demand
      45             :  *  instead of trying to preallocate structures and hard-wire the
      46             :  *  sizes and locations in header files.  If you are using a lot
      47             :  *  of shared memory in a lot of different places (and changing
      48             :  *  things during development), this is important.
      49             :  *
      50             :  *      (c) In standard Unix-ish environments, individual backends do not
      51             :  *  need to re-establish their local pointers into shared memory, because
      52             :  *  they inherit correct values of those variables via fork() from the
      53             :  *  postmaster.  However, this does not work in the EXEC_BACKEND case.
      54             :  *  In ports using EXEC_BACKEND, new backends have to set up their local
      55             :  *  pointers using the method described in (b) above.
      56             :  *
      57             :  *      (d) memory allocation model: shared memory can never be
      58             :  *  freed, once allocated.   Each hash table has its own free list,
      59             :  *  so hash buckets can be reused when an item is deleted.  However,
      60             :  *  if one hash table grows very large and then shrinks, its space
      61             :  *  cannot be redistributed to other tables.  We could build a simple
      62             :  *  hash bucket garbage collector if need be.  Right now, it seems
      63             :  *  unnecessary.
      64             :  */
      65             : 
      66             : #include "postgres.h"
      67             : 
      68             : #include "fmgr.h"
      69             : #include "funcapi.h"
      70             : #include "miscadmin.h"
      71             : #include "port/pg_numa.h"
      72             : #include "storage/lwlock.h"
      73             : #include "storage/pg_shmem.h"
      74             : #include "storage/shmem.h"
      75             : #include "storage/spin.h"
      76             : #include "utils/builtins.h"
      77             : 
      78             : static void *ShmemAllocRaw(Size size, Size *allocated_size);
      79             : 
      80             : /* shared memory global variables */
      81             : 
      82             : static PGShmemHeader *ShmemSegHdr;  /* shared mem segment header */
      83             : 
      84             : static void *ShmemBase;         /* start address of shared memory */
      85             : 
      86             : static void *ShmemEnd;          /* end+1 address of shared memory */
      87             : 
      88             : slock_t    *ShmemLock;          /* spinlock for shared memory and LWLock
      89             :                                  * allocation */
      90             : 
      91             : static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
      92             : 
      93             : /* To get reliable results for NUMA inquiry we need to "touch pages" once */
      94             : static bool firstNumaTouch = true;
      95             : 
      96             : Datum       pg_numa_available(PG_FUNCTION_ARGS);
      97             : 
      98             : /*
      99             :  *  InitShmemAccess() --- set up basic pointers to shared memory.
     100             :  */
     101             : void
     102        2152 : InitShmemAccess(PGShmemHeader *seghdr)
     103             : {
     104        2152 :     ShmemSegHdr = seghdr;
     105        2152 :     ShmemBase = seghdr;
     106        2152 :     ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
     107        2152 : }
     108             : 
     109             : /*
     110             :  *  InitShmemAllocation() --- set up shared-memory space allocation.
     111             :  *
     112             :  * This should be called only in the postmaster or a standalone backend.
     113             :  */
     114             : void
     115        2152 : InitShmemAllocation(void)
     116             : {
     117        2152 :     PGShmemHeader *shmhdr = ShmemSegHdr;
     118             :     char       *aligned;
     119             : 
     120             :     Assert(shmhdr != NULL);
     121             : 
     122             :     /*
     123             :      * Initialize the spinlock used by ShmemAlloc.  We must use
     124             :      * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
     125             :      */
     126        2152 :     ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
     127             : 
     128        2152 :     SpinLockInit(ShmemLock);
     129             : 
     130             :     /*
     131             :      * Allocations after this point should go through ShmemAlloc, which
     132             :      * expects to allocate everything on cache line boundaries.  Make sure the
     133             :      * first allocation begins on a cache line boundary.
     134             :      */
     135        2152 :     aligned = (char *)
     136        2152 :         (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
     137        2152 :     shmhdr->freeoffset = aligned - (char *) shmhdr;
     138             : 
     139             :     /* ShmemIndex can't be set up yet (need LWLocks first) */
     140        2152 :     shmhdr->index = NULL;
     141        2152 :     ShmemIndex = (HTAB *) NULL;
     142        2152 : }
     143             : 
     144             : /*
     145             :  * ShmemAlloc -- allocate max-aligned chunk from shared memory
     146             :  *
     147             :  * Throws error if request cannot be satisfied.
     148             :  *
     149             :  * Assumes ShmemLock and ShmemSegHdr are initialized.
     150             :  */
     151             : void *
     152        4312 : ShmemAlloc(Size size)
     153             : {
     154             :     void       *newSpace;
     155             :     Size        allocated_size;
     156             : 
     157        4312 :     newSpace = ShmemAllocRaw(size, &allocated_size);
     158        4312 :     if (!newSpace)
     159           0 :         ereport(ERROR,
     160             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     161             :                  errmsg("out of shared memory (%zu bytes requested)",
     162             :                         size)));
     163        4312 :     return newSpace;
     164             : }
     165             : 
     166             : /*
     167             :  * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
     168             :  *
     169             :  * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
     170             :  */
     171             : void *
     172      879806 : ShmemAllocNoError(Size size)
     173             : {
     174             :     Size        allocated_size;
     175             : 
     176      879806 :     return ShmemAllocRaw(size, &allocated_size);
     177             : }
     178             : 
     179             : /*
     180             :  * ShmemAllocRaw -- allocate align chunk and return allocated size
     181             :  *
     182             :  * Also sets *allocated_size to the number of bytes allocated, which will
     183             :  * be equal to the number requested plus any padding we choose to add.
     184             :  */
     185             : static void *
     186     1041210 : ShmemAllocRaw(Size size, Size *allocated_size)
     187             : {
     188             :     Size        newStart;
     189             :     Size        newFree;
     190             :     void       *newSpace;
     191             : 
     192             :     /*
     193             :      * Ensure all space is adequately aligned.  We used to only MAXALIGN this
     194             :      * space but experience has proved that on modern systems that is not good
     195             :      * enough.  Many parts of the system are very sensitive to critical data
     196             :      * structures getting split across cache line boundaries.  To avoid that,
     197             :      * attempt to align the beginning of the allocation to a cache line
     198             :      * boundary.  The calling code will still need to be careful about how it
     199             :      * uses the allocated space - e.g. by padding each element in an array of
     200             :      * structures out to a power-of-two size - but without this, even that
     201             :      * won't be sufficient.
     202             :      */
     203     1041210 :     size = CACHELINEALIGN(size);
     204     1041210 :     *allocated_size = size;
     205             : 
     206             :     Assert(ShmemSegHdr != NULL);
     207             : 
     208     1041210 :     SpinLockAcquire(ShmemLock);
     209             : 
     210     1041210 :     newStart = ShmemSegHdr->freeoffset;
     211             : 
     212     1041210 :     newFree = newStart + size;
     213     1041210 :     if (newFree <= ShmemSegHdr->totalsize)
     214             :     {
     215     1041210 :         newSpace = (char *) ShmemBase + newStart;
     216     1041210 :         ShmemSegHdr->freeoffset = newFree;
     217             :     }
     218             :     else
     219           0 :         newSpace = NULL;
     220             : 
     221     1041210 :     SpinLockRelease(ShmemLock);
     222             : 
     223             :     /* note this assert is okay with newSpace == NULL */
     224             :     Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
     225             : 
     226     1041210 :     return newSpace;
     227             : }
     228             : 
     229             : /*
     230             :  * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
     231             :  *
     232             :  * Allocate space without locking ShmemLock.  This should be used for,
     233             :  * and only for, allocations that must happen before ShmemLock is ready.
     234             :  *
     235             :  * We consider maxalign, rather than cachealign, sufficient here.
     236             :  */
     237             : void *
     238        4304 : ShmemAllocUnlocked(Size size)
     239             : {
     240             :     Size        newStart;
     241             :     Size        newFree;
     242             :     void       *newSpace;
     243             : 
     244             :     /*
     245             :      * Ensure allocated space is adequately aligned.
     246             :      */
     247        4304 :     size = MAXALIGN(size);
     248             : 
     249             :     Assert(ShmemSegHdr != NULL);
     250             : 
     251        4304 :     newStart = ShmemSegHdr->freeoffset;
     252             : 
     253        4304 :     newFree = newStart + size;
     254        4304 :     if (newFree > ShmemSegHdr->totalsize)
     255           0 :         ereport(ERROR,
     256             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     257             :                  errmsg("out of shared memory (%zu bytes requested)",
     258             :                         size)));
     259        4304 :     ShmemSegHdr->freeoffset = newFree;
     260             : 
     261        4304 :     newSpace = (char *) ShmemBase + newStart;
     262             : 
     263             :     Assert(newSpace == (void *) MAXALIGN(newSpace));
     264             : 
     265        4304 :     return newSpace;
     266             : }
     267             : 
     268             : /*
     269             :  * ShmemAddrIsValid -- test if an address refers to shared memory
     270             :  *
     271             :  * Returns true if the pointer points within the shared memory segment.
     272             :  */
     273             : bool
     274           0 : ShmemAddrIsValid(const void *addr)
     275             : {
     276           0 :     return (addr >= ShmemBase) && (addr < ShmemEnd);
     277             : }
     278             : 
     279             : /*
     280             :  *  InitShmemIndex() --- set up or attach to shmem index table.
     281             :  */
     282             : void
     283        2152 : InitShmemIndex(void)
     284             : {
     285             :     HASHCTL     info;
     286             : 
     287             :     /*
     288             :      * Create the shared memory shmem index.
     289             :      *
     290             :      * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
     291             :      * hashtable to exist already, we have a bit of a circularity problem in
     292             :      * initializing the ShmemIndex itself.  The special "ShmemIndex" hash
     293             :      * table name will tell ShmemInitStruct to fake it.
     294             :      */
     295        2152 :     info.keysize = SHMEM_INDEX_KEYSIZE;
     296        2152 :     info.entrysize = sizeof(ShmemIndexEnt);
     297             : 
     298        2152 :     ShmemIndex = ShmemInitHash("ShmemIndex",
     299             :                                SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
     300             :                                &info,
     301             :                                HASH_ELEM | HASH_STRINGS);
     302        2152 : }
     303             : 
     304             : /*
     305             :  * ShmemInitHash -- Create and initialize, or attach to, a
     306             :  *      shared memory hash table.
     307             :  *
     308             :  * We assume caller is doing some kind of synchronization
     309             :  * so that two processes don't try to create/initialize the same
     310             :  * table at once.  (In practice, all creations are done in the postmaster
     311             :  * process; child processes should always be attaching to existing tables.)
     312             :  *
     313             :  * max_size is the estimated maximum number of hashtable entries.  This is
     314             :  * not a hard limit, but the access efficiency will degrade if it is
     315             :  * exceeded substantially (since it's used to compute directory size and
     316             :  * the hash table buckets will get overfull).
     317             :  *
     318             :  * init_size is the number of hashtable entries to preallocate.  For a table
     319             :  * whose maximum size is certain, this should be equal to max_size; that
     320             :  * ensures that no run-time out-of-shared-memory failures can occur.
     321             :  *
     322             :  * *infoP and hash_flags must specify at least the entry sizes and key
     323             :  * comparison semantics (see hash_create()).  Flag bits and values specific
     324             :  * to shared-memory hash tables are added here, except that callers may
     325             :  * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
     326             :  *
     327             :  * Note: before Postgres 9.0, this function returned NULL for some failure
     328             :  * cases.  Now, it always throws error instead, so callers need not check
     329             :  * for NULL.
     330             :  */
     331             : HTAB *
     332       19382 : ShmemInitHash(const char *name,     /* table string name for shmem index */
     333             :               long init_size,   /* initial table size */
     334             :               long max_size,    /* max size of the table */
     335             :               HASHCTL *infoP,   /* info about key and bucket size */
     336             :               int hash_flags)   /* info about infoP */
     337             : {
     338             :     bool        found;
     339             :     void       *location;
     340             : 
     341             :     /*
     342             :      * Hash tables allocated in shared memory have a fixed directory; it can't
     343             :      * grow or other backends wouldn't be able to find it. So, make sure we
     344             :      * make it big enough to start with.
     345             :      *
     346             :      * The shared memory allocator must be specified too.
     347             :      */
     348       19382 :     infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
     349       19382 :     infoP->alloc = ShmemAllocNoError;
     350       19382 :     hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
     351             : 
     352             :     /* look it up in the shmem index */
     353       19382 :     location = ShmemInitStruct(name,
     354             :                                hash_get_shared_size(infoP, hash_flags),
     355             :                                &found);
     356             : 
     357             :     /*
     358             :      * if it already exists, attach to it rather than allocate and initialize
     359             :      * new space
     360             :      */
     361       19382 :     if (found)
     362           0 :         hash_flags |= HASH_ATTACH;
     363             : 
     364             :     /* Pass location of hashtable header to hash_create */
     365       19382 :     infoP->hctl = (HASHHDR *) location;
     366             : 
     367       19382 :     return hash_create(name, init_size, infoP, hash_flags);
     368             : }
     369             : 
     370             : /*
     371             :  * ShmemInitStruct -- Create/attach to a structure in shared memory.
     372             :  *
     373             :  *      This is called during initialization to find or allocate
     374             :  *      a data structure in shared memory.  If no other process
     375             :  *      has created the structure, this routine allocates space
     376             :  *      for it.  If it exists already, a pointer to the existing
     377             :  *      structure is returned.
     378             :  *
     379             :  *  Returns: pointer to the object.  *foundPtr is set true if the object was
     380             :  *      already in the shmem index (hence, already initialized).
     381             :  *
     382             :  *  Note: before Postgres 9.0, this function returned NULL for some failure
     383             :  *  cases.  Now, it always throws error instead, so callers need not check
     384             :  *  for NULL.
     385             :  */
     386             : void *
     387      159244 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
     388             : {
     389             :     ShmemIndexEnt *result;
     390             :     void       *structPtr;
     391             : 
     392      159244 :     LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
     393             : 
     394      159244 :     if (!ShmemIndex)
     395             :     {
     396        2152 :         PGShmemHeader *shmemseghdr = ShmemSegHdr;
     397             : 
     398             :         /* Must be trying to create/attach to ShmemIndex itself */
     399             :         Assert(strcmp(name, "ShmemIndex") == 0);
     400             : 
     401        2152 :         if (IsUnderPostmaster)
     402             :         {
     403             :             /* Must be initializing a (non-standalone) backend */
     404             :             Assert(shmemseghdr->index != NULL);
     405           0 :             structPtr = shmemseghdr->index;
     406           0 :             *foundPtr = true;
     407             :         }
     408             :         else
     409             :         {
     410             :             /*
     411             :              * If the shmem index doesn't exist, we are bootstrapping: we must
     412             :              * be trying to init the shmem index itself.
     413             :              *
     414             :              * Notice that the ShmemIndexLock is released before the shmem
     415             :              * index has been initialized.  This should be OK because no other
     416             :              * process can be accessing shared memory yet.
     417             :              */
     418             :             Assert(shmemseghdr->index == NULL);
     419        2152 :             structPtr = ShmemAlloc(size);
     420        2152 :             shmemseghdr->index = structPtr;
     421        2152 :             *foundPtr = false;
     422             :         }
     423        2152 :         LWLockRelease(ShmemIndexLock);
     424        2152 :         return structPtr;
     425             :     }
     426             : 
     427             :     /* look it up in the shmem index */
     428             :     result = (ShmemIndexEnt *)
     429      157092 :         hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
     430             : 
     431      157092 :     if (!result)
     432             :     {
     433           0 :         LWLockRelease(ShmemIndexLock);
     434           0 :         ereport(ERROR,
     435             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     436             :                  errmsg("could not create ShmemIndex entry for data structure \"%s\"",
     437             :                         name)));
     438             :     }
     439             : 
     440      157092 :     if (*foundPtr)
     441             :     {
     442             :         /*
     443             :          * Structure is in the shmem index so someone else has allocated it
     444             :          * already.  The size better be the same as the size we are trying to
     445             :          * initialize to, or there is a name conflict (or worse).
     446             :          */
     447           0 :         if (result->size != size)
     448             :         {
     449           0 :             LWLockRelease(ShmemIndexLock);
     450           0 :             ereport(ERROR,
     451             :                     (errmsg("ShmemIndex entry size is wrong for data structure"
     452             :                             " \"%s\": expected %zu, actual %zu",
     453             :                             name, size, result->size)));
     454             :         }
     455           0 :         structPtr = result->location;
     456             :     }
     457             :     else
     458             :     {
     459             :         Size        allocated_size;
     460             : 
     461             :         /* It isn't in the table yet. allocate and initialize it */
     462      157092 :         structPtr = ShmemAllocRaw(size, &allocated_size);
     463      157092 :         if (structPtr == NULL)
     464             :         {
     465             :             /* out of memory; remove the failed ShmemIndex entry */
     466           0 :             hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
     467           0 :             LWLockRelease(ShmemIndexLock);
     468           0 :             ereport(ERROR,
     469             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     470             :                      errmsg("not enough shared memory for data structure"
     471             :                             " \"%s\" (%zu bytes requested)",
     472             :                             name, size)));
     473             :         }
     474      157092 :         result->size = size;
     475      157092 :         result->allocated_size = allocated_size;
     476      157092 :         result->location = structPtr;
     477             :     }
     478             : 
     479      157092 :     LWLockRelease(ShmemIndexLock);
     480             : 
     481             :     Assert(ShmemAddrIsValid(structPtr));
     482             : 
     483             :     Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
     484             : 
     485      157092 :     return structPtr;
     486             : }
     487             : 
     488             : 
     489             : /*
     490             :  * Add two Size values, checking for overflow
     491             :  */
     492             : Size
     493     1028026 : add_size(Size s1, Size s2)
     494             : {
     495             :     Size        result;
     496             : 
     497     1028026 :     result = s1 + s2;
     498             :     /* We are assuming Size is an unsigned type here... */
     499     1028026 :     if (result < s1 || result < s2)
     500           0 :         ereport(ERROR,
     501             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     502             :                  errmsg("requested shared memory size overflows size_t")));
     503     1028026 :     return result;
     504             : }
     505             : 
     506             : /*
     507             :  * Multiply two Size values, checking for overflow
     508             :  */
     509             : Size
     510      483950 : mul_size(Size s1, Size s2)
     511             : {
     512             :     Size        result;
     513             : 
     514      483950 :     if (s1 == 0 || s2 == 0)
     515       22158 :         return 0;
     516      461792 :     result = s1 * s2;
     517             :     /* We are assuming Size is an unsigned type here... */
     518      461792 :     if (result / s2 != s1)
     519           0 :         ereport(ERROR,
     520             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     521             :                  errmsg("requested shared memory size overflows size_t")));
     522      461792 :     return result;
     523             : }
     524             : 
     525             : /* SQL SRF showing allocated shared memory */
     526             : Datum
     527           6 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
     528             : {
     529             : #define PG_GET_SHMEM_SIZES_COLS 4
     530           6 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     531             :     HASH_SEQ_STATUS hstat;
     532             :     ShmemIndexEnt *ent;
     533           6 :     Size        named_allocated = 0;
     534             :     Datum       values[PG_GET_SHMEM_SIZES_COLS];
     535             :     bool        nulls[PG_GET_SHMEM_SIZES_COLS];
     536             : 
     537           6 :     InitMaterializedSRF(fcinfo, 0);
     538             : 
     539           6 :     LWLockAcquire(ShmemIndexLock, LW_SHARED);
     540             : 
     541           6 :     hash_seq_init(&hstat, ShmemIndex);
     542             : 
     543             :     /* output all allocated entries */
     544           6 :     memset(nulls, 0, sizeof(nulls));
     545         448 :     while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
     546             :     {
     547         442 :         values[0] = CStringGetTextDatum(ent->key);
     548         442 :         values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
     549         442 :         values[2] = Int64GetDatum(ent->size);
     550         442 :         values[3] = Int64GetDatum(ent->allocated_size);
     551         442 :         named_allocated += ent->allocated_size;
     552             : 
     553         442 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
     554             :                              values, nulls);
     555             :     }
     556             : 
     557             :     /* output shared memory allocated but not counted via the shmem index */
     558           6 :     values[0] = CStringGetTextDatum("<anonymous>");
     559           6 :     nulls[1] = true;
     560           6 :     values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
     561           6 :     values[3] = values[2];
     562           6 :     tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     563             : 
     564             :     /* output as-of-yet unused shared memory */
     565           6 :     nulls[0] = true;
     566           6 :     values[1] = Int64GetDatum(ShmemSegHdr->freeoffset);
     567           6 :     nulls[1] = false;
     568           6 :     values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset);
     569           6 :     values[3] = values[2];
     570           6 :     tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
     571             : 
     572           6 :     LWLockRelease(ShmemIndexLock);
     573             : 
     574           6 :     return (Datum) 0;
     575             : }
     576             : 
     577             : /*
     578             :  * SQL SRF showing NUMA memory nodes for allocated shared memory
     579             :  *
     580             :  * Compared to pg_get_shmem_allocations(), this function does not return
     581             :  * information about shared anonymous allocations and unused shared memory.
     582             :  */
     583             : Datum
     584           6 : pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
     585             : {
     586             : #define PG_GET_SHMEM_NUMA_SIZES_COLS 3
     587           6 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
     588             :     HASH_SEQ_STATUS hstat;
     589             :     ShmemIndexEnt *ent;
     590             :     Datum       values[PG_GET_SHMEM_NUMA_SIZES_COLS];
     591             :     bool        nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
     592             :     Size        os_page_size;
     593             :     void      **page_ptrs;
     594             :     int        *pages_status;
     595             :     uint64      shm_total_page_count,
     596             :                 shm_ent_page_count,
     597             :                 max_nodes;
     598             :     Size       *nodes;
     599             : 
     600           6 :     if (pg_numa_init() == -1)
     601           6 :         elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
     602             : 
     603           0 :     InitMaterializedSRF(fcinfo, 0);
     604             : 
     605           0 :     max_nodes = pg_numa_get_max_node();
     606           0 :     nodes = palloc(sizeof(Size) * (max_nodes + 1));
     607             : 
     608             :     /*
     609             :      * Different database block sizes (4kB, 8kB, ..., 32kB) can be used, while
     610             :      * the OS may have different memory page sizes.
     611             :      *
     612             :      * To correctly map between them, we need to: 1. Determine the OS memory
     613             :      * page size 2. Calculate how many OS pages are used by all buffer blocks
     614             :      * 3. Calculate how many OS pages are contained within each database
     615             :      * block.
     616             :      *
     617             :      * This information is needed before calling move_pages() for NUMA memory
     618             :      * node inquiry.
     619             :      */
     620           0 :     os_page_size = pg_get_shmem_pagesize();
     621             : 
     622             :     /*
     623             :      * Allocate memory for page pointers and status based on total shared
     624             :      * memory size. This simplified approach allocates enough space for all
     625             :      * pages in shared memory rather than calculating the exact requirements
     626             :      * for each segment.
     627             :      *
     628             :      * Add 1, because we don't know how exactly the segments align to OS
     629             :      * pages, so the allocation might use one more memory page. In practice
     630             :      * this is not very likely, and moreover we have more entries, each of
     631             :      * them using only fraction of the total pages.
     632             :      */
     633           0 :     shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
     634           0 :     page_ptrs = palloc0(sizeof(void *) * shm_total_page_count);
     635           0 :     pages_status = palloc(sizeof(int) * shm_total_page_count);
     636             : 
     637           0 :     if (firstNumaTouch)
     638           0 :         elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");
     639             : 
     640           0 :     LWLockAcquire(ShmemIndexLock, LW_SHARED);
     641             : 
     642           0 :     hash_seq_init(&hstat, ShmemIndex);
     643             : 
     644             :     /* output all allocated entries */
     645           0 :     memset(nulls, 0, sizeof(nulls));
     646           0 :     while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
     647             :     {
     648             :         int         i;
     649             :         char       *startptr,
     650             :                    *endptr;
     651             :         Size        total_len;
     652             : 
     653             :         /*
     654             :          * Calculate the range of OS pages used by this segment. The segment
     655             :          * may start / end half-way through a page, we want to count these
     656             :          * pages too. So we align the start/end pointers down/up, and then
     657             :          * calculate the number of pages from that.
     658             :          */
     659           0 :         startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
     660           0 :         endptr = (char *) TYPEALIGN(os_page_size,
     661             :                                     (char *) ent->location + ent->allocated_size);
     662           0 :         total_len = (endptr - startptr);
     663             : 
     664           0 :         shm_ent_page_count = total_len / os_page_size;
     665             : 
     666             :         /*
     667             :          * If we ever get 0xff (-1) back from kernel inquiry, then we probably
     668             :          * have a bug in mapping buffers to OS pages.
     669             :          */
     670           0 :         memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);
     671             : 
     672             :         /*
     673             :          * Setup page_ptrs[] with pointers to all OS pages for this segment,
     674             :          * and get the NUMA status using pg_numa_query_pages.
     675             :          *
     676             :          * In order to get reliable results we also need to touch memory
     677             :          * pages, so that inquiry about NUMA memory node doesn't return -2
     678             :          * (ENOENT, which indicates unmapped/unallocated pages).
     679             :          */
     680           0 :         for (i = 0; i < shm_ent_page_count; i++)
     681             :         {
     682           0 :             page_ptrs[i] = startptr + (i * os_page_size);
     683             : 
     684           0 :             if (firstNumaTouch)
     685             :                 pg_numa_touch_mem_if_required(page_ptrs[i]);
     686             : 
     687           0 :             CHECK_FOR_INTERRUPTS();
     688             :         }
     689             : 
     690           0 :         if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
     691           0 :             elog(ERROR, "failed NUMA pages inquiry status: %m");
     692             : 
     693             :         /* Count number of NUMA nodes used for this shared memory entry */
     694           0 :         memset(nodes, 0, sizeof(Size) * (max_nodes + 1));
     695             : 
     696           0 :         for (i = 0; i < shm_ent_page_count; i++)
     697             :         {
     698           0 :             int         s = pages_status[i];
     699             : 
     700             :             /* Ensure we are adding only valid index to the array */
     701           0 :             if (s < 0 || s > max_nodes)
     702             :             {
     703           0 :                 elog(ERROR, "invalid NUMA node id outside of allowed range "
     704             :                      "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
     705             :             }
     706             : 
     707           0 :             nodes[s]++;
     708             :         }
     709             : 
     710             :         /*
     711             :          * Add one entry for each NUMA node, including those without allocated
     712             :          * memory for this segment.
     713             :          */
     714           0 :         for (i = 0; i <= max_nodes; i++)
     715             :         {
     716           0 :             values[0] = CStringGetTextDatum(ent->key);
     717           0 :             values[1] = i;
     718           0 :             values[2] = Int64GetDatum(nodes[i] * os_page_size);
     719             : 
     720           0 :             tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
     721             :                                  values, nulls);
     722             :         }
     723             :     }
     724             : 
     725           0 :     LWLockRelease(ShmemIndexLock);
     726           0 :     firstNumaTouch = false;
     727             : 
     728           0 :     return (Datum) 0;
     729             : }
     730             : 
     731             : /*
     732             :  * Determine the memory page size used for the shared memory segment.
     733             :  *
     734             :  * If the shared segment was allocated using huge pages, returns the size of
     735             :  * a huge page. Otherwise returns the size of regular memory page.
     736             :  *
     737             :  * This should be used only after the server is started.
     738             :  */
     739             : Size
     740           0 : pg_get_shmem_pagesize(void)
     741             : {
     742             :     Size        os_page_size;
     743             : #ifdef WIN32
     744             :     SYSTEM_INFO sysinfo;
     745             : 
     746             :     GetSystemInfo(&sysinfo);
     747             :     os_page_size = sysinfo.dwPageSize;
     748             : #else
     749           0 :     os_page_size = sysconf(_SC_PAGESIZE);
     750             : #endif
     751             : 
     752             :     Assert(IsUnderPostmaster);
     753             :     Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
     754             : 
     755           0 :     if (huge_pages_status == HUGE_PAGES_ON)
     756           0 :         GetHugePageSize(&os_page_size, NULL);
     757             : 
     758           0 :     return os_page_size;
     759             : }
     760             : 
     761             : Datum
     762           8 : pg_numa_available(PG_FUNCTION_ARGS)
     763             : {
     764           8 :     PG_RETURN_BOOL(pg_numa_init() != -1);
     765             : }

Generated by: LCOV version 1.16