LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - shm_toc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 55 70 78.6 %
Date: 2025-01-18 03:14:54 Functions: 6 7 85.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * shm_toc.c
       4             :  *    shared memory segment table of contents
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * src/backend/storage/ipc/shm_toc.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : 
      14             : #include "postgres.h"
      15             : 
      16             : #include "port/atomics.h"
      17             : #include "storage/shm_toc.h"
      18             : #include "storage/spin.h"
      19             : 
      20             : typedef struct shm_toc_entry
      21             : {
      22             :     uint64      key;            /* Caller-chosen identifier; shm_toc_lookup() matches on it */
      23             :     Size        offset;         /* Offset, in bytes, from TOC start to the registered object */
      24             : } shm_toc_entry;
      25             : 
      26             : struct shm_toc
      27             : {
      28             :     uint64      toc_magic;      /* Magic number; shm_toc_attach() verifies it */
      29             :     slock_t     toc_mutex;      /* Spinlock serializing allocation and insertion */
      30             :     Size        toc_total_bytes;    /* Bytes managed by this TOC, header included */
      31             :     Size        toc_allocated_bytes;    /* Bytes handed out so far from the end of the region */
      32             :     uint32      toc_nentry;     /* Number of filled-in slots in toc_entry[] */
      33             :     shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER]; /* Entries, appended by shm_toc_insert() */
      34             : };
      35             : 
      36             : /*
      37             :  * Initialize a region of shared memory with a table of contents.
                     :  *
                     :  * The TOC header is written at 'address', which is also the value returned
                     :  * (cast to shm_toc *).  'magic' is stored in the header so that
                     :  * shm_toc_attach() can later verify it.  'nbytes' is the size of the region;
                     :  * it must be larger than the fixed-size part of the header (asserted) and is
                     :  * rounded down with BUFFERALIGN_DOWN before being recorded.
                     :  *
                     :  * The new TOC starts out with no allocated bytes and no entries.
      38             :  */
      39             : shm_toc *
      40        1060 : shm_toc_create(uint64 magic, void *address, Size nbytes)
      41             : {
      42        1060 :     shm_toc    *toc = (shm_toc *) address;
      43             : 
      44             :     Assert(nbytes > offsetof(shm_toc, toc_entry));
      45        1060 :     toc->toc_magic = magic;
      46        1060 :     SpinLockInit(&toc->toc_mutex);
      47             : 
      48             :     /*
      49             :      * The alignment code in shm_toc_allocate() assumes that the starting
      50             :      * value is buffer-aligned.
      51             :      */
      52        1060 :     toc->toc_total_bytes = BUFFERALIGN_DOWN(nbytes);
      53        1060 :     toc->toc_allocated_bytes = 0;
      54        1060 :     toc->toc_nentry = 0;
      55             : 
      56        1060 :     return toc;
      57             : }
      58             : 
      59             : /*
      60             :  * Attach to an existing table of contents.  If the magic number found at
      61             :  * the target address doesn't match our expectations, return NULL.
                     :  *
                     :  * On a match, 'address' is returned cast to shm_toc *.  Nothing in the TOC
                     :  * is modified; the assertions only sanity-check the recorded sizes.
      62             :  */
      63             : shm_toc *
      64        5462 : shm_toc_attach(uint64 magic, void *address)
      65             : {
      66        5462 :     shm_toc    *toc = (shm_toc *) address;
      67             : 
      68        5462 :     if (toc->toc_magic != magic)
      69           0 :         return NULL;
      70             : 
      71             :     Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes);
      72             :     Assert(toc->toc_total_bytes > offsetof(shm_toc, toc_entry));
      73             : 
      74        5462 :     return toc;
      75             : }
      76             : 
      77             : /*
      78             :  * Allocate shared memory from a segment managed by a table of contents.
      79             :  *
      80             :  * This is not a full-blown allocator; there's no way to free memory.  It's
      81             :  * just a way of dividing a single physical shared memory segment into logical
      82             :  * chunks that may be used for different purposes.
      83             :  *
      84             :  * We allocate backwards from the end of the segment, so that the TOC entries
      85             :  * can grow forward from the start of the segment.
                     :  *
                     :  * 'nbytes' is rounded up with BUFFERALIGN.  If the rounded request does not
                     :  * fit alongside the TOC header, the existing entries and the bytes already
                     :  * allocated, the spinlock is released and an "out of shared memory" ERROR
                     :  * is reported.  Otherwise the returned pointer is the start of the new
                     :  * chunk, which sits immediately below the previously allocated chunks.
      86             :  */
      87             : void *
      88       21182 : shm_toc_allocate(shm_toc *toc, Size nbytes)
      89             : {
      90       21182 :     volatile shm_toc *vtoc = toc;
      91             :     Size        total_bytes;
      92             :     Size        allocated_bytes;
      93             :     Size        nentry;
      94             :     Size        toc_bytes;
      95             : 
      96             :     /*
      97             :      * Make sure request is well-aligned.  XXX: MAXALIGN is not enough,
      98             :      * because atomic ops might need a wider alignment.  We don't have a
      99             :      * proper definition for the minimum to make atomic ops safe, but
     100             :      * BUFFERALIGN ought to be enough.
     101             :      */
     102       21182 :     nbytes = BUFFERALIGN(nbytes);
     103             : 
     104       21182 :     SpinLockAcquire(&toc->toc_mutex);
     105             : 
     106       21182 :     total_bytes = vtoc->toc_total_bytes;
     107       21182 :     allocated_bytes = vtoc->toc_allocated_bytes;
     108       21182 :     nentry = vtoc->toc_nentry;
     109       21182 :     toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
     110       21182 :         + allocated_bytes;
     111             : 
     112             :     /* Check for memory exhaustion and for wraparound of toc_bytes + nbytes. */
     113       21182 :     if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes)
     114             :     {
     115           0 :         SpinLockRelease(&toc->toc_mutex);
     116           0 :         ereport(ERROR,
     117             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     118             :                  errmsg("out of shared memory")));
     119             :     }
     120       21182 :     vtoc->toc_allocated_bytes += nbytes;
     121             : 
     122       21182 :     SpinLockRelease(&toc->toc_mutex);
     123             : 
     124       21182 :     return ((char *) toc) + (total_bytes - allocated_bytes - nbytes);
     125             : }
     126             : 
     127             : /*
     128             :  * Return the number of bytes that can still be allocated.
                     :  *
                     :  * The byte and entry counters are copied while holding the spinlock; the
                     :  * arithmetic is done after it has been released.  The space charged to the
                     :  * TOC header and its entries is rounded up with BUFFERALIGN and subtracted,
                     :  * together with the bytes already allocated, from the total.
     129             :  */
     130             : Size
     131           0 : shm_toc_freespace(shm_toc *toc)
     132             : {
     133           0 :     volatile shm_toc *vtoc = toc;
     134             :     Size        total_bytes;
     135             :     Size        allocated_bytes;
     136             :     Size        nentry;
     137             :     Size        toc_bytes;
     138             : 
     139           0 :     SpinLockAcquire(&toc->toc_mutex);
     140           0 :     total_bytes = vtoc->toc_total_bytes;
     141           0 :     allocated_bytes = vtoc->toc_allocated_bytes;
     142           0 :     nentry = vtoc->toc_nentry;
     143           0 :     SpinLockRelease(&toc->toc_mutex);
     144             : 
     145           0 :     toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
     146             :     Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
     147           0 :     return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
     148             : }
     149             : 
     150             : /*
     151             :  * Insert a TOC entry.
     152             :  *
     153             :  * The idea here is that the process setting up the shared memory segment will
     154             :  * register the addresses of data structures within the segment using this
     155             :  * function.  Each data structure will be identified using a 64-bit key, which
     156             :  * is assumed to be a well-known or discoverable integer.  Other processes
     157             :  * accessing the shared memory segment can pass the same key to
     158             :  * shm_toc_lookup() to discover the addresses of those data structures.
     159             :  *
     160             :  * Since the shared memory segment may be mapped at different addresses within
     161             :  * different backends, we store relative rather than absolute pointers.
     162             :  *
     163             :  * This won't scale well to a large number of keys.  Hopefully, that isn't
     164             :  * necessary; if it proves to be, we might need to provide a more sophisticated
     165             :  * data structure here.  But the real idea here is just to give someone mapping
     166             :  * a dynamic shared memory segment the ability to find the bare minimum number
     167             :  * of pointers that they need to bootstrap.  If you're storing a lot of stuff in
     168             :  * the TOC, you're doing it wrong.
                     :  *
                     :  * 'address' must lie above the start of the TOC (asserted); it is recorded
                     :  * as a byte offset from 'toc'.  An "out of shared memory" ERROR is reported,
                     :  * after releasing the spinlock, if there is no room for one more entry or
                     :  * the entry count has reached PG_UINT32_MAX.  This function does not check
                     :  * whether 'key' is already present.
     169             :  */
     170             : void
     171       21182 : shm_toc_insert(shm_toc *toc, uint64 key, void *address)
     172             : {
     173       21182 :     volatile shm_toc *vtoc = toc;
     174             :     Size        total_bytes;
     175             :     Size        allocated_bytes;
     176             :     Size        nentry;
     177             :     Size        toc_bytes;
     178             :     Size        offset;
     179             : 
     180             :     /* Relativize pointer: store the distance from the TOC start, not the address. */
     181             :     Assert(address > (void *) toc);
     182       21182 :     offset = ((char *) address) - (char *) toc;
     183             : 
     184       21182 :     SpinLockAcquire(&toc->toc_mutex);
     185             : 
     186       21182 :     total_bytes = vtoc->toc_total_bytes;
     187       21182 :     allocated_bytes = vtoc->toc_allocated_bytes;
     188       21182 :     nentry = vtoc->toc_nentry;
     189       21182 :     toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
     190       21182 :         + allocated_bytes;
     191             : 
     192             :     /* Check for memory exhaustion, arithmetic wraparound, and a full counter. */
     193       21182 :     if (toc_bytes + sizeof(shm_toc_entry) > total_bytes ||
     194       21182 :         toc_bytes + sizeof(shm_toc_entry) < toc_bytes ||
     195             :         nentry >= PG_UINT32_MAX)
     196             :     {
     197           0 :         SpinLockRelease(&toc->toc_mutex);
     198           0 :         ereport(ERROR,
     199             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     200             :                  errmsg("out of shared memory")));
     201             :     }
     202             : 
     203             :     Assert(offset < total_bytes);
     204       21182 :     vtoc->toc_entry[nentry].key = key;
     205       21182 :     vtoc->toc_entry[nentry].offset = offset;
     206             : 
     207             :     /*
     208             :      * By placing a write barrier after filling in the entry and before
     209             :      * updating the number of entries, we make it safe to read the TOC
     210             :      * unlocked.
     211             :      */
     212       21182 :     pg_write_barrier();
     213             : 
     214       21182 :     vtoc->toc_nentry++;
     215             : 
     216       21182 :     SpinLockRelease(&toc->toc_mutex);
     217       21182 : }
     218             : 
     219             : /*
     220             :  * Look up a TOC entry.
     221             :  *
     222             :  * If the key is not found, returns NULL if noError is true, otherwise
     223             :  * throws elog(ERROR).
                     :  *
                     :  * On success the result is the TOC start address plus the offset stored in
                     :  * the first entry whose key matches; later entries with the same key are
                     :  * never reached.
     224             :  *
     225             :  * Unlike shm_toc_allocate(), shm_toc_freespace() and shm_toc_insert(), this
     226             :  * operation acquires no lock; it uses only barriers.  It probably wouldn't
     227             :  * hurt concurrency very much even if it did get a lock, but since it's
     228             :  * reasonably likely that a group of worker processes could each read a series
     229             :  * of entries from the same TOC right around the same time, there seems to be
                     :  * some value in avoiding it.
     230             :  */
     231             : void *
     232       81094 : shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
     233             : {
     234             :     uint32      nentry;
     235             :     uint32      i;
     236             : 
     237             :     /*
     238             :      * Read the number of entries before we examine any entry.  We assume that
     239             :      * reading a uint32 is atomic.
     240             :      */
     241       81094 :     nentry = toc->toc_nentry;
     242       81094 :     pg_read_barrier();
     243             : 
     244             :     /* Now search, in insertion order, for a matching entry. */
     245     1041002 :     for (i = 0; i < nentry; ++i)
     246             :     {
     247     1032410 :         if (toc->toc_entry[i].key == key)
     248       72502 :             return ((char *) toc) + toc->toc_entry[i].offset;
     249             :     }
     250             : 
     251             :     /* No matching entry was found. */
     252        8592 :     if (!noError)
     253           0 :         elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p",
     254             :              key, toc);
     255        8592 :     return NULL;
     256             : }
     257             : 
     258             : /*
     259             :  * Estimate how much shared memory will be required to store a TOC and its
     260             :  * dependent data structures.
                     :  *
                     :  * The estimate is the fixed-size TOC header, plus one shm_toc_entry for each
                     :  * of e->number_of_keys, plus e->space_for_chunks, rounded up with
                     :  * BUFFERALIGN.  The sums go through add_size()/mul_size(), which are defined
                     :  * elsewhere (presumably overflow-checked -- confirm against their
                     :  * definitions).
     261             :  */
     262             : Size
     263        1090 : shm_toc_estimate(shm_toc_estimator *e)
     264             : {
     265             :     Size        sz;
     266             : 
     267        1090 :     sz = offsetof(shm_toc, toc_entry);
     268        1090 :     sz = add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry)));
     269        1090 :     sz = add_size(sz, e->space_for_chunks);
     270             : 
     271        1090 :     return BUFFERALIGN(sz);
     272             : }

Generated by: LCOV version 1.14