LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 23.6 % 203 48
Test Date: 2026-03-24 01:16:09 Functions: 62.5 % 8 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * dsm_impl.c
       4              :  *    manage dynamic shared memory segments
       5              :  *
       6              :  * This file provides low-level APIs for creating and destroying shared
       7              :  * memory segments using several different possible techniques.  We refer
       8              :  * to these segments as dynamic because they can be created, altered, and
       9              :  * destroyed at any point during the server life cycle.  This is unlike
      10              :  * the main shared memory segment, of which there is always exactly one
      11              :  * and which is always mapped at a fixed address in every PostgreSQL
      12              :  * background process.
      13              :  *
      14              :  * Because not all systems provide the same primitives in this area, nor
      15              :  * do all primitives behave the same way on all systems, we provide
      16              :  * several implementations of this facility.  Many systems implement
      17              :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18              :  * in this area, with the exception that shared memory identifiers live
      19              :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20              :  * name collisions with other processes (including other copies of
      21              :  * PostgreSQL) running on the same system.  Some systems only support
      22              :  * the older System V shared memory interface (shmget etc.) which is
      23              :  * also usable; however, the default allocation limits are often quite
      24              :  * small, and the namespace is even more restricted.
      25              :  *
      26              :  * We also provide an mmap-based shared memory implementation.  This may
      27              :  * be useful on systems that provide shared memory via a special-purpose
      28              :  * filesystem; by opting for this implementation, the user can even
      29              :  * control precisely where their shared memory segments are placed.  It
      30              :  * can also be used as a fallback for systems where shm_open and shmget
      31              :  * are not available or can't be used for some reason.  Of course,
      32              :  * mapping a file residing on an actual spinning disk is a fairly poor
      33              :  * approximation for shared memory because writeback may hurt performance
      34              :  * substantially, but there should be few systems where we must make do
      35              :  * with such poor tools.
      36              :  *
      37              :  * As ever, Windows requires its own implementation.
      38              :  *
      39              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      40              :  * Portions Copyright (c) 1994, Regents of the University of California
      41              :  *
      42              :  *
      43              :  * IDENTIFICATION
      44              :  *    src/backend/storage/ipc/dsm_impl.c
      45              :  *
      46              :  *-------------------------------------------------------------------------
      47              :  */
      48              : 
      49              : #include "postgres.h"
      50              : 
      51              : #include <fcntl.h>
      52              : #include <signal.h>
      53              : #include <unistd.h>
      54              : #ifndef WIN32
      55              : #include <sys/mman.h>
      56              : #include <sys/ipc.h>
      57              : #include <sys/shm.h>
      58              : #include <sys/stat.h>
      59              : #endif
      60              : 
      61              : #include "common/file_perm.h"
      62              : #include "libpq/pqsignal.h"
      63              : #include "miscadmin.h"
      64              : #include "pgstat.h"
      65              : #include "portability/mem.h"
      66              : #include "postmaster/postmaster.h"
      67              : #include "storage/dsm_impl.h"
      68              : #include "storage/fd.h"
      69              : #include "utils/guc.h"
      70              : #include "utils/memutils.h"
      71              : #include "utils/wait_event.h"
      72              : 
      73              : #ifdef USE_DSM_POSIX
      74              : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      75              :                            void **impl_private, void **mapped_address,
      76              :                            Size *mapped_size, int elevel);
      77              : static int  dsm_impl_posix_resize(int fd, off_t size);
      78              : #endif
      79              : #ifdef USE_DSM_SYSV
      80              : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      81              :                           void **impl_private, void **mapped_address,
      82              :                           Size *mapped_size, int elevel);
      83              : #endif
      84              : #ifdef USE_DSM_WINDOWS
      85              : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      86              :                              void **impl_private, void **mapped_address,
      87              :                              Size *mapped_size, int elevel);
      88              : #endif
      89              : #ifdef USE_DSM_MMAP
      90              : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      91              :                           void **impl_private, void **mapped_address,
      92              :                           Size *mapped_size, int elevel);
      93              : #endif
      94              : static int  errcode_for_dynamic_shared_memory(void);
      95              : 
      96              : const struct config_enum_entry dynamic_shared_memory_options[] = {
      97              : #ifdef USE_DSM_POSIX
      98              :     {"posix", DSM_IMPL_POSIX, false},
      99              : #endif
     100              : #ifdef USE_DSM_SYSV
     101              :     {"sysv", DSM_IMPL_SYSV, false},
     102              : #endif
     103              : #ifdef USE_DSM_WINDOWS
     104              :     {"windows", DSM_IMPL_WINDOWS, false},
     105              : #endif
     106              : #ifdef USE_DSM_MMAP
     107              :     {"mmap", DSM_IMPL_MMAP, false},
     108              : #endif
     109              :     {NULL, 0, false}
     110              : };
     111              : 
     112              : /* Implementation selector. */
     113              : int         dynamic_shared_memory_type = DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE;
     114              : 
     115              : /* Amount of space reserved for DSM segments in the main area. */
     116              : int         min_dynamic_shared_memory;
     117              : 
     118              : /* Size of buffer to be used for zero-filling. */
     119              : #define ZBUFFER_SIZE                8192
     120              : 
     121              : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"
     122              : 
     123              : /*------
     124              :  * Perform a low-level shared memory operation in a platform-specific way,
     125              :  * as dictated by the selected implementation.  Each implementation is
     126              :  * required to implement the following primitives.
     127              :  *
     128              :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     129              :  * map it.
     130              :  *
     131              :  * DSM_OP_ATTACH.  Map the existing segment; request_size must be 0, and the segment's actual size is reported via mapped_size.
     132              :  *
     133              :  * DSM_OP_DETACH.  Unmap the segment.
     134              :  *
     135              :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     136              :  * segment.
     137              :  *
     138              :  * Arguments:
     139              :  *   op: The operation to be performed.
     140              :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     141              :  *     identifier for the new handle the caller wants created.
     142              :  *   request_size: For DSM_OP_CREATE, the requested size.  Otherwise, 0.
     143              :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     144              :  *     to NULL for the first operation on a shared memory segment within this
     145              :  *     backend; thereafter, it will point to the value to which it was set
     146              :  *     on the previous call.
     147              :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     148              :  *     if none.  Updated with new mapping address.
     149              :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     150              :  *     Updated with new mapped size.
     151              :  *   elevel: Level at which to log errors.
     152              :  *
     153              :  * Return value: true on success, false on failure.  When false is returned,
     154              :  * a message should first be logged at the specified elevel, except in the
     155              :  * case where DSM_OP_CREATE experiences a name collision, which should
     156              :  * silently return false.
     157              :  *-----
     158              :  */
     159              : bool
     160        60560 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     161              :             void **impl_private, void **mapped_address, Size *mapped_size,
     162              :             int elevel)
     163              : {
     164              :     Assert(op == DSM_OP_CREATE || request_size == 0);
     165              :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
     166              :            (*mapped_address == NULL && *mapped_size == 0));
     167              : 
     168        60560 :     switch (dynamic_shared_memory_type)
     169              :     {
     170              : #ifdef USE_DSM_POSIX
     171        60560 :         case DSM_IMPL_POSIX:
     172        60560 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     173              :                                   mapped_address, mapped_size, elevel);
     174              : #endif
     175              : #ifdef USE_DSM_SYSV
     176            0 :         case DSM_IMPL_SYSV:
     177            0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     178              :                                  mapped_address, mapped_size, elevel);
     179              : #endif
     180              : #ifdef USE_DSM_WINDOWS
     181              :         case DSM_IMPL_WINDOWS:
     182              :             return dsm_impl_windows(op, handle, request_size, impl_private,
     183              :                                     mapped_address, mapped_size, elevel);
     184              : #endif
     185              : #ifdef USE_DSM_MMAP
     186            0 :         case DSM_IMPL_MMAP:
     187            0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     188              :                                  mapped_address, mapped_size, elevel);
     189              : #endif
     190            0 :         default:
     191            0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     192              :                  dynamic_shared_memory_type);
     193              :             return false;
     194              :     }
     195              : }
     196              : 
     197              : #ifdef USE_DSM_POSIX
     198              : /*
     199              :  * Operating system primitives to support POSIX shared memory.
     200              :  *
     201              :  * POSIX shared memory segments are created and attached using shm_open()
     202              :  * and shm_unlink(); other operations, such as sizing or mapping the
     203              :  * segment, are performed as if the shared memory segments were files.
     204              :  *
     205              :  * Indeed, on some platforms, they may be implemented that way.  While
     206              :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     207              :  * some operating systems may implement them as files, even going so far
     208              :  * as to treat a request for /xyz as a request to create a file by that name
     209              :  * in the root directory.  Users of such broken platforms should select
     210              :  * a different shared memory implementation.
     211              :  */
     212              : static bool
     213        60560 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     214              :                void **impl_private, void **mapped_address, Size *mapped_size,
     215              :                int elevel)
     216              : {
     217              :     char        name[64];
     218              :     int         flags;
     219              :     int         fd;
     220              :     char       *address;
     221              : 
     222        60560 :     snprintf(name, 64, "/PostgreSQL.%u", handle);
     223              : 
     224              :     /* Handle teardown cases. */
     225        60560 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     226              :     {
     227        31408 :         if (*mapped_address != NULL
     228        29153 :             && munmap(*mapped_address, *mapped_size) != 0)
     229              :         {
     230            0 :             ereport(elevel,
     231              :                     (errcode_for_dynamic_shared_memory(),
     232              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     233              :                             name)));
     234            0 :             return false;
     235              :         }
     236        31408 :         *mapped_address = NULL;
     237        31408 :         *mapped_size = 0;
     238        31408 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     239              :         {
     240            0 :             ereport(elevel,
     241              :                     (errcode_for_dynamic_shared_memory(),
     242              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     243              :                             name)));
     244            0 :             return false;
     245              :         }
     246        31408 :         return true;
     247              :     }
     248              : 
     249              :     /*
     250              :      * Create new segment or open an existing one for attach.
     251              :      *
     252              :      * Even though we will close the FD before returning, it seems desirable
     253              :      * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
     254              :      * failure.  The fact that we won't hold the FD open long justifies using
     255              :      * ReserveExternalFD rather than AcquireExternalFD, though.
     256              :      */
     257        29152 :     ReserveExternalFD();
     258              : 
     259        29152 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     260        29152 :     if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
     261              :     {
     262            0 :         ReleaseExternalFD();
     263            0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)
     264            0 :             ereport(elevel,
     265              :                     (errcode_for_dynamic_shared_memory(),
     266              :                      errmsg("could not open shared memory segment \"%s\": %m",
     267              :                             name)));
     268            0 :         return false;
     269              :     }
     270              : 
     271              :     /*
     272              :      * If we're attaching the segment, determine the current size; if we are
     273              :      * creating the segment, set the size to the requested value.
     274              :      */
     275        29152 :     if (op == DSM_OP_ATTACH)
     276              :     {
     277              :         struct stat st;
     278              : 
     279        25818 :         if (fstat(fd, &st) != 0)
     280              :         {
     281              :             int         save_errno;
     282              : 
     283              :             /* Back out what's already been done. */
     284            0 :             save_errno = errno;
     285            0 :             close(fd);
     286            0 :             ReleaseExternalFD();
     287            0 :             errno = save_errno;
     288              : 
     289            0 :             ereport(elevel,
     290              :                     (errcode_for_dynamic_shared_memory(),
     291              :                      errmsg("could not stat shared memory segment \"%s\": %m",
     292              :                             name)));
     293            0 :             return false;
     294              :         }
     295        25818 :         request_size = st.st_size;
     296              :     }
     297         3334 :     else if (dsm_impl_posix_resize(fd, request_size) != 0)
     298              :     {
     299              :         int         save_errno;
     300              : 
     301              :         /* Back out what's already been done. */
     302            0 :         save_errno = errno;
     303            0 :         close(fd);
     304            0 :         ReleaseExternalFD();
     305            0 :         shm_unlink(name);
     306            0 :         errno = save_errno;
     307              : 
     308            0 :         ereport(elevel,
     309              :                 (errcode_for_dynamic_shared_memory(),
     310              :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     311              :                         name, request_size)));
     312            0 :         return false;
     313              :     }
     314              : 
     315              :     /* Map it. */
     316        29152 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     317              :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     318        29152 :     if (address == MAP_FAILED)
     319              :     {
     320              :         int         save_errno;
     321              : 
     322              :         /* Back out what's already been done. */
     323            0 :         save_errno = errno;
     324            0 :         close(fd);
     325            0 :         ReleaseExternalFD();
     326            0 :         if (op == DSM_OP_CREATE)
     327            0 :             shm_unlink(name);
     328            0 :         errno = save_errno;
     329              : 
     330            0 :         ereport(elevel,
     331              :                 (errcode_for_dynamic_shared_memory(),
     332              :                  errmsg("could not map shared memory segment \"%s\": %m",
     333              :                         name)));
     334            0 :         return false;
     335              :     }
     336        29152 :     *mapped_address = address;
     337        29152 :     *mapped_size = request_size;
     338        29152 :     close(fd);
     339        29152 :     ReleaseExternalFD();
     340              : 
     341        29152 :     return true;
     342              : }
     343              : 
     344              : /*
     345              :  * Set the size of a virtual memory region associated with a file descriptor.
     346              :  * If necessary, also ensure that virtual memory is actually allocated by the
     347              :  * operating system, to avoid nasty surprises later.
     348              :  *
     349              :  * Returns non-zero if either truncation or allocation fails, and sets errno.
     350              :  */
     351              : static int
     352         3334 : dsm_impl_posix_resize(int fd, off_t size)
     353              : {
     354              :     int         rc;
     355              :     int         save_errno;
     356              :     sigset_t    save_sigmask;
     357              : 
     358              :     /*
     359              :      * Block all blockable signals, except SIGQUIT.  posix_fallocate() can run
     360              :      * for quite a long time, and is an all-or-nothing operation.  If we
     361              :      * allowed SIGUSR1 to interrupt us repeatedly (for example, due to
     362              :      * recovery conflicts), the retry loop might never succeed.
     363              :      */
     364         3334 :     if (IsUnderPostmaster)
     365         2033 :         sigprocmask(SIG_SETMASK, &BlockSig, &save_sigmask);
     366              : 
     367         3334 :     pgstat_report_wait_start(WAIT_EVENT_DSM_ALLOCATE);
     368              : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
     369              : 
     370              :     /*
     371              :      * On Linux, a shm_open fd is backed by a tmpfs file.  If we were to use
     372              :      * ftruncate, the file would contain a hole.  Accessing memory backed by a
     373              :      * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
     374              :      * is no more tmpfs space available.  So we ask tmpfs to allocate pages
     375              :      * here, so we can fail gracefully with ENOSPC now rather than risking
     376              :      * SIGBUS later.
     377              :      *
     378              :      * We still use a traditional EINTR retry loop to handle SIGCONT.
     379              :      * posix_fallocate() doesn't restart automatically, and we don't want this
     380              :      * to fail if you attach a debugger.
     381              :      */
     382              :     do
     383              :     {
     384         3334 :         rc = posix_fallocate(fd, 0, size);
     385         3334 :     } while (rc == EINTR);
     386              : 
     387              :     /*
     388              :      * The caller expects errno to be set, but posix_fallocate() doesn't set
     389              :      * it.  Instead it returns error numbers directly.  So set errno, even
     390              :      * though we'll also return rc to indicate success or failure.
     391              :      */
     392         3334 :     errno = rc;
     393              : #else
     394              :     /* Extend the file to the requested size. */
     395              :     do
     396              :     {
     397              :         rc = ftruncate(fd, size);
     398              :     } while (rc < 0 && errno == EINTR);
     399              : #endif
     400         3334 :     pgstat_report_wait_end();
     401              : 
     402         3334 :     if (IsUnderPostmaster)
     403              :     {
     404         2033 :         save_errno = errno;
     405         2033 :         sigprocmask(SIG_SETMASK, &save_sigmask, NULL);
     406         2033 :         errno = save_errno;
     407              :     }
     408              : 
     409         3334 :     return rc;
     410              : }
     411              : 
     412              : #endif                          /* USE_DSM_POSIX */
     413              : 
     414              : #ifdef USE_DSM_SYSV
     415              : /*
     416              :  * Operating system primitives to support System V shared memory.
     417              :  *
     418              :  * System V shared memory segments are manipulated using shmget(), shmat(),
     419              :  * shmdt(), and shmctl().  As the default allocation limits for System V
     420              :  * shared memory are usually quite low, the POSIX facilities may be
     421              :  * preferable; but those are not supported everywhere.
     422              :  */
     423              : static bool
     424            0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     425              :               void **impl_private, void **mapped_address, Size *mapped_size,
     426              :               int elevel)
     427              : {
     428              :     key_t       key;
     429              :     int         ident;
     430              :     char       *address;
     431              :     char        name[64];
     432              :     int        *ident_cache;
     433              : 
     434              :     /*
     435              :      * POSIX shared memory and mmap-based shared memory identify segments with
     436              :      * names.  To avoid needless error message variation, we use the handle as
     437              :      * the name.
     438              :      */
     439            0 :     snprintf(name, 64, "%u", handle);
     440              : 
     441              :     /*
     442              :      * The System V shared memory namespace is very restricted; names are of
     443              :      * type key_t, which is expected to be some sort of integer data type, but
     444              :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     445              :      * identify shared memory segments across processes, this might seem like
     446              :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     447              :      * the cast below might truncate away some bits from the handle the
     448              :      * user provided, but it'll truncate exactly the same bits away in exactly
     449              :      * the same fashion every time we use that handle, which is all that
     450              :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     451              :      * won't use the full range of available key space, but that's no big deal
     452              :      * either.
     453              :      *
     454              :      * We do make sure that the key isn't negative, because that might not be
     455              :      * portable.
     456              :      */
     457            0 :     key = (key_t) handle;
     458            0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     459            0 :         key = -key;
     460              : 
     461              :     /*
     462              :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     463              :      * up with that value by chance during a create operation, just pretend it
     464              :      * already exists, so that caller will retry.  If we run into it anywhere
     465              :      * else, the caller has passed a handle that doesn't correspond to
     466              :      * anything we ever created, which should not happen.
     467              :      */
     468            0 :     if (key == IPC_PRIVATE)
     469              :     {
     470            0 :         if (op != DSM_OP_CREATE)
     471            0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     472            0 :         errno = EEXIST;
     473            0 :         return false;
     474              :     }
     475              : 
     476              :     /*
     477              :      * Before we can do anything with a shared memory segment, we have to map
     478              :      * the shared memory key to a shared memory identifier using shmget(). To
     479              :      * avoid repeated lookups, we cache the identifier using impl_private.
     480              :      */
     481            0 :     if (*impl_private != NULL)
     482              :     {
     483            0 :         ident_cache = *impl_private;
     484            0 :         ident = *ident_cache;
     485              :     }
     486              :     else
     487              :     {
     488            0 :         int         flags = IPCProtection;
     489              :         size_t      segsize;
     490              : 
     491              :         /*
     492              :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     493              :          * leak the resource if memory allocation fails.
     494              :          */
     495            0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     496              : 
     497              :         /*
     498              :          * When using shmget to find an existing segment, we must pass the
     499              :          * size as 0.  Passing a non-zero size which is greater than the
     500              :          * actual size will result in EINVAL.
     501              :          */
     502            0 :         segsize = 0;
     503              : 
     504            0 :         if (op == DSM_OP_CREATE)
     505              :         {
     506            0 :             flags |= IPC_CREAT | IPC_EXCL;
     507            0 :             segsize = request_size;
     508              :         }
     509              : 
     510            0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     511              :         {
     512            0 :             if (op == DSM_OP_ATTACH || errno != EEXIST)
     513              :             {
     514            0 :                 int         save_errno = errno;
     515              : 
     516            0 :                 pfree(ident_cache);
     517            0 :                 errno = save_errno;
     518            0 :                 ereport(elevel,
     519              :                         (errcode_for_dynamic_shared_memory(),
     520              :                          errmsg("could not get shared memory segment: %m")));
     521              :             }
     522            0 :             return false;
     523              :         }
     524              : 
     525            0 :         *ident_cache = ident;
     526            0 :         *impl_private = ident_cache;
     527              :     }
     528              : 
     529              :     /* Handle teardown cases. */
     530            0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     531              :     {
     532            0 :         pfree(ident_cache);
     533            0 :         *impl_private = NULL;
     534            0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     535              :         {
     536            0 :             ereport(elevel,
     537              :                     (errcode_for_dynamic_shared_memory(),
     538              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     539              :                             name)));
     540            0 :             return false;
     541              :         }
     542            0 :         *mapped_address = NULL;
     543            0 :         *mapped_size = 0;
     544            0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     545              :         {
     546            0 :             ereport(elevel,
     547              :                     (errcode_for_dynamic_shared_memory(),
     548              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     549              :                             name)));
     550            0 :             return false;
     551              :         }
     552            0 :         return true;
     553              :     }
     554              : 
     555              :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     556            0 :     if (op == DSM_OP_ATTACH)
     557              :     {
     558              :         struct shmid_ds shm;
     559              : 
     560            0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     561              :         {
     562            0 :             ereport(elevel,
     563              :                     (errcode_for_dynamic_shared_memory(),
     564              :                      errmsg("could not stat shared memory segment \"%s\": %m",
     565              :                             name)));
     566            0 :             return false;
     567              :         }
     568            0 :         request_size = shm.shm_segsz;
     569              :     }
     570              : 
     571              :     /* Map it. */
     572            0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     573            0 :     if (address == (void *) -1)
     574              :     {
     575              :         int         save_errno;
     576              : 
     577              :         /* Back out what's already been done. */
     578            0 :         save_errno = errno;
     579            0 :         if (op == DSM_OP_CREATE)
     580            0 :             shmctl(ident, IPC_RMID, NULL);
     581            0 :         errno = save_errno;
     582              : 
     583            0 :         ereport(elevel,
     584              :                 (errcode_for_dynamic_shared_memory(),
     585              :                  errmsg("could not map shared memory segment \"%s\": %m",
     586              :                         name)));
     587            0 :         return false;
     588              :     }
     589            0 :     *mapped_address = address;
     590            0 :     *mapped_size = request_size;
     591              : 
     592            0 :     return true;
     593              : }
     594              : #endif
     595              : 
     596              : #ifdef USE_DSM_WINDOWS
     597              : /*
     598              :  * Operating system primitives to support Windows shared memory.
     599              :  *
     600              :  * The Windows implementation uses file mapping objects, which can be
     601              :  * backed by either a physical file or the system paging file.  The current
     602              :  * implementation uses the system paging file, because the effects (such as
     603              :  * on performance) of using a physical file are not clear, and because the
     604              :  * main shared memory segment on Windows is set up in a similar way.
     605              :  *
     606              :  * A file mapping object is a kernel object: it always gets deleted when the
     607              :  * last reference to it goes away, either explicitly via CloseHandle or
     608              :  * when the process containing the reference exits.
                      :  *
                      :  * DSM_OP_CREATE creates a mapping of request_size bytes; any other
                      :  * non-teardown operation opens an existing mapping by name.  Either way
                      :  * the mapping is then mapped into this process, and on success
                      :  * *mapped_address, *mapped_size and *impl_private (the mapping handle)
                      :  * are set.  DSM_OP_DETACH and DSM_OP_DESTROY unmap the view, close the
                      :  * handle and reset all three.
                      :  *
                      :  * Returns true on success.  On failure the problem is reported at elevel
                      :  * and false is returned, except that DSM_OP_CREATE returns false without
                      :  * any report when CreateFileMapping indicates ERROR_ALREADY_EXISTS or
                      :  * ERROR_ACCESS_DENIED.
     609              :  */
     610              : static bool
     611              : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
     612              :                  void **impl_private, void **mapped_address,
     613              :                  Size *mapped_size, int elevel)
     614              : {
     615              :     char       *address;
     616              :     HANDLE      hmap;
     617              :     char        name[64];
     618              :     MEMORY_BASIC_INFORMATION info;
     619              : 
     620              :     /*
     621              :      * Storing the shared memory segment in the Global\ namespace would allow
     622              :      * any process running in any session to access the file mapping object,
     623              :      * provided that the caller has the required access rights.  But to avoid
     624              :      * the issues faced with the main shared memory segment, we use a naming
     625              :      * convention similar to the one used there.  We can change this once the
     626              :      * issue mentioned in GetSharedMemName is resolved.
     627              :      */
     628              :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     629              : 
     630              :     /*
     631              :      * Handle teardown cases.  Since Windows automatically destroys the object
     632              :      * when no references remain, we can treat destroy the same as detach.
     633              :      */
     634              :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     635              :     {
     636              :         if (*mapped_address != NULL
     637              :             && UnmapViewOfFile(*mapped_address) == 0)
     638              :         {
     639              :             _dosmaperr(GetLastError());
     640              :             ereport(elevel,
     641              :                     (errcode_for_dynamic_shared_memory(),
     642              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     643              :                             name)));
     644              :             return false;
     645              :         }
     646              :         if (*impl_private != NULL
     647              :             && CloseHandle(*impl_private) == 0)
     648              :         {
     649              :             _dosmaperr(GetLastError());
     650              :             ereport(elevel,
     651              :                     (errcode_for_dynamic_shared_memory(),
     652              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     653              :                             name)));
     654              :             return false;
     655              :         }
     656              : 
     657              :         *impl_private = NULL;
     658              :         *mapped_address = NULL;
     659              :         *mapped_size = 0;
     660              :         return true;
     661              :     }
     662              : 
     663              :     /* Create new segment or open an existing one for attach. */
     664              :     if (op == DSM_OP_CREATE)
     665              :     {
     666              :         DWORD       size_high;
     667              :         DWORD       size_low;
     668              :         DWORD       errcode;
     669              : 
     670              :         /* Shifts >= the width of the type are undefined. */
     671              : #ifdef _WIN64
     672              :         size_high = request_size >> 32;
     673              : #else
     674              :         size_high = 0;
     675              : #endif
     676              :         size_low = (DWORD) request_size;
     677              : 
     678              :         /* CreateFileMapping might not clear the error code on success */
     679              :         SetLastError(0);
     680              : 
     681              :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     682              :                                  NULL,  /* Default security attrs */
     683              :                                  PAGE_READWRITE,    /* Memory is read/write */
     684              :                                  size_high, /* Upper 32 bits of size */
     685              :                                  size_low,  /* Lower 32 bits of size */
     686              :                                  name);
     687              : 
     688              :         errcode = GetLastError();
     689              :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     690              :         {
     691              :             /*
     692              :              * On Windows, when the segment already exists, a handle for the
     693              :              * existing segment is returned.  We must close it before
     694              :              * returning.  However, if the existing segment was created by a
     695              :              * service, the call fails with ERROR_ACCESS_DENIED instead.  We
     696              :              * don't call _dosmaperr here, so errno won't be modified.
     697              :              */
     698              :             if (hmap)
     699              :                 CloseHandle(hmap);
     700              :             return false;
     701              :         }
     702              : 
     703              :         if (!hmap)
     704              :         {
     705              :             _dosmaperr(errcode);
     706              :             ereport(elevel,
     707              :                     (errcode_for_dynamic_shared_memory(),
     708              :                      errmsg("could not create shared memory segment \"%s\": %m",
     709              :                             name)));
     710              :             return false;
     711              :         }
     712              :     }
     713              :     else
     714              :     {
     715              :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     716              :                                FALSE,   /* do not inherit the handle */
     717              :                                name);   /* name of mapping object */
     718              :         if (!hmap)
     719              :         {
     720              :             _dosmaperr(GetLastError());
     721              :             ereport(elevel,
     722              :                     (errcode_for_dynamic_shared_memory(),
     723              :                      errmsg("could not open shared memory segment \"%s\": %m",
     724              :                             name)));
     725              :             return false;
     726              :         }
     727              :     }
     728              : 
     729              :     /* Map it. */
     730              :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     731              :                             0, 0, 0);
     732              :     if (!address)
     733              :     {
     734              :         int         save_errno;
     735              : 
     736              :         _dosmaperr(GetLastError());
     737              :         /* Back out what's already been done. */
     738              :         save_errno = errno;
     739              :         CloseHandle(hmap);
     740              :         errno = save_errno;
     741              : 
     742              :         ereport(elevel,
     743              :                 (errcode_for_dynamic_shared_memory(),
     744              :                  errmsg("could not map shared memory segment \"%s\": %m",
     745              :                         name)));
     746              :         return false;
     747              :     }
     748              : 
     749              :     /*
     750              :      * VirtualQuery reports the size as a multiple of the page size, which is
     751              :      * 4K on Windows.  We need the size only when attaching, but we also use
     752              :      * it when creating a new segment, so that the reported size is the same
     753              :      * for DSM_OP_CREATE and DSM_OP_ATTACH.
     754              :      */
     755              :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     756              :     {
     757              :         int         save_errno;
     758              : 
     759              :         _dosmaperr(GetLastError());
     760              :         /* Back out what's already been done. */
     761              :         save_errno = errno;
     762              :         UnmapViewOfFile(address);
     763              :         CloseHandle(hmap);
     764              :         errno = save_errno;
     765              : 
     766              :         ereport(elevel,
     767              :                 (errcode_for_dynamic_shared_memory(),
     768              :                  errmsg("could not stat shared memory segment \"%s\": %m",
     769              :                         name)));
     770              :         return false;
     771              :     }
     772              : 
     773              :     *mapped_address = address;
     774              :     *mapped_size = info.RegionSize;
     775              :     *impl_private = hmap;
     776              : 
     777              :     return true;
     778              : }
     779              : #endif
     780              : 
     781              : #ifdef USE_DSM_MMAP
     782              : /*
     783              :  * Operating system primitives to support mmap-based shared memory.
     784              :  *
     785              :  * Calling this "shared memory" is somewhat of a misnomer, because what
     786              :  * we're really doing is creating a bunch of files and mapping them into
     787              :  * our address space.  The operating system may feel obliged to
     788              :  * synchronize the contents to disk even if nothing is being paged out,
     789              :  * which will not serve us well.  The user can relocate the pg_dynshmem
     790              :  * directory to a ramdisk to avoid this problem, if available.
                      :  *
                      :  * The segment is a file in PG_DYNSHMEM_DIR whose name is
                      :  * PG_DYNSHMEM_MMAP_FILE_PREFIX followed by the handle in decimal.
                      :  * DSM_OP_CREATE creates that file exclusively, zero-fills it to
                      :  * request_size bytes and maps it; DSM_OP_ATTACH opens the existing file
                      :  * and maps it at its current size.  On success *mapped_address and
                      :  * *mapped_size are set.  DSM_OP_DETACH and DSM_OP_DESTROY unmap the
                      :  * segment, and DSM_OP_DESTROY also unlinks the file.  impl_private is
                      :  * not used by this implementation.
                      :  *
                      :  * Returns true on success.  On failure the problem is reported at elevel
                      :  * and false is returned, except that DSM_OP_CREATE returns false without
                      :  * any report when the file already exists (EEXIST).  If closing the file
                      :  * descriptor after a successful mmap fails, false is returned with
                      :  * *mapped_address and *mapped_size still set.
     791              :  */
     792              : static bool
     793            0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
     794              :               void **impl_private, void **mapped_address, Size *mapped_size,
     795              :               int elevel)
     796              : {
     797              :     char        name[64];
     798              :     int         flags;
     799              :     int         fd;
     800              :     char       *address;
     801              : 
     802            0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     803              :              handle);
     804              : 
     805              :     /* Handle teardown cases. */
     806            0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     807              :     {
     808            0 :         if (*mapped_address != NULL
     809            0 :             && munmap(*mapped_address, *mapped_size) != 0)
     810              :         {
     811            0 :             ereport(elevel,
     812              :                     (errcode_for_dynamic_shared_memory(),
     813              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     814              :                             name)));
     815            0 :             return false;
     816              :         }
     817            0 :         *mapped_address = NULL;
     818            0 :         *mapped_size = 0;
     819            0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)
     820              :         {
     821            0 :             ereport(elevel,
     822              :                     (errcode_for_dynamic_shared_memory(),
     823              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     824              :                             name)));
     825            0 :             return false;
     826              :         }
     827            0 :         return true;
     828              :     }
     829              : 
     830              :     /* Create new segment or open an existing one for attach. */
     831            0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     832            0 :     if ((fd = OpenTransientFile(name, flags)) == -1)
     833              :     {
     834            0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)
     835            0 :             ereport(elevel,
     836              :                     (errcode_for_dynamic_shared_memory(),
     837              :                      errmsg("could not open shared memory segment \"%s\": %m",
     838              :                             name)));
     839            0 :         return false;
     840              :     }
     841              : 
     842              :     /*
     843              :      * If we're attaching the segment, determine the current size; if we are
     844              :      * creating the segment, set the size to the requested value.
     845              :      */
     846            0 :     if (op == DSM_OP_ATTACH)
     847              :     {
     848              :         struct stat st;
     849              : 
     850            0 :         if (fstat(fd, &st) != 0)
     851              :         {
     852              :             int         save_errno;
     853              : 
     854              :             /* Back out what's already been done. */
     855            0 :             save_errno = errno;
     856            0 :             CloseTransientFile(fd);
     857            0 :             errno = save_errno;
     858              : 
     859            0 :             ereport(elevel,
     860              :                     (errcode_for_dynamic_shared_memory(),
     861              :                      errmsg("could not stat shared memory segment \"%s\": %m",
     862              :                             name)));
     863            0 :             return false;
     864              :         }
     865            0 :         request_size = st.st_size;
     866              :     }
     867              :     else
     868              :     {
     869              :         /*
     870              :          * Allocate a buffer full of zeros.
     871              :          *
     872              :          * Note: palloc zbuffer, instead of just using a local char array, to
     873              :          * ensure it is reasonably well-aligned; this may save a few cycles
     874              :          * transferring data to the kernel.
                      :          *
                      :          * NOTE(review): zbuffer is never passed to pfree in this function;
                      :          * presumably the surrounding memory context reclaims it -- confirm.
     875              :          */
     876            0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
     877            0 :         Size        remaining = request_size;
     878            0 :         bool        success = true;
     879              : 
     880              :         /*
     881              :          * Zero-fill the file. We have to do this the hard way to ensure that
     882              :          * all the file space has really been allocated, so that we don't
     883              :          * later seg fault when accessing the memory mapping.  This is pretty
     884              :          * pessimal.
     885              :          */
     886            0 :         while (success && remaining > 0)
     887              :         {
     888            0 :             Size        goal = remaining;
     889              : 
     890            0 :             if (goal > ZBUFFER_SIZE)
     891            0 :                 goal = ZBUFFER_SIZE;
     892            0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     893            0 :             if (write(fd, zbuffer, goal) == goal)
     894            0 :                 remaining -= goal;
     895              :             else
     896            0 :                 success = false;
     897            0 :             pgstat_report_wait_end();
     898              :         }
     899              : 
     900            0 :         if (!success)
     901              :         {
     902              :             int         save_errno;
     903              : 
     904              :             /*
                      :              * Back out what's already been done.  If errno is zero at this
                      :              * point, report ENOSPC instead.
                      :              */
     905            0 :             save_errno = errno;
     906            0 :             CloseTransientFile(fd);
     907            0 :             unlink(name);
     908            0 :             errno = save_errno ? save_errno : ENOSPC;
     909              : 
     910            0 :             ereport(elevel,
     911              :                     (errcode_for_dynamic_shared_memory(),
     912              :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     913              :                             name, request_size)));
     914            0 :             return false;
     915              :         }
     916              :     }
     917              : 
     918              :     /* Map it. */
     919            0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     920              :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     921            0 :     if (address == MAP_FAILED)
     922              :     {
     923              :         int         save_errno;
     924              : 
     925              :         /* Back out what's already been done. */
     926            0 :         save_errno = errno;
     927            0 :         CloseTransientFile(fd);
     928            0 :         if (op == DSM_OP_CREATE)
     929            0 :             unlink(name);
     930            0 :         errno = save_errno;
     931              : 
     932            0 :         ereport(elevel,
     933              :                 (errcode_for_dynamic_shared_memory(),
     934              :                  errmsg("could not map shared memory segment \"%s\": %m",
     935              :                         name)));
     936            0 :         return false;
     937              :     }
     938            0 :     *mapped_address = address;
     939            0 :     *mapped_size = request_size;
     940              : 
     941            0 :     if (CloseTransientFile(fd) != 0)
     942              :     {
     943            0 :         ereport(elevel,
     944              :                 (errcode_for_file_access(),
     945              :                  errmsg("could not close shared memory segment \"%s\": %m",
     946              :                         name)));
     947            0 :         return false;
     948              :     }
     949              : 
     950            0 :     return true;
     951              : }
     952              : #endif
     953              : 
     954              : /*
     955              :  * Implementation-specific actions that must be performed when a segment is to
     956              :  * be preserved even when no backend has it attached.
     957              :  *
     958              :  * Except on Windows, we don't need to do anything at all.  But since Windows
     959              :  * cleans up segments automatically when no references remain, we duplicate
     960              :  * the segment handle into the postmaster process.  The postmaster needn't
     961              :  * do anything to receive the handle; Windows transfers it automatically.
                      :  *
                      :  * On Windows this is done only when IsUnderPostmaster is true.
                      :  * impl_private is the segment's handle in the current process, and the
                      :  * value of the duplicate is stored in *impl_private_pm_handle.  handle is
                      :  * used only to build the segment name shown if DuplicateHandle fails,
                      :  * which is reported with ereport(ERROR).  In every other case
                      :  * *impl_private_pm_handle is left untouched.
     962              :  */
     963              : void
     964         1352 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
     965              :                      void **impl_private_pm_handle)
     966              : {
     967         1352 :     switch (dynamic_shared_memory_type)
     968              :     {
     969              : #ifdef USE_DSM_WINDOWS
     970              :         case DSM_IMPL_WINDOWS:
     971              :             if (IsUnderPostmaster)
     972              :             {
     973              :                 HANDLE      hmap;
     974              : 
     975              :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,
     976              :                                      PostmasterHandle, &hmap, 0, FALSE,
     977              :                                      DUPLICATE_SAME_ACCESS))
     978              :                 {
     979              :                     char        name[64];
     980              : 
     981              :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     982              :                     _dosmaperr(GetLastError());
     983              :                     ereport(ERROR,
     984              :                             (errcode_for_dynamic_shared_memory(),
     985              :                              errmsg("could not duplicate handle for \"%s\": %m",
     986              :                                     name)));
     987              :                 }
     988              : 
     989              :                 /*
     990              :                  * Here, we remember the handle that we created in the
     991              :                  * postmaster process.  This handle isn't actually usable in
     992              :                  * any process other than the postmaster, but that doesn't
     993              :                  * matter.  We're just holding onto it so that, if the segment
     994              :                  * is unpinned, dsm_impl_unpin_segment can close it.
     995              :                  */
     996              :                 *impl_private_pm_handle = hmap;
     997              :             }
     998              :             break;
     999              : #endif
    1000              :         default:
    1001         1352 :             break;
    1002              :     }
    1003         1352 : }
    1004              : 
    1005              : /*
    1006              :  * Implementation-specific actions that must be performed when a segment is no
    1007              :  * longer to be preserved, so that it will be cleaned up when all backends
    1008              :  * have detached from it.
    1009              :  *
    1010              :  * Except on Windows, we don't need to do anything at all.  For Windows, we
    1011              :  * close the extra handle that dsm_impl_pin_segment created in the
    1012              :  * postmaster's process space.
                      :  *
                      :  * On Windows this is done only when IsUnderPostmaster is true.  If
                      :  * *impl_private is not NULL, it is closed in the postmaster by calling
                      :  * DuplicateHandle with DUPLICATE_CLOSE_SOURCE and no target; that is why
                      :  * the failure message, raised with ereport(ERROR), still speaks of
                      :  * duplicating a handle.  *impl_private is then reset to NULL.  handle is
                      :  * used only to build the segment name shown in that message.
    1013              :  */
    1014              : void
    1015          252 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
    1016              : {
    1017          252 :     switch (dynamic_shared_memory_type)
    1018              :     {
    1019              : #ifdef USE_DSM_WINDOWS
    1020              :         case DSM_IMPL_WINDOWS:
    1021              :             if (IsUnderPostmaster)
    1022              :             {
    1023              :                 if (*impl_private &&
    1024              :                     !DuplicateHandle(PostmasterHandle, *impl_private,
    1025              :                                      NULL, NULL, 0, FALSE,
    1026              :                                      DUPLICATE_CLOSE_SOURCE))
    1027              :                 {
    1028              :                     char        name[64];
    1029              : 
    1030              :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1031              :                     _dosmaperr(GetLastError());
    1032              :                     ereport(ERROR,
    1033              :                             (errcode_for_dynamic_shared_memory(),
    1034              :                              errmsg("could not duplicate handle for \"%s\": %m",
    1035              :                                     name)));
    1036              :                 }
    1037              : 
    1038              :                 *impl_private = NULL;
    1039              :             }
    1040              :             break;
    1041              : #endif
    1042              :         default:
    1043          252 :             break;
    1044              :     }
    1045          252 : }
    1046              : /*
                      :  * Pick the error code for a failed dynamic shared memory operation, based
                      :  * on the current value of errno: EFBIG and ENOMEM are reported as
                      :  * ERRCODE_OUT_OF_MEMORY, and anything else is classified by
                      :  * errcode_for_file_access().  Callers must therefore make sure errno still
                      :  * holds the value from the failed call.
                      :  */
    1047              : static int
    1048            0 : errcode_for_dynamic_shared_memory(void)
    1049              : {
    1050            0 :     if (errno == EFBIG || errno == ENOMEM)
    1051            0 :         return errcode(ERRCODE_OUT_OF_MEMORY);
    1052              :     else
    1053            0 :         return errcode_for_file_access();
    1054              : }
        

Generated by: LCOV version 2.0-1