LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 23.6 % 203 48
Test Date: 2026-03-03 14:15:12 Functions: 62.5 % 8 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * dsm_impl.c
       4              :  *    manage dynamic shared memory segments
       5              :  *
       6              :  * This file provides low-level APIs for creating and destroying shared
       7              :  * memory segments using several different possible techniques.  We refer
       8              :  * to these segments as dynamic because they can be created, altered, and
       9              :  * destroyed at any point during the server life cycle.  This is unlike
      10              :  * the main shared memory segment, of which there is always exactly one
      11              :  * and which is always mapped at a fixed address in every PostgreSQL
      12              :  * background process.
      13              :  *
      14              :  * Because not all systems provide the same primitives in this area, nor
      15              :  * do all primitives behave the same way on all systems, we provide
      16              :  * several implementations of this facility.  Many systems implement
      17              :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18              :  * in this area, with the exception that shared memory identifiers live
      19              :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20              :  * name collisions with other processes (including other copies of
      21              :  * PostgreSQL) running on the same system.  Some systems only support
      22              :  * the older System V shared memory interface (shmget etc.) which is
      23              :  * also usable; however, the default allocation limits are often quite
      24              :  * small, and the namespace is even more restricted.
      25              :  *
      26              :  * We also provide an mmap-based shared memory implementation.  This may
      27              :  * be useful on systems that provide shared memory via a special-purpose
      28              :  * filesystem; by opting for this implementation, the user can even
      29              :  * control precisely where their shared memory segments are placed.  It
      30              :  * can also be used as a fallback for systems where shm_open and shmget
      31              :  * are not available or can't be used for some reason.  Of course,
      32              :  * mapping a file residing on an actual spinning disk is a fairly poor
      33              :  * approximation for shared memory because writeback may hurt performance
      34              :  * substantially, but there should be few systems where we must make do
      35              :  * with such poor tools.
      36              :  *
      37              :  * As ever, Windows requires its own implementation.
      38              :  *
      39              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      40              :  * Portions Copyright (c) 1994, Regents of the University of California
      41              :  *
      42              :  *
      43              :  * IDENTIFICATION
      44              :  *    src/backend/storage/ipc/dsm_impl.c
      45              :  *
      46              :  *-------------------------------------------------------------------------
      47              :  */
      48              : 
      49              : #include "postgres.h"
      50              : 
      51              : #include <fcntl.h>
      52              : #include <signal.h>
      53              : #include <unistd.h>
      54              : #ifndef WIN32
      55              : #include <sys/mman.h>
      56              : #include <sys/ipc.h>
      57              : #include <sys/shm.h>
      58              : #include <sys/stat.h>
      59              : #endif
      60              : 
      61              : #include "common/file_perm.h"
      62              : #include "libpq/pqsignal.h"
      63              : #include "miscadmin.h"
      64              : #include "pgstat.h"
      65              : #include "portability/mem.h"
      66              : #include "postmaster/postmaster.h"
      67              : #include "storage/dsm_impl.h"
      68              : #include "storage/fd.h"
      69              : #include "utils/guc.h"
      70              : #include "utils/memutils.h"
      71              : 
      72              : #ifdef USE_DSM_POSIX  /* implementation built on shm_open()/shm_unlink() */
      73              : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      74              :                            void **impl_private, void **mapped_address,
      75              :                            Size *mapped_size, int elevel);
      76              : static int  dsm_impl_posix_resize(int fd, off_t size);  /* sizes a newly created segment's fd */
      77              : #endif
      78              : #ifdef USE_DSM_SYSV  /* implementation built on shmget()/shmat()/shmdt()/shmctl() */
      79              : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      80              :                           void **impl_private, void **mapped_address,
      81              :                           Size *mapped_size, int elevel);
      82              : #endif
      83              : #ifdef USE_DSM_WINDOWS  /* Windows-specific implementation */
      84              : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      85              :                              void **impl_private, void **mapped_address,
      86              :                              Size *mapped_size, int elevel);
      87              : #endif
      88              : #ifdef USE_DSM_MMAP  /* implementation that maps ordinary files */
      89              : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      90              :                           void **impl_private, void **mapped_address,
      91              :                           Size *mapped_size, int elevel);
      92              : #endif
      93              : static int  errcode_for_dynamic_shared_memory(void);  /* supplies the error code passed to the ereport() calls below */
      94              : 
      95              : const struct config_enum_entry dynamic_shared_memory_options[] = {  /* one entry per implementation compiled in; presumably the choices for dynamic_shared_memory_type -- confirm against the GUC tables */
      96              : #ifdef USE_DSM_POSIX
      97              :     {"posix", DSM_IMPL_POSIX, false},
      98              : #endif
      99              : #ifdef USE_DSM_SYSV
     100              :     {"sysv", DSM_IMPL_SYSV, false},
     101              : #endif
     102              : #ifdef USE_DSM_WINDOWS
     103              :     {"windows", DSM_IMPL_WINDOWS, false},
     104              : #endif
     105              : #ifdef USE_DSM_MMAP
     106              :     {"mmap", DSM_IMPL_MMAP, false},
     107              : #endif
     108              :     {NULL, 0, false}  /* final entry with a NULL name; presumably the list terminator */
     109              : };
     110              : 
     111              : /* Implementation selector; dsm_impl_op() switches on this value. */
     112              : int         dynamic_shared_memory_type = DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE;
     113              : 
     114              : /* Amount of space reserved for DSM segments in the main area. */
     115              : int         min_dynamic_shared_memory;
     116              : 
     117              : /* Size of buffer to be used for zero-filling. */
     118              : #define ZBUFFER_SIZE                8192
     119              : 
     120              : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"  /* NOTE(review): no use site is visible in this part of the file; presumably a segment-name prefix for one of the implementations -- confirm */
     121              : 
     122              : /*------
     123              :  * Perform a low-level shared memory operation in a platform-specific way,
     124              :  * as dictated by the selected implementation.  Each implementation is
     125              :  * required to implement the following primitives.
     126              :  *
     127              :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     128              :  * map it.
     129              :  *
     130              :  * DSM_OP_ATTACH.  Map the existing segment.  request_size must be 0; the
                      :  * implementation determines the segment's actual size itself.
     131              :  *
     132              :  * DSM_OP_DETACH.  Unmap the segment.
     133              :  *
     134              :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     135              :  * segment.
     136              :  *
     137              :  * Arguments:
     138              :  *   op: The operation to be performed.
     139              :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     140              :  *     identifier for the new handle the caller wants created.
     141              :  *   request_size: For DSM_OP_CREATE, the requested size.  Otherwise, 0.
     142              :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     143              :  *     to NULL for the first operation on a shared memory segment within this
     144              :  *     backend; thereafter, it will point to the value to which it was set
     145              :  *     on the previous call.
     146              :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     147              :  *     if none.  Updated with new mapping address.
     148              :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     149              :  *     Updated with new mapped size.
     150              :  *   elevel: Level at which to log errors.
     151              :  *
     152              :  * Return value: true on success, false on failure.  When false is returned,
     153              :  * a message should first be logged at the specified elevel, except in the
     154              :  * case where DSM_OP_CREATE experiences a name collision, which should
     155              :  * silently return false.
                      :  *
                      :  * This function itself only checks the argument invariants and then
                      :  * dispatches on dynamic_shared_memory_type, passing every argument through
                      :  * unchanged.  A type with no compiled-in case is reported via elog(ERROR).
     156              :  *-----
     157              :  */
     158              : bool
     159        54797 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     160              :             void **impl_private, void **mapped_address, Size *mapped_size,
     161              :             int elevel)
     162              : {
     163              :     Assert(op == DSM_OP_CREATE || request_size == 0);  /* only CREATE may pass a size */
     164              :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||  /* CREATE/ATTACH must start with no mapping */
     165              :            (*mapped_address == NULL && *mapped_size == 0));
     166              : 
     167        54797 :     switch (dynamic_shared_memory_type)
     168              :     {
     169              : #ifdef USE_DSM_POSIX
     170        54797 :         case DSM_IMPL_POSIX:
     171        54797 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     172              :                                   mapped_address, mapped_size, elevel);
     173              : #endif
     174              : #ifdef USE_DSM_SYSV
     175            0 :         case DSM_IMPL_SYSV:
     176            0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     177              :                                  mapped_address, mapped_size, elevel);
     178              : #endif
     179              : #ifdef USE_DSM_WINDOWS
     180              :         case DSM_IMPL_WINDOWS:
     181              :             return dsm_impl_windows(op, handle, request_size, impl_private,
     182              :                                     mapped_address, mapped_size, elevel);
     183              : #endif
     184              : #ifdef USE_DSM_MMAP
     185            0 :         case DSM_IMPL_MMAP:
     186            0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     187              :                                  mapped_address, mapped_size, elevel);
     188              : #endif
     189            0 :         default:
     190            0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     191              :                  dynamic_shared_memory_type);
     192              :             return false;  /* NOTE(review): presumably only to satisfy the compiler, assuming elog(ERROR) does not return */
     193              :     }
     194              : }
     195              : 
     196              : #ifdef USE_DSM_POSIX
     197              : /*
     198              :  * Operating system primitives to support POSIX shared memory.
     199              :  *
     200              :  * POSIX shared memory segments are created and attached using shm_open()
     201              :  * and shm_unlink(); other operations, such as sizing or mapping the
     202              :  * segment, are performed as if the shared memory segments were files.
     203              :  *
     204              :  * Indeed, on some platforms, they may be implemented that way.  While
     205              :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     206              :  * some operating systems may implement them as files, even going so far
     207              :  * as to treat a request for /xyz as a request to create a file by that name
     208              :  * in the root directory.  Users of such broken platforms should select
     209              :  * a different shared memory implementation.
                      :  *
                      :  * The segment name is "/PostgreSQL.<handle>".  DSM_OP_DETACH and
                      :  * DSM_OP_DESTROY unmap the current mapping, if any, and reset
                      :  * *mapped_address and *mapped_size; DSM_OP_DESTROY then also unlinks the
                      :  * name.  DSM_OP_CREATE and DSM_OP_ATTACH open the segment, size it (create)
                      :  * or stat it (attach), mmap() it, store the result in *mapped_address and
                      :  * *mapped_size, and close the descriptor before returning.
                      :  *
                      :  * Returns true on success.  On failure returns false after reporting at
                      :  * elevel, except that a create whose shm_open() fails with EEXIST returns
                      :  * false without reporting.  impl_private is not used by this
                      :  * implementation.
     210              :  */
     211              : static bool
     212        54797 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     213              :                void **impl_private, void **mapped_address, Size *mapped_size,
     214              :                int elevel)
     215              : {
     216              :     char        name[64];
     217              :     int         flags;
     218              :     int         fd;
     219              :     char       *address;
     220              : 
     221        54797 :     snprintf(name, 64, "/PostgreSQL.%u", handle);  /* 64 is the size of name[] */
     222              : 
     223              :     /* Handle teardown cases. */
     224        54797 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     225              :     {
     226        28366 :         if (*mapped_address != NULL
     227        26432 :             && munmap(*mapped_address, *mapped_size) != 0)
     228              :         {
     229            0 :             ereport(elevel,
     230              :                     (errcode_for_dynamic_shared_memory(),
     231              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     232              :                             name)));
     233            0 :             return false;  /* mapping fields are left untouched on this path */
     234              :         }
     235        28366 :         *mapped_address = NULL;
     236        28366 :         *mapped_size = 0;
     237        28366 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     238              :         {
     239            0 :             ereport(elevel,
     240              :                     (errcode_for_dynamic_shared_memory(),
     241              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     242              :                             name)));
     243            0 :             return false;
     244              :         }
     245        28366 :         return true;
     246              :     }
     247              : 
     248              :     /*
     249              :      * Create new segment or open an existing one for attach.
     250              :      *
     251              :      * Even though we will close the FD before returning, it seems desirable
     252              :      * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
     253              :      * failure.  The fact that we won't hold the FD open long justifies using
     254              :      * ReserveExternalFD rather than AcquireExternalFD, though.
                      :      *
                      :      * Every exit path below pairs this reservation with exactly one
                      :      * ReleaseExternalFD() call.
     255              :      */
     256        26431 :     ReserveExternalFD();
     257              : 
     258        26431 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);  /* create must not reuse an existing name */
     259        26431 :     if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
     260              :     {
     261            0 :         ReleaseExternalFD();
     262            0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)  /* stay silent only for a create-time name collision */
     263            0 :             ereport(elevel,
     264              :                     (errcode_for_dynamic_shared_memory(),
     265              :                      errmsg("could not open shared memory segment \"%s\": %m",
     266              :                             name)));
     267            0 :         return false;
     268              :     }
     269              : 
     270              :     /*
     271              :      * If we're attaching the segment, determine the current size; if we are
     272              :      * creating the segment, set the size to the requested value.
     273              :      */
     274        26431 :     if (op == DSM_OP_ATTACH)
     275              :     {
     276              :         struct stat st;
     277              : 
     278        23444 :         if (fstat(fd, &st) != 0)
     279              :         {
     280              :             int         save_errno;
     281              : 
     282              :             /* Back out what's already been done. */
     283            0 :             save_errno = errno;  /* keep fstat()'s errno for the %m below */
     284            0 :             close(fd);
     285            0 :             ReleaseExternalFD();
     286            0 :             errno = save_errno;
     287              : 
     288            0 :             ereport(elevel,
     289              :                     (errcode_for_dynamic_shared_memory(),
     290              :                      errmsg("could not stat shared memory segment \"%s\": %m",
     291              :                             name)));
     292            0 :             return false;
     293              :         }
     294        23444 :         request_size = st.st_size;  /* map the segment at its existing size */
     295              :     }
     296         2987 :     else if (dsm_impl_posix_resize(fd, request_size) != 0)
     297              :     {
     298              :         int         save_errno;
     299              : 
     300              :         /* Back out what's already been done. */
     301            0 :         save_errno = errno;
     302            0 :         close(fd);
     303            0 :         ReleaseExternalFD();
     304            0 :         shm_unlink(name);  /* create path: remove the segment that could not be sized */
     305            0 :         errno = save_errno;
     306              : 
     307            0 :         ereport(elevel,
     308              :                 (errcode_for_dynamic_shared_memory(),
     309              :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     310              :                         name, request_size)));
     311            0 :         return false;
     312              :     }
     313              : 
     314              :     /* Map it. */
     315        26431 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     316              :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     317        26431 :     if (address == MAP_FAILED)
     318              :     {
     319              :         int         save_errno;
     320              : 
     321              :         /* Back out what's already been done. */
     322            0 :         save_errno = errno;
     323            0 :         close(fd);
     324            0 :         ReleaseExternalFD();
     325            0 :         if (op == DSM_OP_CREATE)  /* an attach must not remove a segment it did not create */
     326            0 :             shm_unlink(name);
     327            0 :         errno = save_errno;
     328              : 
     329            0 :         ereport(elevel,
     330              :                 (errcode_for_dynamic_shared_memory(),
     331              :                  errmsg("could not map shared memory segment \"%s\": %m",
     332              :                         name)));
     333            0 :         return false;
     334              :     }
     335        26431 :     *mapped_address = address;
     336        26431 :     *mapped_size = request_size;
     337        26431 :     close(fd);  /* the descriptor is not kept once the mapping exists */
     338        26431 :     ReleaseExternalFD();
     339              : 
     340        26431 :     return true;
     341              : }
     342              : 
     343              : /*
     344              :  * Set the size of a virtual memory region associated with a file descriptor.
     345              :  * If necessary, also ensure that virtual memory is actually allocated by the
     346              :  * operating system, to avoid nasty surprises later.
     347              :  *
     348              :  * Returns non-zero if either truncation or allocation fails, and sets errno.
                      :  *
                      :  * The non-zero value differs by build: with posix_fallocate() it is the
                      :  * error number itself (which is also copied into errno); with ftruncate()
                      :  * it is ftruncate()'s own return value, with errno as left by that call.
                      :  * Callers should therefore test only for "!= 0" and read errno.
                      :  *
                      :  * When IsUnderPostmaster is set, the signal mask is replaced with BlockSig
                      :  * for the duration of the call and the previous mask is restored before
                      :  * returning, with errno preserved across the restore.
     349              :  */
     350              : static int
     351         2987 : dsm_impl_posix_resize(int fd, off_t size)
     352              : {
     353              :     int         rc;
     354              :     int         save_errno;
     355              :     sigset_t    save_sigmask;
     356              : 
     357              :     /*
     358              :      * Block all blockable signals, except SIGQUIT.  posix_fallocate() can run
     359              :      * for quite a long time, and is an all-or-nothing operation.  If we
     360              :      * allowed SIGUSR1 to interrupt us repeatedly (for example, due to
     361              :      * recovery conflicts), the retry loop might never succeed.
     362              :      */
     363         2987 :     if (IsUnderPostmaster)
     364         1715 :         sigprocmask(SIG_SETMASK, &BlockSig, &save_sigmask);  /* previous mask saved for the restore below */
     365              : 
     366         2987 :     pgstat_report_wait_start(WAIT_EVENT_DSM_ALLOCATE);
     367              : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
     368              : 
     369              :     /*
     370              :      * On Linux, a shm_open fd is backed by a tmpfs file.  If we were to use
     371              :      * ftruncate, the file would contain a hole.  Accessing memory backed by a
     372              :      * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
     373              :      * is no more tmpfs space available.  So we ask tmpfs to allocate pages
     374              :      * here, so we can fail gracefully with ENOSPC now rather than risking
     375              :      * SIGBUS later.
     376              :      *
     377              :      * We still use a traditional EINTR retry loop to handle SIGCONT.
     378              :      * posix_fallocate() doesn't restart automatically, and we don't want this
     379              :      * to fail if you attach a debugger.
     380              :      */
     381              :     do
     382              :     {
     383         2987 :         rc = posix_fallocate(fd, 0, size);
     384         2987 :     } while (rc == EINTR);  /* EINTR arrives as the return value here, not via errno */
     385              : 
     386              :     /*
     387              :      * The caller expects errno to be set, but posix_fallocate() doesn't set
     388              :      * it.  Instead it returns error numbers directly.  So set errno, even
     389              :      * though we'll also return rc to indicate success or failure.
     390              :      */
     391         2987 :     errno = rc;
     392              : #else
     393              :     /* Extend the file to the requested size. */
     394              :     do
     395              :     {
     396              :         rc = ftruncate(fd, size);
     397              :     } while (rc < 0 && errno == EINTR);  /* here EINTR is reported through errno */
     398              : #endif
     399         2987 :     pgstat_report_wait_end();
     400              : 
     401         2987 :     if (IsUnderPostmaster)
     402              :     {
     403         1715 :         save_errno = errno;  /* keep the errno chosen above across sigprocmask() */
     404         1715 :         sigprocmask(SIG_SETMASK, &save_sigmask, NULL);
     405         1715 :         errno = save_errno;
     406              :     }
     407              : 
     408         2987 :     return rc;
     409              : }
     410              : 
     411              : #endif                          /* USE_DSM_POSIX */
     412              : 
     413              : #ifdef USE_DSM_SYSV
     414              : /*
     415              :  * Operating system primitives to support System V shared memory.
     416              :  *
     417              :  * System V shared memory segments are manipulated using shmget(), shmat(),
     418              :  * shmdt(), and shmctl().  As the default allocation limits for System V
     419              :  * shared memory are usually quite low, the POSIX facilities may be
     420              :  * preferable; but those are not supported everywhere.
     421              :  */
     422              : static bool
     423            0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     424              :               void **impl_private, void **mapped_address, Size *mapped_size,
     425              :               int elevel)
     426              : {
     427              :     key_t       key;
     428              :     int         ident;
     429              :     char       *address;
     430              :     char        name[64];
     431              :     int        *ident_cache;
     432              : 
     433              :     /*
     434              :      * POSIX shared memory and mmap-based shared memory identify segments with
     435              :      * names.  To avoid needless error message variation, we use the handle as
     436              :      * the name.
     437              :      */
     438            0 :     snprintf(name, 64, "%u", handle);
     439              : 
     440              :     /*
     441              :      * The System V shared memory namespace is very restricted; names are of
     442              :      * type key_t, which is expected to be some sort of integer data type, but
     443              :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     444              :      * identify shared memory segments across processes, this might seem like
     445              :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     446              :      * the cast below might truncate away some bits from the handle the
     447              :      * user provided, but it'll truncate exactly the same bits away in exactly
     448              :      * the same fashion every time we use that handle, which is all that
     449              :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     450              :      * won't use the full range of available key space, but that's no big deal
     451              :      * either.
     452              :      *
     453              :      * We do make sure that the key isn't negative, because that might not be
     454              :      * portable.
     455              :      */
     456            0 :     key = (key_t) handle;
     457            0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     458            0 :         key = -key;
     459              : 
     460              :     /*
     461              :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     462              :      * up with that value by chance during a create operation, just pretend it
     463              :      * already exists, so that caller will retry.  If we run into it anywhere
     464              :      * else, the caller has passed a handle that doesn't correspond to
     465              :      * anything we ever created, which should not happen.
     466              :      */
     467            0 :     if (key == IPC_PRIVATE)
     468              :     {
     469            0 :         if (op != DSM_OP_CREATE)
     470            0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     471            0 :         errno = EEXIST;
     472            0 :         return false;
     473              :     }
     474              : 
     475              :     /*
     476              :      * Before we can do anything with a shared memory segment, we have to map
     477              :      * the shared memory key to a shared memory identifier using shmget(). To
     478              :      * avoid repeated lookups, we store the identifier using impl_private.
     479              :      */
     480            0 :     if (*impl_private != NULL)
     481              :     {
     482            0 :         ident_cache = *impl_private;
     483            0 :         ident = *ident_cache;
     484              :     }
     485              :     else
     486              :     {
     487            0 :         int         flags = IPCProtection;
     488              :         size_t      segsize;
     489              : 
     490              :         /*
     491              :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     492              :          * leak the resource if memory allocation fails.
     493              :          */
     494            0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     495              : 
     496              :         /*
     497              :          * When using shmget to find an existing segment, we must pass the
     498              :          * size as 0.  Passing a non-zero size which is greater than the
     499              :          * actual size will result in EINVAL.
     500              :          */
     501            0 :         segsize = 0;
     502              : 
     503            0 :         if (op == DSM_OP_CREATE)
     504              :         {
     505            0 :             flags |= IPC_CREAT | IPC_EXCL;
     506            0 :             segsize = request_size;
     507              :         }
     508              : 
     509            0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     510              :         {
     511            0 :             if (op == DSM_OP_ATTACH || errno != EEXIST)
     512              :             {
     513            0 :                 int         save_errno = errno;
     514              : 
     515            0 :                 pfree(ident_cache);
     516            0 :                 errno = save_errno;
     517            0 :                 ereport(elevel,
     518              :                         (errcode_for_dynamic_shared_memory(),
     519              :                          errmsg("could not get shared memory segment: %m")));
     520              :             }
     521            0 :             return false;
     522              :         }
     523              : 
     524            0 :         *ident_cache = ident;
     525            0 :         *impl_private = ident_cache;
     526              :     }
     527              : 
     528              :     /* Handle teardown cases. */
     529            0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     530              :     {
     531            0 :         pfree(ident_cache);
     532            0 :         *impl_private = NULL;
     533            0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     534              :         {
     535            0 :             ereport(elevel,
     536              :                     (errcode_for_dynamic_shared_memory(),
     537              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     538              :                             name)));
     539            0 :             return false;
     540              :         }
     541            0 :         *mapped_address = NULL;
     542            0 :         *mapped_size = 0;
     543            0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     544              :         {
     545            0 :             ereport(elevel,
     546              :                     (errcode_for_dynamic_shared_memory(),
     547              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     548              :                             name)));
     549            0 :             return false;
     550              :         }
     551            0 :         return true;
     552              :     }
     553              : 
     554              :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     555            0 :     if (op == DSM_OP_ATTACH)
     556              :     {
     557              :         struct shmid_ds shm;
     558              : 
     559            0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     560              :         {
     561            0 :             ereport(elevel,
     562              :                     (errcode_for_dynamic_shared_memory(),
     563              :                      errmsg("could not stat shared memory segment \"%s\": %m",
     564              :                             name)));
     565            0 :             return false;
     566              :         }
     567            0 :         request_size = shm.shm_segsz;
     568              :     }
     569              : 
     570              :     /* Map it. */
     571            0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     572            0 :     if (address == (void *) -1)
     573              :     {
     574              :         int         save_errno;
     575              : 
     576              :         /* Back out what's already been done. */
     577            0 :         save_errno = errno;
     578            0 :         if (op == DSM_OP_CREATE)
     579            0 :             shmctl(ident, IPC_RMID, NULL);
     580            0 :         errno = save_errno;
     581              : 
     582            0 :         ereport(elevel,
     583              :                 (errcode_for_dynamic_shared_memory(),
     584              :                  errmsg("could not map shared memory segment \"%s\": %m",
     585              :                         name)));
     586            0 :         return false;
     587              :     }
     588            0 :     *mapped_address = address;
     589            0 :     *mapped_size = request_size;
     590              : 
     591            0 :     return true;
     592              : }
     593              : #endif
     594              : 
     595              : #ifdef USE_DSM_WINDOWS
     596              : /*
     597              :  * Operating system primitives to support Windows shared memory.
     598              :  *
     599              :  * The Windows shared memory implementation uses file mapping, which
     600              :  * can be backed by either a physical file or the system paging file.
     601              :  * The current implementation uses the system paging file, because the
     602              :  * effects (such as performance) of using a physical file are unclear, and
     603              :  * because main shared memory on Windows is handled in a similar way.
     604              :  *
     605              :  * A memory mapping object is a kernel object - it always gets deleted when
     606              :  * the last reference to it goes away, either explicitly via a CloseHandle or
     607              :  * when the process containing the reference exits.
     608              :  */
     609              : static bool
     610              : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
     611              :                  void **impl_private, void **mapped_address,
     612              :                  Size *mapped_size, int elevel)
     613              : {
     614              :     char       *address;
     615              :     HANDLE      hmap;
     616              :     char        name[64];
     617              :     MEMORY_BASIC_INFORMATION info;
     618              : 
     619              :     /*
     620              :      * Storing the shared memory segment in the Global\ namespace would allow
     621              :      * any process running in any session to access the file mapping object,
     622              :      * provided that the caller has the required access rights.  But to avoid
     623              :      * the issues faced with main shared memory, we use a naming convention
     624              :      * similar to the one used for main shared memory.  We can change this
     625              :      * once the issue mentioned in GetSharedMemName is resolved.
     626              :      */
     627              :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     628              : 
     629              :     /*
     630              :      * Handle teardown cases.  Since Windows automatically destroys the object
     631              :      * when no references remain, we can treat it the same as detach.
     632              :      */
     633              :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     634              :     {
     635              :         if (*mapped_address != NULL
     636              :             && UnmapViewOfFile(*mapped_address) == 0)
     637              :         {
     638              :             _dosmaperr(GetLastError());
     639              :             ereport(elevel,
     640              :                     (errcode_for_dynamic_shared_memory(),
     641              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     642              :                             name)));
     643              :             return false;
     644              :         }
     645              :         if (*impl_private != NULL
     646              :             && CloseHandle(*impl_private) == 0)
     647              :         {
     648              :             _dosmaperr(GetLastError());
     649              :             ereport(elevel,
     650              :                     (errcode_for_dynamic_shared_memory(),
     651              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     652              :                             name)));
     653              :             return false;
     654              :         }
     655              : 
     656              :         *impl_private = NULL;
     657              :         *mapped_address = NULL;
     658              :         *mapped_size = 0;
     659              :         return true;
     660              :     }
     661              : 
     662              :     /* Create new segment or open an existing one for attach. */
     663              :     if (op == DSM_OP_CREATE)
     664              :     {
     665              :         DWORD       size_high;
     666              :         DWORD       size_low;
     667              :         DWORD       errcode;
     668              : 
     669              :         /* Shifts >= the width of the type are undefined. */
     670              : #ifdef _WIN64
     671              :         size_high = request_size >> 32;
     672              : #else
     673              :         size_high = 0;
     674              : #endif
     675              :         size_low = (DWORD) request_size;
     676              : 
     677              :         /* CreateFileMapping might not clear the error code on success */
     678              :         SetLastError(0);
     679              : 
     680              :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     681              :                                  NULL,  /* Default security attrs */
     682              :                                  PAGE_READWRITE,    /* Memory is read/write */
     683              :                                  size_high, /* Upper 32 bits of size */
     684              :                                  size_low,  /* Lower 32 bits of size */
     685              :                                  name);
     686              : 
     687              :         errcode = GetLastError();
     688              :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     689              :         {
     690              :             /*
     691              :              * On Windows, when the segment already exists, a handle for the
     692              :              * existing segment is returned.  We must close it before
     693              :              * returning.  However, if the existing segment was created by a
     694              :              * service, the error reported is ERROR_ACCESS_DENIED.  We don't
     695              :              * call _dosmaperr here, so errno won't be modified.
     696              :              */
     697              :             if (hmap)
     698              :                 CloseHandle(hmap);
     699              :             return false;
     700              :         }
     701              : 
     702              :         if (!hmap)
     703              :         {
     704              :             _dosmaperr(errcode);
     705              :             ereport(elevel,
     706              :                     (errcode_for_dynamic_shared_memory(),
     707              :                      errmsg("could not create shared memory segment \"%s\": %m",
     708              :                             name)));
     709              :             return false;
     710              :         }
     711              :     }
     712              :     else
     713              :     {
     714              :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     715              :                                FALSE,   /* do not inherit the handle */
     716              :                                name);   /* name of mapping object */
     717              :         if (!hmap)
     718              :         {
     719              :             _dosmaperr(GetLastError());
     720              :             ereport(elevel,
     721              :                     (errcode_for_dynamic_shared_memory(),
     722              :                      errmsg("could not open shared memory segment \"%s\": %m",
     723              :                             name)));
     724              :             return false;
     725              :         }
     726              :     }
     727              : 
     728              :     /* Map it. */
     729              :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     730              :                             0, 0, 0);
     731              :     if (!address)
     732              :     {
     733              :         int         save_errno;
     734              : 
     735              :         _dosmaperr(GetLastError());
     736              :         /* Back out what's already been done. */
     737              :         save_errno = errno;
     738              :         CloseHandle(hmap);
     739              :         errno = save_errno;
     740              : 
     741              :         ereport(elevel,
     742              :                 (errcode_for_dynamic_shared_memory(),
     743              :                  errmsg("could not map shared memory segment \"%s\": %m",
     744              :                         name)));
     745              :         return false;
     746              :     }
     747              : 
     748              :     /*
     749              :      * VirtualQuery gives the size in page_size units, which is 4K for Windows.
     750              :      * We need the size only when we are attaching, but it's better to get the
     751              :      * size when creating a new segment as well, to keep the size consistent
     752              :      * for both DSM_OP_CREATE and DSM_OP_ATTACH.
     753              :      */
     754              :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     755              :     {
     756              :         int         save_errno;
     757              : 
     758              :         _dosmaperr(GetLastError());
     759              :         /* Back out what's already been done. */
     760              :         save_errno = errno;
     761              :         UnmapViewOfFile(address);
     762              :         CloseHandle(hmap);
     763              :         errno = save_errno;
     764              : 
     765              :         ereport(elevel,
     766              :                 (errcode_for_dynamic_shared_memory(),
     767              :                  errmsg("could not stat shared memory segment \"%s\": %m",
     768              :                         name)));
     769              :         return false;
     770              :     }
     771              : 
     772              :     *mapped_address = address;
     773              :     *mapped_size = info.RegionSize;
     774              :     *impl_private = hmap;
     775              : 
     776              :     return true;
     777              : }
     778              : #endif
     779              : 
     780              : #ifdef USE_DSM_MMAP
     781              : /*
     782              :  * Operating system primitives to support mmap-based shared memory.
     783              :  *
     784              :  * Calling this "shared memory" is somewhat of a misnomer, because what
     785              :  * we're really doing is creating a bunch of files and mapping them into
     786              :  * our address space.  The operating system may feel obliged to
     787              :  * synchronize the contents to disk even if nothing is being paged out,
     788              :  * which will not serve us well.  The user can relocate the pg_dynshmem
     789              :  * directory to a ramdisk to avoid this problem, if available.
     790              :  */
     791              : static bool
     792            0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
     793              :               void **impl_private, void **mapped_address, Size *mapped_size,
     794              :               int elevel)
     795              : {
     796              :     char        name[64];
     797              :     int         flags;
     798              :     int         fd;
     799              :     char       *address;
     800              : 
     801            0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     802              :              handle);
     803              : 
     804              :     /* Handle teardown cases. */
     805            0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     806              :     {
     807            0 :         if (*mapped_address != NULL
     808            0 :             && munmap(*mapped_address, *mapped_size) != 0)
     809              :         {
     810            0 :             ereport(elevel,
     811              :                     (errcode_for_dynamic_shared_memory(),
     812              :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     813              :                             name)));
     814            0 :             return false;
     815              :         }
     816            0 :         *mapped_address = NULL;
     817            0 :         *mapped_size = 0;
     818            0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)
     819              :         {
     820            0 :             ereport(elevel,
     821              :                     (errcode_for_dynamic_shared_memory(),
     822              :                      errmsg("could not remove shared memory segment \"%s\": %m",
     823              :                             name)));
     824            0 :             return false;
     825              :         }
     826            0 :         return true;
     827              :     }
     828              : 
     829              :     /* Create new segment or open an existing one for attach. */
     830            0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     831            0 :     if ((fd = OpenTransientFile(name, flags)) == -1)
     832              :     {
     833            0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)
     834            0 :             ereport(elevel,
     835              :                     (errcode_for_dynamic_shared_memory(),
     836              :                      errmsg("could not open shared memory segment \"%s\": %m",
     837              :                             name)));
     838            0 :         return false;
     839              :     }
     840              : 
     841              :     /*
     842              :      * If we're attaching the segment, determine the current size; if we are
     843              :      * creating the segment, set the size to the requested value.
     844              :      */
     845            0 :     if (op == DSM_OP_ATTACH)
     846              :     {
     847              :         struct stat st;
     848              : 
     849            0 :         if (fstat(fd, &st) != 0)
     850              :         {
     851              :             int         save_errno;
     852              : 
     853              :             /* Back out what's already been done. */
     854            0 :             save_errno = errno;
     855            0 :             CloseTransientFile(fd);
     856            0 :             errno = save_errno;
     857              : 
     858            0 :             ereport(elevel,
     859              :                     (errcode_for_dynamic_shared_memory(),
     860              :                      errmsg("could not stat shared memory segment \"%s\": %m",
     861              :                             name)));
     862            0 :             return false;
     863              :         }
     864            0 :         request_size = st.st_size;
     865              :     }
     866              :     else
     867              :     {
     868              :         /*
     869              :          * Allocate a buffer full of zeros.
     870              :          *
     871              :          * Note: palloc zbuffer, instead of just using a local char array, to
     872              :          * ensure it is reasonably well-aligned; this may save a few cycles
     873              :          * transferring data to the kernel.
     874              :          */
     875            0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
     876            0 :         Size        remaining = request_size;
     877            0 :         bool        success = true;
     878              : 
     879              :         /*
     880              :          * Zero-fill the file. We have to do this the hard way to ensure that
     881              :          * all the file space has really been allocated, so that we don't
     882              :          * later seg fault when accessing the memory mapping.  This is pretty
     883              :          * pessimal.
     884              :          */
     885            0 :         while (success && remaining > 0)
     886              :         {
     887            0 :             Size        goal = remaining;
     888              : 
     889            0 :             if (goal > ZBUFFER_SIZE)
     890            0 :                 goal = ZBUFFER_SIZE;
     891            0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     892            0 :             if (write(fd, zbuffer, goal) == goal)
     893            0 :                 remaining -= goal;
     894              :             else
     895            0 :                 success = false;
     896            0 :             pgstat_report_wait_end();
     897              :         }
     898              : 
     899            0 :         if (!success)
     900              :         {
     901              :             int         save_errno;
     902              : 
     903              :             /* Back out what's already been done. */
     904            0 :             save_errno = errno;
     905            0 :             CloseTransientFile(fd);
     906            0 :             unlink(name);
     907            0 :             errno = save_errno ? save_errno : ENOSPC;
     908              : 
     909            0 :             ereport(elevel,
     910              :                     (errcode_for_dynamic_shared_memory(),
     911              :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     912              :                             name, request_size)));
     913            0 :             return false;
     914              :         }
     915              :     }
     916              : 
     917              :     /* Map it. */
     918            0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     919              :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     920            0 :     if (address == MAP_FAILED)
     921              :     {
     922              :         int         save_errno;
     923              : 
     924              :         /* Back out what's already been done. */
     925            0 :         save_errno = errno;
     926            0 :         CloseTransientFile(fd);
     927            0 :         if (op == DSM_OP_CREATE)
     928            0 :             unlink(name);
     929            0 :         errno = save_errno;
     930              : 
     931            0 :         ereport(elevel,
     932              :                 (errcode_for_dynamic_shared_memory(),
     933              :                  errmsg("could not map shared memory segment \"%s\": %m",
     934              :                         name)));
     935            0 :         return false;
     936              :     }
     937            0 :     *mapped_address = address;
     938            0 :     *mapped_size = request_size;
     939              : 
     940            0 :     if (CloseTransientFile(fd) != 0)
     941              :     {
     942            0 :         ereport(elevel,
     943              :                 (errcode_for_file_access(),
     944              :                  errmsg("could not close shared memory segment \"%s\": %m",
     945              :                         name)));
     946            0 :         return false;
     947              :     }
     948              : 
     949            0 :     return true;
     950              : }
     951              : #endif
     952              : 
     953              : /*
     954              :  * Implementation-specific actions that must be performed when a segment is to
     955              :  * be preserved even when no backend has it attached.
     956              :  *
     957              :  * Except on Windows, we don't need to do anything at all.  But since Windows
     958              :  * cleans up segments automatically when no references remain, we duplicate
     959              :  * the segment handle into the postmaster process.  The postmaster needn't
     960              :  * do anything to receive the handle; Windows transfers it automatically.
     961              :  */
     962              : void
     963         1237 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
     964              :                      void **impl_private_pm_handle)
     965              : {
     966         1237 :     switch (dynamic_shared_memory_type)
     967              :     {
     968              : #ifdef USE_DSM_WINDOWS
     969              :         case DSM_IMPL_WINDOWS:
     970              :             if (IsUnderPostmaster)
     971              :             {
     972              :                 HANDLE      hmap;
     973              : 
     974              :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,
     975              :                                      PostmasterHandle, &hmap, 0, FALSE,
     976              :                                      DUPLICATE_SAME_ACCESS))
     977              :                 {
     978              :                     char        name[64];
     979              : 
     980              :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     981              :                     _dosmaperr(GetLastError());
     982              :                     ereport(ERROR,
     983              :                             (errcode_for_dynamic_shared_memory(),
     984              :                              errmsg("could not duplicate handle for \"%s\": %m",
     985              :                                     name)));
     986              :                 }
     987              : 
     988              :                 /*
     989              :                  * Here, we remember the handle that we created in the
     990              :                  * postmaster process.  This handle isn't actually usable in
     991              :                  * any process other than the postmaster, but that doesn't
     992              :                  * matter.  We're just holding onto it so that, if the segment
     993              :                  * is unpinned, dsm_impl_unpin_segment can close it.
     994              :                  */
     995              :                 *impl_private_pm_handle = hmap;
     996              :             }
     997              :             break;
     998              : #endif
     999              :         default:
    1000         1237 :             break;
    1001              :     }
    1002         1237 : }
    1003              : 
    1004              : /*
    1005              :  * Implementation-specific actions that must be performed when a segment is no
    1006              :  * longer to be preserved, so that it will be cleaned up when all backends
    1007              :  * have detached from it.
    1008              :  *
    1009              :  * Except on Windows, we don't need to do anything at all.  For Windows, we
    1010              :  * close the extra handle that dsm_impl_pin_segment created in the
    1011              :  * postmaster's process space.
    1012              :  */
    1013              : void
    1014          170 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
    1015              : {
    1016          170 :     switch (dynamic_shared_memory_type)
    1017              :     {
    1018              : #ifdef USE_DSM_WINDOWS
    1019              :         case DSM_IMPL_WINDOWS:
    1020              :             if (IsUnderPostmaster)
    1021              :             {
    1022              :                 if (*impl_private &&
    1023              :                     !DuplicateHandle(PostmasterHandle, *impl_private,
    1024              :                                      NULL, NULL, 0, FALSE,
    1025              :                                      DUPLICATE_CLOSE_SOURCE))
    1026              :                 {
    1027              :                     char        name[64];
    1028              : 
    1029              :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1030              :                     _dosmaperr(GetLastError());
    1031              :                     ereport(ERROR,
    1032              :                             (errcode_for_dynamic_shared_memory(),
    1033              :                              errmsg("could not duplicate handle for \"%s\": %m",
    1034              :                                     name)));
    1035              :                 }
    1036              : 
    1037              :                 *impl_private = NULL;
    1038              :             }
    1039              :             break;
    1040              : #endif
    1041              :         default:
    1042          170 :             break;
    1043              :     }
    1044          170 : }
    1045              : 
    1046              : static int
    1047            0 : errcode_for_dynamic_shared_memory(void)
    1048              : {
    1049            0 :     if (errno == EFBIG || errno == ENOMEM)
    1050            0 :         return errcode(ERRCODE_OUT_OF_MEMORY);
    1051              :     else
    1052            0 :         return errcode_for_file_access();
    1053              : }
        

Generated by: LCOV version 2.0-1