LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 48 203 23.6 %
Date: 2025-01-18 03:14:54 Functions: 5 8 62.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dsm_impl.c
       4             :  *    manage dynamic shared memory segments
       5             :  *
       6             :  * This file provides low-level APIs for creating and destroying shared
       7             :  * memory segments using several different possible techniques.  We refer
       8             :  * to these segments as dynamic because they can be created, altered, and
       9             :  * destroyed at any point during the server life cycle.  This is unlike
      10             :  * the main shared memory segment, of which there is always exactly one
      11             :  * and which is always mapped at a fixed address in every PostgreSQL
      12             :  * background process.
      13             :  *
      14             :  * Because not all systems provide the same primitives in this area, nor
      15             :  * do all primitives behave the same way on all systems, we provide
      16             :  * several implementations of this facility.  Many systems implement
      17             :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18             :  * in this area, with the exception that shared memory identifiers live
      19             :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20             :  * name collisions with other processes (including other copies of
      21             :  * PostgreSQL) running on the same system.  Some systems only support
      22             :  * the older System V shared memory interface (shmget etc.) which is
      23             :  * also usable; however, the default allocation limits are often quite
      24             :  * small, and the namespace is even more restricted.
      25             :  *
      26             :  * We also provide an mmap-based shared memory implementation.  This may
      27             :  * be useful on systems that provide shared memory via a special-purpose
      28             :  * filesystem; by opting for this implementation, the user can even
      29             :  * control precisely where their shared memory segments are placed.  It
      30             :  * can also be used as a fallback for systems where shm_open and shmget
      31             :  * are not available or can't be used for some reason.  Of course,
      32             :  * mapping a file residing on an actual spinning disk is a fairly poor
      33             :  * approximation for shared memory because writeback may hurt performance
      34             :  * substantially, but there should be few systems where we must make do
      35             :  * with such poor tools.
      36             :  *
      37             :  * As ever, Windows requires its own implementation.
      38             :  *
      39             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      40             :  * Portions Copyright (c) 1994, Regents of the University of California
      41             :  *
      42             :  *
      43             :  * IDENTIFICATION
      44             :  *    src/backend/storage/ipc/dsm_impl.c
      45             :  *
      46             :  *-------------------------------------------------------------------------
      47             :  */
      48             : 
      49             : #include "postgres.h"
      50             : 
      51             : #include <fcntl.h>
      52             : #include <signal.h>
      53             : #include <unistd.h>
      54             : #ifndef WIN32
      55             : #include <sys/mman.h>
      56             : #include <sys/ipc.h>
      57             : #include <sys/shm.h>
      58             : #include <sys/stat.h>
      59             : #endif
      60             : 
      61             : #include "common/file_perm.h"
      62             : #include "libpq/pqsignal.h"
      63             : #include "miscadmin.h"
      64             : #include "pgstat.h"
      65             : #include "portability/mem.h"
      66             : #include "postmaster/postmaster.h"
      67             : #include "storage/dsm_impl.h"
      68             : #include "storage/fd.h"
      69             : #include "utils/guc.h"
      70             : #include "utils/memutils.h"
      71             : 
      72             : #ifdef USE_DSM_POSIX
      73             : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      74             :                            void **impl_private, void **mapped_address,
      75             :                            Size *mapped_size, int elevel);
      76             : static int  dsm_impl_posix_resize(int fd, off_t size);
      77             : #endif
      78             : #ifdef USE_DSM_SYSV
      79             : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      80             :                           void **impl_private, void **mapped_address,
      81             :                           Size *mapped_size, int elevel);
      82             : #endif
      83             : #ifdef USE_DSM_WINDOWS
      84             : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      85             :                              void **impl_private, void **mapped_address,
      86             :                              Size *mapped_size, int elevel);
      87             : #endif
      88             : #ifdef USE_DSM_MMAP
      89             : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      90             :                           void **impl_private, void **mapped_address,
      91             :                           Size *mapped_size, int elevel);
      92             : #endif
      93             : static int  errcode_for_dynamic_shared_memory(void);
      94             : 
      95             : const struct config_enum_entry dynamic_shared_memory_options[] = {  /* one entry per compiled-in implementation */
      96             : #ifdef USE_DSM_POSIX
      97             :     {"posix", DSM_IMPL_POSIX, false},
      98             : #endif
      99             : #ifdef USE_DSM_SYSV
     100             :     {"sysv", DSM_IMPL_SYSV, false},
     101             : #endif
     102             : #ifdef USE_DSM_WINDOWS
     103             :     {"windows", DSM_IMPL_WINDOWS, false},
     104             : #endif
     105             : #ifdef USE_DSM_MMAP
     106             :     {"mmap", DSM_IMPL_MMAP, false},
     107             : #endif
     108             :     {NULL, 0, false}  /* NULL-named final entry; presumably the list terminator */
     109             : };
     110             : 
     111             : /* Implementation selector; dsm_impl_op() dispatches on this value. */
     112             : int         dynamic_shared_memory_type = DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE;
     113             : 
     114             : /* Amount of space reserved for DSM segments in the main area. */
     115             : int         min_dynamic_shared_memory;
     116             : 
     117             : /* Size of buffer to be used for zero-filling. */
     118             : #define ZBUFFER_SIZE                8192
     119             : 
     120             : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"  /* NOTE(review): not referenced in the visible part of this file; presumably for the Windows implementation - confirm */
     121             : 
     122             : /*------
     123             :  * Perform a low-level shared memory operation in a platform-specific way,
     124             :  * as dictated by the selected implementation.  Each implementation is
     125             :  * required to implement the following primitives.
     126             :  *
     127             :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     128             :  * map it.
     129             :  *
     130             :  * DSM_OP_ATTACH.  Map the existing segment; request_size must be 0.
     131             :  *
     132             :  * DSM_OP_DETACH.  Unmap the segment.
     133             :  *
     134             :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     135             :  * segment.
     136             :  *
     137             :  * Arguments:
     138             :  *   op: The operation to be performed.
     139             :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     140             :  *     identifier for the new handle the caller wants created.
     141             :  *   request_size: For DSM_OP_CREATE, the requested size.  Otherwise, 0.
     142             :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     143             :  *     to NULL for the first operation on a shared memory segment within this
     144             :  *     backend; thereafter, it will point to the value to which it was set
     145             :  *     on the previous call.
     146             :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     147             :  *     if none.  Updated with new mapping address.
     148             :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     149             :  *     Updated with new mapped size.
     150             :  *   elevel: Level at which to log errors.
     151             :  *
     152             :  * Return value: true on success, false on failure.  When false is returned,
     153             :  * a message should first be logged at the specified elevel, except in the
     154             :  * case where DSM_OP_CREATE experiences a name collision, which should
     155             :  * silently return false.
     156             :  *-----
     157             :  */
     158             : bool
     159       93898 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     160             :             void **impl_private, void **mapped_address, Size *mapped_size,
     161             :             int elevel)
     162             : {
     163             :     Assert(op == DSM_OP_CREATE || request_size == 0);  /* only CREATE passes a size */
     164             :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
     165             :            (*mapped_address == NULL && *mapped_size == 0));  /* CREATE and ATTACH must start with no mapping */
     166             : 
     167       93898 :     switch (dynamic_shared_memory_type)
     168             :     {
     169             : #ifdef USE_DSM_POSIX
     170       93898 :         case DSM_IMPL_POSIX:
     171       93898 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     172             :                                   mapped_address, mapped_size, elevel);
     173             : #endif
     174             : #ifdef USE_DSM_SYSV
     175           0 :         case DSM_IMPL_SYSV:
     176           0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     177             :                                  mapped_address, mapped_size, elevel);
     178             : #endif
     179             : #ifdef USE_DSM_WINDOWS
     180             :         case DSM_IMPL_WINDOWS:
     181             :             return dsm_impl_windows(op, handle, request_size, impl_private,
     182             :                                     mapped_address, mapped_size, elevel);
     183             : #endif
     184             : #ifdef USE_DSM_MMAP
     185           0 :         case DSM_IMPL_MMAP:
     186           0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     187             :                                  mapped_address, mapped_size, elevel);
     188             : #endif
     189           0 :         default:
     190           0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     191             :                  dynamic_shared_memory_type);
     192             :             return false;  /* NOTE(review): presumably unreachable if elog(ERROR) does not return - confirm */
     193             :     }
     194             : }
     195             : 
     196             : #ifdef USE_DSM_POSIX
     197             : /*
     198             :  * Operating system primitives to support POSIX shared memory.
     199             :  *
     200             :  * POSIX shared memory segments are created and opened using shm_open()
     201             :  * and removed using shm_unlink(); other operations, such as sizing or
     202             :  * mapping the segment, are performed as if the segments were files.
     203             :  *
     204             :  * Indeed, on some platforms, they may be implemented that way.  While
     205             :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     206             :  * some operating systems may implement them as files, even going so far
     207             :  * as to treat a request for /xyz as a request to create a file by that
     208             :  * name in the root directory.  Users of such broken platforms should select
     209             :  * a different shared memory implementation.
     210             :  */
     211             : static bool
     212       93898 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     213             :                void **impl_private, void **mapped_address, Size *mapped_size,
     214             :                int elevel)  /* impl_private is not used by this implementation */
     215             : {
     216             :     char        name[64];
     217             :     int         flags;
     218             :     int         fd;
     219             :     char       *address;
     220             : 
     221       93898 :     snprintf(name, 64, "/PostgreSQL.%u", handle);  /* segment name is derived from the handle */
     222             : 
     223             :     /* Handle teardown cases: unmap if mapped; for DESTROY, also unlink. */
     224       93898 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     225             :     {
     226       48562 :         if (*mapped_address != NULL
     227       45332 :             && munmap(*mapped_address, *mapped_size) != 0)
     228             :         {
     229           0 :             ereport(elevel,
     230             :                     (errcode_for_dynamic_shared_memory(),
     231             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     232             :                             name)));
     233           0 :             return false;
     234             :         }
     235       48562 :         *mapped_address = NULL;
     236       48562 :         *mapped_size = 0;
     237       48562 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     238             :         {
     239           0 :             ereport(elevel,
     240             :                     (errcode_for_dynamic_shared_memory(),
     241             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     242             :                             name)));
     243           0 :             return false;
     244             :         }
     245       48562 :         return true;
     246             :     }
     247             : 
     248             :     /*
     249             :      * Create new segment or open an existing one for attach.
     250             :      *
     251             :      * Even though we will close the FD before returning, it seems desirable
     252             :      * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
     253             :      * failure.  The fact that we won't hold the FD open long justifies using
     254             :      * ReserveExternalFD rather than AcquireExternalFD, though.
     255             :      */
     256       45336 :     ReserveExternalFD();
     257             : 
     258       45336 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     259       45336 :     if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
     260             :     {
     261           0 :         ReleaseExternalFD();  /* NOTE(review): errno is examined below without save/restore; assumes this call leaves it untouched - confirm */
     262           0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)  /* a CREATE name collision (EEXIST) fails silently */
     263           0 :             ereport(elevel,
     264             :                     (errcode_for_dynamic_shared_memory(),
     265             :                      errmsg("could not open shared memory segment \"%s\": %m",
     266             :                             name)));
     267           0 :         return false;
     268             :     }
     269             : 
     270             :     /*
     271             :      * If we're attaching the segment, determine the current size; if we are
     272             :      * creating the segment, set the size to the requested value.
     273             :      */
     274       45336 :     if (op == DSM_OP_ATTACH)
     275             :     {
     276             :         struct stat st;
     277             : 
     278       40424 :         if (fstat(fd, &st) != 0)
     279             :         {
     280             :             int         save_errno;
     281             : 
     282             :             /* Back out what's already been done. */
     283           0 :             save_errno = errno;
     284           0 :             close(fd);
     285           0 :             ReleaseExternalFD();
     286           0 :             errno = save_errno;
     287             : 
     288           0 :             ereport(elevel,
     289             :                     (errcode_for_dynamic_shared_memory(),
     290             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     291             :                             name)));
     292           0 :             return false;
     293             :         }
     294       40424 :         request_size = st.st_size;  /* map the segment at its existing size */
     295             :     }
     296        4912 :     else if (dsm_impl_posix_resize(fd, request_size) != 0)
     297             :     {
     298             :         int         save_errno;
     299             : 
     300             :         /* Back out what's already been done. */
     301           0 :         save_errno = errno;
     302           0 :         close(fd);
     303           0 :         ReleaseExternalFD();
     304           0 :         shm_unlink(name);  /* not the ATTACH path, so drop the segment opened above */
     305           0 :         errno = save_errno;
     306             : 
     307           0 :         ereport(elevel,
     308             :                 (errcode_for_dynamic_shared_memory(),
     309             :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     310             :                         name, request_size)));
     311           0 :         return false;
     312             :     }
     313             : 
     314             :     /* Map it. */
     315       45336 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     316             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     317       45336 :     if (address == MAP_FAILED)
     318             :     {
     319             :         int         save_errno;
     320             : 
     321             :         /* Back out what's already been done. */
     322           0 :         save_errno = errno;
     323           0 :         close(fd);
     324           0 :         ReleaseExternalFD();
     325           0 :         if (op == DSM_OP_CREATE)
     326           0 :             shm_unlink(name);
     327           0 :         errno = save_errno;
     328             : 
     329           0 :         ereport(elevel,
     330             :                 (errcode_for_dynamic_shared_memory(),
     331             :                  errmsg("could not map shared memory segment \"%s\": %m",
     332             :                         name)));
     333           0 :         return false;
     334             :     }
     335       45336 :     *mapped_address = address;
     336       45336 :     *mapped_size = request_size;
     337       45336 :     close(fd);  /* the descriptor is no longer needed once the mapping exists */
     338       45336 :     ReleaseExternalFD();
     339             : 
     340       45336 :     return true;
     341             : }
     342             : 
     343             : /*
     344             :  * Set the size of a virtual memory region associated with a file descriptor.
     345             :  * If necessary, also ensure that virtual memory is actually allocated by the
     346             :  * operating system, to avoid nasty surprises later.
     347             :  *
     348             :  * Returns non-zero, with errno set, if truncation or allocation fails.
     349             :  */
     350             : static int
     351        4912 : dsm_impl_posix_resize(int fd, off_t size)
     352             : {
     353             :     int         rc;
     354             :     int         save_errno;
     355             :     sigset_t    save_sigmask;
     356             : 
     357             :     /*
     358             :      * Block all blockable signals, except SIGQUIT.  posix_fallocate() can run
     359             :      * for quite a long time, and is an all-or-nothing operation.  If we
     360             :      * allowed SIGUSR1 to interrupt us repeatedly (for example, due to
     361             :      * recovery conflicts), the retry loop might never succeed.
     362             :      */
     363        4912 :     if (IsUnderPostmaster)
     364        2796 :         sigprocmask(SIG_SETMASK, &BlockSig, &save_sigmask);
     365             : 
     366        4912 :     pgstat_report_wait_start(WAIT_EVENT_DSM_ALLOCATE);
     367             : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
     368             : 
     369             :     /*
     370             :      * On Linux, a shm_open fd is backed by a tmpfs file.  If we were to use
     371             :      * ftruncate, the file would contain a hole.  Accessing memory backed by a
     372             :      * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
     373             :      * is no more tmpfs space available.  So we ask tmpfs to allocate pages
     374             :      * here, so we can fail gracefully with ENOSPC now rather than risking
     375             :      * SIGBUS later.
     376             :      *
     377             :      * We still use a traditional EINTR retry loop to handle SIGCONT.
     378             :      * posix_fallocate() doesn't restart automatically, and we don't want this
     379             :      * to fail if you attach a debugger.
     380             :      */
     381             :     do
     382             :     {
     383        4912 :         rc = posix_fallocate(fd, 0, size);
     384        4912 :     } while (rc == EINTR);
     385             : 
     386             :     /*
     387             :      * The caller expects errno to be set, but posix_fallocate() doesn't set
     388             :      * it.  Instead it returns error numbers directly.  So set errno, even
     389             :      * though we'll also return rc to indicate success or failure.
     390             :      */
     391        4912 :     errno = rc;
     392             : #else
     393             :     /* Extend the file to the requested size. */
     394             :     do
     395             :     {
     396             :         rc = ftruncate(fd, size);
     397             :     } while (rc < 0 && errno == EINTR);
     398             : #endif
     399        4912 :     pgstat_report_wait_end();
     400             : 
     401        4912 :     if (IsUnderPostmaster)
     402             :     {
     403        2796 :         save_errno = errno;  /* keep the caller-visible errno intact across sigprocmask() */
     404        2796 :         sigprocmask(SIG_SETMASK, &save_sigmask, NULL);
     405        2796 :         errno = save_errno;
     406             :     }
     407             : 
     408        4912 :     return rc;  /* posix_fallocate() error number, or ftruncate()'s return value */
     409             : }
     410             : 
     411             : #endif                          /* USE_DSM_POSIX */
     412             : 
     413             : #ifdef USE_DSM_SYSV
     414             : /*
     415             :  * Operating system primitives to support System V shared memory.
     416             :  *
     417             :  * System V shared memory segments are manipulated using shmget(), shmat(),
     418             :  * shmdt(), and shmctl().  As the default allocation limits for System V
     419             :  * shared memory are usually quite low, the POSIX facilities may be
     420             :  * preferable; but those are not supported everywhere.
     421             :  */
     422             : static bool
     423           0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     424             :               void **impl_private, void **mapped_address, Size *mapped_size,
     425             :               int elevel)
     426             : {
     427             :     key_t       key;
     428             :     int         ident;
     429             :     char       *address;
     430             :     char        name[64];
     431             :     int        *ident_cache;
     432             : 
     433             :     /*
     434             :      * POSIX shared memory and mmap-based shared memory identify segments with
     435             :      * names.  To avoid needless error message variation, we use the handle as
     436             :      * the name.
     437             :      */
     438           0 :     snprintf(name, 64, "%u", handle);
     439             : 
     440             :     /*
     441             :      * The System V shared memory namespace is very restricted; names are of
     442             :      * type key_t, which is expected to be some sort of integer data type, but
     443             :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     444             :      * identify shared memory segments across processes, this might seem like
     445             :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     446             :      * the cast below might truncate away some bits from the user-provided
     447             :      * handle, but it'll truncate exactly the same bits away in exactly
     448             :      * the same fashion every time we use that handle, which is all that
     449             :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     450             :      * won't use the full range of available key space, but that's no big deal
     451             :      * either.
     452             :      *
     453             :      * We do make sure that the key isn't negative, because that might not be
     454             :      * portable.
     455             :      */
     456           0 :     key = (key_t) handle;
     457           0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     458           0 :         key = -key;
     459             : 
     460             :     /*
     461             :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     462             :      * up with that value by chance during a create operation, just pretend it
     463             :      * already exists, so that caller will retry.  If we run into it anywhere
     464             :      * else, the caller has passed a handle that doesn't correspond to
     465             :      * anything we ever created, which should not happen.
     466             :      */
     467           0 :     if (key == IPC_PRIVATE)
     468             :     {
     469           0 :         if (op != DSM_OP_CREATE)
     470           0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     471           0 :         errno = EEXIST;
     472           0 :         return false;
     473             :     }
     474             : 
     475             :     /*
     476             :      * Before we can do anything with a shared memory segment, we have to map
     477             :      * the shared memory key to a shared memory identifier using shmget(). To
     478             :      * avoid repeated lookups, we cache the identifier using impl_private.
     479             :      */
     480           0 :     if (*impl_private != NULL)
     481             :     {
     482           0 :         ident_cache = *impl_private;
     483           0 :         ident = *ident_cache;
     484             :     }
     485             :     else
     486             :     {
     487           0 :         int         flags = IPCProtection;
     488             :         size_t      segsize;
     489             : 
     490             :         /*
     491             :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     492             :          * leak the resource if memory allocation fails.
     493             :          */
     494           0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     495             : 
     496             :         /*
     497             :          * When using shmget to find an existing segment, we must pass the
     498             :          * size as 0.  Passing a non-zero size which is greater than the
     499             :          * actual size will result in EINVAL.
     500             :          */
     501           0 :         segsize = 0;
     502             : 
     503           0 :         if (op == DSM_OP_CREATE)
     504             :         {
     505           0 :             flags |= IPC_CREAT | IPC_EXCL;
     506           0 :             segsize = request_size;
     507             :         }
     508             : 
     509           0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     510             :         {
     511           0 :             if (op == DSM_OP_ATTACH || errno != EEXIST)
     512             :             {
     513           0 :                 int         save_errno = errno;
     514             : 
     515           0 :                 pfree(ident_cache);
     516           0 :                 errno = save_errno;
     517           0 :                 ereport(elevel,
     518             :                         (errcode_for_dynamic_shared_memory(),
     519             :                          errmsg("could not get shared memory segment: %m")));
     520             :             }
     521           0 :             return false;
     522             :         }
     523             : 
     524           0 :         *ident_cache = ident;
     525           0 :         *impl_private = ident_cache;
     526             :     }
     527             : 
     528             :     /* Handle teardown cases. */
     529           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     530             :     {
     531           0 :         pfree(ident_cache);
     532           0 :         *impl_private = NULL;
     533           0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     534             :         {
     535           0 :             ereport(elevel,
     536             :                     (errcode_for_dynamic_shared_memory(),
     537             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     538             :                             name)));
     539           0 :             return false;
     540             :         }
     541           0 :         *mapped_address = NULL;
     542           0 :         *mapped_size = 0;
     543           0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     544             :         {
     545           0 :             ereport(elevel,
     546             :                     (errcode_for_dynamic_shared_memory(),
     547             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     548             :                             name)));
     549           0 :             return false;
     550             :         }
     551           0 :         return true;
     552             :     }
     553             : 
     554             :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     555           0 :     if (op == DSM_OP_ATTACH)
     556             :     {
     557             :         struct shmid_ds shm;
     558             : 
     559           0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     560             :         {
     561           0 :             ereport(elevel,
     562             :                     (errcode_for_dynamic_shared_memory(),
     563             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     564             :                             name)));
     565           0 :             return false;
     566             :         }
     567           0 :         request_size = shm.shm_segsz;
     568             :     }
     569             : 
     570             :     /* Map it. */
     571           0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     572           0 :     if (address == (void *) -1)
     573             :     {
     574             :         int         save_errno;
     575             : 
     576             :         /* Back out what's already been done. */
     577           0 :         save_errno = errno;
     578           0 :         if (op == DSM_OP_CREATE)
     579           0 :             shmctl(ident, IPC_RMID, NULL);
     580           0 :         errno = save_errno;
     581             : 
     582           0 :         ereport(elevel,
     583             :                 (errcode_for_dynamic_shared_memory(),
     584             :                  errmsg("could not map shared memory segment \"%s\": %m",
     585             :                         name)));
     586           0 :         return false;
     587             :     }
     588           0 :     *mapped_address = address;
     589           0 :     *mapped_size = request_size;
     590             : 
     591           0 :     return true;
     592             : }
     593             : #endif
     594             : 
     595             : #ifdef USE_DSM_WINDOWS
     596             : /*
     597             :  * Operating system primitives to support Windows shared memory.
     598             :  *
     599             :  * The Windows shared memory implementation uses file mapping, which can
     600             :  * be backed by either a physical file or the system paging file.  The
     601             :  * current implementation uses the system paging file, because other effects
     602             :  * such as performance are not clear for a physical file, and the paging file
     603             :  * is used in a similar way for the main shared memory on Windows.
     604             :  *
     605             :  * A memory mapping object is a kernel object; it is always deleted when
     606             :  * the last reference to it goes away, either explicitly via CloseHandle or
     607             :  * when the process containing the reference exits.
     608             :  */
     609             : static bool  /* true on success; false on each failure path that returns */
     610             : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,  /* request_size is read only for DSM_OP_CREATE */
     611             :                  void **impl_private, void **mapped_address,  /* *impl_private carries the mapping HANDLE */
     612             :                  Size *mapped_size, int elevel)  /* elevel is the level handed to ereport() */
     613             : {
     614             :     char       *address;
     615             :     HANDLE      hmap;
     616             :     char        name[64];
     617             :     MEMORY_BASIC_INFORMATION info;
     618             : 
     619             :     /*
     620             :      * Storing the shared memory segment in the Global\ namespace can allow
     621             :      * any process running in any session to access that file mapping object,
     622             :      * provided that the caller has the required access rights. But to avoid
     623             :      * issues faced in main shared memory, we are using a naming convention
     624             :      * similar to main shared memory. We can change this once the issue
     625             :      * mentioned in GetSharedMemName is resolved.
     626             :      */
     627             :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);  /* object name depends only on the prefix and handle */
     628             : 
     629             :     /*
     630             :      * Handle teardown cases.  Since Windows automatically destroys the object
     631             :      * when no references remain, we can treat it the same as detach.
     632             :      */
     633             :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     634             :     {
     635             :         if (*mapped_address != NULL
     636             :             && UnmapViewOfFile(*mapped_address) == 0)
     637             :         {
     638             :             _dosmaperr(GetLastError());  /* translate the Win32 error into errno for %m */
     639             :             ereport(elevel,
     640             :                     (errcode_for_dynamic_shared_memory(),
     641             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     642             :                             name)));
     643             :             return false;  /* handle stays open and the outputs are left as they were */
     644             :         }
     645             :         if (*impl_private != NULL
     646             :             && CloseHandle(*impl_private) == 0)
     647             :         {
     648             :             _dosmaperr(GetLastError());
     649             :             ereport(elevel,
     650             :                     (errcode_for_dynamic_shared_memory(),
     651             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     652             :                             name)));
     653             :             return false;  /* NOTE(review): the view is already unmapped here, yet *mapped_address is not cleared */
     654             :         }
     655             : 
     656             :         *impl_private = NULL;
     657             :         *mapped_address = NULL;
     658             :         *mapped_size = 0;
     659             :         return true;
     660             :     }
     661             : 
     662             :     /* Create new segment or open an existing one for attach. */
     663             :     if (op == DSM_OP_CREATE)
     664             :     {
     665             :         DWORD       size_high;
     666             :         DWORD       size_low;
     667             :         DWORD       errcode;
     668             : 
     669             :         /* Shifts >= the width of the type are undefined. */
     670             : #ifdef _WIN64
     671             :         size_high = request_size >> 32;
     672             : #else
     673             :         size_high = 0;
     674             : #endif
     675             :         size_low = (DWORD) request_size;  /* the cast keeps the low-order DWORD of the size */
     676             : 
     677             :         /* CreateFileMapping might not clear the error code on success */
     678             :         SetLastError(0);
     679             : 
     680             :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     681             :                                  NULL,  /* Default security attrs */
     682             :                                  PAGE_READWRITE,    /* Memory is read/write */
     683             :                                  size_high, /* Upper 32 bits of size */
     684             :                                  size_low,  /* Lower 32 bits of size */
     685             :                                  name);
     686             : 
     687             :         errcode = GetLastError();  /* examined even when hmap is non-NULL; see the collision check below */
     688             :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     689             :         {
     690             :             /*
     691             :              * On Windows, when the segment already exists, a handle for the
     692             :              * existing segment is returned.  We must close it before
     693             :              * returning.  However, if the existing segment is created by a
     694             :              * service, then it returns ERROR_ACCESS_DENIED. We don't do
     695             :              * _dosmaperr here, so errno won't be modified.
     696             :              */
     697             :             if (hmap)
     698             :                 CloseHandle(hmap);
     699             :             return false;  /* name collision: nothing is reported via ereport on this path */
     700             :         }
     701             : 
     702             :         if (!hmap)
     703             :         {
     704             :             _dosmaperr(errcode);
     705             :             ereport(elevel,
     706             :                     (errcode_for_dynamic_shared_memory(),
     707             :                      errmsg("could not create shared memory segment \"%s\": %m",
     708             :                             name)));
     709             :             return false;
     710             :         }
     711             :     }
     712             :     else  /* every op other than CREATE that was not handled by the teardown branch above */
     713             :     {
     714             :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     715             :                                FALSE,   /* do not inherit the name */
     716             :                                name);   /* name of mapping object */
     717             :         if (!hmap)
     718             :         {
     719             :             _dosmaperr(GetLastError());
     720             :             ereport(elevel,
     721             :                     (errcode_for_dynamic_shared_memory(),
     722             :                      errmsg("could not open shared memory segment \"%s\": %m",
     723             :                             name)));
     724             :             return false;
     725             :         }
     726             :     }
     727             : 
     728             :     /* Map it. */
     729             :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     730             :                             0, 0, 0);  /* offset 0, length 0; per the Win32 docs a zero length maps through the end of the object */
     731             :     if (!address)
     732             :     {
     733             :         int         save_errno;
     734             : 
     735             :         _dosmaperr(GetLastError());
     736             :         /* Back out what's already been done. */
     737             :         save_errno = errno;  /* keep the mapped errno safe across the cleanup call */
     738             :         CloseHandle(hmap);
     739             :         errno = save_errno;
     740             : 
     741             :         ereport(elevel,
     742             :                 (errcode_for_dynamic_shared_memory(),
     743             :                  errmsg("could not map shared memory segment \"%s\": %m",
     744             :                         name)));
     745             :         return false;
     746             :     }
     747             : 
     748             :     /*
     749             :      * VirtualQuery gives size in page_size units, which is 4K for Windows. We
     750             :      * need size only when we are attaching, but it's better to get the size
     751             :      * when creating new segment to keep size consistent both for
     752             :      * DSM_OP_CREATE and DSM_OP_ATTACH.
     753             :      */
     754             :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     755             :     {
     756             :         int         save_errno;
     757             : 
     758             :         _dosmaperr(GetLastError());
     759             :         /* Back out what's already been done. */
     760             :         save_errno = errno;
     761             :         UnmapViewOfFile(address);  /* undo the view first, then drop the handle */
     762             :         CloseHandle(hmap);
     763             :         errno = save_errno;
     764             : 
     765             :         ereport(elevel,
     766             :                 (errcode_for_dynamic_shared_memory(),
     767             :                  errmsg("could not stat shared memory segment \"%s\": %m",
     768             :                         name)));
     769             :         return false;
     770             :     }
     771             : 
     772             :     *mapped_address = address;
     773             :     *mapped_size = info.RegionSize;  /* size as reported by VirtualQuery, not request_size */
     774             :     *impl_private = hmap;  /* kept so the teardown branch can CloseHandle() it */
     775             : 
     776             :     return true;
     777             : }
     778             : #endif
     779             : 
     780             : #ifdef USE_DSM_MMAP
     781             : /*
     782             :  * Operating system primitives to support mmap-based shared memory.
     783             :  *
     784             :  * Calling this "shared memory" is somewhat of a misnomer, because what
     785             :  * we're really doing is creating a bunch of files and mapping them into
     786             :  * our address space.  The operating system may feel obliged to
     787             :  * synchronize the contents to disk even if nothing is being paged out,
     788             :  * which will not serve us well.  The user can relocate the pg_dynshmem
     789             :  * directory to a ramdisk to avoid this problem, if available.
     790             :  */
     791             : static bool  /* true on success; false on each failure path that returns */
     792           0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,  /* request_size is overwritten with the file size on attach */
     793             :               void **impl_private, void **mapped_address, Size *mapped_size,  /* impl_private is never referenced in this function */
     794             :               int elevel)  /* elevel is the level handed to ereport() */
     795             : {
     796             :     char        name[64];
     797             :     int         flags;
     798             :     int         fd;
     799             :     char       *address;
     800             : 
     801           0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     802             :              handle);  /* backing file path depends only on the handle */
     803             : 
     804             :     /* Handle teardown cases. */
     805           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     806             :     {
     807           0 :         if (*mapped_address != NULL
     808           0 :             && munmap(*mapped_address, *mapped_size) != 0)
     809             :         {
     810           0 :             ereport(elevel,
     811             :                     (errcode_for_dynamic_shared_memory(),
     812             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     813             :                             name)));
     814           0 :             return false;  /* outputs and the backing file are left untouched on this path */
     815             :         }
     816           0 :         *mapped_address = NULL;
     817           0 :         *mapped_size = 0;
     818           0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)  /* the file is removed only for DSM_OP_DESTROY */
     819             :         {
     820           0 :             ereport(elevel,
     821             :                     (errcode_for_dynamic_shared_memory(),
     822             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     823             :                             name)));
     824           0 :             return false;  /* the outputs have already been reset by this point */
     825             :         }
     826           0 :         return true;
     827             :     }
     828             : 
     829             :     /* Create new segment or open an existing one for attach. */
     830           0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);  /* O_EXCL: creating over an existing file fails */
     831           0 :     if ((fd = OpenTransientFile(name, flags)) == -1)
     832             :     {
     833           0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)  /* EEXIST on a non-attach op is not reported */
     834           0 :             ereport(elevel,
     835             :                     (errcode_for_dynamic_shared_memory(),
     836             :                      errmsg("could not open shared memory segment \"%s\": %m",
     837             :                             name)));
     838           0 :         return false;
     839             :     }
     840             : 
     841             :     /*
     842             :      * If we're attaching the segment, determine the current size; if we are
     843             :      * creating the segment, set the size to the requested value.
     844             :      */
     845           0 :     if (op == DSM_OP_ATTACH)
     846             :     {
     847             :         struct stat st;
     848             : 
     849           0 :         if (fstat(fd, &st) != 0)
     850             :         {
     851             :             int         save_errno;
     852             : 
     853             :             /* Back out what's already been done. */
     854           0 :             save_errno = errno;  /* keep fstat's errno safe across the close */
     855           0 :             CloseTransientFile(fd);
     856           0 :             errno = save_errno;
     857             : 
     858           0 :             ereport(elevel,
     859             :                     (errcode_for_dynamic_shared_memory(),
     860             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     861             :                             name)));
     862           0 :             return false;
     863             :         }
     864           0 :         request_size = st.st_size;  /* map the file's current size, not the caller's value */
     865             :     }
     866             :     else
     867             :     {
     868             :         /*
     869             :          * Allocate a buffer full of zeros.
     870             :          *
     871             :          * Note: palloc zbuffer, instead of just using a local char array, to
     872             :          * ensure it is reasonably well-aligned; this may save a few cycles
     873             :          * transferring data to the kernel.
     874             :          */
     875           0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);  /* NOTE(review): no pfree of zbuffer appears in this function */
     876           0 :         Size        remaining = request_size;
     877           0 :         bool        success = true;
     878             : 
     879             :         /*
     880             :          * Zero-fill the file. We have to do this the hard way to ensure that
     881             :          * all the file space has really been allocated, so that we don't
     882             :          * later seg fault when accessing the memory mapping.  This is pretty
     883             :          * pessimal.
     884             :          */
     885           0 :         while (success && remaining > 0)
     886             :         {
     887           0 :             Size        goal = remaining;
     888             : 
     889           0 :             if (goal > ZBUFFER_SIZE)
     890           0 :                 goal = ZBUFFER_SIZE;  /* write at most one buffer's worth per call */
     891           0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     892           0 :             if (write(fd, zbuffer, goal) == goal)  /* NOTE(review): errno is not cleared before write(), so a short write may leave a stale errno */
     893           0 :                 remaining -= goal;
     894             :             else
     895           0 :                 success = false;  /* covers both an error return and a short write */
     896           0 :             pgstat_report_wait_end();
     897             :         }
     898             : 
     899           0 :         if (!success)
     900             :         {
     901             :             int         save_errno;
     902             : 
     903             :             /* Back out what's already been done. */
     904           0 :             save_errno = errno;
     905           0 :             CloseTransientFile(fd);
     906           0 :             unlink(name);  /* remove the partially written file */
     907           0 :             errno = save_errno ? save_errno : ENOSPC;  /* ENOSPC is used only when errno was 0 */
     908             : 
     909           0 :             ereport(elevel,
     910             :                     (errcode_for_dynamic_shared_memory(),
     911             :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     912             :                             name, request_size)));
     913           0 :             return false;
     914             :         }
     915             :     }
     916             : 
     917             :     /* Map it. */
     918           0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     919             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     920           0 :     if (address == MAP_FAILED)
     921             :     {
     922             :         int         save_errno;
     923             : 
     924             :         /* Back out what's already been done. */
     925           0 :         save_errno = errno;
     926           0 :         CloseTransientFile(fd);
     927           0 :         if (op == DSM_OP_CREATE)
     928           0 :             unlink(name);  /* only a file created by this call is removed */
     929           0 :         errno = save_errno;
     930             : 
     931           0 :         ereport(elevel,
     932             :                 (errcode_for_dynamic_shared_memory(),
     933             :                  errmsg("could not map shared memory segment \"%s\": %m",
     934             :                         name)));
     935           0 :         return false;
     936             :     }
     937           0 :     *mapped_address = address;  /* outputs are published before the descriptor is closed */
     938           0 :     *mapped_size = request_size;
     939             : 
     940           0 :     if (CloseTransientFile(fd) != 0)  /* the descriptor is closed on the success path too */
     941             :     {
     942           0 :         ereport(elevel,
     943             :                 (errcode_for_file_access(),
     944             :                  errmsg("could not close shared memory segment \"%s\": %m",
     945             :                         name)));
     946           0 :         return false;  /* NOTE(review): the mapping stays in place and the outputs stay set on this path */
     947             :     }
     948             : 
     949           0 :     return true;
     950             : }
     951             : #endif
     952             : 
     953             : /*
     954             :  * Implementation-specific actions that must be performed when a segment is to
     955             :  * be preserved even when no backend has it attached.
     956             :  *
     957             :  * Except on Windows, we don't need to do anything at all.  But since Windows
     958             :  * cleans up segments automatically when no references remain, we duplicate
     959             :  * the segment handle into the postmaster process.  The postmaster needn't
     960             :  * do anything to receive the handle; Windows transfers it automatically.
     961             :  */
     962             : void
     963        1932 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,  /* handle is used only to build the name in the error message */
     964             :                      void **impl_private_pm_handle)  /* written only on the Windows path when IsUnderPostmaster */
     965             : {
     966        1932 :     switch (dynamic_shared_memory_type)
     967             :     {
     968             : #ifdef USE_DSM_WINDOWS
     969             :         case DSM_IMPL_WINDOWS:
     970             :             if (IsUnderPostmaster)
     971             :             {
     972             :                 HANDLE      hmap;
     973             : 
     974             :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,  /* source: this process and its segment handle */
     975             :                                      PostmasterHandle, &hmap, 0, FALSE,  /* target: the postmaster; new handle value lands in hmap */
     976             :                                      DUPLICATE_SAME_ACCESS))
     977             :                 {
     978             :                     char        name[64];
     979             : 
     980             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     981             :                     _dosmaperr(GetLastError());
     982             :                     ereport(ERROR,  /* fixed at ERROR; this function takes no caller-supplied level */
     983             :                             (errcode_for_dynamic_shared_memory(),
     984             :                              errmsg("could not duplicate handle for \"%s\": %m",
     985             :                                     name)));
     986             :                 }
     987             : 
     988             :                 /*
     989             :                  * Here, we remember the handle that we created in the
     990             :                  * postmaster process.  This handle isn't actually usable in
     991             :                  * any process other than the postmaster, but that doesn't
     992             :                  * matter.  We're just holding onto it so that, if the segment
     993             :                  * is unpinned, dsm_impl_unpin_segment can close it.
     994             :                  */
     995             :                 *impl_private_pm_handle = hmap;
     996             :             }
     997             :             break;
     998             : #endif
     999             :         default:  /* every other implementation: nothing to do */
    1000        1932 :             break;
    1001             :     }
    1002        1932 : }
    1003             : 
    1004             : /*
    1005             :  * Implementation-specific actions that must be performed when a segment is no
    1006             :  * longer to be preserved, so that it will be cleaned up when all backends
    1007             :  * have detached from it.
    1008             :  *
    1009             :  * Except on Windows, we don't need to do anything at all.  For Windows, we
    1010             :  * close the extra handle that dsm_impl_pin_segment created in the
    1011             :  * postmaster's process space.
    1012             :  */
    1013             : void
    1014         296 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)  /* handle is used only to build the name in the error message */
    1015             : {
    1016         296 :     switch (dynamic_shared_memory_type)
    1017             :     {
    1018             : #ifdef USE_DSM_WINDOWS
    1019             :         case DSM_IMPL_WINDOWS:
    1020             :             if (IsUnderPostmaster)
    1021             :             {
    1022             :                 if (*impl_private &&  /* skip the call when no handle was recorded */
    1023             :                     !DuplicateHandle(PostmasterHandle, *impl_private,  /* source: the postmaster-side handle */
    1024             :                                      NULL, NULL, 0, FALSE,  /* no target process and no output handle */
    1025             :                                      DUPLICATE_CLOSE_SOURCE))  /* the close described in the header comment */
    1026             :                 {
    1027             :                     char        name[64];
    1028             : 
    1029             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1030             :                     _dosmaperr(GetLastError());
    1031             :                     ereport(ERROR,  /* fixed at ERROR; this function takes no caller-supplied level */
    1032             :                             (errcode_for_dynamic_shared_memory(),
    1033             :                              errmsg("could not duplicate handle for \"%s\": %m",  /* NOTE(review): same text as the pin path, although this call closes a handle */
    1034             :                                     name)));
    1035             :                 }
    1036             : 
    1037             :                 *impl_private = NULL;  /* cleared whether or not a handle had been recorded */
    1038             :             }
    1039             :             break;
    1040             : #endif
    1041             :         default:  /* every other implementation: nothing to do */
    1042         296 :             break;
    1043             :     }
    1044         296 : }
    1045             : 
    1046             : static int  /* chooses the error code for a DSM failure from the current errno */
    1047           0 : errcode_for_dynamic_shared_memory(void)  /* reads errno, so callers must keep it intact until this runs */
    1048             : {
    1049           0 :     if (errno == EFBIG || errno == ENOMEM)  /* these two errno values are reported as out-of-memory */
    1050           0 :         return errcode(ERRCODE_OUT_OF_MEMORY);
    1051             :     else
    1052           0 :         return errcode_for_file_access();  /* any other errno is delegated to the file-access classifier */
    1053             : }

Generated by: LCOV version 1.14