LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 44 201 21.9 %
Date: 2020-06-03 10:06:28 Functions: 5 8 62.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dsm_impl.c
       4             :  *    manage dynamic shared memory segments
       5             :  *
       6             :  * This file provides low-level APIs for creating and destroying shared
       7             :  * memory segments using several different possible techniques.  We refer
       8             :  * to these segments as dynamic because they can be created, altered, and
       9             :  * destroyed at any point during the server life cycle.  This is unlike
      10             :  * the main shared memory segment, of which there is always exactly one
      11             :  * and which is always mapped at a fixed address in every PostgreSQL
      12             :  * background process.
      13             :  *
      14             :  * Because not all systems provide the same primitives in this area, nor
      15             :  * do all primitives behave the same way on all systems, we provide
      16             :  * several implementations of this facility.  Many systems implement
      17             :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18             :  * in this area, with the exception that shared memory identifiers live
      19             :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20             :  * name collisions with other processes (including other copies of
      21             :  * PostgreSQL) running on the same system.  Some systems only support
      22             :  * the older System V shared memory interface (shmget etc.) which is
      23             :  * also usable; however, the default allocation limits are often quite
      24             :  * small, and the namespace is even more restricted.
      25             :  *
      26             :  * We also provide an mmap-based shared memory implementation.  This may
      27             :  * be useful on systems that provide shared memory via a special-purpose
      28             :  * filesystem; by opting for this implementation, the user can even
      29             :  * control precisely where their shared memory segments are placed.  It
      30             :  * can also be used as a fallback for systems where shm_open and shmget
      31             :  * are not available or can't be used for some reason.  Of course,
      32             :  * mapping a file residing on an actual spinning disk is a fairly poor
      33             :  * approximation for shared memory because writeback may hurt performance
      34             :  * substantially, but there should be few systems where we must make do
      35             :  * with such poor tools.
      36             :  *
      37             :  * As ever, Windows requires its own implementation.
      38             :  *
      39             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
      40             :  * Portions Copyright (c) 1994, Regents of the University of California
      41             :  *
      42             :  *
      43             :  * IDENTIFICATION
      44             :  *    src/backend/storage/ipc/dsm_impl.c
      45             :  *
      46             :  *-------------------------------------------------------------------------
      47             :  */
      48             : 
      49             : #include "postgres.h"
      50             : 
      51             : #include <fcntl.h>
      52             : #include <unistd.h>
      53             : #ifndef WIN32
      54             : #include <sys/mman.h>
      55             : #endif
      56             : #include <sys/stat.h>
      57             : #ifdef HAVE_SYS_IPC_H
      58             : #include <sys/ipc.h>
      59             : #endif
      60             : #ifdef HAVE_SYS_SHM_H
      61             : #include <sys/shm.h>
      62             : #endif
      63             : 
      64             : #include "common/file_perm.h"
      65             : #include "miscadmin.h"
      66             : #include "pgstat.h"
      67             : #include "portability/mem.h"
      68             : #include "postmaster/postmaster.h"
      69             : #include "storage/dsm_impl.h"
      70             : #include "storage/fd.h"
      71             : #include "utils/guc.h"
      72             : #include "utils/memutils.h"
      73             : 
      74             : #ifdef USE_DSM_POSIX
      75             : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      76             :                            void **impl_private, void **mapped_address,
      77             :                            Size *mapped_size, int elevel);
      78             : static int  dsm_impl_posix_resize(int fd, off_t size);
      79             : #endif
      80             : #ifdef USE_DSM_SYSV
      81             : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      82             :                           void **impl_private, void **mapped_address,
      83             :                           Size *mapped_size, int elevel);
      84             : #endif
      85             : #ifdef USE_DSM_WINDOWS
      86             : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      87             :                              void **impl_private, void **mapped_address,
      88             :                              Size *mapped_size, int elevel);
      89             : #endif
      90             : #ifdef USE_DSM_MMAP
      91             : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      92             :                           void **impl_private, void **mapped_address,
      93             :                           Size *mapped_size, int elevel);
      94             : #endif
      95             : static int  errcode_for_dynamic_shared_memory(void);
      96             : 
      97             : const struct config_enum_entry dynamic_shared_memory_options[] = {
      98             : #ifdef USE_DSM_POSIX
      99             :     {"posix", DSM_IMPL_POSIX, false},
     100             : #endif
     101             : #ifdef USE_DSM_SYSV
     102             :     {"sysv", DSM_IMPL_SYSV, false},
     103             : #endif
     104             : #ifdef USE_DSM_WINDOWS
     105             :     {"windows", DSM_IMPL_WINDOWS, false},
     106             : #endif
     107             : #ifdef USE_DSM_MMAP
     108             :     {"mmap", DSM_IMPL_MMAP, false},
     109             : #endif
     110             :     {NULL, 0, false}
     111             : };
     112             : 
     113             : /* Implementation selector; dsm_impl_op() dispatches on this DSM_IMPL_* value. */
     114             : int         dynamic_shared_memory_type;
     115             : 
     116             : /* Size of buffer to be used for zero-filling. */
     117             : #define ZBUFFER_SIZE                8192
     118             : 
     119             : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"
     120             : 
     121             : /*------
     122             :  * Perform a low-level shared memory operation in a platform-specific way,
     123             :  * as dictated by the selected implementation.  Each implementation is
     124             :  * required to implement the following primitives.
     125             :  *
     126             :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     127             :  * map it.
     128             :  *
     129             :  * DSM_OP_ATTACH.  Map the existing segment; request_size must be 0.
     130             :  *
     131             :  * DSM_OP_DETACH.  Unmap the segment.
     132             :  *
     133             :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     134             :  * segment.
     135             :  *
     136             :  * Arguments:
     137             :  *   op: The operation to be performed.
     138             :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     139             :  *     new handle the caller wants created.
     140             :  *   request_size: For DSM_OP_CREATE, the requested size.  Otherwise, 0.
     141             :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     142             :  *     to NULL for the first operation on a shared memory segment within this
     143             :  *     backend; thereafter, it will point to the value to which it was set
     144             :  *     on the previous call.
     145             :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     146             :  *     if none.  Updated with new mapping address.
     147             :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     148             :  *     Updated with new mapped size.
     149             :  *   elevel: Level at which to log errors.
     150             :  *
     151             :  * Return value: true on success, false on failure.  When false is returned,
     152             :  * a message should first be logged at the specified elevel, except in the
     153             :  * case where DSM_OP_CREATE experiences a name collision, which should
     154             :  * silently return false.
     155             :  *-----
     156             :  */
     157             : bool
     158       14122 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     159             :             void **impl_private, void **mapped_address, Size *mapped_size,
     160             :             int elevel)
     161             : {
     162             :     Assert(op == DSM_OP_CREATE || request_size == 0);
     163             :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
     164             :            (*mapped_address == NULL && *mapped_size == 0));
     165             : 
     166       14122 :     switch (dynamic_shared_memory_type)
     167             :     {
     168             : #ifdef USE_DSM_POSIX
     169       14122 :         case DSM_IMPL_POSIX:
     170       14122 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     171             :                                   mapped_address, mapped_size, elevel);
     172             : #endif
     173             : #ifdef USE_DSM_SYSV
     174           0 :         case DSM_IMPL_SYSV:
     175           0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     176             :                                  mapped_address, mapped_size, elevel);
     177             : #endif
     178             : #ifdef USE_DSM_WINDOWS
     179             :         case DSM_IMPL_WINDOWS:
     180             :             return dsm_impl_windows(op, handle, request_size, impl_private,
     181             :                                     mapped_address, mapped_size, elevel);
     182             : #endif
     183             : #ifdef USE_DSM_MMAP
     184           0 :         case DSM_IMPL_MMAP:
     185           0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     186             :                                  mapped_address, mapped_size, elevel);
     187             : #endif
     188           0 :         default:
     189           0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     190             :                  dynamic_shared_memory_type);
     191             :             return false;
     192             :     }
     193             : }
     194             : 
     195             : #ifdef USE_DSM_POSIX
     196             : /*
     197             :  * Operating system primitives to support POSIX shared memory.
     198             :  *
     199             :  * POSIX shared memory segments are created and opened using shm_open()
     200             :  * and removed with shm_unlink(); other operations, such as sizing or mapping
     201             :  * the segment, are performed as if the shared memory segments were files.
     202             :  *
     203             :  * Indeed, on some platforms, they may be implemented that way.  While
     204             :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     205             :  * some operating systems may implement them as files, even going so far
     206             :  * as to treat a request for /xyz as a request to create a file by that name
     207             :  * in the root directory.  Users of such broken platforms should select
     208             :  * a different shared memory implementation.
     209             :  */
     210             : static bool
     211       14122 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     212             :                void **impl_private, void **mapped_address, Size *mapped_size,
     213             :                int elevel)
     214             : {
     215             :     char        name[64];
     216             :     int         flags;
     217             :     int         fd;
     218             :     char       *address;
     219             : 
     220       14122 :     snprintf(name, 64, "/PostgreSQL.%u", handle);
     221             : 
     222             :     /* Handle teardown cases: unmap if mapped; for DESTROY, also unlink the name. */
     223       14122 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     224             :     {
     225        7756 :         if (*mapped_address != NULL
     226        7104 :             && munmap(*mapped_address, *mapped_size) != 0)
     227             :         {
     228           0 :             ereport(elevel,
     229             :                     (errcode_for_dynamic_shared_memory(),
     230             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     231             :                             name)));
     232           0 :             return false;
     233             :         }
     234        7756 :         *mapped_address = NULL;
     235        7756 :         *mapped_size = 0;
     236        7756 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     237             :         {
     238           0 :             ereport(elevel,
     239             :                     (errcode_for_dynamic_shared_memory(),
     240             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     241             :                             name)));
     242           0 :             return false;
     243             :         }
     244        7756 :         return true;
     245             :     }
     246             : 
     247             :     /*
     248             :      * Create new segment or open an existing one for attach.
     249             :      *
     250             :      * Even though we will close the FD before returning, it seems desirable
     251             :      * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
     252             :      * failure.  The fact that we won't hold the FD open long justifies using
     253             :      * ReserveExternalFD rather than AcquireExternalFD, though.
     254             :      */
     255        6366 :     ReserveExternalFD();
     256             : 
     257        6366 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     258        6366 :     if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
     259             :     {
     260           0 :         ReleaseExternalFD();
     261           0 :         if (errno != EEXIST)    /* a name collision fails silently */
     262           0 :             ereport(elevel,
     263             :                     (errcode_for_dynamic_shared_memory(),
     264             :                      errmsg("could not open shared memory segment \"%s\": %m",
     265             :                             name)));
     266           0 :         return false;
     267             :     }
     268             : 
     269             :     /*
     270             :      * If we're attaching the segment, determine the current size; if we are
     271             :      * creating the segment, set the size to the requested value.
     272             :      */
     273        6366 :     if (op == DSM_OP_ATTACH)
     274             :     {
     275             :         struct stat st;
     276             : 
     277        3544 :         if (fstat(fd, &st) != 0)
     278             :         {
     279             :             int         save_errno;
     280             : 
     281             :             /* Back out what's already been done, preserving errno for %m. */
     282           0 :             save_errno = errno;
     283           0 :             close(fd);
     284           0 :             ReleaseExternalFD();
     285           0 :             errno = save_errno;
     286             : 
     287           0 :             ereport(elevel,
     288             :                     (errcode_for_dynamic_shared_memory(),
     289             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     290             :                             name)));
     291           0 :             return false;
     292             :         }
     293        3544 :         request_size = st.st_size;
     294             :     }
     295        2822 :     else if (dsm_impl_posix_resize(fd, request_size) != 0)
     296             :     {
     297             :         int         save_errno;
     298             : 
     299             :         /* Back out what's already been done, including the new segment's name. */
     300           0 :         save_errno = errno;
     301           0 :         close(fd);
     302           0 :         ReleaseExternalFD();
     303           0 :         shm_unlink(name);
     304           0 :         errno = save_errno;
     305             : 
     306             :         /*
     307             :          * If we received a query cancel or termination signal, we will have
     308             :          * EINTR set here.  If the caller said that errors are OK here, check
     309             :          * for interrupts immediately.
     310             :          */
     311           0 :         if (errno == EINTR && elevel >= ERROR)
     312           0 :             CHECK_FOR_INTERRUPTS();
     313             : 
     314           0 :         ereport(elevel,
     315             :                 (errcode_for_dynamic_shared_memory(),
     316             :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     317             :                         name, request_size)));
     318           0 :         return false;
     319             :     }
     320             : 
     321             :     /* Map the whole segment, shared, with no address hint. */
     322        6366 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     323             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     324        6366 :     if (address == MAP_FAILED)
     325             :     {
     326             :         int         save_errno;
     327             : 
     328             :         /* Back out what's already been done; unlink only what we created. */
     329           0 :         save_errno = errno;
     330           0 :         close(fd);
     331           0 :         ReleaseExternalFD();
     332           0 :         if (op == DSM_OP_CREATE)
     333           0 :             shm_unlink(name);
     334           0 :         errno = save_errno;
     335             : 
     336           0 :         ereport(elevel,
     337             :                 (errcode_for_dynamic_shared_memory(),
     338             :                  errmsg("could not map shared memory segment \"%s\": %m",
     339             :                         name)));
     340           0 :         return false;
     341             :     }
     342        6366 :     *mapped_address = address;
     343        6366 :     *mapped_size = request_size;
     344        6366 :     close(fd);
     345        6366 :     ReleaseExternalFD();
     346             : 
     347        6366 :     return true;
     348             : }
     349             : 
     350             : /*
     351             :  * Set the size of a virtual memory region associated with a file descriptor.
     352             :  * If necessary, also ensure that virtual memory is actually allocated by the
     353             :  * operating system, to avoid nasty surprises later.
     354             :  *
     355             :  * Returns non-zero if either truncation or allocation fails, and sets errno.
     356             :  */
     357             : static int
     358        2822 : dsm_impl_posix_resize(int fd, off_t size)
     359             : {
     360             :     int         rc;
     361             : 
     362             :     /* Truncate (or extend) the file to the requested size. */
     363        2822 :     rc = ftruncate(fd, size);
     364             : 
     365             :     /*
     366             :      * On Linux, a shm_open fd is backed by a tmpfs file.  After resizing with
     367             :      * ftruncate, the file may contain a hole.  Accessing memory backed by a
     368             :      * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
     369             :      * is no more tmpfs space available.  So we ask tmpfs to allocate pages
     370             :      * here, so we can fail gracefully with ENOSPC now rather than risking
     371             :      * SIGBUS later.
     372             :      */
     373             : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
     374        2822 :     if (rc == 0)
     375             :     {
     376             :         /*
     377             :          * We may get interrupted.  If so, just retry unless there is an
     378             :          * interrupt pending.  This avoids the possibility of looping forever
     379             :          * if another backend is repeatedly trying to interrupt us.
     380             :          */
     381        2822 :         pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     382             :         do
     383             :         {
     384        2822 :             rc = posix_fallocate(fd, 0, size);
     385        2822 :         } while (rc == EINTR && !(ProcDiePending || QueryCancelPending));
     386        2822 :         pgstat_report_wait_end();
     387             : 
     388             :         /*
     389             :          * The caller expects errno to be set, but posix_fallocate() doesn't
     390             :          * set it.  Instead it returns error numbers directly.  So set errno,
     391             :          * even though we'll also return rc to indicate success or failure.
     392             :          */
     393        2822 :         errno = rc;
     394             :     }
     395             : #endif                          /* HAVE_POSIX_FALLOCATE && __linux__ */
     396             : 
     397        2822 :     return rc;                  /* ftruncate's result, or posix_fallocate's error number */
     398             : }
     399             : 
     400             : #endif                          /* USE_DSM_POSIX */
     401             : 
     402             : #ifdef USE_DSM_SYSV
     403             : /*
     404             :  * Operating system primitives to support System V shared memory.
     405             :  *
     406             :  * System V shared memory segments are manipulated using shmget(), shmat(),
     407             :  * shmdt(), and shmctl().  As the default allocation limits for System V
     408             :  * shared memory are usually quite low, the POSIX facilities may be
     409             :  * preferable; but those are not supported everywhere.
     410             :  */
     411             : static bool
     412           0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     413             :               void **impl_private, void **mapped_address, Size *mapped_size,
     414             :               int elevel)
     415             : {
     416             :     key_t       key;
     417             :     int         ident;
     418             :     char       *address;
     419             :     char        name[64];
     420             :     int        *ident_cache;
     421             : 
     422             :     /*
     423             :      * POSIX shared memory and mmap-based shared memory identify segments with
     424             :      * names.  To avoid needless error message variation, we use the handle as
     425             :      * the name.
     426             :      */
     427           0 :     snprintf(name, 64, "%u", handle);
     428             : 
     429             :     /*
     430             :      * The System V shared memory namespace is very restricted; names are of
     431             :      * type key_t, which is expected to be some sort of integer data type, but
     432             :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     433             :      * identify shared memory segments across processes, this might seem like
     434             :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     435             :      * the cast below might truncate away some bits from the handle the
     436             :      * user provided, but it'll truncate exactly the same bits away in exactly
     437             :      * the same fashion every time we use that handle, which is all that
     438             :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     439             :      * won't use the full range of available key space, but that's no big deal
     440             :      * either.
     441             :      *
     442             :      * We do make sure that the key isn't negative, because that might not be
     443             :      * portable.
     444             :      */
     445           0 :     key = (key_t) handle;
     446           0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     447           0 :         key = -key;
     448             : 
     449             :     /*
     450             :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     451             :      * up with that value by chance during a create operation, just pretend it
     452             :      * already exists, so that caller will retry.  If we run into it anywhere
     453             :      * else, the caller has passed a handle that doesn't correspond to
     454             :      * anything we ever created, which should not happen.
     455             :      */
     456           0 :     if (key == IPC_PRIVATE)
     457             :     {
     458           0 :         if (op != DSM_OP_CREATE)
     459           0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     460           0 :         errno = EEXIST;
     461           0 :         return false;
     462             :     }
     463             : 
     464             :     /*
     465             :      * Before we can do anything with a shared memory segment, we have to map
     466             :      * the shared memory key to a shared memory identifier using shmget(). To
     467             :      * avoid repeated lookups, we store the identifier using impl_private.
     468             :      */
     469           0 :     if (*impl_private != NULL)
     470             :     {
     471           0 :         ident_cache = *impl_private;
     472           0 :         ident = *ident_cache;
     473             :     }
     474             :     else
     475             :     {
     476           0 :         int         flags = IPCProtection;
     477             :         size_t      segsize;
     478             : 
     479             :         /*
     480             :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     481             :          * leak the resource if memory allocation fails.
     482             :          */
     483           0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     484             : 
     485             :         /*
     486             :          * When using shmget to find an existing segment, we must pass the
     487             :          * size as 0.  Passing a non-zero size which is greater than the
     488             :          * actual size will result in EINVAL.
     489             :          */
     490           0 :         segsize = 0;
     491             : 
     492           0 :         if (op == DSM_OP_CREATE)
     493             :         {
     494           0 :             flags |= IPC_CREAT | IPC_EXCL;
     495           0 :             segsize = request_size;
     496             :         }
     497             : 
     498           0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     499             :         {
     500           0 :             if (errno != EEXIST)
     501             :             {
     502           0 :                 int         save_errno = errno;
     503             : 
     504           0 :                 pfree(ident_cache);
     505           0 :                 errno = save_errno;
     506           0 :                 ereport(elevel,
     507             :                         (errcode_for_dynamic_shared_memory(),
     508             :                          errmsg("could not get shared memory segment: %m")));
     509             :             }
     510           0 :             return false;
     511             :         }
     512             : 
     513           0 :         *ident_cache = ident;
     514           0 :         *impl_private = ident_cache;
     515             :     }
     516             : 
     517             :     /* Handle teardown cases; the cached identifier is released first. */
     518           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     519             :     {
     520           0 :         pfree(ident_cache);
     521           0 :         *impl_private = NULL;
     522           0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     523             :         {
     524           0 :             ereport(elevel,
     525             :                     (errcode_for_dynamic_shared_memory(),
     526             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     527             :                             name)));
     528           0 :             return false;
     529             :         }
     530           0 :         *mapped_address = NULL;
     531           0 :         *mapped_size = 0;
     532           0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     533             :         {
     534           0 :             ereport(elevel,
     535             :                     (errcode_for_dynamic_shared_memory(),
     536             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     537             :                             name)));
     538           0 :             return false;
     539             :         }
     540           0 :         return true;
     541             :     }
     542             : 
     543             :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     544           0 :     if (op == DSM_OP_ATTACH)
     545             :     {
     546             :         struct shmid_ds shm;
     547             : 
     548           0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     549             :         {
     550           0 :             ereport(elevel,
     551             :                     (errcode_for_dynamic_shared_memory(),
     552             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     553             :                             name)));
     554           0 :             return false;
     555             :         }
     556           0 :         request_size = shm.shm_segsz;
     557             :     }
     558             : 
     559             :     /* Map it, with no address hint. */
     560           0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     561           0 :     if (address == (void *) -1)
     562             :     {
     563             :         int         save_errno;
     564             : 
     565             :         /* Back out what's already been done; remove only what we created. */
     566           0 :         save_errno = errno;
     567           0 :         if (op == DSM_OP_CREATE)
     568           0 :             shmctl(ident, IPC_RMID, NULL);
     569           0 :         errno = save_errno;
     570             : 
     571           0 :         ereport(elevel,
     572             :                 (errcode_for_dynamic_shared_memory(),
     573             :                  errmsg("could not map shared memory segment \"%s\": %m",
     574             :                         name)));
     575           0 :         return false;
     576             :     }
     577           0 :     *mapped_address = address;
     578           0 :     *mapped_size = request_size;
     579             : 
     580           0 :     return true;
     581             : }
     582             : #endif
     583             : 
     584             : #ifdef USE_DSM_WINDOWS
     585             : /*
     586             :  * Operating system primitives to support Windows shared memory.
     587             :  *
     588             :  * Windows shared memory implementation is done using file mapping
     589             :  * which can be backed by either physical file or system paging file.
     590             :  * Current implementation uses system paging file as other effects
     591             :  * like performance are not clear for physical file and it is used in similar
     592             :  * way for main shared memory in windows.
     593             :  *
     594             :  * A memory mapping object is a kernel object - they always get deleted when
     595             :  * the last reference to them goes away, either explicitly via a CloseHandle or
     596             :  * when the process containing the reference exits.
     597             :  */
     598             : static bool
     599             : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
     600             :                  void **impl_private, void **mapped_address,
     601             :                  Size *mapped_size, int elevel)
     602             : {
     603             :     char       *address;
     604             :     HANDLE      hmap;
     605             :     char        name[64];
     606             :     MEMORY_BASIC_INFORMATION info;
     607             : 
     608             :     /*
     609             :      * Storing the shared memory segment in the Global\ namespace, can allow
     610             :      * any process running in any session to access that file mapping object
     611             :      * provided that the caller has the required access rights. But to avoid
     612             :      * issues faced in main shared memory, we are using the naming convention
     613             :      * similar to main shared memory. We can change here once issue mentioned
     614             :      * in GetSharedMemName is resolved.
     615             :      */
     616             :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     617             : 
     618             :     /*
     619             :      * Handle teardown cases.  Since Windows automatically destroys the object
     620             :      * when no references remain, we can treat it the same as detach.
     621             :      */
     622             :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     623             :     {
     624             :         if (*mapped_address != NULL
     625             :             && UnmapViewOfFile(*mapped_address) == 0)
     626             :         {
     627             :             _dosmaperr(GetLastError());
     628             :             ereport(elevel,
     629             :                     (errcode_for_dynamic_shared_memory(),
     630             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     631             :                             name)));
     632             :             return false;
     633             :         }
     634             :         if (*impl_private != NULL
     635             :             && CloseHandle(*impl_private) == 0)
     636             :         {
     637             :             _dosmaperr(GetLastError());
     638             :             ereport(elevel,
     639             :                     (errcode_for_dynamic_shared_memory(),
     640             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     641             :                             name)));
     642             :             return false;
     643             :         }
     644             : 
     645             :         *impl_private = NULL;
     646             :         *mapped_address = NULL;
     647             :         *mapped_size = 0;
     648             :         return true;
     649             :     }
     650             : 
     651             :     /* Create new segment or open an existing one for attach. */
     652             :     if (op == DSM_OP_CREATE)
     653             :     {
     654             :         DWORD       size_high;
     655             :         DWORD       size_low;
     656             :         DWORD       errcode;
     657             : 
     658             :         /* Shifts >= the width of the type are undefined. */
     659             : #ifdef _WIN64
     660             :         size_high = request_size >> 32;
     661             : #else
     662             :         size_high = 0;
     663             : #endif
     664             :         size_low = (DWORD) request_size;
     665             : 
     666             :         /* CreateFileMapping might not clear the error code on success */
     667             :         SetLastError(0);
     668             : 
     669             :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     670             :                                  NULL,  /* Default security attrs */
     671             :                                  PAGE_READWRITE,    /* Memory is read/write */
     672             :                                  size_high, /* Upper 32 bits of size */
     673             :                                  size_low,  /* Lower 32 bits of size */
     674             :                                  name);
     675             : 
     676             :         errcode = GetLastError();
     677             :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     678             :         {
     679             :             /*
     680             :              * On Windows, when the segment already exists, a handle for the
     681             :              * existing segment is returned.  We must close it before
     682             :              * returning.  However, if the existing segment is created by a
     683             :              * service, then it returns ERROR_ACCESS_DENIED. We don't do
     684             :              * _dosmaperr here, so errno won't be modified.
     685             :              */
     686             :             if (hmap)
     687             :                 CloseHandle(hmap);
     688             :             return false;
     689             :         }
     690             : 
     691             :         if (!hmap)
     692             :         {
     693             :             _dosmaperr(errcode);
     694             :             ereport(elevel,
     695             :                     (errcode_for_dynamic_shared_memory(),
     696             :                      errmsg("could not create shared memory segment \"%s\": %m",
     697             :                             name)));
     698             :             return false;
     699             :         }
     700             :     }
     701             :     else
     702             :     {
     703             :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     704             :                                FALSE,   /* do not inherit the name */
     705             :                                name);   /* name of mapping object */
     706             :         if (!hmap)
     707             :         {
     708             :             _dosmaperr(GetLastError());
     709             :             ereport(elevel,
     710             :                     (errcode_for_dynamic_shared_memory(),
     711             :                      errmsg("could not open shared memory segment \"%s\": %m",
     712             :                             name)));
     713             :             return false;
     714             :         }
     715             :     }
     716             : 
     717             :     /* Map it. */
     718             :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     719             :                             0, 0, 0);
     720             :     if (!address)
     721             :     {
     722             :         int         save_errno;
     723             : 
     724             :         _dosmaperr(GetLastError());
     725             :         /* Back out what's already been done. */
     726             :         save_errno = errno;
     727             :         CloseHandle(hmap);
     728             :         errno = save_errno;
     729             : 
     730             :         ereport(elevel,
     731             :                 (errcode_for_dynamic_shared_memory(),
     732             :                  errmsg("could not map shared memory segment \"%s\": %m",
     733             :                         name)));
     734             :         return false;
     735             :     }
     736             : 
     737             :     /*
     738             :      * VirtualQuery gives size in page_size units, which is 4K for Windows. We
     739             :      * need size only when we are attaching, but it's better to get the size
     740             :      * when creating new segment to keep size consistent both for
     741             :      * DSM_OP_CREATE and DSM_OP_ATTACH.
     742             :      */
     743             :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     744             :     {
     745             :         int         save_errno;
     746             : 
     747             :         _dosmaperr(GetLastError());
     748             :         /* Back out what's already been done. */
     749             :         save_errno = errno;
     750             :         UnmapViewOfFile(address);
     751             :         CloseHandle(hmap);
     752             :         errno = save_errno;
     753             : 
     754             :         ereport(elevel,
     755             :                 (errcode_for_dynamic_shared_memory(),
     756             :                  errmsg("could not stat shared memory segment \"%s\": %m",
     757             :                         name)));
     758             :         return false;
     759             :     }
     760             : 
     761             :     *mapped_address = address;
     762             :     *mapped_size = info.RegionSize;
     763             :     *impl_private = hmap;
     764             : 
     765             :     return true;
     766             : }
     767             : #endif
     768             : 
     769             : #ifdef USE_DSM_MMAP
     770             : /*
     771             :  * Operating system primitives to support mmap-based shared memory.
     772             :  *
     773             :  * Calling this "shared memory" is somewhat of a misnomer, because what
     774             :  * we're really doing is creating a bunch of files and mapping them into
     775             :  * our address space.  The operating system may feel obliged to
     776             :  * synchronize the contents to disk even if nothing is being paged out,
     777             :  * which will not serve us well.  The user can relocate the pg_dynshmem
     778             :  * directory to a ramdisk to avoid this problem, if available.
     779             :  */
     780             : static bool
     781           0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
     782             :               void **impl_private, void **mapped_address, Size *mapped_size,
     783             :               int elevel)
     784             : {
     785             :     char        name[64];
     786             :     int         flags;
     787             :     int         fd;
     788             :     char       *address;
     789             : 
     790           0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     791             :              handle);
     792             : 
     793             :     /* Handle teardown cases. */
     794           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     795             :     {
     796           0 :         if (*mapped_address != NULL
     797           0 :             && munmap(*mapped_address, *mapped_size) != 0)
     798             :         {
     799           0 :             ereport(elevel,
     800             :                     (errcode_for_dynamic_shared_memory(),
     801             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     802             :                             name)));
     803           0 :             return false;
     804             :         }
     805           0 :         *mapped_address = NULL;
     806           0 :         *mapped_size = 0;
     807           0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)
     808             :         {
     809           0 :             ereport(elevel,
     810             :                     (errcode_for_dynamic_shared_memory(),
     811             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     812             :                             name)));
     813           0 :             return false;
     814             :         }
     815           0 :         return true;
     816             :     }
     817             : 
     818             :     /* Create new segment or open an existing one for attach. */
     819           0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     820           0 :     if ((fd = OpenTransientFile(name, flags)) == -1)
     821             :     {
     822           0 :         if (errno != EEXIST)
     823           0 :             ereport(elevel,
     824             :                     (errcode_for_dynamic_shared_memory(),
     825             :                      errmsg("could not open shared memory segment \"%s\": %m",
     826             :                             name)));
     827           0 :         return false;
     828             :     }
     829             : 
     830             :     /*
     831             :      * If we're attaching the segment, determine the current size; if we are
     832             :      * creating the segment, set the size to the requested value.
     833             :      */
     834           0 :     if (op == DSM_OP_ATTACH)
     835             :     {
     836             :         struct stat st;
     837             : 
     838           0 :         if (fstat(fd, &st) != 0)
     839             :         {
     840             :             int         save_errno;
     841             : 
     842             :             /* Back out what's already been done. */
     843           0 :             save_errno = errno;
     844           0 :             CloseTransientFile(fd);
     845           0 :             errno = save_errno;
     846             : 
     847           0 :             ereport(elevel,
     848             :                     (errcode_for_dynamic_shared_memory(),
     849             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     850             :                             name)));
     851           0 :             return false;
     852             :         }
     853           0 :         request_size = st.st_size;
     854             :     }
     855             :     else
     856             :     {
     857             :         /*
     858             :          * Allocate a buffer full of zeros.
     859             :          *
     860             :          * Note: palloc zbuffer, instead of just using a local char array, to
     861             :          * ensure it is reasonably well-aligned; this may save a few cycles
     862             :          * transferring data to the kernel.
     863             :          */
     864           0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
     865           0 :         uint32      remaining = request_size;
     866           0 :         bool        success = true;
     867             : 
     868             :         /*
     869             :          * Zero-fill the file. We have to do this the hard way to ensure that
     870             :          * all the file space has really been allocated, so that we don't
     871             :          * later seg fault when accessing the memory mapping.  This is pretty
     872             :          * pessimal.
     873             :          */
     874           0 :         while (success && remaining > 0)
     875             :         {
     876           0 :             Size        goal = remaining;
     877             : 
     878           0 :             if (goal > ZBUFFER_SIZE)
     879           0 :                 goal = ZBUFFER_SIZE;
     880           0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     881           0 :             if (write(fd, zbuffer, goal) == goal)
     882           0 :                 remaining -= goal;
     883             :             else
     884           0 :                 success = false;
     885           0 :             pgstat_report_wait_end();
     886             :         }
     887             : 
     888           0 :         if (!success)
     889             :         {
     890             :             int         save_errno;
     891             : 
     892             :             /* Back out what's already been done. */
     893           0 :             save_errno = errno;
     894           0 :             CloseTransientFile(fd);
     895           0 :             unlink(name);
     896           0 :             errno = save_errno ? save_errno : ENOSPC;
     897             : 
     898           0 :             ereport(elevel,
     899             :                     (errcode_for_dynamic_shared_memory(),
     900             :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     901             :                             name, request_size)));
     902           0 :             return false;
     903             :         }
     904             :     }
     905             : 
     906             :     /* Map it. */
     907           0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     908             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     909           0 :     if (address == MAP_FAILED)
     910             :     {
     911             :         int         save_errno;
     912             : 
     913             :         /* Back out what's already been done. */
     914           0 :         save_errno = errno;
     915           0 :         CloseTransientFile(fd);
     916           0 :         if (op == DSM_OP_CREATE)
     917           0 :             unlink(name);
     918           0 :         errno = save_errno;
     919             : 
     920           0 :         ereport(elevel,
     921             :                 (errcode_for_dynamic_shared_memory(),
     922             :                  errmsg("could not map shared memory segment \"%s\": %m",
     923             :                         name)));
     924           0 :         return false;
     925             :     }
     926           0 :     *mapped_address = address;
     927           0 :     *mapped_size = request_size;
     928             : 
     929           0 :     if (CloseTransientFile(fd) != 0)
     930             :     {
     931           0 :         ereport(elevel,
     932             :                 (errcode_for_file_access(),
     933             :                  errmsg("could not close shared memory segment \"%s\": %m",
     934             :                         name)));
     935           0 :         return false;
     936             :     }
     937             : 
     938           0 :     return true;
     939             : }
     940             : #endif
     941             : 
     942             : /*
     943             :  * Implementation-specific actions that must be performed when a segment is to
     944             :  * be preserved even when no backend has it attached.
     945             :  *
     946             :  * Except on Windows, we don't need to do anything at all.  But since Windows
     947             :  * cleans up segments automatically when no references remain, we duplicate
     948             :  * the segment handle into the postmaster process.  The postmaster needn't
     949             :  * do anything to receive the handle; Windows transfers it automatically.
     950             :  */
     951             : void
     952         112 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
     953             :                      void **impl_private_pm_handle)
     954             : {
     955         112 :     switch (dynamic_shared_memory_type)
     956             :     {
     957             : #ifdef USE_DSM_WINDOWS
     958             :         case DSM_IMPL_WINDOWS:
     959             :             {
     960             :                 HANDLE      hmap;
     961             : 
     962             :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,
     963             :                                      PostmasterHandle, &hmap, 0, FALSE,
     964             :                                      DUPLICATE_SAME_ACCESS))
     965             :                 {
     966             :                     char        name[64];
     967             : 
     968             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     969             :                     _dosmaperr(GetLastError());
     970             :                     ereport(ERROR,
     971             :                             (errcode_for_dynamic_shared_memory(),
     972             :                              errmsg("could not duplicate handle for \"%s\": %m",
     973             :                                     name)));
     974             :                 }
     975             : 
     976             :                 /*
     977             :                  * Here, we remember the handle that we created in the
     978             :                  * postmaster process.  This handle isn't actually usable in
     979             :                  * any process other than the postmaster, but that doesn't
     980             :                  * matter.  We're just holding onto it so that, if the segment
     981             :                  * is unpinned, dsm_impl_unpin_segment can close it.
     982             :                  */
     983             :                 *impl_private_pm_handle = hmap;
     984             :                 break;
     985             :             }
     986             : #endif
     987             :         default:
     988         112 :             break;
     989             :     }
     990         112 : }
     991             : 
     992             : /*
     993             :  * Implementation-specific actions that must be performed when a segment is no
     994             :  * longer to be preserved, so that it will be cleaned up when all backends
     995             :  * have detached from it.
     996             :  *
     997             :  * Except on Windows, we don't need to do anything at all.  For Windows, we
     998             :  * close the extra handle that dsm_impl_pin_segment created in the
     999             :  * postmaster's process space.
    1000             :  */
    1001             : void
    1002         112 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
    1003             : {
    1004         112 :     switch (dynamic_shared_memory_type)
    1005             :     {
    1006             : #ifdef USE_DSM_WINDOWS
    1007             :         case DSM_IMPL_WINDOWS:
    1008             :             {
    1009             :                 if (*impl_private &&
    1010             :                     !DuplicateHandle(PostmasterHandle, *impl_private,
    1011             :                                      NULL, NULL, 0, FALSE,
    1012             :                                      DUPLICATE_CLOSE_SOURCE))
    1013             :                 {
    1014             :                     char        name[64];
    1015             : 
    1016             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1017             :                     _dosmaperr(GetLastError());
    1018             :                     ereport(ERROR,
    1019             :                             (errcode_for_dynamic_shared_memory(),
    1020             :                              errmsg("could not duplicate handle for \"%s\": %m",
    1021             :                                     name)));
    1022             :                 }
    1023             : 
    1024             :                 *impl_private = NULL;
    1025             :                 break;
    1026             :             }
    1027             : #endif
    1028             :         default:
    1029         112 :             break;
    1030             :     }
    1031         112 : }
    1032             : 
    1033             : static int
    1034           0 : errcode_for_dynamic_shared_memory(void)
    1035             : {
    1036           0 :     if (errno == EFBIG || errno == ENOMEM)
    1037           0 :         return errcode(ERRCODE_OUT_OF_MEMORY);
    1038             :     else
    1039           0 :         return errcode_for_file_access();
    1040             : }

Generated by: LCOV version 1.13