LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 39 189 20.6 %
Date: 2019-09-19 02:07:14 Functions: 5 8 62.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dsm_impl.c
       4             :  *    manage dynamic shared memory segments
       5             :  *
       6             :  * This file provides low-level APIs for creating and destroying shared
       7             :  * memory segments using several different possible techniques.  We refer
       8             :  * to these segments as dynamic because they can be created, altered, and
       9             :  * destroyed at any point during the server life cycle.  This is unlike
      10             :  * the main shared memory segment, of which there is always exactly one
      11             :  * and which is always mapped at a fixed address in every PostgreSQL
      12             :  * background process.
      13             :  *
      14             :  * Because not all systems provide the same primitives in this area, nor
      15             :  * do all primitives behave the same way on all systems, we provide
      16             :  * several implementations of this facility.  Many systems implement
      17             :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18             :  * in this area, with the exception that shared memory identifiers live
      19             :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20             :  * name collisions with other processes (including other copies of
      21             :  * PostgreSQL) running on the same system.  Some systems only support
      22             :  * the older System V shared memory interface (shmget etc.) which is
      23             :  * also usable; however, the default allocation limits are often quite
      24             :  * small, and the namespace is even more restricted.
      25             :  *
      26             :  * We also provide an mmap-based shared memory implementation.  This may
      27             :  * be useful on systems that provide shared memory via a special-purpose
      28             :  * filesystem; by opting for this implementation, the user can even
      29             :  * control precisely where their shared memory segments are placed.  It
      30             :  * can also be used as a fallback for systems where shm_open and shmget
      31             :  * are not available or can't be used for some reason.  Of course,
      32             :  * mapping a file residing on an actual spinning disk is a fairly poor
      33             :  * approximation for shared memory because writeback may hurt performance
      34             :  * substantially, but there should be few systems where we must make do
      35             :  * with such poor tools.
      36             :  *
      37             :  * As ever, Windows requires its own implementation.
      38             :  *
      39             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      40             :  * Portions Copyright (c) 1994, Regents of the University of California
      41             :  *
      42             :  *
      43             :  * IDENTIFICATION
      44             :  *    src/backend/storage/ipc/dsm_impl.c
      45             :  *
      46             :  *-------------------------------------------------------------------------
      47             :  */
      48             : 
      49             : #include "postgres.h"
      50             : #include "miscadmin.h"
      51             : 
      52             : #include <fcntl.h>
      53             : #include <unistd.h>
      54             : #ifndef WIN32
      55             : #include <sys/mman.h>
      56             : #endif
      57             : #include <sys/stat.h>
      58             : #ifdef HAVE_SYS_IPC_H
      59             : #include <sys/ipc.h>
      60             : #endif
      61             : #ifdef HAVE_SYS_SHM_H
      62             : #include <sys/shm.h>
      63             : #endif
      64             : #include "common/file_perm.h"
      65             : #include "pgstat.h"
      66             : 
      67             : #include "portability/mem.h"
      68             : #include "storage/dsm_impl.h"
      69             : #include "storage/fd.h"
      70             : #include "utils/guc.h"
      71             : #include "utils/memutils.h"
      72             : #include "postmaster/postmaster.h"
      73             : 
      74             : #ifdef USE_DSM_POSIX
      75             : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      76             :                            void **impl_private, void **mapped_address,
      77             :                            Size *mapped_size, int elevel);
      78             : static int  dsm_impl_posix_resize(int fd, off_t size);
      79             : #endif
      80             : #ifdef USE_DSM_SYSV
      81             : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      82             :                           void **impl_private, void **mapped_address,
      83             :                           Size *mapped_size, int elevel);
      84             : #endif
      85             : #ifdef USE_DSM_WINDOWS
      86             : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      87             :                              void **impl_private, void **mapped_address,
      88             :                              Size *mapped_size, int elevel);
      89             : #endif
      90             : #ifdef USE_DSM_MMAP
      91             : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      92             :                           void **impl_private, void **mapped_address,
      93             :                           Size *mapped_size, int elevel);
      94             : #endif
      95             : static int  errcode_for_dynamic_shared_memory(void);
      96             : 
      97             : const struct config_enum_entry dynamic_shared_memory_options[] = {
      98             : #ifdef USE_DSM_POSIX
      99             :     {"posix", DSM_IMPL_POSIX, false},
     100             : #endif
     101             : #ifdef USE_DSM_SYSV
     102             :     {"sysv", DSM_IMPL_SYSV, false},
     103             : #endif
     104             : #ifdef USE_DSM_WINDOWS
     105             :     {"windows", DSM_IMPL_WINDOWS, false},
     106             : #endif
     107             : #ifdef USE_DSM_MMAP
     108             :     {"mmap", DSM_IMPL_MMAP, false},
     109             : #endif
     110             :     {NULL, 0, false}
     111             : };
     112             : 
     113             : /* Implementation selector. */
     114             : int         dynamic_shared_memory_type;
     115             : 
     116             : /* Size of buffer to be used for zero-filling. */
     117             : #define ZBUFFER_SIZE                8192
     118             : 
     119             : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"
     120             : 
     121             : /*------
     122             :  * Perform a low-level shared memory operation in a platform-specific way,
     123             :  * as dictated by the selected implementation.  Each implementation is
     124             :  * required to implement the following primitives.
     125             :  *
     126             :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     127             :  * map it.
     128             :  *
     129             :  * DSM_OP_ATTACH.  Map the existing segment, using its current size.
     130             :  *
     131             :  * DSM_OP_DETACH.  Unmap the segment.
     132             :  *
     133             :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     134             :  * segment.
     135             :  *
     136             :  * Arguments:
     137             :  *   op: The operation to be performed.
     138             :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     139             :  *     new handle the caller wants created.
     140             :  *   request_size: For DSM_OP_CREATE, the requested size.  Otherwise, 0.
     141             :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     142             :  *     to NULL for the first operation on a shared memory segment within this
     143             :  *     backend; thereafter, it will point to the value to which it was set
     144             :  *     on the previous call.
     145             :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     146             :  *     if none.  Updated with new mapping address.
     147             :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     148             :  *     Updated with new mapped size.
     149             :  *   elevel: Level at which to log errors.
     150             :  *
     151             :  * Return value: true on success, false on failure.  When false is returned,
     152             :  * a message should first be logged at the specified elevel, except in the
     153             :  * case where DSM_OP_CREATE experiences a name collision, which should
     154             :  * silently return false.
     155             :  *-----
     156             :  */
     157             : bool
     158       13526 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     159             :             void **impl_private, void **mapped_address, Size *mapped_size,
     160             :             int elevel)
     161             : {
     162             :     Assert(op == DSM_OP_CREATE || request_size == 0);
     163             :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
     164             :            (*mapped_address == NULL && *mapped_size == 0));
     165             : 
     166       13526 :     switch (dynamic_shared_memory_type)
     167             :     {
     168             : #ifdef USE_DSM_POSIX
     169             :         case DSM_IMPL_POSIX:
     170       13526 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     171             :                                   mapped_address, mapped_size, elevel);
     172             : #endif
     173             : #ifdef USE_DSM_SYSV
     174             :         case DSM_IMPL_SYSV:
     175           0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     176             :                                  mapped_address, mapped_size, elevel);
     177             : #endif
     178             : #ifdef USE_DSM_WINDOWS
     179             :         case DSM_IMPL_WINDOWS:
     180             :             return dsm_impl_windows(op, handle, request_size, impl_private,
     181             :                                     mapped_address, mapped_size, elevel);
     182             : #endif
     183             : #ifdef USE_DSM_MMAP
     184             :         case DSM_IMPL_MMAP:
     185           0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     186             :                                  mapped_address, mapped_size, elevel);
     187             : #endif
     188             :         default:
     189           0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     190             :                  dynamic_shared_memory_type);
     191             :             return false;
     192             :     }
     193             : }
     194             : 
     195             : #ifdef USE_DSM_POSIX
     196             : /*
     197             :  * Operating system primitives to support POSIX shared memory.
     198             :  *
     199             :  * POSIX shared memory segments are created and opened using shm_open()
     200             :  * and removed using shm_unlink(); other operations, such as sizing or
     201             :  * mapping the segment, are performed as if the segments were files.
     202             :  *
     203             :  * Indeed, on some platforms, they may be implemented that way.  While
     204             :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     205             :  * some operating systems may implement them as files, even going so far
     206             :  * as to treat a request for /xyz as a request to create a file by that name
     207             :  * in the root directory.  Users of such broken platforms should select
     208             :  * a different shared memory implementation.
     209             :  */
     210             : static bool
     211       13526 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     212             :                void **impl_private, void **mapped_address, Size *mapped_size,
     213             :                int elevel)
     214             : {
     215             :     char        name[64];
     216             :     int         flags;
     217             :     int         fd;
     218             :     char       *address;
     219             : 
     220       13526 :     snprintf(name, 64, "/PostgreSQL.%u", handle);
     221             : 
     222             :     /* Handle teardown cases. */
     223       13526 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     224             :     {
     225        7380 :         if (*mapped_address != NULL
     226        6728 :             && munmap(*mapped_address, *mapped_size) != 0)
     227             :         {
     228           0 :             ereport(elevel,
     229             :                     (errcode_for_dynamic_shared_memory(),
     230             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     231             :                             name)));
     232           0 :             return false;
     233             :         }
     234        7380 :         *mapped_address = NULL;
     235        7380 :         *mapped_size = 0;
     236        7380 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     237             :         {
     238           0 :             ereport(elevel,
     239             :                     (errcode_for_dynamic_shared_memory(),
     240             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     241             :                             name)));
     242           0 :             return false;
     243             :         }
     244        7380 :         return true;
     245             :     }
     246             : 
     247             :     /*
     248             :      * Create new segment or open an existing one for attach.
     249             :      *
     250             :      * Even though we're not going through fd.c, we should be safe against
     251             :      * running out of file descriptors, because of NUM_RESERVED_FDS.  We're
     252             :      * only opening one extra descriptor here, and we'll close it before
     253             :      * returning.
     254             :      */
     255        6146 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     256        6146 :     if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
     257             :     {
     258           0 :         if (errno != EEXIST)
     259           0 :             ereport(elevel,
     260             :                     (errcode_for_dynamic_shared_memory(),
     261             :                      errmsg("could not open shared memory segment \"%s\": %m",
     262             :                             name)));
     263           0 :         return false;
     264             :     }
     265             : 
     266             :     /*
     267             :      * If we're attaching the segment, determine the current size; if we are
     268             :      * creating the segment, set the size to the requested value.
     269             :      */
     270        6146 :     if (op == DSM_OP_ATTACH)
     271             :     {
     272             :         struct stat st;
     273             : 
     274        3638 :         if (fstat(fd, &st) != 0)
     275             :         {
     276             :             int         save_errno;
     277             : 
     278             :             /* Back out what's already been done. */
     279           0 :             save_errno = errno;
     280           0 :             close(fd);
     281           0 :             errno = save_errno;
     282             : 
     283           0 :             ereport(elevel,
     284             :                     (errcode_for_dynamic_shared_memory(),
     285             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     286             :                             name)));
     287           0 :             return false;
     288             :         }
     289        3638 :         request_size = st.st_size;
     290             :     }
     291        2508 :     else if (dsm_impl_posix_resize(fd, request_size) != 0)
     292             :     {
     293             :         int         save_errno;
     294             : 
     295             :         /* Back out what's already been done. */
     296           0 :         save_errno = errno;
     297           0 :         close(fd);
     298           0 :         shm_unlink(name);
     299           0 :         errno = save_errno;
     300             : 
     301             :         /*
     302             :          * If we received a query cancel or termination signal, we will have
     303             :          * EINTR set here.  If the caller said that errors are OK here, check
     304             :          * for interrupts immediately.
     305             :          */
     306           0 :         if (errno == EINTR && elevel >= ERROR)
     307           0 :             CHECK_FOR_INTERRUPTS();
     308             : 
     309           0 :         ereport(elevel,
     310             :                 (errcode_for_dynamic_shared_memory(),
     311             :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     312             :                         name, request_size)));
     313           0 :         return false;
     314             :     }
     315             : 
     316             :     /* Map it. */
     317        6146 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     318             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     319        6146 :     if (address == MAP_FAILED)
     320             :     {
     321             :         int         save_errno;
     322             : 
     323             :         /* Back out what's already been done. */
     324           0 :         save_errno = errno;
     325           0 :         close(fd);
     326           0 :         if (op == DSM_OP_CREATE)
     327           0 :             shm_unlink(name);
     328           0 :         errno = save_errno;
     329             : 
     330           0 :         ereport(elevel,
     331             :                 (errcode_for_dynamic_shared_memory(),
     332             :                  errmsg("could not map shared memory segment \"%s\": %m",
     333             :                         name)));
     334           0 :         return false;
     335             :     }
     336        6146 :     *mapped_address = address;
     337        6146 :     *mapped_size = request_size;
     338        6146 :     close(fd);
     339             : 
     340        6146 :     return true;
     341             : }
     342             : 
     343             : /*
     344             :  * Set the size of a virtual memory region associated with a file descriptor.
     345             :  * If necessary, also ensure that virtual memory is actually allocated by the
     346             :  * operating system, to avoid nasty surprises later.
     347             :  *
     348             :  * Returns non-zero if either truncation or allocation fails, and sets errno.
     349             :  */
     350             : static int
     351        2508 : dsm_impl_posix_resize(int fd, off_t size)
     352             : {
     353             :     int         rc;
     354             : 
     355             :     /* Truncate (or extend) the file to the requested size. */
     356        2508 :     rc = ftruncate(fd, size);
     357             : 
     358             :     /*
     359             :      * On Linux, a shm_open fd is backed by a tmpfs file.  After resizing with
     360             :      * ftruncate, the file may contain a hole.  Accessing memory backed by a
     361             :      * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
     362             :      * is no more tmpfs space available.  So we ask tmpfs to allocate pages
     363             :      * here, so we can fail gracefully with ENOSPC now rather than risking
     364             :      * SIGBUS later.
     365             :      */
     366             : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
     367        2508 :     if (rc == 0)
     368             :     {
     369             :         /*
     370             :          * We may get interrupted.  If so, just retry unless there is an
     371             :          * interrupt pending.  This avoids the possibility of looping forever
     372             :          * if another backend is repeatedly trying to interrupt us.
     373             :          */
     374             :         do
     375             :         {
     376        2508 :             rc = posix_fallocate(fd, 0, size);
     377        2508 :         } while (rc == EINTR && !(ProcDiePending || QueryCancelPending));
     378             : 
     379             :         /*
     380             :          * The caller expects errno to be set, but posix_fallocate() doesn't
     381             :          * set it.  Instead it returns error numbers directly.  So set errno,
     382             :          * even though we'll also return rc to indicate success or failure.
     383             :          */
     384        2508 :         errno = rc;
     385             :     }
     386             : #endif                          /* HAVE_POSIX_FALLOCATE && __linux__ */
     387             : 
     388        2508 :     return rc;
     389             : }
     390             : 
     391             : #endif                          /* USE_DSM_POSIX */
     392             : 
     393             : #ifdef USE_DSM_SYSV
     394             : /*
     395             :  * Operating system primitives to support System V shared memory.
     396             :  *
     397             :  * System V shared memory segments are manipulated using shmget(), shmat(),
     398             :  * shmdt(), and shmctl().  As the default allocation limits for System V
     399             :  * shared memory are usually quite low, the POSIX facilities may be
     400             :  * preferable; but those are not supported everywhere.
     401             :  */
     402             : static bool
     403           0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     404             :               void **impl_private, void **mapped_address, Size *mapped_size,
     405             :               int elevel)
     406             : {
     407             :     key_t       key;
     408             :     int         ident;
     409             :     char       *address;
     410             :     char        name[64];
     411             :     int        *ident_cache;
     412             : 
     413             :     /*
     414             :      * POSIX shared memory and mmap-based shared memory identify segments with
     415             :      * names.  To avoid needless error message variation, we use the handle as
     416             :      * the name.
     417             :      */
     418           0 :     snprintf(name, 64, "%u", handle);
     419             : 
     420             :     /*
     421             :      * The System V shared memory namespace is very restricted; names are of
     422             :      * type key_t, which is expected to be some sort of integer data type, but
     423             :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     424             :      * identify shared memory segments across processes, this might seem like
     425             :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     426             :      * the cast below might truncate away some bits from the handle the
     427             :  * user provided, but it'll truncate exactly the same bits away in exactly
     428             :      * the same fashion every time we use that handle, which is all that
     429             :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     430             :      * won't use the full range of available key space, but that's no big deal
     431             :      * either.
     432             :      *
     433             :      * We do make sure that the key isn't negative, because that might not be
     434             :      * portable.
     435             :      */
     436           0 :     key = (key_t) handle;
     437           0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     438           0 :         key = -key;
     439             : 
     440             :     /*
     441             :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     442             :      * up with that value by chance during a create operation, just pretend it
     443             :      * already exists, so that caller will retry.  If we run into it anywhere
     444             :      * else, the caller has passed a handle that doesn't correspond to
     445             :      * anything we ever created, which should not happen.
     446             :      */
     447           0 :     if (key == IPC_PRIVATE)
     448             :     {
     449           0 :         if (op != DSM_OP_CREATE)
     450           0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     451           0 :         errno = EEXIST;
     452           0 :         return false;
     453             :     }
     454             : 
     455             :     /*
     456             :      * Before we can do anything with a shared memory segment, we have to map
     457             :      * the shared memory key to a shared memory identifier using shmget(). To
     458             :      * avoid repeated lookups, we store the identifier using impl_private.
     459             :      */
     460           0 :     if (*impl_private != NULL)
     461             :     {
     462           0 :         ident_cache = *impl_private;
     463           0 :         ident = *ident_cache;
     464             :     }
     465             :     else
     466             :     {
     467           0 :         int         flags = IPCProtection;
     468             :         size_t      segsize;
     469             : 
     470             :         /*
     471             :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     472             :          * leak the resource if memory allocation fails.
     473             :          */
     474           0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     475             : 
     476             :         /*
     477             :          * When using shmget to find an existing segment, we must pass the
     478             :          * size as 0.  Passing a non-zero size which is greater than the
     479             :          * actual size will result in EINVAL.
     480             :          */
     481           0 :         segsize = 0;
     482             : 
     483           0 :         if (op == DSM_OP_CREATE)
     484             :         {
     485           0 :             flags |= IPC_CREAT | IPC_EXCL;
     486           0 :             segsize = request_size;
     487             :         }
     488             : 
     489           0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     490             :         {
     491           0 :             if (errno != EEXIST)
     492             :             {
     493           0 :                 int         save_errno = errno;
     494             : 
     495           0 :                 pfree(ident_cache);
     496           0 :                 errno = save_errno;
     497           0 :                 ereport(elevel,
     498             :                         (errcode_for_dynamic_shared_memory(),
     499             :                          errmsg("could not get shared memory segment: %m")));
     500             :             }
     501           0 :             return false;
     502             :         }
     503             : 
     504           0 :         *ident_cache = ident;
     505           0 :         *impl_private = ident_cache;
     506             :     }
     507             : 
     508             :     /* Handle teardown cases. */
     509           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     510             :     {
     511           0 :         pfree(ident_cache);
     512           0 :         *impl_private = NULL;
     513           0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     514             :         {
     515           0 :             ereport(elevel,
     516             :                     (errcode_for_dynamic_shared_memory(),
     517             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     518             :                             name)));
     519           0 :             return false;
     520             :         }
     521           0 :         *mapped_address = NULL;
     522           0 :         *mapped_size = 0;
     523           0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     524             :         {
     525           0 :             ereport(elevel,
     526             :                     (errcode_for_dynamic_shared_memory(),
     527             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     528             :                             name)));
     529           0 :             return false;
     530             :         }
     531           0 :         return true;
     532             :     }
     533             : 
     534             :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     535           0 :     if (op == DSM_OP_ATTACH)
     536             :     {
     537             :         struct shmid_ds shm;
     538             : 
     539           0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     540             :         {
     541           0 :             ereport(elevel,
     542             :                     (errcode_for_dynamic_shared_memory(),
     543             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     544             :                             name)));
     545           0 :             return false;
     546             :         }
     547           0 :         request_size = shm.shm_segsz;
     548             :     }
     549             : 
     550             :     /* Map it. */
     551           0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     552           0 :     if (address == (void *) -1)
     553             :     {
     554             :         int         save_errno;
     555             : 
     556             :         /* Back out what's already been done. */
     557           0 :         save_errno = errno;
     558           0 :         if (op == DSM_OP_CREATE)
     559           0 :             shmctl(ident, IPC_RMID, NULL);
     560           0 :         errno = save_errno;
     561             : 
     562           0 :         ereport(elevel,
     563             :                 (errcode_for_dynamic_shared_memory(),
     564             :                  errmsg("could not map shared memory segment \"%s\": %m",
     565             :                         name)));
     566           0 :         return false;
     567             :     }
     568           0 :     *mapped_address = address;
     569           0 :     *mapped_size = request_size;
     570             : 
     571           0 :     return true;
     572             : }
     573             : #endif
     574             : 
     575             : #ifdef USE_DSM_WINDOWS
     576             : /*
     577             :  * Operating system primitives to support Windows shared memory.
     578             :  *
     579             :  * The Windows shared memory implementation uses file mappings, which
     580             :  * can be backed by either a physical file or the system paging file.
     581             :  * The current implementation uses the system paging file, because other
     582             :  * effects (such as performance) are not clear for a physical file, and
     583             :  * because main shared memory is handled in a similar way on Windows.
     584             :  *
     585             :  * A memory mapping object is a kernel object - it always gets deleted when
     586             :  * the last reference to it goes away, either explicitly via CloseHandle or
     587             :  * when the process containing the reference exits.
                     :  *
                     :  * Arguments:
                     :  *  op              DSM_OP_CREATE creates a new named mapping of
                     :  *                  request_size bytes; DSM_OP_DETACH and DSM_OP_DESTROY
                     :  *                  both unmap the view and close the handle; any other
                     :  *                  op opens an existing mapping by name (the attach case).
                     :  *  handle          formatted into the object name together with
                     :  *                  SEGMENT_NAME_PREFIX.
                     :  *  request_size    only consulted for DSM_OP_CREATE.
                     :  *  impl_private    in/out: holds the mapping HANDLE.  Set on successful
                     :  *                  create/attach, closed and cleared on teardown.
                     :  *  mapped_address  in/out: address of the mapped view.
                     :  *  mapped_size     out: the RegionSize reported by VirtualQuery for the
                     :  *                  view, or 0 after teardown.
                     :  *  elevel          error level handed to ereport() on failure.
                     :  *
                     :  * Returns true on success.  On failure the problem is reported through
                     :  * ereport(elevel, ...) and, if that returns, the result is false.  The one
                     :  * exception is DSM_OP_CREATE when the name is already in use
                     :  * (ERROR_ALREADY_EXISTS or ERROR_ACCESS_DENIED): that case returns false
                     :  * without reporting anything.  When a teardown step fails, the function
                     :  * returns before clearing *impl_private, *mapped_address and *mapped_size.
     588             :  */
     589             : static bool
     590             : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
     591             :                  void **impl_private, void **mapped_address,
     592             :                  Size *mapped_size, int elevel)
     593             : {
     594             :     char       *address;
     595             :     HANDLE      hmap;
     596             :     char        name[64];
     597             :     MEMORY_BASIC_INFORMATION info;
     598             : 
     599             :     /*
     600             :      * Storing the shared memory segment in the Global\ namespace would allow
     601             :      * any process running in any session to access that file mapping object,
     602             :      * provided that the caller has the required access rights.  But to avoid
     603             :      * the issues faced in main shared memory, we use a naming convention
     604             :      * similar to main shared memory's.  We can change this once the issue
     605             :      * mentioned in GetSharedMemName is resolved.
     606             :      */
     607             :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     608             : 
     609             :     /*
     610             :      * Handle teardown cases.  Since Windows automatically destroys the object
     611             :      * when no references remain, we can treat it the same as detach.
     612             :      */
     613             :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     614             :     {
     615             :         if (*mapped_address != NULL
     616             :             && UnmapViewOfFile(*mapped_address) == 0)
     617             :         {
     618             :             _dosmaperr(GetLastError());
     619             :             ereport(elevel,
     620             :                     (errcode_for_dynamic_shared_memory(),
     621             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     622             :                             name)));
     623             :             return false;
     624             :         }
     625             :         if (*impl_private != NULL
     626             :             && CloseHandle(*impl_private) == 0)
     627             :         {
     628             :             _dosmaperr(GetLastError());
     629             :             ereport(elevel,
     630             :                     (errcode_for_dynamic_shared_memory(),
     631             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     632             :                             name)));
     633             :             return false;
     634             :         }
     635             : 
     636             :         *impl_private = NULL;
     637             :         *mapped_address = NULL;
     638             :         *mapped_size = 0;
     639             :         return true;
     640             :     }
     641             : 
     642             :     /* Create new segment or open an existing one for attach. */
     643             :     if (op == DSM_OP_CREATE)
     644             :     {
     645             :         DWORD       size_high;
     646             :         DWORD       size_low;
     647             :         DWORD       errcode;
     648             : 
     649             :         /* Shifts >= the width of the type are undefined. */
     650             : #ifdef _WIN64
     651             :         size_high = request_size >> 32;
     652             : #else
     653             :         size_high = 0;
     654             : #endif
     655             :         size_low = (DWORD) request_size;
     656             : 
     657             :         /* CreateFileMapping might not clear the error code on success */
     658             :         SetLastError(0);
     659             : 
     660             :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     661             :                                  NULL,  /* Default security attrs */
     662             :                                  PAGE_READWRITE,    /* Memory is read/write */
     663             :                                  size_high, /* Upper 32 bits of size */
     664             :                                  size_low,  /* Lower 32 bits of size */
     665             :                                  name);
     666             : 
     667             :         errcode = GetLastError();
     668             :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     669             :         {
     670             :             /*
     671             :              * On Windows, when the segment already exists, a handle for the
     672             :              * existing segment is returned.  We must close it before
     673             :              * returning.  However, if the existing segment was created by a
     674             :              * service, then it returns ERROR_ACCESS_DENIED. We don't do
     675             :              * _dosmaperr here, so errno won't be modified.
     676             :              */
     677             :             if (hmap)
     678             :                 CloseHandle(hmap);
     679             :             return false;
     680             :         }
     681             : 
     682             :         if (!hmap)
     683             :         {
     684             :             _dosmaperr(errcode);
     685             :             ereport(elevel,
     686             :                     (errcode_for_dynamic_shared_memory(),
     687             :                      errmsg("could not create shared memory segment \"%s\": %m",
     688             :                             name)));
     689             :             return false;
     690             :         }
     691             :     }
     692             :     else
     693             :     {
     694             :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     695             :                                FALSE,   /* do not inherit the name */
     696             :                                name);   /* name of mapping object */
     697             :         if (!hmap)
     698             :         {
     699             :             _dosmaperr(GetLastError());
     700             :             ereport(elevel,
     701             :                     (errcode_for_dynamic_shared_memory(),
     702             :                      errmsg("could not open shared memory segment \"%s\": %m",
     703             :                             name)));
     704             :             return false;
     705             :         }
     706             :     }
     707             : 
     708             :     /* Map it.  Offset 0 and length 0 request a view of the whole mapping. */
     709             :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     710             :                             0, 0, 0);
     711             :     if (!address)
     712             :     {
     713             :         int         save_errno;
     714             : 
     715             :         _dosmaperr(GetLastError());
     716             :         /* Back out what's already been done, keeping errno for the report. */
     717             :         save_errno = errno;
     718             :         CloseHandle(hmap);
     719             :         errno = save_errno;
     720             : 
     721             :         ereport(elevel,
     722             :                 (errcode_for_dynamic_shared_memory(),
     723             :                  errmsg("could not map shared memory segment \"%s\": %m",
     724             :                         name)));
     725             :         return false;
     726             :     }
     727             : 
     728             :     /*
     729             :      * VirtualQuery gives size in page_size units, which is 4K for Windows. We
     730             :      * need size only when we are attaching, but it's better to get the size
     731             :      * when creating new segment to keep size consistent both for
     732             :      * DSM_OP_CREATE and DSM_OP_ATTACH.
     733             :      */
     734             :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     735             :     {
     736             :         int         save_errno;
     737             : 
     738             :         _dosmaperr(GetLastError());
     739             :         /* Back out what's already been done, keeping errno for the report. */
     740             :         save_errno = errno;
     741             :         UnmapViewOfFile(address);
     742             :         CloseHandle(hmap);
     743             :         errno = save_errno;
     744             : 
     745             :         ereport(elevel,
     746             :                 (errcode_for_dynamic_shared_memory(),
     747             :                  errmsg("could not stat shared memory segment \"%s\": %m",
     748             :                         name)));
     749             :         return false;
     750             :     }
     751             : 
     752             :     *mapped_address = address;
     753             :     *mapped_size = info.RegionSize;
     754             :     *impl_private = hmap;
     755             : 
     756             :     return true;
     757             : }
     758             : #endif
     759             : 
     760             : #ifdef USE_DSM_MMAP
     761             : /*
     762             :  * Operating system primitives to support mmap-based shared memory.
     763             :  *
     764             :  * Calling this "shared memory" is somewhat of a misnomer, because what
     765             :  * we're really doing is creating a bunch of files and mapping them into
     766             :  * our address space.  The operating system may feel obliged to
     767             :  * synchronize the contents to disk even if nothing is being paged out,
     768             :  * which will not serve us well.  The user can relocate the pg_dynshmem
     769             :  * directory to a ramdisk to avoid this problem, if available.
                     :  *
                     :  * Arguments:
                     :  *  op              DSM_OP_CREATE creates the backing file exclusively and
                     :  *                  zero-fills it to request_size bytes; DSM_OP_ATTACH opens
                     :  *                  an existing file and takes the size from fstat();
                     :  *                  DSM_OP_DETACH unmaps; DSM_OP_DESTROY unmaps and unlinks.
                     :  *  handle          formatted into the backing file's name.
                     :  *  request_size    size to create; overwritten with the file size on attach.
                     :  *  impl_private    not used by this implementation.
                     :  *  mapped_address  in/out: address of the mapping.
                     :  *  mapped_size     in/out: length of the mapping (needed for munmap).
                     :  *  elevel          error level handed to ereport() on failure.
                     :  *
                     :  * Returns true on success.  On failure the problem is reported through
                     :  * ereport(elevel, ...) and, if that returns, the result is false; an open
                     :  * failure with errno == EEXIST returns false without reporting anything.
                     :  * Note that if the final CloseTransientFile() fails, false is returned even
                     :  * though *mapped_address and *mapped_size have already been set.
     770             :  */
     771             : static bool
     772           0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
     773             :               void **impl_private, void **mapped_address, Size *mapped_size,
     774             :               int elevel)
     775             : {
     776             :     char        name[64];
     777             :     int         flags;
     778             :     int         fd;
     779             :     char       *address;
     780             : 
     781           0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     782             :              handle);
     783             : 
     784             :     /* Handle teardown cases. */
     785           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     786             :     {
     787           0 :         if (*mapped_address != NULL
     788           0 :             && munmap(*mapped_address, *mapped_size) != 0)
     789             :         {
     790           0 :             ereport(elevel,
     791             :                     (errcode_for_dynamic_shared_memory(),
     792             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     793             :                             name)));
     794           0 :             return false;
     795             :         }
     796           0 :         *mapped_address = NULL;
     797           0 :         *mapped_size = 0;
     798           0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)
     799             :         {
     800           0 :             ereport(elevel,
     801             :                     (errcode_for_dynamic_shared_memory(),
     802             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     803             :                             name)));
     804           0 :             return false;
     805             :         }
     806           0 :         return true;
     807             :     }
     808             : 
     809             :     /* Create new segment or open an existing one for attach. */
     810           0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     811           0 :     if ((fd = OpenTransientFile(name, flags)) == -1)
     812             :     {
     813           0 :         if (errno != EEXIST)
     814           0 :             ereport(elevel,
     815             :                     (errcode_for_dynamic_shared_memory(),
     816             :                      errmsg("could not open shared memory segment \"%s\": %m",
     817             :                             name)));
     818           0 :         return false;
     819             :     }
     820             : 
     821             :     /*
     822             :      * If we're attaching the segment, determine the current size; if we are
     823             :      * creating the segment, set the size to the requested value.
     824             :      */
     825           0 :     if (op == DSM_OP_ATTACH)
     826             :     {
     827             :         struct stat st;
     828             : 
     829           0 :         if (fstat(fd, &st) != 0)
     830             :         {
     831             :             int         save_errno;
     832             : 
     833             :             /* Back out what's already been done. */
     834           0 :             save_errno = errno;
     835           0 :             CloseTransientFile(fd);
     836           0 :             errno = save_errno;
     837             : 
     838           0 :             ereport(elevel,
     839             :                     (errcode_for_dynamic_shared_memory(),
     840             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     841             :                             name)));
     842           0 :             return false;
     843             :         }
     844           0 :         request_size = st.st_size;
     845             :     }
     846             :     else
     847             :     {
     848             :         /*
     849             :          * Allocate a buffer full of zeros.
     850             :          *
     851             :          * Note: palloc zbuffer, instead of just using a local char array, to
     852             :          * ensure it is reasonably well-aligned; this may save a few cycles
     853             :          * transferring data to the kernel.
                     :          *
                     :          * "remaining" must be as wide as request_size.  With a 32-bit
                     :          * counter, a request of 4GB or more would be truncated, only part
                     :          * of the file would be zero-filled, and the mapping created below
                     :          * would extend past the end of the file.
     854             :          */
     855           0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
     856           0 :         Size        remaining = request_size;
     857           0 :         bool        success = true;
     858             : 
     859             :         /*
     860             :          * Zero-fill the file. We have to do this the hard way to ensure that
     861             :          * all the file space has really been allocated, so that we don't
     862             :          * later seg fault when accessing the memory mapping.  This is pretty
     863             :          * pessimal.
     864             :          */
     865           0 :         while (success && remaining > 0)
     866             :         {
     867           0 :             Size        goal = remaining;
     868             : 
     869           0 :             if (goal > ZBUFFER_SIZE)
     870           0 :                 goal = ZBUFFER_SIZE;
     871           0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     872           0 :             if (write(fd, zbuffer, goal) == goal)
     873           0 :                 remaining -= goal;
     874             :             else
     875           0 :                 success = false;
     876           0 :             pgstat_report_wait_end();
     877             :         }
     878             : 
     879           0 :         if (!success)
     880             :         {
     881             :             int         save_errno;
     882             : 
     883             :             /* Back out what's already been done. */
     884           0 :             save_errno = errno;
     885           0 :             CloseTransientFile(fd);
     886           0 :             unlink(name);
     887           0 :             errno = save_errno ? save_errno : ENOSPC;
     888             : 
     889           0 :             ereport(elevel,
     890             :                     (errcode_for_dynamic_shared_memory(),
     891             :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     892             :                             name, request_size)));
     893           0 :             return false;
     894             :         }
     895             :     }
     896             : 
     897             :     /* Map it. */
     898           0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     899             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     900           0 :     if (address == MAP_FAILED)
     901             :     {
     902             :         int         save_errno;
     903             : 
     904             :         /* Back out what's already been done. */
     905           0 :         save_errno = errno;
     906           0 :         CloseTransientFile(fd);
     907           0 :         if (op == DSM_OP_CREATE)
     908           0 :             unlink(name);
     909           0 :         errno = save_errno;
     910             : 
     911           0 :         ereport(elevel,
     912             :                 (errcode_for_dynamic_shared_memory(),
     913             :                  errmsg("could not map shared memory segment \"%s\": %m",
     914             :                         name)));
     915           0 :         return false;
     916             :     }
     917           0 :     *mapped_address = address;
     918           0 :     *mapped_size = request_size;
     919             : 
     920           0 :     if (CloseTransientFile(fd) != 0)
     921             :     {
     922           0 :         ereport(elevel,
     923             :                 (errcode_for_file_access(),
     924             :                  errmsg("could not close shared memory segment \"%s\": %m",
     925             :                         name)));
     926           0 :         return false;
     927             :     }
     928             : 
     929           0 :     return true;
     930             : }
     931             : #endif
     932             : 
     933             : /*
     934             :  * Implementation-specific actions that must be performed when a segment is to
     935             :  * be preserved even when no backend has it attached.
     936             :  *
     937             :  * Except on Windows, we don't need to do anything at all.  But since Windows
     938             :  * cleans up segments automatically when no references remain, we duplicate
     939             :  * the segment handle into the postmaster process.  The postmaster needn't
     940             :  * do anything to receive the handle; Windows transfers it automatically.
                     :  *
                     :  * Arguments:
                     :  *  handle                  only used to build the segment name for the
                     :  *                          error message.
                     :  *  impl_private            this process's handle for the segment; it is
                     :  *                          the source handle given to DuplicateHandle.
                     :  *  impl_private_pm_handle  out: receives the duplicate created in the
                     :  *                          postmaster.  It is written only in the Windows
                     :  *                          case; for every other implementation it is left
                     :  *                          untouched.
                     :  *
                     :  * A DuplicateHandle failure is reported with ereport(ERROR).
     941             :  */
     942             : void
     943         108 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
     944             :                      void **impl_private_pm_handle)
     945             : {
     946         108 :     switch (dynamic_shared_memory_type)
     947             :     {
     948             : #ifdef USE_DSM_WINDOWS
     949             :         case DSM_IMPL_WINDOWS:
     950             :             {
     951             :                 HANDLE      hmap;
     952             : 
     953             :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,
     954             :                                      PostmasterHandle, &hmap, 0, FALSE,
     955             :                                      DUPLICATE_SAME_ACCESS))
     956             :                 {
     957             :                     char        name[64];
     958             : 
     959             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     960             :                     _dosmaperr(GetLastError());
     961             :                     ereport(ERROR,
     962             :                             (errcode_for_dynamic_shared_memory(),
     963             :                              errmsg("could not duplicate handle for \"%s\": %m",
     964             :                                     name)));
     965             :                 }
     966             : 
     967             :                 /*
     968             :                  * Here, we remember the handle that we created in the
     969             :                  * postmaster process.  This handle isn't actually usable in
     970             :                  * any process other than the postmaster, but that doesn't
     971             :                  * matter.  We're just holding onto it so that, if the segment
     972             :                  * is unpinned, dsm_impl_unpin_segment can close it.
     973             :                  */
     974             :                 *impl_private_pm_handle = hmap;
     975             :                 break;
     976             :             }
     977             : #endif
     978             :         default:
     979         108 :             break;
     980             :     }
     981         108 : }
     982             : 
     983             : /*
     984             :  * Implementation-specific actions that must be performed when a segment is no
     985             :  * longer to be preserved, so that it will be cleaned up when all backends
     986             :  * have detached from it.
     987             :  *
     988             :  * Except on Windows, we don't need to do anything at all.  For Windows, we
     989             :  * close the extra handle that dsm_impl_pin_segment created in the
     990             :  * postmaster's process space.
                     :  *
                     :  * The close is done remotely: DuplicateHandle is called with the postmaster
                     :  * as source process, no target process or target handle, and
                     :  * DUPLICATE_CLOSE_SOURCE, so the only effect is that the source handle is
                     :  * closed.
                     :  *
                     :  * Arguments:
                     :  *  handle        only used to build the segment name for the error message.
                     :  *  impl_private  in/out: handle value valid in the postmaster (presumably
                     :  *                the one dsm_impl_pin_segment stored in
                     :  *                *impl_private_pm_handle - confirm against the caller).  A
                     :  *                NULL value skips the close.  In the Windows case it is
                     :  *                reset to NULL afterwards; otherwise it is left untouched.
                     :  *
                     :  * A failure is reported with ereport(ERROR).
                     :  * NOTE(review): the message text says "could not duplicate handle" although
                     :  * the operation being attempted here is a close.
     991             :  */
     992             : void
     993         108 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
     994             : {
     995         108 :     switch (dynamic_shared_memory_type)
     996             :     {
     997             : #ifdef USE_DSM_WINDOWS
     998             :         case DSM_IMPL_WINDOWS:
     999             :             {
    1000             :                 if (*impl_private &&
    1001             :                     !DuplicateHandle(PostmasterHandle, *impl_private,
    1002             :                                      NULL, NULL, 0, FALSE,
    1003             :                                      DUPLICATE_CLOSE_SOURCE))
    1004             :                 {
    1005             :                     char        name[64];
    1006             : 
    1007             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1008             :                     _dosmaperr(GetLastError());
    1009             :                     ereport(ERROR,
    1010             :                             (errcode_for_dynamic_shared_memory(),
    1011             :                              errmsg("could not duplicate handle for \"%s\": %m",
    1012             :                                     name)));
    1013             :                 }
    1014             : 
    1015             :                 *impl_private = NULL;
    1016             :                 break;
    1017             :             }
    1018             : #endif
    1019             :         default:
    1020         108 :             break;
    1021             :     }
    1022         108 : }
    1023             : /*
                     :  * Pick the error code to report for a failed shared memory operation,
                     :  * based on the current value of errno.
                     :  *
                     :  * EFBIG and ENOMEM are reported as ERRCODE_OUT_OF_MEMORY; every other
                     :  * errno value is handed to errcode_for_file_access().
                     :  *
                     :  * This is evaluated inside ereport() argument lists, so errno must still
                     :  * hold the failure code at that point.  That is why the callers in this
                     :  * file save errno before cleanup and restore it before calling ereport(),
                     :  * and why the Windows paths call _dosmaperr() first.
                     :  */
    1024             : static int
    1025           0 : errcode_for_dynamic_shared_memory(void)
    1026             : {
    1027           0 :     if (errno == EFBIG || errno == ENOMEM)
    1028           0 :         return errcode(ERRCODE_OUT_OF_MEMORY);
    1029             :     else
    1030           0 :         return errcode_for_file_access();
    1031             : }

Generated by: LCOV version 1.13