LCOV - code coverage report
Current view: top level - src/backend/port - pg_shmem.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 71.9 % 192 138
Test Date: 2026-03-01 16:14:42 Functions: 100.0 % 11 11
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * sysv_shmem.c
       4              :  *    Implement shared memory using SysV facilities
       5              :  *
       6              :  * These routines used to be a fairly thin layer on top of SysV shared
       7              :  * memory functionality.  With the addition of anonymous-shmem logic,
       8              :  * they're a bit fatter now.  We still require a SysV shmem block to
       9              :  * exist, though, because mmap'd shmem provides no way to find out how
      10              :  * many processes are attached, which we need for interlocking purposes.
      11              :  *
      12              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      13              :  * Portions Copyright (c) 1994, Regents of the University of California
      14              :  *
      15              :  * IDENTIFICATION
      16              :  *    src/backend/port/sysv_shmem.c
      17              :  *
      18              :  *-------------------------------------------------------------------------
      19              :  */
      20              : #include "postgres.h"
      21              : 
      22              : #include <signal.h>
      23              : #include <unistd.h>
      24              : #include <sys/file.h>
      25              : #include <sys/ipc.h>
      26              : #include <sys/mman.h>
      27              : #include <sys/shm.h>
      28              : #include <sys/stat.h>
      29              : 
      30              : #include "miscadmin.h"
      31              : #include "port/pg_bitutils.h"
      32              : #include "portability/mem.h"
      33              : #include "storage/dsm.h"
      34              : #include "storage/fd.h"
      35              : #include "storage/ipc.h"
      36              : #include "storage/pg_shmem.h"
      37              : #include "storage/shmem.h"
      38              : #include "utils/guc.h"
      39              : #include "utils/guc_hooks.h"
      40              : #include "utils/pidfile.h"
      41              : 
      42              : 
      43              : /*
      44              :  * As of PostgreSQL 9.3, we normally allocate only a very small amount of
      45              :  * System V shared memory, and only for the purposes of providing an
      46              :  * interlock to protect the data directory.  The real shared memory block
      47              :  * is allocated using mmap().  This works around the problem that many
      48              :  * systems have very low limits on the amount of System V shared memory
      49              :  * that can be allocated.  Even a limit of a few megabytes will be enough
      50              :  * to run many copies of PostgreSQL without needing to adjust system settings.
      51              :  *
      52              :  * We assume that no one will attempt to run PostgreSQL 9.3 or later on
      53              :  * systems that are ancient enough that anonymous shared memory is not
      54              :  * supported, such as pre-2.4 versions of Linux.  If that turns out to be
      55              :  * false, we might need to add compile and/or run-time tests here and do this
      56              :  * only if the running kernel supports it.
      57              :  *
      58              :  * However, we must always disable this logic in the EXEC_BACKEND case, and
      59              :  * fall back to the old method of allocating the entire segment using System V
      60              :  * shared memory, because there's no way to attach an anonymous mmap'd segment
      61              :  * to a process after exec().  Since EXEC_BACKEND is intended only for
      62              :  * developer use, this shouldn't be a big problem.  Because of this, we do
      63              :  * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
      64              :  *
      65              :  * As of PostgreSQL 12, we regained the ability to use a large System V shared
      66              :  * memory region even in non-EXEC_BACKEND builds, if shared_memory_type is set
      67              :  * to sysv (though this is not the default).
      68              :  */
      69              : 
      70              : 
      71              : typedef key_t IpcMemoryKey;     /* SysV IPC key passed to shmget(2) to name a segment */
      72              : typedef int IpcMemoryId;        /* segment ID returned by shmget(2); used with shmat/shmctl */
      73              : 
      74              : /*
      75              :  * How does a given IpcMemoryId relate to this PostgreSQL process?
      76              :  *
      77              :  * One could recycle unattached segments of different data directories if we
      78              :  * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
      79              :  * cause us to visit less of the key space, making us less likely to detect a
      80              :  * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
      81              :  * in that postmasters of different data directories could simultaneously
      82              :  * attempt to recycle a given key.  We'll waste keys longer in some cases, but
      83              :  * avoiding the problems of the alternative justifies that loss.
      84              :  */
      85              : typedef enum
      86              : {
      87              :     SHMSTATE_ANALYSIS_FAILURE,  /* unexpected failure to analyze the ID;
                                                         * PGSharedMemoryIsInUse() treats it as
                                                         * "in use" */
      88              :     SHMSTATE_ATTACHED,          /* pertinent to DataDir, has attached PIDs
                                                         * (shm_nattch was nonzero) */
      89              :     SHMSTATE_ENOENT,            /* no segment of that ID */
      90              :     SHMSTATE_FOREIGN,           /* exists, but not pertinent to DataDir
                                                         * (EACCES, or header magic/device/inode
                                                         * mismatch) */
      91              :     SHMSTATE_UNATTACHED,        /* pertinent to DataDir, no attached PIDs
                                                         * (shm_nattch was zero) */
      92              : } IpcMemoryState;
      93              : 
      94              : 
                        /*
                         * NOTE(review): these two are not assigned anywhere in the code visible
                         * here; presumably they record the key and attach address of the SysV
                         * segment currently in use -- confirm against the rest of the file.
                         */
      95              : unsigned long UsedShmemSegID = 0;
      96              : void       *UsedShmemSegAddr = NULL;
      97              : 
                        /*
                         * NOTE(review): presumably the size and address of the mmap'd anonymous
                         * block described in the file header comment; they are set outside the
                         * code visible here -- confirm.
                         */
      98              : static Size AnonymousShmemSize;
      99              : static void *AnonymousShmem = NULL;
     100              : 
                        /*
                         * Forward declarations of file-local helpers.  IpcMemoryDetach and
                         * IpcMemoryDelete have the (status, Datum) signature because they are
                         * registered as on_shmem_exit callbacks by InternalIpcMemoryCreate.
                         */
     101              : static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
     102              : static void IpcMemoryDetach(int status, Datum shmaddr);
     103              : static void IpcMemoryDelete(int status, Datum shmId);
     104              : static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId,
     105              :                                            void *attachAt,
     106              :                                            PGShmemHeader **addr);
     107              : 
     108              : 
     109              : /*
     110              :  *  InternalIpcMemoryCreate(memKey, size)
     111              :  *
     112              :  * Attempt to create a new shared memory segment with the specified key.
     113              :  * Will fail (return NULL) if such a segment already exists.  If successful,
     114              :  * attach the segment to the current process and return its attached address.
     115              :  * On success, callbacks are registered with on_shmem_exit to detach and
     116              :  * delete the segment when on_shmem_exit is called.
     117              :  *
     118              :  * If we fail with a failure code other than collision-with-existing-segment,
     119              :  * print out an error and abort.  Other types of errors are not recoverable.
                         *
                         * memKey: SysV IPC key under which to create the segment.
                         * size: requested segment size in bytes, passed unchanged to shmget().
                         *
                         * Returns the address at which the segment was attached, or NULL when
                         * shmget() reports a collision (EEXIST, EACCES, or EIDRM where that is
                         * defined; an EINVAL is re-probed with size 0 to detect a collision).
                         * Any other shmget() failure, and any shmat() failure, is reported at
                         * FATAL.
                         *
                         * Side effect on success: the key and segment ID are written to the data
                         * directory lock file via AddToDataDirLockFile().
     120              :  */
     121              : static void *
     122         1156 : InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
     123              : {
     124              :     IpcMemoryId shmid;
     125         1156 :     void       *requestedAddress = NULL;
     126              :     void       *memAddress;
     127              : 
     128              :     /*
     129              :      * Normally we just pass requestedAddress = NULL to shmat(), allowing the
     130              :      * system to choose where the segment gets mapped.  But in an EXEC_BACKEND
     131              :      * build, it's possible for whatever is chosen in the postmaster to not
     132              :      * work for backends, due to variations in address space layout.  As a
     133              :      * rather klugy workaround, allow the user to specify the address to use
     134              :      * via setting the environment variable PG_SHMEM_ADDR.  (If this were of
     135              :      * interest for anything except debugging, we'd probably create a cleaner
     136              :      * and better-documented way to set it, such as a GUC.)
                             *
                             * The value is parsed with strtoul() base 0, so decimal, octal and
                             * hex spellings are accepted; no validation of the string is done.
     137              :      */
     138              : #ifdef EXEC_BACKEND
     139              :     {
     140              :         char       *pg_shmem_addr = getenv("PG_SHMEM_ADDR");
     141              : 
     142              :         if (pg_shmem_addr)
     143              :             requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
     144              :         else
     145              :         {
     146              : #if defined(__darwin__) && SIZEOF_VOID_P == 8
     147              :             /*
     148              :              * Provide a default value that is believed to avoid problems with
     149              :              * ASLR on the current macOS release.
     150              :              */
     151              :             requestedAddress = (void *) 0x80000000000;
     152              : #endif
     153              :         }
     154              :     }
     155              : #endif
     156              : 
                            /* IPC_EXCL: fail rather than return an already-existing segment. */
     157         1156 :     shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
     158              : 
     159         1156 :     if (shmid < 0)
     160              :     {
                                /*
                                 * Remember the original errno: the retry shmget() below can
                                 * overwrite it, and it is restored just before the ereport().
                                 */
     161            6 :         int         shmget_errno = errno;
     162              : 
     163              :         /*
     164              :          * Fail quietly if error indicates a collision with existing segment.
     165              :          * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
     166              :          * we could get a permission violation instead?  Also, EIDRM might
     167              :          * occur if an old seg is slated for destruction but not gone yet.
     168              :          */
     169            6 :         if (shmget_errno == EEXIST || shmget_errno == EACCES
     170              : #ifdef EIDRM
     171            0 :             || shmget_errno == EIDRM
     172              : #endif
     173              :             )
     174            6 :             return NULL;
     175              : 
     176              :         /*
     177              :          * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
     178              :          * there is an existing segment but it's smaller than "size" (this is
     179              :          * a result of poorly-thought-out ordering of error tests). To
     180              :          * distinguish between collision and invalid size in such cases, we
     181              :          * make a second try with size = 0.  These kernels do not test size
     182              :          * against SHMMIN in the preexisting-segment case, so we will not get
     183              :          * EINVAL a second time if there is such a segment.
     184              :          */
     185            0 :         if (shmget_errno == EINVAL)
     186              :         {
     187            0 :             shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
     188              : 
     189            0 :             if (shmid < 0)
     190              :             {
     191              :                 /* As above, fail quietly if we verify a collision */
     192            0 :                 if (errno == EEXIST || errno == EACCES
     193              : #ifdef EIDRM
     194            0 :                     || errno == EIDRM
     195              : #endif
     196              :                     )
     197            0 :                     return NULL;
     198              :                 /* Otherwise, fall through to report the original error */
     199              :             }
     200              :             else
     201              :             {
     202              :                 /*
     203              :                  * On most platforms we cannot get here because SHMMIN is
     204              :                  * greater than zero.  However, if we do succeed in creating a
     205              :                  * zero-size segment, free it and then fall through to report
     206              :                  * the original error.
     207              :                  */
     208            0 :                 if (shmctl(shmid, IPC_RMID, NULL) < 0)
     209            0 :                     elog(LOG, "shmctl(%d, %d, 0) failed: %m",
     210              :                          shmid, IPC_RMID);
     211              :             }
     212              :         }
     213              : 
     214              :         /*
     215              :          * Else complain and abort.
     216              :          *
     217              :          * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
     218              :          * is violated.  SHMALL violation might be reported as either ENOMEM
     219              :          * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
     220              :          * it should be.  SHMMNI violation is ENOSPC, per spec.  Just plain
     221              :          * not-enough-RAM is ENOMEM.
                                 *
                                 * errno is reset to the saved value so that %m in the message
                                 * describes the first shmget() failure, not the size-0 retry.
                                 * At most one of the three errhint() alternatives applies,
                                 * selected by that saved errno.
     222              :          */
     223            0 :         errno = shmget_errno;
     224            0 :         ereport(FATAL,
     225              :                 (errmsg("could not create shared memory segment: %m"),
     226              :                  errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
     227              :                            (unsigned long) memKey, size,
     228              :                            IPC_CREAT | IPC_EXCL | IPCProtection),
     229              :                  (shmget_errno == EINVAL) ?
     230              :                  errhint("This error usually means that PostgreSQL's request for a shared memory "
     231              :                          "segment exceeded your kernel's SHMMAX parameter, or possibly that "
     232              :                          "it is less than "
     233              :                          "your kernel's SHMMIN parameter.\n"
     234              :                          "The PostgreSQL documentation contains more information about shared "
     235              :                          "memory configuration.") : 0,
     236              :                  (shmget_errno == ENOMEM) ?
     237              :                  errhint("This error usually means that PostgreSQL's request for a shared "
     238              :                          "memory segment exceeded your kernel's SHMALL parameter.  You might need "
     239              :                          "to reconfigure the kernel with larger SHMALL.\n"
     240              :                          "The PostgreSQL documentation contains more information about shared "
     241              :                          "memory configuration.") : 0,
     242              :                  (shmget_errno == ENOSPC) ?
     243              :                  errhint("This error does *not* mean that you have run out of disk space.  "
     244              :                          "It occurs either if all available shared memory IDs have been taken, "
     245              :                          "in which case you need to raise the SHMMNI parameter in your kernel, "
     246              :                          "or because the system's overall limit for shared memory has been "
     247              :                          "reached.\n"
     248              :                          "The PostgreSQL documentation contains more information about shared "
     249              :                          "memory configuration.") : 0));
     250              :     }
     251              : 
     252              :     /* Register on-exit routine to delete the new segment */
                            /*
                             * NOTE(review): this is registered before shmat(), presumably so that
                             * the segment is still removed if the attach below fails at FATAL --
                             * confirm against the on_shmem_exit/FATAL exit path.
                             */
     253         1150 :     on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
     254              : 
     255              :     /* OK, should be able to attach to the segment */
     256         1150 :     memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);
     257              : 
     258         1150 :     if (memAddress == (void *) -1)
     259            0 :         elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
     260              :              shmid, requestedAddress, PG_SHMAT_FLAGS);
     261              : 
     262              :     /* Register on-exit routine to detach new segment before deleting */
     263         1150 :     on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
     264              : 
     265              :     /*
     266              :      * Store shmem key and ID in data directory lockfile.  Format to try to
     267              :      * keep it the same length always (trailing junk in the lockfile won't
     268              :      * hurt, but might confuse humans).
                             *
                             * line[] is ample: each %9lu expands to at most 20 digits for a 64-bit
                             * unsigned long, so the text is at most 20 + 1 + 20 characters plus
                             * the terminating NUL.
     269              :      */
     270              :     {
     271              :         char        line[64];
     272              : 
     273         1150 :         sprintf(line, "%9lu %9lu",
     274              :                 (unsigned long) memKey, (unsigned long) shmid);
     275         1150 :         AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
     276              :     }
     277              : 
     278         1150 :     return memAddress;
     279              : }
     280              : 
     281              : /****************************************************************************/
     282              : /*  IpcMemoryDetach(status, shmaddr)    removes a shared memory segment     */
     283              : /*                                      from process' address space         */
     284              : /*  (called as an on_shmem_exit callback, hence funny argument list)        */
     285              : /****************************************************************************/
     286              : static void
     287         1150 : IpcMemoryDetach(int status, Datum shmaddr)
     288              : {
     289              :     /*
                             * Detach System V shared memory block.  shmaddr carries the attach
                             * address as a pointer Datum; "status" is not used.  A shmdt()
                             * failure is only logged at LOG level, not raised as an error.
                             */
     290         1150 :     if (shmdt(DatumGetPointer(shmaddr)) < 0)
     291            0 :         elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
     292         1150 : }
     293              : 
     294              : /****************************************************************************/
     295              : /*  IpcMemoryDelete(status, shmId)      deletes a shared memory segment     */
     296              : /*  (called as an on_shmem_exit callback, hence funny argument list)        */
     297              : /****************************************************************************/
     298              : static void
     299         1150 : IpcMemoryDelete(int status, Datum shmId)
     300              : {
                            /*
                             * Issue IPC_RMID for the segment whose ID is carried in shmId as an
                             * int32 Datum; "status" is not used.  A shmctl() failure is only
                             * logged at LOG level, not raised as an error.
                             */
     301         1150 :     if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
     302            0 :         elog(LOG, "shmctl(%d, %d, 0) failed: %m",
     303              :              DatumGetInt32(shmId), IPC_RMID);
     304         1150 : }
     305              : 
     306              : /*
     307              :  * PGSharedMemoryIsInUse
     308              :  *
     309              :  * Is a previously-existing shmem segment still existing and in use?
     310              :  *
     311              :  * The point of this exercise is to detect the case where a prior postmaster
     312              :  * crashed, but it left child backends that are still running.  Therefore
     313              :  * we only care about shmem segments that are associated with the intended
     314              :  * DataDir.  This is an important consideration since accidental matches of
     315              :  * shmem segment IDs are reasonably common.
                         *
                         * id1 is accepted but not referenced by this implementation; id2 is the
                         * shmem segment ID to probe.
                         *
                         * Returns true if the segment belongs to DataDir and has attached
                         * processes, or if its state could not be determined; returns false if
                         * it does not exist, is foreign, or has no attached processes.
     316              :  */
     317              : bool
     318            1 : PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
     319              : {
     320              :     PGShmemHeader *memAddress;
     321              :     IpcMemoryState state;
     322              : 
                            /* Probe with attachAt = NULL so the system picks the address. */
     323            1 :     state = PGSharedMemoryAttach((IpcMemoryId) id2, NULL, &memAddress);
                            /* The attach was only a probe; detach again if it happened. */
     324            1 :     if (memAddress && shmdt(memAddress) < 0)
     325            0 :         elog(LOG, "shmdt(%p) failed: %m", memAddress);
     326            1 :     switch (state)
     327              :     {
     328            1 :         case SHMSTATE_ENOENT:
     329              :         case SHMSTATE_FOREIGN:
     330              :         case SHMSTATE_UNATTACHED:
     331            1 :             return false;
     332            0 :         case SHMSTATE_ANALYSIS_FAILURE:
     333              :         case SHMSTATE_ATTACHED:
     334            0 :             return true;
     335              :     }
                            /*
                             * Every IpcMemoryState value returns above; this fallback errs on the
                             * side of reporting "in use".
                             */
     336            0 :     return true;
     337              : }
     338              : 
     339              : /*
     340              :  * Test for a segment with id shmId; see comment at IpcMemoryState.
     341              :  *
     342              :  * If the segment exists, we'll attempt to attach to it, using attachAt
     343              :  * if that's not NULL (but it's best to pass NULL if possible).
     344              :  *
     345              :  * *addr is set to the segment memory address if we attached to it, else NULL.
                         *
                         * Note that this function never detaches: whenever *addr is non-NULL on
                         * return (which includes the SHMSTATE_FOREIGN result produced by a header
                         * mismatch), the segment is still attached and it is up to the caller to
                         * shmdt() it, as PGSharedMemoryIsInUse() does.
                         *
                         * Result:
                         *  SHMSTATE_ENOENT            shmctl() or shmat() failed with EINVAL (or
                         *                             with EIDRM if HAVE_LINUX_EIDRM_BUG)
                         *  SHMSTATE_FOREIGN           shmctl() or shmat() failed with EACCES, or
                         *                             the header's magic/device/inode do not
                         *                             match DataDir
                         *  SHMSTATE_ANALYSIS_FAILURE  any other shmctl()/shmat() failure, or
                         *                             stat(DataDir) failed
                         *  SHMSTATE_UNATTACHED        header matches and shm_nattch was zero
                         *  SHMSTATE_ATTACHED          header matches and shm_nattch was nonzero
     346              :  */
     347              : static IpcMemoryState
     348            7 : PGSharedMemoryAttach(IpcMemoryId shmId,
     349              :                      void *attachAt,
     350              :                      PGShmemHeader **addr)
     351              : {
     352              :     struct shmid_ds shmStat;
     353              :     struct stat statbuf;
     354              :     PGShmemHeader *hdr;
     355              : 
                            /* Default for every early return below: nothing attached. */
     356            7 :     *addr = NULL;
     357              : 
     358              :     /*
     359              :      * First, try to stat the shm segment ID, to see if it exists at all.
     360              :      */
     361            7 :     if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
     362              :     {
     363              :         /*
     364              :          * EINVAL actually has multiple possible causes documented in the
     365              :          * shmctl man page, but we assume it must mean the segment no longer
     366              :          * exists.
     367              :          */
     368            0 :         if (errno == EINVAL)
     369            0 :             return SHMSTATE_ENOENT;
     370              : 
     371              :         /*
     372              :          * EACCES implies we have no read permission, which means it is not a
     373              :          * Postgres shmem segment (or at least, not one that is relevant to
     374              :          * our data directory).
     375              :          */
     376            0 :         if (errno == EACCES)
     377            0 :             return SHMSTATE_FOREIGN;
     378              : 
     379              :         /*
     380              :          * Some Linux kernel versions (in fact, all of them as of July 2007)
     381              :          * sometimes return EIDRM when EINVAL is correct.  The Linux kernel
     382              :          * actually does not have any internal state that would justify
     383              :          * returning EIDRM, so we can get away with assuming that EIDRM is
     384              :          * equivalent to EINVAL on that platform.
     385              :          */
     386              : #ifdef HAVE_LINUX_EIDRM_BUG
     387            0 :         if (errno == EIDRM)
     388            0 :             return SHMSTATE_ENOENT;
     389              : #endif
     390              : 
     391              :         /*
     392              :          * Otherwise, we had better assume that the segment is in use.  The
     393              :          * only likely case is (non-Linux, assumed spec-compliant) EIDRM,
     394              :          * which implies that the segment has been IPC_RMID'd but there are
     395              :          * still processes attached to it.
     396              :          */
     397            0 :         return SHMSTATE_ANALYSIS_FAILURE;
     398              :     }
     399              : 
     400              :     /*
     401              :      * Try to attach to the segment and see if it matches our data directory.
     402              :      * This avoids any risk of duplicate-shmem-key conflicts on machines that
     403              :      * are running several postmasters under the same userid.
     404              :      *
     405              :      * (When we're called from PGSharedMemoryCreate, this stat call is
     406              :      * duplicative; but since this isn't a high-traffic case it's not worth
     407              :      * trying to optimize.)
     408              :      */
     409            7 :     if (stat(DataDir, &statbuf) < 0)
     410            0 :         return SHMSTATE_ANALYSIS_FAILURE;   /* can't stat; be conservative */
     411              : 
     412            7 :     hdr = (PGShmemHeader *) shmat(shmId, attachAt, PG_SHMAT_FLAGS);
     413            7 :     if (hdr == (PGShmemHeader *) -1)
     414              :     {
     415              :         /*
     416              :          * Attachment failed.  The cases we're interested in are the same as
     417              :          * for the shmctl() call above.  In particular, note that the owning
     418              :          * postmaster could have terminated and removed the segment between
     419              :          * shmctl() and shmat().
     420              :          *
     421              :          * If attachAt isn't NULL, it's possible that EINVAL reflects a
     422              :          * problem with that address not a vanished segment, so it's best to
     423              :          * pass NULL when probing for conflicting segments.
     424              :          */
     425            0 :         if (errno == EINVAL)
     426            0 :             return SHMSTATE_ENOENT; /* segment disappeared */
     427            0 :         if (errno == EACCES)
     428            0 :             return SHMSTATE_FOREIGN;    /* must be non-Postgres */
     429              : #ifdef HAVE_LINUX_EIDRM_BUG
     430            0 :         if (errno == EIDRM)
     431            0 :             return SHMSTATE_ENOENT; /* segment disappeared */
     432              : #endif
     433              :         /* Otherwise, be conservative. */
     434            0 :         return SHMSTATE_ANALYSIS_FAILURE;
     435              :     }
                            /*
                             * Attached.  Every return from here on leaves the segment mapped, so
                             * hand the address back for the caller to detach.
                             */
     436            7 :     *addr = hdr;
     437              : 
                            /*
                             * Compare the segment header against PGShmemMagic and against the
                             * device and inode numbers that stat() reported for DataDir.
                             */
     438            7 :     if (hdr->magic != PGShmemMagic ||
     439            5 :         hdr->device != statbuf.st_dev ||
     440            5 :         hdr->inode != statbuf.st_ino)
     441              :     {
     442              :         /*
     443              :          * It's either not a Postgres segment, or not one for my data
     444              :          * directory.
     445              :          */
     446            2 :         return SHMSTATE_FOREIGN;
     447              :     }
     448              : 
     449              :     /*
     450              :      * It does match our data directory, so now test whether any processes are
     451              :      * still attached to it.  (We are, now, but the shm_nattch result is from
     452              :      * before we attached to it.)
     453              :      */
     454            5 :     return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
     455              : }
     456              : 
     457              : /*
     458              :  * Identify the huge page size to use, and compute the related mmap flags.
     459              :  *
     460              :  * Some Linux kernel versions have a bug causing mmap() to fail on requests
     461              :  * that are not a multiple of the hugepage size.  Versions without that bug
     462              :  * instead silently round the request up to the next hugepage multiple ---
     463              :  * and then munmap() fails when we give it a size different from that.
     464              :  * So we have to round our request up to a multiple of the actual hugepage
     465              :  * size to avoid trouble.
     466              :  *
     467              :  * Doing the round-up ourselves also lets us make use of the extra memory,
     468              :  * rather than just wasting it.  Currently, we just increase the available
     469              :  * space recorded in the shmem header, which will make the extra usable for
     470              :  * purposes such as additional locktable entries.  Someday, for very large
     471              :  * hugepage sizes, we might want to think about more invasive strategies,
     472              :  * such as increasing shared_buffers to absorb the extra space.
     473              :  *
     474              :  * Returns the (real, assumed or config provided) page size into
     475              :  * *hugepagesize, and the hugepage-related mmap flags to use into
     476              :  * *mmap_flags if requested by the caller.  If huge pages are not supported,
     477              :  * *hugepagesize and *mmap_flags are set to 0.
     478              :  */
     479              : void
     480         2147 : GetHugePageSize(Size *hugepagesize, int *mmap_flags)
     481              : {
     482              : #ifdef MAP_HUGETLB
     483              : 
     484         2147 :     Size        default_hugepagesize = 0;
     485         2147 :     Size        hugepagesize_local = 0;
     486         2147 :     int         mmap_flags_local = 0;
     487              : 
     488              :     /*
     489              :      * System-dependent code to find out the default huge page size.
     490              :      *
     491              :      * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
     492              :      * nnnn kB".  Ignore any failures, falling back to the preset default.
     493              :      */
     494              : #ifdef __linux__
     495              : 
     496              :     {
     497         2147 :         FILE       *fp = AllocateFile("/proc/meminfo", "r");
     498              :         char        buf[128];
     499              :         unsigned int sz;
     500              :         char        ch;
     501              : 
     502         2147 :         if (fp)
     503              :         {
     504       109497 :             while (fgets(buf, sizeof(buf), fp))
     505              :             {
     506       109497 :                 if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
     507              :                 {
     508         2147 :                     if (ch == 'k')
     509              :                     {
     510         2147 :                         default_hugepagesize = sz * (Size) 1024;
     511         2147 :                         break;
     512              :                     }
     513              :                     /* We could accept other units besides kB, if needed */
     514              :                 }
     515              :             }
     516         2147 :             FreeFile(fp);
     517              :         }
     518              :     }
     519              : #endif                          /* __linux__ */
     520              : 
     521         2147 :     if (huge_page_size != 0)
     522              :     {
     523              :         /* If huge page size is requested explicitly, use that. */
     524            0 :         hugepagesize_local = (Size) huge_page_size * 1024;
     525              :     }
     526         2147 :     else if (default_hugepagesize != 0)
     527              :     {
     528              :         /* Otherwise use the system default, if we have it. */
     529         2147 :         hugepagesize_local = default_hugepagesize;
     530              :     }
     531              :     else
     532              :     {
     533              :         /*
     534              :          * If we fail to find out the system's default huge page size, or no
     535              :          * huge page size is requested explicitly, assume it is 2MB. This will
     536              :          * work fine when the actual size is less.  If it's more, we might get
     537              :          * mmap() or munmap() failures due to unaligned requests; but at this
     538              :          * writing, there are no reports of any non-Linux systems being picky
     539              :          * about that.
     540              :          */
     541            0 :         hugepagesize_local = 2 * 1024 * 1024;
     542              :     }
     543              : 
     544         2147 :     mmap_flags_local = MAP_HUGETLB;
     545              : 
     546              :     /*
     547              :      * On recent enough Linux, also include the explicit page size, if
     548              :      * necessary.
     549              :      */
     550              : #if defined(MAP_HUGE_MASK) && defined(MAP_HUGE_SHIFT)
     551         2147 :     if (hugepagesize_local != default_hugepagesize)
     552              :     {
     553            0 :         int         shift = pg_ceil_log2_64(hugepagesize_local);
     554              : 
     555            0 :         mmap_flags_local |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
     556              :     }
     557              : #endif
     558              : 
     559              :     /* assign the results found */
     560         2147 :     if (mmap_flags)
     561         1152 :         *mmap_flags = mmap_flags_local;
     562         2147 :     if (hugepagesize)
     563         2147 :         *hugepagesize = hugepagesize_local;
     564              : 
     565              : #else
     566              : 
     567              :     if (hugepagesize)
     568              :         *hugepagesize = 0;
     569              :     if (mmap_flags)
     570              :         *mmap_flags = 0;
     571              : 
     572              : #endif                          /* MAP_HUGETLB */
     573         2147 : }
     574              : 
     575              : /*
     576              :  * GUC check_hook for huge_page_size
     577              :  */
     578              : bool
     579         1187 : check_huge_page_size(int *newval, void **extra, GucSource source)
     580              : {
     581              : #if !(defined(MAP_HUGE_MASK) && defined(MAP_HUGE_SHIFT))
     582              :     /* Recent enough Linux only, for now.  See GetHugePageSize(). */
     583              :     if (*newval != 0)
     584              :     {
     585              :         GUC_check_errdetail("\"huge_page_size\" must be 0 on this platform.");
     586              :         return false;
     587              :     }
     588              : #endif
     589         1187 :     return true;
     590              : }
     591              : 
     592              : /*
     593              :  * Creates an anonymous mmap()ed shared memory segment.
     594              :  *
     595              :  * Pass the requested size in *size.  This function will modify *size to the
     596              :  * actual size of the allocation, if it ends up allocating a segment that is
     597              :  * larger than requested.
     598              :  */
     599              : static void *
     600         1152 : CreateAnonymousSegment(Size *size)
     601              : {
     602         1152 :     Size        allocsize = *size;
     603         1152 :     void       *ptr = MAP_FAILED;
     604         1152 :     int         mmap_errno = 0;
     605         1152 :     int         mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_HASSEMAPHORE;
     606              : 
     607              : #ifndef MAP_HUGETLB
     608              :     /* PGSharedMemoryCreate should have dealt with this case */
     609              :     Assert(huge_pages != HUGE_PAGES_ON);
     610              : #else
     611         1152 :     if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
     612              :     {
     613              :         /*
     614              :          * Round up the request size to a suitable large value.
     615              :          */
     616              :         Size        hugepagesize;
     617              :         int         huge_mmap_flags;
     618              : 
     619         1152 :         GetHugePageSize(&hugepagesize, &huge_mmap_flags);
     620              : 
     621         1152 :         if (allocsize % hugepagesize != 0)
     622         1152 :             allocsize = add_size(allocsize, hugepagesize - (allocsize % hugepagesize));
     623              : 
     624         1152 :         ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
     625              :                    mmap_flags | huge_mmap_flags, -1, 0);
     626         1152 :         mmap_errno = errno;
     627         1152 :         if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
     628         1152 :             elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
     629              :                  allocsize);
     630              :     }
     631              : #endif
     632              : 
     633              :     /*
     634              :      * Report whether huge pages are in use.  This needs to be tracked before
     635              :      * the second mmap() call if attempting to use huge pages failed
     636              :      * previously.
     637              :      */
     638         1152 :     SetConfigOption("huge_pages_status", (ptr == MAP_FAILED) ? "off" : "on",
     639              :                     PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
     640              : 
     641         1152 :     if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
     642              :     {
     643              :         /*
     644              :          * Use the original size, not the rounded-up value, when falling back
     645              :          * to non-huge pages.
     646              :          */
     647         1152 :         allocsize = *size;
     648         1152 :         ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
     649              :                    mmap_flags, -1, 0);
     650         1152 :         mmap_errno = errno;
     651              :     }
     652              : 
     653         1152 :     if (ptr == MAP_FAILED)
     654              :     {
     655            0 :         errno = mmap_errno;
     656            0 :         ereport(FATAL,
     657              :                 (errmsg("could not map anonymous shared memory: %m"),
     658              :                  (mmap_errno == ENOMEM) ?
     659              :                  errhint("This error usually means that PostgreSQL's request "
     660              :                          "for a shared memory segment exceeded available memory, "
     661              :                          "swap space, or huge pages. To reduce the request size "
     662              :                          "(currently %zu bytes), reduce PostgreSQL's shared "
     663              :                          "memory usage, perhaps by reducing \"shared_buffers\" or "
     664              :                          "\"max_connections\".",
     665              :                          allocsize) : 0));
     666              :     }
     667              : 
     668         1152 :     *size = allocsize;
     669         1152 :     return ptr;
     670              : }
     671              : 
     672              : /*
     673              :  * AnonymousShmemDetach --- detach from an anonymous mmap'd block
     674              :  * (called as an on_shmem_exit callback, hence funny argument list)
     675              :  */
     676              : static void
     677         1152 : AnonymousShmemDetach(int status, Datum arg)
     678              : {
     679              :     /* Release anonymous shared memory block, if any. */
     680         1152 :     if (AnonymousShmem != NULL)
     681              :     {
     682         1152 :         if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
     683            0 :             elog(LOG, "munmap(%p, %zu) failed: %m",
     684              :                  AnonymousShmem, AnonymousShmemSize);
     685         1152 :         AnonymousShmem = NULL;
     686              :     }
     687         1152 : }
     688              : 
     689              : /*
     690              :  * PGSharedMemoryCreate
     691              :  *
     692              :  * Create a shared memory segment of the given size and initialize its
     693              :  * standard header.  Also, register an on_shmem_exit callback to release
     694              :  * the storage.
     695              :  *
     696              :  * Dead Postgres segments pertinent to this DataDir are recycled if found, but
     697              :  * we do not fail upon collision with foreign shmem segments.  The idea here
     698              :  * is to detect and re-use keys that may have been assigned by a crashed
     699              :  * postmaster or backend.
     700              :  */
     701              : PGShmemHeader *
     702         1152 : PGSharedMemoryCreate(Size size,
     703              :                      PGShmemHeader **shim)
     704              : {
     705              :     IpcMemoryKey NextShmemSegID;
     706              :     void       *memAddress;
     707              :     PGShmemHeader *hdr;
     708              :     struct stat statbuf;
     709              :     Size        sysvsize;
     710              : 
     711              :     /*
     712              :      * We use the data directory's ID info (inode and device numbers) to
     713              :      * positively identify shmem segments associated with this data dir, and
     714              :      * also as seeds for searching for a free shmem key.
     715              :      */
     716         1152 :     if (stat(DataDir, &statbuf) < 0)
     717            0 :         ereport(FATAL,
     718              :                 (errcode_for_file_access(),
     719              :                  errmsg("could not stat data directory \"%s\": %m",
     720              :                         DataDir)));
     721              : 
     722              :     /* Complain if hugepages demanded but we can't possibly support them */
     723              : #if !defined(MAP_HUGETLB)
     724              :     if (huge_pages == HUGE_PAGES_ON)
     725              :         ereport(ERROR,
     726              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     727              :                  errmsg("huge pages not supported on this platform")));
     728              : #endif
     729              : 
     730              :     /* For now, we don't support huge pages in SysV memory */
     731         1152 :     if (huge_pages == HUGE_PAGES_ON && shared_memory_type != SHMEM_TYPE_MMAP)
     732            0 :         ereport(ERROR,
     733              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     734              :                  errmsg("huge pages not supported with the current \"shared_memory_type\" setting")));
     735              : 
     736              :     /* Room for a header? */
     737              :     Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
     738              : 
     739         1152 :     if (shared_memory_type == SHMEM_TYPE_MMAP)
     740              :     {
     741         1152 :         AnonymousShmem = CreateAnonymousSegment(&size);
     742         1152 :         AnonymousShmemSize = size;
     743              : 
     744              :         /* Register on-exit routine to unmap the anonymous segment */
     745         1152 :         on_shmem_exit(AnonymousShmemDetach, (Datum) 0);
     746              : 
     747              :         /* Now we need only allocate a minimal-sized SysV shmem block. */
     748         1152 :         sysvsize = sizeof(PGShmemHeader);
     749              :     }
     750              :     else
     751              :     {
     752            0 :         sysvsize = size;
     753              : 
     754              :         /* huge pages are only available with mmap */
     755            0 :         SetConfigOption("huge_pages_status", "off",
     756              :                         PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
     757              :     }
     758              : 
     759              :     /*
     760              :      * Loop till we find a free IPC key.  Trust CreateDataDirLockFile() to
     761              :      * ensure no more than one postmaster per data directory can enter this
     762              :      * loop simultaneously.  (CreateDataDirLockFile() does not entirely ensure
     763              :      * that, but prefer fixing it over coping here.)
     764              :      */
     765         1152 :     NextShmemSegID = statbuf.st_ino;
     766              : 
     767              :     for (;;)
     768            4 :     {
     769              :         IpcMemoryId shmid;
     770              :         PGShmemHeader *oldhdr;
     771              :         IpcMemoryState state;
     772              : 
     773              :         /* Try to create new segment */
     774         1156 :         memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
     775         1156 :         if (memAddress)
     776         1150 :             break;              /* successful create and attach */
     777              : 
     778              :         /* Check shared memory and possibly remove and recreate */
     779              : 
     780              :         /*
     781              :          * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
     782              :          * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
     783              :          * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
     784              :          */
     785            6 :         shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
     786            6 :         if (shmid < 0)
     787              :         {
     788            0 :             oldhdr = NULL;
     789            0 :             state = SHMSTATE_FOREIGN;
     790              :         }
     791              :         else
     792            6 :             state = PGSharedMemoryAttach(shmid, NULL, &oldhdr);
     793              : 
     794            6 :         switch (state)
     795              :         {
     796            2 :             case SHMSTATE_ANALYSIS_FAILURE:
     797              :             case SHMSTATE_ATTACHED:
     798            2 :                 ereport(FATAL,
     799              :                         (errcode(ERRCODE_LOCK_FILE_EXISTS),
     800              :                          errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
     801              :                                 (unsigned long) NextShmemSegID,
     802              :                                 (unsigned long) shmid),
     803              :                          errhint("Terminate any old server processes associated with data directory \"%s\".",
     804              :                                  DataDir)));
     805              :                 break;
     806            0 :             case SHMSTATE_ENOENT:
     807              : 
     808              :                 /*
     809              :                  * To our surprise, some other process deleted since our last
     810              :                  * InternalIpcMemoryCreate().  Moments earlier, we would have
     811              :                  * seen SHMSTATE_FOREIGN.  Try that same ID again.
     812              :                  */
     813            0 :                 elog(LOG,
     814              :                      "shared memory block (key %lu, ID %lu) deleted during startup",
     815              :                      (unsigned long) NextShmemSegID,
     816              :                      (unsigned long) shmid);
     817            0 :                 break;
     818            2 :             case SHMSTATE_FOREIGN:
     819            2 :                 NextShmemSegID++;
     820            2 :                 break;
     821            2 :             case SHMSTATE_UNATTACHED:
     822              : 
     823              :                 /*
     824              :                  * The segment pertains to DataDir, and every process that had
     825              :                  * used it has died or detached.  Zap it, if possible, and any
     826              :                  * associated dynamic shared memory segments, as well.  This
     827              :                  * shouldn't fail, but if it does, assume the segment belongs
     828              :                  * to someone else after all, and try the next candidate.
     829              :                  * Otherwise, try again to create the segment.  That may fail
     830              :                  * if some other process creates the same shmem key before we
     831              :                  * do, in which case we'll try the next key.
     832              :                  */
     833            2 :                 if (oldhdr->dsm_control != 0)
     834            2 :                     dsm_cleanup_using_control_segment(oldhdr->dsm_control);
     835            2 :                 if (shmctl(shmid, IPC_RMID, NULL) < 0)
     836            0 :                     NextShmemSegID++;
     837            2 :                 break;
     838              :         }
     839              : 
     840            4 :         if (oldhdr && shmdt(oldhdr) < 0)
     841            0 :             elog(LOG, "shmdt(%p) failed: %m", oldhdr);
     842              :     }
     843              : 
     844              :     /* Initialize new segment. */
     845         1150 :     hdr = (PGShmemHeader *) memAddress;
     846         1150 :     hdr->creatorPID = getpid();
     847         1150 :     hdr->magic = PGShmemMagic;
     848         1150 :     hdr->dsm_control = 0;
     849              : 
     850              :     /* Fill in the data directory ID info, too */
     851         1150 :     hdr->device = statbuf.st_dev;
     852         1150 :     hdr->inode = statbuf.st_ino;
     853              : 
     854              :     /*
     855              :      * Initialize space allocation status for segment.
     856              :      */
     857         1150 :     hdr->totalsize = size;
     858         1150 :     hdr->content_offset = MAXALIGN(sizeof(PGShmemHeader));
     859         1150 :     *shim = hdr;
     860              : 
     861              :     /* Save info for possible future use */
     862         1150 :     UsedShmemSegAddr = memAddress;
     863         1150 :     UsedShmemSegID = (unsigned long) NextShmemSegID;
     864              : 
     865              :     /*
     866              :      * If AnonymousShmem is NULL here, then we're not using anonymous shared
     867              :      * memory, and should return a pointer to the System V shared memory
     868              :      * block. Otherwise, the System V shared memory block is only a shim, and
     869              :      * we must return a pointer to the real block.
     870              :      */
     871         1150 :     if (AnonymousShmem == NULL)
     872            0 :         return hdr;
     873         1150 :     memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
     874         1150 :     return (PGShmemHeader *) AnonymousShmem;
     875              : }
     876              : 
     877              : #ifdef EXEC_BACKEND
     878              : 
     879              : /*
     880              :  * PGSharedMemoryReAttach
     881              :  *
     882              :  * This is called during startup of a postmaster child process to re-attach to
     883              :  * an already existing shared memory segment.  This is needed only in the
     884              :  * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
     885              :  * segment attachment via fork().
     886              :  *
     887              :  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
     888              :  * routine.  The caller must have already restored them to the postmaster's
     889              :  * values.
     890              :  */
     891              : void
     892              : PGSharedMemoryReAttach(void)
     893              : {
     894              :     IpcMemoryId shmid;
     895              :     PGShmemHeader *hdr;
     896              :     IpcMemoryState state;
     897              :     void       *origUsedShmemSegAddr = UsedShmemSegAddr;
     898              : 
     899              :     Assert(UsedShmemSegAddr != NULL);
     900              :     Assert(IsUnderPostmaster);
     901              : 
     902              : #ifdef __CYGWIN__
     903              :     /* cygipc (currently) appears to not detach on exec. */
     904              :     PGSharedMemoryDetach();
     905              :     UsedShmemSegAddr = origUsedShmemSegAddr;
     906              : #endif
     907              : 
     908              :     elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
     909              :     shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
     910              :     if (shmid < 0)
     911              :         state = SHMSTATE_FOREIGN;
     912              :     else
     913              :         state = PGSharedMemoryAttach(shmid, UsedShmemSegAddr, &hdr);
     914              :     if (state != SHMSTATE_ATTACHED)
     915              :         elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
     916              :              (int) UsedShmemSegID, UsedShmemSegAddr);
     917              :     if (hdr != origUsedShmemSegAddr)
     918              :         elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
     919              :              hdr, origUsedShmemSegAddr);
     920              :     dsm_set_control_handle(hdr->dsm_control);
     921              : 
     922              :     UsedShmemSegAddr = hdr;     /* probably redundant */
     923              : }
     924              : 
     925              : /*
     926              :  * PGSharedMemoryNoReAttach
     927              :  *
     928              :  * This is called during startup of a postmaster child process when we choose
     929              :  * *not* to re-attach to the existing shared memory segment.  We must clean up
     930              :  * to leave things in the appropriate state.  This is not used in the non
     931              :  * EXEC_BACKEND case, either.
     932              :  *
     933              :  * The child process startup logic might or might not call PGSharedMemoryDetach
     934              :  * after this; make sure that it will be a no-op if called.
     935              :  *
     936              :  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
     937              :  * routine.  The caller must have already restored them to the postmaster's
     938              :  * values.
     939              :  */
     940              : void
     941              : PGSharedMemoryNoReAttach(void)
     942              : {
     943              :     Assert(UsedShmemSegAddr != NULL);
     944              :     Assert(IsUnderPostmaster);
     945              : 
     946              : #ifdef __CYGWIN__
     947              :     /* cygipc (currently) appears to not detach on exec. */
     948              :     PGSharedMemoryDetach();
     949              : #endif
     950              : 
     951              :     /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
     952              :     UsedShmemSegAddr = NULL;
     953              :     /* And the same for UsedShmemSegID. */
     954              :     UsedShmemSegID = 0;
     955              : }
     956              : 
     957              : #endif                          /* EXEC_BACKEND */
     958              : 
     959              : /*
     960              :  * PGSharedMemoryDetach
     961              :  *
     962              :  * Detach from the shared memory segment, if still attached.  This is not
     963              :  * intended to be called explicitly by the process that originally created the
     964              :  * segment (it will have on_shmem_exit callback(s) registered to do that).
     965              :  * Rather, this is for subprocesses that have inherited an attachment and want
     966              :  * to get rid of it.
     967              :  *
     968              :  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
     969              :  * routine, also AnonymousShmem and AnonymousShmemSize.
     970              :  */
     971              : void
     972            1 : PGSharedMemoryDetach(void)
     973              : {
     974            1 :     if (UsedShmemSegAddr != NULL)
     975              :     {
     976            1 :         if ((shmdt(UsedShmemSegAddr) < 0)
     977              : #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
     978              :         /* Work-around for cygipc exec bug */
     979              :             && shmdt(NULL) < 0
     980              : #endif
     981              :             )
     982            0 :             elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
     983            1 :         UsedShmemSegAddr = NULL;
     984              :     }
     985              : 
     986            1 :     if (AnonymousShmem != NULL)
     987              :     {
     988            1 :         if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
     989            0 :             elog(LOG, "munmap(%p, %zu) failed: %m",
     990              :                  AnonymousShmem, AnonymousShmemSize);
     991            1 :         AnonymousShmem = NULL;
     992              :     }
     993            1 : }
        

Generated by: LCOV version 2.0-1