LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - shm_mq.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 321 379 84.7 %
Date: 2021-12-04 22:09:09 Functions: 20 21 95.2 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * shm_mq.c
       4             :  *    single-reader, single-writer shared memory message queue
       5             :  *
       6             :  * Both the sender and the receiver must have a PGPROC; their respective
       7             :  * process latches are used for synchronization.  Only the sender may send,
       8             :  * and only the receiver may receive.  This is intended to allow a user
       9             :  * backend to communicate with worker backends that it has registered.
      10             :  *
      11             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
      12             :  * Portions Copyright (c) 1994, Regents of the University of California
      13             :  *
      14             :  * src/backend/storage/ipc/shm_mq.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : 
      19             : #include "postgres.h"
      20             : 
      21             : #include "miscadmin.h"
      22             : #include "pgstat.h"
      23             : #include "port/pg_bitutils.h"
      24             : #include "postmaster/bgworker.h"
      25             : #include "storage/procsignal.h"
      26             : #include "storage/shm_mq.h"
      27             : #include "storage/spin.h"
      28             : #include "utils/memutils.h"
      29             : 
      30             : /*
      31             :  * This structure represents the actual queue, stored in shared memory.
      32             :  *
      33             :  * Some notes on synchronization:
      34             :  *
      35             :  * mq_receiver and mq_bytes_read can only be changed by the receiver; and
      36             :  * mq_sender and mq_bytes_written can only be changed by the sender.
      37             :  * mq_receiver and mq_sender are protected by mq_mutex, although, importantly,
      38             :  * they cannot change once set, and thus may be read without a lock once this
      39             :  * is known to be the case.
      40             :  *
      41             :  * mq_bytes_read and mq_bytes_written are not protected by the mutex.  Instead,
      42             :  * they are written atomically using 8 byte loads and stores.  Memory barriers
      43             :  * must be carefully used to synchronize reads and writes of these values with
      44             :  * reads and writes of the actual data in mq_ring.
      45             :  *
      46             :  * mq_detached needs no locking.  It can be set by either the sender or the
      47             :  * receiver, but only ever from false to true, so redundant writes don't
      48             :  * matter.  It is important that if we set mq_detached and then set the
      49             :  * counterparty's latch, the counterparty must be certain to see the change
      50             :  * after waking up.  Since SetLatch begins with a memory barrier and ResetLatch
      51             :  * ends with one, this should be OK.
      52             :  *
      53             :  * mq_ring_size and mq_ring_offset never change after initialization, and
      54             :  * can therefore be read without the lock.
      55             :  *
      56             :  * Importantly, mq_ring can be safely read and written without a lock.
      57             :  * At any given time, the difference between mq_bytes_read and
      58             :  * mq_bytes_written defines the number of bytes within mq_ring that contain
      59             :  * unread data, and mq_bytes_read defines the position where those bytes
      60             :  * begin.  The sender can increase the number of unread bytes at any time,
      61             :  * but only the receiver can give license to overwrite those bytes, by
      62             :  * incrementing mq_bytes_read.  Therefore, it's safe for the receiver to read
      63             :  * the unread bytes it knows to be present without the lock.  Conversely,
      64             :  * the sender can write to the unused portion of the ring buffer without
      65             :  * the lock, because nobody else can be reading or writing those bytes.  The
      66             :  * receiver could be making more bytes unused by incrementing mq_bytes_read,
      67             :  * but that's OK.  Note that it would be unsafe for the receiver to read any
      68             :  * data it's already marked as read, or to write any data; and it would be
      69             :  * unsafe for the sender to reread any data after incrementing
      70             :  * mq_bytes_written, but fortunately there's no need for any of that.
      71             :  */
       72             : struct shm_mq
       73             : {
       74             :     slock_t     mq_mutex;       /* protects mq_receiver and mq_sender */
       75             :     PGPROC     *mq_receiver;    /* receiving process; set once, under mutex */
       76             :     PGPROC     *mq_sender;      /* sending process; set once, under mutex */
       77             :     pg_atomic_uint64 mq_bytes_read; /* total bytes consumed; receiver-only */
       78             :     pg_atomic_uint64 mq_bytes_written;  /* total bytes produced; sender-only */
       79             :     Size        mq_ring_size;   /* usable ring size; fixed at creation */
       80             :     bool        mq_detached;    /* goes false->true only; either side sets */
       81             :     uint8       mq_ring_offset; /* padding before ring so data is MAXALIGN'd */
       82             :     char        mq_ring[FLEXIBLE_ARRAY_MEMBER];    /* the ring buffer proper */
       83             : };
      84             : 
      85             : /*
      86             :  * This structure is a backend-private handle for access to a queue.
      87             :  *
      88             :  * mqh_queue is a pointer to the queue we've attached, and mqh_segment is
      89             :  * an optional pointer to the dynamic shared memory segment that contains it.
      90             :  * (If mqh_segment is provided, we register an on_dsm_detach callback to
      91             :  * make sure we detach from the queue before detaching from DSM.)
      92             :  *
      93             :  * If this queue is intended to connect the current process with a background
      94             :  * worker that started it, the user can pass a pointer to the worker handle
      95             :  * to shm_mq_attach(), and we'll store it in mqh_handle.  The point of this
      96             :  * is to allow us to begin sending to or receiving from that queue before the
      97             :  * process we'll be communicating with has even been started.  If it fails
      98             :  * to start, the handle will allow us to notice that and fail cleanly, rather
      99             :  * than waiting forever; see shm_mq_wait_internal.  This is mostly useful in
     100             :  * simple cases - e.g. where there are just 2 processes communicating; in
     101             :  * more complex scenarios, every process may not have a BackgroundWorkerHandle
     102             :  * available, or may need to watch for the failure of more than one other
     103             :  * process at a time.
     104             :  *
     105             :  * When a message exists as a contiguous chunk of bytes in the queue - that is,
     106             :  * it is smaller than the size of the ring buffer and does not wrap around
     107             :  * the end - we return the message to the caller as a pointer into the buffer.
     108             :  * For messages that are larger or happen to wrap, we reassemble the message
     109             :  * locally by copying the chunks into a backend-local buffer.  mqh_buffer is
     110             :  * the buffer, and mqh_buflen is the number of bytes allocated for it.
     111             :  *
      112             :  * mqh_send_pending is the number of bytes that have been written to the
      113             :  * queue but not yet reflected in shared memory.  We do not update the
      114             :  * shared counter until the pending data exceeds 1/4th of the ring size or
      115             :  * the tuple queue is full.  This prevents frequent CPU cache misses, and it
      116             :  * also avoids frequent SetLatch() calls, which are quite expensive.
     117             :  *
     118             :  * mqh_partial_bytes, mqh_expected_bytes, and mqh_length_word_complete
     119             :  * are used to track the state of non-blocking operations.  When the caller
     120             :  * attempts a non-blocking operation that returns SHM_MQ_WOULD_BLOCK, they
     121             :  * are expected to retry the call at a later time with the same argument;
     122             :  * we need to retain enough state to pick up where we left off.
     123             :  * mqh_length_word_complete tracks whether we are done sending or receiving
     124             :  * (whichever we're doing) the entire length word.  mqh_partial_bytes tracks
     125             :  * the number of bytes read or written for either the length word or the
     126             :  * message itself, and mqh_expected_bytes - which is used only for reads -
     127             :  * tracks the expected total size of the payload.
     128             :  *
     129             :  * mqh_counterparty_attached tracks whether we know the counterparty to have
     130             :  * attached to the queue at some previous point.  This lets us avoid some
     131             :  * mutex acquisitions.
     132             :  *
     133             :  * mqh_context is the memory context in effect at the time we attached to
     134             :  * the shm_mq.  The shm_mq_handle itself is allocated in this context, and
     135             :  * we make sure any other allocations we do happen in this context as well,
     136             :  * to avoid nasty surprises.
     137             :  */
      138             : struct shm_mq_handle
      139             : {
      140             :     shm_mq     *mqh_queue;      /* shared-memory queue we are attached to */
      141             :     dsm_segment *mqh_segment;   /* DSM segment containing mqh_queue, or NULL */
      142             :     BackgroundWorkerHandle *mqh_handle; /* counterpart worker handle, or NULL */
      143             :     char       *mqh_buffer;     /* local buffer for reassembling messages */
      144             :     Size        mqh_buflen;     /* bytes allocated for mqh_buffer */
      145             :     Size        mqh_consume_pending;    /* NOTE(review): not described in the
                      :                                          * comment above; presumably bytes
                      :                                          * consumed locally but not yet added
                      :                                          * to mq_bytes_read -- the read-side
                      :                                          * analogue of mqh_send_pending.
                      :                                          * Confirm against the receive path. */
      146             :     Size        mqh_send_pending;       /* bytes written into the ring but
                      :                                          * not yet added to mq_bytes_written */
      147             :     Size        mqh_partial_bytes;      /* bytes sent/received so far for the
                      :                                          * current length word or payload */
      148             :     Size        mqh_expected_bytes;     /* expected payload size (reads only) */
      149             :     bool        mqh_length_word_complete;   /* done with the length word? */
      150             :     bool        mqh_counterparty_attached;  /* counterparty known attached? */
      151             :     MemoryContext mqh_context;  /* context in effect at attach time; handle
                      :                                  * and buffers are allocated in it */
      152             : };
     153             : 
     154             : static void shm_mq_detach_internal(shm_mq *mq);
     155             : static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes,
     156             :                                        const void *data, bool nowait, Size *bytes_written);
     157             : static shm_mq_result shm_mq_receive_bytes(shm_mq_handle *mqh,
     158             :                                           Size bytes_needed, bool nowait, Size *nbytesp,
     159             :                                           void **datap);
     160             : static bool shm_mq_counterparty_gone(shm_mq *mq,
     161             :                                      BackgroundWorkerHandle *handle);
     162             : static bool shm_mq_wait_internal(shm_mq *mq, PGPROC **ptr,
     163             :                                  BackgroundWorkerHandle *handle);
     164             : static void shm_mq_inc_bytes_read(shm_mq *mq, Size n);
     165             : static void shm_mq_inc_bytes_written(shm_mq *mq, Size n);
     166             : static void shm_mq_detach_callback(dsm_segment *seg, Datum arg);
     167             : 
     168             : /* Minimum queue size is enough for header and at least one chunk of data. */
     169             : const Size  shm_mq_minimum_size =
     170             : MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF;
     171             : 
     172             : #define MQH_INITIAL_BUFSIZE             8192
     173             : 
      174             : /*
      175             :  * Initialize a new shared message queue.
                      :  *
                      :  * 'address' must point to 'size' bytes of memory that will hold both the
                      :  * shm_mq header and the ring buffer; any odd bytes beyond the last MAXALIGN
                      :  * boundary are unused.  Returns 'address' reinterpreted as the initialized
                      :  * queue.
      176             :  */
      177             : shm_mq *
      178        3388 : shm_mq_create(void *address, Size size)
      179             : {
      180        3388 :     shm_mq     *mq = address;
      181        3388 :     Size        data_offset = MAXALIGN(offsetof(shm_mq, mq_ring));
      182             : 
      183             :     /* If the size isn't MAXALIGN'd, just discard the odd bytes. */
      184        3388 :     size = MAXALIGN_DOWN(size);
      185             : 
      186             :     /* Queue size must be large enough to hold some data. */
      187             :     Assert(size > data_offset);
      188             : 
      189             :     /* Initialize queue header. */
      190        3388 :     SpinLockInit(&mq->mq_mutex);
      191        3388 :     mq->mq_receiver = NULL;
      192        3388 :     mq->mq_sender = NULL;
      193        3388 :     pg_atomic_init_u64(&mq->mq_bytes_read, 0);
      194        3388 :     pg_atomic_init_u64(&mq->mq_bytes_written, 0);
      195        3388 :     mq->mq_ring_size = size - data_offset;
      196        3388 :     mq->mq_detached = false;
                      :     /* Remember how much padding separates the header from the ring data. */
      197        3388 :     mq->mq_ring_offset = data_offset - offsetof(shm_mq, mq_ring);
      198             : 
      199        3388 :     return mq;
      200             : }
     201             : 
      202             : /*
      203             :  * Set the identity of the process that will receive from a shared message
      204             :  * queue.
                      :  *
                      :  * If the sender has already attached, its latch is set so that it notices
                      :  * the receiver is now present.
      205             :  */
      206             : void
      207        3388 : shm_mq_set_receiver(shm_mq *mq, PGPROC *proc)
      208             : {
      209             :     PGPROC     *sender;
      210             : 
      211        3388 :     SpinLockAcquire(&mq->mq_mutex);
      212             :     Assert(mq->mq_receiver == NULL);
      213        3388 :     mq->mq_receiver = proc;
      214        3388 :     sender = mq->mq_sender;
      215        3388 :     SpinLockRelease(&mq->mq_mutex);
      216             : 
                      :     /* Wake the sender, if it attached before us. */
      217        3388 :     if (sender != NULL)
      218          14 :         SetLatch(&sender->procLatch);
      219        3388 : }
     220             : 
      221             : /*
      222             :  * Set the identity of the process that will send to a shared message queue.
                      :  *
                      :  * If the receiver has already attached, its latch is set so that it notices
                      :  * the sender is now present.
      223             :  */
      224             : void
      225        3286 : shm_mq_set_sender(shm_mq *mq, PGPROC *proc)
      226             : {
      227             :     PGPROC     *receiver;
      228             : 
      229        3286 :     SpinLockAcquire(&mq->mq_mutex);
      230             :     Assert(mq->mq_sender == NULL);
      231        3286 :     mq->mq_sender = proc;
      232        3286 :     receiver = mq->mq_receiver;
      233        3286 :     SpinLockRelease(&mq->mq_mutex);
      234             : 
                      :     /* Wake the receiver, if it attached before us. */
      235        3286 :     if (receiver != NULL)
      236        3272 :         SetLatch(&receiver->procLatch);
      237        3286 : }
     238             : 
      239             : /*
      240             :  * Get the configured receiver.
                      :  *
                      :  * Read under the spinlock, since a receiver may be attaching concurrently;
                      :  * returns NULL if no receiver has been set yet.
      241             :  */
      242             : PGPROC *
      243           2 : shm_mq_get_receiver(shm_mq *mq)
      244             : {
      245             :     PGPROC     *receiver;
      246             : 
      247           2 :     SpinLockAcquire(&mq->mq_mutex);
      248           2 :     receiver = mq->mq_receiver;
      249           2 :     SpinLockRelease(&mq->mq_mutex);
      250             : 
      251           2 :     return receiver;
      252             : }
     253             : 
      254             : /*
      255             :  * Get the configured sender.
                      :  *
                      :  * Read under the spinlock, since a sender may be attaching concurrently;
                      :  * returns NULL if no sender has been set yet.
      256             :  */
      257             : PGPROC *
      258    14694066 : shm_mq_get_sender(shm_mq *mq)
      259             : {
      260             :     PGPROC     *sender;
      261             : 
      262    14694066 :     SpinLockAcquire(&mq->mq_mutex);
      263    14694066 :     sender = mq->mq_sender;
      264    14694066 :     SpinLockRelease(&mq->mq_mutex);
      265             : 
      266    14694066 :     return sender;
      267             : }
     268             : 
      269             : /*
      270             :  * Attach to a shared message queue so we can send or receive messages.
      271             :  *
      272             :  * The memory context in effect at the time this function is called should
      273             :  * be one which will last for at least as long as the message queue itself.
      274             :  * We'll allocate the handle in that context, and future allocations that
      275             :  * are needed to buffer incoming data will happen in that context as well.
      276             :  *
      277             :  * If seg != NULL, the queue will be automatically detached when that dynamic
      278             :  * shared memory segment is detached.
      279             :  *
      280             :  * If handle != NULL, the queue can be read or written even before the
      281             :  * other process has attached.  We'll wait for it to do so if needed.  The
      282             :  * handle must be for a background worker initialized with bgw_notify_pid
      283             :  * equal to our PID.
      284             :  *
      285             :  * shm_mq_detach() should be called when done.  This will free the
      286             :  * shm_mq_handle and mark the queue itself as detached, so that our
      287             :  * counterpart won't get stuck waiting for us to fill or drain the queue
      288             :  * after we've already lost interest.
      289             :  */
      290             : shm_mq_handle *
      291        6674 : shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
      292             : {
      293        6674 :     shm_mq_handle *mqh = palloc(sizeof(shm_mq_handle));
      294             : 
                      :     /* We must be one of the queue's two endpoints. */
      295             :     Assert(mq->mq_receiver == MyProc || mq->mq_sender == MyProc);
      296        6674 :     mqh->mqh_queue = mq;
      297        6674 :     mqh->mqh_segment = seg;
      298        6674 :     mqh->mqh_handle = handle;
      299        6674 :     mqh->mqh_buffer = NULL;
      300        6674 :     mqh->mqh_buflen = 0;
      301        6674 :     mqh->mqh_consume_pending = 0;
      302        6674 :     mqh->mqh_send_pending = 0;
      303        6674 :     mqh->mqh_partial_bytes = 0;
      304        6674 :     mqh->mqh_expected_bytes = 0;
      305        6674 :     mqh->mqh_length_word_complete = false;
      306        6674 :     mqh->mqh_counterparty_attached = false;
      307        6674 :     mqh->mqh_context = CurrentMemoryContext;
      308             : 
                      :     /* Arrange to mark the queue detached before the segment goes away. */
      309        6674 :     if (seg != NULL)
      310        6674 :         on_dsm_detach(seg, shm_mq_detach_callback, PointerGetDatum(mq));
      311             : 
      312        6674 :     return mqh;
      313             : }
     314             : 
      315             : /*
      316             :  * Associate a BackgroundWorkerHandle with a shm_mq_handle just as if it had
      317             :  * been passed to shm_mq_attach.
                      :  *
                      :  * May only be used when no handle was supplied at attach time.
      318             :  */
      319             : void
      320        3262 : shm_mq_set_handle(shm_mq_handle *mqh, BackgroundWorkerHandle *handle)
      321             : {
      322             :     Assert(mqh->mqh_handle == NULL);
      323        3262 :     mqh->mqh_handle = handle;
      324        3262 : }
     325             : 
      326             : /*
      327             :  * Write a message into a shared message queue.
                      :  *
                      :  * Convenience wrapper: wraps the single buffer in a one-element iovec and
                      :  * forwards to shm_mq_sendv; see that function for the nowait and
                      :  * force_flush semantics.
      328             :  */
      329             : shm_mq_result
      330     1068918 : shm_mq_send(shm_mq_handle *mqh, Size nbytes, const void *data, bool nowait,
      331             :             bool force_flush)
      332             : {
      333             :     shm_mq_iovec iov;
      334             : 
      335     1068918 :     iov.data = data;
      336     1068918 :     iov.len = nbytes;
      337             : 
      338     1068918 :     return shm_mq_sendv(mqh, &iov, 1, nowait, force_flush);
      339             : }
     340             : 
      341             : /*
      342             :  * Write a message into a shared message queue, gathered from multiple
      343             :  * addresses.
      344             :  *
      345             :  * When nowait = false, we'll wait on our process latch when the ring buffer
      346             :  * fills up, and then continue writing once the receiver has drained some data.
      347             :  * The process latch is reset after each wait.
      348             :  *
      349             :  * When nowait = true, we do not manipulate the state of the process latch;
      350             :  * instead, if the buffer becomes full, we return SHM_MQ_WOULD_BLOCK.  In
      351             :  * this case, the caller should call this function again, with the same
      352             :  * arguments, each time the process latch is set.  (Once begun, the sending
      353             :  * of a message cannot be aborted except by detaching from the queue; changing
      354             :  * the length or payload will corrupt the queue.)
      355             :  *
      356             :  * When force_flush = true, we immediately update the shm_mq's mq_bytes_written
      357             :  * and notify the receiver (if it is already attached).  Otherwise, we don't
      358             :  * update it until we have written an amount of data greater than 1/4th of the
      359             :  * ring size.
                      :  *
                      :  * Returns SHM_MQ_SUCCESS when the whole message has been written,
                      :  * SHM_MQ_WOULD_BLOCK when nowait = true and the ring filled up, or
                      :  * SHM_MQ_DETACHED if the counterparty is gone.
      360             :  */
      361             : shm_mq_result
      362     1072302 : shm_mq_sendv(shm_mq_handle *mqh, shm_mq_iovec *iov, int iovcnt, bool nowait,
      363             :              bool force_flush)
      364             : {
      365             :     shm_mq_result res;
      366     1072302 :     shm_mq     *mq = mqh->mqh_queue;
      367             :     PGPROC     *receiver;
      368     1072302 :     Size        nbytes = 0;
      369             :     Size        bytes_written;
      370             :     int         i;
      371     1072302 :     int         which_iov = 0;
      372             :     Size        offset;
      373             : 
      374             :     Assert(mq->mq_sender == MyProc);
      375             : 
      376             :     /* Compute total size of write. */
      377     2147988 :     for (i = 0; i < iovcnt; ++i)
      378     1075686 :         nbytes += iov[i].len;
      379             : 
      380             :     /* Prevent writing messages overwhelming the receiver. */
      381     1072302 :     if (nbytes > MaxAllocSize)
      382           0 :         ereport(ERROR,
      383             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      384             :                  errmsg("cannot send a message of size %zu via shared memory queue",
      385             :                         nbytes)));
      386             : 
                      :     /* The value of nbytes itself serves as the length word. */
      387             :     /* Try to write, or finish writing, the length word into the buffer. */
      388     2134018 :     while (!mqh->mqh_length_word_complete)
      389             :     {
      390             :         Assert(mqh->mqh_partial_bytes < sizeof(Size));
      391     1061716 :         res = shm_mq_send_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes,
      392     1061716 :                                 ((char *) &nbytes) + mqh->mqh_partial_bytes,
      393             :                                 nowait, &bytes_written);
      394             : 
      395     1061716 :         if (res == SHM_MQ_DETACHED)
      396             :         {
      397             :             /* Reset state in case caller tries to send another message. */
      398           0 :             mqh->mqh_partial_bytes = 0;
      399           0 :             mqh->mqh_length_word_complete = false;
      400           0 :             return res;
      401             :         }
      402     1061716 :         mqh->mqh_partial_bytes += bytes_written;
      403             : 
      404     1061716 :         if (mqh->mqh_partial_bytes >= sizeof(Size))
      405             :         {
      406             :             Assert(mqh->mqh_partial_bytes == sizeof(Size));
      407             : 
      408     1061716 :             mqh->mqh_partial_bytes = 0;
      409     1061716 :             mqh->mqh_length_word_complete = true;
      410             :         }
      411             : 
      412     1061716 :         if (res != SHM_MQ_SUCCESS)
      413           0 :             return res;
      414             : 
      415             :         /* Length word can't be split unless bigger than required alignment. */
      416             :         Assert(mqh->mqh_length_word_complete || sizeof(Size) > MAXIMUM_ALIGNOF);
      417             :     }
      418             : 
      419             :     /* Write the actual data bytes into the buffer. */
      420             :     Assert(mqh->mqh_partial_bytes <= nbytes);
      421     1072302 :     offset = mqh->mqh_partial_bytes;
      422             :     do
      423             :     {
      424             :         Size        chunksize;
      425             : 
      426             :         /* Figure out which bytes need to be sent next. */
      427     1074000 :         if (offset >= iov[which_iov].len)
      428             :         {
      429        8004 :             offset -= iov[which_iov].len;
      430        8004 :             ++which_iov;
      431        8004 :             if (which_iov >= iovcnt)
      432        8000 :                 break;
      433           4 :             continue;
      434             :         }
      435             : 
      436             :         /*
      437             :          * We want to avoid copying the data if at all possible, but every
      438             :          * chunk of bytes we write into the queue has to be MAXALIGN'd, except
      439             :          * the last.  Thus, if a chunk other than the last one ends on a
      440             :          * non-MAXALIGN'd boundary, we have to combine the tail end of its
      441             :          * data with data from one or more following chunks until we either
      442             :          * reach the last chunk or accumulate a number of bytes which is
      443             :          * MAXALIGN'd.
      444             :          */
      445     1065996 :         if (which_iov + 1 < iovcnt &&
      446        3380 :             offset + MAXIMUM_ALIGNOF > iov[which_iov].len)
      447             :         {
      448             :             char        tmpbuf[MAXIMUM_ALIGNOF];
      449        3380 :             int         j = 0;
      450             : 
      451             :             for (;;)
      452             :             {
      453       20304 :                 if (offset < iov[which_iov].len)
      454             :                 {
      455       15238 :                     tmpbuf[j] = iov[which_iov].data[offset];
      456       15238 :                     j++;
      457       15238 :                     offset++;
      458       15238 :                     if (j == MAXIMUM_ALIGNOF)
      459        1694 :                         break;
      460             :                 }
      461             :                 else
      462             :                 {
      463        5066 :                     offset -= iov[which_iov].len;
      464        5066 :                     which_iov++;
      465        5066 :                     if (which_iov >= iovcnt)
      466        1686 :                         break;
      467             :                 }
      468             :             }
      469             : 
      470        3380 :             res = shm_mq_send_bytes(mqh, j, tmpbuf, nowait, &bytes_written);
      471             : 
      472        3380 :             if (res == SHM_MQ_DETACHED)
      473             :             {
      474             :                 /* Reset state in case caller tries to send another message. */
      475           0 :                 mqh->mqh_partial_bytes = 0;
      476           0 :                 mqh->mqh_length_word_complete = false;
      477           0 :                 return res;
      478             :             }
      479             : 
      480        3380 :             mqh->mqh_partial_bytes += bytes_written;
      481        3380 :             if (res != SHM_MQ_SUCCESS)
      482           0 :                 return res;
      483        3380 :             continue;
      484             :         }
      485             : 
      486             :         /*
      487             :          * If this is the last chunk, we can write all the data, even if it
      488             :          * isn't a multiple of MAXIMUM_ALIGNOF.  Otherwise, we need to
      489             :          * MAXALIGN_DOWN the write size.
      490             :          */
      491     1062616 :         chunksize = iov[which_iov].len - offset;
      492     1062616 :         if (which_iov + 1 < iovcnt)
      493           0 :             chunksize = MAXALIGN_DOWN(chunksize);
      494     1062616 :         res = shm_mq_send_bytes(mqh, chunksize, &iov[which_iov].data[offset],
      495             :                                 nowait, &bytes_written);
      496             : 
      497     1062616 :         if (res == SHM_MQ_DETACHED)
      498             :         {
      499             :             /* Reset state in case caller tries to send another message. */
      500           0 :             mqh->mqh_length_word_complete = false;
      501           0 :             mqh->mqh_partial_bytes = 0;
      502           0 :             return res;
      503             :         }
      504             : 
      505     1062616 :         mqh->mqh_partial_bytes += bytes_written;
      506     1062616 :         offset += bytes_written;
      507     1062616 :         if (res != SHM_MQ_SUCCESS)
      508       10586 :             return res;
      509     1055414 :     } while (mqh->mqh_partial_bytes < nbytes);
      510             : 
      511             :     /* Reset for next message. */
      512     1061716 :     mqh->mqh_partial_bytes = 0;
      513     1061716 :     mqh->mqh_length_word_complete = false;
      514             : 
      515             :     /* If queue has been detached, let caller know. */
      516     1061716 :     if (mq->mq_detached)
      517           0 :         return SHM_MQ_DETACHED;
      518             : 
      519             :     /*
      520             :      * If the counterparty is known to have attached, we can read mq_receiver
      521             :      * without acquiring the spinlock and assume it isn't NULL.  Otherwise,
      522             :      * more caution is needed.
      523             :      */
      524     1061716 :     if (mqh->mqh_counterparty_attached)
      525     1059244 :         receiver = mq->mq_receiver;
      526             :     else
      527             :     {
      528        2472 :         SpinLockAcquire(&mq->mq_mutex);
      529        2472 :         receiver = mq->mq_receiver;
      530        2472 :         SpinLockRelease(&mq->mq_mutex);
      531        2472 :         if (receiver == NULL)
      532           0 :             return SHM_MQ_SUCCESS;
      533        2472 :         mqh->mqh_counterparty_attached = true;
      534             :     }
      535             : 
      536             :     /*
      537             :      * If the caller has requested force flush or we have written more than 1/4
      538             :      * of the ring size, mark it as written in shared memory and notify the
      539             :      * receiver.
      540             :      */
      541     1061716 :     if (force_flush || mqh->mqh_send_pending > (mq->mq_ring_size >> 2))
      542             :     {
      543      102666 :         shm_mq_inc_bytes_written(mq, mqh->mqh_send_pending);
      544      102666 :         SetLatch(&receiver->procLatch);
      545      102666 :         mqh->mqh_send_pending = 0;
      546             :     }
      547             : 
      548     1061716 :     return SHM_MQ_SUCCESS;
      549             : }
     550             : 
/*
 * Receive a message from a shared message queue.
 *
 * We set *nbytes to the message length and *data to point to the message
 * payload.  If the entire message exists in the queue as a single,
 * contiguous chunk, *data will point directly into shared memory; otherwise,
 * it will point to a temporary buffer.  This mostly avoids data copying in
 * the hoped-for case where messages are short compared to the buffer size,
 * while still allowing longer messages.  In either case, the return value
 * remains valid until the next receive operation is performed on the queue.
 *
 * When nowait = false, we'll wait on our process latch when the ring buffer
 * is empty and we have not yet received a full message.  The sender will
 * set our process latch after more data has been written, and we'll resume
 * processing.  Each call will therefore return a complete message
 * (unless the sender detaches the queue).
 *
 * When nowait = true, we do not manipulate the state of the process latch;
 * instead, whenever the buffer is empty and we need to read from it, we
 * return SHM_MQ_WOULD_BLOCK.  In this case, the caller should call this
 * function again after the process latch has been set.
 *
 * On-wire format (established by the sending side): a Size length word,
 * then the payload, each padded out to a multiple of MAXIMUM_ALIGNOF.
 * Partial progress across calls (in the nowait case) is tracked in
 * mqh_length_word_complete / mqh_partial_bytes / mqh_expected_bytes.
 */
shm_mq_result
shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
{
	shm_mq	   *mq = mqh->mqh_queue;
	shm_mq_result res;
	Size		rb = 0;			/* bytes available at *rawdata from the most
								 * recent shm_mq_receive_bytes() call */
	Size		nbytes;
	void	   *rawdata;

	Assert(mq->mq_receiver == MyProc);

	/* We can't receive data until the sender has attached. */
	if (!mqh->mqh_counterparty_attached)
	{
		if (nowait)
		{
			int			counterparty_gone;

			/*
			 * We shouldn't return at this point at all unless the sender
			 * hasn't attached yet.  However, the correct return value depends
			 * on whether the sender is still attached.  If we first test
			 * whether the sender has ever attached and then test whether the
			 * sender has detached, there's a race condition: a sender that
			 * attaches and detaches very quickly might fool us into thinking
			 * the sender never attached at all.  So, test whether our
			 * counterparty is definitively gone first, and only afterwards
			 * check whether the sender ever attached in the first place.
			 */
			counterparty_gone = shm_mq_counterparty_gone(mq, mqh->mqh_handle);
			if (shm_mq_get_sender(mq) == NULL)
			{
				if (counterparty_gone)
					return SHM_MQ_DETACHED;
				else
					return SHM_MQ_WOULD_BLOCK;
			}
		}
		else if (!shm_mq_wait_internal(mq, &mq->mq_sender, mqh->mqh_handle)
				 && shm_mq_get_sender(mq) == NULL)
		{
			mq->mq_detached = true;
			return SHM_MQ_DETACHED;
		}
		mqh->mqh_counterparty_attached = true;
	}

	/*
	 * If we've consumed an amount of data greater than 1/4th of the ring
	 * size, mark it consumed in shared memory.  We try to avoid doing this
	 * unnecessarily when only a small amount of data has been consumed,
	 * because SetLatch() is fairly expensive and we don't want to do it too
	 * often.
	 */
	if (mqh->mqh_consume_pending > mq->mq_ring_size / 4)
	{
		shm_mq_inc_bytes_read(mq, mqh->mqh_consume_pending);
		mqh->mqh_consume_pending = 0;
	}

	/* Try to read, or finish reading, the length word from the buffer. */
	while (!mqh->mqh_length_word_complete)
	{
		/* Try to receive the message length word. */
		Assert(mqh->mqh_partial_bytes < sizeof(Size));
		res = shm_mq_receive_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes,
								   nowait, &rb, &rawdata);
		if (res != SHM_MQ_SUCCESS)
			return res;

		/*
		 * Hopefully, we'll receive the entire message length word at once.
		 * But if sizeof(Size) > MAXIMUM_ALIGNOF, then it might be split over
		 * multiple reads.
		 */
		if (mqh->mqh_partial_bytes == 0 && rb >= sizeof(Size))
		{
			Size		needed;

			nbytes = *(Size *) rawdata;

			/* If we've already got the whole message, we're done. */
			needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes);
			if (rb >= needed)
			{
				/*
				 * Fast path: length word and whole payload are contiguous in
				 * the ring, so return a pointer directly into shared memory.
				 */
				mqh->mqh_consume_pending += needed;
				*nbytesp = nbytes;
				*datap = ((char *) rawdata) + MAXALIGN(sizeof(Size));
				return SHM_MQ_SUCCESS;
			}

			/*
			 * We don't have the whole message, but we at least have the whole
			 * length word.
			 */
			mqh->mqh_expected_bytes = nbytes;
			mqh->mqh_length_word_complete = true;
			mqh->mqh_consume_pending += MAXALIGN(sizeof(Size));
			rb -= MAXALIGN(sizeof(Size));
		}
		else
		{
			Size		lengthbytes;

			/* Can't be split unless bigger than required alignment. */
			Assert(sizeof(Size) > MAXIMUM_ALIGNOF);

			/* Message word is split; need buffer to reassemble. */
			if (mqh->mqh_buffer == NULL)
			{
				mqh->mqh_buffer = MemoryContextAlloc(mqh->mqh_context,
													 MQH_INITIAL_BUFSIZE);
				mqh->mqh_buflen = MQH_INITIAL_BUFSIZE;
			}
			Assert(mqh->mqh_buflen >= sizeof(Size));

			/* Copy partial length word; remember to consume it. */
			if (mqh->mqh_partial_bytes + rb > sizeof(Size))
				lengthbytes = sizeof(Size) - mqh->mqh_partial_bytes;
			else
				lengthbytes = rb;
			memcpy(&mqh->mqh_buffer[mqh->mqh_partial_bytes], rawdata,
				   lengthbytes);
			mqh->mqh_partial_bytes += lengthbytes;
			mqh->mqh_consume_pending += MAXALIGN(lengthbytes);
			rb -= lengthbytes;

			/* If we now have the whole word, we're ready to read payload. */
			if (mqh->mqh_partial_bytes >= sizeof(Size))
			{
				Assert(mqh->mqh_partial_bytes == sizeof(Size));
				mqh->mqh_expected_bytes = *(Size *) mqh->mqh_buffer;
				mqh->mqh_length_word_complete = true;
				mqh->mqh_partial_bytes = 0;
			}
		}
	}
	/* Length word is complete; this is the payload size we expect. */
	nbytes = mqh->mqh_expected_bytes;

	/*
	 * Should be disallowed on the sending side already, but better check and
	 * error out on the receiver side as well rather than trying to read a
	 * prohibitively large message.
	 */
	if (nbytes > MaxAllocSize)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("invalid message size %zu in shared memory queue",
						nbytes)));

	if (mqh->mqh_partial_bytes == 0)
	{
		/*
		 * Try to obtain the whole message in a single chunk.  If this works,
		 * we need not copy the data and can return a pointer directly into
		 * shared memory.
		 */
		res = shm_mq_receive_bytes(mqh, nbytes, nowait, &rb, &rawdata);
		if (res != SHM_MQ_SUCCESS)
			return res;
		if (rb >= nbytes)
		{
			mqh->mqh_length_word_complete = false;
			mqh->mqh_consume_pending += MAXALIGN(nbytes);
			*nbytesp = nbytes;
			*datap = rawdata;
			return SHM_MQ_SUCCESS;
		}

		/*
		 * The message has wrapped the buffer.  We'll need to copy it in order
		 * to return it to the client in one chunk.  First, make sure we have
		 * a large enough buffer available.
		 */
		if (mqh->mqh_buflen < nbytes)
		{
			Size		newbuflen;

			/*
			 * Increase size to the next power of 2 that's >= nbytes, but
			 * limit to MaxAllocSize.
			 */
			newbuflen = pg_nextpower2_size_t(nbytes);
			newbuflen = Min(newbuflen, MaxAllocSize);

			if (mqh->mqh_buffer != NULL)
			{
				pfree(mqh->mqh_buffer);
				mqh->mqh_buffer = NULL;
				mqh->mqh_buflen = 0;
			}
			mqh->mqh_buffer = MemoryContextAlloc(mqh->mqh_context, newbuflen);
			mqh->mqh_buflen = newbuflen;
		}
	}

	/*
	 * Loop until we've copied the entire message.  On entry, rb/rawdata
	 * describe the first readable chunk (from whichever receive_bytes call
	 * above most recently succeeded).
	 */
	for (;;)
	{
		Size		still_needed;

		/* Copy as much as we can. */
		Assert(mqh->mqh_partial_bytes + rb <= nbytes);
		memcpy(&mqh->mqh_buffer[mqh->mqh_partial_bytes], rawdata, rb);
		mqh->mqh_partial_bytes += rb;

		/*
		 * Update count of bytes that can be consumed, accounting for
		 * alignment padding.  Note that this will never actually insert any
		 * padding except at the end of a message, because the buffer size is
		 * a multiple of MAXIMUM_ALIGNOF, and each read and write is as well.
		 */
		Assert(mqh->mqh_partial_bytes == nbytes || rb == MAXALIGN(rb));
		mqh->mqh_consume_pending += MAXALIGN(rb);

		/* If we got all the data, exit the loop. */
		if (mqh->mqh_partial_bytes >= nbytes)
			break;

		/* Wait for some more data. */
		still_needed = nbytes - mqh->mqh_partial_bytes;
		res = shm_mq_receive_bytes(mqh, still_needed, nowait, &rb, &rawdata);
		if (res != SHM_MQ_SUCCESS)
			return res;
		/* Clamp so we don't copy bytes belonging to the next message. */
		if (rb > still_needed)
			rb = still_needed;
	}

	/* Return the complete message, and reset for next message. */
	*nbytesp = nbytes;
	*datap = mqh->mqh_buffer;
	mqh->mqh_length_word_complete = false;
	mqh->mqh_partial_bytes = 0;
	return SHM_MQ_SUCCESS;
}
     808             : 
     809             : /*
     810             :  * Wait for the other process that's supposed to use this queue to attach
     811             :  * to it.
     812             :  *
     813             :  * The return value is SHM_MQ_DETACHED if the worker has already detached or
     814             :  * if it dies; it is SHM_MQ_SUCCESS if we detect that the worker has attached.
     815             :  * Note that we will only be able to detect that the worker has died before
     816             :  * attaching if a background worker handle was passed to shm_mq_attach().
     817             :  */
     818             : shm_mq_result
     819           0 : shm_mq_wait_for_attach(shm_mq_handle *mqh)
     820             : {
     821           0 :     shm_mq     *mq = mqh->mqh_queue;
     822             :     PGPROC    **victim;
     823             : 
     824           0 :     if (shm_mq_get_receiver(mq) == MyProc)
     825           0 :         victim = &mq->mq_sender;
     826             :     else
     827             :     {
     828             :         Assert(shm_mq_get_sender(mq) == MyProc);
     829           0 :         victim = &mq->mq_receiver;
     830             :     }
     831             : 
     832           0 :     if (shm_mq_wait_internal(mq, victim, mqh->mqh_handle))
     833           0 :         return SHM_MQ_SUCCESS;
     834             :     else
     835           0 :         return SHM_MQ_DETACHED;
     836             : }
     837             : 
     838             : /*
     839             :  * Detach from a shared message queue, and destroy the shm_mq_handle.
     840             :  */
     841             : void
     842        4876 : shm_mq_detach(shm_mq_handle *mqh)
     843             : {
     844             :     /* Before detaching, notify the receiver about any already-written data. */
     845        4876 :     if (mqh->mqh_send_pending > 0)
     846             :     {
     847         770 :         shm_mq_inc_bytes_written(mqh->mqh_queue, mqh->mqh_send_pending);
     848         770 :         mqh->mqh_send_pending = 0;
     849             :     }
     850             : 
     851             :     /* Notify counterparty that we're outta here. */
     852        4876 :     shm_mq_detach_internal(mqh->mqh_queue);
     853             : 
     854             :     /* Cancel on_dsm_detach callback, if any. */
     855        4876 :     if (mqh->mqh_segment)
     856        4876 :         cancel_on_dsm_detach(mqh->mqh_segment,
     857             :                              shm_mq_detach_callback,
     858        4876 :                              PointerGetDatum(mqh->mqh_queue));
     859             : 
     860             :     /* Release local memory associated with handle. */
     861        4876 :     if (mqh->mqh_buffer != NULL)
     862         120 :         pfree(mqh->mqh_buffer);
     863        4876 :     pfree(mqh);
     864        4876 : }
     865             : 
     866             : /*
     867             :  * Notify counterparty that we're detaching from shared message queue.
     868             :  *
     869             :  * The purpose of this function is to make sure that the process
     870             :  * with which we're communicating doesn't block forever waiting for us to
     871             :  * fill or drain the queue once we've lost interest.  When the sender
     872             :  * detaches, the receiver can read any messages remaining in the queue;
     873             :  * further reads will return SHM_MQ_DETACHED.  If the receiver detaches,
     874             :  * further attempts to send messages will likewise return SHM_MQ_DETACHED.
     875             :  *
     876             :  * This is separated out from shm_mq_detach() because if the on_dsm_detach
     877             :  * callback fires, we only want to do this much.  We do not try to touch
     878             :  * the local shm_mq_handle, as it may have been pfree'd already.
     879             :  */
     880             : static void
     881        6674 : shm_mq_detach_internal(shm_mq *mq)
     882             : {
     883             :     PGPROC     *victim;
     884             : 
     885        6674 :     SpinLockAcquire(&mq->mq_mutex);
     886        6674 :     if (mq->mq_sender == MyProc)
     887        3286 :         victim = mq->mq_receiver;
     888             :     else
     889             :     {
     890             :         Assert(mq->mq_receiver == MyProc);
     891        3388 :         victim = mq->mq_sender;
     892             :     }
     893        6674 :     mq->mq_detached = true;
     894        6674 :     SpinLockRelease(&mq->mq_mutex);
     895             : 
     896        6674 :     if (victim != NULL)
     897        6572 :         SetLatch(&victim->procLatch);
     898        6674 : }
     899             : 
     900             : /*
     901             :  * Get the shm_mq from handle.
     902             :  */
     903             : shm_mq *
     904    11119972 : shm_mq_get_queue(shm_mq_handle *mqh)
     905             : {
     906    11119972 :     return mqh->mqh_queue;
     907             : }
     908             : 
/*
 * Write bytes into a shared message queue.
 *
 * Copies up to nbytes bytes from data into the ring, looping (and, when
 * nowait = false, sleeping on our latch) until everything has been written.
 * *bytes_written is always set to the number of bytes transferred so far.
 * Returns SHM_MQ_SUCCESS when all nbytes were written, SHM_MQ_DETACHED if
 * the queue was detached, or SHM_MQ_WOULD_BLOCK if nowait = true and the
 * ring is full.  Note that written bytes are accumulated locally in
 * mqh_send_pending and only advertised to the receiver lazily (see the
 * comments atop the shm_mq_handle structure).
 */
static shm_mq_result
shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data,
				  bool nowait, Size *bytes_written)
{
	shm_mq	   *mq = mqh->mqh_queue;
	Size		sent = 0;
	uint64		used;
	Size		ringsize = mq->mq_ring_size;
	Size		available;

	while (sent < nbytes)
	{
		uint64		rb;
		uint64		wb;

		/* Compute number of ring buffer bytes used and available. */
		rb = pg_atomic_read_u64(&mq->mq_bytes_read);
		wb = pg_atomic_read_u64(&mq->mq_bytes_written) + mqh->mqh_send_pending;
		Assert(wb >= rb);
		used = wb - rb;
		Assert(used <= ringsize);
		available = Min(ringsize - used, nbytes - sent);

		/*
		 * Bail out if the queue has been detached.  Note that we would be in
		 * trouble if the compiler decided to cache the value of
		 * mq->mq_detached in a register or on the stack across loop
		 * iterations.  It probably shouldn't do that anyway since we'll
		 * always return, call an external function that performs a system
		 * call, or reach a memory barrier at some point later in the loop,
		 * but just to be sure, insert a compiler barrier here.
		 */
		pg_compiler_barrier();
		if (mq->mq_detached)
		{
			*bytes_written = sent;
			return SHM_MQ_DETACHED;
		}

		if (available == 0 && !mqh->mqh_counterparty_attached)
		{
			/*
			 * The queue is full, so if the receiver isn't yet known to be
			 * attached, we must wait for that to happen.
			 */
			if (nowait)
			{
				if (shm_mq_counterparty_gone(mq, mqh->mqh_handle))
				{
					*bytes_written = sent;
					return SHM_MQ_DETACHED;
				}
				if (shm_mq_get_receiver(mq) == NULL)
				{
					*bytes_written = sent;
					return SHM_MQ_WOULD_BLOCK;
				}
			}
			else if (!shm_mq_wait_internal(mq, &mq->mq_receiver,
										   mqh->mqh_handle))
			{
				mq->mq_detached = true;
				*bytes_written = sent;
				return SHM_MQ_DETACHED;
			}
			mqh->mqh_counterparty_attached = true;

			/*
			 * The receiver may have read some data after attaching, so we
			 * must not wait without rechecking the queue state.
			 */
		}
		else if (available == 0)
		{
			/* Update the pending send bytes in the shared memory. */
			shm_mq_inc_bytes_written(mq, mqh->mqh_send_pending);

			/*
			 * Since mq->mqh_counterparty_attached is known to be true at this
			 * point, mq_receiver has been set, and it can't change once set.
			 * Therefore, we can read it without acquiring the spinlock.
			 */
			Assert(mqh->mqh_counterparty_attached);
			SetLatch(&mq->mq_receiver->procLatch);

			/*
			 * We have just updated the mqh_send_pending bytes in the shared
			 * memory so reset it.
			 */
			mqh->mqh_send_pending = 0;

			/* Skip manipulation of our latch if nowait = true. */
			if (nowait)
			{
				*bytes_written = sent;
				return SHM_MQ_WOULD_BLOCK;
			}

			/*
			 * Wait for our latch to be set.  It might already be set for some
			 * unrelated reason, but that'll just result in one extra trip
			 * through the loop.  It's worth it to avoid resetting the latch
			 * at top of loop, because setting an already-set latch is much
			 * cheaper than setting one that has been reset.
			 */
			(void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
							 WAIT_EVENT_MQ_SEND);

			/* Reset the latch so we don't spin. */
			ResetLatch(MyLatch);

			/* An interrupt may have occurred while we were waiting. */
			CHECK_FOR_INTERRUPTS();
		}
		else
		{
			Size		offset;
			Size		sendnow;

			/* Write position within the ring, and the contiguous run there. */
			offset = wb % (uint64) ringsize;
			sendnow = Min(available, ringsize - offset);

			/*
			 * Write as much data as we can via a single memcpy(). Make sure
			 * these writes happen after the read of mq_bytes_read, above.
			 * This barrier pairs with the one in shm_mq_inc_bytes_read.
			 * (Since we're separating the read of mq_bytes_read from a
			 * subsequent write to mq_ring, we need a full barrier here.)
			 */
			pg_memory_barrier();
			memcpy(&mq->mq_ring[mq->mq_ring_offset + offset],
				   (char *) data + sent, sendnow);
			sent += sendnow;

			/*
			 * Update count of bytes written, with alignment padding.  Note
			 * that this will never actually insert any padding except at the
			 * end of a run of bytes, because the buffer size is a multiple of
			 * MAXIMUM_ALIGNOF, and each read is as well.
			 */
			Assert(sent == nbytes || sendnow == MAXALIGN(sendnow));

			/*
			 * For efficiency, we don't update the bytes written in the shared
			 * memory and also don't set the reader's latch here.  Refer to
			 * the comments atop the shm_mq_handle structure for more
			 * information.
			 */
			mqh->mqh_send_pending += MAXALIGN(sendnow);
		}
	}

	*bytes_written = sent;
	return SHM_MQ_SUCCESS;
}
    1067             : 
    1068             : /*
    1069             :  * Wait until at least *nbytesp bytes are available to be read from the
    1070             :  * shared message queue, or until the buffer wraps around.  If the queue is
    1071             :  * detached, returns SHM_MQ_DETACHED.  If nowait is specified and a wait
    1072             :  * would be required, returns SHM_MQ_WOULD_BLOCK.  Otherwise, *datap is set
    1073             :  * to the location at which data bytes can be read, *nbytesp is set to the
    1074             :  * number of bytes which can be read at that address, and the return value
    1075             :  * is SHM_MQ_SUCCESS.
    1076             :  */
    1077             : static shm_mq_result
    1078     1441974 : shm_mq_receive_bytes(shm_mq_handle *mqh, Size bytes_needed, bool nowait,
    1079             :                      Size *nbytesp, void **datap)
    1080             : {
    1081     1441974 :     shm_mq     *mq = mqh->mqh_queue;
    1082     1441974 :     Size        ringsize = mq->mq_ring_size;
    1083             :     uint64      used;
    1084             :     uint64      written;
    1085             : 
    1086             :     for (;;)
    1087      394058 :     {
    1088             :         Size        offset;
    1089             :         uint64      read;
    1090             : 
    1091             :         /* Get bytes written, so we can compute what's available to read. */
    1092     1836032 :         written = pg_atomic_read_u64(&mq->mq_bytes_written);
    1093             : 
    1094             :         /*
    1095             :          * Get bytes read.  Include bytes we could consume but have not yet
    1096             :          * consumed.
    1097             :          */
    1098     1836032 :         read = pg_atomic_read_u64(&mq->mq_bytes_read) +
    1099     1836032 :             mqh->mqh_consume_pending;
    1100     1836032 :         used = written - read;
    1101             :         Assert(used <= ringsize);
    1102     1836032 :         offset = read % (uint64) ringsize;
    1103             : 
    1104             :         /* If we have enough data or buffer has wrapped, we're done. */
    1105     1836032 :         if (used >= bytes_needed || offset + used >= ringsize)
    1106             :         {
    1107     1418646 :             *nbytesp = Min(used, ringsize - offset);
    1108     1418646 :             *datap = &mq->mq_ring[mq->mq_ring_offset + offset];
    1109             : 
    1110             :             /*
    1111             :              * Separate the read of mq_bytes_written, above, from caller's
    1112             :              * attempt to read the data itself.  Pairs with the barrier in
    1113             :              * shm_mq_inc_bytes_written.
    1114             :              */
    1115     1418646 :             pg_read_barrier();
    1116     1418646 :             return SHM_MQ_SUCCESS;
    1117             :         }
    1118             : 
    1119             :         /*
    1120             :          * Fall out before waiting if the queue has been detached.
    1121             :          *
    1122             :          * Note that we don't check for this until *after* considering whether
    1123             :          * the data already available is enough, since the receiver can finish
    1124             :          * receiving a message stored in the buffer even after the sender has
    1125             :          * detached.
    1126             :          */
    1127      417386 :         if (mq->mq_detached)
    1128             :         {
    1129             :             /*
    1130             :              * If the writer advanced mq_bytes_written and then set
    1131             :              * mq_detached, we might not have read the final value of
    1132             :              * mq_bytes_written above.  Insert a read barrier and then check
    1133             :              * again if mq_bytes_written has advanced.
    1134             :              */
    1135        1582 :             pg_read_barrier();
    1136        1582 :             if (written != pg_atomic_read_u64(&mq->mq_bytes_written))
    1137           0 :                 continue;
    1138             : 
    1139        1582 :             return SHM_MQ_DETACHED;
    1140             :         }
    1141             : 
    1142             :         /*
    1143             :          * We didn't get enough data to satisfy the request, so mark any data
    1144             :          * previously-consumed as read to make more buffer space.
    1145             :          */
    1146      415804 :         if (mqh->mqh_consume_pending > 0)
    1147             :         {
    1148      370726 :             shm_mq_inc_bytes_read(mq, mqh->mqh_consume_pending);
    1149      370726 :             mqh->mqh_consume_pending = 0;
    1150             :         }
    1151             : 
    1152             :         /* Skip manipulation of our latch if nowait = true. */
    1153      415804 :         if (nowait)
    1154       21746 :             return SHM_MQ_WOULD_BLOCK;
    1155             : 
    1156             :         /*
    1157             :          * Wait for our latch to be set.  It might already be set for some
    1158             :          * unrelated reason, but that'll just result in one extra trip through
    1159             :          * the loop.  It's worth it to avoid resetting the latch at top of
    1160             :          * loop, because setting an already-set latch is much cheaper than
    1161             :          * setting one that has been reset.
    1162             :          */
    1163      394058 :         (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
    1164             :                          WAIT_EVENT_MQ_RECEIVE);
    1165             : 
    1166             :         /* Reset the latch so we don't spin. */
    1167      394058 :         ResetLatch(MyLatch);
    1168             : 
    1169             :         /* An interrupt may have occurred while we were waiting. */
    1170      394058 :         CHECK_FOR_INTERRUPTS();
    1171             :     }
    1172             : }
    1173             : 
    1174             : /*
    1175             :  * Test whether a counterparty who may not even be alive yet is definitely gone.
    1176             :  */
    1177             : static bool
    1178     3573944 : shm_mq_counterparty_gone(shm_mq *mq, BackgroundWorkerHandle *handle)
    1179             : {
    1180             :     pid_t       pid;
    1181             : 
    1182             :     /* If the queue has been detached, counterparty is definitely gone. */
    1183     3573944 :     if (mq->mq_detached)
    1184         150 :         return true;
    1185             : 
    1186             :     /* If there's a handle, check worker status. */
    1187     3573794 :     if (handle != NULL)
    1188             :     {
    1189             :         BgwHandleStatus status;
    1190             : 
    1191             :         /* Check for unexpected worker death. */
    1192     3573794 :         status = GetBackgroundWorkerPid(handle, &pid);
    1193     3573794 :         if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED)
    1194             :         {
    1195             :             /* Mark it detached, just to make it official. */
    1196           0 :             mq->mq_detached = true;
    1197           0 :             return true;
    1198             :         }
    1199             :     }
    1200             : 
    1201             :     /* Counterparty is not definitively gone. */
    1202     3573794 :     return false;
    1203             : }
    1204             : 
    1205             : /*
    1206             :  * This is used when a process is waiting for its counterpart to attach to the
    1207             :  * queue.  We exit when the other process attaches as expected, or, if
    1208             :  * handle != NULL, when the referenced background process or the postmaster
    1209             :  * dies.  Note that if handle == NULL, and the process fails to attach, we'll
    1210             :  * potentially get stuck here forever waiting for a process that may never
    1211             :  * start.  We do check for interrupts, though.
    1212             :  *
    1213             :  * ptr is a pointer to the memory address that we're expecting to become
    1214             :  * non-NULL when our counterpart attaches to the queue.
    1215             :  */
    1216             : static bool
    1217         274 : shm_mq_wait_internal(shm_mq *mq, PGPROC **ptr, BackgroundWorkerHandle *handle)
    1218             : {
    1219         274 :     bool        result = false;
    1220             : 
    1221             :     for (;;)
    1222        1704 :     {
    1223             :         BgwHandleStatus status;
    1224             :         pid_t       pid;
    1225             : 
    1226             :         /* Acquire the lock just long enough to check the pointer. */
    1227        1978 :         SpinLockAcquire(&mq->mq_mutex);
    1228        1978 :         result = (*ptr != NULL);
    1229        1978 :         SpinLockRelease(&mq->mq_mutex);
    1230             : 
    1231             :         /* Fail if detached; else succeed if initialized. */
    1232        1978 :         if (mq->mq_detached)
    1233             :         {
    1234         152 :             result = false;
    1235         152 :             break;
    1236             :         }
    1237        1826 :         if (result)
    1238         122 :             break;
    1239             : 
    1240        1704 :         if (handle != NULL)
    1241             :         {
    1242             :             /* Check for unexpected worker death. */
    1243        1704 :             status = GetBackgroundWorkerPid(handle, &pid);
    1244        1704 :             if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED)
    1245             :             {
    1246           0 :                 result = false;
    1247           0 :                 break;
    1248             :             }
    1249             :         }
    1250             : 
    1251             :         /* Wait to be signaled. */
    1252        1704 :         (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
    1253             :                          WAIT_EVENT_MQ_INTERNAL);
    1254             : 
    1255             :         /* Reset the latch so we don't spin. */
    1256        1704 :         ResetLatch(MyLatch);
    1257             : 
    1258             :         /* An interrupt may have occurred while we were waiting. */
    1259        1704 :         CHECK_FOR_INTERRUPTS();
    1260             :     }
    1261             : 
    1262         274 :     return result;
    1263             : }
    1264             : 
    1265             : /*
    1266             :  * Increment the number of bytes read.
    1267             :  */
    1268             : static void
    1269      405992 : shm_mq_inc_bytes_read(shm_mq *mq, Size n)
    1270             : {
    1271             :     PGPROC     *sender;
    1272             : 
    1273             :     /*
    1274             :      * Separate prior reads of mq_ring from the increment of mq_bytes_read
    1275             :      * which follows.  This pairs with the full barrier in
    1276             :      * shm_mq_send_bytes(). We only need a read barrier here because the
    1277             :      * increment of mq_bytes_read is actually a read followed by a dependent
    1278             :      * write.
    1279             :      */
    1280      405992 :     pg_read_barrier();
    1281             : 
    1282             :     /*
    1283             :      * There's no need to use pg_atomic_fetch_add_u64 here, because nobody
    1284             :      * else can be changing this value.  This method should be cheaper.
    1285             :      */
    1286      405992 :     pg_atomic_write_u64(&mq->mq_bytes_read,
    1287      405992 :                         pg_atomic_read_u64(&mq->mq_bytes_read) + n);
    1288             : 
    1289             :     /*
    1290             :      * We shouldn't have any bytes to read without a sender, so we can read
    1291             :      * mq_sender here without a lock.  Once it's initialized, it can't change.
    1292             :      */
    1293      405992 :     sender = mq->mq_sender;
    1294             :     Assert(sender != NULL);
    1295      405992 :     SetLatch(&sender->procLatch);
    1296      405992 : }
    1297             : 
    1298             : /*
    1299             :  * Increment the number of bytes written.
    1300             :  */
    1301             : static void
    1302      434340 : shm_mq_inc_bytes_written(shm_mq *mq, Size n)
    1303             : {
    1304             :     /*
    1305             :      * Separate prior reads of mq_ring from the write of mq_bytes_written
    1306             :      * which we're about to do.  Pairs with the read barrier found in
    1307             :      * shm_mq_receive_bytes.
    1308             :      */
    1309      434340 :     pg_write_barrier();
    1310             : 
    1311             :     /*
    1312             :      * There's no need to use pg_atomic_fetch_add_u64 here, because nobody
    1313             :      * else can be changing this value.  This method avoids taking the bus
    1314             :      * lock unnecessarily.
    1315             :      */
    1316      434340 :     pg_atomic_write_u64(&mq->mq_bytes_written,
    1317      434340 :                         pg_atomic_read_u64(&mq->mq_bytes_written) + n);
    1318      434340 : }
    1319             : 
    1320             : /* Shim for on_dsm_detach callback. */
    1321             : static void
    1322        1798 : shm_mq_detach_callback(dsm_segment *seg, Datum arg)
    1323             : {
    1324        1798 :     shm_mq     *mq = (shm_mq *) DatumGetPointer(arg);
    1325             : 
    1326        1798 :     shm_mq_detach_internal(mq);
    1327        1798 : }

Generated by: LCOV version 1.14