LCOV - code coverage report
Current view: top level - src/backend/storage/buffer - freelist.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 92.8 % 167 155
Test Date: 2026-04-07 14:16:30 Functions: 100.0 % 15 15
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * freelist.c
       4              :  *    routines for managing the buffer pool's replacement strategy.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  *
      11              :  * IDENTIFICATION
      12              :  *    src/backend/storage/buffer/freelist.c
      13              :  *
      14              :  *-------------------------------------------------------------------------
      15              :  */
      16              : #include "postgres.h"
      17              : 
      18              : #include "pgstat.h"
      19              : #include "port/atomics.h"
      20              : #include "storage/buf_internals.h"
      21              : #include "storage/bufmgr.h"
      22              : #include "storage/proc.h"
      23              : #include "storage/shmem.h"
      24              : #include "storage/subsystems.h"
      25              : 
      26              : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
      27              : 
      28              : 
/*
 * The shared freelist control information.
 *
 * Lives in shared memory (see StrategyCtlShmemRequest/Init below); all
 * backends share one instance via the StrategyControl pointer.
 */
typedef struct
{
	/* Spinlock: protects the values below */
	slock_t		buffer_strategy_lock;

	/*
	 * clock-sweep hand: index of next buffer to consider grabbing. Note that
	 * this isn't a concrete buffer - we only ever increase the value. So, to
	 * get an actual buffer, it needs to be used modulo NBuffers.
	 */
	pg_atomic_uint32 nextVictimBuffer;

	/*
	 * Statistics.  These counters should be wide enough that they can't
	 * overflow during a single bgwriter cycle.
	 */
	uint32		completePasses; /* Complete cycles of the clock-sweep */
	pg_atomic_uint32 numBufferAllocs;	/* Buffers allocated since last reset */

	/*
	 * Bgworker process to be notified upon activity or -1 if none. See
	 * StrategyNotifyBgWriter.
	 */
	int			bgwprocno;
} BufferStrategyControl;
      57              : 
/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;

/* Shared-memory sizing/initialization hooks, registered just below */
static void StrategyCtlShmemRequest(void *arg);
static void StrategyCtlShmemInit(void *arg);

/* Callback registration consumed by the shmem subsystem machinery */
const ShmemCallbacks StrategyCtlShmemCallbacks = {
	.request_fn = StrategyCtlShmemRequest,
	.init_fn = StrategyCtlShmemInit,
};
      68              : 
/*
 * Private (non-shared) state for managing a ring of shared buffers to re-use.
 * This is currently the only kind of BufferAccessStrategy object, but someday
 * we might have more kinds.
 */
typedef struct BufferAccessStrategyData
{
	/* Overall strategy type */
	BufferAccessStrategyType btype;
	/* Number of elements in buffers[] array */
	int			nbuffers;

	/*
	 * Index of the "current" slot in the ring, ie, the one most recently
	 * returned by GetBufferFromRing.
	 */
	int			current;

	/*
	 * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
	 * have not yet selected a buffer for this ring slot.  For allocation
	 * simplicity this is palloc'd together with the fixed fields of the
	 * struct.
	 */
	Buffer		buffers[FLEXIBLE_ARRAY_MEMBER];
}			BufferAccessStrategyData;
      95              : 
      96              : 
/* Prototypes for internal functions */
static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
									 uint64 *buf_state);
static void AddBufferToRing(BufferAccessStrategy strategy,
							BufferDesc *buf);
     102              : 
/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 */
static inline uint32
ClockSweepTick(void)
{
	uint32		victim;

	/*
	 * Atomically move hand ahead one buffer - if there's several processes
	 * doing this, this can lead to buffers being returned slightly out of
	 * apparent order.
	 */
	victim =
		pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

	if (victim >= NBuffers)
	{
		uint32		originalVictim = victim;

		/* always wrap what we look up in BufferDescriptors */
		victim = victim % NBuffers;

		/*
		 * If we're the one that just caused a wraparound, force
		 * completePasses to be incremented while holding the spinlock. We
		 * need the spinlock so StrategySyncStart() can return a consistent
		 * value consisting of nextVictimBuffer and completePasses.
		 */
		if (victim == 0)
		{
			uint32		expected;
			uint32		wrapped;
			bool		success = false;

			/* the value fetch-add left behind after our increment */
			expected = originalVictim + 1;

			while (!success)
			{
				/*
				 * Acquire the spinlock while increasing completePasses. That
				 * allows other readers to read nextVictimBuffer and
				 * completePasses in a consistent manner which is required for
				 * StrategySyncStart().  In theory delaying the increment
				 * could lead to an overflow of nextVictimBuffers, but that's
				 * highly unlikely and wouldn't be particularly harmful.
				 */
				SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

				wrapped = expected % NBuffers;

				/*
				 * On CAS failure 'expected' is refreshed with the current
				 * counter value (other backends advanced the hand meanwhile),
				 * so the retry recomputes 'wrapped' from the new value.
				 */
				success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
														 &expected, wrapped);
				if (success)
					StrategyControl->completePasses++;
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
			}
		}
	}
	return victim;
}
     167              : 
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	It is the callers responsibility to ensure the buffer ownership can be
 *	tracked via TrackNewBufferPin().
 *
 *	The buffer is pinned and marked as owned, using TrackNewBufferPin(),
 *	before returning.
 *
 *	*from_ring is set to true iff the returned buffer came from the
 *	strategy's reuse ring rather than the clock sweep.
 */
BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
{
	BufferDesc *buf;
	int			bgwprocno;
	int			trycounter;

	*from_ring = false;

	/*
	 * If given a strategy object, see whether it can select a buffer. We
	 * assume strategy objects don't need buffer_strategy_lock.
	 */
	if (strategy != NULL)
	{
		buf = GetBufferFromRing(strategy, buf_state);
		if (buf != NULL)
		{
			*from_ring = true;
			return buf;
		}
	}

	/*
	 * If asked, we need to waken the bgwriter. Since we don't want to rely on
	 * a spinlock for this we force a read from shared memory once, and then
	 * set the latch based on that value. We need to go through that length
	 * because otherwise bgwprocno might be reset while/after we check because
	 * the compiler might just reread from memory.
	 *
	 * This can possibly set the latch of the wrong process if the bgwriter
	 * dies in the wrong moment. But since PGPROC->procLatch is never
	 * deallocated the worst consequence of that is that we set the latch of
	 * some arbitrary process.
	 */
	bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
	if (bgwprocno != -1)
	{
		/* reset bgwprocno first, before setting the latch */
		StrategyControl->bgwprocno = -1;

		/*
		 * Not acquiring ProcArrayLock here which is slightly icky. It's
		 * actually fine because procLatch isn't ever freed, so we just can
		 * potentially set the wrong process' (or no process') latch.
		 */
		SetLatch(&GetPGProcByNumber(bgwprocno)->procLatch);
	}

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.  Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

	/* Use the "clock sweep" algorithm to find a free buffer */
	trycounter = NBuffers;
	for (;;)
	{
		uint64		old_buf_state;
		uint64		local_buf_state;

		buf = GetBufferDescriptor(ClockSweepTick());

		/*
		 * Check whether the buffer can be used and pin it if so. Do this
		 * using a CAS loop, to avoid having to lock the buffer header.
		 */
		old_buf_state = pg_atomic_read_u64(&buf->state);
		for (;;)
		{
			local_buf_state = old_buf_state;

			/*
			 * If the buffer is pinned or has a nonzero usage_count, we cannot
			 * use it; decrement the usage_count (unless pinned) and keep
			 * scanning.
			 */

			if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
			{
				if (--trycounter == 0)
				{
					/*
					 * We've scanned all the buffers without making any state
					 * changes, so all the buffers are pinned (or were when we
					 * looked at them). We could hope that someone will free
					 * one eventually, but it's probably better to fail than
					 * to risk getting stuck in an infinite loop.
					 */
					elog(ERROR, "no unpinned buffers available");
				}
				break;
			}

			/* See equivalent code in PinBuffer() */
			if (unlikely(local_buf_state & BM_LOCKED))
			{
				old_buf_state = WaitBufHdrUnlocked(buf);
				continue;
			}

			if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
			{
				local_buf_state -= BUF_USAGECOUNT_ONE;

				if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
												   local_buf_state))
				{
					/* made a state change: restart the pinned-buffer budget */
					trycounter = NBuffers;
					break;
				}
				/* CAS failed: old_buf_state was refreshed, retry inner loop */
			}
			else
			{
				/* pin the buffer if the CAS succeeds */
				local_buf_state += BUF_REFCOUNT_ONE;

				if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
												   local_buf_state))
				{
					/* Found a usable buffer */
					if (strategy != NULL)
						AddBufferToRing(strategy, buf);
					*buf_state = local_buf_state;

					TrackNewBufferPin(BufferDescriptorGetBuffer(buf));

					return buf;
				}
				/* CAS failed: old_buf_state was refreshed, retry inner loop */
			}
		}
	}
}
     318              : 
     319              : /*
     320              :  * StrategySyncStart -- tell BgBufferSync where to start syncing
     321              :  *
     322              :  * The result is the buffer index of the best buffer to sync first.
     323              :  * BgBufferSync() will proceed circularly around the buffer array from there.
     324              :  *
     325              :  * In addition, we return the completed-pass count (which is effectively
     326              :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
     327              :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
     328              :  * being read.
     329              :  */
     330              : int
     331        14504 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
     332              : {
     333              :     uint32      nextVictimBuffer;
     334              :     int         result;
     335              : 
     336        14504 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     337        14504 :     nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
     338        14504 :     result = nextVictimBuffer % NBuffers;
     339              : 
     340        14504 :     if (complete_passes)
     341              :     {
     342        14504 :         *complete_passes = StrategyControl->completePasses;
     343              : 
     344              :         /*
     345              :          * Additionally add the number of wraparounds that happened before
     346              :          * completePasses could be incremented. C.f. ClockSweepTick().
     347              :          */
     348        14504 :         *complete_passes += nextVictimBuffer / NBuffers;
     349              :     }
     350              : 
     351        14504 :     if (num_buf_alloc)
     352              :     {
     353        14504 :         *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
     354              :     }
     355        14504 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     356        14504 :     return result;
     357              : }
     358              : 
/*
 * StrategyNotifyBgWriter -- set or clear allocation notification latch
 *
 * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
 * set that latch.  Pass -1 to clear the pending notification before it
 * happens.  This feature is used by the bgwriter process to wake itself up
 * from hibernation, and is not meant for anybody else to use.
 */
void
StrategyNotifyBgWriter(int bgwprocno)
{
	/*
	 * We acquire buffer_strategy_lock just to ensure that the store appears
	 * atomic to StrategyGetBuffer.  The bgwriter should call this rather
	 * infrequently, so there's no performance penalty from being safe.
	 */
	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	StrategyControl->bgwprocno = bgwprocno;
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}
     379              : 
     380              : 
     381              : /*
     382              :  * StrategyCtlShmemRequest -- request shared memory for the buffer
     383              :  *      cache replacement strategy.
     384              :  */
     385              : static void
     386         1234 : StrategyCtlShmemRequest(void *arg)
     387              : {
     388         1234 :     ShmemRequestStruct(.name = "Buffer Strategy Status",
     389              :                        .size = sizeof(BufferStrategyControl),
     390              :                        .ptr = (void **) &StrategyControl
     391              :         );
     392         1234 : }
     393              : 
/*
 * StrategyCtlShmemInit -- initialize the buffer cache replacement strategy.
 *
 * Registered as the init_fn in StrategyCtlShmemCallbacks; runs after the
 * shared StrategyControl struct has been allocated.
 */
static void
StrategyCtlShmemInit(void *arg)
{
	SpinLockInit(&StrategyControl->buffer_strategy_lock);

	/* Initialize the clock-sweep pointer */
	pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);

	/* Clear statistics */
	StrategyControl->completePasses = 0;
	pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

	/* No pending notification */
	StrategyControl->bgwprocno = -1;
}
     412              : 
     413              : 
     414              : /* ----------------------------------------------------------------
     415              :  *              Backend-private buffer ring management
     416              :  * ----------------------------------------------------------------
     417              :  */
     418              : 
     419              : 
     420              : /*
     421              :  * GetAccessStrategy -- create a BufferAccessStrategy object
     422              :  *
     423              :  * The object is allocated in the current memory context.
     424              :  */
     425              : BufferAccessStrategy
     426       167332 : GetAccessStrategy(BufferAccessStrategyType btype)
     427              : {
     428              :     int         ring_size_kb;
     429              : 
     430              :     /*
     431              :      * Select ring size to use.  See buffer/README for rationales.
     432              :      *
     433              :      * Note: if you change the ring size for BAS_BULKREAD, see also
     434              :      * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     435              :      */
     436       167332 :     switch (btype)
     437              :     {
     438            0 :         case BAS_NORMAL:
     439              :             /* if someone asks for NORMAL, just give 'em a "default" object */
     440            0 :             return NULL;
     441              : 
     442        92004 :         case BAS_BULKREAD:
     443              :             {
     444              :                 int         ring_max_kb;
     445              : 
     446              :                 /*
     447              :                  * The ring always needs to be large enough to allow some
     448              :                  * separation in time between providing a buffer to the user
     449              :                  * of the strategy and that buffer being reused. Otherwise the
     450              :                  * user's pin will prevent reuse of the buffer, even without
     451              :                  * concurrent activity.
     452              :                  *
     453              :                  * We also need to ensure the ring always is large enough for
     454              :                  * SYNC_SCAN_REPORT_INTERVAL, as noted above.
     455              :                  *
     456              :                  * Thus we start out a minimal size and increase the size
     457              :                  * further if appropriate.
     458              :                  */
     459        92004 :                 ring_size_kb = 256;
     460              : 
     461              :                 /*
     462              :                  * There's no point in a larger ring if we won't be allowed to
     463              :                  * pin sufficiently many buffers.  But we never limit to less
     464              :                  * than the minimal size above.
     465              :                  */
     466        92004 :                 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
     467        92004 :                 ring_max_kb = Max(ring_size_kb, ring_max_kb);
     468              : 
     469              :                 /*
     470              :                  * We would like the ring to additionally have space for the
     471              :                  * configured degree of IO concurrency. While being read in,
     472              :                  * buffers can obviously not yet be reused.
     473              :                  *
     474              :                  * Each IO can be up to io_combine_limit blocks large, and we
     475              :                  * want to start up to effective_io_concurrency IOs.
     476              :                  *
     477              :                  * Note that effective_io_concurrency may be 0, which disables
     478              :                  * AIO.
     479              :                  */
     480        92004 :                 ring_size_kb += (BLCKSZ / 1024) *
     481        92004 :                     io_combine_limit * effective_io_concurrency;
     482              : 
     483        92004 :                 if (ring_size_kb > ring_max_kb)
     484        92004 :                     ring_size_kb = ring_max_kb;
     485        92004 :                 break;
     486              :             }
     487        75308 :         case BAS_BULKWRITE:
     488        75308 :             ring_size_kb = 16 * 1024;
     489        75308 :             break;
     490           20 :         case BAS_VACUUM:
     491           20 :             ring_size_kb = 2048;
     492           20 :             break;
     493              : 
     494            0 :         default:
     495            0 :             elog(ERROR, "unrecognized buffer access strategy: %d",
     496              :                  (int) btype);
     497              :             return NULL;        /* keep compiler quiet */
     498              :     }
     499              : 
     500       167332 :     return GetAccessStrategyWithSize(btype, ring_size_kb);
     501              : }
     502              : 
     503              : /*
     504              :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
     505              :  *      number of buffers equivalent to the passed in size.
     506              :  *
     507              :  * If the given ring size is 0, no BufferAccessStrategy will be created and
     508              :  * the function will return NULL.  ring_size_kb must not be negative.
     509              :  */
     510              : BufferAccessStrategy
     511       177166 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
     512              : {
     513              :     int         ring_buffers;
     514              :     BufferAccessStrategy strategy;
     515              : 
     516              :     Assert(ring_size_kb >= 0);
     517              : 
     518              :     /* Figure out how many buffers ring_size_kb is */
     519       177166 :     ring_buffers = ring_size_kb / (BLCKSZ / 1024);
     520              : 
     521              :     /* 0 means unlimited, so no BufferAccessStrategy required */
     522       177166 :     if (ring_buffers == 0)
     523            8 :         return NULL;
     524              : 
     525              :     /* Cap to 1/8th of shared_buffers */
     526       177158 :     ring_buffers = Min(NBuffers / 8, ring_buffers);
     527              : 
     528              :     /* NBuffers should never be less than 16, so this shouldn't happen */
     529              :     Assert(ring_buffers > 0);
     530              : 
     531              :     /* Allocate the object and initialize all elements to zeroes */
     532              :     strategy = (BufferAccessStrategy)
     533       177158 :         palloc0(offsetof(BufferAccessStrategyData, buffers) +
     534              :                 ring_buffers * sizeof(Buffer));
     535              : 
     536              :     /* Set fields that don't start out zero */
     537       177158 :     strategy->btype = btype;
     538       177158 :     strategy->nbuffers = ring_buffers;
     539              : 
     540       177158 :     return strategy;
     541              : }
     542              : 
     543              : /*
     544              :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
     545              :  *      the ring
     546              :  *
     547              :  * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
     548              :  * returning NULL with 0 size.
     549              :  */
     550              : int
     551           27 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
     552              : {
     553           27 :     if (strategy == NULL)
     554            0 :         return 0;
     555              : 
     556           27 :     return strategy->nbuffers;
     557              : }
     558              : 
     559              : /*
     560              :  * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
     561              :  *
     562              :  * When pinning extra buffers to look ahead, users of a ring-based strategy are
     563              :  * in danger of pinning too much of the ring at once while performing look-ahead.
     564              :  * For some strategies, that means "escaping" from the ring, and in others it
     565              :  * means forcing dirty data to disk very frequently with associated WAL
     566              :  * flushing.  Since external code has no insight into any of that, allow
     567              :  * individual strategy types to expose a clamp that should be applied when
     568              :  * deciding on a maximum number of buffers to pin at once.
     569              :  *
     570              :  * Callers should combine this number with other relevant limits and take the
     571              :  * minimum.
     572              :  */
     573              : int
     574       684233 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
     575              : {
     576       684233 :     if (strategy == NULL)
     577       490926 :         return NBuffers;
     578              : 
     579       193307 :     switch (strategy->btype)
     580              :     {
     581        87194 :         case BAS_BULKREAD:
     582              : 
     583              :             /*
     584              :              * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
     585              :              * shouldn't be a problem and the caller is free to pin up to the
     586              :              * entire ring at once.
     587              :              */
     588        87194 :             return strategy->nbuffers;
     589              : 
     590       106113 :         default:
     591              : 
     592              :             /*
     593              :              * Tell caller not to pin more than half the buffers in the ring.
     594              :              * This is a trade-off between look ahead distance and deferring
     595              :              * writeback and associated WAL traffic.
     596              :              */
     597       106113 :             return strategy->nbuffers / 2;
     598              :     }
     599              : }
     600              : 
     601              : /*
     602              :  * FreeAccessStrategy -- release a BufferAccessStrategy object
     603              :  *
     604              :  * A simple pfree would do at the moment, but we would prefer that callers
     605              :  * don't assume that much about the representation of BufferAccessStrategy.
     606              :  */
     607              : void
     608       160721 : FreeAccessStrategy(BufferAccessStrategy strategy)
     609              : {
     610              :     /* don't crash if called on a "default" strategy */
     611       160721 :     if (strategy != NULL)
     612       160721 :         pfree(strategy);
     613       160721 : }
     614              : 
/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *      ring is empty / not usable.
 *
 * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
 * returning.
 *
 * A NULL return means the caller should allocate a victim with the normal
 * strategy instead, and then refill the current ring slot by calling
 * AddBufferToRing() with that new buffer.
 */
static BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy, uint64 *buf_state)
{
    BufferDesc *buf;
    Buffer      bufnum;
    uint64      old_buf_state;
    uint64      local_buf_state;    /* to avoid repeated (de-)referencing */


    /* Advance to next ring slot, wrapping back to slot 0 past the end */
    if (++strategy->current >= strategy->nbuffers)
        strategy->current = 0;

    /*
     * If the slot hasn't been filled yet, tell the caller to allocate a new
     * buffer with the normal allocation strategy.  He will then fill this
     * slot by calling AddBufferToRing with the new buffer.
     */
    bufnum = strategy->buffers[strategy->current];
    if (bufnum == InvalidBuffer)
        return NULL;

    /* Buffer numbers are 1-based; descriptor array indexing is 0-based */
    buf = GetBufferDescriptor(bufnum - 1);

    /*
     * Check whether the buffer can be used and pin it if so. Do this using a
     * CAS loop, to avoid having to lock the buffer header.
     */
    old_buf_state = pg_atomic_read_u64(&buf->state);
    for (;;)
    {
        local_buf_state = old_buf_state;

        /*
         * If the buffer is pinned we cannot use it under any circumstances.
         *
         * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
         * since our own previous usage of the ring element would have left it
         * there, but it might've been decremented by clock-sweep since then).
         * A higher usage_count indicates someone else has touched the buffer,
         * so we shouldn't re-use it.
         */
        if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
            || BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
            break;

        /* See equivalent code in PinBuffer() */
        if (unlikely(local_buf_state & BM_LOCKED))
        {
            /* header is spinlocked; wait for it to be released and retry */
            old_buf_state = WaitBufHdrUnlocked(buf);
            continue;
        }

        /* pin the buffer if the CAS succeeds */
        local_buf_state += BUF_REFCOUNT_ONE;

        /* on failure, the CAS reloads old_buf_state and we loop again */
        if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
                                           local_buf_state))
        {
            *buf_state = local_buf_state;

            TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
            return buf;
        }
    }

    /*
     * Tell caller to allocate a new buffer with the normal allocation
     * strategy.  He'll then replace this ring element via AddBufferToRing.
     */
    return NULL;
}
     694              : 
     695              : /*
     696              :  * AddBufferToRing -- add a buffer to the buffer ring
     697              :  *
     698              :  * Caller must hold the buffer header spinlock on the buffer.  Since this
     699              :  * is called with the spinlock held, it had better be quite cheap.
     700              :  */
     701              : static void
     702       589416 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     703              : {
     704       589416 :     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
     705       589416 : }
     706              : 
     707              : /*
     708              :  * Utility function returning the IOContext of a given BufferAccessStrategy's
     709              :  * strategy ring.
     710              :  */
     711              : IOContext
     712     86140502 : IOContextForStrategy(BufferAccessStrategy strategy)
     713              : {
     714     86140502 :     if (!strategy)
     715     83361166 :         return IOCONTEXT_NORMAL;
     716              : 
     717      2779336 :     switch (strategy->btype)
     718              :     {
     719              :         case BAS_NORMAL:
     720              : 
     721              :             /*
     722              :              * Currently, GetAccessStrategy() returns NULL for
     723              :              * BufferAccessStrategyType BAS_NORMAL, so this case is
     724              :              * unreachable.
     725              :              */
     726              :             pg_unreachable();
     727              :             return IOCONTEXT_NORMAL;
     728      1823810 :         case BAS_BULKREAD:
     729      1823810 :             return IOCONTEXT_BULKREAD;
     730       332748 :         case BAS_BULKWRITE:
     731       332748 :             return IOCONTEXT_BULKWRITE;
     732       622778 :         case BAS_VACUUM:
     733       622778 :             return IOCONTEXT_VACUUM;
     734              :     }
     735              : 
     736            0 :     elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
     737              :     pg_unreachable();
     738              : }
     739              : 
     740              : /*
     741              :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
     742              :  *
     743              :  * When a nondefault strategy is used, the buffer manager calls this function
     744              :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
     745              :  * be written out and doing so would require flushing WAL too.  This gives us
     746              :  * a chance to choose a different victim.
     747              :  *
     748              :  * Returns true if buffer manager should ask for a new victim, and false
     749              :  * if this buffer should be written and re-used.
     750              :  */
     751              : bool
     752        26704 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
     753              : {
     754              :     /* We only do this in bulkread mode */
     755        26704 :     if (strategy->btype != BAS_BULKREAD)
     756         4319 :         return false;
     757              : 
     758              :     /* Don't muck with behavior of normal buffer-replacement strategy */
     759        44770 :     if (!from_ring ||
     760        22385 :         strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
     761            0 :         return false;
     762              : 
     763              :     /*
     764              :      * Remove the dirty buffer from the ring; necessary to prevent infinite
     765              :      * loop if all ring members are dirty.
     766              :      */
     767        22385 :     strategy->buffers[strategy->current] = InvalidBuffer;
     768              : 
     769        22385 :     return true;
     770              : }
        

Generated by: LCOV version 2.0-1