LCOV - code coverage report
Current view: top level - src/backend/storage/buffer - freelist.c
Test: PostgreSQL 19devel
Date: 2025-09-10 21:18:40

Coverage summary:      Hit      Total    Coverage
  Lines:               147      158       93.0 %
  Functions:            15       15      100.0 %

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * freelist.c
       4             :  *    routines for managing the buffer pool's replacement strategy.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/storage/buffer/freelist.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres.h"
      17             : 
      18             : #include "pgstat.h"
      19             : #include "port/atomics.h"
      20             : #include "storage/buf_internals.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "storage/proc.h"
      23             : 
      24             : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
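
The macro above forces exactly one load through a volatile-qualified pointer,
so the compiler can neither defer the read nor silently reread the variable
later. A minimal standalone sketch of its effect (shared_value is a
hypothetical stand-in for a field in shared memory):

    #include <stdio.h>

    #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))

    static int  shared_value = 42;  /* hypothetical shared-memory field */

    int
    main(void)
    {
        /* one forced load; 'snapshot' stays stable afterwards even if
         * another process changes shared_value concurrently */
        int         snapshot = INT_ACCESS_ONCE(shared_value);

        printf("snapshot = %d\n", snapshot);
        return 0;
    }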
      25             : 
      26             : 
      27             : /*
      28             :  * The shared freelist control information.
      29             :  */
      30             : typedef struct
      31             : {
      32             :     /* Spinlock: protects the values below */
      33             :     slock_t     buffer_strategy_lock;
      34             : 
      35             :     /*
      36             :      * clock-sweep hand: index of next buffer to consider grabbing. Note that
      37             :      * this isn't a concrete buffer - we only ever increase the value. So, to
      38             :      * get an actual buffer, it needs to be used modulo NBuffers.
      39             :      */
      40             :     pg_atomic_uint32 nextVictimBuffer;
      41             : 
      42             :     /*
      43             :      * Statistics.  These counters should be wide enough that they can't
      44             :      * overflow during a single bgwriter cycle.
      45             :      */
      46             :     uint32      completePasses; /* Complete cycles of the clock-sweep */
      47             :     pg_atomic_uint32 numBufferAllocs;   /* Buffers allocated since last reset */
      48             : 
      49             :     /*
      50             :      * Bgworker process to be notified upon activity or -1 if none. See
      51             :      * StrategyNotifyBgWriter.
      52             :      */
      53             :     int         bgwprocno;
      54             : } BufferStrategyControl;
      55             : 
      56             : /* Pointers to shared state */
      57             : static BufferStrategyControl *StrategyControl = NULL;
      58             : 
      59             : /*
      60             :  * Private (non-shared) state for managing a ring of shared buffers to re-use.
      61             :  * This is currently the only kind of BufferAccessStrategy object, but someday
      62             :  * we might have more kinds.
      63             :  */
      64             : typedef struct BufferAccessStrategyData
      65             : {
      66             :     /* Overall strategy type */
      67             :     BufferAccessStrategyType btype;
      68             :     /* Number of elements in buffers[] array */
      69             :     int         nbuffers;
      70             : 
      71             :     /*
      72             :      * Index of the "current" slot in the ring, ie, the one most recently
      73             :      * returned by GetBufferFromRing.
      74             :      */
      75             :     int         current;
      76             : 
      77             :     /*
      78             :      * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
      79             :      * have not yet selected a buffer for this ring slot.  For allocation
      80             :      * simplicity this is palloc'd together with the fixed fields of the
      81             :      * struct.
      82             :      */
      83             :     Buffer      buffers[FLEXIBLE_ARRAY_MEMBER];
      84             : }           BufferAccessStrategyData;
      85             : 
      86             : 
      87             : /* Prototypes for internal functions */
      88             : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
      89             :                                      uint32 *buf_state);
      90             : static void AddBufferToRing(BufferAccessStrategy strategy,
      91             :                             BufferDesc *buf);
      92             : 
      93             : /*
      94             :  * ClockSweepTick - Helper routine for StrategyGetBuffer()
      95             :  *
      96             :  * Move the clock hand one buffer ahead of its current position and return the
      97             :  * id of the buffer now under the hand.
      98             :  */
      99             : static inline uint32
     100     9475212 : ClockSweepTick(void)
     101             : {
     102             :     uint32      victim;
     103             : 
     104             :     /*
      105             :      * Atomically move the hand ahead one buffer - if several processes
      106             :      * do this concurrently, buffers can be returned slightly out of
      107             :      * apparent order.
     108             :      */
     109             :     victim =
     110     9475212 :         pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
     111             : 
     112     9475212 :     if (victim >= NBuffers)
     113             :     {
     114       65134 :         uint32      originalVictim = victim;
     115             : 
     116             :         /* always wrap what we look up in BufferDescriptors */
     117       65134 :         victim = victim % NBuffers;
     118             : 
     119             :         /*
     120             :          * If we're the one that just caused a wraparound, force
     121             :          * completePasses to be incremented while holding the spinlock. We
     122             :          * need the spinlock so StrategySyncStart() can return a consistent
     123             :          * value consisting of nextVictimBuffer and completePasses.
     124             :          */
     125       65134 :         if (victim == 0)
     126             :         {
     127             :             uint32      expected;
     128             :             uint32      wrapped;
     129       64746 :             bool        success = false;
     130             : 
     131       64746 :             expected = originalVictim + 1;
     132             : 
     133      129784 :             while (!success)
     134             :             {
     135             :                 /*
     136             :                  * Acquire the spinlock while increasing completePasses. That
     137             :                  * allows other readers to read nextVictimBuffer and
     138             :                  * completePasses in a consistent manner which is required for
     139             :                  * StrategySyncStart().  In theory delaying the increment
      140             :                  * could lead to an overflow of nextVictimBuffer, but that's
     141             :                  * highly unlikely and wouldn't be particularly harmful.
     142             :                  */
     143       65038 :                 SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     144             : 
     145       65038 :                 wrapped = expected % NBuffers;
     146             : 
     147       65038 :                 success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
     148             :                                                          &expected, wrapped);
     149       65038 :                 if (success)
     150       64746 :                     StrategyControl->completePasses++;
     151       65038 :                 SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     152             :             }
     153             :         }
     154             :     }
     155     9475212 :     return victim;
     156             : }
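
Because the hand only ever grows, a buffer id and an implied pass count fall
out of modulo and integer division by NBuffers. A standalone sketch with
assumed values, stepping the counter across a wraparound:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        const uint32_t NBuffers = 16384;    /* assumed pool size */
        uint32_t    hand = 16382;           /* counter just before a wraparound */

        for (int i = 0; i < 5; i++)
        {
            uint32_t    tick = hand++;      /* what pg_atomic_fetch_add_u32 returns */

            printf("tick=%u -> buffer id=%u, implied passes=%u\n",
                   tick, tick % NBuffers, tick / NBuffers);
        }
        return 0;
    }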
     157             : 
     158             : /*
     159             :  * StrategyGetBuffer
     160             :  *
     161             :  *  Called by the bufmgr to get the next candidate buffer to use in
     162             :  *  BufferAlloc(). The only hard requirement BufferAlloc() has is that
     163             :  *  the selected buffer must not currently be pinned by anyone.
     164             :  *
     165             :  *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
     166             :  *
     167             :  *  To ensure that no one else can pin the buffer before we do, we must
     168             :  *  return the buffer with the buffer header spinlock still held.
     169             :  */
     170             : BufferDesc *
     171     3810990 : StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
     172             : {
     173             :     BufferDesc *buf;
     174             :     int         bgwprocno;
     175             :     int         trycounter;
     176             :     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
     177             : 
     178     3810990 :     *from_ring = false;
     179             : 
     180             :     /*
     181             :      * If given a strategy object, see whether it can select a buffer. We
     182             :      * assume strategy objects don't need buffer_strategy_lock.
     183             :      */
     184     3810990 :     if (strategy != NULL)
     185             :     {
     186     1590048 :         buf = GetBufferFromRing(strategy, buf_state);
     187     1590048 :         if (buf != NULL)
     188             :         {
     189      606706 :             *from_ring = true;
     190      606706 :             return buf;
     191             :         }
     192             :     }
     193             : 
     194             :     /*
     195             :      * If asked, we need to waken the bgwriter. Since we don't want to rely on
     196             :      * a spinlock for this we force a read from shared memory once, and then
      197             :      * set the latch based on that value. We need to go to this length
      198             :      * because otherwise the compiler might just reread bgwprocno from
      199             :      * memory after we check it, seeing a value reset in the meantime.
     200             :      *
     201             :      * This can possibly set the latch of the wrong process if the bgwriter
     202             :      * dies in the wrong moment. But since PGPROC->procLatch is never
     203             :      * deallocated the worst consequence of that is that we set the latch of
     204             :      * some arbitrary process.
     205             :      */
     206     3204284 :     bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
     207     3204284 :     if (bgwprocno != -1)
     208             :     {
     209             :         /* reset bgwprocno first, before setting the latch */
     210        1214 :         StrategyControl->bgwprocno = -1;
     211             : 
     212             :         /*
     213             :          * Not acquiring ProcArrayLock here which is slightly icky. It's
     214             :          * actually fine because procLatch isn't ever freed, so we just can
     215             :          * potentially set the wrong process' (or no process') latch.
     216             :          */
     217        1214 :         SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
     218             :     }
     219             : 
     220             :     /*
     221             :      * We count buffer allocation requests so that the bgwriter can estimate
     222             :      * the rate of buffer consumption.  Note that buffers recycled by a
     223             :      * strategy object are intentionally not counted here.
     224             :      */
     225     3204284 :     pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
     226             : 
     227             :     /* Use the "clock sweep" algorithm to find a free buffer */
     228     3204284 :     trycounter = NBuffers;
     229             :     for (;;)
     230             :     {
     231     9475212 :         buf = GetBufferDescriptor(ClockSweepTick());
     232             : 
     233             :         /*
     234             :          * If the buffer is pinned or has a nonzero usage_count, we cannot use
     235             :          * it; decrement the usage_count (unless pinned) and keep scanning.
     236             :          */
     237     9475212 :         local_buf_state = LockBufHdr(buf);
     238             : 
     239     9475212 :         if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
     240             :         {
     241     9295452 :             if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
     242             :             {
     243     6091168 :                 local_buf_state -= BUF_USAGECOUNT_ONE;
     244             : 
     245     6091168 :                 trycounter = NBuffers;
     246             :             }
     247             :             else
     248             :             {
     249             :                 /* Found a usable buffer */
     250     3204284 :                 if (strategy != NULL)
     251      983342 :                     AddBufferToRing(strategy, buf);
     252     3204284 :                 *buf_state = local_buf_state;
     253     3204284 :                 return buf;
     254             :             }
     255             :         }
     256      179760 :         else if (--trycounter == 0)
     257             :         {
     258             :             /*
     259             :              * We've scanned all the buffers without making any state changes,
     260             :              * so all the buffers are pinned (or were when we looked at them).
     261             :              * We could hope that someone will free one eventually, but it's
     262             :              * probably better to fail than to risk getting stuck in an
     263             :              * infinite loop.
     264             :              */
     265           0 :             UnlockBufHdr(buf, local_buf_state);
     266           0 :             elog(ERROR, "no unpinned buffers available");
     267             :         }
     268     6270928 :         UnlockBufHdr(buf, local_buf_state);
     269             :     }
     270             : }
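
The heart of the loop is usage_count decay: every unpinned buffer the hand
passes loses one usage point, so a buffer survives sweeps only while it keeps
being referenced. A simplified single-threaded model of that loop (toy
structures, no locking, values assumed):

    #include <stdio.h>

    #define NBUFFERS 4

    typedef struct { int refcount; int usage_count; } ToyBuffer;

    static ToyBuffer buffers[NBUFFERS] = {
        {1, 3},     /* pinned: never eligible, only costs a try */
        {0, 2},     /* unpinned: usage decays one step per pass of the hand */
        {0, 1},
        {0, 0},     /* unpinned and cold: the victim */
    };

    int
    main(void)
    {
        int         hand = 0;
        int         tries = NBUFFERS;

        for (;;)
        {
            ToyBuffer  *buf = &buffers[hand];

            if (buf->refcount == 0)
            {
                if (buf->usage_count > 0)
                {
                    buf->usage_count--; /* give it another lap to prove itself */
                    tries = NBUFFERS;   /* progress made, reset the limit */
                }
                else
                {
                    printf("victim is buffer %d\n", hand);
                    return 0;
                }
            }
            else if (--tries == 0)
            {
                fprintf(stderr, "no unpinned buffers available\n");
                return 1;
            }
            hand = (hand + 1) % NBUFFERS;
        }
    }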
     271             : 
     272             : /*
     273             :  * StrategySyncStart -- tell BgBufferSync where to start syncing
     274             :  *
     275             :  * The result is the buffer index of the best buffer to sync first.
     276             :  * BgBufferSync() will proceed circularly around the buffer array from there.
     277             :  *
     278             :  * In addition, we return the completed-pass count (which is effectively
     279             :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
     280             :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
     281             :  * being read.
     282             :  */
     283             : int
     284       25394 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
     285             : {
     286             :     uint32      nextVictimBuffer;
     287             :     int         result;
     288             : 
     289       25394 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     290       25394 :     nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
     291       25394 :     result = nextVictimBuffer % NBuffers;
     292             : 
     293       25394 :     if (complete_passes)
     294             :     {
     295       25394 :         *complete_passes = StrategyControl->completePasses;
     296             : 
     297             :         /*
     298             :          * Additionally add the number of wraparounds that happened before
     299             :          * completePasses could be incremented. C.f. ClockSweepTick().
     300             :          */
     301       25394 :         *complete_passes += nextVictimBuffer / NBuffers;
     302             :     }
     303             : 
     304       25394 :     if (num_buf_alloc)
     305             :     {
     306       25394 :         *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
     307             :     }
     308       25394 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     309       25394 :     return result;
     310             : }
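
The division credits wraparounds that ClockSweepTick() has already performed
but not yet folded into completePasses. With assumed sample values:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        const uint32_t NBuffers = 1024;         /* assumed pool size */
        uint32_t    completePasses = 7;         /* tallied full sweeps */
        uint32_t    nextVictimBuffer = 2100;    /* raw hand, two wraps untallied */

        uint32_t    start = nextVictimBuffer % NBuffers;    /* 52 */
        uint32_t    passes = completePasses + nextVictimBuffer / NBuffers;  /* 9 */

        printf("sync starts at buffer %u after %u complete passes\n",
               start, passes);
        return 0;
    }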
     311             : 
     312             : /*
     313             :  * StrategyNotifyBgWriter -- set or clear allocation notification latch
     314             :  *
     315             :  * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
     316             :  * set that latch.  Pass -1 to clear the pending notification before it
     317             :  * happens.  This feature is used by the bgwriter process to wake itself up
     318             :  * from hibernation, and is not meant for anybody else to use.
     319             :  */
     320             : void
     321        2472 : StrategyNotifyBgWriter(int bgwprocno)
     322             : {
     323             :     /*
     324             :      * We acquire buffer_strategy_lock just to ensure that the store appears
     325             :      * atomic to StrategyGetBuffer.  The bgwriter should call this rather
     326             :      * infrequently, so there's no performance penalty from being safe.
     327             :      */
     328        2472 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     329        2472 :     StrategyControl->bgwprocno = bgwprocno;
     330        2472 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     331        2472 : }
     332             : 
     333             : 
     334             : /*
     335             :  * StrategyShmemSize
     336             :  *
     337             :  * estimate the size of shared memory used by the freelist-related structures.
     338             :  *
     339             :  * Note: for somewhat historical reasons, the buffer lookup hashtable size
     340             :  * is also determined here.
     341             :  */
     342             : Size
     343        4048 : StrategyShmemSize(void)
     344             : {
     345        4048 :     Size        size = 0;
     346             : 
     347             :     /* size of lookup hash table ... see comment in StrategyInitialize */
     348        4048 :     size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
     349             : 
     350             :     /* size of the shared replacement strategy control block */
     351        4048 :     size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
     352             : 
     353        4048 :     return size;
     354             : }
     355             : 
     356             : /*
     357             :  * StrategyInitialize -- initialize the buffer cache replacement
     358             :  *      strategy.
     359             :  *
     360             :  * Assumes: All of the buffers are already built into a linked list.
     361             :  *      Only called by postmaster and only during initialization.
     362             :  */
     363             : void
     364        2174 : StrategyInitialize(bool init)
     365             : {
     366             :     bool        found;
     367             : 
     368             :     /*
     369             :      * Initialize the shared buffer lookup hashtable.
     370             :      *
     371             :      * Since we can't tolerate running out of lookup table entries, we must be
     372             :      * sure to specify an adequate table size here.  The maximum steady-state
     373             :      * usage is of course NBuffers entries, but BufferAlloc() tries to insert
     374             :      * a new entry before deleting the old.  In principle this could be
     375             :      * happening in each partition concurrently, so we could need as many as
     376             :      * NBuffers + NUM_BUFFER_PARTITIONS entries.
     377             :      */
     378        2174 :     InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);
     379             : 
     380             :     /*
     381             :      * Get or create the shared strategy control block
     382             :      */
     383        2174 :     StrategyControl = (BufferStrategyControl *)
     384        2174 :         ShmemInitStruct("Buffer Strategy Status",
     385             :                         sizeof(BufferStrategyControl),
     386             :                         &found);
     387             : 
     388        2174 :     if (!found)
     389             :     {
     390             :         /*
     391             :          * Only done once, usually in postmaster
     392             :          */
     393             :         Assert(init);
     394             : 
     395        2174 :         SpinLockInit(&StrategyControl->buffer_strategy_lock);
     396             : 
     397             :         /* Initialize the clock-sweep pointer */
     398        2174 :         pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
     399             : 
     400             :         /* Clear statistics */
     401        2174 :         StrategyControl->completePasses = 0;
     402        2174 :         pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
     403             : 
     404             :         /* No pending notification */
     405        2174 :         StrategyControl->bgwprocno = -1;
     406             :     }
     407             :     else
     408             :         Assert(!init);
     409        2174 : }
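
A standalone model of the get-or-create pattern above (toy stand-ins for
ShmemInitStruct and shared memory; only the control flow is the point):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { int bgwprocno; } ToyControl;

    static ToyControl shared_block;         /* stands in for shared memory */
    static bool already_created = false;

    static ToyControl *
    toy_shmem_init_struct(bool *found)
    {
        *found = already_created;
        already_created = true;
        return &shared_block;
    }

    int
    main(void)
    {
        for (int attach = 0; attach < 2; attach++)
        {
            bool        found;
            ToyControl *ctl = toy_shmem_init_struct(&found);

            if (!found)
                ctl->bgwprocno = -1;        /* first attach initializes */

            printf("attach %d: found=%d, bgwprocno=%d\n",
                   attach, (int) found, ctl->bgwprocno);
        }
        return 0;
    }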
     410             : 
     411             : 
     412             : /* ----------------------------------------------------------------
     413             :  *              Backend-private buffer ring management
     414             :  * ----------------------------------------------------------------
     415             :  */
     416             : 
     417             : 
     418             : /*
     419             :  * GetAccessStrategy -- create a BufferAccessStrategy object
     420             :  *
     421             :  * The object is allocated in the current memory context.
     422             :  */
     423             : BufferAccessStrategy
     424      283996 : GetAccessStrategy(BufferAccessStrategyType btype)
     425             : {
     426             :     int         ring_size_kb;
     427             : 
     428             :     /*
     429             :      * Select ring size to use.  See buffer/README for rationales.
     430             :      *
     431             :      * Note: if you change the ring size for BAS_BULKREAD, see also
     432             :      * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     433             :      */
     434      283996 :     switch (btype)
     435             :     {
     436           0 :         case BAS_NORMAL:
     437             :             /* if someone asks for NORMAL, just give 'em a "default" object */
     438           0 :             return NULL;
     439             : 
     440      157772 :         case BAS_BULKREAD:
     441             :             {
     442             :                 int         ring_max_kb;
     443             : 
     444             :                 /*
     445             :                  * The ring always needs to be large enough to allow some
     446             :                  * separation in time between providing a buffer to the user
     447             :                  * of the strategy and that buffer being reused. Otherwise the
     448             :                  * user's pin will prevent reuse of the buffer, even without
     449             :                  * concurrent activity.
     450             :                  *
     451             :                  * We also need to ensure the ring always is large enough for
     452             :                  * SYNC_SCAN_REPORT_INTERVAL, as noted above.
     453             :                  *
      454             :                  * Thus we start out at a minimal size and increase the
      455             :                  * size further if appropriate.
     456             :                  */
     457      157772 :                 ring_size_kb = 256;
     458             : 
     459             :                 /*
     460             :                  * There's no point in a larger ring if we won't be allowed to
     461             :                  * pin sufficiently many buffers.  But we never limit to less
     462             :                  * than the minimal size above.
     463             :                  */
     464      157772 :                 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
     465      157772 :                 ring_max_kb = Max(ring_size_kb, ring_max_kb);
     466             : 
     467             :                 /*
     468             :                  * We would like the ring to additionally have space for the
     469             :                  * configured degree of IO concurrency. While being read in,
     470             :                  * buffers can obviously not yet be reused.
     471             :                  *
     472             :                  * Each IO can be up to io_combine_limit blocks large, and we
     473             :                  * want to start up to effective_io_concurrency IOs.
     474             :                  *
     475             :                  * Note that effective_io_concurrency may be 0, which disables
     476             :                  * AIO.
     477             :                  */
     478      157772 :                 ring_size_kb += (BLCKSZ / 1024) *
     479      157772 :                     io_combine_limit * effective_io_concurrency;
     480             : 
     481      157772 :                 if (ring_size_kb > ring_max_kb)
     482      157772 :                     ring_size_kb = ring_max_kb;
     483      157772 :                 break;
     484             :             }
     485      126224 :         case BAS_BULKWRITE:
     486      126224 :             ring_size_kb = 16 * 1024;
     487      126224 :             break;
     488           0 :         case BAS_VACUUM:
     489           0 :             ring_size_kb = 2048;
     490           0 :             break;
     491             : 
     492           0 :         default:
     493           0 :             elog(ERROR, "unrecognized buffer access strategy: %d",
     494             :                  (int) btype);
     495             :             return NULL;        /* keep compiler quiet */
     496             :     }
     497             : 
     498      283996 :     return GetAccessStrategyWithSize(btype, ring_size_kb);
     499             : }
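
A worked example of the BAS_BULKREAD sizing above, with assumed settings
(BLCKSZ = 8192, io_combine_limit = 16, effective_io_concurrency = 16, and a
pin limit of 1000 buffers standing in for GetPinLimit()):

    #include <stdio.h>

    int
    main(void)
    {
        const int   BLCKSZ = 8192;
        int         io_combine_limit = 16;
        int         effective_io_concurrency = 16;
        int         pin_limit_buffers = 1000;   /* stand-in for GetPinLimit() */

        int         ring_size_kb = 256;         /* minimal starting size */
        int         ring_max_kb = pin_limit_buffers * (BLCKSZ / 1024); /* 8000 */

        if (ring_max_kb < ring_size_kb)
            ring_max_kb = ring_size_kb;         /* never below the minimum */

        /* headroom for in-flight reads: 8kB * 16 * 16 = 2048kB */
        ring_size_kb += (BLCKSZ / 1024) * io_combine_limit *
            effective_io_concurrency;

        if (ring_size_kb > ring_max_kb)
            ring_size_kb = ring_max_kb;

        printf("BAS_BULKREAD ring: %d kB\n", ring_size_kb);     /* 2304 kB */
        return 0;
    }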
     500             : 
     501             : /*
     502             :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
     503             :  *      number of buffers equivalent to the passed in size.
     504             :  *
     505             :  * If the given ring size is 0, no BufferAccessStrategy will be created and
     506             :  * the function will return NULL.  ring_size_kb must not be negative.
     507             :  */
     508             : BufferAccessStrategy
     509      300804 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
     510             : {
     511             :     int         ring_buffers;
     512             :     BufferAccessStrategy strategy;
     513             : 
     514             :     Assert(ring_size_kb >= 0);
     515             : 
     516             :     /* Figure out how many buffers ring_size_kb is */
     517      300804 :     ring_buffers = ring_size_kb / (BLCKSZ / 1024);
     518             : 
     519             :     /* 0 means unlimited, so no BufferAccessStrategy required */
     520      300804 :     if (ring_buffers == 0)
     521          12 :         return NULL;
     522             : 
     523             :     /* Cap to 1/8th of shared_buffers */
     524      300792 :     ring_buffers = Min(NBuffers / 8, ring_buffers);
     525             : 
     526             :     /* NBuffers should never be less than 16, so this shouldn't happen */
     527             :     Assert(ring_buffers > 0);
     528             : 
     529             :     /* Allocate the object and initialize all elements to zeroes */
     530             :     strategy = (BufferAccessStrategy)
     531      300792 :         palloc0(offsetof(BufferAccessStrategyData, buffers) +
     532             :                 ring_buffers * sizeof(Buffer));
     533             : 
     534             :     /* Set fields that don't start out zero */
     535      300792 :     strategy->btype = btype;
     536      300792 :     strategy->nbuffers = ring_buffers;
     537             : 
     538      300792 :     return strategy;
     539             : }
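
The kB-to-buffers conversion and the 1/8th cap, with assumed settings
(BLCKSZ = 8192 and NBuffers = 16384, i.e. 128MB of shared buffers):

    #include <stdio.h>

    #define Min(a, b)   ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
        const int   BLCKSZ = 8192;
        const int   NBuffers = 16384;
        int         ring_size_kb = 16 * 1024;   /* the BAS_BULKWRITE default */

        int         ring_buffers = ring_size_kb / (BLCKSZ / 1024); /* 2048 */

        /* one ring never monopolizes more than 1/8th of shared buffers */
        ring_buffers = Min(NBuffers / 8, ring_buffers);            /* 2048 */

        printf("ring of %d buffers\n", ring_buffers);
        return 0;
    }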
     540             : 
     541             : /*
     542             :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
     543             :  *      the ring
     544             :  *
     545             :  * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
     546             :  * returning NULL with 0 size.
     547             :  */
     548             : int
     549          34 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
     550             : {
     551          34 :     if (strategy == NULL)
     552           0 :         return 0;
     553             : 
     554          34 :     return strategy->nbuffers;
     555             : }
     556             : 
     557             : /*
     558             :  * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
     559             :  *
      560             :  * When pinning extra buffers to look ahead, users of a ring-based strategy
      561             :  * are in danger of pinning too much of the ring at once.
     562             :  * For some strategies, that means "escaping" from the ring, and in others it
     563             :  * means forcing dirty data to disk very frequently with associated WAL
     564             :  * flushing.  Since external code has no insight into any of that, allow
     565             :  * individual strategy types to expose a clamp that should be applied when
     566             :  * deciding on a maximum number of buffers to pin at once.
     567             :  *
     568             :  * Callers should combine this number with other relevant limits and take the
     569             :  * minimum.
     570             :  */
     571             : int
     572     1100148 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
     573             : {
     574     1100148 :     if (strategy == NULL)
     575      794992 :         return NBuffers;
     576             : 
     577      305156 :     switch (strategy->btype)
     578             :     {
     579      148394 :         case BAS_BULKREAD:
     580             : 
     581             :             /*
     582             :              * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
     583             :              * shouldn't be a problem and the caller is free to pin up to the
     584             :              * entire ring at once.
     585             :              */
     586      148394 :             return strategy->nbuffers;
     587             : 
     588      156762 :         default:
     589             : 
     590             :             /*
     591             :              * Tell caller not to pin more than half the buffers in the ring.
     592             :              * This is a trade-off between look ahead distance and deferring
     593             :              * writeback and associated WAL traffic.
     594             :              */
     595      156762 :             return strategy->nbuffers / 2;
     596             :     }
     597             : }
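
A caller-side sketch of combining this clamp with other limits by taking the
minimum, as the header comment asks (both limit values are assumed):

    #include <stdio.h>

    #define Min(a, b)   ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
        int         strategy_clamp = 64;    /* e.g. nbuffers / 2 from above */
        int         io_depth_limit = 32;    /* hypothetical second limit */

        printf("pin at most %d buffers\n",
               Min(strategy_clamp, io_depth_limit));
        return 0;
    }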
     598             : 
     599             : /*
     600             :  * FreeAccessStrategy -- release a BufferAccessStrategy object
     601             :  *
     602             :  * A simple pfree would do at the moment, but we would prefer that callers
     603             :  * don't assume that much about the representation of BufferAccessStrategy.
     604             :  */
     605             : void
     606      272156 : FreeAccessStrategy(BufferAccessStrategy strategy)
     607             : {
     608             :     /* don't crash if called on a "default" strategy */
     609      272156 :     if (strategy != NULL)
     610      272156 :         pfree(strategy);
     611      272156 : }
     612             : 
     613             : /*
     614             :  * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
     615             :  *      ring is empty / not usable.
     616             :  *
     617             :  * The bufhdr spin lock is held on the returned buffer.
     618             :  */
     619             : static BufferDesc *
     620     1590048 : GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
     621             : {
     622             :     BufferDesc *buf;
     623             :     Buffer      bufnum;
     624             :     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
     625             : 
     626             : 
     627             :     /* Advance to next ring slot */
     628     1590048 :     if (++strategy->current >= strategy->nbuffers)
     629       46224 :         strategy->current = 0;
     630             : 
     631             :     /*
     632             :      * If the slot hasn't been filled yet, tell the caller to allocate a new
     633             :      * buffer with the normal allocation strategy.  He will then fill this
     634             :      * slot by calling AddBufferToRing with the new buffer.
     635             :      */
     636     1590048 :     bufnum = strategy->buffers[strategy->current];
     637     1590048 :     if (bufnum == InvalidBuffer)
     638      963884 :         return NULL;
     639             : 
     640             :     /*
     641             :      * If the buffer is pinned we cannot use it under any circumstances.
     642             :      *
     643             :      * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
     644             :      * since our own previous usage of the ring element would have left it
     645             :      * there, but it might've been decremented by clock-sweep since then). A
     646             :      * higher usage_count indicates someone else has touched the buffer, so we
     647             :      * shouldn't re-use it.
     648             :      */
     649      626164 :     buf = GetBufferDescriptor(bufnum - 1);
     650      626164 :     local_buf_state = LockBufHdr(buf);
     651      626164 :     if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
     652      616662 :         && BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
     653             :     {
     654      606706 :         *buf_state = local_buf_state;
     655      606706 :         return buf;
     656             :     }
     657       19458 :     UnlockBufHdr(buf, local_buf_state);
     658             : 
     659             :     /*
     660             :      * Tell caller to allocate a new buffer with the normal allocation
     661             :      * strategy.  He'll then replace this ring element via AddBufferToRing.
     662             :      */
     663       19458 :     return NULL;
     664             : }
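
The slot advance is a plain circular index, with InvalidBuffer (zero) marking
slots not yet filled. A standalone sketch with an assumed four-slot ring:

    #include <stdio.h>

    #define RING_SLOTS 4
    #define InvalidBuffer 0

    int
    main(void)
    {
        int         buffers[RING_SLOTS] = {3, 9, InvalidBuffer, 12};
        int         current = 0;    /* palloc0 leaves this at 0, so the
                                     * first advance lands on slot 1 */

        for (int i = 0; i < 6; i++)
        {
            if (++current >= RING_SLOTS)
                current = 0;

            if (buffers[current] == InvalidBuffer)
                printf("slot %d: empty, fall back to clock-sweep\n", current);
            else
                printf("slot %d: candidate buffer %d\n", current,
                       buffers[current]);
        }
        return 0;
    }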
     665             : 
     666             : /*
     667             :  * AddBufferToRing -- add a buffer to the buffer ring
     668             :  *
     669             :  * Caller must hold the buffer header spinlock on the buffer.  Since this
     670             :  * is called with the spinlock held, it had better be quite cheap.
     671             :  */
     672             : static void
     673      983342 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     674             : {
     675      983342 :     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
     676      983342 : }
     677             : 
     678             : /*
     679             :  * Utility function returning the IOContext of a given BufferAccessStrategy's
     680             :  * strategy ring.
     681             :  */
     682             : IOContext
     683   124442794 : IOContextForStrategy(BufferAccessStrategy strategy)
     684             : {
     685   124442794 :     if (!strategy)
     686   119766906 :         return IOCONTEXT_NORMAL;
     687             : 
     688     4675888 :     switch (strategy->btype)
     689             :     {
     690             :         case BAS_NORMAL:
     691             : 
     692             :             /*
     693             :              * Currently, GetAccessStrategy() returns NULL for
     694             :              * BufferAccessStrategyType BAS_NORMAL, so this case is
     695             :              * unreachable.
     696             :              */
     697             :             pg_unreachable();
     698             :             return IOCONTEXT_NORMAL;
     699     3100106 :         case BAS_BULKREAD:
     700     3100106 :             return IOCONTEXT_BULKREAD;
     701      556752 :         case BAS_BULKWRITE:
     702      556752 :             return IOCONTEXT_BULKWRITE;
     703     1019030 :         case BAS_VACUUM:
     704     1019030 :             return IOCONTEXT_VACUUM;
     705             :     }
     706             : 
     707           0 :     elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
     708             :     pg_unreachable();
     709             : }
     710             : 
     711             : /*
     712             :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
     713             :  *
     714             :  * When a nondefault strategy is used, the buffer manager calls this function
     715             :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
     716             :  * be written out and doing so would require flushing WAL too.  This gives us
     717             :  * a chance to choose a different victim.
     718             :  *
     719             :  * Returns true if buffer manager should ask for a new victim, and false
     720             :  * if this buffer should be written and re-used.
     721             :  */
     722             : bool
     723       17692 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
     724             : {
     725             :     /* We only do this in bulkread mode */
     726       17692 :     if (strategy->btype != BAS_BULKREAD)
     727        4976 :         return false;
     728             : 
     729             :     /* Don't muck with behavior of normal buffer-replacement strategy */
     730       24074 :     if (!from_ring ||
     731       11358 :         strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
     732        1358 :         return false;
     733             : 
     734             :     /*
     735             :      * Remove the dirty buffer from the ring; necessary to prevent infinite
     736             :      * loop if all ring members are dirty.
     737             :      */
     738       11358 :     strategy->buffers[strategy->current] = InvalidBuffer;
     739             : 
     740       11358 :     return true;
     741             : }
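
A standalone sketch of the caller's retry pattern (every name except the
reject contract itself is a stand-in invented for illustration): a dirty
victim whose write would also force a WAL flush is handed back, and the
caller asks for a new one.

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { int id; bool needs_wal_flush; } ToyBuffer;

    static ToyBuffer candidates[] = {
        {101, true},    /* rejected: flushing it would flush WAL too */
        {102, false},   /* accepted */
    };

    /* mirrors StrategyRejectBuffer's answer in this toy model */
    static bool
    toy_reject(ToyBuffer *buf)
    {
        return buf->needs_wal_flush;
    }

    int
    main(void)
    {
        for (int i = 0; ; i++)
        {
            ToyBuffer  *buf = &candidates[i];

            if (toy_reject(buf))
            {
                printf("buffer %d rejected, asking for a new victim\n", buf->id);
                continue;
            }
            printf("buffer %d accepted for reuse\n", buf->id);
            break;
        }
        return 0;
    }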

Generated by: LCOV version 1.16