LCOV - code coverage report
Current view:  top level - src/backend/storage/buffer - freelist.c  (source / functions)
Test:          PostgreSQL 19devel
Date:          2025-10-10 10:17:52
Coverage:      Lines:     161 / 173   (93.1 %)
               Functions:  15 /  15  (100.0 %)
Legend: in the listing below, the "Line data" column shows each executable line's hit count; executable lines that were never executed show a count of 0, and non-executable lines show no count.

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * freelist.c
       4             :  *    routines for managing the buffer pool's replacement strategy.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/storage/buffer/freelist.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres.h"
      17             : 
      18             : #include "pgstat.h"
      19             : #include "port/atomics.h"
      20             : #include "storage/buf_internals.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "storage/proc.h"
      23             : 
      24             : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
      25             : 
      26             : 
      27             : /*
      28             :  * The shared freelist control information.
      29             :  */
      30             : typedef struct
      31             : {
      32             :     /* Spinlock: protects the values below */
      33             :     slock_t     buffer_strategy_lock;
      34             : 
      35             :     /*
      36             :      * clock-sweep hand: index of next buffer to consider grabbing. Note that
      37             :      * this isn't a concrete buffer - we only ever increase the value. So, to
      38             :      * get an actual buffer, it needs to be used modulo NBuffers.
      39             :      */
      40             :     pg_atomic_uint32 nextVictimBuffer;
      41             : 
      42             :     /*
      43             :      * Statistics.  These counters should be wide enough that they can't
      44             :      * overflow during a single bgwriter cycle.
      45             :      */
      46             :     uint32      completePasses; /* Complete cycles of the clock-sweep */
      47             :     pg_atomic_uint32 numBufferAllocs;   /* Buffers allocated since last reset */
      48             : 
      49             :     /*
       50             :      * Bgwriter process to be notified upon activity or -1 if none. See
      51             :      * StrategyNotifyBgWriter.
      52             :      */
      53             :     int         bgwprocno;
      54             : } BufferStrategyControl;
      55             : 
      56             : /* Pointers to shared state */
      57             : static BufferStrategyControl *StrategyControl = NULL;
      58             : 
      59             : /*
      60             :  * Private (non-shared) state for managing a ring of shared buffers to re-use.
      61             :  * This is currently the only kind of BufferAccessStrategy object, but someday
      62             :  * we might have more kinds.
      63             :  */
      64             : typedef struct BufferAccessStrategyData
      65             : {
      66             :     /* Overall strategy type */
      67             :     BufferAccessStrategyType btype;
      68             :     /* Number of elements in buffers[] array */
      69             :     int         nbuffers;
      70             : 
      71             :     /*
      72             :      * Index of the "current" slot in the ring, ie, the one most recently
      73             :      * returned by GetBufferFromRing.
      74             :      */
      75             :     int         current;
      76             : 
      77             :     /*
      78             :      * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
      79             :      * have not yet selected a buffer for this ring slot.  For allocation
      80             :      * simplicity this is palloc'd together with the fixed fields of the
      81             :      * struct.
      82             :      */
      83             :     Buffer      buffers[FLEXIBLE_ARRAY_MEMBER];
      84             : }           BufferAccessStrategyData;
      85             : 
      86             : 
      87             : /* Prototypes for internal functions */
      88             : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
      89             :                                      uint32 *buf_state);
      90             : static void AddBufferToRing(BufferAccessStrategy strategy,
      91             :                             BufferDesc *buf);
      92             : 
      93             : /*
      94             :  * ClockSweepTick - Helper routine for StrategyGetBuffer()
      95             :  *
      96             :  * Move the clock hand one buffer ahead of its current position and return the
      97             :  * id of the buffer now under the hand.
      98             :  */
      99             : static inline uint32
     100     9375902 : ClockSweepTick(void)
     101             : {
     102             :     uint32      victim;
     103             : 
     104             :     /*
      105             :      * Atomically move the hand ahead one buffer - if several processes are
      106             :      * doing this, buffers can be returned slightly out of apparent
      107             :      * order.
     108             :      */
     109             :     victim =
     110     9375902 :         pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
     111             : 
     112     9375902 :     if (victim >= NBuffers)
     113             :     {
     114       64002 :         uint32      originalVictim = victim;
     115             : 
     116             :         /* always wrap what we look up in BufferDescriptors */
     117       64002 :         victim = victim % NBuffers;
     118             : 
     119             :         /*
     120             :          * If we're the one that just caused a wraparound, force
     121             :          * completePasses to be incremented while holding the spinlock. We
     122             :          * need the spinlock so StrategySyncStart() can return a consistent
     123             :          * value consisting of nextVictimBuffer and completePasses.
     124             :          */
     125       64002 :         if (victim == 0)
     126             :         {
     127             :             uint32      expected;
     128             :             uint32      wrapped;
     129       63740 :             bool        success = false;
     130             : 
     131       63740 :             expected = originalVictim + 1;
     132             : 
     133      127718 :             while (!success)
     134             :             {
     135             :                 /*
     136             :                  * Acquire the spinlock while increasing completePasses. That
     137             :                  * allows other readers to read nextVictimBuffer and
     138             :                  * completePasses in a consistent manner which is required for
     139             :                  * StrategySyncStart().  In theory delaying the increment
      140             :                  * could lead to an overflow of nextVictimBuffer, but that's
     141             :                  * highly unlikely and wouldn't be particularly harmful.
     142             :                  */
     143       63978 :                 SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     144             : 
     145       63978 :                 wrapped = expected % NBuffers;
     146             : 
     147       63978 :                 success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
     148             :                                                          &expected, wrapped);
     149       63978 :                 if (success)
     150       63740 :                     StrategyControl->completePasses++;
     151       63978 :                 SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     152             :             }
     153             :         }
     154             :     }
     155     9375902 :     return victim;
     156             : }
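
An illustrative, standalone sketch of the wraparound arithmetic above (not part
of freelist.c; the buffer count of 8 and the starting counter value are
arbitrary assumptions):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint32_t    nbuffers = 8;       /* stand-in for NBuffers */
        uint32_t    counter = 6;        /* monotonically increasing hand */
        uint32_t    complete_passes = 0;

        for (int i = 0; i < 5; i++)
        {
            /* models pg_atomic_fetch_add_u32(): returns the pre-increment value */
            uint32_t    victim = counter++;

            if (victim >= nbuffers)
            {
                victim %= nbuffers;     /* always wrap for array indexing */

                if (victim == 0)
                {
                    /*
                     * We caused the wraparound: pull the counter back into
                     * range (the CAS in ClockSweepTick) and count the pass.
                     */
                    counter %= nbuffers;
                    complete_passes++;
                }
            }
            printf("tick -> buffer %u (passes=%u)\n",
                   (unsigned) victim, (unsigned) complete_passes);
        }
        return 0;
    }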
     157             : 
     158             : /*
     159             :  * StrategyGetBuffer
     160             :  *
     161             :  *  Called by the bufmgr to get the next candidate buffer to use in
     162             :  *  GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
     163             :  *  the selected buffer must not currently be pinned by anyone.
     164             :  *
     165             :  *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
     166             :  *
      167             :  *  It is the caller's responsibility to ensure the buffer ownership can be
     168             :  *  tracked via TrackNewBufferPin().
     169             :  *
     170             :  *  The buffer is pinned and marked as owned, using TrackNewBufferPin(),
     171             :  *  before returning.
     172             :  */
     173             : BufferDesc *
     174     3793590 : StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
     175             : {
     176             :     BufferDesc *buf;
     177             :     int         bgwprocno;
     178             :     int         trycounter;
     179             : 
     180     3793590 :     *from_ring = false;
     181             : 
     182             :     /*
     183             :      * If given a strategy object, see whether it can select a buffer. We
     184             :      * assume strategy objects don't need buffer_strategy_lock.
     185             :      */
     186     3793590 :     if (strategy != NULL)
     187             :     {
     188     1593994 :         buf = GetBufferFromRing(strategy, buf_state);
     189     1593994 :         if (buf != NULL)
     190             :         {
     191      605564 :             *from_ring = true;
     192      605564 :             return buf;
     193             :         }
     194             :     }
     195             : 
     196             :     /*
      197             :      * If asked, we need to wake the bgwriter. Since we don't want to rely on
      198             :      * a spinlock for this we force a read from shared memory once, and then
      199             :      * set the latch based on that value. We need to go to this length
      200             :      * because otherwise bgwprocno might be reset while/after we check, as
      201             :      * the compiler might just reread it from memory.
     202             :      *
     203             :      * This can possibly set the latch of the wrong process if the bgwriter
      204             :      * dies at the wrong moment. But since PGPROC->procLatch is never
     205             :      * deallocated the worst consequence of that is that we set the latch of
     206             :      * some arbitrary process.
     207             :      */
     208     3188026 :     bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
     209     3188026 :     if (bgwprocno != -1)
     210             :     {
     211             :         /* reset bgwprocno first, before setting the latch */
     212         838 :         StrategyControl->bgwprocno = -1;
     213             : 
     214             :         /*
     215             :          * Not acquiring ProcArrayLock here which is slightly icky. It's
      216             :          * actually fine because procLatch isn't ever freed, so at worst we
      217             :          * might set the wrong process' (or no process') latch.
     218             :          */
     219         838 :         SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
     220             :     }
     221             : 
     222             :     /*
     223             :      * We count buffer allocation requests so that the bgwriter can estimate
     224             :      * the rate of buffer consumption.  Note that buffers recycled by a
     225             :      * strategy object are intentionally not counted here.
     226             :      */
     227     3188026 :     pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
     228             : 
     229             :     /* Use the "clock sweep" algorithm to find a free buffer */
     230     3188026 :     trycounter = NBuffers;
     231             :     for (;;)
     232     6187876 :     {
     233             :         uint32      old_buf_state;
     234             :         uint32      local_buf_state;
     235             : 
     236     9375902 :         buf = GetBufferDescriptor(ClockSweepTick());
     237             : 
     238             :         /*
     239             :          * Check whether the buffer can be used and pin it if so. Do this
     240             :          * using a CAS loop, to avoid having to lock the buffer header.
     241             :          */
     242     9375902 :         old_buf_state = pg_atomic_read_u32(&buf->state);
     243             :         for (;;)
     244             :         {
     245     9376096 :             local_buf_state = old_buf_state;
     246             : 
     247             :             /*
     248             :              * If the buffer is pinned or has a nonzero usage_count, we cannot
     249             :              * use it; decrement the usage_count (unless pinned) and keep
     250             :              * scanning.
     251             :              */
     252             : 
     253     9376096 :             if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
     254             :             {
     255      165556 :                 if (--trycounter == 0)
     256             :                 {
     257             :                     /*
     258             :                      * We've scanned all the buffers without making any state
     259             :                      * changes, so all the buffers are pinned (or were when we
     260             :                      * looked at them). We could hope that someone will free
     261             :                      * one eventually, but it's probably better to fail than
     262             :                      * to risk getting stuck in an infinite loop.
     263             :                      */
     264           0 :                     elog(ERROR, "no unpinned buffers available");
     265             :                 }
     266      165556 :                 break;
     267             :             }
     268             : 
     269     9210540 :             if (unlikely(local_buf_state & BM_LOCKED))
     270             :             {
     271          12 :                 old_buf_state = WaitBufHdrUnlocked(buf);
     272          12 :                 continue;
     273             :             }
     274             : 
     275     9210528 :             if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
     276             :             {
     277     6022492 :                 local_buf_state -= BUF_USAGECOUNT_ONE;
     278             : 
     279     6022492 :                 if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
     280             :                                                    local_buf_state))
     281             :                 {
     282     6022320 :                     trycounter = NBuffers;
     283     6022320 :                     break;
     284             :                 }
     285             :             }
     286             :             else
     287             :             {
     288             :                 /* pin the buffer if the CAS succeeds */
     289     3188036 :                 local_buf_state += BUF_REFCOUNT_ONE;
     290             : 
     291     3188036 :                 if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
     292             :                                                    local_buf_state))
     293             :                 {
     294             :                     /* Found a usable buffer */
     295     3188026 :                     if (strategy != NULL)
     296      988430 :                         AddBufferToRing(strategy, buf);
     297     3188026 :                     *buf_state = local_buf_state;
     298             : 
     299     3188026 :                     TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
     300             : 
     301     3188026 :                     return buf;
     302             :                 }
     303             :             }
     304             : 
     305             :         }
     306             :     }
     307             : }
     308             : 
     309             : /*
     310             :  * StrategySyncStart -- tell BgBufferSync where to start syncing
     311             :  *
     312             :  * The result is the buffer index of the best buffer to sync first.
     313             :  * BgBufferSync() will proceed circularly around the buffer array from there.
     314             :  *
     315             :  * In addition, we return the completed-pass count (which is effectively
     316             :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
     317             :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
     318             :  * being read.
     319             :  */
     320             : int
     321       21972 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
     322             : {
     323             :     uint32      nextVictimBuffer;
     324             :     int         result;
     325             : 
     326       21972 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     327       21972 :     nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
     328       21972 :     result = nextVictimBuffer % NBuffers;
     329             : 
     330       21972 :     if (complete_passes)
     331             :     {
     332       21972 :         *complete_passes = StrategyControl->completePasses;
     333             : 
     334             :         /*
     335             :          * Additionally add the number of wraparounds that happened before
     336             :          * completePasses could be incremented. C.f. ClockSweepTick().
     337             :          */
     338       21972 :         *complete_passes += nextVictimBuffer / NBuffers;
     339             :     }
     340             : 
     341       21972 :     if (num_buf_alloc)
     342             :     {
     343       21972 :         *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
     344             :     }
     345       21972 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     346       21972 :     return result;
     347             : }
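
As a sketch of how these return values can be consumed (a hypothetical helper,
not part of PostgreSQL; BgBufferSync() derives a similar "how far has the hand
moved" figure from the same two quantities):

    /* absolute clock-sweep position, measured in buffers advanced */
    static inline uint64
    sweep_position(uint32 complete_passes, int next_to_sync, int nbuffers)
    {
        return (uint64) complete_passes * nbuffers + next_to_sync;
    }

    ...

    uint32      passes;
    uint32      allocs;
    int         start = StrategySyncStart(&passes, &allocs);
    uint64      pos = sweep_position(passes, start, NBuffers);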
     348             : 
     349             : /*
     350             :  * StrategyNotifyBgWriter -- set or clear allocation notification latch
     351             :  *
     352             :  * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
     353             :  * set that latch.  Pass -1 to clear the pending notification before it
     354             :  * happens.  This feature is used by the bgwriter process to wake itself up
     355             :  * from hibernation, and is not meant for anybody else to use.
     356             :  */
     357             : void
     358        1696 : StrategyNotifyBgWriter(int bgwprocno)
     359             : {
     360             :     /*
     361             :      * We acquire buffer_strategy_lock just to ensure that the store appears
     362             :      * atomic to StrategyGetBuffer.  The bgwriter should call this rather
     363             :      * infrequently, so there's no performance penalty from being safe.
     364             :      */
     365        1696 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     366        1696 :     StrategyControl->bgwprocno = bgwprocno;
     367        1696 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     368        1696 : }
     369             : 
     370             : 
     371             : /*
     372             :  * StrategyShmemSize
     373             :  *
     374             :  * estimate the size of shared memory used by the freelist-related structures.
     375             :  *
     376             :  * Note: for somewhat historical reasons, the buffer lookup hashtable size
     377             :  * is also determined here.
     378             :  */
     379             : Size
     380        4060 : StrategyShmemSize(void)
     381             : {
     382        4060 :     Size        size = 0;
     383             : 
     384             :     /* size of lookup hash table ... see comment in StrategyInitialize */
     385        4060 :     size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
     386             : 
     387             :     /* size of the shared replacement strategy control block */
     388        4060 :     size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
     389             : 
     390        4060 :     return size;
     391             : }
     392             : 
     393             : /*
     394             :  * StrategyInitialize -- initialize the buffer cache replacement
     395             :  *      strategy.
     396             :  *
     397             :  * Assumes: All of the buffers are already built into a linked list.
     398             :  *      Only called by postmaster and only during initialization.
     399             :  */
     400             : void
     401        2180 : StrategyInitialize(bool init)
     402             : {
     403             :     bool        found;
     404             : 
     405             :     /*
     406             :      * Initialize the shared buffer lookup hashtable.
     407             :      *
     408             :      * Since we can't tolerate running out of lookup table entries, we must be
     409             :      * sure to specify an adequate table size here.  The maximum steady-state
     410             :      * usage is of course NBuffers entries, but BufferAlloc() tries to insert
     411             :      * a new entry before deleting the old.  In principle this could be
     412             :      * happening in each partition concurrently, so we could need as many as
     413             :      * NBuffers + NUM_BUFFER_PARTITIONS entries.
     414             :      */
     415        2180 :     InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);
     416             : 
     417             :     /*
     418             :      * Get or create the shared strategy control block
     419             :      */
     420        2180 :     StrategyControl = (BufferStrategyControl *)
     421        2180 :         ShmemInitStruct("Buffer Strategy Status",
     422             :                         sizeof(BufferStrategyControl),
     423             :                         &found);
     424             : 
     425        2180 :     if (!found)
     426             :     {
     427             :         /*
     428             :          * Only done once, usually in postmaster
     429             :          */
     430             :         Assert(init);
     431             : 
     432        2180 :         SpinLockInit(&StrategyControl->buffer_strategy_lock);
     433             : 
     434             :         /* Initialize the clock-sweep pointer */
     435        2180 :         pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
     436             : 
     437             :         /* Clear statistics */
     438        2180 :         StrategyControl->completePasses = 0;
     439        2180 :         pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
     440             : 
     441             :         /* No pending notification */
     442        2180 :         StrategyControl->bgwprocno = -1;
     443             :     }
     444             :     else
     445             :         Assert(!init);
     446        2180 : }
     447             : 
     448             : 
     449             : /* ----------------------------------------------------------------
     450             :  *              Backend-private buffer ring management
     451             :  * ----------------------------------------------------------------
     452             :  */
     453             : 
     454             : 
     455             : /*
     456             :  * GetAccessStrategy -- create a BufferAccessStrategy object
     457             :  *
     458             :  * The object is allocated in the current memory context.
     459             :  */
     460             : BufferAccessStrategy
     461      285852 : GetAccessStrategy(BufferAccessStrategyType btype)
     462             : {
     463             :     int         ring_size_kb;
     464             : 
     465             :     /*
     466             :      * Select ring size to use.  See buffer/README for rationales.
     467             :      *
     468             :      * Note: if you change the ring size for BAS_BULKREAD, see also
     469             :      * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     470             :      */
     471      285852 :     switch (btype)
     472             :     {
     473           0 :         case BAS_NORMAL:
     474             :             /* if someone asks for NORMAL, just give 'em a "default" object */
     475           0 :             return NULL;
     476             : 
     477      159122 :         case BAS_BULKREAD:
     478             :             {
     479             :                 int         ring_max_kb;
     480             : 
     481             :                 /*
     482             :                  * The ring always needs to be large enough to allow some
     483             :                  * separation in time between providing a buffer to the user
     484             :                  * of the strategy and that buffer being reused. Otherwise the
     485             :                  * user's pin will prevent reuse of the buffer, even without
     486             :                  * concurrent activity.
     487             :                  *
     488             :                  * We also need to ensure the ring always is large enough for
     489             :                  * SYNC_SCAN_REPORT_INTERVAL, as noted above.
     490             :                  *
      491             :                  * Thus we start out at a minimal size and increase the size
     492             :                  * further if appropriate.
     493             :                  */
     494      159122 :                 ring_size_kb = 256;
     495             : 
     496             :                 /*
     497             :                  * There's no point in a larger ring if we won't be allowed to
     498             :                  * pin sufficiently many buffers.  But we never limit to less
     499             :                  * than the minimal size above.
     500             :                  */
     501      159122 :                 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
     502      159122 :                 ring_max_kb = Max(ring_size_kb, ring_max_kb);
     503             : 
     504             :                 /*
     505             :                  * We would like the ring to additionally have space for the
     506             :                  * configured degree of IO concurrency. While being read in,
     507             :                  * buffers can obviously not yet be reused.
     508             :                  *
     509             :                  * Each IO can be up to io_combine_limit blocks large, and we
     510             :                  * want to start up to effective_io_concurrency IOs.
     511             :                  *
     512             :                  * Note that effective_io_concurrency may be 0, which disables
     513             :                  * AIO.
     514             :                  */
     515      159122 :                 ring_size_kb += (BLCKSZ / 1024) *
     516      159122 :                     io_combine_limit * effective_io_concurrency;
     517             : 
     518      159122 :                 if (ring_size_kb > ring_max_kb)
     519      159122 :                     ring_size_kb = ring_max_kb;
     520      159122 :                 break;
     521             :             }
     522      126730 :         case BAS_BULKWRITE:
     523      126730 :             ring_size_kb = 16 * 1024;
     524      126730 :             break;
     525           0 :         case BAS_VACUUM:
     526           0 :             ring_size_kb = 2048;
     527           0 :             break;
     528             : 
     529           0 :         default:
     530           0 :             elog(ERROR, "unrecognized buffer access strategy: %d",
     531             :                  (int) btype);
     532             :             return NULL;        /* keep compiler quiet */
     533             :     }
     534             : 
     535      285852 :     return GetAccessStrategyWithSize(btype, ring_size_kb);
     536             : }
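
A typical consumer pattern, sketched for illustration only (assumes a valid
Relation "rel" and a block count "nblocks" are in scope; not taken from any
particular caller):

    BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);

    for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                             RBM_NORMAL, bstrategy);

        /* ... inspect the page while the pin is held ... */
        ReleaseBuffer(buf);
    }

    FreeAccessStrategy(bstrategy);

Because the reads go through the ring, a large sequential scan recycles a small
set of buffers instead of evicting much of shared_buffers.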
     537             : 
     538             : /*
     539             :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
     540             :  *      number of buffers equivalent to the passed in size.
     541             :  *
     542             :  * If the given ring size is 0, no BufferAccessStrategy will be created and
     543             :  * the function will return NULL.  ring_size_kb must not be negative.
     544             :  */
     545             : BufferAccessStrategy
     546      303424 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
     547             : {
     548             :     int         ring_buffers;
     549             :     BufferAccessStrategy strategy;
     550             : 
     551             :     Assert(ring_size_kb >= 0);
     552             : 
     553             :     /* Figure out how many buffers ring_size_kb is */
     554      303424 :     ring_buffers = ring_size_kb / (BLCKSZ / 1024);
     555             : 
     556             :     /* 0 means unlimited, so no BufferAccessStrategy required */
     557      303424 :     if (ring_buffers == 0)
     558          12 :         return NULL;
     559             : 
     560             :     /* Cap to 1/8th of shared_buffers */
     561      303412 :     ring_buffers = Min(NBuffers / 8, ring_buffers);
     562             : 
     563             :     /* NBuffers should never be less than 16, so this shouldn't happen */
     564             :     Assert(ring_buffers > 0);
     565             : 
     566             :     /* Allocate the object and initialize all elements to zeroes */
     567             :     strategy = (BufferAccessStrategy)
     568      303412 :         palloc0(offsetof(BufferAccessStrategyData, buffers) +
     569             :                 ring_buffers * sizeof(Buffer));
     570             : 
     571             :     /* Set fields that don't start out zero */
     572      303412 :     strategy->btype = btype;
     573      303412 :     strategy->nbuffers = ring_buffers;
     574             : 
     575      303412 :     return strategy;
     576             : }
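
A worked example of the sizing arithmetic, assuming the default BLCKSZ of 8192
and NBuffers = 16384 (128MB of shared_buffers):

    int         ring_buffers;

    ring_buffers = (16 * 1024) / (8192 / 1024);     /* BAS_BULKWRITE: 2048 buffers = 16 MB */
    ring_buffers = Min(16384 / 8, ring_buffers);    /* NBuffers / 8 cap is also 2048 here */

With a smaller shared_buffers setting, the NBuffers / 8 cap wins and the ring
shrinks proportionally.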
     577             : 
     578             : /*
     579             :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
     580             :  *      the ring
     581             :  *
     582             :  * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
     583             :  * returning NULL with 0 size.
     584             :  */
     585             : int
     586          34 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
     587             : {
     588          34 :     if (strategy == NULL)
     589           0 :         return 0;
     590             : 
     591          34 :     return strategy->nbuffers;
     592             : }
     593             : 
     594             : /*
     595             :  * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
     596             :  *
      597             :  * When pinning extra buffers to look ahead, users of a ring-based strategy
      598             :  * are in danger of pinning too much of the ring at once.
     599             :  * For some strategies, that means "escaping" from the ring, and in others it
     600             :  * means forcing dirty data to disk very frequently with associated WAL
     601             :  * flushing.  Since external code has no insight into any of that, allow
     602             :  * individual strategy types to expose a clamp that should be applied when
     603             :  * deciding on a maximum number of buffers to pin at once.
     604             :  *
     605             :  * Callers should combine this number with other relevant limits and take the
     606             :  * minimum.
     607             :  */
     608             : int
     609     1152596 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
     610             : {
     611     1152596 :     if (strategy == NULL)
     612      821662 :         return NBuffers;
     613             : 
     614      330934 :     switch (strategy->btype)
     615             :     {
     616      149708 :         case BAS_BULKREAD:
     617             : 
     618             :             /*
     619             :              * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
     620             :              * shouldn't be a problem and the caller is free to pin up to the
     621             :              * entire ring at once.
     622             :              */
     623      149708 :             return strategy->nbuffers;
     624             : 
     625      181226 :         default:
     626             : 
     627             :             /*
     628             :              * Tell caller not to pin more than half the buffers in the ring.
     629             :              * This is a trade-off between look ahead distance and deferring
     630             :              * writeback and associated WAL traffic.
     631             :              */
     632      181226 :             return strategy->nbuffers / 2;
     633             :     }
     634             : }
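
A minimal sketch of combining this clamp with another limit, as suggested above
(illustration only; GetPinLimit(), used earlier in GetAccessStrategy(), returns
the backend-wide cap on pinned buffers):

    int         max_pins;

    max_pins = Min(GetAccessStrategyPinLimit(strategy), (int) GetPinLimit());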
     635             : 
     636             : /*
     637             :  * FreeAccessStrategy -- release a BufferAccessStrategy object
     638             :  *
     639             :  * A simple pfree would do at the moment, but we would prefer that callers
     640             :  * don't assume that much about the representation of BufferAccessStrategy.
     641             :  */
     642             : void
     643      274064 : FreeAccessStrategy(BufferAccessStrategy strategy)
     644             : {
     645             :     /* don't crash if called on a "default" strategy */
     646      274064 :     if (strategy != NULL)
     647      274064 :         pfree(strategy);
     648      274064 : }
     649             : 
     650             : /*
     651             :  * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
     652             :  *      ring is empty / not usable.
     653             :  *
     654             :  * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
     655             :  * returning.
     656             :  */
     657             : static BufferDesc *
     658     1593994 : GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
     659             : {
     660             :     BufferDesc *buf;
     661             :     Buffer      bufnum;
     662             :     uint32      old_buf_state;
     663             :     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
     664             : 
     665             : 
     666             :     /* Advance to next ring slot */
     667     1593994 :     if (++strategy->current >= strategy->nbuffers)
     668       46304 :         strategy->current = 0;
     669             : 
     670             :     /*
     671             :      * If the slot hasn't been filled yet, tell the caller to allocate a new
     672             :      * buffer with the normal allocation strategy.  He will then fill this
     673             :      * slot by calling AddBufferToRing with the new buffer.
     674             :      */
     675     1593994 :     bufnum = strategy->buffers[strategy->current];
     676     1593994 :     if (bufnum == InvalidBuffer)
     677      970162 :         return NULL;
     678             : 
     679      623832 :     buf = GetBufferDescriptor(bufnum - 1);
     680             : 
     681             :     /*
     682             :      * Check whether the buffer can be used and pin it if so. Do this using a
     683             :      * CAS loop, to avoid having to lock the buffer header.
     684             :      */
     685      623832 :     old_buf_state = pg_atomic_read_u32(&buf->state);
     686             :     for (;;)
     687             :     {
     688      623842 :         local_buf_state = old_buf_state;
     689             : 
     690             :         /*
     691             :          * If the buffer is pinned we cannot use it under any circumstances.
     692             :          *
     693             :          * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
     694             :          * since our own previous usage of the ring element would have left it
     695             :          * there, but it might've been decremented by clock-sweep since then).
     696             :          * A higher usage_count indicates someone else has touched the buffer,
     697             :          * so we shouldn't re-use it.
     698             :          */
     699      623842 :         if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
     700      614560 :             || BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
     701             :             break;
     702             : 
     703      605574 :         if (unlikely(local_buf_state & BM_LOCKED))
     704             :         {
     705           0 :             old_buf_state = WaitBufHdrUnlocked(buf);
     706           0 :             continue;
     707             :         }
     708             : 
     709             :         /* pin the buffer if the CAS succeeds */
     710      605574 :         local_buf_state += BUF_REFCOUNT_ONE;
     711             : 
     712      605574 :         if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
     713             :                                            local_buf_state))
     714             :         {
     715      605564 :             *buf_state = local_buf_state;
     716             : 
     717      605564 :             TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
     718      605564 :             return buf;
     719             :         }
     720             :     }
     721             : 
     722             :     /*
     723             :      * Tell caller to allocate a new buffer with the normal allocation
     724             :      * strategy.  He'll then replace this ring element via AddBufferToRing.
     725             :      */
     726       18268 :     return NULL;
     727             : }
     728             : 
     729             : /*
     730             :  * AddBufferToRing -- add a buffer to the buffer ring
     731             :  *
     732             :  * Caller must hold the buffer header spinlock on the buffer.  Since this
     733             :  * is called with the spinlock held, it had better be quite cheap.
     734             :  */
     735             : static void
     736      988430 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     737             : {
     738      988430 :     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
     739      988430 : }
     740             : 
     741             : /*
     742             :  * Utility function returning the IOContext of a given BufferAccessStrategy's
     743             :  * strategy ring.
     744             :  */
     745             : IOContext
     746   126696606 : IOContextForStrategy(BufferAccessStrategy strategy)
     747             : {
     748   126696606 :     if (!strategy)
     749   121816384 :         return IOCONTEXT_NORMAL;
     750             : 
     751     4880222 :     switch (strategy->btype)
     752             :     {
     753             :         case BAS_NORMAL:
     754             : 
     755             :             /*
     756             :              * Currently, GetAccessStrategy() returns NULL for
     757             :              * BufferAccessStrategyType BAS_NORMAL, so this case is
     758             :              * unreachable.
     759             :              */
     760             :             pg_unreachable();
     761             :             return IOCONTEXT_NORMAL;
     762     3145672 :         case BAS_BULKREAD:
     763     3145672 :             return IOCONTEXT_BULKREAD;
     764      558636 :         case BAS_BULKWRITE:
     765      558636 :             return IOCONTEXT_BULKWRITE;
     766     1175914 :         case BAS_VACUUM:
     767     1175914 :             return IOCONTEXT_VACUUM;
     768             :     }
     769             : 
     770           0 :     elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
     771             :     pg_unreachable();
     772             : }
     773             : 
     774             : /*
     775             :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
     776             :  *
     777             :  * When a nondefault strategy is used, the buffer manager calls this function
     778             :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
     779             :  * be written out and doing so would require flushing WAL too.  This gives us
     780             :  * a chance to choose a different victim.
     781             :  *
     782             :  * Returns true if buffer manager should ask for a new victim, and false
     783             :  * if this buffer should be written and re-used.
     784             :  */
     785             : bool
     786       17354 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
     787             : {
     788             :     /* We only do this in bulkread mode */
     789       17354 :     if (strategy->btype != BAS_BULKREAD)
     790        4706 :         return false;
     791             : 
     792             :     /* Don't muck with behavior of normal buffer-replacement strategy */
     793       24014 :     if (!from_ring ||
     794       11366 :         strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
     795        1282 :         return false;
     796             : 
     797             :     /*
     798             :      * Remove the dirty buffer from the ring; necessary to prevent infinite
     799             :      * loop if all ring members are dirty.
     800             :      */
     801       11366 :     strategy->buffers[strategy->current] = InvalidBuffer;
     802             : 
     803       11366 :     return true;
     804             : }

Generated by: LCOV version 1.16