LCOV - code coverage report
Current view: top level - src/backend/storage/buffer - freelist.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 172 184 93.5 %
Date: 2025-01-18 03:14:54 Functions: 17 17 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * freelist.c
       4             :  *    routines for managing the buffer pool's replacement strategy.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/storage/buffer/freelist.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres.h"
      17             : 
      18             : #include "pgstat.h"
      19             : #include "port/atomics.h"
      20             : #include "storage/buf_internals.h"
      21             : #include "storage/bufmgr.h"
      22             : #include "storage/proc.h"
      23             : 
      24             : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
      25             : 
      26             : 
      27             : /*
      28             :  * The shared freelist control information.
      29             :  */
      30             : typedef struct
      31             : {
      32             :     /* Spinlock: protects the values below */
      33             :     slock_t     buffer_strategy_lock;
      34             : 
      35             :     /*
      36             :      * Clock sweep hand: index of next buffer to consider grabbing. Note that
      37             :      * this isn't a concrete buffer - we only ever increase the value. So, to
      38             :      * get an actual buffer, it needs to be used modulo NBuffers.
      39             :      */
      40             :     pg_atomic_uint32 nextVictimBuffer;
      41             : 
      42             :     int         firstFreeBuffer;    /* Head of list of unused buffers */
      43             :     int         lastFreeBuffer; /* Tail of list of unused buffers */
      44             : 
      45             :     /*
      46             :      * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
      47             :      * when the list is empty)
      48             :      */
      49             : 
      50             :     /*
      51             :      * Statistics.  These counters should be wide enough that they can't
      52             :      * overflow during a single bgwriter cycle.
      53             :      */
      54             :     uint32      completePasses; /* Complete cycles of the clock sweep */
      55             :     pg_atomic_uint32 numBufferAllocs;   /* Buffers allocated since last reset */
      56             : 
      57             :     /*
      58             :      * Proc number of the bgwriter process to be notified upon activity, or
      59             :      * -1 if none. See StrategyNotifyBgWriter.
      60             :      */
      61             :     int         bgwprocno;
      62             : } BufferStrategyControl;
      63             : 
      64             : /* Pointers to shared state */
      65             : static BufferStrategyControl *StrategyControl = NULL;
      66             : 
      67             : /*
      68             :  * Private (non-shared) state for managing a ring of shared buffers to re-use.
      69             :  * This is currently the only kind of BufferAccessStrategy object, but someday
      70             :  * we might have more kinds.
      71             :  */
      72             : typedef struct BufferAccessStrategyData
      73             : {
      74             :     /* Overall strategy type */
      75             :     BufferAccessStrategyType btype;
      76             :     /* Number of elements in buffers[] array */
      77             :     int         nbuffers;
      78             : 
      79             :     /*
      80             :      * Index of the "current" slot in the ring, ie, the one most recently
      81             :      * returned by GetBufferFromRing.
      82             :      */
      83             :     int         current;
      84             : 
      85             :     /*
      86             :      * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
      87             :      * have not yet selected a buffer for this ring slot.  For allocation
      88             :      * simplicity this is palloc'd together with the fixed fields of the
      89             :      * struct.
      90             :      */
      91             :     Buffer      buffers[FLEXIBLE_ARRAY_MEMBER];
      92             : }           BufferAccessStrategyData;
      93             : 
      94             : 
      95             : /* Prototypes for internal functions */
      96             : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
      97             :                                      uint32 *buf_state);
      98             : static void AddBufferToRing(BufferAccessStrategy strategy,
      99             :                             BufferDesc *buf);
     100             : 
     101             : /*
     102             :  * ClockSweepTick - Helper routine for StrategyGetBuffer()
     103             :  *
     104             :  * Move the clock hand one buffer ahead of its current position and return the
     105             :  * id of the buffer now under the hand.
     106             :  */
     107             : static inline uint32
     108     6875286 : ClockSweepTick(void)
     109             : {
     110             :     uint32      victim;
     111             : 
     112             :     /*
     113             :      * Atomically move hand ahead one buffer - if several processes are
     114             :      * doing this concurrently, buffers can be returned slightly out of
     115             :      * apparent order.
     116             :      */
     117             :     victim =
     118     6875286 :         pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
     119             : 
     120     6875286 :     if (victim >= NBuffers)
     121             :     {
     122       55312 :         uint32      originalVictim = victim;
     123             : 
     124             :         /* always wrap what we look up in BufferDescriptors */
     125       55312 :         victim = victim % NBuffers;
     126             : 
     127             :         /*
     128             :          * If we're the one that just caused a wraparound, force
     129             :          * completePasses to be incremented while holding the spinlock. We
     130             :          * need the spinlock so StrategySyncStart() can return a consistent
     131             :          * value consisting of nextVictimBuffer and completePasses.
     132             :          */
     133       55312 :         if (victim == 0)
     134             :         {
     135             :             uint32      expected;
     136             :             uint32      wrapped;
     137       55170 :             bool        success = false;
     138             : 
     139       55170 :             expected = originalVictim + 1;
     140             : 
     141      110464 :             while (!success)
     142             :             {
     143             :                 /*
     144             :                  * Acquire the spinlock while increasing completePasses. That
     145             :                  * allows other readers to read nextVictimBuffer and
     146             :                  * completePasses in a consistent manner which is required for
     147             :                  * StrategySyncStart().  In theory delaying the increment
     148             :                  * could lead to an overflow of nextVictimBuffer, but that's
     149             :                  * highly unlikely and wouldn't be particularly harmful.
     150             :                  */
     151       55294 :                 SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     152             : 
     153       55294 :                 wrapped = expected % NBuffers;
     154             : 
     155       55294 :                 success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
     156             :                                                          &expected, wrapped);
     157       55294 :                 if (success)
     158       55170 :                     StrategyControl->completePasses++;
     159       55294 :                 SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     160             :             }
     161             :         }
     162             :     }
     163     6875286 :     return victim;
     164             : }
     165             : 
     166             : /*
     167             :  * have_free_buffer -- a lockless check to see if there is a free buffer in
     168             :  *                     buffer pool.
     169             :  *
     170             :  * A true result can become stale as soon as other operations take buffers
     171             :  * off the freelist, so callers that strictly need a free buffer should not
     172             :  * rely on it.
     173             :  */
     174             : bool
     175         398 : have_free_buffer(void)
     176             : {
     177         398 :     if (StrategyControl->firstFreeBuffer >= 0)
     178         398 :         return true;
     179             :     else
     180           0 :         return false;
     181             : }
     182             : 
     183             : /*
     184             :  * StrategyGetBuffer
     185             :  *
     186             :  *  Called by the bufmgr to get the next candidate buffer to use in
     187             :  *  BufferAlloc(). The only hard requirement BufferAlloc() has is that
     188             :  *  the selected buffer must not currently be pinned by anyone.
     189             :  *
     190             :  *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
     191             :  *
     192             :  *  To ensure that no one else can pin the buffer before we do, we must
     193             :  *  return the buffer with the buffer header spinlock still held.
     194             :  */
     195             : BufferDesc *
     196     3421590 : StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
     197             : {
     198             :     BufferDesc *buf;
     199             :     int         bgwprocno;
     200             :     int         trycounter;
     201             :     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
     202             : 
     203     3421590 :     *from_ring = false;
     204             : 
     205             :     /*
     206             :      * If given a strategy object, see whether it can select a buffer. We
     207             :      * assume strategy objects don't need buffer_strategy_lock.
     208             :      */
     209     3421590 :     if (strategy != NULL)
     210             :     {
     211     1533978 :         buf = GetBufferFromRing(strategy, buf_state);
     212     1533978 :         if (buf != NULL)
     213             :         {
     214      696748 :             *from_ring = true;
     215      696748 :             return buf;
     216             :         }
     217             :     }
     218             : 
     219             :     /*
     220             :      * If asked, we need to waken the bgwriter. Since we don't want to rely on
     221             :      * a spinlock for this, we force a single read from shared memory and then
     222             :      * set the latch based on that value. We need to go to that length because
     223             :      * otherwise the compiler might reread bgwprocno from memory, and it could
     224             :      * have been reset while/after we checked it.
     225             :      *
     226             :      * This can possibly set the latch of the wrong process if the bgwriter
     227             :      * dies in the wrong moment. But since PGPROC->procLatch is never
     228             :      * deallocated the worst consequence of that is that we set the latch of
     229             :      * some arbitrary process.
     230             :      */
     231     2724842 :     bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
     232     2724842 :     if (bgwprocno != -1)
     233             :     {
     234             :         /* reset bgwprocno first, before setting the latch */
     235         494 :         StrategyControl->bgwprocno = -1;
     236             : 
     237             :         /*
     238             :          * Not acquiring ProcArrayLock here which is slightly icky. It's
     239             :          * actually fine because procLatch isn't ever freed, so we just can
     240             :          * potentially set the wrong process' (or no process') latch.
     241             :          */
     242         494 :         SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
     243             :     }
     244             : 
     245             :     /*
     246             :      * We count buffer allocation requests so that the bgwriter can estimate
     247             :      * the rate of buffer consumption.  Note that buffers recycled by a
     248             :      * strategy object are intentionally not counted here.
     249             :      */
     250     2724842 :     pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
     251             : 
     252             :     /*
     253             :      * First check, without acquiring the lock, whether there are buffers in
     254             :      * the freelist. Since we otherwise don't require the spinlock in every
     255             :      * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
     256             :      * uselessly in most cases. That obviously leaves a race where a buffer is
     257             :      * put on the freelist but we don't see the store yet - but that's pretty
     258             :      * harmless, it'll just get used during the next buffer acquisition.
     259             :      *
     260             :      * If there are buffers on the freelist, acquire the spinlock to pop one
     261             :      * buffer off the freelist. Then check whether that buffer is usable and
     262             :      * repeat if not.
     263             :      *
     264             :      * Note that the freeNext fields are considered to be protected by the
     265             :      * buffer_strategy_lock, not the individual buffer header spinlocks, so
     266             :      * it's OK to manipulate them without holding a buffer's header spinlock.
     267             :      */
     268     2724842 :     if (StrategyControl->firstFreeBuffer >= 0)
     269             :     {
     270             :         while (true)
     271             :         {
     272             :             /* Acquire the spinlock to remove element from the freelist */
     273     1186586 :             SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     274             : 
     275     1186586 :             if (StrategyControl->firstFreeBuffer < 0)
     276             :             {
     277          14 :                 SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     278          14 :                 break;
     279             :             }
     280             : 
     281     1186572 :             buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
     282             :             Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);
     283             : 
     284             :             /* Unconditionally remove buffer from freelist */
     285     1186572 :             StrategyControl->firstFreeBuffer = buf->freeNext;
     286     1186572 :             buf->freeNext = FREENEXT_NOT_IN_LIST;
     287             : 
     288             :             /*
     289             :              * Release the lock so someone else can access the freelist while
     290             :              * we check out this buffer.
     291             :              */
     292     1186572 :             SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     293             : 
     294             :             /*
     295             :              * If the buffer is pinned or has a nonzero usage_count, we cannot
     296             :              * use it; discard it and retry.  (This can only happen if VACUUM
     297             :              * put a valid buffer in the freelist and then someone else used
     298             :              * it before we got to it.  It's probably impossible altogether as
     299             :              * of 8.3, but we'd better check anyway.)
     300             :              */
     301     1186572 :             local_buf_state = LockBufHdr(buf);
     302     1186572 :             if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
     303     1186570 :                 && BUF_STATE_GET_USAGECOUNT(local_buf_state) == 0)
     304             :             {
     305     1186564 :                 if (strategy != NULL)
     306      598342 :                     AddBufferToRing(strategy, buf);
     307     1186564 :                 *buf_state = local_buf_state;
     308     1186564 :                 return buf;
     309             :             }
     310           8 :             UnlockBufHdr(buf, local_buf_state);
     311             :         }
     312             :     }
     313             : 
     314             :     /* Nothing on the freelist, so run the "clock sweep" algorithm */
     315     1538278 :     trycounter = NBuffers;
     316             :     for (;;)
     317             :     {
     318     6875286 :         buf = GetBufferDescriptor(ClockSweepTick());
     319             : 
     320             :         /*
     321             :          * If the buffer is pinned or has a nonzero usage_count, we cannot use
     322             :          * it; decrement the usage_count (unless pinned) and keep scanning.
     323             :          */
     324     6875286 :         local_buf_state = LockBufHdr(buf);
     325             : 
     326     6875286 :         if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
     327             :         {
     328     6753144 :             if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
     329             :             {
     330     5214866 :                 local_buf_state -= BUF_USAGECOUNT_ONE;
     331             : 
     332     5214866 :                 trycounter = NBuffers;
     333             :             }
     334             :             else
     335             :             {
     336             :                 /* Found a usable buffer */
     337     1538278 :                 if (strategy != NULL)
     338      238888 :                     AddBufferToRing(strategy, buf);
     339     1538278 :                 *buf_state = local_buf_state;
     340     1538278 :                 return buf;
     341             :             }
     342             :         }
     343      122142 :         else if (--trycounter == 0)
     344             :         {
     345             :             /*
     346             :              * We've scanned all the buffers without making any state changes,
     347             :              * so all the buffers are pinned (or were when we looked at them).
     348             :              * We could hope that someone will free one eventually, but it's
     349             :              * probably better to fail than to risk getting stuck in an
     350             :              * infinite loop.
     351             :              */
     352           0 :             UnlockBufHdr(buf, local_buf_state);
     353           0 :             elog(ERROR, "no unpinned buffers available");
     354             :         }
     355     5337008 :         UnlockBufHdr(buf, local_buf_state);
     356             :     }
     357             : }
     358             : 
     359             : /*
     360             :  * StrategyFreeBuffer: put a buffer on the freelist
     361             :  */
     362             : void
     363      193108 : StrategyFreeBuffer(BufferDesc *buf)
     364             : {
     365      193108 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     366             : 
     367             :     /*
     368             :      * It is possible that we are told to put something in the freelist that
     369             :      * is already in it; don't screw up the list if so.
     370             :      */
     371      193108 :     if (buf->freeNext == FREENEXT_NOT_IN_LIST)
     372             :     {
     373      193108 :         buf->freeNext = StrategyControl->firstFreeBuffer;
     374      193108 :         if (buf->freeNext < 0)
     375        4638 :             StrategyControl->lastFreeBuffer = buf->buf_id;
     376      193108 :         StrategyControl->firstFreeBuffer = buf->buf_id;
     377             :     }
     378             : 
     379      193108 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     380      193108 : }
     381             : 
     382             : /*
     383             :  * StrategySyncStart -- tell BufferSync where to start syncing
     384             :  *
     385             :  * The result is the buffer index of the best buffer to sync first.
     386             :  * BufferSync() will proceed circularly around the buffer array from there.
     387             :  *
     388             :  * In addition, we return the completed-pass count (which is effectively
     389             :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
     390             :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
     391             :  * being read.
     392             :  */
     393             : int
     394       17746 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
     395             : {
     396             :     uint32      nextVictimBuffer;
     397             :     int         result;
     398             : 
     399       17746 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     400       17746 :     nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
     401       17746 :     result = nextVictimBuffer % NBuffers;
     402             : 
     403       17746 :     if (complete_passes)
     404             :     {
     405       17746 :         *complete_passes = StrategyControl->completePasses;
     406             : 
     407             :         /*
     408             :          * Additionally add the number of wraparounds that happened before
     409             :          * completePasses could be incremented. C.f. ClockSweepTick().
     410             :          */
     411       17746 :         *complete_passes += nextVictimBuffer / NBuffers;
     412             :     }
     413             : 
     414       17746 :     if (num_buf_alloc)
     415             :     {
     416       17746 :         *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
     417             :     }
     418       17746 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     419       17746 :     return result;
     420             : }
     421             : 
     422             : /*
     423             :  * StrategyNotifyBgWriter -- set or clear allocation notification latch
     424             :  *
     425             :  * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
     426             :  * set that latch.  Pass -1 to clear the pending notification before it
     427             :  * happens.  This feature is used by the bgwriter process to wake itself up
     428             :  * from hibernation, and is not meant for anybody else to use.
     429             :  */
     430             : void
     431         804 : StrategyNotifyBgWriter(int bgwprocno)
     432             : {
     433             :     /*
     434             :      * We acquire buffer_strategy_lock just to ensure that the store appears
     435             :      * atomic to StrategyGetBuffer.  The bgwriter should call this rather
     436             :      * infrequently, so there's no performance penalty from being safe.
     437             :      */
     438         804 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     439         804 :     StrategyControl->bgwprocno = bgwprocno;
     440         804 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     441         804 : }
     442             : 
     443             : 
     444             : /*
     445             :  * StrategyShmemSize
     446             :  *
     447             :  * estimate the size of shared memory used by the freelist-related structures.
     448             :  *
     449             :  * Note: for somewhat historical reasons, the buffer lookup hashtable size
     450             :  * is also determined here.
     451             :  */
     452             : Size
     453        3566 : StrategyShmemSize(void)
     454             : {
     455        3566 :     Size        size = 0;
     456             : 
     457             :     /* size of lookup hash table ... see comment in StrategyInitialize */
     458        3566 :     size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
     459             : 
     460             :     /* size of the shared replacement strategy control block */
     461        3566 :     size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
     462             : 
     463        3566 :     return size;
     464             : }
     465             : 
     466             : /*
     467             :  * StrategyInitialize -- initialize the buffer cache replacement
     468             :  *      strategy.
     469             :  *
     470             :  * Assumes: All of the buffers are already built into a linked list.
     471             :  *      Only called by postmaster and only during initialization.
     472             :  */
     473             : void
     474        1918 : StrategyInitialize(bool init)
     475             : {
     476             :     bool        found;
     477             : 
     478             :     /*
     479             :      * Initialize the shared buffer lookup hashtable.
     480             :      *
     481             :      * Since we can't tolerate running out of lookup table entries, we must be
     482             :      * sure to specify an adequate table size here.  The maximum steady-state
     483             :      * usage is of course NBuffers entries, but BufferAlloc() tries to insert
     484             :      * a new entry before deleting the old.  In principle this could be
     485             :      * happening in each partition concurrently, so we could need as many as
     486             :      * NBuffers + NUM_BUFFER_PARTITIONS entries.
     487             :      */
     488        1918 :     InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);
     489             : 
     490             :     /*
     491             :      * Get or create the shared strategy control block
     492             :      */
     493        1918 :     StrategyControl = (BufferStrategyControl *)
     494        1918 :         ShmemInitStruct("Buffer Strategy Status",
     495             :                         sizeof(BufferStrategyControl),
     496             :                         &found);
     497             : 
     498        1918 :     if (!found)
     499             :     {
     500             :         /*
     501             :          * Only done once, usually in postmaster
     502             :          */
     503             :         Assert(init);
     504             : 
     505        1918 :         SpinLockInit(&StrategyControl->buffer_strategy_lock);
     506             : 
     507             :         /*
     508             :          * Grab the whole linked list of free buffers for our strategy. We
     509             :          * assume it was previously set up by BufferManagerShmemInit().
     510             :          */
     511        1918 :         StrategyControl->firstFreeBuffer = 0;
     512        1918 :         StrategyControl->lastFreeBuffer = NBuffers - 1;
     513             : 
     514             :         /* Initialize the clock sweep pointer */
     515        1918 :         pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
     516             : 
     517             :         /* Clear statistics */
     518        1918 :         StrategyControl->completePasses = 0;
     519        1918 :         pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
     520             : 
     521             :         /* No pending notification */
     522        1918 :         StrategyControl->bgwprocno = -1;
     523             :     }
     524             :     else
     525             :         Assert(!init);
     526        1918 : }
     527             : 
     528             : 
     529             : /* ----------------------------------------------------------------
     530             :  *              Backend-private buffer ring management
     531             :  * ----------------------------------------------------------------
     532             :  */
     533             : 
     534             : 
     535             : /*
     536             :  * GetAccessStrategy -- create a BufferAccessStrategy object
     537             :  *
     538             :  * The object is allocated in the current memory context; BAS_NORMAL yields NULL.
     539             :  */
     540             : BufferAccessStrategy
     541      253456 : GetAccessStrategy(BufferAccessStrategyType btype)
     542             : {
     543             :     int         ring_size_kb;
     544             : 
     545             :     /*
     546             :      * Select ring size (in kB) to use.  See buffer/README for rationales.
     547             :      *
     548             :      * Note: if you change the ring size for BAS_BULKREAD, see also
     549             :      * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     550             :      */
     551      253456 :     switch (btype)
     552             :     {
     553           0 :         case BAS_NORMAL:
     554             :             /* if someone asks for NORMAL, just give 'em a "default" object */
     555           0 :             return NULL;
     556             : 
     557      141382 :         case BAS_BULKREAD:
     558      141382 :             ring_size_kb = 256; /* 256 kB */
     559      141382 :             break;
     560      112074 :         case BAS_BULKWRITE:
     561      112074 :             ring_size_kb = 16 * 1024;   /* 16 MB */
     562      112074 :             break;
     563           0 :         case BAS_VACUUM:
     564           0 :             ring_size_kb = 2048;    /* 2 MB */
     565           0 :             break;
     566             : 
     567           0 :         default:
     568           0 :             elog(ERROR, "unrecognized buffer access strategy: %d",
     569             :                  (int) btype);
     570             :             return NULL;        /* keep compiler quiet */
     571             :     }
     572             : 
     573      253456 :     return GetAccessStrategyWithSize(btype, ring_size_kb);
     574             : }
     575             : 
     576             : /*
     577             :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
     578             :  *      number of buffers equivalent to the passed in size (in kB).
     579             :  *
     580             :  * If the size works out to 0 whole blocks, no BufferAccessStrategy is created
     581             :  * and NULL is returned.  ring_size_kb must not be negative.
     582             :  */
     583             : BufferAccessStrategy
     584      265946 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
     585             : {
     586             :     int         ring_buffers;
     587             :     BufferAccessStrategy strategy;
     588             : 
     589             :     Assert(ring_size_kb >= 0);
     590             : 
     591             :     /* Figure out how many whole buffers ring_size_kb is (rounds down) */
     592      265946 :     ring_buffers = ring_size_kb / (BLCKSZ / 1024);
     593             : 
     594             :     /* 0 means unlimited, so no BufferAccessStrategy required */
     595      265946 :     if (ring_buffers == 0)
     596          12 :         return NULL;
     597             : 
     598             :     /* Cap to 1/8th of shared_buffers */
     599      265934 :     ring_buffers = Min(NBuffers / 8, ring_buffers);
     600             : 
     601             :     /* NBuffers should never be less than 16, so this shouldn't happen */
     602             :     Assert(ring_buffers > 0);
     603             : 
     604             :     /* Allocate the header plus ring_buffers slots, all initialized to zeroes */
     605             :     strategy = (BufferAccessStrategy)
     606      265934 :         palloc0(offsetof(BufferAccessStrategyData, buffers) +
     607             :                 ring_buffers * sizeof(Buffer));
     608             : 
     609             :     /* Set fields that don't start out zero */
     610      265934 :     strategy->btype = btype;
     611      265934 :     strategy->nbuffers = ring_buffers;
     612             : 
     613      265934 :     return strategy;
     614             : }
     615             : 
     616             : /*
     617             :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
     618             :  *      the ring
     619             :  *
     620             :  * Returns 0 for a NULL strategy, mirroring GetAccessStrategyWithSize(), which
     621             :  * returns NULL when asked for a ring of size 0.
     622             :  */
     623             : int
     624          22 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
     625             : {
     626          22 :     if (strategy == NULL)
     627           0 :         return 0;
     628             : 
     629          22 :     return strategy->nbuffers;
     630             : }
     631             : 
     632             : /*
     633             :  * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
     634             :  *
     635             :  * When pinning extra buffers to look ahead, users of a ring-based strategy are
     636             :  * in danger of pinning too much of the ring at once while performing look-ahead.
     637             :  * For some strategies, that means "escaping" from the ring, and in others it
     638             :  * means forcing dirty data to disk very frequently with associated WAL
     639             :  * flushing.  Since external code has no insight into any of that, allow
     640             :  * individual strategy types to expose a clamp that should be applied when
     641             :  * deciding on a maximum number of buffers to pin at once.
     642             :  *
     643             :  * Callers should combine this number with other relevant limits and take the
     644             :  * minimum.
     645             :  */
     646             : int
     647      775028 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
     648             : {
     649      775028 :     if (strategy == NULL)
     650      631914 :         return NBuffers;    /* no ring, so no ring-specific clamp */
     651             : 
     652      143114 :     switch (strategy->btype)
     653             :     {
     654      128568 :         case BAS_BULKREAD:
     655             : 
     656             :             /*
     657             :              * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
     658             :              * shouldn't be a problem and the caller is free to pin up to the
     659             :              * entire ring at once.
     660             :              */
     661      128568 :             return strategy->nbuffers;
     662             : 
     663       14546 :         default:
     664             : 
     665             :             /*
     666             :              * Tell caller not to pin more than half the buffers in the ring.
     667             :              * This is a trade-off between look ahead distance and deferring
     668             :              * writeback and associated WAL traffic.
     669             :              */
     670       14546 :             return strategy->nbuffers / 2;  /* 0 for a one-buffer ring */
     671             :     }
     672             : }
     673             : 
     674             : /*
     675             :  * FreeAccessStrategy -- release a BufferAccessStrategy object
     676             :  *
     677             :  * A simple pfree would do at the moment, but we would prefer that callers
     678             :  * don't assume that much about the representation of BufferAccessStrategy.
     679             :  */
     680             : void
     681      241152 : FreeAccessStrategy(BufferAccessStrategy strategy)
     682             : {
     683             :     /* a NULL ("default") strategy has nothing to free; don't crash on it */
     684      241152 :     if (strategy != NULL)
     685      241152 :         pfree(strategy);
     686      241152 : }
     687             : 
     688             : /*
     689             :  * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
     690             :  *      ring is empty / not usable.
     691             :  * On success the bufhdr spin lock is held on the returned buffer and the state
     692             :  * from LockBufHdr() is stored in *buf_state; on NULL, *buf_state is untouched.
     693             :  */
     694             : static BufferDesc *
     695     1533978 : GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
     696             : {
     697             :     BufferDesc *buf;
     698             :     Buffer      bufnum;
     699             :     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
     700             : 
     701             : 
     702             :     /* Advance to next ring slot, wrapping around at the end of the ring */
     703     1533978 :     if (++strategy->current >= strategy->nbuffers)
     704       50844 :         strategy->current = 0;
     705             : 
     706             :     /*
     707             :      * If the slot hasn't been filled yet, tell the caller to allocate a new
     708             :      * buffer with the normal allocation strategy.  The caller will then fill
     709             :      * this slot by calling AddBufferToRing with the new buffer.
     710             :      */
     711     1533978 :     bufnum = strategy->buffers[strategy->current];
     712     1533978 :     if (bufnum == InvalidBuffer)
     713      825084 :         return NULL;
     714             : 
     715             :     /*
     716             :      * If the buffer is pinned we cannot use it under any circumstances.
     717             :      *
     718             :      * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
     719             :      * since our own previous usage of the ring element would have left it
     720             :      * there, but it might've been decremented by clock sweep since then). A
     721             :      * higher usage_count indicates someone else has touched the buffer, so we
     722             :      * shouldn't re-use it.
     723             :      */
     724      708894 :     buf = GetBufferDescriptor(bufnum - 1);
     725      708894 :     local_buf_state = LockBufHdr(buf);
     726      708894 :     if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
     727      702324 :         && BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
     728             :     {
     729      696748 :         *buf_state = local_buf_state;
     730      696748 :         return buf;
     731             :     }
     732       12146 :     UnlockBufHdr(buf, local_buf_state);
     733             : 
     734             :     /*
     735             :      * Tell caller to allocate a new buffer with the normal allocation
     736             :      * strategy.  The caller then replaces this ring element via AddBufferToRing.
     737             :      */
     738       12146 :     return NULL;
     739             : }
     740             : 
     741             : /*
     742             :  * AddBufferToRing -- store a buffer in the current slot of the buffer ring
     743             :  *
     744             :  * Caller must hold the buffer header spinlock on the buffer.  Since this
     745             :  * is called with the spinlock held, it had better be quite cheap.
     746             :  */
     747             : static void
     748      837230 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     749             : {
     750      837230 :     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
     751      837230 : }
     752             : 
     753             : /*
     754             :  * Utility function returning the IOContext of a given BufferAccessStrategy's
     755             :  * strategy ring.  A NULL strategy maps to IOCONTEXT_NORMAL.
     756             :  */
     757             : IOContext
     758   111223994 : IOContextForStrategy(BufferAccessStrategy strategy)
     759             : {
     760   111223994 :     if (!strategy)
     761   107569090 :         return IOCONTEXT_NORMAL;
     762             : 
     763     3654904 :     switch (strategy->btype)
     764             :     {
     765             :         case BAS_NORMAL:
     766             : 
     767             :             /*
     768             :              * Currently, GetAccessStrategy() returns NULL for
     769             :              * BufferAccessStrategyType BAS_NORMAL, so this case is
     770             :              * unreachable.
     771             :              */
     772             :             pg_unreachable();
     773             :             return IOCONTEXT_NORMAL;
     774     2375262 :         case BAS_BULKREAD:
     775     2375262 :             return IOCONTEXT_BULKREAD;
     776      499038 :         case BAS_BULKWRITE:
     777      499038 :             return IOCONTEXT_BULKWRITE;
     778      780604 :         case BAS_VACUUM:
     779      780604 :             return IOCONTEXT_VACUUM;
     780             :     }
     781             :     /* Only reached if btype holds a value not handled by the switch above */
     782           0 :     elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
     783             :     pg_unreachable();
     784             : }
     785             : 
     786             : /*
     787             :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
     788             :  *
     789             :  * When a nondefault strategy is used, the buffer manager calls this function
     790             :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
     791             :  * be written out and doing so would require flushing WAL too.  This gives us
     792             :  * a chance to choose a different victim.
     793             :  *
     794             :  * Returns true if buffer manager should ask for a new victim, and false
     795             :  * if this buffer should be written and re-used.
     796             :  */
     797             : bool
     798       15660 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
     799             : {
     800             :     /* We only do this in bulkread mode (strategy must be non-NULL here) */
     801       15660 :     if (strategy->btype != BAS_BULKREAD)
     802        3310 :         return false;
     803             : 
     804             :     /* Don't muck with normal buffer replacement: buf must be the current ring slot */
     805       23700 :     if (!from_ring ||
     806       11350 :         strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
     807        1000 :         return false;
     808             : 
     809             :     /*
     810             :      * Remove the dirty buffer from the ring; necessary to prevent infinite
     811             :      * loop if all ring members are dirty.
     812             :      */
     813       11350 :     strategy->buffers[strategy->current] = InvalidBuffer;
     814             : 
     815       11350 :     return true;
     816             : }

Generated by: LCOV version 1.14