Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * freelist.c
4 : * routines for managing the buffer pool's replacement strategy.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : *
11 : * IDENTIFICATION
12 : * src/backend/storage/buffer/freelist.c
13 : *
14 : *-------------------------------------------------------------------------
15 : */
16 : #include "postgres.h"
17 :
18 : #include "pgstat.h"
19 : #include "port/atomics.h"
20 : #include "storage/buf_internals.h"
21 : #include "storage/bufmgr.h"
22 : #include "storage/proc.h"
23 :
24 : #define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var))))
25 :
26 :
27 : /*
28 : * The shared freelist control information.
29 : */
30 : typedef struct
31 : {
32 : /* Spinlock: protects the values below */
33 : slock_t buffer_strategy_lock;
34 :
35 : /*
36 : * clock-sweep hand: index of next buffer to consider grabbing. Note that
37 : * this isn't a concrete buffer - we only ever increase the value. So, to
38 : * get an actual buffer, it needs to be used modulo NBuffers.
39 : */
40 : pg_atomic_uint32 nextVictimBuffer;
41 :
42 : /*
43 : * Statistics. These counters should be wide enough that they can't
44 : * overflow during a single bgwriter cycle.
45 : */
46 : uint32 completePasses; /* Complete cycles of the clock-sweep */
47 : pg_atomic_uint32 numBufferAllocs; /* Buffers allocated since last reset */
48 :
49 : /*
50 : * Bgwriter process to be notified upon activity, or -1 if none. See
51 : * StrategyNotifyBgWriter.
52 : */
53 : int bgwprocno;
54 : } BufferStrategyControl;
55 :
56 : /* Pointers to shared state */
57 : static BufferStrategyControl *StrategyControl = NULL;
58 :
59 : /*
60 : * Private (non-shared) state for managing a ring of shared buffers to re-use.
61 : * This is currently the only kind of BufferAccessStrategy object, but someday
62 : * we might have more kinds.
63 : */
64 : typedef struct BufferAccessStrategyData
65 : {
66 : /* Overall strategy type */
67 : BufferAccessStrategyType btype;
68 : /* Number of elements in buffers[] array */
69 : int nbuffers;
70 :
71 : /*
72 : * Index of the "current" slot in the ring, ie, the one most recently
73 : * returned by GetBufferFromRing.
74 : */
75 : int current;
76 :
77 : /*
78 : * Array of buffer numbers. InvalidBuffer (that is, zero) indicates we
79 : * have not yet selected a buffer for this ring slot. For allocation
80 : * simplicity this is palloc'd together with the fixed fields of the
81 : * struct.
82 : */
83 : Buffer buffers[FLEXIBLE_ARRAY_MEMBER];
84 : } BufferAccessStrategyData;
85 :
86 :
87 : /* Prototypes for internal functions */
88 : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
89 : uint32 *buf_state);
90 : static void AddBufferToRing(BufferAccessStrategy strategy,
91 : BufferDesc *buf);
92 :
93 : /*
94 : * ClockSweepTick - Helper routine for StrategyGetBuffer()
95 : *
96 : * Move the clock hand one buffer ahead of its current position and return the
97 : * id of the buffer now under the hand.
98 : */
99 : static inline uint32
100 9475212 : ClockSweepTick(void)
101 : {
102 : uint32 victim;
103 :
104 : /*
105 : * Atomically move the hand ahead one buffer - if several processes are
106 : * doing this concurrently, buffers can be returned slightly out of
107 : * apparent order.
108 : */
109 : victim =
110 9475212 : pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
111 :
112 9475212 : if (victim >= NBuffers)
113 : {
114 65134 : uint32 originalVictim = victim;
115 :
116 : /* always wrap what we look up in BufferDescriptors */
117 65134 : victim = victim % NBuffers;
118 :
119 : /*
120 : * If we're the one that just caused a wraparound, force
121 : * completePasses to be incremented while holding the spinlock. We
122 : * need the spinlock so StrategySyncStart() can return a consistent
123 : * value consisting of nextVictimBuffer and completePasses.
124 : */
125 65134 : if (victim == 0)
126 : {
127 : uint32 expected;
128 : uint32 wrapped;
129 64746 : bool success = false;
130 :
131 64746 : expected = originalVictim + 1;
132 :
133 129784 : while (!success)
134 : {
135 : /*
136 : * Acquire the spinlock while increasing completePasses. That
137 : * allows other readers to read nextVictimBuffer and
138 : * completePasses in a consistent manner which is required for
139 : * StrategySyncStart(). In theory delaying the increment
140 : * could lead to an overflow of nextVictimBuffers, but that's
141 : * could lead to an overflow of nextVictimBuffer, but that's
142 : */
143 65038 : SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
144 :
145 65038 : wrapped = expected % NBuffers;
146 :
147 65038 : success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
148 : &expected, wrapped);
149 65038 : if (success)
150 64746 : StrategyControl->completePasses++;
151 65038 : SpinLockRelease(&StrategyControl->buffer_strategy_lock);
152 : }
153 : }
154 : }
155 9475212 : return victim;
156 : }
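/*
 * Illustrative sketch (hypothetical, not PostgreSQL code): a stand-alone
 * model of the wraparound handling above, using C11 atomics in place of the
 * pg_atomic_* wrappers and omitting the spinlock that the real code holds
 * while bumping completePasses.  NBUFFERS, next_victim, complete_passes and
 * clock_sweep_tick_model are invented names.
 */
#include <stdatomic.h>

#define NBUFFERS 128

static atomic_uint next_victim;     /* only ever increases; folded via CAS */
static unsigned complete_passes;    /* spinlock-protected in the real code */

static unsigned
clock_sweep_tick_model(void)
{
    unsigned victim = atomic_fetch_add(&next_victim, 1);

    if (victim >= NBUFFERS && victim % NBUFFERS == 0)
    {
        /* we caused the wraparound: fold the counter back into range */
        unsigned expected = victim + 1;

        /* a failed CAS reloads 'expected', so each retry re-folds it */
        while (!atomic_compare_exchange_weak(&next_victim, &expected,
                                             expected % NBUFFERS))
            ;
        complete_passes++;
    }
    return victim % NBUFFERS;
}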
157 :
158 : /*
159 : * StrategyGetBuffer
160 : *
161 : * Called by the bufmgr to get the next candidate buffer to use in
162 : * BufferAlloc(). The only hard requirement BufferAlloc() has is that
163 : * the selected buffer must not currently be pinned by anyone.
164 : *
165 : * strategy is a BufferAccessStrategy object, or NULL for default strategy.
166 : *
167 : * To ensure that no one else can pin the buffer before we do, we must
168 : * return the buffer with the buffer header spinlock still held.
169 : */
170 : BufferDesc *
171 3810990 : StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
172 : {
173 : BufferDesc *buf;
174 : int bgwprocno;
175 : int trycounter;
176 : uint32 local_buf_state; /* to avoid repeated (de-)referencing */
177 :
178 3810990 : *from_ring = false;
179 :
180 : /*
181 : * If given a strategy object, see whether it can select a buffer. We
182 : * assume strategy objects don't need buffer_strategy_lock.
183 : */
184 3810990 : if (strategy != NULL)
185 : {
186 1590048 : buf = GetBufferFromRing(strategy, buf_state);
187 1590048 : if (buf != NULL)
188 : {
189 606706 : *from_ring = true;
190 606706 : return buf;
191 : }
192 : }
193 :
194 : /*
195 : * If asked, we need to wake the bgwriter. Since we don't want to rely on
196 : * a spinlock for this, we force a single read from shared memory and then
197 : * set the latch based on that value. We need to go to this length because
198 : * otherwise bgwprocno might be reset while/after we check, since the
199 : * compiler might just reread the value from memory.
200 : *
201 : * This can possibly set the latch of the wrong process if the bgwriter
202 : * dies at the wrong moment. But since PGPROC->procLatch is never
203 : * deallocated, the worst consequence of that is that we set the latch of
204 : * some arbitrary process.
205 : */
206 3204284 : bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
207 3204284 : if (bgwprocno != -1)
208 : {
209 : /* reset bgwprocno first, before setting the latch */
210 1214 : StrategyControl->bgwprocno = -1;
211 :
212 : /*
213 : * Not acquiring ProcArrayLock here is slightly icky. It's
214 : * actually fine because procLatch isn't ever freed, so at worst we
215 : * might set the wrong process' (or no process') latch.
216 : */
217 1214 : SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
218 : }
219 :
220 : /*
221 : * We count buffer allocation requests so that the bgwriter can estimate
222 : * the rate of buffer consumption. Note that buffers recycled by a
223 : * strategy object are intentionally not counted here.
224 : */
225 3204284 : pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
226 :
227 : /* Use the "clock sweep" algorithm to find a free buffer */
228 3204284 : trycounter = NBuffers;
229 : for (;;)
230 : {
231 9475212 : buf = GetBufferDescriptor(ClockSweepTick());
232 :
233 : /*
234 : * If the buffer is pinned or has a nonzero usage_count, we cannot use
235 : * it; decrement the usage_count (unless pinned) and keep scanning.
236 : */
237 9475212 : local_buf_state = LockBufHdr(buf);
238 :
239 9475212 : if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
240 : {
241 9295452 : if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
242 : {
243 6091168 : local_buf_state -= BUF_USAGECOUNT_ONE;
244 :
245 6091168 : trycounter = NBuffers;
246 : }
247 : else
248 : {
249 : /* Found a usable buffer */
250 3204284 : if (strategy != NULL)
251 983342 : AddBufferToRing(strategy, buf);
252 3204284 : *buf_state = local_buf_state;
253 3204284 : return buf;
254 : }
255 : }
256 179760 : else if (--trycounter == 0)
257 : {
258 : /*
259 : * We've scanned all the buffers without making any state changes,
260 : * so all the buffers are pinned (or were when we looked at them).
261 : * We could hope that someone will free one eventually, but it's
262 : * probably better to fail than to risk getting stuck in an
263 : * infinite loop.
264 : */
265 0 : UnlockBufHdr(buf, local_buf_state);
266 0 : elog(ERROR, "no unpinned buffers available");
267 : }
268 6270928 : UnlockBufHdr(buf, local_buf_state);
269 : }
270 : }
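/*
 * Illustrative sketch (hypothetical, not PostgreSQL code): a stand-alone
 * model of the "second chance" behaviour of the loop above.  Plain ints
 * stand in for the refcount and usage_count that the real code keeps packed
 * in the buffer state word under the header spinlock; all names here are
 * invented.
 */
#define MODEL_NBUFFERS 4

static int model_pins[MODEL_NBUFFERS];   /* refcount per buffer */
static int model_usage[MODEL_NBUFFERS];  /* usage_count per buffer */
static unsigned model_hand;              /* clock hand */

static int
model_find_victim(void)
{
    int     tries = MODEL_NBUFFERS;

    for (;;)
    {
        int     idx = model_hand++ % MODEL_NBUFFERS;

        if (model_pins[idx] == 0)
        {
            if (model_usage[idx] > 0)
            {
                model_usage[idx]--;      /* second chance; keep sweeping */
                tries = MODEL_NBUFFERS;
            }
            else
                return idx;              /* unpinned and "cold": evict it */
        }
        else if (--tries == 0)
            return -1;                   /* every buffer is currently pinned */
    }
}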
271 :
272 : /*
273 : * StrategySyncStart -- tell BgBufferSync where to start syncing
274 : *
275 : * The result is the buffer index of the best buffer to sync first.
276 : * BgBufferSync() will proceed circularly around the buffer array from there.
277 : *
278 : * In addition, we return the completed-pass count (which is effectively
279 : * the higher-order bits of nextVictimBuffer) and the count of recent buffer
280 : * allocs if non-NULL pointers are passed. The alloc count is reset after
281 : * being read.
282 : */
283 : int
284 25394 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
285 : {
286 : uint32 nextVictimBuffer;
287 : int result;
288 :
289 25394 : SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
290 25394 : nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
291 25394 : result = nextVictimBuffer % NBuffers;
292 :
293 25394 : if (complete_passes)
294 : {
295 25394 : *complete_passes = StrategyControl->completePasses;
296 :
297 : /*
298 : * Additionally add the number of wraparounds that happened before
299 : * completePasses could be incremented. C.f. ClockSweepTick().
300 : */
301 25394 : *complete_passes += nextVictimBuffer / NBuffers;
302 : }
303 :
304 25394 : if (num_buf_alloc)
305 : {
306 25394 : *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
307 : }
308 25394 : SpinLockRelease(&StrategyControl->buffer_strategy_lock);
309 25394 : return result;
310 : }
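/*
 * Worked example (illustrative figures): with NBuffers = 16384, a snapshot
 * of nextVictimBuffer = 16500 and completePasses = 2 yields
 *
 *     result           = 16500 % 16384 = 116
 *     *complete_passes = 2 + 16500 / 16384 = 3
 *
 * i.e. one wraparound had occurred whose completePasses increment was still
 * pending in ClockSweepTick() when the snapshot was taken.
 */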
311 :
312 : /*
313 : * StrategyNotifyBgWriter -- set or clear allocation notification latch
314 : *
315 : * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
316 : * set that latch. Pass -1 to clear the pending notification before it
317 : * happens. This feature is used by the bgwriter process to wake itself up
318 : * from hibernation, and is not meant for anybody else to use.
319 : */
320 : void
321 2472 : StrategyNotifyBgWriter(int bgwprocno)
322 : {
323 : /*
324 : * We acquire buffer_strategy_lock just to ensure that the store appears
325 : * atomic to StrategyGetBuffer. The bgwriter should call this rather
326 : * infrequently, so there's no performance penalty from being safe.
327 : */
328 2472 : SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
329 2472 : StrategyControl->bgwprocno = bgwprocno;
330 2472 : SpinLockRelease(&StrategyControl->buffer_strategy_lock);
331 2472 : }
332 :
333 :
334 : /*
335 : * StrategyShmemSize
336 : *
337 : * estimate the size of shared memory used by the freelist-related structures.
338 : *
339 : * Note: for somewhat historical reasons, the buffer lookup hashtable size
340 : * is also determined here.
341 : */
342 : Size
343 4048 : StrategyShmemSize(void)
344 : {
345 4048 : Size size = 0;
346 :
347 : /* size of lookup hash table ... see comment in StrategyInitialize */
348 4048 : size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
349 :
350 : /* size of the shared replacement strategy control block */
351 4048 : size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
352 :
353 4048 : return size;
354 : }
355 :
356 : /*
357 : * StrategyInitialize -- initialize the buffer cache replacement
358 : * strategy.
359 : *
360 : * Assumes: All of the buffers are already built into a linked list.
361 : * Only called by postmaster and only during initialization.
362 : */
363 : void
364 2174 : StrategyInitialize(bool init)
365 : {
366 : bool found;
367 :
368 : /*
369 : * Initialize the shared buffer lookup hashtable.
370 : *
371 : * Since we can't tolerate running out of lookup table entries, we must be
372 : * sure to specify an adequate table size here. The maximum steady-state
373 : * usage is of course NBuffers entries, but BufferAlloc() tries to insert
374 : * a new entry before deleting the old. In principle this could be
375 : * happening in each partition concurrently, so we could need as many as
376 : * NBuffers + NUM_BUFFER_PARTITIONS entries.
377 : */
378 2174 : InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);
379 :
380 : /*
381 : * Get or create the shared strategy control block
382 : */
383 2174 : StrategyControl = (BufferStrategyControl *)
384 2174 : ShmemInitStruct("Buffer Strategy Status",
385 : sizeof(BufferStrategyControl),
386 : &found);
387 :
388 2174 : if (!found)
389 : {
390 : /*
391 : * Only done once, usually in postmaster
392 : */
393 : Assert(init);
394 :
395 2174 : SpinLockInit(&StrategyControl->buffer_strategy_lock);
396 :
397 : /* Initialize the clock-sweep pointer */
398 2174 : pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
399 :
400 : /* Clear statistics */
401 2174 : StrategyControl->completePasses = 0;
402 2174 : pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
403 :
404 : /* No pending notification */
405 2174 : StrategyControl->bgwprocno = -1;
406 : }
407 : else
408 : Assert(!init);
409 2174 : }
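/*
 * Worked example (illustrative figures): with NBuffers = 16384 and
 * NUM_BUFFER_PARTITIONS = 128, the lookup table above is sized for
 * 16384 + 128 = 16512 entries, i.e. one extra in-flight entry per
 * partition on top of the steady-state maximum.
 */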
410 :
411 :
412 : /* ----------------------------------------------------------------
413 : * Backend-private buffer ring management
414 : * ----------------------------------------------------------------
415 : */
416 :
417 :
418 : /*
419 : * GetAccessStrategy -- create a BufferAccessStrategy object
420 : *
421 : * The object is allocated in the current memory context.
422 : */
423 : BufferAccessStrategy
424 283996 : GetAccessStrategy(BufferAccessStrategyType btype)
425 : {
426 : int ring_size_kb;
427 :
428 : /*
429 : * Select ring size to use. See buffer/README for rationales.
430 : *
431 : * Note: if you change the ring size for BAS_BULKREAD, see also
432 : * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
433 : */
434 283996 : switch (btype)
435 : {
436 0 : case BAS_NORMAL:
437 : /* if someone asks for NORMAL, just give 'em a "default" object */
438 0 : return NULL;
439 :
440 157772 : case BAS_BULKREAD:
441 : {
442 : int ring_max_kb;
443 :
444 : /*
445 : * The ring always needs to be large enough to allow some
446 : * separation in time between providing a buffer to the user
447 : * of the strategy and that buffer being reused. Otherwise the
448 : * user's pin will prevent reuse of the buffer, even without
449 : * concurrent activity.
450 : *
451 : * We also need to ensure the ring always is large enough for
452 : * SYNC_SCAN_REPORT_INTERVAL, as noted above.
453 : *
454 : * Thus we start out with a minimal size and increase it
455 : * further if appropriate.
456 : */
457 157772 : ring_size_kb = 256;
458 :
459 : /*
460 : * There's no point in a larger ring if we won't be allowed to
461 : * pin sufficiently many buffers. But we never limit to less
462 : * than the minimal size above.
463 : */
464 157772 : ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
465 157772 : ring_max_kb = Max(ring_size_kb, ring_max_kb);
466 :
467 : /*
468 : * We would like the ring to additionally have space for the
469 : * configured degree of IO concurrency. While being read in,
470 : * buffers can obviously not yet be reused.
471 : *
472 : * Each IO can be up to io_combine_limit blocks large, and we
473 : * want to start up to effective_io_concurrency IOs.
474 : *
475 : * Note that effective_io_concurrency may be 0, which disables
476 : * AIO.
477 : */
478 157772 : ring_size_kb += (BLCKSZ / 1024) *
479 157772 : io_combine_limit * effective_io_concurrency;
480 :
481 157772 : if (ring_size_kb > ring_max_kb)
482 157772 : ring_size_kb = ring_max_kb;
483 157772 : break;
484 : }
485 126224 : case BAS_BULKWRITE:
486 126224 : ring_size_kb = 16 * 1024;
487 126224 : break;
488 0 : case BAS_VACUUM:
489 0 : ring_size_kb = 2048;
490 0 : break;
491 :
492 0 : default:
493 0 : elog(ERROR, "unrecognized buffer access strategy: %d",
494 : (int) btype);
495 : return NULL; /* keep compiler quiet */
496 : }
497 :
498 283996 : return GetAccessStrategyWithSize(btype, ring_size_kb);
499 : }
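/*
 * Usage sketch (hypothetical, assuming the usual bufmgr.h/rel.h includes):
 * the typical lifecycle of a strategy object in backend code.  The function
 * name is invented; rel and blkno are assumed to be a valid Relation and
 * block number.
 */
static void
bulkread_usage_sketch(Relation rel, BlockNumber blkno)
{
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    Buffer      buf;

    /* reads funnelled through the strategy recycle its small ring of buffers */
    buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy);
    /* ... examine the page ... */
    ReleaseBuffer(buf);

    FreeAccessStrategy(strategy);
}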
500 :
501 : /*
502 : * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
503 : * number of buffers equivalent to the passed in size.
504 : *
505 : * If the given ring size is 0, no BufferAccessStrategy will be created and
506 : * the function will return NULL. ring_size_kb must not be negative.
507 : */
508 : BufferAccessStrategy
509 300804 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
510 : {
511 : int ring_buffers;
512 : BufferAccessStrategy strategy;
513 :
514 : Assert(ring_size_kb >= 0);
515 :
516 : /* Figure out how many buffers ring_size_kb is */
517 300804 : ring_buffers = ring_size_kb / (BLCKSZ / 1024);
518 :
519 : /* 0 means unlimited, so no BufferAccessStrategy required */
520 300804 : if (ring_buffers == 0)
521 12 : return NULL;
522 :
523 : /* Cap to 1/8th of shared_buffers */
524 300792 : ring_buffers = Min(NBuffers / 8, ring_buffers);
525 :
526 : /* NBuffers should never be less than 16, so this shouldn't happen */
527 : Assert(ring_buffers > 0);
528 :
529 : /* Allocate the object and initialize all elements to zeroes */
530 : strategy = (BufferAccessStrategy)
531 300792 : palloc0(offsetof(BufferAccessStrategyData, buffers) +
532 : ring_buffers * sizeof(Buffer));
533 :
534 : /* Set fields that don't start out zero */
535 300792 : strategy->btype = btype;
536 300792 : strategy->nbuffers = ring_buffers;
537 :
538 300792 : return strategy;
539 : }
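/*
 * Worked example (illustrative figures): with BLCKSZ = 8192, a BAS_BULKWRITE
 * request of 16 * 1024 kB becomes 16384 / 8 = 2048 buffers (16 MB).  With
 * shared_buffers = 128 MB (NBuffers = 16384) the 1/8 cap is also 2048, so
 * the full ring is kept; with shared_buffers = 16 MB (NBuffers = 2048) the
 * ring is clamped to 2048 / 8 = 256 buffers (2 MB).
 */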
540 :
541 : /*
542 : * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
543 : * the ring
544 : *
545 : * Returns 0 on NULL input, matching GetAccessStrategyWithSize(), which
546 : * returns NULL for a ring size of 0.
547 : */
548 : int
549 34 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
550 : {
551 34 : if (strategy == NULL)
552 0 : return 0;
553 :
554 34 : return strategy->nbuffers;
555 : }
556 :
557 : /*
558 : * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
559 : *
560 : * When pinning extra buffers to look ahead, users of a ring-based strategy are
561 : * in danger of pinning too much of the ring at once while performing look-ahead.
562 : * For some strategies, that means "escaping" from the ring, and in others it
563 : * means forcing dirty data to disk very frequently with associated WAL
564 : * flushing. Since external code has no insight into any of that, allow
565 : * individual strategy types to expose a clamp that should be applied when
566 : * deciding on a maximum number of buffers to pin at once.
567 : *
568 : * Callers should combine this number with other relevant limits and take the
569 : * minimum.
570 : */
571 : int
572 1100148 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
573 : {
574 1100148 : if (strategy == NULL)
575 794992 : return NBuffers;
576 :
577 305156 : switch (strategy->btype)
578 : {
579 148394 : case BAS_BULKREAD:
580 :
581 : /*
582 : * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
583 : * shouldn't be a problem and the caller is free to pin up to the
584 : * entire ring at once.
585 : */
586 148394 : return strategy->nbuffers;
587 :
588 156762 : default:
589 :
590 : /*
591 : * Tell caller not to pin more than half the buffers in the ring.
592 : * This is a trade-off between look ahead distance and deferring
593 : * writeback and associated WAL traffic.
594 : */
595 156762 : return strategy->nbuffers / 2;
596 : }
597 : }
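/*
 * Sketch (hypothetical variable names): a caller planning its look-ahead
 * distance might combine this clamp with other limits roughly as
 *
 *     int max_pins = Min(GetPinLimit(),
 *                        GetAccessStrategyPinLimit(strategy));
 *
 * and then never keep more than max_pins buffers pinned at once.
 */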
598 :
599 : /*
600 : * FreeAccessStrategy -- release a BufferAccessStrategy object
601 : *
602 : * A simple pfree would do at the moment, but we would prefer that callers
603 : * don't assume that much about the representation of BufferAccessStrategy.
604 : */
605 : void
606 272156 : FreeAccessStrategy(BufferAccessStrategy strategy)
607 : {
608 : /* don't crash if called on a "default" strategy */
609 272156 : if (strategy != NULL)
610 272156 : pfree(strategy);
611 272156 : }
612 :
613 : /*
614 : * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
615 : * ring is empty / not usable.
616 : *
617 : * The bufhdr spin lock is held on the returned buffer.
618 : */
619 : static BufferDesc *
620 1590048 : GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
621 : {
622 : BufferDesc *buf;
623 : Buffer bufnum;
624 : uint32 local_buf_state; /* to avoid repeated (de-)referencing */
625 :
626 :
627 : /* Advance to next ring slot */
628 1590048 : if (++strategy->current >= strategy->nbuffers)
629 46224 : strategy->current = 0;
630 :
631 : /*
632 : * If the slot hasn't been filled yet, tell the caller to allocate a new
633 : * buffer with the normal allocation strategy. The caller will then fill
634 : * this slot by calling AddBufferToRing with the new buffer.
635 : */
636 1590048 : bufnum = strategy->buffers[strategy->current];
637 1590048 : if (bufnum == InvalidBuffer)
638 963884 : return NULL;
639 :
640 : /*
641 : * If the buffer is pinned we cannot use it under any circumstances.
642 : *
643 : * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
644 : * since our own previous usage of the ring element would have left it
645 : * there, but it might've been decremented by clock-sweep since then). A
646 : * higher usage_count indicates someone else has touched the buffer, so we
647 : * shouldn't re-use it.
648 : */
649 626164 : buf = GetBufferDescriptor(bufnum - 1);
650 626164 : local_buf_state = LockBufHdr(buf);
651 626164 : if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
652 616662 : && BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
653 : {
654 606706 : *buf_state = local_buf_state;
655 606706 : return buf;
656 : }
657 19458 : UnlockBufHdr(buf, local_buf_state);
658 :
659 : /*
660 : * Tell caller to allocate a new buffer with the normal allocation
661 : * strategy. The caller will then replace this ring element via AddBufferToRing.
662 : */
663 19458 : return NULL;
664 : }
665 :
666 : /*
667 : * AddBufferToRing -- add a buffer to the buffer ring
668 : *
669 : * Caller must hold the buffer header spinlock on the buffer. Since this
670 : * is called with the spinlock held, it had better be quite cheap.
671 : */
672 : static void
673 983342 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
674 : {
675 983342 : strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
676 983342 : }
677 :
678 : /*
679 : * Utility function returning the IOContext of a given BufferAccessStrategy's
680 : * strategy ring.
681 : */
682 : IOContext
683 124442794 : IOContextForStrategy(BufferAccessStrategy strategy)
684 : {
685 124442794 : if (!strategy)
686 119766906 : return IOCONTEXT_NORMAL;
687 :
688 4675888 : switch (strategy->btype)
689 : {
690 : case BAS_NORMAL:
691 :
692 : /*
693 : * Currently, GetAccessStrategy() returns NULL for
694 : * BufferAccessStrategyType BAS_NORMAL, so this case is
695 : * unreachable.
696 : */
697 : pg_unreachable();
698 : return IOCONTEXT_NORMAL;
699 3100106 : case BAS_BULKREAD:
700 3100106 : return IOCONTEXT_BULKREAD;
701 556752 : case BAS_BULKWRITE:
702 556752 : return IOCONTEXT_BULKWRITE;
703 1019030 : case BAS_VACUUM:
704 1019030 : return IOCONTEXT_VACUUM;
705 : }
706 :
707 0 : elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
708 : pg_unreachable();
709 : }
710 :
711 : /*
712 : * StrategyRejectBuffer -- consider rejecting a dirty buffer
713 : *
714 : * When a nondefault strategy is used, the buffer manager calls this function
715 : * when it turns out that the buffer selected by StrategyGetBuffer needs to
716 : * be written out and doing so would require flushing WAL too. This gives us
717 : * a chance to choose a different victim.
718 : *
719 : * Returns true if buffer manager should ask for a new victim, and false
720 : * if this buffer should be written and re-used.
721 : */
722 : bool
723 17692 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
724 : {
725 : /* We only do this in bulkread mode */
726 17692 : if (strategy->btype != BAS_BULKREAD)
727 4976 : return false;
728 :
729 : /* Don't muck with behavior of normal buffer-replacement strategy */
730 24074 : if (!from_ring ||
731 11358 : strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
732 1358 : return false;
733 :
734 : /*
735 : * Remove the dirty buffer from the ring; necessary to prevent infinite
736 : * loop if all ring members are dirty.
737 : */
738 11358 : strategy->buffers[strategy->current] = InvalidBuffer;
739 :
740 11358 : return true;
741 : }
|