Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * buf_internals.h
4 : * Internal definitions for buffer manager and the buffer replacement
5 : * strategy.
6 : *
7 : *
8 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
9 : * Portions Copyright (c) 1994, Regents of the University of California
10 : *
11 : * src/include/storage/buf_internals.h
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #ifndef BUFMGR_INTERNALS_H
16 : #define BUFMGR_INTERNALS_H
17 :
18 : #include "pgstat.h"
19 : #include "port/atomics.h"
20 : #include "storage/aio_types.h"
21 : #include "storage/buf.h"
22 : #include "storage/bufmgr.h"
23 : #include "storage/condition_variable.h"
24 : #include "storage/lwlock.h"
25 : #include "storage/procnumber.h"
26 : #include "storage/proclist_types.h"
27 : #include "storage/shmem.h"
28 : #include "storage/smgr.h"
29 : #include "storage/spin.h"
30 : #include "utils/relcache.h"
31 : #include "utils/resowner.h"
32 :
/*
 * Buffer state is a single 64-bit variable in which the following data is
 * combined.
 *
 * State of the buffer itself (in order, starting at the least significant
 * bit):
 * - 18 bits refcount
 * - 4 bits usage count
 * - 12 bits of flags
 * - 18 bits share-lock count
 * - 1 bit share-exclusive locked
 * - 1 bit exclusive locked
 *
 * Combining these values allows us to perform some operations without locking
 * the buffer header, by modifying them together with a CAS loop.
 *
 * The definition of buffer state components is below.
 */
#define BUF_REFCOUNT_BITS 18
#define BUF_USAGECOUNT_BITS 4
#define BUF_FLAG_BITS 12
/* share-lock count, plus one bit each for share-exclusive and exclusive */
#define BUF_LOCK_BITS (18+2)

/* All components together (currently 54 bits) must fit into the state word */
StaticAssertDecl(BUF_REFCOUNT_BITS + BUF_USAGECOUNT_BITS + BUF_FLAG_BITS + BUF_LOCK_BITS <= 64,
				 "parts of buffer state space need to be <= 64");
56 :
/*
 * Refcount: lives in the lowest BUF_REFCOUNT_BITS bits of the state word, so
 * a single pin is simply the value 1.
 */
#define BUF_REFCOUNT_ONE 1
#define BUF_REFCOUNT_MASK ((UINT64CONST(1) << BUF_REFCOUNT_BITS) - 1)

/* Usage count: sits directly above the refcount */
#define BUF_USAGECOUNT_SHIFT BUF_REFCOUNT_BITS
#define BUF_USAGECOUNT_ONE (UINT64CONST(1) << BUF_USAGECOUNT_SHIFT)
#define BUF_USAGECOUNT_MASK \
	(((UINT64CONST(1) << BUF_USAGECOUNT_BITS) - 1) << BUF_USAGECOUNT_SHIFT)

/* Flags: sit directly above the usage count */
#define BUF_FLAG_SHIFT (BUF_USAGECOUNT_SHIFT + BUF_USAGECOUNT_BITS)
#define BUF_FLAG_MASK \
	(((UINT64CONST(1) << BUF_FLAG_BITS) - 1) << BUF_FLAG_SHIFT)

/*
 * Content lock state: sits directly above the flags.  The share-lock count
 * occupies MAX_BACKENDS_BITS bits starting at BM_LOCK_SHIFT, followed by one
 * bit for share-exclusive and one bit for exclusive.
 */
#define BM_LOCK_SHIFT (BUF_FLAG_SHIFT + BUF_FLAG_BITS)
#define BM_LOCK_VAL_SHARED (UINT64CONST(1) << BM_LOCK_SHIFT)
#define BM_LOCK_VAL_SHARE_EXCLUSIVE \
	(UINT64CONST(1) << (BM_LOCK_SHIFT + MAX_BACKENDS_BITS))
#define BM_LOCK_VAL_EXCLUSIVE \
	(UINT64CONST(1) << (BM_LOCK_SHIFT + MAX_BACKENDS_BITS + 1))
#define BM_LOCK_MASK \
	((((uint64) MAX_BACKENDS) << BM_LOCK_SHIFT) | \
	 BM_LOCK_VAL_SHARE_EXCLUSIVE | \
	 BM_LOCK_VAL_EXCLUSIVE)


/* Extract the refcount / usage count from a buffer state value */
#define BUF_STATE_GET_REFCOUNT(state) \
	((uint32) ((state) & BUF_REFCOUNT_MASK))
#define BUF_STATE_GET_USAGECOUNT(state) \
	((uint32) (((state) & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT))
94 :
/*
 * Flags for buffer descriptors
 *
 * Each flag is one bit inside the flags portion of the buffer state, i.e.
 * flag number N is bit (BUF_FLAG_SHIFT + N) of the state word.
 *
 * Note: BM_TAG_VALID essentially means that there is a buffer hashtable
 * entry associated with the buffer's tag.
 */

/* Build the state-word bit for flag number flagno */
#define BUF_DEFINE_FLAG(flagno)	\
	(UINT64CONST(1) << (BUF_FLAG_SHIFT + (flagno)))

/* buffer header is locked */
#define BM_LOCKED					BUF_DEFINE_FLAG( 0)
/* data needs writing */
#define BM_DIRTY					BUF_DEFINE_FLAG( 1)
/* data is valid */
#define BM_VALID					BUF_DEFINE_FLAG( 2)
/* tag is assigned */
#define BM_TAG_VALID				BUF_DEFINE_FLAG( 3)
/* read or write in progress */
#define BM_IO_IN_PROGRESS			BUF_DEFINE_FLAG( 4)
/* previous I/O failed */
#define BM_IO_ERROR					BUF_DEFINE_FLAG( 5)
/* flag bit 6 is not used anymore */
/* have waiter for sole pin */
#define BM_PIN_COUNT_WAITER			BUF_DEFINE_FLAG( 7)
/* must write for checkpoint */
#define BM_CHECKPOINT_NEEDED		BUF_DEFINE_FLAG( 8)
/* permanent buffer (not unlogged, or init fork) */
#define BM_PERMANENT				BUF_DEFINE_FLAG( 9)
/* content lock has waiters */
#define BM_LOCK_HAS_WAITERS			BUF_DEFINE_FLAG(10)
/* waiter for content lock has been signalled but not yet run */
#define BM_LOCK_WAKE_IN_PROGRESS	BUF_DEFINE_FLAG(11)


/*
 * MAX_BACKENDS_BITS must fit both into the refcount field and into the
 * share-lock count, which is BUF_LOCK_BITS minus the two single-bit lock
 * modes (share-exclusive and exclusive).
 */
StaticAssertDecl(MAX_BACKENDS_BITS <= BUF_REFCOUNT_BITS,
				 "MAX_BACKENDS_BITS needs to be <= BUF_REFCOUNT_BITS");
StaticAssertDecl(MAX_BACKENDS_BITS <= (BUF_LOCK_BITS - 2),
				 "MAX_BACKENDS_BITS needs to be <= BUF_LOCK_BITS - 2");
134 :
135 :
/*
 * The maximum allowed value of usage_count represents a tradeoff between
 * accuracy and speed of the clock-sweep buffer management algorithm.  A
 * large value (comparable to NBuffers) would approximate LRU semantics.
 * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of the
 * clock-sweep hand to find a free buffer, so in practice we don't want the
 * value to be very large.
 */
#define BM_MAX_USAGE_COUNT	5

/* The cap must be representable in the usage count field of the state */
StaticAssertDecl(BM_MAX_USAGE_COUNT < (UINT64CONST(1) << BUF_USAGECOUNT_BITS),
				 "BM_MAX_USAGE_COUNT doesn't fit in BUF_USAGECOUNT_BITS bits");
148 :
/*
 * Buffer tag identifies which disk block the buffer contains.
 *
 * Note: the BufferTag data must be sufficient to determine where to write the
 * block, without reference to pg_class or pg_tablespace entries.  It's
 * possible that the backend flushing the buffer doesn't even believe the
 * relation is visible yet (its xact may have started before the xact that
 * created the rel).  The storage manager must be able to cope anyway.
 *
 * Note: if there are any pad bytes in the struct, InitBufferTag will have
 * to be fixed to zero them, since this struct is used as a hash key.
 *
 * Prefer the BufTag* accessor functions below over touching relNumber and
 * forkNum directly.
 */
typedef struct buftag
{
	Oid			spcOid;			/* tablespace oid */
	Oid			dbOid;			/* database oid */
	RelFileNumber relNumber;	/* relation file number */
	ForkNumber	forkNum;		/* fork number */
	BlockNumber blockNum;		/* blknum relative to begin of reln */
} BufferTag;
169 :
170 : static inline RelFileNumber
171 241269798 : BufTagGetRelNumber(const BufferTag *tag)
172 : {
173 241269798 : return tag->relNumber;
174 : }
175 :
176 : static inline ForkNumber
177 27756411 : BufTagGetForkNum(const BufferTag *tag)
178 : {
179 27756411 : return tag->forkNum;
180 : }
181 :
182 : static inline void
183 98492700 : BufTagSetRelForkDetails(BufferTag *tag, RelFileNumber relnumber,
184 : ForkNumber forknum)
185 : {
186 98492700 : tag->relNumber = relnumber;
187 98492700 : tag->forkNum = forknum;
188 98492700 : }
189 :
190 : static inline RelFileLocator
191 29947722 : BufTagGetRelFileLocator(const BufferTag *tag)
192 : {
193 : RelFileLocator rlocator;
194 :
195 29947722 : rlocator.spcOid = tag->spcOid;
196 29947722 : rlocator.dbOid = tag->dbOid;
197 29947722 : rlocator.relNumber = BufTagGetRelNumber(tag);
198 :
199 29947722 : return rlocator;
200 : }
201 :
202 : static inline void
203 13388441 : ClearBufferTag(BufferTag *tag)
204 : {
205 13388441 : tag->spcOid = InvalidOid;
206 13388441 : tag->dbOid = InvalidOid;
207 13388441 : BufTagSetRelForkDetails(tag, InvalidRelFileNumber, InvalidForkNumber);
208 13388441 : tag->blockNum = InvalidBlockNumber;
209 13388441 : }
210 :
211 : static inline void
212 85104259 : InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator,
213 : ForkNumber forkNum, BlockNumber blockNum)
214 : {
215 85104259 : tag->spcOid = rlocator->spcOid;
216 85104259 : tag->dbOid = rlocator->dbOid;
217 85104259 : BufTagSetRelForkDetails(tag, rlocator->relNumber, forkNum);
218 85104259 : tag->blockNum = blockNum;
219 85104259 : }
220 :
221 : static inline bool
222 164312 : BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
223 : {
224 328621 : return (tag1->spcOid == tag2->spcOid) &&
225 164309 : (tag1->dbOid == tag2->dbOid) &&
226 164309 : (tag1->relNumber == tag2->relNumber) &&
227 492897 : (tag1->blockNum == tag2->blockNum) &&
228 164276 : (tag1->forkNum == tag2->forkNum);
229 : }
230 :
231 : static inline bool
232 563064146 : BufTagMatchesRelFileLocator(const BufferTag *tag,
233 : const RelFileLocator *rlocator)
234 : {
235 815322975 : return (tag->spcOid == rlocator->spcOid) &&
236 774012102 : (tag->dbOid == rlocator->dbOid) &&
237 210947956 : (BufTagGetRelNumber(tag) == rlocator->relNumber);
238 : }
239 :
240 :
241 : /*
242 : * The shared buffer mapping table is partitioned to reduce contention.
243 : * To determine which partition lock a given tag requires, compute the tag's
244 : * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
245 : * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
246 : */
247 : static inline uint32
248 84975002 : BufTableHashPartition(uint32 hashcode)
249 : {
250 84975002 : return hashcode % NUM_BUFFER_PARTITIONS;
251 : }
252 :
253 : static inline LWLock *
254 84975002 : BufMappingPartitionLock(uint32 hashcode)
255 : {
256 84975002 : return &MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET +
257 84975002 : BufTableHashPartition(hashcode)].lock;
258 : }
259 :
260 : static inline LWLock *
261 : BufMappingPartitionLockByIndex(uint32 index)
262 : {
263 : return &MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + index].lock;
264 : }
265 :
/*
 *	BufferDesc -- shared descriptor/state data for a single shared buffer.
 *
 * The state of the buffer is controlled by the, drumroll, state variable. It
 * only may be modified using atomic operations.  The state variable combines
 * various flags, the buffer's refcount and usage count, and the state of the
 * buffer content lock. See comment above BUF_REFCOUNT_BITS for details about
 * the division.  This layout allows us to do some operations in a single
 * atomic operation, without actually acquiring and releasing the spinlock;
 * for instance, increasing or decreasing the refcount.
 *
 * One of the aforementioned flags is BM_LOCKED, used to implement the buffer
 * header lock. See the following paragraphs, as well as the documentation for
 * individual fields, for more details.
 *
 * The identity of the buffer (BufferDesc.tag) can only be changed by the
 * backend holding the buffer header lock.
 *
 * If the lock is held by another backend, neither additional buffer pins may
 * be established (we would like to relax this eventually), nor can flags be
 * set/cleared. These operations either need to acquire the buffer header
 * spinlock, or need to use a CAS loop, waiting for the lock to be released if
 * it is held.  However, existing buffer pins may be released while the buffer
 * header spinlock is held, using an atomic subtraction.
 *
 * If we have the buffer pinned, its tag can't change underneath us, so we can
 * examine the tag without locking the buffer header.  Also, in places we do
 * one-time reads of the flags without bothering to lock the buffer header;
 * this is generally for situations where we don't expect the flag bit being
 * tested to be changing.
 *
 * We can't physically remove items from a disk page if another backend has
 * the buffer pinned.  Hence, a backend may need to wait for all other pins
 * to go away.  This is signaled by storing its own pgprocno into
 * wait_backend_pgprocno and setting flag bit BM_PIN_COUNT_WAITER.  At present,
 * there can be only one such waiter per buffer.
 *
 * The content of buffers is protected via the buffer content lock,
 * implemented as part of the buffer state. Note that the buffer header lock
 * is *not* used to control access to the data in the buffer! We used to use
 * an LWLock to implement the content lock, but having a dedicated
 * implementation of content locks allows us to implement some otherwise hard
 * things (e.g. race-freely checking if AIO is in progress before locking a
 * buffer exclusively) and enables otherwise impossible optimizations
 * (e.g. unlocking and unpinning a buffer in one atomic operation).
 *
 * We use this same struct for local buffer headers, but the locks are not
 * used and not all of the flag bits are useful either. To avoid unnecessary
 * overhead, manipulations of the state field should be done without actual
 * atomic operations (i.e. only pg_atomic_read_u64() and
 * pg_atomic_unlocked_write_u64()).
 *
 * Be careful to avoid increasing the size of the struct when adding or
 * reordering members.  Keeping it below 64 bytes (the most common CPU
 * cache line size) is fairly important for performance.
 *
 * Per-buffer I/O condition variables are currently kept outside this struct in
 * a separate array.  They could be moved in here and still fit within that
 * limit on common systems, but for now that is not done.
 */
typedef struct BufferDesc
{
	/*
	 * ID of page contained in buffer. The buffer header spinlock needs to be
	 * held to modify this field.
	 */
	BufferTag	tag;

	/*
	 * Buffer's index number (from 0). The field never changes after
	 * initialization, so does not need locking.
	 */
	int			buf_id;

	/*
	 * State of the buffer, containing flags, refcount, usagecount and the
	 * content lock state. See BUF_* and BM_* defines at the top of this
	 * file.
	 */
	pg_atomic_uint64 state;

	/*
	 * Backend of pin-count waiter. The buffer header spinlock needs to be
	 * held to modify this field.
	 */
	int			wait_backend_pgprocno;

	PgAioWaitRef io_wref;		/* set iff AIO is in progress */

	/*
	 * List of PGPROCs waiting for the buffer content lock. Protected by the
	 * buffer header spinlock.
	 */
	proclist_head lock_waiters;
} BufferDesc;
360 :
/*
 * Concurrent access to buffer headers has proven to be more efficient if
 * they're cache line aligned. So we force the start of the BufferDescriptors
 * array to be on a cache line boundary and force the elements to be cache
 * line sized.
 *
 * XXX: As this primarily matters in highly concurrent workloads, which
 * probably all are 64bit these days, and the space wastage would be a bit
 * more noticeable on 32bit systems, we don't force the stride to be cache
 * line sized on those. If somebody does actual performance testing, we can
 * reevaluate.
 *
 * Note that local buffer descriptors aren't forced to be aligned - as there's
 * no concurrent access to those it's unlikely to be beneficial.
 *
 * We use a 64-byte cache line size here, because that's the most common
 * size. Making it bigger would be a waste of memory. Even if running on a
 * platform with either 32 or 128 byte line sizes, it's good to align to
 * boundaries and avoid false sharing.
 */
/* 64 bytes where pointers are 8 bytes wide; 1 (i.e. no padding) otherwise */
#define BUFFERDESC_PAD_TO_SIZE	(SIZEOF_VOID_P == 8 ? 64 : 1)

/*
 * The union is as large as the larger of its two members, so the pad array
 * only has an effect while BufferDesc is smaller than BUFFERDESC_PAD_TO_SIZE.
 */
typedef union BufferDescPadded
{
	BufferDesc	bufferdesc;
	char		pad[BUFFERDESC_PAD_TO_SIZE];
} BufferDescPadded;
388 :
/*
 * The PendingWriteback & WritebackContext structures are used to keep
 * information about pending flush requests to be issued to the OS.
 */
typedef struct PendingWriteback
{
	/* could store different types of pending flushes here */
	BufferTag	tag;
} PendingWriteback;

/* struct forward declared in bufmgr.h */
typedef struct WritebackContext
{
	/* pointer to the max number of writeback requests to coalesce */
	int		   *max_pending;

	/* current number of pending writeback requests */
	int			nr_pending;

	/* pending requests; fixed capacity of WRITEBACK_MAX_PENDING_FLUSHES */
	PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES];
} WritebackContext;
411 :
412 : /* in buf_init.c */
413 : extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
414 : extern PGDLLIMPORT ConditionVariableMinimallyPadded *BufferIOCVArray;
415 : extern PGDLLIMPORT WritebackContext BackendWritebackContext;
416 :
417 : /* in localbuf.c */
418 : extern PGDLLIMPORT BufferDesc *LocalBufferDescriptors;
419 :
420 :
421 : static inline BufferDesc *
422 760960101 : GetBufferDescriptor(uint32 id)
423 : {
424 760960101 : return &(BufferDescriptors[id]).bufferdesc;
425 : }
426 :
427 : static inline BufferDesc *
428 13295642 : GetLocalBufferDescriptor(uint32 id)
429 : {
430 13295642 : return &LocalBufferDescriptors[id];
431 : }
432 :
433 : static inline Buffer
434 279582085 : BufferDescriptorGetBuffer(const BufferDesc *bdesc)
435 : {
436 279582085 : return (Buffer) (bdesc->buf_id + 1);
437 : }
438 :
439 : static inline ConditionVariable *
440 14626000 : BufferDescriptorGetIOCV(const BufferDesc *bdesc)
441 : {
442 14626000 : return &(BufferIOCVArray[bdesc->buf_id]).cv;
443 : }
444 :
445 : /*
446 : * Functions for acquiring/releasing a shared buffer header's spinlock. Do
447 : * not apply these to local buffers!
448 : */
449 : extern uint64 LockBufHdr(BufferDesc *desc);
450 :
/*
 * Unlock the buffer header.
 *
 * This can only be used if the caller did not modify BufferDesc.state. To
 * set/unset flag bits or change the refcount use UnlockBufHdrExt().
 *
 * The header lock must currently be held, i.e. BM_LOCKED must be set in the
 * state (checked by the assertion).
 */
static inline void
UnlockBufHdr(BufferDesc *desc)
{
	Assert(pg_atomic_read_u64(&desc->state) & BM_LOCKED);

	/*
	 * BM_LOCKED is a single bit that is known to be set, so subtracting it
	 * clears exactly that bit and leaves the rest of the state as it is.
	 */
	pg_atomic_fetch_sub_u64(&desc->state, BM_LOCKED);
}
464 :
465 : /*
466 : * Unlock the buffer header, while atomically adding the flags in set_bits,
467 : * unsetting the ones in unset_bits and changing the refcount by
468 : * refcount_change.
469 : *
470 : * Note that this approach would not work for usagecount, since we need to cap
471 : * the usagecount at BM_MAX_USAGE_COUNT.
472 : */
473 : static inline uint64
474 25685069 : UnlockBufHdrExt(BufferDesc *desc, uint64 old_buf_state,
475 : uint64 set_bits, uint64 unset_bits,
476 : int refcount_change)
477 : {
478 : for (;;)
479 5 : {
480 25685074 : uint64 buf_state = old_buf_state;
481 :
482 : Assert(buf_state & BM_LOCKED);
483 :
484 25685074 : buf_state |= set_bits;
485 25685074 : buf_state &= ~unset_bits;
486 25685074 : buf_state &= ~BM_LOCKED;
487 :
488 25685074 : if (refcount_change != 0)
489 3489156 : buf_state += BUF_REFCOUNT_ONE * refcount_change;
490 :
491 25685074 : if (pg_atomic_compare_exchange_u64(&desc->state, &old_buf_state,
492 : buf_state))
493 : {
494 25685069 : return old_buf_state;
495 : }
496 : }
497 : }
498 :
499 : extern uint64 WaitBufHdrUnlocked(BufferDesc *buf);
500 :
501 : /* in bufmgr.c */
502 :
/*
 * Structure to sort buffers per file on checkpoints.
 *
 * This structure is allocated per buffer in shared memory, so it should be
 * kept as small as possible.
 */
typedef struct CkptSortItem
{
	Oid			tsId;			/* NOTE(review): presumably the tablespace
								 * OID of the buffer's tag - confirm */
	RelFileNumber relNumber;
	ForkNumber	forkNum;
	BlockNumber blockNum;
	int			buf_id;			/* NOTE(review): presumably the index of the
								 * buffer being sorted - confirm */
} CkptSortItem;
517 :
518 : extern PGDLLIMPORT CkptSortItem *CkptBufferIds;
519 :
520 : /* ResourceOwner callbacks to hold buffer I/Os and pins */
521 : extern PGDLLIMPORT const ResourceOwnerDesc buffer_io_resowner_desc;
522 : extern PGDLLIMPORT const ResourceOwnerDesc buffer_resowner_desc;
523 :
/* Convenience wrappers over ResourceOwnerRemember/Forget */

/* Register buffer with owner, using the buffer_resowner_desc resource kind */
static inline void
ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
{
	ResourceOwnerRemember(owner, Int32GetDatum(buffer), &buffer_resowner_desc);
}
/* Remove buffer from owner, using the buffer_resowner_desc resource kind */
static inline void
ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
{
	ResourceOwnerForget(owner, Int32GetDatum(buffer), &buffer_resowner_desc);
}
/* Register buffer with owner, using the buffer_io_resowner_desc resource kind */
static inline void
ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)
{
	ResourceOwnerRemember(owner, Int32GetDatum(buffer), &buffer_io_resowner_desc);
}
/* Remove buffer from owner, using the buffer_io_resowner_desc resource kind */
static inline void
ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
{
	ResourceOwnerForget(owner, Int32GetDatum(buffer), &buffer_io_resowner_desc);
}
545 :
546 : /*
547 : * Internal buffer management routines
548 : */
549 : /* bufmgr.c */
550 : extern void WritebackContextInit(WritebackContext *context, int *max_pending);
551 : extern void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context);
552 : extern void ScheduleBufferTagForWriteback(WritebackContext *wb_context,
553 : IOContext io_context, BufferTag *tag);
554 :
555 : extern void TrackNewBufferPin(Buffer buf);
556 :
/*
 * Return value for StartBufferIO / StartSharedBufferIO / StartLocalBufferIO.
 *
 * When preparing a buffer for I/O and setting BM_IO_IN_PROGRESS, the buffer
 * may already have I/O in progress or the I/O may have been done by another
 * backend.  See the documentation of StartSharedBufferIO for more details.
 */
typedef enum StartBufferIOResult
{
	/* NOTE(review): presumably "the I/O has already been done" - confirm */
	BUFFER_IO_ALREADY_DONE,
	/* NOTE(review): presumably "I/O on the buffer is in progress" - confirm */
	BUFFER_IO_IN_PROGRESS,
	/* NOTE(review): presumably "caller may now perform the I/O" - confirm */
	BUFFER_IO_READY_FOR_IO,
} StartBufferIOResult;
570 :
571 : /* the following are exposed to make it easier to write tests */
572 : extern StartBufferIOResult StartBufferIO(Buffer buffer, bool forInput, bool wait,
573 : PgAioWaitRef *io_wref);
574 : extern StartBufferIOResult StartSharedBufferIO(BufferDesc *buf, bool forInput, bool wait,
575 : PgAioWaitRef *io_wref);
576 : extern void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint64 set_flag_bits,
577 : bool forget_owner, bool release_aio);
578 :
579 :
580 : /* freelist.c */
581 : extern IOContext IOContextForStrategy(BufferAccessStrategy strategy);
582 : extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
583 : uint64 *buf_state, bool *from_ring);
584 : extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
585 : BufferDesc *buf, bool from_ring);
586 :
587 : extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
588 : extern void StrategyNotifyBgWriter(int bgwprocno);
589 :
590 : /* buf_table.c */
591 : extern uint32 BufTableHashCode(BufferTag *tagPtr);
592 : extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
593 : extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
594 : extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
595 :
596 : /* localbuf.c */
597 : extern bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount);
598 : extern void UnpinLocalBuffer(Buffer buffer);
599 : extern void UnpinLocalBufferNoOwner(Buffer buffer);
600 : extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
601 : ForkNumber forkNum,
602 : BlockNumber blockNum);
603 : extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
604 : BlockNumber blockNum, bool *foundPtr);
605 : extern BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr,
606 : ForkNumber fork,
607 : uint32 flags,
608 : uint32 extend_by,
609 : BlockNumber extend_upto,
610 : Buffer *buffers,
611 : uint32 *extended_by);
612 : extern void MarkLocalBufferDirty(Buffer buffer);
613 : extern void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty,
614 : uint64 set_flag_bits, bool release_aio);
615 : extern StartBufferIOResult StartLocalBufferIO(BufferDesc *bufHdr, bool forInput,
616 : bool wait, PgAioWaitRef *io_wref);
617 : extern void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln);
618 : extern void InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced);
619 : extern void DropRelationLocalBuffers(RelFileLocator rlocator,
620 : ForkNumber *forkNum, int nforks,
621 : BlockNumber *firstDelBlock);
622 : extern void DropRelationAllLocalBuffers(RelFileLocator rlocator);
623 : extern void AtEOXact_LocalBuffers(bool isCommit);
624 :
625 : #endif /* BUFMGR_INTERNALS_H */
|