Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufmgr.h
4 : * POSTGRES buffer manager definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufmgr.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFMGR_H
15 : #define BUFMGR_H
16 :
17 : #include "port/pg_iovec.h"
18 : #include "storage/aio_types.h"
19 : #include "storage/block.h"
20 : #include "storage/buf.h"
21 : #include "storage/bufpage.h"
22 : #include "storage/relfilelocator.h"
23 : #include "utils/relcache.h"
24 : #include "utils/snapmgr.h"
25 :
26 : typedef void *Block; /* untyped pointer to a buffer's page image, as returned by BufferGetBlock() */
27 :
28 : /*
29 : * Possible arguments for GetAccessStrategy() / GetAccessStrategyWithSize().
30 : *
31 : * If adding a new BufferAccessStrategyType, also add a new IOContext so
32 : * IO statistics using this strategy are tracked.
33 : */
34 : typedef enum BufferAccessStrategyType
35 : {
36 : BAS_NORMAL, /* Normal random access */
37 : BAS_BULKREAD, /* Large read-only scan (hint bit updates are
38 : * ok) */
39 : BAS_BULKWRITE, /* Large multi-block write (e.g. COPY IN) */
40 : BAS_VACUUM, /* Buffer access performed by VACUUM */
41 : } BufferAccessStrategyType;
42 :
43 : /* Possible modes for ReadBufferExtended(), ReadBufferWithoutRelcache() and ExtendBufferedRelTo() */
44 : typedef enum
45 : {
46 : RBM_NORMAL, /* Normal read */
47 : RBM_ZERO_AND_LOCK, /* Don't read from disk, caller will
48 : * initialize. Also locks the page. */
49 : RBM_ZERO_AND_CLEANUP_LOCK, /* Like RBM_ZERO_AND_LOCK, but locks the page
50 : * in "cleanup" mode */
51 : RBM_ZERO_ON_ERROR, /* Read, but return an all-zeros page on error */
52 : RBM_NORMAL_NO_LOG, /* Don't log page as invalid during WAL
53 : * replay; otherwise same as RBM_NORMAL */
54 : } ReadBufferMode;
55 :
56 : /*
57 : * Type returned by PrefetchBuffer() and PrefetchSharedBuffer().
58 : */
59 : typedef struct PrefetchBufferResult
60 : {
61 : Buffer recent_buffer; /* If valid, a hit (recheck needed! NOTE(review): presumably via ReadRecentBuffer() - confirm) */
62 : bool initiated_io; /* If true, a miss resulting in async I/O */
63 : } PrefetchBufferResult;
64 :
65 : /*
66 : * Flags influencing the behaviour of ExtendBufferedRel*; each value is a distinct bit, so they can be OR-ed into the uint32 flags argument.
67 : */
68 : typedef enum ExtendBufferedFlags
69 : {
70 : /*
71 : * Don't acquire extension lock. This is safe only if the relation isn't
72 : * shared, an access exclusive lock is held or if this is the startup
73 : * process.
74 : */
75 : EB_SKIP_EXTENSION_LOCK = (1 << 0),
76 :
77 : /* Is this extension part of recovery? */
78 : EB_PERFORMING_RECOVERY = (1 << 1),
79 :
80 : /*
81 : * Should the fork be created if it does not currently exist? This likely
82 : * only ever makes sense for relation forks.
83 : */
84 : EB_CREATE_FORK_IF_NEEDED = (1 << 2),
85 :
86 : /* Should the first (possibly only) return buffer be returned locked? */
87 : EB_LOCK_FIRST = (1 << 3),
88 :
89 : /* Should the smgr size cache be cleared? */
90 : EB_CLEAR_SIZE_CACHE = (1 << 4),
91 :
92 : /* internal flags follow; NOTE(review): presumably not meant to be passed by external callers - confirm in bufmgr.c */
93 : EB_LOCK_TARGET = (1 << 5),
94 : } ExtendBufferedFlags;
95 :
96 : /* forward declared (as a pointer to the incomplete struct SMgrRelationData), to avoid including smgr.h here */
97 : typedef struct SMgrRelationData *SMgrRelation;
98 :
99 : /*
100 : * Some functions identify relations either by relation or smgr +
101 : * relpersistence, initialized via the BMR_REL()/BMR_SMGR() macros below.
102 : * This allows us to use the same function for both recovery and normal
103 : * operation. When BMR_REL is used, it's not valid to cache its rd_smgr here,
104 : * because our pointer would be obsolete in case of relcache invalidation.
105 : * For simplicity, use BMR_GET_SMGR to read the smgr; note that it expands its argument more than once.
106 : */
107 : typedef struct BufferManagerRelation
108 : {
109 : Relation rel; /* set by BMR_REL(); when valid, BMR_GET_SMGR() derives the smgr from it */
110 : SMgrRelation smgr; /* set by BMR_SMGR(); returned by BMR_GET_SMGR() when rel is not valid */
111 : char relpersistence; /* set by BMR_SMGR() alongside smgr */
112 : } BufferManagerRelation;
113 :
114 : #define BMR_REL(p_rel) \
115 : ((BufferManagerRelation){.rel = p_rel})
116 : #define BMR_SMGR(p_smgr, p_relpersistence) \
117 : ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence})
118 : #define BMR_GET_SMGR(bmr) \
119 : (RelationIsValid((bmr).rel) ? RelationGetSmgr((bmr).rel) : (bmr).smgr)
120 :
121 : /* The READ_BUFFERS_* values are single-bit flags that can be OR-ed together. This one: zero out page if reading fails. */
122 : #define READ_BUFFERS_ZERO_ON_ERROR (1 << 0)
123 : /* Call smgrprefetch() if I/O necessary. */
124 : #define READ_BUFFERS_ISSUE_ADVICE (1 << 1)
125 : /* Don't treat page as invalid due to checksum failures. */
126 : #define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES (1 << 2)
127 : /* IO will immediately be waited for */
128 : #define READ_BUFFERS_SYNCHRONOUSLY (1 << 3)
129 :
130 :
131 : struct ReadBuffersOperation /* state object passed to StartReadBuffer[s]() and WaitReadBuffers() */
132 : {
133 : /* The following members should be set by the caller. */
134 : Relation rel; /* optional */
135 : SMgrRelation smgr;
136 : char persistence;
137 : ForkNumber forknum;
138 : BufferAccessStrategy strategy;
139 :
140 : /*
141 : * The following members are private state for communication
142 : * between StartReadBuffers() and WaitReadBuffers(), initialized only if
143 : * an actual read is required, and should not be modified.
144 : */
145 : Buffer *buffers;
146 : BlockNumber blocknum;
147 : uint16 flags; /* NOTE(review): presumably READ_BUFFERS_* bits - confirm in bufmgr.c */
148 : int16 nblocks;
149 : int16 nblocks_done;
150 : /* true if waiting on another backend's IO */
151 : bool foreign_io;
152 : PgAioWaitRef io_wref;
153 : PgAioReturn io_return;
154 : };
155 :
156 : typedef struct ReadBuffersOperation ReadBuffersOperation;
157 :
158 : /* declared as an incomplete type to avoid having to expose buf_internals.h here; this header only uses it by pointer (see BgBufferSync()) */
159 : typedef struct WritebackContext WritebackContext;
160 :
161 : /* in globals.c ... this duplicates miscadmin.h */
162 : extern PGDLLIMPORT int NBuffers; /* upper bound that BufferIsValid() asserts for buffer numbers */
163 :
164 : /* in bufmgr.c */
165 : extern PGDLLIMPORT bool zero_damaged_pages;
166 : extern PGDLLIMPORT int bgwriter_lru_maxpages;
167 : extern PGDLLIMPORT double bgwriter_lru_multiplier;
168 : extern PGDLLIMPORT bool track_io_timing;
169 :
170 : #define DEFAULT_EFFECTIVE_IO_CONCURRENCY 16
171 : #define DEFAULT_MAINTENANCE_IO_CONCURRENCY 16
172 : extern PGDLLIMPORT int effective_io_concurrency;
173 : extern PGDLLIMPORT int maintenance_io_concurrency;
174 :
175 : #define MAX_IO_COMBINE_LIMIT PG_IOV_MAX
176 : #define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ) /* number of BLCKSZ-sized blocks in 128kB, capped at MAX_IO_COMBINE_LIMIT */
177 : extern PGDLLIMPORT int io_combine_limit; /* min of the two GUCs below */
178 : extern PGDLLIMPORT int io_combine_limit_guc;
179 : extern PGDLLIMPORT int io_max_combine_limit;
180 :
181 : extern PGDLLIMPORT int checkpoint_flush_after;
182 : extern PGDLLIMPORT int backend_flush_after;
183 : extern PGDLLIMPORT int bgwriter_flush_after;
184 :
185 : extern PGDLLIMPORT const PgAioHandleCallbacks aio_shared_buffer_readv_cb;
186 : extern PGDLLIMPORT const PgAioHandleCallbacks aio_local_buffer_readv_cb;
187 :
188 : /* in buf_init.c */
189 : extern PGDLLIMPORT char *BufferBlocks; /* base address from which BufferGetBlock() computes non-local page addresses, in BLCKSZ steps */
190 :
191 : /* in localbuf.c */
192 : extern PGDLLIMPORT int NLocBuffer; /* BufferIsValid() asserts buffer numbers are >= -NLocBuffer */
193 : extern PGDLLIMPORT Block *LocalBufferBlockPointers; /* indexed by (-buffer - 1) in BufferGetBlock() */
194 : extern PGDLLIMPORT int32 *LocalRefCount;
195 :
196 : /* upper limit for effective_io_concurrency */
197 : #define MAX_IO_CONCURRENCY 1000
198 :
199 : /* special block number for ReadBuffer() */
200 : #define P_NEW InvalidBlockNumber /* grow the file to get a new page */
201 :
202 : /*
203 : * Buffer content lock modes (mode argument for LockBuffer(), LockBufferInternal() and BufferIsLockedByMeInMode())
204 : */
205 : typedef enum BufferLockMode
206 : {
207 : BUFFER_LOCK_UNLOCK, /* not a lock level: LockBuffer() calls UnlockBuffer() for this mode */
208 :
209 : /*
210 : * A share lock conflicts with exclusive locks.
211 : */
212 : BUFFER_LOCK_SHARE,
213 :
214 : /*
215 : * A share-exclusive lock conflicts with itself and exclusive locks.
216 : */
217 : BUFFER_LOCK_SHARE_EXCLUSIVE,
218 :
219 : /*
220 : * An exclusive lock conflicts with every other lock type.
221 : */
222 : BUFFER_LOCK_EXCLUSIVE,
223 : } BufferLockMode;
224 :
225 :
226 : /*
227 : * prototypes for functions in bufmgr.c
228 : */
229 : extern PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln,
230 : ForkNumber forkNum,
231 : BlockNumber blockNum); /* same result type as PrefetchBuffer(), but takes an SMgrRelation */
232 : extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
233 : BlockNumber blockNum);
234 : extern bool ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum,
235 : BlockNumber blockNum, Buffer recent_buffer);
236 : extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); /* blockNum may be the special value P_NEW */
237 : extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
238 : BlockNumber blockNum, ReadBufferMode mode,
239 : BufferAccessStrategy strategy);
240 : extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
241 : ForkNumber forkNum, BlockNumber blockNum,
242 : ReadBufferMode mode, BufferAccessStrategy strategy,
243 : bool permanent); /* addresses the relation by RelFileLocator rather than by Relation */
244 :
245 : extern bool StartReadBuffer(ReadBuffersOperation *operation,
246 : Buffer *buffer,
247 : BlockNumber blocknum,
248 : int flags); /* single-buffer counterpart of StartReadBuffers(): one Buffer, no block count */
249 : extern bool StartReadBuffers(ReadBuffersOperation *operation,
250 : Buffer *buffers,
251 : BlockNumber blockNum,
252 : int *nblocks, /* NOTE(review): passed by pointer, presumably in/out - confirm in bufmgr.c */
253 : int flags);
254 : extern bool WaitReadBuffers(ReadBuffersOperation *operation); /* takes the same operation object as StartReadBuffer[s]() */
255 :
256 : extern void ReleaseBuffer(Buffer buffer);
257 : extern void UnlockReleaseBuffer(Buffer buffer); /* NOTE(review): by its name, unlock plus release in one call - confirm */
258 : extern bool BufferIsLockedByMe(Buffer buffer);
259 : extern bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode); /* mode: one of the BufferLockMode values */
260 : extern bool BufferIsDirty(Buffer buffer);
261 : extern void MarkBufferDirty(Buffer buffer);
262 : extern void IncrBufferRefCount(Buffer buffer);
263 : extern void CheckBufferIsPinnedOnce(Buffer buffer);
264 : extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
265 : BlockNumber blockNum);
266 :
266 : extern Buffer ExtendBufferedRel(BufferManagerRelation bmr, /* build bmr with BMR_REL() or BMR_SMGR() */
267 : ForkNumber forkNum,
268 : BufferAccessStrategy strategy,
269 : uint32 flags); /* flags: OR of ExtendBufferedFlags bits */
270 : extern BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr,
271 : ForkNumber fork,
272 : BufferAccessStrategy strategy,
273 : uint32 flags,
274 : uint32 extend_by,
275 : Buffer *buffers, /* NOTE(review): presumably an output array - confirm in bufmgr.c */
276 : uint32 *extended_by); /* NOTE(review): presumably out, the number of blocks actually added - confirm */
277 : extern Buffer ExtendBufferedRelTo(BufferManagerRelation bmr,
278 : ForkNumber fork,
279 : BufferAccessStrategy strategy,
280 : uint32 flags,
281 : BlockNumber extend_to,
282 : ReadBufferMode mode);
284 :
285 : extern void InitBufferManagerAccess(void);
286 : extern void AtEOXact_Buffers(bool isCommit);
287 : #ifdef USE_ASSERT_CHECKING
288 : extern void AssertBufferLocksPermitCatalogRead(void); /* only declared in assert-enabled builds */
289 : #endif /* USE_ASSERT_CHECKING */
290 : extern char *DebugPrintBufferRefcount(Buffer buffer);
291 : extern void CheckPointBuffers(int flags);
292 : extern BlockNumber BufferGetBlockNumber(Buffer buffer);
293 : extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
294 : ForkNumber forkNum); /* wrapped by RelationGetNumberOfBlocks() for MAIN_FORKNUM */
295 : extern void FlushOneBuffer(Buffer buffer);
296 : extern void FlushRelationBuffers(Relation rel);
297 : extern void FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels); /* NOTE(review): smgrs looks like an array of nrels entries - confirm */
298 : extern void CreateAndCopyRelationData(RelFileLocator src_rlocator,
299 : RelFileLocator dst_rlocator,
300 : bool permanent);
301 : extern void FlushDatabaseBuffers(Oid dbid);
302 : extern void DropRelationBuffers(SMgrRelation smgr_reln,
303 : ForkNumber *forkNum,
304 : int nforks, BlockNumber *firstDelBlock); /* NOTE(review): forkNum/firstDelBlock look like parallel arrays of nforks entries - confirm */
305 : extern void DropRelationsAllBuffers(SMgrRelation *smgr_reln,
306 : int nlocators);
307 : extern void DropDatabaseBuffers(Oid dbid);
308 :
309 : #define RelationGetNumberOfBlocks(reln) \
310 : RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM) /* shorthand: block count of the relation's main fork */
311 :
312 : extern bool BufferIsPermanent(Buffer buffer);
313 : extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer);
314 : extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator,
315 : ForkNumber *forknum, BlockNumber *blknum); /* NOTE(review): the three pointers look like output parameters - confirm */
316 :
317 : extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
318 :
319 : extern bool BufferSetHintBits16(uint16 *ptr, uint16 val, Buffer buffer);
320 : extern bool BufferBeginSetHintBits(Buffer buffer); /* NOTE(review): presumably paired with BufferFinishSetHintBits() - confirm */
321 : extern void BufferFinishSetHintBits(Buffer buffer, bool mark_dirty, bool buffer_std);
322 :
323 : extern void UnlockBuffers(void);
324 : extern void UnlockBuffer(Buffer buffer); /* what LockBuffer() calls for BUFFER_LOCK_UNLOCK */
325 : extern void LockBufferInternal(Buffer buffer, BufferLockMode mode); /* what LockBuffer() calls for every other mode */
326 :
327 : /*
328 : * Acquire a buffer content lock in `mode`, or release it when `mode` is
329 : * BUFFER_LOCK_UNLOCK. The unlock case is dispatched here rather than in
330 : * bufmgr.c because handling it there hurt branch prediction enough to impact
331 : * performance; here `mode` is usually a constant the compiler can optimize out.
332 : */
333 : static inline void
334 186661567 : LockBuffer(Buffer buffer, BufferLockMode mode)
335 : {
336 186661567 : if (mode == BUFFER_LOCK_UNLOCK)
337 69466356 : UnlockBuffer(buffer); /* release path */
338 : else
339 117195211 : LockBufferInternal(buffer, mode); /* every other mode is passed through unchanged */
340 186661567 : }
341 :
342 : extern bool ConditionalLockBuffer(Buffer buffer);
343 : extern void LockBufferForCleanup(Buffer buffer);
344 : extern bool ConditionalLockBufferForCleanup(Buffer buffer);
345 : extern bool IsBufferCleanupOK(Buffer buffer);
346 : extern bool HoldingBufferPinThatDelaysRecovery(void);
347 :
348 : extern bool BgBufferSync(WritebackContext *wb_context);
349 :
350 : extern uint32 GetPinLimit(void);
351 : extern uint32 GetLocalPinLimit(void);
352 : extern uint32 GetAdditionalPinLimit(void);
353 : extern uint32 GetAdditionalLocalPinLimit(void);
354 : extern void LimitAdditionalPins(uint32 *additional_pins); /* NOTE(review): pointer argument, presumably adjusted in place - confirm */
355 : extern void LimitAdditionalLocalPins(uint32 *additional_pins);
356 :
357 : extern bool EvictUnpinnedBuffer(Buffer buf, bool *buffer_flushed); /* NOTE(review): buffer_flushed looks like an output parameter - confirm */
358 : extern void EvictAllUnpinnedBuffers(int32 *buffers_evicted,
359 : int32 *buffers_flushed,
360 : int32 *buffers_skipped); /* NOTE(review): the int32 pointers look like output counters - confirm */
361 : extern void EvictRelUnpinnedBuffers(Relation rel,
362 : int32 *buffers_evicted,
363 : int32 *buffers_flushed,
364 : int32 *buffers_skipped);
365 : extern bool MarkDirtyUnpinnedBuffer(Buffer buf, bool *buffer_already_dirty);
366 : extern void MarkDirtyRelUnpinnedBuffers(Relation rel,
367 : int32 *buffers_dirtied,
368 : int32 *buffers_already_dirty,
369 : int32 *buffers_skipped);
370 : extern void MarkDirtyAllUnpinnedBuffers(int32 *buffers_dirtied,
371 : int32 *buffers_already_dirty,
372 : int32 *buffers_skipped);
373 :
374 : /* in localbuf.c */
375 : extern void AtProcExit_LocalBuffers(void);
376 :
377 : /* in freelist.c */
378 :
379 : extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); /* btype: one of the BAS_* values */
380 : extern BufferAccessStrategy GetAccessStrategyWithSize(BufferAccessStrategyType btype,
381 : int ring_size_kb); /* NOTE(review): by its name, a ring size in kB - confirm in freelist.c */
382 : extern int GetAccessStrategyBufferCount(BufferAccessStrategy strategy);
383 : extern int GetAccessStrategyPinLimit(BufferAccessStrategy strategy);
384 :
385 : extern void FreeAccessStrategy(BufferAccessStrategy strategy);
386 :
387 :
388 : /* inline functions */
389 :
390 : /*
391 : * Although this header file is nominally backend-only, certain frontend
392 : * programs like pg_waldump include it. For compilers that emit static
393 : * inline functions even when they're unused, that leads to unsatisfied
394 : * external references; hence hide these with #ifndef FRONTEND.
395 : */
396 :
397 : #ifndef FRONTEND
398 :
399 : /*
400 : * BufferIsValid
401 : * True iff the given buffer number is not InvalidBuffer (it may
402 : * refer to either a shared or a local buffer).
403 : *
404 : * Note: For a long time this was defined the same as BufferIsPinned,
405 : * that is it would say False if you didn't hold a pin on the buffer.
406 : * I believe this was bogus and served only to mask logic errors.
407 : * Code should always know whether it has a buffer reference,
408 : * independently of the pin state.
409 : *
410 : * Note: For a further long time this was not quite the inverse of the
411 : * BufferIsInvalid() macro, in that it also did sanity checks to verify
412 : * that the buffer number was in range. Most likely, this check was
413 : * originally intended only to be used in assertions, but its use has
414 : * since expanded quite a bit, and the overhead of making those checks
415 : * even in non-assert-enabled builds can be significant. Thus, we've
416 : * now demoted the range checks to assertions within the function itself.
417 : */
418 : static inline bool
419 371380474 : BufferIsValid(Buffer bufnum)
420 : {
421 : Assert(bufnum <= NBuffers); /* range check, upper bound */
422 : Assert(bufnum >= -NLocBuffer); /* range check, lower bound: -NLocBuffer is the most negative number accepted */
423 :
424 371380474 : return bufnum != InvalidBuffer;
425 : }
426 :
427 : /*
428 : * BufferGetBlock
429 : * Returns a reference to a disk page image associated with a buffer.
430 : *
431 : * Note:
432 : * Assumes buffer is valid (only checked by an assertion).
433 : */
434 : static inline Block
435 501439526 : BufferGetBlock(Buffer buffer)
436 : {
437 : Assert(BufferIsValid(buffer));
438 :
439 501439526 : if (BufferIsLocal(buffer))
440 14538524 : return LocalBufferBlockPointers[-buffer - 1]; /* local case: slot (-buffer - 1) of the per-buffer pointer array */
441 : else
442 486901002 : return (Block) (BufferBlocks + ((Size) (buffer - 1)) * BLCKSZ); /* otherwise: byte offset (buffer - 1) * BLCKSZ from BufferBlocks, computed in Size */
443 : }
444 :
445 : /*
446 : * BufferGetPageSize
447 : * Returns the page size within a buffer (currently always BLCKSZ).
448 : *
449 : * Notes:
450 : * Assumes buffer is valid.
451 : *
452 : * The buffer can be a raw disk block and need not contain a valid
453 : * (formatted) disk page.
454 : */
455 : /* XXX should dig out of buffer descriptor */
456 : static inline Size
457 283954 : BufferGetPageSize(Buffer buffer)
458 : {
459 : Assert(BufferIsValid(buffer));
460 283954 : return (Size) BLCKSZ; /* the same constant for every buffer; the argument is only used by the assertion */
461 : }
462 :
463 : /*
464 : * BufferGetPage
465 : * Returns the page associated with a buffer: the result of BufferGetBlock() cast to Page.
466 : */
467 : static inline Page
468 498147355 : BufferGetPage(Buffer buffer)
469 : {
470 498147355 : return (Page) BufferGetBlock(buffer); /* validity of buffer is asserted inside BufferGetBlock() */
471 : }
472 :
473 : #endif /* FRONTEND */
474 :
475 : #endif /* BUFMGR_H */
|