Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufmgr.h
4 : * POSTGRES buffer manager definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufmgr.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFMGR_H
15 : #define BUFMGR_H
16 :
17 : #include "port/pg_iovec.h"
18 : #include "storage/aio_types.h"
19 : #include "storage/block.h"
20 : #include "storage/buf.h"
21 : #include "storage/bufpage.h"
22 : #include "storage/relfilelocator.h"
23 : #include "utils/relcache.h"
24 : #include "utils/snapmgr.h"
25 :
/* Untyped pointer to a block image; this is what BufferGetBlock() returns. */
typedef void *Block;
27 :
/*
 * Strategy kinds that can be passed to GetAccessStrategy().
 *
 * Whenever a new BufferAccessStrategyType is added here, a matching
 * IOContext must be added as well, so that IO statistics for the new
 * strategy are tracked.
 */
typedef enum BufferAccessStrategyType
{
    BAS_NORMAL = 0,             /* ordinary random access */
    BAS_BULKREAD,               /* large read-only scan; hint bit updates
                                 * are acceptable */
    BAS_BULKWRITE,              /* large multi-block write, e.g. COPY IN */
    BAS_VACUUM,                 /* VACUUM */
} BufferAccessStrategyType;
42 :
/* Modes that can be passed to ReadBufferExtended(). */
typedef enum
{
    /* Plain read. */
    RBM_NORMAL = 0,

    /*
     * Do not read from disk; the caller will initialize the page.  The page
     * is also locked.
     */
    RBM_ZERO_AND_LOCK,

    /* Same as RBM_ZERO_AND_LOCK, except the page is locked in "cleanup" mode. */
    RBM_ZERO_AND_CLEANUP_LOCK,

    /* Read, but hand back an all-zeros page if the read fails. */
    RBM_ZERO_ON_ERROR,

    /*
     * Same as RBM_NORMAL, except that the page is not logged as invalid
     * during WAL replay.
     */
    RBM_NORMAL_NO_LOG,
} ReadBufferMode;
55 :
56 : /*
57 : * Type returned by PrefetchBuffer().
58 : */
59 : typedef struct PrefetchBufferResult
60 : {
61 : Buffer recent_buffer; /* If valid, a hit (recheck needed!) */
62 : bool initiated_io; /* If true, a miss resulting in async I/O */
63 : } PrefetchBufferResult;
64 :
/*
 * Bit flags that influence the behaviour of the ExtendBufferedRel*
 * functions.  Each flag occupies its own bit so they can be OR'ed together.
 */
typedef enum ExtendBufferedFlags
{
    /*
     * Skip acquiring the extension lock.  Only safe when the relation is not
     * shared, when an access exclusive lock is held, or when running in the
     * startup process.
     */
    EB_SKIP_EXTENSION_LOCK = 1 << 0,

    /* Is the extension being done as part of recovery? */
    EB_PERFORMING_RECOVERY = 1 << 1,

    /*
     * Create the fork if it does not exist yet?  This likely only ever makes
     * sense for relation forks.
     */
    EB_CREATE_FORK_IF_NEEDED = 1 << 2,

    /* Return the first (possibly only) buffer locked? */
    EB_LOCK_FIRST = 1 << 3,

    /* Clear the smgr size cache? */
    EB_CLEAR_SIZE_CACHE = 1 << 4,

    /* Flags from here on are internal. */
    EB_LOCK_TARGET = 1 << 5,
} ExtendBufferedFlags;
95 :
96 : /*
97 : * Some functions identify relations either by relation or smgr +
98 : * relpersistence. Used via the BMR_REL()/BMR_SMGR() macros below. This
99 : * allows us to use the same function for both recovery and normal operation.
100 : */
101 : typedef struct BufferManagerRelation
102 : {
103 : Relation rel;
104 : struct SMgrRelationData *smgr;
105 : char relpersistence;
106 : } BufferManagerRelation;
107 :
/*
 * Constructors for BufferManagerRelation, expanding to a compound literal.
 * Members that are not named in the initializer are zero-initialized.
 *
 * BMR_REL(rel)                    - identify the relation by Relation
 * BMR_SMGR(smgr, relpersistence)  - identify it by smgr + relpersistence
 */
#define BMR_REL(p_rel) \
    ((BufferManagerRelation) {.rel = (p_rel)})
#define BMR_SMGR(p_smgr, p_relpersistence) \
    ((BufferManagerRelation) {.smgr = (p_smgr), .relpersistence = (p_relpersistence)})
110 :
/*
 * READ_BUFFERS_* flag bits; each one occupies a distinct bit.
 */

/* If reading fails, zero out the page. */
#define READ_BUFFERS_ZERO_ON_ERROR              0x01
/* If I/O turns out to be necessary, call smgrprefetch(). */
#define READ_BUFFERS_ISSUE_ADVICE               0x02
/* A checksum failure does not make the page count as invalid. */
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES   0x04
/* The IO will be waited for immediately. */
#define READ_BUFFERS_SYNCHRONOUSLY              0x08
119 :
120 :
121 : struct ReadBuffersOperation
122 : {
123 : /* The following members should be set by the caller. */
124 : Relation rel; /* optional */
125 : struct SMgrRelationData *smgr;
126 : char persistence;
127 : ForkNumber forknum;
128 : BufferAccessStrategy strategy;
129 :
130 : /*
131 : * The following private members are private state for communication
132 : * between StartReadBuffers() and WaitReadBuffers(), initialized only if
133 : * an actual read is required, and should not be modified.
134 : */
135 : Buffer *buffers;
136 : BlockNumber blocknum;
137 : int flags;
138 : int16 nblocks;
139 : int16 nblocks_done;
140 : PgAioWaitRef io_wref;
141 : PgAioReturn io_return;
142 : };
143 :
144 : typedef struct ReadBuffersOperation ReadBuffersOperation;
145 :
/*
 * Forward declarations of incomplete struct types, so that this header only
 * needs pointers to them:
 *   - WritebackContext: avoids having to expose buf_internals.h here
 *   - SMgrRelationData: avoids including smgr.h here
 */
struct WritebackContext;
struct SMgrRelationData;
151 :
152 : /* in globals.c ... this duplicates miscadmin.h */
153 : extern PGDLLIMPORT int NBuffers;
154 :
155 : /* in bufmgr.c */
156 : extern PGDLLIMPORT bool zero_damaged_pages;
157 : extern PGDLLIMPORT int bgwriter_lru_maxpages;
158 : extern PGDLLIMPORT double bgwriter_lru_multiplier;
159 : extern PGDLLIMPORT bool track_io_timing;
160 :
161 : #define DEFAULT_EFFECTIVE_IO_CONCURRENCY 16
162 : #define DEFAULT_MAINTENANCE_IO_CONCURRENCY 16
163 : extern PGDLLIMPORT int effective_io_concurrency;
164 : extern PGDLLIMPORT int maintenance_io_concurrency;
165 :
166 : #define MAX_IO_COMBINE_LIMIT PG_IOV_MAX
167 : #define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)
168 : extern PGDLLIMPORT int io_combine_limit; /* min of the two GUCs below */
169 : extern PGDLLIMPORT int io_combine_limit_guc;
170 : extern PGDLLIMPORT int io_max_combine_limit;
171 :
172 : extern PGDLLIMPORT int checkpoint_flush_after;
173 : extern PGDLLIMPORT int backend_flush_after;
174 : extern PGDLLIMPORT int bgwriter_flush_after;
175 :
176 : extern const PgAioHandleCallbacks aio_shared_buffer_readv_cb;
177 : extern const PgAioHandleCallbacks aio_local_buffer_readv_cb;
178 :
179 : /* in buf_init.c */
180 : extern PGDLLIMPORT char *BufferBlocks;
181 :
182 : /* in localbuf.c */
183 : extern PGDLLIMPORT int NLocBuffer;
184 : extern PGDLLIMPORT Block *LocalBufferBlockPointers;
185 : extern PGDLLIMPORT int32 *LocalRefCount;
186 :
/* Largest value effective_io_concurrency may take. */
#define MAX_IO_CONCURRENCY 1000

/*
 * Special block number for ReadBuffer(): grow the file to get a new page.
 */
#define P_NEW InvalidBlockNumber
192 :
/*
 * Buffer content lock modes, i.e. the values accepted as the "mode" argument
 * of LockBuffer().
 */
#define BUFFER_LOCK_UNLOCK      0
#define BUFFER_LOCK_SHARE       1
#define BUFFER_LOCK_EXCLUSIVE   2
199 :
200 :
201 : /*
202 : * prototypes for functions in bufmgr.c
203 : */
204 : extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
205 : ForkNumber forkNum,
206 : BlockNumber blockNum);
207 : extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
208 : BlockNumber blockNum);
209 : extern bool ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum,
210 : BlockNumber blockNum, Buffer recent_buffer);
211 : extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
212 : extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
213 : BlockNumber blockNum, ReadBufferMode mode,
214 : BufferAccessStrategy strategy);
215 : extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
216 : ForkNumber forkNum, BlockNumber blockNum,
217 : ReadBufferMode mode, BufferAccessStrategy strategy,
218 : bool permanent);
219 :
220 : extern bool StartReadBuffer(ReadBuffersOperation *operation,
221 : Buffer *buffer,
222 : BlockNumber blocknum,
223 : int flags);
224 : extern bool StartReadBuffers(ReadBuffersOperation *operation,
225 : Buffer *buffers,
226 : BlockNumber blockNum,
227 : int *nblocks,
228 : int flags);
229 : extern void WaitReadBuffers(ReadBuffersOperation *operation);
230 :
231 : extern void ReleaseBuffer(Buffer buffer);
232 : extern void UnlockReleaseBuffer(Buffer buffer);
233 : extern bool BufferIsExclusiveLocked(Buffer buffer);
234 : extern bool BufferIsDirty(Buffer buffer);
235 : extern void MarkBufferDirty(Buffer buffer);
236 : extern void IncrBufferRefCount(Buffer buffer);
237 : extern void CheckBufferIsPinnedOnce(Buffer buffer);
238 : extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
239 : BlockNumber blockNum);
240 :
241 : extern Buffer ExtendBufferedRel(BufferManagerRelation bmr,
242 : ForkNumber forkNum,
243 : BufferAccessStrategy strategy,
244 : uint32 flags);
245 : extern BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr,
246 : ForkNumber fork,
247 : BufferAccessStrategy strategy,
248 : uint32 flags,
249 : uint32 extend_by,
250 : Buffer *buffers,
251 : uint32 *extended_by);
252 : extern Buffer ExtendBufferedRelTo(BufferManagerRelation bmr,
253 : ForkNumber fork,
254 : BufferAccessStrategy strategy,
255 : uint32 flags,
256 : BlockNumber extend_to,
257 : ReadBufferMode mode);
258 :
259 : extern void InitBufferManagerAccess(void);
260 : extern void AtEOXact_Buffers(bool isCommit);
261 : extern char *DebugPrintBufferRefcount(Buffer buffer);
262 : extern void CheckPointBuffers(int flags);
263 : extern BlockNumber BufferGetBlockNumber(Buffer buffer);
264 : extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
265 : ForkNumber forkNum);
266 : extern void FlushOneBuffer(Buffer buffer);
267 : extern void FlushRelationBuffers(Relation rel);
268 : extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
269 : extern void CreateAndCopyRelationData(RelFileLocator src_rlocator,
270 : RelFileLocator dst_rlocator,
271 : bool permanent);
272 : extern void FlushDatabaseBuffers(Oid dbid);
273 : extern void DropRelationBuffers(struct SMgrRelationData *smgr_reln,
274 : ForkNumber *forkNum,
275 : int nforks, BlockNumber *firstDelBlock);
276 : extern void DropRelationsAllBuffers(struct SMgrRelationData **smgr_reln,
277 : int nlocators);
278 : extern void DropDatabaseBuffers(Oid dbid);
279 :
/* Shorthand for RelationGetNumberOfBlocksInFork() on the main fork. */
#define RelationGetNumberOfBlocks(reln) RelationGetNumberOfBlocksInFork((reln), MAIN_FORKNUM)
282 :
283 : extern bool BufferIsPermanent(Buffer buffer);
284 : extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer);
285 : extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator,
286 : ForkNumber *forknum, BlockNumber *blknum);
287 :
288 : extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
289 :
290 : extern void UnlockBuffers(void);
291 : extern void LockBuffer(Buffer buffer, int mode);
292 : extern bool ConditionalLockBuffer(Buffer buffer);
293 : extern void LockBufferForCleanup(Buffer buffer);
294 : extern bool ConditionalLockBufferForCleanup(Buffer buffer);
295 : extern bool IsBufferCleanupOK(Buffer buffer);
296 : extern bool HoldingBufferPinThatDelaysRecovery(void);
297 :
298 : extern bool BgBufferSync(struct WritebackContext *wb_context);
299 :
300 : extern uint32 GetPinLimit(void);
301 : extern uint32 GetLocalPinLimit(void);
302 : extern uint32 GetAdditionalPinLimit(void);
303 : extern uint32 GetAdditionalLocalPinLimit(void);
304 : extern void LimitAdditionalPins(uint32 *additional_pins);
305 : extern void LimitAdditionalLocalPins(uint32 *additional_pins);
306 :
307 : extern bool EvictUnpinnedBuffer(Buffer buf);
308 :
309 : /* in buf_init.c */
310 : extern void BufferManagerShmemInit(void);
311 : extern Size BufferManagerShmemSize(void);
312 :
313 : /* in localbuf.c */
314 : extern void AtProcExit_LocalBuffers(void);
315 :
316 : /* in freelist.c */
317 :
318 : extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
319 : extern BufferAccessStrategy GetAccessStrategyWithSize(BufferAccessStrategyType btype,
320 : int ring_size_kb);
321 : extern int GetAccessStrategyBufferCount(BufferAccessStrategy strategy);
322 : extern int GetAccessStrategyPinLimit(BufferAccessStrategy strategy);
323 :
324 : extern void FreeAccessStrategy(BufferAccessStrategy strategy);
325 :
326 :
327 : /* inline functions */
328 :
329 : /*
330 : * Although this header file is nominally backend-only, certain frontend
331 : * programs like pg_waldump include it. For compilers that emit static
332 : * inline functions even when they're unused, that leads to unsatisfied
333 : * external references; hence hide these with #ifndef FRONTEND.
334 : */
335 :
336 : #ifndef FRONTEND
337 :
338 : /*
339 : * BufferIsValid
340 : * True iff the given buffer number is valid (either as a shared
341 : * or local buffer).
342 : *
343 : * Note: For a long time this was defined the same as BufferIsPinned,
344 : * that is it would say False if you didn't hold a pin on the buffer.
345 : * I believe this was bogus and served only to mask logic errors.
346 : * Code should always know whether it has a buffer reference,
347 : * independently of the pin state.
348 : *
349 : * Note: For a further long time this was not quite the inverse of the
350 : * BufferIsInvalid() macro, in that it also did sanity checks to verify
351 : * that the buffer number was in range. Most likely, this macro was
352 : * originally intended only to be used in assertions, but its use has
353 : * since expanded quite a bit, and the overhead of making those checks
354 : * even in non-assert-enabled builds can be significant. Thus, we've
355 : * now demoted the range checks to assertions within the macro itself.
356 : */
357 : static inline bool
358 618171128 : BufferIsValid(Buffer bufnum)
359 : {
360 : Assert(bufnum <= NBuffers);
361 : Assert(bufnum >= -NLocBuffer);
362 :
363 618171128 : return bufnum != InvalidBuffer;
364 : }
365 :
366 : /*
367 : * BufferGetBlock
368 : * Returns a reference to a disk page image associated with a buffer.
369 : *
370 : * Note:
371 : * Assumes buffer is valid.
372 : */
373 : static inline Block
374 720892730 : BufferGetBlock(Buffer buffer)
375 : {
376 : Assert(BufferIsValid(buffer));
377 :
378 720892730 : if (BufferIsLocal(buffer))
379 23419060 : return LocalBufferBlockPointers[-buffer - 1];
380 : else
381 697473670 : return (Block) (BufferBlocks + ((Size) (buffer - 1)) * BLCKSZ);
382 : }
383 :
384 : /*
385 : * BufferGetPageSize
386 : * Returns the page size within a buffer.
387 : *
388 : * Notes:
389 : * Assumes buffer is valid.
390 : *
391 : * The buffer can be a raw disk block and need not contain a valid
392 : * (formatted) disk page.
393 : */
394 : /* XXX should dig out of buffer descriptor */
395 : static inline Size
396 456574 : BufferGetPageSize(Buffer buffer)
397 : {
398 : Assert(BufferIsValid(buffer));
399 456574 : return (Size) BLCKSZ;
400 : }
401 :
402 : /*
403 : * BufferGetPage
404 : * Returns the page associated with a buffer.
405 : */
406 : static inline Page
407 715396982 : BufferGetPage(Buffer buffer)
408 : {
409 715396982 : return (Page) BufferGetBlock(buffer);
410 : }
411 :
412 : #endif /* FRONTEND */
413 :
414 : #endif /* BUFMGR_H */
|