Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufmgr.h
4 : * POSTGRES buffer manager definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufmgr.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFMGR_H
15 : #define BUFMGR_H
16 :
17 : #include "port/pg_iovec.h"
18 : #include "storage/block.h"
19 : #include "storage/buf.h"
20 : #include "storage/bufpage.h"
21 : #include "storage/relfilelocator.h"
22 : #include "utils/relcache.h"
23 : #include "utils/snapmgr.h"
24 :
/* Generic pointer type; BufferGetBlock() returns one for a buffer's page image. */
typedef void *Block;
26 :
/*
 * Strategy kinds accepted by GetAccessStrategy().
 *
 * Whenever a new BufferAccessStrategyType is added, a new IOContext must be
 * added too, so that IO statistics for that strategy are tracked.
 */
typedef enum BufferAccessStrategyType
{
	/* Normal random access */
	BAS_NORMAL = 0,

	/* Large read-only scan (hint bit updates are ok) */
	BAS_BULKREAD = 1,

	/* Large multi-block write (e.g. COPY IN) */
	BAS_BULKWRITE = 2,

	/* VACUUM */
	BAS_VACUUM = 3,
} BufferAccessStrategyType;
41 :
/* Modes that can be passed to ReadBufferExtended() */
typedef enum
{
	/* Normal read */
	RBM_NORMAL = 0,

	/*
	 * Don't read from disk, the caller will initialize the page.  The page
	 * is also locked.
	 */
	RBM_ZERO_AND_LOCK = 1,

	/* Like RBM_ZERO_AND_LOCK, but the page is locked in "cleanup" mode */
	RBM_ZERO_AND_CLEANUP_LOCK = 2,

	/* Read, but return an all-zeros page on error */
	RBM_ZERO_ON_ERROR = 3,

	/*
	 * Same as RBM_NORMAL, except that the page is not logged as invalid
	 * during WAL replay
	 */
	RBM_NORMAL_NO_LOG = 4,
} ReadBufferMode;
54 :
55 : /*
56 : * Type returned by PrefetchBuffer().
57 : */
58 : typedef struct PrefetchBufferResult
59 : {
60 : Buffer recent_buffer; /* If valid, a hit (recheck needed!) */
61 : bool initiated_io; /* If true, a miss resulting in async I/O */
62 : } PrefetchBufferResult;
63 :
/*
 * Flag bits that influence the behaviour of the ExtendBufferedRel* functions.
 * Each flag occupies its own bit.
 */
typedef enum ExtendBufferedFlags
{
	/*
	 * Skip acquiring the extension lock.  That is only safe when the
	 * relation isn't shared, when an access exclusive lock is held, or when
	 * running in the startup process.
	 */
	EB_SKIP_EXTENSION_LOCK = (1 << 0),

	/* Is the extension happening as part of recovery? */
	EB_PERFORMING_RECOVERY = (1 << 1),

	/*
	 * Create the fork if it doesn't exist yet?  This likely only ever makes
	 * sense for relation forks.
	 */
	EB_CREATE_FORK_IF_NEEDED = (1 << 2),

	/* Return the first (possibly only) buffer locked? */
	EB_LOCK_FIRST = (1 << 3),

	/* Clear the smgr size cache? */
	EB_CLEAR_SIZE_CACHE = (1 << 4),

	/* flags from here on are internal */
	EB_LOCK_TARGET = (1 << 5),
} ExtendBufferedFlags;
94 :
95 : /*
96 : * Some functions identify relations either by relation or smgr +
97 : * relpersistence. Used via the BMR_REL()/BMR_SMGR() macros below. This
98 : * allows us to use the same function for both recovery and normal operation.
99 : */
100 : typedef struct BufferManagerRelation
101 : {
102 : Relation rel;
103 : struct SMgrRelationData *smgr;
104 : char relpersistence;
105 : } BufferManagerRelation;
106 :
/* Build a BufferManagerRelation from a Relation; members not named are zero. */
#define BMR_REL(p_rel) \
	((BufferManagerRelation){.rel = (p_rel)})

/* Build a BufferManagerRelation from an smgr relation plus its persistence. */
#define BMR_SMGR(p_smgr, p_relpersistence) \
	((BufferManagerRelation){.smgr = (p_smgr), .relpersistence = (p_relpersistence)})
109 :
/* If reading fails, zero out the page. */
#define READ_BUFFERS_ZERO_ON_ERROR (1 << 0)

/* If I/O is necessary, call smgrprefetch(). */
#define READ_BUFFERS_ISSUE_ADVICE (1 << 1)
114 :
115 : struct ReadBuffersOperation
116 : {
117 : /* The following members should be set by the caller. */
118 : Relation rel; /* optional */
119 : struct SMgrRelationData *smgr;
120 : char persistence;
121 : ForkNumber forknum;
122 : BufferAccessStrategy strategy;
123 :
124 : /*
125 : * The following private members are private state for communication
126 : * between StartReadBuffers() and WaitReadBuffers(), initialized only if
127 : * an actual read is required, and should not be modified.
128 : */
129 : Buffer *buffers;
130 : BlockNumber blocknum;
131 : int flags;
132 : int16 nblocks;
133 : int16 io_buffers_len;
134 : };
135 :
136 : typedef struct ReadBuffersOperation ReadBuffersOperation;
137 :
/* forward declaration, so that smgr.h need not be included here */
struct SMgrRelationData;

/* forward declaration, so that buf_internals.h need not be exposed here */
struct WritebackContext;
143 :
144 : /* in globals.c ... this duplicates miscadmin.h */
145 : extern PGDLLIMPORT int NBuffers;
146 :
147 : /* in bufmgr.c */
148 : extern PGDLLIMPORT bool zero_damaged_pages;
149 : extern PGDLLIMPORT int bgwriter_lru_maxpages;
150 : extern PGDLLIMPORT double bgwriter_lru_multiplier;
151 : extern PGDLLIMPORT bool track_io_timing;
152 :
153 : /* only applicable when prefetching is available */
154 : #ifdef USE_PREFETCH
155 : #define DEFAULT_EFFECTIVE_IO_CONCURRENCY 1
156 : #define DEFAULT_MAINTENANCE_IO_CONCURRENCY 10
157 : #else
158 : #define DEFAULT_EFFECTIVE_IO_CONCURRENCY 0
159 : #define DEFAULT_MAINTENANCE_IO_CONCURRENCY 0
160 : #endif
161 : extern PGDLLIMPORT int effective_io_concurrency;
162 : extern PGDLLIMPORT int maintenance_io_concurrency;
163 :
164 : #define MAX_IO_COMBINE_LIMIT PG_IOV_MAX
165 : #define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)
166 : extern PGDLLIMPORT int io_combine_limit;
167 :
168 : extern PGDLLIMPORT int checkpoint_flush_after;
169 : extern PGDLLIMPORT int backend_flush_after;
170 : extern PGDLLIMPORT int bgwriter_flush_after;
171 :
172 : /* in buf_init.c */
173 : extern PGDLLIMPORT char *BufferBlocks;
174 :
175 : /* in localbuf.c */
176 : extern PGDLLIMPORT int NLocBuffer;
177 : extern PGDLLIMPORT Block *LocalBufferBlockPointers;
178 : extern PGDLLIMPORT int32 *LocalRefCount;
179 :
/* largest value allowed for effective_io_concurrency */
#define MAX_IO_CONCURRENCY 1000

/* special block number for ReadBuffer(): grow the file to get a new page */
#define P_NEW InvalidBlockNumber

/*
 * Buffer content lock modes, passed as the mode argument of LockBuffer()
 */
#define BUFFER_LOCK_UNLOCK		0
#define BUFFER_LOCK_SHARE		1
#define BUFFER_LOCK_EXCLUSIVE	2
192 :
193 :
194 : /*
195 : * prototypes for functions in bufmgr.c
196 : */
197 : extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
198 : ForkNumber forkNum,
199 : BlockNumber blockNum);
200 : extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
201 : BlockNumber blockNum);
202 : extern bool ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum,
203 : BlockNumber blockNum, Buffer recent_buffer);
204 : extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
205 : extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
206 : BlockNumber blockNum, ReadBufferMode mode,
207 : BufferAccessStrategy strategy);
208 : extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
209 : ForkNumber forkNum, BlockNumber blockNum,
210 : ReadBufferMode mode, BufferAccessStrategy strategy,
211 : bool permanent);
212 :
213 : extern bool StartReadBuffer(ReadBuffersOperation *operation,
214 : Buffer *buffer,
215 : BlockNumber blocknum,
216 : int flags);
217 : extern bool StartReadBuffers(ReadBuffersOperation *operation,
218 : Buffer *buffers,
219 : BlockNumber blockNum,
220 : int *nblocks,
221 : int flags);
222 : extern void WaitReadBuffers(ReadBuffersOperation *operation);
223 :
224 : extern void ReleaseBuffer(Buffer buffer);
225 : extern void UnlockReleaseBuffer(Buffer buffer);
226 : extern bool BufferIsExclusiveLocked(Buffer buffer);
227 : extern bool BufferIsDirty(Buffer buffer);
228 : extern void MarkBufferDirty(Buffer buffer);
229 : extern void IncrBufferRefCount(Buffer buffer);
230 : extern void CheckBufferIsPinnedOnce(Buffer buffer);
231 : extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
232 : BlockNumber blockNum);
233 :
234 : extern Buffer ExtendBufferedRel(BufferManagerRelation bmr,
235 : ForkNumber forkNum,
236 : BufferAccessStrategy strategy,
237 : uint32 flags);
238 : extern BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr,
239 : ForkNumber fork,
240 : BufferAccessStrategy strategy,
241 : uint32 flags,
242 : uint32 extend_by,
243 : Buffer *buffers,
244 : uint32 *extended_by);
245 : extern Buffer ExtendBufferedRelTo(BufferManagerRelation bmr,
246 : ForkNumber fork,
247 : BufferAccessStrategy strategy,
248 : uint32 flags,
249 : BlockNumber extend_to,
250 : ReadBufferMode mode);
251 :
252 : extern void InitBufferManagerAccess(void);
253 : extern void AtEOXact_Buffers(bool isCommit);
254 : extern char *DebugPrintBufferRefcount(Buffer buffer);
255 : extern void CheckPointBuffers(int flags);
256 : extern BlockNumber BufferGetBlockNumber(Buffer buffer);
257 : extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
258 : ForkNumber forkNum);
259 : extern void FlushOneBuffer(Buffer buffer);
260 : extern void FlushRelationBuffers(Relation rel);
261 : extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
262 : extern void CreateAndCopyRelationData(RelFileLocator src_rlocator,
263 : RelFileLocator dst_rlocator,
264 : bool permanent);
265 : extern void FlushDatabaseBuffers(Oid dbid);
266 : extern void DropRelationBuffers(struct SMgrRelationData *smgr_reln,
267 : ForkNumber *forkNum,
268 : int nforks, BlockNumber *firstDelBlock);
269 : extern void DropRelationsAllBuffers(struct SMgrRelationData **smgr_reln,
270 : int nlocators);
271 : extern void DropDatabaseBuffers(Oid dbid);
272 :
/* Shorthand for RelationGetNumberOfBlocksInFork() applied to MAIN_FORKNUM. */
#define RelationGetNumberOfBlocks(reln) RelationGetNumberOfBlocksInFork((reln), MAIN_FORKNUM)
275 :
276 : extern bool BufferIsPermanent(Buffer buffer);
277 : extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer);
278 :
279 : #ifdef NOT_USED
280 : extern void PrintPinnedBufs(void);
281 : #endif
282 : extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator,
283 : ForkNumber *forknum, BlockNumber *blknum);
284 :
285 : extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
286 :
287 : extern void UnlockBuffers(void);
288 : extern void LockBuffer(Buffer buffer, int mode);
289 : extern bool ConditionalLockBuffer(Buffer buffer);
290 : extern void LockBufferForCleanup(Buffer buffer);
291 : extern bool ConditionalLockBufferForCleanup(Buffer buffer);
292 : extern bool IsBufferCleanupOK(Buffer buffer);
293 : extern bool HoldingBufferPinThatDelaysRecovery(void);
294 :
295 : extern bool BgBufferSync(struct WritebackContext *wb_context);
296 :
297 : extern void LimitAdditionalPins(uint32 *additional_pins);
298 : extern void LimitAdditionalLocalPins(uint32 *additional_pins);
299 :
300 : extern bool EvictUnpinnedBuffer(Buffer buf);
301 :
302 : /* in buf_init.c */
303 : extern void BufferManagerShmemInit(void);
304 : extern Size BufferManagerShmemSize(void);
305 :
306 : /* in localbuf.c */
307 : extern void AtProcExit_LocalBuffers(void);
308 :
309 : /* in freelist.c */
310 :
311 : extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
312 : extern BufferAccessStrategy GetAccessStrategyWithSize(BufferAccessStrategyType btype,
313 : int ring_size_kb);
314 : extern int GetAccessStrategyBufferCount(BufferAccessStrategy strategy);
315 : extern int GetAccessStrategyPinLimit(BufferAccessStrategy strategy);
316 :
317 : extern void FreeAccessStrategy(BufferAccessStrategy strategy);
318 :
319 :
320 : /* inline functions */
321 :
322 : /*
323 : * Although this header file is nominally backend-only, certain frontend
324 : * programs like pg_waldump include it. For compilers that emit static
325 : * inline functions even when they're unused, that leads to unsatisfied
326 : * external references; hence hide these with #ifndef FRONTEND.
327 : */
328 :
329 : #ifndef FRONTEND
330 :
331 : /*
332 : * BufferIsValid
333 : * True iff the given buffer number is valid (either as a shared
334 : * or local buffer).
335 : *
336 : * Note: For a long time this was defined the same as BufferIsPinned,
337 : * that is it would say False if you didn't hold a pin on the buffer.
338 : * I believe this was bogus and served only to mask logic errors.
339 : * Code should always know whether it has a buffer reference,
340 : * independently of the pin state.
341 : *
342 : * Note: For a further long time this was not quite the inverse of the
343 : * BufferIsInvalid() macro, in that it also did sanity checks to verify
344 : * that the buffer number was in range. Most likely, this macro was
345 : * originally intended only to be used in assertions, but its use has
346 : * since expanded quite a bit, and the overhead of making those checks
347 : * even in non-assert-enabled builds can be significant. Thus, we've
348 : * now demoted the range checks to assertions within the macro itself.
349 : */
350 : static inline bool
351 568128580 : BufferIsValid(Buffer bufnum)
352 : {
353 : Assert(bufnum <= NBuffers);
354 : Assert(bufnum >= -NLocBuffer);
355 :
356 568128580 : return bufnum != InvalidBuffer;
357 : }
358 :
359 : /*
360 : * BufferGetBlock
361 : * Returns a reference to a disk page image associated with a buffer.
362 : *
363 : * Note:
364 : * Assumes buffer is valid.
365 : */
366 : static inline Block
367 659542624 : BufferGetBlock(Buffer buffer)
368 : {
369 : Assert(BufferIsValid(buffer));
370 :
371 659542624 : if (BufferIsLocal(buffer))
372 20216430 : return LocalBufferBlockPointers[-buffer - 1];
373 : else
374 639326194 : return (Block) (BufferBlocks + ((Size) (buffer - 1)) * BLCKSZ);
375 : }
376 :
377 : /*
378 : * BufferGetPageSize
379 : * Returns the page size within a buffer.
380 : *
381 : * Notes:
382 : * Assumes buffer is valid.
383 : *
384 : * The buffer can be a raw disk block and need not contain a valid
385 : * (formatted) disk page.
386 : */
387 : /* XXX should dig out of buffer descriptor */
388 : static inline Size
389 418664 : BufferGetPageSize(Buffer buffer)
390 : {
391 : AssertMacro(BufferIsValid(buffer));
392 418664 : return (Size) BLCKSZ;
393 : }
394 :
395 : /*
396 : * BufferGetPage
397 : * Returns the page associated with a buffer.
398 : */
399 : static inline Page
400 656806316 : BufferGetPage(Buffer buffer)
401 : {
402 656806316 : return (Page) BufferGetBlock(buffer);
403 : }
404 :
405 : #endif /* FRONTEND */
406 :
407 : #endif /* BUFMGR_H */
|