Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.h
4 : * Standard POSTGRES buffer page definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufpage.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFPAGE_H
15 : #define BUFPAGE_H
16 :
17 : #include "access/xlogdefs.h"
18 : #include "storage/block.h"
19 : #include "storage/item.h"
20 : #include "storage/off.h"
21 :
/*
 * GUC variable.  Only declared here; the definition lives outside this
 * header.
 */
extern PGDLLIMPORT bool ignore_checksum_failure;
24 :
25 : /*
26 : * A postgres disk page is an abstraction layered on top of a postgres
27 : * disk block (which is simply a unit of i/o, see block.h).
28 : *
29 : * specifically, while a disk block can be unformatted, a postgres
30 : * disk page is always a slotted page of the form:
31 : *
32 : * +----------------+---------------------------------+
33 : * | PageHeaderData | linp1 linp2 linp3 ... |
34 : * +-----------+----+---------------------------------+
35 : * | ... linpN | |
36 : * +-----------+--------------------------------------+
37 : * | ^ pd_lower |
38 : * | |
39 : * | v pd_upper |
40 : * +-------------+------------------------------------+
41 : * | | tupleN ... |
42 : * +-------------+------------------+-----------------+
43 : * | ... tuple3 tuple2 tuple1 | "special space" |
44 : * +--------------------------------+-----------------+
45 : * ^ pd_special
46 : *
47 : * a page is full when nothing can be added between pd_lower and
48 : * pd_upper.
49 : *
50 : * all blocks written out by an access method must be disk pages.
51 : *
52 : * EXCEPTIONS:
53 : *
54 : * obviously, a page is not formatted before it is initialized by
55 : * a call to PageInit.
56 : *
57 : * NOTES:
58 : *
59 : * linp1..N form an ItemId (line pointer) array. ItemPointers point
60 : * to a physical block number and a logical offset (line pointer
61 : * number) within that block/page. Note that OffsetNumbers
62 : * conventionally start at 1, not 0.
63 : *
64 : * tuple1..N are added "backwards" on the page. Since an ItemPointer
65 : * offset is used to access an ItemId entry rather than an actual
66 : * byte-offset position, tuples can be physically shuffled on a page
67 : * whenever the need arises. This indirection also keeps crash recovery
68 : * relatively simple, because the low-level details of page space
69 : * management can be controlled by standard buffer page code during
70 : * logging, and during recovery.
71 : *
72 : * AM-generic per-page information is kept in PageHeaderData.
73 : *
74 : * AM-specific per-page data (if any) is kept in the area marked "special
75 : * space"; each AM has an "opaque" structure defined somewhere that is
76 : * stored as the page trailer. An access method should always
77 : * initialize its pages with PageInit and then set its own opaque
78 : * fields.
79 : */
80 :
/*
 * Handle for a page.  The accessors in this file cast it to PageHeader to
 * reach the header fields, and to (char *) to compute byte offsets within
 * the page.
 */
typedef Pointer Page;
82 :
83 :
/*
 * LocationIndex
 *		Byte offset of a location within a page.
 *
 * Although the type is 16 bits wide, usable values are limited to 2^15
 * because ItemIdData.lp_off and ItemIdData.lp_len are only 15 bits wide
 * (see itemid.h).
 */
typedef uint16 LocationIndex;
91 :
92 :
93 : /*
94 : * For historical reasons, the 64-bit LSN value is stored as two 32-bit
95 : * values.
96 : */
97 : typedef struct
98 : {
99 : uint32 xlogid; /* high bits */
100 : uint32 xrecoff; /* low bits */
101 : } PageXLogRecPtr;
102 :
103 : static inline XLogRecPtr
104 58115460 : PageXLogRecPtrGet(PageXLogRecPtr val)
105 : {
106 58115460 : return (uint64) val.xlogid << 32 | val.xrecoff;
107 : }
108 :
/*
 * PageXLogRecPtrSet
 *		Split a 64-bit LSN into the two 32-bit halves of a PageXLogRecPtr.
 *
 * "ptr" must be a PageXLogRecPtr lvalue.  Both arguments are expanded
 * twice, so callers should not pass expressions with side effects.
 */
#define PageXLogRecPtrSet(ptr, lsn) \
	( \
		(ptr).xlogid = (uint32) ((lsn) >> 32), \
		(ptr).xrecoff = (uint32) (lsn) \
	)
111 :
112 : /*
113 : * disk page organization
114 : *
115 : * space management information generic to any page
116 : *
117 : * pd_lsn - identifies xlog record for last change to this page.
118 : * pd_checksum - page checksum, if set.
119 : * pd_flags - flag bits.
120 : * pd_lower - offset to start of free space.
121 : * pd_upper - offset to end of free space.
122 : * pd_special - offset to start of special space.
123 : * pd_pagesize_version - size in bytes and page layout version number.
124 : * pd_prune_xid - oldest XID among potentially prunable tuples on page.
125 : *
126 : * The LSN is used by the buffer manager to enforce the basic rule of WAL:
127 : * "thou shalt write xlog before data". A dirty buffer cannot be dumped
128 : * to disk until xlog has been flushed at least as far as the page's LSN.
129 : *
130 : * pd_checksum stores the page checksum, if it has been set for this page;
131 : * zero is a valid value for a checksum. If a checksum is not in use then
132 : * we leave the field unset. This will typically mean the field is zero
133 : * though non-zero values may also be present if databases have been
134 : * pg_upgraded from releases prior to 9.3, when the same byte offset was
135 : * used to store the current timelineid when the page was last updated.
136 : * Note that there is no indication on a page as to whether the checksum
137 : * is valid or not, a deliberate design choice which avoids the problem
138 : * of relying on the page contents to decide whether to verify it. Hence
139 : * there are no flag bits relating to checksums.
140 : *
141 : * pd_prune_xid is a hint field that helps determine whether pruning will be
142 : * useful. It is currently unused in index pages.
143 : *
144 : * The page version number and page size are packed together into a single
145 : * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
146 : * there was no concept of a page version number, and doing it this way
147 : * lets us pretend that pre-7.3 databases have page version number zero.
148 : * We constrain page sizes to be multiples of 256, leaving the low eight
149 : * bits available for a version number.
150 : *
151 : * Minimum possible page size is perhaps 64B to fit page header, opaque space
152 : * and a minimal tuple; of course, in reality you want it much bigger, so
153 : * the constraint on pagesize mod 256 is not an important restriction.
154 : * On the high end, we can only support pages up to 32KB because lp_off/lp_len
155 : * are 15 bits.
156 : */
157 :
158 : typedef struct PageHeaderData
159 : {
160 : /* XXX LSN is member of *any* block, not only page-organized ones */
161 : PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
162 : * record for last change to this page */
163 : uint16 pd_checksum; /* checksum */
164 : uint16 pd_flags; /* flag bits, see below */
165 : LocationIndex pd_lower; /* offset to start of free space */
166 : LocationIndex pd_upper; /* offset to end of free space */
167 : LocationIndex pd_special; /* offset to start of special space */
168 : uint16 pd_pagesize_version;
169 : TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
170 : ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
171 : } PageHeaderData;
172 :
173 : typedef PageHeaderData *PageHeader;
174 :
/*
 * Flag bits kept in pd_flags.  Bits that are not defined here are
 * initialized to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 *
 * PD_ALL_VISIBLE is set if all tuples on the page are visible to everyone.
 */
#define PD_HAS_FREE_LINES	0x0001	/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002	/* not enough free space for new tuple? */
#define PD_ALL_VISIBLE		0x0004	/* all tuples on page are visible to
									 * everyone */

/* OR of all valid pd_flags bits (currently 0x0007) */
#define PD_VALID_FLAG_BITS \
	(PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
193 :
/*
 * Page layout version history:
 *
 * Page layout version number 0 is for pre-7.3 Postgres releases.
 * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
 * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
 * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
 * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
 *		added the pd_flags field (by stealing some bits from pd_tli),
 *		as well as adding the pd_prune_xid field (which enlarges the header).
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		4	/* current page layout version */
#define PG_DATA_CHECKSUM_VERSION	1	/* current data checksum version */
208 :
209 : /* ----------------------------------------------------------------
210 : * page support functions
211 : * ----------------------------------------------------------------
212 : */
213 :
/*
 * SizeOfPageHeaderData
 *		Size of the fixed part of the page header.
 *
 * Line pointer(s) do not count as part of the header, so the size is taken
 * as the offset of the pd_linp array rather than sizeof(PageHeaderData).
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
218 :
219 : /*
220 : * PageIsEmpty
221 : * returns true iff no itemid has been allocated on the page
222 : */
223 : static inline bool
224 3119138 : PageIsEmpty(Page page)
225 : {
226 3119138 : return ((PageHeader) page)->pd_lower <= SizeOfPageHeaderData;
227 : }
228 :
229 : /*
230 : * PageIsNew
231 : * returns true iff page has not been initialized (by PageInit)
232 : */
233 : static inline bool
234 73782364 : PageIsNew(Page page)
235 : {
236 73782364 : return ((PageHeader) page)->pd_upper == 0;
237 : }
238 :
239 : /*
240 : * PageGetItemId
241 : * Returns an item identifier of a page.
242 : */
243 : static inline ItemId
244 2318194244 : PageGetItemId(Page page, OffsetNumber offsetNumber)
245 : {
246 2318194244 : return &((PageHeader) page)->pd_linp[offsetNumber - 1];
247 : }
248 :
249 : /*
250 : * PageGetContents
251 : * To be used in cases where the page does not contain line pointers.
252 : *
253 : * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
254 : * Now it is. Beware of old code that might think the offset to the contents
255 : * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
256 : */
257 : static inline char *
258 44345656 : PageGetContents(Page page)
259 : {
260 44345656 : return (char *) page + MAXALIGN(SizeOfPageHeaderData);
261 : }
262 :
263 : /* ----------------
264 : * functions to access page size info
265 : * ----------------
266 : */
267 :
268 : /*
269 : * PageGetPageSize
270 : * Returns the page size of a page.
271 : *
272 : * this can only be called on a formatted page (unlike
273 : * BufferGetPageSize, which can be called on an unformatted page).
274 : * however, it can be called on a page that is not stored in a buffer.
275 : */
276 : static inline Size
277 66174648 : PageGetPageSize(Page page)
278 : {
279 66174648 : return (Size) (((PageHeader) page)->pd_pagesize_version & (uint16) 0xFF00);
280 : }
281 :
282 : /*
283 : * PageGetPageLayoutVersion
284 : * Returns the page layout version of a page.
285 : */
286 : static inline uint8
287 6 : PageGetPageLayoutVersion(Page page)
288 : {
289 6 : return (((PageHeader) page)->pd_pagesize_version & 0x00FF);
290 : }
291 :
292 : /*
293 : * PageSetPageSizeAndVersion
294 : * Sets the page size and page layout version number of a page.
295 : *
296 : * We could support setting these two values separately, but there's
297 : * no real need for it at the moment.
298 : */
299 : static inline void
300 649936 : PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
301 : {
302 : Assert((size & 0xFF00) == size);
303 : Assert((version & 0x00FF) == version);
304 :
305 649936 : ((PageHeader) page)->pd_pagesize_version = size | version;
306 649936 : }
307 :
308 : /* ----------------
309 : * page special data functions
310 : * ----------------
311 : */
312 : /*
313 : * PageGetSpecialSize
314 : * Returns size of special space on a page.
315 : */
316 : static inline uint16
317 43238726 : PageGetSpecialSize(Page page)
318 : {
319 43238726 : return (PageGetPageSize(page) - ((PageHeader) page)->pd_special);
320 : }
321 :
322 : /*
323 : * Using assertions, validate that the page special pointer is OK.
324 : *
325 : * This is intended to catch use of the pointer before page initialization.
326 : */
327 : static inline void
328 494720002 : PageValidateSpecialPointer(Page page)
329 : {
330 : Assert(page);
331 : Assert(((PageHeader) page)->pd_special <= BLCKSZ);
332 : Assert(((PageHeader) page)->pd_special >= SizeOfPageHeaderData);
333 494720002 : }
334 :
335 : /*
336 : * PageGetSpecialPointer
337 : * Returns pointer to special space on a page.
338 : */
339 : static inline char *
340 494720002 : PageGetSpecialPointer(Page page)
341 : {
342 494720002 : PageValidateSpecialPointer(page);
343 494720002 : return (char *) page + ((PageHeader) page)->pd_special;
344 : }
345 :
346 : /*
347 : * PageGetItem
348 : * Retrieves an item on the given page.
349 : *
350 : * Note:
351 : * This does not change the status of any of the resources passed.
352 : * The semantics may change in the future.
353 : */
354 : static inline Item
355 1074199386 : PageGetItem(Page page, ItemId itemId)
356 : {
357 : Assert(page);
358 : Assert(ItemIdHasStorage(itemId));
359 :
360 1074199386 : return (Item) (((char *) page) + ItemIdGetOffset(itemId));
361 : }
362 :
363 : /*
364 : * PageGetMaxOffsetNumber
365 : * Returns the maximum offset number used by the given page.
366 : * Since offset numbers are 1-based, this is also the number
367 : * of items on the page.
368 : *
369 : * NOTE: if the page is not initialized (pd_lower == 0), we must
370 : * return zero to ensure sane behavior.
371 : */
372 : static inline OffsetNumber
373 708273810 : PageGetMaxOffsetNumber(Page page)
374 : {
375 708273810 : PageHeader pageheader = (PageHeader) page;
376 :
377 708273810 : if (pageheader->pd_lower <= SizeOfPageHeaderData)
378 882130 : return 0;
379 : else
380 707391680 : return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
381 : }
382 :
383 : /*
384 : * Additional functions for access to page headers.
385 : */
386 : static inline XLogRecPtr
387 55963600 : PageGetLSN(const char *page)
388 : {
389 55963600 : return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn);
390 : }
391 : static inline void
392 35565006 : PageSetLSN(Page page, XLogRecPtr lsn)
393 : {
394 35565006 : PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
395 35565006 : }
396 :
397 : static inline bool
398 22163490 : PageHasFreeLinePointers(Page page)
399 : {
400 22163490 : return ((PageHeader) page)->pd_flags & PD_HAS_FREE_LINES;
401 : }
402 : static inline void
403 50896 : PageSetHasFreeLinePointers(Page page)
404 : {
405 50896 : ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
406 50896 : }
407 : static inline void
408 9034064 : PageClearHasFreeLinePointers(Page page)
409 : {
410 9034064 : ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
411 9034064 : }
412 :
413 : static inline bool
414 3156414 : PageIsFull(Page page)
415 : {
416 3156414 : return ((PageHeader) page)->pd_flags & PD_PAGE_FULL;
417 : }
418 : static inline void
419 282038 : PageSetFull(Page page)
420 : {
421 282038 : ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
422 282038 : }
423 : static inline void
424 9031386 : PageClearFull(Page page)
425 : {
426 9031386 : ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
427 9031386 : }
428 :
429 : static inline bool
430 66332474 : PageIsAllVisible(Page page)
431 : {
432 66332474 : return ((PageHeader) page)->pd_flags & PD_ALL_VISIBLE;
433 : }
434 : static inline void
435 93062 : PageSetAllVisible(Page page)
436 : {
437 93062 : ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
438 93062 : }
439 : static inline void
440 8957756 : PageClearAllVisible(Page page)
441 : {
442 8957756 : ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
443 8957756 : }
444 :
445 : /*
446 : * These two require "access/transam.h", so left as macros.
447 : */
/*
 * PageSetPrunable
 *		Record xid in the page's pd_prune_xid hint field if no hint is set
 *		yet, or if xid precedes the value currently stored.
 *
 * xid must be a normal transaction ID (checked by assertion).
 *
 * This stays a macro because TransactionIdIsNormal, TransactionIdIsValid
 * and TransactionIdPrecedes require "access/transam.h".  Both arguments are
 * captured in block-local variables so that each is evaluated exactly once,
 * even when the caller passes an expression with side effects.
 */
#define PageSetPrunable(page, xid) \
do { \
	PageHeader	PageSetPrunable_hdr_ = (PageHeader) (page); \
	TransactionId PageSetPrunable_xid_ = (xid); \
	\
	Assert(TransactionIdIsNormal(PageSetPrunable_xid_)); \
	if (!TransactionIdIsValid(PageSetPrunable_hdr_->pd_prune_xid) || \
		TransactionIdPrecedes(PageSetPrunable_xid_, \
							  PageSetPrunable_hdr_->pd_prune_xid)) \
		PageSetPrunable_hdr_->pd_prune_xid = PageSetPrunable_xid_; \
} while (0)
/*
 * PageClearPrunable
 *		Reset the page's pd_prune_xid hint field to InvalidTransactionId.
 */
#define PageClearPrunable(page) \
	( \
		((PageHeader) (page))->pd_prune_xid = InvalidTransactionId \
	)
457 :
458 :
459 : /* ----------------------------------------------------------------
460 : * extern declarations
461 : * ----------------------------------------------------------------
462 : */
463 :
/* Bit flags accepted in the "flags" argument of PageAddItemExtended() */
#define PAI_OVERWRITE			0x0001
#define PAI_IS_HEAP				0x0002
467 :
/* Bit flags accepted in the "flags" argument of PageIsVerifiedExtended() */
#define PIV_LOG_WARNING			0x0001
#define PIV_REPORT_STAT			0x0002
471 :
/*
 * PageAddItem
 *		Convenience wrapper around PageAddItemExtended() that takes two
 *		booleans instead of a flags word: "overwrite" selects PAI_OVERWRITE
 *		and "is_heap" selects PAI_IS_HEAP.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended((page), (item), (size), (offsetNumber), \
						(((overwrite) ? PAI_OVERWRITE : 0) | \
						 ((is_heap) ? PAI_IS_HEAP : 0)))
476 :
/*
 * PageIsVerified
 *		Convenience wrapper around PageIsVerifiedExtended() that always
 *		passes both PIV_LOG_WARNING and PIV_REPORT_STAT.
 */
#define PageIsVerified(page, blkno) \
	PageIsVerifiedExtended((page), (blkno), \
						   (PIV_LOG_WARNING | PIV_REPORT_STAT))
480 :
481 : /*
482 : * Check that BLCKSZ is a multiple of sizeof(size_t). In
483 : * PageIsVerifiedExtended(), it is much faster to check if a page is
484 : * full of zeroes using the native word size. Note that this assertion
485 : * is kept within a header to make sure that StaticAssertDecl() works
486 : * across various combinations of platforms and compilers.
487 : */
488 : StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
489 : "BLCKSZ has to be a multiple of sizeof(size_t)");
490 :
/*
 * Out-of-line page routines.  Only the prototypes are visible in this
 * header; see the defining source file for each function's contract.
 *
 * The "flags" argument of PageIsVerifiedExtended() takes PIV_* bits and
 * that of PageAddItemExtended() takes PAI_* bits, as the PageIsVerified()
 * and PageAddItem() wrapper macros in this header show.
 */
extern void PageInit(Page page, Size pageSize, Size specialSize);
extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
										OffsetNumber offsetNumber, int flags);
extern Page PageGetTempPage(Page page);
extern Page PageGetTempPageCopy(Page page);
extern Page PageGetTempPageCopySpecial(Page page);
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
extern void PageRepairFragmentation(Page page);
extern void PageTruncateLinePointerArray(Page page);
extern Size PageGetFreeSpace(Page page);
extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
extern Size PageGetExactFreeSpace(Page page);
extern Size PageGetHeapFreeSpace(Page page);
extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
									Item newtup, Size newsize);
extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
512 :
513 : #endif /* BUFPAGE_H */
|