Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.h
4 : * Standard POSTGRES buffer page definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufpage.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFPAGE_H
15 : #define BUFPAGE_H
16 :
17 : #include "access/xlogdefs.h"
18 : #include "storage/block.h"
19 : #include "storage/off.h"
20 :
21 : /* GUC variable */
22 : extern PGDLLIMPORT bool ignore_checksum_failure;
23 :
24 : /*
25 : * A postgres disk page is an abstraction layered on top of a postgres
26 : * disk block (which is simply a unit of i/o, see block.h).
27 : *
28 : * specifically, while a disk block can be unformatted, a postgres
29 : * disk page is always a slotted page of the form:
30 : *
31 : * +----------------+---------------------------------+
32 : * | PageHeaderData | linp1 linp2 linp3 ...           |
33 : * +-----------+----+---------------------------------+
34 : * | ... linpN |                                      |
35 : * +-----------+--------------------------------------+
36 : * |           ^ pd_lower                             |
37 : * |                                                  |
38 : * |             v pd_upper                           |
39 : * +-------------+------------------------------------+
40 : * |             | tupleN ...                         |
41 : * +-------------+------------------+-----------------+
42 : * |       ... tuple3 tuple2 tuple1 | "special space" |
43 : * +--------------------------------+-----------------+
44 : *                                  ^ pd_special
45 : *
46 : * a page is full when nothing can be added between pd_lower and
47 : * pd_upper.
48 : *
49 : * all blocks written out by an access method must be disk pages.
50 : *
51 : * EXCEPTIONS:
52 : *
53 : * obviously, a page is not formatted before it is initialized by
54 : * a call to PageInit.
55 : *
56 : * NOTES:
57 : *
58 : * linp1..N form an ItemId (line pointer) array. ItemPointers point
59 : * to a physical block number and a logical offset (line pointer
60 : * number) within that block/page. Note that OffsetNumbers
61 : * conventionally start at 1, not 0.
62 : *
63 : * tuple1..N are added "backwards" on the page. Since an ItemPointer
64 : * offset is used to access an ItemId entry rather than an actual
65 : * byte-offset position, tuples can be physically shuffled on a page
66 : * whenever the need arises. This indirection also keeps crash recovery
67 : * relatively simple, because the low-level details of page space
68 : * management can be controlled by standard buffer page code during
69 : * logging, and during recovery.
70 : *
71 : * AM-generic per-page information is kept in PageHeaderData.
72 : *
73 : * AM-specific per-page data (if any) is kept in the area marked "special
74 : * space"; each AM has an "opaque" structure defined somewhere that is
75 : * stored as the page trailer. An access method should always
76 : * initialize its pages with PageInit and then set its own opaque
77 : * fields.
78 : */
79 :
/*
 * A page is handled as a plain run of bytes: PageData is the byte type and
 * Page is a pointer to the first byte of a page.
 */
typedef char PageData;
typedef PageData *Page;
82 :
83 :
84 : /*
85 : * location (byte offset) within a page.
86 : *
87 : * note that this is actually limited to 2^15 because we have limited
88 : * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
89 : */
90 : typedef uint16 LocationIndex;
91 :
92 :
93 : /*
94 : * Store the LSN as a single 64-bit value, to allow atomic loads/stores.
95 : *
96 : * For historical reasons, the storage of 64-bit LSN values depends on CPU
97 : * endianness; PageXLogRecPtr used to be a struct consisting of two 32-bit
98 : * values. When reading (and writing) the pd_lsn field from page headers, the
99 : * caller must convert from (and convert to) the platform's native endianness.
100 : */
101 : typedef struct
102 : {
103 : uint64 lsn;
104 : } PageXLogRecPtr;
105 :
106 : #ifdef WORDS_BIGENDIAN
107 :
108 : static inline XLogRecPtr
109 : PageXLogRecPtrGet(const volatile PageXLogRecPtr *val)
110 : {
111 : return val->lsn;
112 : }
113 :
114 : static inline void
115 : PageXLogRecPtrSet(volatile PageXLogRecPtr *ptr, XLogRecPtr lsn)
116 : {
117 : ptr->lsn = lsn;
118 : }
119 :
120 : #else
121 :
122 : static inline XLogRecPtr
123 40288899 : PageXLogRecPtrGet(const volatile PageXLogRecPtr *val)
124 : {
125 40288899 : PageXLogRecPtr tmp = {val->lsn};
126 :
127 40288899 : return (tmp.lsn << 32) | (tmp.lsn >> 32);
128 : }
129 :
130 : static inline void
131 35960980 : PageXLogRecPtrSet(volatile PageXLogRecPtr *ptr, XLogRecPtr lsn)
132 : {
133 35960980 : ptr->lsn = (lsn << 32) | (lsn >> 32);
134 35960980 : }
135 :
136 : #endif
137 :
138 : /*
139 : * disk page organization
140 : *
141 : * space management information generic to any page
142 : *
143 : * pd_lsn - identifies xlog record for last change to this page.
144 : * pd_checksum - page checksum, if set.
145 : * pd_flags - flag bits.
146 : * pd_lower - offset to start of free space.
147 : * pd_upper - offset to end of free space.
148 : * pd_special - offset to start of special space.
149 : * pd_pagesize_version - size in bytes and page layout version number.
150 : * pd_prune_xid - oldest XID among potentially prunable tuples on page.
151 : *
152 : * The LSN is used by the buffer manager to enforce the basic rule of WAL:
153 : * "thou shalt write xlog before data". A dirty buffer cannot be dumped
154 : * to disk until xlog has been flushed at least as far as the page's LSN.
155 : *
156 : * pd_checksum stores the page checksum, if it has been set for this page;
157 : * zero is a valid value for a checksum. If a checksum is not in use then
158 : * we leave the field unset. This will typically mean the field is zero
159 : * though non-zero values may also be present if databases have been
160 : * pg_upgraded from releases prior to 9.3, when the same byte offset was
161 : * used to store the current timelineid when the page was last updated.
162 : * Note that there is no indication on a page as to whether the checksum
163 : * is valid or not, a deliberate design choice which avoids the problem
164 : * of relying on the page contents to decide whether to verify it. Hence
165 : * there are no flag bits relating to checksums.
166 : *
167 : * pd_prune_xid is a hint field that helps determine whether pruning will be
168 : * useful. It is currently unused in index pages.
169 : *
170 : * The page version number and page size are packed together into a single
171 : * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
172 : * there was no concept of a page version number, and doing it this way
173 : * lets us pretend that pre-7.3 databases have page version number zero.
174 : * We constrain page sizes to be multiples of 256, leaving the low eight
175 : * bits available for a version number.
176 : *
177 : * Minimum possible page size is perhaps 64B to fit page header, opaque space
178 : * and a minimal tuple; of course, in reality you want it much bigger, so
179 : * the constraint on pagesize mod 256 is not an important restriction.
180 : * On the high end, we can only support pages up to 32KB because lp_off/lp_len
181 : * are 15 bits.
182 : */
183 :
184 : typedef struct PageHeaderData
185 : {
186 : /* XXX LSN is member of *any* block, not only page-organized ones */
187 : PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
188 : * record for last change to this page */
189 : uint16 pd_checksum; /* checksum */
190 : uint16 pd_flags; /* flag bits, see below */
191 : LocationIndex pd_lower; /* offset to start of free space */
192 : LocationIndex pd_upper; /* offset to end of free space */
193 : LocationIndex pd_special; /* offset to start of special space */
194 : uint16 pd_pagesize_version;
195 : TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
196 : ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
197 : } PageHeaderData;
198 :
199 : typedef PageHeaderData *PageHeader;
200 :
/*
 * Bits stored in pd_flags.  Bits not listed here are initialized to zero
 * and may be given a meaning in the future.
 *
 * PD_HAS_FREE_LINES: there are LP_UNUSED line pointers before pd_lower.
 * Treat this as a hint rather than the truth, since changes to it are not
 * WAL-logged.
 *
 * PD_PAGE_FULL: an UPDATE did not find enough free space in the page for
 * its new tuple version, which suggests that a prune is needed.  Again,
 * this is only a hint.
 *
 * PD_ALL_VISIBLE: all tuples on the page are visible to everyone.
 */
#define PD_HAS_FREE_LINES	0x0001
#define PD_PAGE_FULL		0x0002
#define PD_ALL_VISIBLE		0x0004

/* OR of all valid pd_flags bits */
#define PD_VALID_FLAG_BITS \
	(PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
219 :
/*
 * Page layout version history:
 *
 *	0	pre-7.3 Postgres releases
 *	1	releases 7.3 and 7.4; new HeapTupleHeader layout
 *	2	release 8.0; HeapTupleHeader layout changed again
 *	3	release 8.1; HeapTupleHeader infomask bits redefined
 *	4	release 8.3; HeapTupleHeader layout changed again, pd_flags added
 *		(by stealing some bits from pd_tli), and pd_prune_xid added (which
 *		enlarges the header)
 *
 * As of release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		4
233 :
234 : /* ----------------------------------------------------------------
235 : * page support functions
236 : * ----------------------------------------------------------------
237 : */
238 :
/*
 * Size of the fixed part of the page header.  Line pointers do not count
 * as part of the header, so this is the offset of the pd_linp array.
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
243 :
244 : /*
245 : * PageIsEmpty
246 : * returns true iff no itemid has been allocated on the page
247 : */
248 : static inline bool
249 2186166 : PageIsEmpty(const PageData *page)
250 : {
251 2186166 : return ((const PageHeaderData *) page)->pd_lower <= SizeOfPageHeaderData;
252 : }
253 :
254 : /*
255 : * PageIsNew
256 : * returns true iff page has not been initialized (by PageInit)
257 : */
258 : static inline bool
259 55299077 : PageIsNew(const PageData *page)
260 : {
261 55299077 : return ((const PageHeaderData *) page)->pd_upper == 0;
262 : }
263 :
264 : /*
265 : * PageGetItemId
266 : * Returns an item identifier of a page.
267 : */
268 : static inline ItemId
269 1556685740 : PageGetItemId(Page page, OffsetNumber offsetNumber)
270 : {
271 1556685740 : return &((PageHeader) page)->pd_linp[offsetNumber - 1];
272 : }
273 :
274 : /*
275 : * PageGetContents
276 : * To be used in cases where the page does not contain line pointers.
277 : *
278 : * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
279 : * Now it is. Beware of old code that might think the offset to the contents
280 : * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
281 : */
282 : static inline char *
283 26310101 : PageGetContents(Page page)
284 : {
285 26310101 : return (char *) page + MAXALIGN(SizeOfPageHeaderData);
286 : }
287 :
288 : /* ----------------
289 : * functions to access page size info
290 : * ----------------
291 : */
292 :
293 : /*
294 : * PageGetPageSize
295 : * Returns the page size of a page.
296 : *
297 : * this can only be called on a formatted page (unlike
298 : * BufferGetPageSize, which can be called on an unformatted page).
299 : * however, it can be called on a page that is not stored in a buffer.
300 : */
301 : static inline Size
302 32996605 : PageGetPageSize(const PageData *page)
303 : {
304 32996605 : return (Size) (((const PageHeaderData *) page)->pd_pagesize_version & (uint16) 0xFF00);
305 : }
306 :
307 : /*
308 : * PageGetPageLayoutVersion
309 : * Returns the page layout version of a page.
310 : */
311 : static inline uint8
312 4 : PageGetPageLayoutVersion(const PageData *page)
313 : {
314 4 : return (((const PageHeaderData *) page)->pd_pagesize_version & 0x00FF);
315 : }
316 :
317 : /*
318 : * PageSetPageSizeAndVersion
319 : * Sets the page size and page layout version number of a page.
320 : *
321 : * We could support setting these two values separately, but there's
322 : * no real need for it at the moment.
323 : */
324 : static inline void
325 435667 : PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
326 : {
327 : Assert((size & 0xFF00) == size);
328 : Assert((version & 0x00FF) == version);
329 :
330 435667 : ((PageHeader) page)->pd_pagesize_version = size | version;
331 435667 : }
332 :
333 : /* ----------------
334 : * page special data functions
335 : * ----------------
336 : */
337 : /*
338 : * PageGetSpecialSize
339 : * Returns size of special space on a page.
340 : */
341 : static inline uint16
342 32405313 : PageGetSpecialSize(const PageData *page)
343 : {
344 32405313 : return (PageGetPageSize(page) - ((const PageHeaderData *) page)->pd_special);
345 : }
346 :
347 : /*
348 : * Using assertions, validate that the page special pointer is OK.
349 : *
350 : * This is intended to catch use of the pointer before page initialization.
351 : */
352 : static inline void
353 365676707 : PageValidateSpecialPointer(const PageData *page)
354 : {
355 : Assert(page);
356 : Assert(((const PageHeaderData *) page)->pd_special <= BLCKSZ);
357 : Assert(((const PageHeaderData *) page)->pd_special >= SizeOfPageHeaderData);
358 365676707 : }
359 :
/*
 * PageGetSpecialPointer
 *		Return a pointer to the special space on a page.
 *
 * The pointer is validated with PageValidateSpecialPointer() first; the
 * result is (page) advanced by pd_special and keeps the pointer type of the
 * argument.  Being a macro, this expands "page" more than once, so the
 * argument should not have side effects.
 */
#define PageGetSpecialPointer(page) \
	(PageValidateSpecialPointer(page), \
	 ((page) + ((PageHeader) (page))->pd_special))
369 :
370 : /*
371 : * PageGetItem
372 : * Retrieves an item on the given page.
373 : *
374 : * Note:
375 : * This does not change the status of any of the resources passed.
376 : * The semantics may change in the future.
377 : */
378 : static inline void *
379 719153852 : PageGetItem(PageData *page, const ItemIdData *itemId)
380 : {
381 : Assert(page);
382 : Assert(ItemIdHasStorage(itemId));
383 :
384 719153852 : return (char *) page + ItemIdGetOffset(itemId);
385 : }
386 :
387 : /*
388 : * PageGetMaxOffsetNumber
389 : * Returns the maximum offset number used by the given page.
390 : * Since offset numbers are 1-based, this is also the number
391 : * of items on the page.
392 : *
393 : * NOTE: if the page is not initialized (pd_lower == 0), we must
394 : * return zero to ensure sane behavior.
395 : */
396 : static inline OffsetNumber
397 432247106 : PageGetMaxOffsetNumber(const PageData *page)
398 : {
399 432247106 : const PageHeaderData *pageheader = (const PageHeaderData *) page;
400 :
401 432247106 : if (pageheader->pd_lower <= SizeOfPageHeaderData)
402 619066 : return 0;
403 : else
404 431628040 : return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
405 : }
406 :
407 : /*
408 : * Additional functions for access to page headers.
409 : */
410 : static inline XLogRecPtr
411 39045370 : PageGetLSN(const PageData *page)
412 : {
413 39045370 : return PageXLogRecPtrGet(&((const PageHeaderData *) page)->pd_lsn);
414 : }
415 :
416 : static inline void
417 30997208 : PageSetLSN(Page page, XLogRecPtr lsn)
418 : {
419 30997208 : PageXLogRecPtrSet(&((PageHeader) page)->pd_lsn, lsn);
420 30997208 : }
421 :
422 : static inline bool
423 17740040 : PageHasFreeLinePointers(const PageData *page)
424 : {
425 17740040 : return ((const PageHeaderData *) page)->pd_flags & PD_HAS_FREE_LINES;
426 : }
427 : static inline void
428 32928 : PageSetHasFreeLinePointers(Page page)
429 : {
430 32928 : ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
431 32928 : }
432 : static inline void
433 4872061 : PageClearHasFreeLinePointers(Page page)
434 : {
435 4872061 : ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
436 4872061 : }
437 :
438 : static inline bool
439 3000778 : PageIsFull(const PageData *page)
440 : {
441 3000778 : return ((const PageHeaderData *) page)->pd_flags & PD_PAGE_FULL;
442 : }
443 : static inline void
444 2196920 : PageSetFull(Page page)
445 : {
446 2196920 : ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
447 2196920 : }
448 : static inline void
449 4969552 : PageClearFull(Page page)
450 : {
451 4969552 : ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
452 4969552 : }
453 :
454 : static inline bool
455 74437505 : PageIsAllVisible(const PageData *page)
456 : {
457 74437505 : return ((const PageHeaderData *) page)->pd_flags & PD_ALL_VISIBLE;
458 : }
459 : static inline void
460 78639 : PageSetAllVisible(Page page)
461 : {
462 78639 : ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
463 78639 : }
464 : static inline void
465 4817961 : PageClearAllVisible(Page page)
466 : {
467 4817961 : ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
468 4817961 : }
469 :
470 : static inline TransactionId
471 21098153 : PageGetPruneXid(const PageData *page)
472 : {
473 21098153 : return ((const PageHeaderData *) page)->pd_prune_xid;
474 : }
475 :
/*
 * These two require "access/transam.h", so left as macros.
 *
 * PageSetPrunable(page, xid)
 *		Record xid in pd_prune_xid if the field is currently invalid or if
 *		xid precedes the value already stored there.  xid must be a normal
 *		transaction ID (asserted).
 *
 *		Both arguments are captured in block-local variables, so each one is
 *		evaluated exactly once even if the caller passes an expression with
 *		side effects.
 *
 * PageClearPrunable(page)
 *		Reset pd_prune_xid to InvalidTransactionId.  "page" is evaluated
 *		once; the macro yields the assigned value.
 */
#define PageSetPrunable(page, xid) \
do { \
	PageHeader	psp_hdr_ = (PageHeader) (page); \
	TransactionId psp_xid_ = (xid); \
	Assert(TransactionIdIsNormal(psp_xid_)); \
	if (!TransactionIdIsValid(psp_hdr_->pd_prune_xid) || \
		TransactionIdPrecedes(psp_xid_, psp_hdr_->pd_prune_xid)) \
		psp_hdr_->pd_prune_xid = psp_xid_; \
} while (0)
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
488 :
489 :
490 : /* ----------------------------------------------------------------
491 : * extern declarations
492 : * ----------------------------------------------------------------
493 : */
494 :
/*
 * Flag bits accepted by PageAddItemExtended(); each occupies its own bit so
 * they can be OR'ed together.
 */
#define PAI_OVERWRITE	0x0001
#define PAI_IS_HEAP		0x0002
498 :
/*
 * Flag bits accepted by PageIsVerified(); each occupies its own bit so they
 * can be OR'ed together.
 */
#define PIV_LOG_WARNING				0x0001
#define PIV_LOG_LOG					0x0002
#define PIV_IGNORE_CHECKSUM_FAILURE	0x0004
#define PIV_ZERO_BUFFERS_ON_ERROR	0x0008
504 :
/*
 * PageAddItem
 *		Convenience wrapper around PageAddItemExtended() that takes two
 *		booleans instead of a flags word: a true "overwrite" contributes
 *		PAI_OVERWRITE and a true "is_heap" contributes PAI_IS_HEAP.  The
 *		remaining arguments are passed through unchanged.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))
509 :
510 : /*
511 : * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsVerified(), it
512 : * is much faster to check if a page is full of zeroes using the native word
513 : * size. Note that this assertion is kept within a header to make sure that
514 : * StaticAssertDecl() works across various combinations of platforms and
515 : * compilers.
516 : */
517 : StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
518 : "BLCKSZ has to be a multiple of sizeof(size_t)");
519 :
520 : extern void PageInit(Page page, Size pageSize, Size specialSize);
521 : extern bool PageIsVerified(PageData *page, BlockNumber blkno, int flags,
522 : bool *checksum_failure_p);
523 : extern OffsetNumber PageAddItemExtended(Page page, const void *item, Size size,
524 : OffsetNumber offsetNumber, int flags);
525 : extern Page PageGetTempPage(const PageData *page);
526 : extern Page PageGetTempPageCopy(const PageData *page);
527 : extern Page PageGetTempPageCopySpecial(const PageData *page);
528 : extern void PageRestoreTempPage(Page tempPage, Page oldPage);
529 : extern void PageRepairFragmentation(Page page);
530 : extern void PageTruncateLinePointerArray(Page page);
531 : extern Size PageGetFreeSpace(const PageData *page);
532 : extern Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups);
533 : extern Size PageGetExactFreeSpace(const PageData *page);
534 : extern Size PageGetHeapFreeSpace(const PageData *page);
535 : extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
536 : extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
537 : extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
538 : extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
539 : const void *newtup, Size newsize);
540 : extern void PageSetChecksum(Page page, BlockNumber blkno);
541 :
542 : #endif /* BUFPAGE_H */
|