LCOV - code coverage report
Current view: top level - src/include/storage - bufpage.h (source / functions)
Test:         PostgreSQL 18devel
Date:         2024-11-21 08:14:44

                 Hit    Total    Coverage
Lines:            60       60     100.0 %
Functions:        24       24     100.0 %

Legend:       Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bufpage.h
       4             :  *    Standard POSTGRES buffer page definitions.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * src/include/storage/bufpage.h
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #ifndef BUFPAGE_H
      15             : #define BUFPAGE_H
      16             : 
      17             : #include "access/xlogdefs.h"
      18             : #include "storage/block.h"
      19             : #include "storage/item.h"
      20             : #include "storage/off.h"
      21             : 
      22             : /* GUC variable */
      23             : extern PGDLLIMPORT bool ignore_checksum_failure;
      24             : 
      25             : /*
      26             :  * A postgres disk page is an abstraction layered on top of a postgres
      27             :  * disk block (which is simply a unit of i/o, see block.h).
      28             :  *
      29             :  * specifically, while a disk block can be unformatted, a postgres
      30             :  * disk page is always a slotted page of the form:
      31             :  *
      32             :  * +----------------+---------------------------------+
      33             :  * | PageHeaderData | linp1 linp2 linp3 ...           |
      34             :  * +-----------+----+---------------------------------+
      35             :  * | ... linpN |                                      |
      36             :  * +-----------+--------------------------------------+
      37             :  * |           ^ pd_lower                             |
      38             :  * |                                                  |
      39             :  * |             v pd_upper                           |
      40             :  * +-------------+------------------------------------+
      41             :  * |             | tupleN ...                         |
      42             :  * +-------------+------------------+-----------------+
      43             :  * |       ... tuple3 tuple2 tuple1 | "special space" |
      44             :  * +--------------------------------+-----------------+
      45             :  *                                  ^ pd_special
      46             :  *
      47             :  * a page is full when nothing can be added between pd_lower and
      48             :  * pd_upper.
      49             :  *
      50             :  * all blocks written out by an access method must be disk pages.
      51             :  *
      52             :  * EXCEPTIONS:
      53             :  *
      54             :  * obviously, a page is not formatted before it is initialized by
      55             :  * a call to PageInit.
      56             :  *
      57             :  * NOTES:
      58             :  *
      59             :  * linp1..N form an ItemId (line pointer) array.  ItemPointers point
      60             :  * to a physical block number and a logical offset (line pointer
      61             :  * number) within that block/page.  Note that OffsetNumbers
      62             :  * conventionally start at 1, not 0.
      63             :  *
      64             :  * tuple1..N are added "backwards" on the page.  Since an ItemPointer
      65             :  * offset is used to access an ItemId entry rather than an actual
      66             :  * byte-offset position, tuples can be physically shuffled on a page
      67             :  * whenever the need arises.  This indirection also keeps crash recovery
      68             :  * relatively simple, because the low-level details of page space
      69             :  * management can be controlled by standard buffer page code during
      70             :  * logging, and during recovery.
      71             :  *
      72             :  * AM-generic per-page information is kept in PageHeaderData.
      73             :  *
      74             :  * AM-specific per-page data (if any) is kept in the area marked "special
      75             :  * space"; each AM has an "opaque" structure defined somewhere that is
      76             :  * stored as the page trailer.  An access method should always
      77             :  * initialize its pages with PageInit and then set its own opaque
      78             :  * fields.
      79             :  */
      80             : 
      81             : typedef Pointer Page;
      82             : 
      83             : 
      84             : /*
      85             :  * location (byte offset) within a page.
      86             :  *
      87             :  * note that this is actually limited to 2^15 because we have limited
      88             :  * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
      89             :  */
      90             : typedef uint16 LocationIndex;
      91             : 
      92             : 
      93             : /*
      94             :  * For historical reasons, the 64-bit LSN value is stored as two 32-bit
      95             :  * values.
      96             :  */
      97             : typedef struct
      98             : {
      99             :     uint32      xlogid;         /* high bits */
     100             :     uint32      xrecoff;        /* low bits */
     101             : } PageXLogRecPtr;
     102             : 
     103             : static inline XLogRecPtr
     104    57854318 : PageXLogRecPtrGet(PageXLogRecPtr val)
     105             : {
     106    57854318 :     return (uint64) val.xlogid << 32 | val.xrecoff;
     107             : }
     108             : 
     109             : #define PageXLogRecPtrSet(ptr, lsn) \
     110             :     ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
     111             : 
     112             : /*
     113             :  * disk page organization
     114             :  *
     115             :  * space management information generic to any page
     116             :  *
     117             :  *      pd_lsn      - identifies xlog record for last change to this page.
     118             :  *      pd_checksum - page checksum, if set.
     119             :  *      pd_flags    - flag bits.
     120             :  *      pd_lower    - offset to start of free space.
     121             :  *      pd_upper    - offset to end of free space.
     122             :  *      pd_special  - offset to start of special space.
     123             :  *      pd_pagesize_version - size in bytes and page layout version number.
     124             :  *      pd_prune_xid - oldest XID among potentially prunable tuples on page.
     125             :  *
     126             :  * The LSN is used by the buffer manager to enforce the basic rule of WAL:
     127             :  * "thou shalt write xlog before data".  A dirty buffer cannot be dumped
     128             :  * to disk until xlog has been flushed at least as far as the page's LSN.
     129             :  *
     130             :  * pd_checksum stores the page checksum, if it has been set for this page;
     131             :  * zero is a valid value for a checksum. If a checksum is not in use then
     132             :  * we leave the field unset. This will typically mean the field is zero
     133             :  * though non-zero values may also be present if databases have been
     134             :  * pg_upgraded from releases prior to 9.3, when the same byte offset was
     135             :  * used to store the current timelineid when the page was last updated.
     136             :  * Note that there is no indication on a page as to whether the checksum
     137             :  * is valid or not, a deliberate design choice which avoids the problem
     138             :  * of relying on the page contents to decide whether to verify it. Hence
     139             :  * there are no flag bits relating to checksums.
     140             :  *
     141             :  * pd_prune_xid is a hint field that helps determine whether pruning will be
     142             :  * useful.  It is currently unused in index pages.
     143             :  *
     144             :  * The page version number and page size are packed together into a single
     145             :  * uint16 field.  This is for historical reasons: before PostgreSQL 7.3,
     146             :  * there was no concept of a page version number, and doing it this way
     147             :  * lets us pretend that pre-7.3 databases have page version number zero.
     148             :  * We constrain page sizes to be multiples of 256, leaving the low eight
     149             :  * bits available for a version number.
     150             :  *
     151             :  * Minimum possible page size is perhaps 64B to fit page header, opaque space
     152             :  * and a minimal tuple; of course, in reality you want it much bigger, so
     153             :  * the constraint on pagesize mod 256 is not an important restriction.
     154             :  * On the high end, we can only support pages up to 32KB because lp_off/lp_len
     155             :  * are 15 bits.
     156             :  */
     157             : 
     158             : typedef struct PageHeaderData
     159             : {
     160             :     /* XXX LSN is member of *any* block, not only page-organized ones */
     161             :     PageXLogRecPtr pd_lsn;      /* LSN: next byte after last byte of xlog
     162             :                                  * record for last change to this page */
     163             :     uint16      pd_checksum;    /* checksum */
     164             :     uint16      pd_flags;       /* flag bits, see below */
     165             :     LocationIndex pd_lower;     /* offset to start of free space */
     166             :     LocationIndex pd_upper;     /* offset to end of free space */
     167             :     LocationIndex pd_special;   /* offset to start of special space */
     168             :     uint16      pd_pagesize_version;
     169             :     TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
     170             :     ItemIdData  pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
     171             : } PageHeaderData;
     172             : 
     173             : typedef PageHeaderData *PageHeader;
     174             : 
     175             : /*
     176             :  * pd_flags contains the following flag bits.  Undefined bits are initialized
     177             :  * to zero and may be used in the future.
     178             :  *
     179             :  * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
     180             :  * pd_lower.  This should be considered a hint rather than the truth, since
     181             :  * changes to it are not WAL-logged.
     182             :  *
     183             :  * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
     184             :  * page for its new tuple version; this suggests that a prune is needed.
     185             :  * Again, this is just a hint.
     186             :  */
     187             : #define PD_HAS_FREE_LINES   0x0001  /* are there any unused line pointers? */
     188             : #define PD_PAGE_FULL        0x0002  /* not enough free space for new tuple? */
     189             : #define PD_ALL_VISIBLE      0x0004  /* all tuples on page are visible to
     190             :                                      * everyone */
     191             : 
     192             : #define PD_VALID_FLAG_BITS  0x0007  /* OR of all valid pd_flags bits */
     193             : 
     194             : /*
     195             :  * Page layout version number 0 is for pre-7.3 Postgres releases.
     196             :  * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
     197             :  * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
     198             :  * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
     199             :  * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
     200             :  *      added the pd_flags field (by stealing some bits from pd_tli),
     201             :  *      as well as adding the pd_prune_xid field (which enlarges the header).
     202             :  *
     203             :  * As of Release 9.3, the checksum version must also be considered when
     204             :  * handling pages.
     205             :  */
     206             : #define PG_PAGE_LAYOUT_VERSION      4
     207             : #define PG_DATA_CHECKSUM_VERSION    1
     208             : 
     209             : /* ----------------------------------------------------------------
     210             :  *                      page support functions
     211             :  * ----------------------------------------------------------------
     212             :  */
     213             : 
     214             : /*
     215             :  * line pointer(s) do not count as part of header
     216             :  */
     217             : #define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
     218             : 
     219             : /*
     220             :  * PageIsEmpty
     221             :  *      returns true iff no itemid has been allocated on the page
     222             :  */
     223             : static inline bool
     224     2789478 : PageIsEmpty(Page page)
     225             : {
     226     2789478 :     return ((PageHeader) page)->pd_lower <= SizeOfPageHeaderData;
     227             : }
     228             : 
     229             : /*
     230             :  * PageIsNew
     231             :  *      returns true iff page has not been initialized (by PageInit)
     232             :  */
     233             : static inline bool
     234    72656252 : PageIsNew(Page page)
     235             : {
     236    72656252 :     return ((PageHeader) page)->pd_upper == 0;
     237             : }
     238             : 
     239             : /*
     240             :  * PageGetItemId
     241             :  *      Returns the line pointer (ItemId) at the given 1-based offset number.
     242             :  */
     243             : static inline ItemId
     244  2217480342 : PageGetItemId(Page page, OffsetNumber offsetNumber)
     245             : {
     246  2217480342 :     return &((PageHeader) page)->pd_linp[offsetNumber - 1];
     247             : }
     248             : 
     249             : /*
     250             :  * PageGetContents
     251             :  *      To be used in cases where the page does not contain line pointers.
     252             :  *
     253             :  * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
     254             :  * Now it is.  Beware of old code that might think the offset to the contents
     255             :  * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
     256             :  */
     257             : static inline char *
     258    43368090 : PageGetContents(Page page)
     259             : {
     260    43368090 :     return (char *) page + MAXALIGN(SizeOfPageHeaderData);
     261             : }
     262             : 
     263             : /* ----------------
     264             :  *      functions to access page size info
     265             :  * ----------------
     266             :  */
     267             : 
     268             : /*
     269             :  * PageGetPageSize
     270             :  *      Returns the page size of a page.
     271             :  *
     272             :  * This can only be called on a formatted page (unlike
     273             :  * BufferGetPageSize, which can be called on an unformatted page).
     274             :  * However, it can be called on a page that is not stored in a buffer.
     275             :  */
     276             : static inline Size
     277    65871252 : PageGetPageSize(Page page)
     278             : {
     279    65871252 :     return (Size) (((PageHeader) page)->pd_pagesize_version & (uint16) 0xFF00);
     280             : }
     281             : 
     282             : /*
     283             :  * PageGetPageLayoutVersion
     284             :  *      Returns the page layout version of a page.
     285             :  */
     286             : static inline uint8
     287           6 : PageGetPageLayoutVersion(Page page)
     288             : {
     289           6 :     return (((PageHeader) page)->pd_pagesize_version & 0x00FF);
     290             : }
     291             : 
     292             : /*
     293             :  * PageSetPageSizeAndVersion
     294             :  *      Sets the page size and page layout version number of a page.
     295             :  *
     296             :  * We could support setting these two values separately, but there's
     297             :  * no real need for it at the moment.
     298             :  */
     299             : static inline void
     300      647200 : PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
     301             : {
     302             :     Assert((size & 0xFF00) == size);
     303             :     Assert((version & 0x00FF) == version);
     304             : 
     305      647200 :     ((PageHeader) page)->pd_pagesize_version = size | version;
     306      647200 : }
     307             : 
     308             : /* ----------------
     309             :  *      page special data functions
     310             :  * ----------------
     311             :  */
     312             : /*
     313             :  * PageGetSpecialSize
     314             :  *      Returns size of special space on a page.
     315             :  */
     316             : static inline uint16
     317    43170688 : PageGetSpecialSize(Page page)
     318             : {
     319    43170688 :     return (PageGetPageSize(page) - ((PageHeader) page)->pd_special);
     320             : }
     321             : 
     322             : /*
     323             :  * Using assertions, validate that the page special pointer is OK.
     324             :  *
     325             :  * This is intended to catch use of the pointer before page initialization.
     326             :  */
     327             : static inline void
     328   492167680 : PageValidateSpecialPointer(Page page)
     329             : {
     330             :     Assert(page);
     331             :     Assert(((PageHeader) page)->pd_special <= BLCKSZ);
     332             :     Assert(((PageHeader) page)->pd_special >= SizeOfPageHeaderData);
     333   492167680 : }
     334             : 
     335             : /*
     336             :  * PageGetSpecialPointer
     337             :  *      Returns pointer to special space on a page.
     338             :  */
     339             : static inline char *
     340   492167680 : PageGetSpecialPointer(Page page)
     341             : {
     342   492167680 :     PageValidateSpecialPointer(page);
     343   492167680 :     return (char *) page + ((PageHeader) page)->pd_special;
     344             : }
     345             : 
     346             : /*
     347             :  * PageGetItem
     348             :  *      Retrieves an item on the given page.
     349             :  *
     350             :  * Note:
     351             :  *      This does not change the status of any of the resources passed.
     352             :  *      The semantics may change in the future.
     353             :  */
     354             : static inline Item
     355  1003824034 : PageGetItem(Page page, ItemId itemId)
     356             : {
     357             :     Assert(page);
     358             :     Assert(ItemIdHasStorage(itemId));
     359             : 
     360  1003824034 :     return (Item) (((char *) page) + ItemIdGetOffset(itemId));
     361             : }
     362             : 
     363             : /*
     364             :  * PageGetMaxOffsetNumber
     365             :  *      Returns the maximum offset number used by the given page.
     366             :  *      Since offset numbers are 1-based, this is also the number
     367             :  *      of items on the page.
     368             :  *
     369             :  *      NOTE: if the page is not initialized (pd_lower == 0), we must
     370             :  *      return zero to ensure sane behavior.
     371             :  */
     372             : static inline OffsetNumber
     373   703269140 : PageGetMaxOffsetNumber(Page page)
     374             : {
     375   703269140 :     PageHeader  pageheader = (PageHeader) page;
     376             : 
     377   703269140 :     if (pageheader->pd_lower <= SizeOfPageHeaderData)
     378      861844 :         return 0;
     379             :     else
     380   702407296 :         return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
     381             : }
     382             : 
     383             : /*
     384             :  * Additional functions for access to page headers.
     385             :  */
     386             : static inline XLogRecPtr
     387    55707584 : PageGetLSN(const char *page)
     388             : {
     389    55707584 :     return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn);
     390             : }
     391             : static inline void
     392    35455126 : PageSetLSN(Page page, XLogRecPtr lsn)
     393             : {
     394    35455126 :     PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
     395    35455126 : }
     396             : 
     397             : static inline bool
     398    22140700 : PageHasFreeLinePointers(Page page)
     399             : {
     400    22140700 :     return ((PageHeader) page)->pd_flags & PD_HAS_FREE_LINES;
     401             : }
     402             : static inline void
     403       46846 : PageSetHasFreeLinePointers(Page page)
     404             : {
     405       46846 :     ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
     406       46846 : }
     407             : static inline void
     408     8983966 : PageClearHasFreeLinePointers(Page page)
     409             : {
     410     8983966 :     ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
     411     8983966 : }
     412             : 
     413             : static inline bool
     414     2669702 : PageIsFull(Page page)
     415             : {
     416     2669702 :     return ((PageHeader) page)->pd_flags & PD_PAGE_FULL;
     417             : }
     418             : static inline void
     419      281042 : PageSetFull(Page page)
     420             : {
     421      281042 :     ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
     422      281042 : }
     423             : static inline void
     424     8981354 : PageClearFull(Page page)
     425             : {
     426     8981354 :     ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
     427     8981354 : }
     428             : 
     429             : static inline bool
     430    65601790 : PageIsAllVisible(Page page)
     431             : {
     432    65601790 :     return ((PageHeader) page)->pd_flags & PD_ALL_VISIBLE;
     433             : }
     434             : static inline void
     435       84234 : PageSetAllVisible(Page page)
     436             : {
     437       84234 :     ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
     438       84234 : }
     439             : static inline void
     440     8909964 : PageClearAllVisible(Page page)
     441             : {
     442     8909964 :     ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
     443     8909964 : }
     444             : 
     445             : /*
     446             :  * These two require "access/transam.h", so left as macros.
     447             :  */
     448             : #define PageSetPrunable(page, xid) \
     449             : do { \
     450             :     Assert(TransactionIdIsNormal(xid)); \
     451             :     if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
     452             :         TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
     453             :         ((PageHeader) (page))->pd_prune_xid = (xid); \
     454             : } while (0)
     455             : #define PageClearPrunable(page) \
     456             :     (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
     457             : 
     458             : 
     459             : /* ----------------------------------------------------------------
     460             :  *      extern declarations
     461             :  * ----------------------------------------------------------------
     462             :  */
     463             : 
     464             : /* flags for PageAddItemExtended() */
     465             : #define PAI_OVERWRITE           (1 << 0)
     466             : #define PAI_IS_HEAP             (1 << 1)
     467             : 
     468             : /* flags for PageIsVerifiedExtended() */
     469             : #define PIV_LOG_WARNING         (1 << 0)
     470             : #define PIV_REPORT_STAT         (1 << 1)
     471             : 
     472             : #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
     473             :     PageAddItemExtended(page, item, size, offsetNumber, \
     474             :                         ((overwrite) ? PAI_OVERWRITE : 0) | \
     475             :                         ((is_heap) ? PAI_IS_HEAP : 0))
     476             : 
     477             : #define PageIsVerified(page, blkno) \
     478             :     PageIsVerifiedExtended(page, blkno, \
     479             :                            PIV_LOG_WARNING | PIV_REPORT_STAT)
     480             : 
     481             : /*
     482             :  * Check that BLCKSZ is a multiple of sizeof(size_t).  In
     483             :  * PageIsVerifiedExtended(), it is much faster to check if a page is
     484             :  * full of zeroes using the native word size.  Note that this assertion
     485             :  * is kept within a header to make sure that StaticAssertDecl() works
     486             :  * across various combinations of platforms and compilers.
     487             :  */
     488             : StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
     489             :                  "BLCKSZ has to be a multiple of sizeof(size_t)");
     490             : 
     491             : extern void PageInit(Page page, Size pageSize, Size specialSize);
     492             : extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
     493             : extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
     494             :                                         OffsetNumber offsetNumber, int flags);
     495             : extern Page PageGetTempPage(Page page);
     496             : extern Page PageGetTempPageCopy(Page page);
     497             : extern Page PageGetTempPageCopySpecial(Page page);
     498             : extern void PageRestoreTempPage(Page tempPage, Page oldPage);
     499             : extern void PageRepairFragmentation(Page page);
     500             : extern void PageTruncateLinePointerArray(Page page);
     501             : extern Size PageGetFreeSpace(Page page);
     502             : extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
     503             : extern Size PageGetExactFreeSpace(Page page);
     504             : extern Size PageGetHeapFreeSpace(Page page);
     505             : extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
     506             : extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
     507             : extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
     508             : extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
     509             :                                     Item newtup, Size newsize);
     510             : extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
     511             : extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
     512             : 
     513             : #endif                          /* BUFPAGE_H */

Generated by: LCOV version 1.14