LCOV - code coverage report
Current view: top level - src/backend/access/transam - generic_xlog.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 74.7 % 154 115
Test Date: 2026-02-28 11:14:57 Functions: 70.0 % 10 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * generic_xlog.c
       4              :  *   Implementation of generic xlog records.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * src/backend/access/transam/generic_xlog.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : #include "postgres.h"
      15              : 
      16              : #include "access/bufmask.h"
      17              : #include "access/generic_xlog.h"
      18              : #include "access/xlogutils.h"
      19              : #include "miscadmin.h"
      20              : 
      21              : /*-------------------------------------------------------------------------
      22              :  * Internally, a delta between pages consists of a set of fragments.  Each
      23              :  * fragment represents changes made in a given region of a page.  A fragment
      24              :  * is made up as follows:
      25              :  *
      26              :  * - offset of page region (OffsetNumber)
      27              :  * - length of page region (OffsetNumber)
      28              :  * - data - the data to place into the region ('length' number of bytes)
      29              :  *
      30              :  * Unchanged regions of a page are not represented in its delta.  As a result,
      31              :  * a delta can be more compact than the full page image.  But having an
      32              :  * unchanged region between two fragments that is smaller than the fragment
      33              :  * header (offset+length) does not pay off in terms of the overall size of
      34              :  * the delta.  For this reason, we merge adjacent fragments if the unchanged
      35              :  * region between them is <= MATCH_THRESHOLD bytes.
      36              :  *
      37              :  * We do not bother to merge fragments across the "lower" and "upper" parts
      38              :  * of a page; it's very seldom the case that pd_lower and pd_upper are within
      39              :  * MATCH_THRESHOLD bytes of each other, and handling that infrequent case
      40              :  * would complicate and slow down the delta-computation code unduly.
      41              :  * Therefore, the worst-case delta size includes two fragment headers plus
      42              :  * a full page's worth of data.
      43              :  *-------------------------------------------------------------------------
      44              :  */
      45              : #define FRAGMENT_HEADER_SIZE    (2 * sizeof(OffsetNumber))
      46              : #define MATCH_THRESHOLD         FRAGMENT_HEADER_SIZE
      47              : #define MAX_DELTA_SIZE          (BLCKSZ + 2 * FRAGMENT_HEADER_SIZE)
      48              : 
      49              : /* Per-page working state for one buffer registered in a generic xlog record */
      50              : typedef struct
      51              : {
      52              :     Buffer      buffer;         /* registered buffer; InvalidBuffer if unused */
      53              :     int         flags;          /* GENERIC_XLOG_* flags for this buffer */
      54              :     int         deltaLen;       /* bytes of delta[] filled in so far */
      55              :     char       *image;          /* page copy that callers modify instead of the
      56              :                                  * buffer itself; points into aligned storage */
      57              :     char        delta[MAX_DELTA_SIZE];  /* fragments turning old page into new */
      58              : } GenericXLogPageData;
      59              : 
      60              : /*
      61              :  * State of one generic xlog record under construction.  Must be allocated
      62              :  * at an I/O aligned address, because the page images come first in it.
      63              :  */
      64              : struct GenericXLogState
      65              : {
      66              :     /* Page images (properly aligned, must be first) */
      67              :     PGIOAlignedBlock images[MAX_GENERIC_XLOG_PAGES];
      68              :     /* Per-page bookkeeping (GenericXLogPageData), one entry per image slot */
      69              :     GenericXLogPageData pages[MAX_GENERIC_XLOG_PAGES];
      70              :     bool        isLogged;       /* does the relation need WAL? */
      71              : };
      72              : 
      73              : static void writeFragment(GenericXLogPageData *pageData, OffsetNumber offset,
      74              :                           OffsetNumber length, const char *data);
      75              : static void computeRegionDelta(GenericXLogPageData *pageData,
      76              :                                const char *curpage, const char *targetpage,
      77              :                                int targetStart, int targetEnd,
      78              :                                int validStart, int validEnd);
      79              : static void computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage);
      80              : static void applyPageRedo(Page page, const char *delta, Size deltaSize);
      81              : 
      82              : 
      83              : /*
      84              :  * Append the next fragment to pageData's delta and advance deltaLen.
      85              :  *
      86              :  * The fragment is stored as its offset, then its length, then 'length'
      87              :  * bytes copied from data.
      88              :  */
      89              : static void
      90       416465 : writeFragment(GenericXLogPageData *pageData, OffsetNumber offset, OffsetNumber length,
      91              :               const char *data)
      92              : {
      93       416465 :     char       *ptr = pageData->delta + pageData->deltaLen;
      94              : 
      95              :     /* Verify the header plus payload fit in what is left of delta[] */
      96              :     Assert(pageData->deltaLen + sizeof(offset) +
      97              :            sizeof(length) + length <= sizeof(pageData->delta));
      98              : 
      99              :     /* Emit offset, then length, then the payload, advancing ptr each time */
     100       416465 :     memcpy(ptr, &offset, sizeof(offset));
     101       416465 :     ptr += sizeof(offset);
     102       416465 :     memcpy(ptr, &length, sizeof(length));
     103       416465 :     ptr += sizeof(length);
     104       416465 :     memcpy(ptr, data, length);
     105       416465 :     ptr += length;
     106              : 
     107       416465 :     pageData->deltaLen = ptr - pageData->delta;
     108       416465 : }
     109              : 
     110              : /*
     111              :  * Compute the XLOG fragments needed to transform a region of curpage into the
     112              :  * corresponding region of targetpage, and append them to pageData's delta
     113              :  * field.  The region to transform runs from targetStart to targetEnd-1.
     114              :  * Bytes in curpage outside the range validStart to validEnd-1 should be
     115              :  * considered invalid, and always overwritten with target data.
     116              :  *
     117              :  * This function is a hot spot, so it's worth keeping the data-matching
     118              :  * loops as tight as possible.
     119              :  */
     120              : static void
     121       210630 : computeRegionDelta(GenericXLogPageData *pageData,
     122              :                    const char *curpage, const char *targetpage,
     123              :                    int targetStart, int targetEnd,
     124              :                    int validStart, int validEnd)
     125              : {
     126              :     int         i,
     127              :                 loopEnd,
     128       210630 :                 fragmentBegin = -1, /* start of unwritten fragment, or -1 */
     129       210630 :                 fragmentEnd = -1;   /* end of its unmatched data, or -1 */
     130              : 
     131              :     /* Deal with any invalid start region by including it in first fragment */
     132       210630 :     if (validStart > targetStart)
     133              :     {
     134            0 :         fragmentBegin = targetStart;
     135            0 :         targetStart = validStart;
     136              :     }
     137              : 
     138              :     /* We'll deal with any invalid end region after the main loop */
     139       210630 :     loopEnd = Min(targetEnd, validEnd);
     140              : 
     141              :     /* Examine all the potentially matchable bytes */
     142       210630 :     i = targetStart;
     143      1691358 :     while (i < loopEnd)
     144              :     {
     145      1481156 :         if (curpage[i] != targetpage[i])
     146              :         {
     147              :             /* On unmatched byte, start new fragment if not already in one */
     148      1375072 :             if (fragmentBegin < 0)
     149       312578 :                 fragmentBegin = i;
     150              :             /* Mark unmatched-data endpoint as uncertain */
     151      1375072 :             fragmentEnd = -1;
     152              :             /* Extend the fragment as far as possible in a tight loop */
     153      1375072 :             i++;
     154      2242419 :             while (i < loopEnd && curpage[i] != targetpage[i])
     155       867347 :                 i++;
     156      1375072 :             if (i >= loopEnd)
     157          428 :                 break;
     158              :         }
     159              : 
     160              :         /* Found a matched byte, so remember end of unmatched fragment */
     161      1480728 :         fragmentEnd = i;
     162              : 
     163              :         /*
     164              :          * Extend the match as far as possible in a tight loop.  (On typical
     165              :          * workloads, this inner loop is the bulk of this function's runtime.)
     166              :          */
     167      1480728 :         i++;
     168    592815133 :         while (i < loopEnd && curpage[i] == targetpage[i])
     169    591334405 :             i++;
     170              : 
     171              :         /*
     172              :          * There are several possible cases at this point:
     173              :          *
     174              :          * 1. We have no unwritten fragment (fragmentBegin < 0).  There's
     175              :          * nothing to write; and it doesn't matter what fragmentEnd is.
     176              :          *
     177              :          * 2. We found more than MATCH_THRESHOLD consecutive matching bytes.
     178              :          * Dump out the unwritten fragment, stopping at fragmentEnd.
     179              :          *
     180              :          * 3. The match extends to loopEnd.  We'll do nothing here, exit the
     181              :          * loop, and then dump the unwritten fragment, after merging it with
     182              :          * the invalid end region if any.  If no such merge happens, fragmentEnd
     183              :          * establishes how much the final writeFragment call needs to write.
     184              :          *
     185              :          * 4. We found an unmatched byte before loopEnd.  The loop will repeat
     186              :          * and will enter the unmatched-byte stanza above.  So in this case
     187              :          * also, it doesn't matter what fragmentEnd is.  The matched bytes
     188              :          * will get merged into the continuing unmatched fragment.
     189              :          *
     190              :          * Only in case 3 do we reach the bottom of the loop with a meaningful
     191              :          * fragmentEnd value, which is why it's OK that we unconditionally
     192              :          * assign "fragmentEnd = i" above.
     193              :          */
     194      1480728 :         if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD)
     195              :         {
     196       311927 :             writeFragment(pageData, fragmentBegin,
     197       311927 :                           fragmentEnd - fragmentBegin,
     198              :                           targetpage + fragmentBegin);
     199       311927 :             fragmentBegin = -1;
     200       311927 :             fragmentEnd = -1;   /* not really necessary */
     201              :         }
     202              :     }
     203              : 
     204              :     /* Deal with any invalid end region by including it in final fragment */
     205       210630 :     if (loopEnd < targetEnd)
     206              :     {
     207       103887 :         if (fragmentBegin < 0)
     208       103887 :             fragmentBegin = loopEnd;
     209       103887 :         fragmentEnd = targetEnd;
     210              :     }
     211              : 
     212              :     /* Write final fragment if any; -1 end means it runs up to targetEnd */
     213       210630 :     if (fragmentBegin >= 0)
     214              :     {
     215       104538 :         if (fragmentEnd < 0)
     216          428 :             fragmentEnd = targetEnd;
     217       104538 :         writeFragment(pageData, fragmentBegin,
     218       104538 :                       fragmentEnd - fragmentBegin,
     219              :                       targetpage + fragmentBegin);
     220              :     }
     221       210630 : }
     222              : 
     223              : /*
     224              :  * Compute the XLOG delta needed to transform curpage into targetpage, and
     225              :  * store it in pageData's delta field, replacing any delta already there.
     226              :  */
     227              : static void
     228       105315 : computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage)
     229              : {
     230       105315 :     int         targetLower = ((PageHeader) targetpage)->pd_lower,
     231       105315 :                 targetUpper = ((PageHeader) targetpage)->pd_upper,
     232       105315 :                 curLower = ((PageHeader) curpage)->pd_lower,
     233       105315 :                 curUpper = ((PageHeader) curpage)->pd_upper;
     234              : 
     235       105315 :     pageData->deltaLen = 0;
     236              : 
     237              :     /* Compute fragments for the lower part, [0, target pd_lower) ... */
     238       105315 :     computeRegionDelta(pageData, curpage, targetpage,
     239              :                        0, targetLower,
     240              :                        0, curLower);
     241              :     /* ... and for the upper part, [target pd_upper, BLCKSZ), skipping the hole */
     242       105315 :     computeRegionDelta(pageData, curpage, targetpage,
     243              :                        targetUpper, BLCKSZ,
     244              :                        curUpper, BLCKSZ);
     245              : 
     246              :     /*
     247              :      * If xlog debug is enabled, then check the produced delta: applying it to
     248              :      * a copy of curpage must reproduce targetpage outside of the hole.
     249              :      */
     250              : #ifdef WAL_DEBUG
     251              :     if (XLOG_DEBUG)
     252              :     {
     253              :         PGAlignedBlock tmp;
     254              : 
     255              :         memcpy(tmp.data, curpage, BLCKSZ);
     256              :         applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
     257              :         if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
     258              :             memcmp(tmp.data + targetUpper, targetpage + targetUpper,
     259              :                    BLCKSZ - targetUpper) != 0)
     260              :             elog(ERROR, "result of generic xlog apply does not match");
     261              :     }
     262              : #endif
     263       105315 : }
     264              : 
     265              : /*
     266              :  * Start a new generic xlog record for modifications to the specified relation.
     267              :  */
     268              : GenericXLogState *
     269       105477 : GenericXLogStart(Relation relation)
     270              : {
     271              :     GenericXLogState *state;
     272              :     int         i;
     273              : 
     274       105477 :     state = (GenericXLogState *) palloc_aligned(sizeof(GenericXLogState),
     275              :                                                 PG_IO_ALIGN_SIZE,
     276              :                                                 0);
     277       105477 :     state->isLogged = RelationNeedsWAL(relation);   /* decided once, up front */
     278              : 
     279       527385 :     for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     280              :     {
     281       421908 :         state->pages[i].image = state->images[i].data;  /* aligned image slot */
     282       421908 :         state->pages[i].buffer = InvalidBuffer; /* slot not yet in use */
     283              :     }
     284              : 
     285       105477 :     return state;
     286              : }
     287              : 
     288              : /*
     289              :  * Register new buffer for generic xlog record.
     290              :  *
     291              :  * Returns pointer to the page's image in the GenericXLogState, which
     292              :  * is what the caller should modify.
     293              :  *
     294              :  * If the buffer is already registered, just return its existing entry.
     295              :  * (It's not very clear what to do with the flags in such a case, but
     296              :  * for now we keep the flags given at first registration.)
     297              :  */
     298              : Page
     299       106235 : GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags)
     300              : {
     301              :     int         block_id;
     302              : 
     303              :     /* Search array for existing entry or first unused slot */
     304       106993 :     for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++)
     305              :     {
     306       106993 :         GenericXLogPageData *page = &state->pages[block_id];
     307              : 
     308       106993 :         if (BufferIsInvalid(page->buffer))
     309              :         {
     310              :             /* Empty slot, so use it (there cannot be a match later) */
     311       106235 :             page->buffer = buffer;
     312       106235 :             page->flags = flags;
     313       106235 :             memcpy(page->image, BufferGetPage(buffer), BLCKSZ);
     314       106235 :             return (Page) page->image;
     315              :         }
     316          758 :         else if (page->buffer == buffer)
     317              :         {
     318              :             /*
     319              :              * Buffer is already registered.  Just return the image, which is
     320              :              * already prepared; the flags argument is ignored here.
     321              :              */
     322            0 :             return (Page) page->image;
     323              :         }
     324              :     }
     325              : 
     326            0 :     elog(ERROR, "maximum number %d of generic xlog buffers is exceeded",
     327              :          MAX_GENERIC_XLOG_PAGES);
     328              :     /* keep compiler quiet */
     329              :     return NULL;
     330              : }
     331              : 
     332              : /*
     333              :  * Apply changes represented by GenericXLogState to the actual buffers,
     334              :  * emit a generic xlog record if the relation is logged, and free the state.
     335              :  */
     336              : XLogRecPtr
     337       104712 : GenericXLogFinish(GenericXLogState *state)
     338              : {
     339              :     XLogRecPtr  lsn;
     340              :     int         i;
     341              : 
     342       104712 :     if (state->isLogged)
     343              :     {
     344              :         /* Logged relation: make xlog record in critical section. */
     345       104706 :         XLogBeginInsert();
     346              : 
     347       104706 :         START_CRIT_SECTION();
     348              : 
     349              :         /*
     350              :          * Compute deltas if necessary, write changes to buffers, mark buffers
     351              :          * dirty, and register changes.
     352              :          */
     353       523530 :         for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     354              :         {
     355       418824 :             GenericXLogPageData *pageData = &state->pages[i];
     356              :             Page        page;
     357              :             PageHeader  pageHeader;
     358              : 
     359       418824 :             if (BufferIsInvalid(pageData->buffer))
     360       313360 :                 continue;
     361              : 
     362       105464 :             page = BufferGetPage(pageData->buffer);
     363       105464 :             pageHeader = (PageHeader) pageData->image;
     364              : 
     365              :             /*
     366              :              * Compute delta while we still have both the unmodified page and
     367              :              * the new image. Not needed if we are logging the full image.
     368              :              */
     369       105464 :             if (!(pageData->flags & GENERIC_XLOG_FULL_IMAGE))
     370       105315 :                 computeDelta(pageData, page, (Page) pageData->image);
     371              : 
     372              :             /*
     373              :              * Apply the image, being careful to zero the "hole" between
     374              :              * pd_lower and pd_upper in order to avoid divergence between
     375              :              * actual page state and what replay would produce.
     376              :              */
     377       105464 :             memcpy(page, pageData->image, pageHeader->pd_lower);
     378       105464 :             memset(page + pageHeader->pd_lower, 0,
     379       105464 :                    pageHeader->pd_upper - pageHeader->pd_lower);
     380       105464 :             memcpy(page + pageHeader->pd_upper,
     381       105464 :                    pageData->image + pageHeader->pd_upper,
     382       105464 :                    BLCKSZ - pageHeader->pd_upper);
     383              : 
     384       105464 :             MarkBufferDirty(pageData->buffer);
     385              : 
     386       105464 :             if (pageData->flags & GENERIC_XLOG_FULL_IMAGE)
     387              :             {
     388          149 :                 XLogRegisterBuffer(i, pageData->buffer,
     389              :                                    REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     390              :             }
     391              :             else
     392              :             {
     393       105315 :                 XLogRegisterBuffer(i, pageData->buffer, REGBUF_STANDARD);
     394       105315 :                 XLogRegisterBufData(i, pageData->delta, pageData->deltaLen);
     395              :             }
     396              :         }
     397              : 
     398              :         /* Insert xlog record */
     399       104706 :         lsn = XLogInsert(RM_GENERIC_ID, 0);
     400              : 
     401              :         /* Stamp the record's LSN on every page that was registered */
     402       523530 :         for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     403              :         {
     404       418824 :             GenericXLogPageData *pageData = &state->pages[i];
     405              : 
     406       418824 :             if (BufferIsInvalid(pageData->buffer))
     407       313360 :                 continue;
     408       105464 :             PageSetLSN(BufferGetPage(pageData->buffer), lsn);
     409              :         }
     410       104706 :         END_CRIT_SECTION();
     411              :     }
     412              :     else
     413              :     {
     414              :         /* Unlogged relation: skip xlog-related stuff */
     415            6 :         START_CRIT_SECTION();
     416           30 :         for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     417              :         {
     418           24 :             GenericXLogPageData *pageData = &state->pages[i];
     419              : 
     420           24 :             if (BufferIsInvalid(pageData->buffer))
     421           18 :                 continue;
     422           12 :             memcpy(BufferGetPage(pageData->buffer),
     423            6 :                    pageData->image,
     424              :                    BLCKSZ);
     425              :             /* We don't worry about zeroing the "hole" in this case */
     426            6 :             MarkBufferDirty(pageData->buffer);
     427              :         }
     428            6 :         END_CRIT_SECTION();
     429              :         /* We don't have an LSN to return, in this case */
     430            6 :         lsn = InvalidXLogRecPtr;
     431              :     }
     432              : 
     433       104712 :     pfree(state);               /* state must not be used after this call */
     434              : 
     435       104712 :     return lsn;
     436              : }
     437              : 
     438              : /*
     439              :  * Abort generic xlog record construction.  The state is freed and no
     440              :  * changes are applied to buffers.
     441              :  * Note: caller is responsible for releasing locks/pins on buffers, if needed.
     442              :  */
     443              : void
     444          765 : GenericXLogAbort(GenericXLogState *state)
     445              : {
     446          765 :     pfree(state);
     447          765 : }
     448              : 
     449              : /*
     450              :  * Apply delta (a series of offset/length/data fragments) to given page image.
     451              :  */
     452              : static void
     453            0 : applyPageRedo(Page page, const char *delta, Size deltaSize)
     454              : {
     455            0 :     const char *ptr = delta;
     456            0 :     const char *end = delta + deltaSize;
     457              : 
     458            0 :     while (ptr < end)
     459              :     {
     460              :         OffsetNumber offset,
     461              :                     length;
     462              : 
     463            0 :         memcpy(&offset, ptr, sizeof(offset));   /* fragment header: offset */
     464            0 :         ptr += sizeof(offset);
     465            0 :         memcpy(&length, ptr, sizeof(length));   /* fragment header: length */
     466            0 :         ptr += sizeof(length);
     467              : 
     468            0 :         memcpy(page + offset, ptr, length); /* offset/length are not range-checked */
     469              : 
     470            0 :         ptr += length;
     471              :     }
     472            0 : }
     473              : 
     474              : /*
     475              :  * Redo function for generic xlog record: replay each block's delta as needed.
     476              :  */
     477              : void
     478            0 : generic_redo(XLogReaderState *record)
     479              : {
     480            0 :     XLogRecPtr  lsn = record->EndRecPtr;
     481              :     Buffer      buffers[MAX_GENERIC_XLOG_PAGES];
     482              :     uint8       block_id;
     483              : 
     484              :     /* Protect limited size of buffers[] array */
     485              :     Assert(XLogRecMaxBlockId(record) < MAX_GENERIC_XLOG_PAGES);
     486              : 
     487              :     /* Iterate over blocks */
     488            0 :     for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     489              :     {
     490              :         XLogRedoAction action;
     491              : 
     492            0 :         if (!XLogRecHasBlockRef(record, block_id))
     493              :         {
     494            0 :             buffers[block_id] = InvalidBuffer;  /* nothing to release later */
     495            0 :             continue;
     496              :         }
     497              : 
     498            0 :         action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]);
     499              : 
     500              :         /* Apply redo to given block if needed */
     501            0 :         if (action == BLK_NEEDS_REDO)
     502              :         {
     503              :             Page        page;
     504              :             PageHeader  pageHeader;
     505              :             char       *blockDelta;
     506              :             Size        blockDeltaSize;
     507              : 
     508            0 :             page = BufferGetPage(buffers[block_id]);
     509            0 :             blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize);
     510            0 :             applyPageRedo(page, blockDelta, blockDeltaSize);
     511              : 
     512              :             /*
     513              :              * Since the delta contains no information about what's in the
     514              :              * "hole" between pd_lower and pd_upper, set that to zero to
     515              :              * ensure we produce the same page state that application of the
     516              :              * logged action by GenericXLogFinish did.
     517              :              */
     518            0 :             pageHeader = (PageHeader) page;
     519            0 :             memset(page + pageHeader->pd_lower, 0,
     520            0 :                    pageHeader->pd_upper - pageHeader->pd_lower);
     521              : 
     522            0 :             PageSetLSN(page, lsn);
     523            0 :             MarkBufferDirty(buffers[block_id]);
     524              :         }
     525              :     }
     526              : 
     527              :     /* Changes are done: unlock and release all buffers */
     528            0 :     for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     529              :     {
     530            0 :         if (BufferIsValid(buffers[block_id]))
     531            0 :             UnlockReleaseBuffer(buffers[block_id]);
     532              :     }
     533            0 : }
     534              : 
     535              : /*
     536              :  * Mask a generic page before performing consistency checks on it.  blkno is unused.
     537              :  */
     538              : void
     539            0 : generic_mask(char *page, BlockNumber blkno)
     540              : {
     541            0 :     mask_page_lsn_and_checksum(page);
     542              : 
     543            0 :     mask_unused_space(page);
     544            0 : }
        

Generated by: LCOV version 2.0-1