LCOV - code coverage report
Current view: top level - src/backend/access/transam - generic_xlog.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 115 154 74.7 %
Date: 2025-01-18 05:15:39 Functions: 7 10 70.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * generic_xlog.c
       4             :  *   Implementation of generic xlog records.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * src/backend/access/transam/generic_xlog.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "access/bufmask.h"
      17             : #include "access/generic_xlog.h"
      18             : #include "access/xlogutils.h"
      19             : #include "miscadmin.h"
      20             : 
      21             : /*-------------------------------------------------------------------------
      22             :  * Internally, a delta between pages consists of a set of fragments.  Each
      23             :  * fragment represents changes made in a given region of a page.  A fragment
      24             :  * is made up as follows:
      25             :  *
      26             :  * - offset of page region (OffsetNumber)
      27             :  * - length of page region (OffsetNumber)
      28             :  * - data - the data to place into the region ('length' number of bytes)
      29             :  *
      30             :  * Unchanged regions of a page are not represented in its delta.  As a result,
      31             :  * a delta can be more compact than the full page image.  But having an
      32             :  * unchanged region between two fragments that is smaller than the fragment
      33             :  * header (offset+length) does not pay off in terms of the overall size of
      34             :  * the delta.  For this reason, we merge adjacent fragments if the unchanged
      35             :  * region between them is <= MATCH_THRESHOLD bytes.
      36             :  *
      37             :  * We do not bother to merge fragments across the "lower" and "upper" parts
      38             :  * of a page; it's very seldom the case that pd_lower and pd_upper are within
      39             :  * MATCH_THRESHOLD bytes of each other, and handling that infrequent case
      40             :  * would complicate and slow down the delta-computation code unduly.
      41             :  * Therefore, the worst-case delta size includes two fragment headers plus
      42             :  * a full page's worth of data.
                     :  *
                     :  * The macros below encode these rules:
                     :  *
                     :  * FRAGMENT_HEADER_SIZE - bytes taken by one fragment's offset and length
                     :  * MATCH_THRESHOLD      - longest run of unchanged bytes that is still
                     :  *                        absorbed into the surrounding fragment
                     :  * MAX_DELTA_SIZE       - worst-case delta size: one full page of data
                     :  *                        plus two fragment headers
      43             :  *-------------------------------------------------------------------------
      44             :  */
      45             : #define FRAGMENT_HEADER_SIZE    (2 * sizeof(OffsetNumber))
      46             : #define MATCH_THRESHOLD         FRAGMENT_HEADER_SIZE
      47             : #define MAX_DELTA_SIZE          (BLCKSZ + 2 * FRAGMENT_HEADER_SIZE)
      48             : 
      49             : /*
                     :  * Per-page working state for a generic xlog record under construction.
                     :  * One of these exists for each slot of GenericXLogState.pages[].
                     :  */
      50             : typedef struct
      51             : {
      52             :     Buffer      buffer;         /* registered buffer, or InvalidBuffer if slot is unused */
      53             :     int         flags;          /* flags supplied when the buffer was registered */
      54             :     int         deltaLen;       /* number of bytes of delta[] currently in use */
      55             :     char       *image;          /* working copy of the page, which callers modify;
      56             :                                  * kept out of line so that the copy is aligned */
      57             :     char        delta[MAX_DELTA_SIZE];  /* fragments transforming old page to image */
      58             : } GenericXLogPageData;
      59             : 
      60             : /*
      61             :  * State of generic xlog record construction.  Must be allocated at an I/O
      62             :  * aligned address.
                     :  *
                     :  * isLogged records whether the target relation needs WAL.  It is set once
                     :  * when the state is created and selects the logged or unlogged code path
                     :  * when the record is finished.
      63             :  */
      64             : struct GenericXLogState
      65             : {
      66             :     /* Page images (properly aligned, must be first) */
      67             :     PGIOAlignedBlock images[MAX_GENERIC_XLOG_PAGES];
      68             :     /* Per-page bookkeeping; pages[i].image is pointed at images[i] */
      69             :     GenericXLogPageData pages[MAX_GENERIC_XLOG_PAGES];
      70             :     bool        isLogged;
      71             : };
      72             : 
      73             : static void writeFragment(GenericXLogPageData *pageData, OffsetNumber offset,
      74             :                           OffsetNumber length, const char *data);
      75             : static void computeRegionDelta(GenericXLogPageData *pageData,
      76             :                                const char *curpage, const char *targetpage,
      77             :                                int targetStart, int targetEnd,
      78             :                                int validStart, int validEnd);
      79             : static void computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage);
      80             : static void applyPageRedo(Page page, const char *delta, Size deltaSize);
      81             : 
      82             : 
      83             : /*
      84             :  * Append the next fragment to pageData's delta.
      85             :  *
      86             :  * The fragment is stored at delta + deltaLen as: offset, then length, then
      87             :  * 'length' bytes copied from data.  deltaLen is advanced past what was written.
                     :  *
                     :  * Room in the delta buffer is verified only by an Assert, so callers must
                     :  * never produce more fragment bytes than the delta array can hold.
      88             :  */
      89             : static void
      90      832930 : writeFragment(GenericXLogPageData *pageData, OffsetNumber offset, OffsetNumber length,
      91             :               const char *data)
      92             : {
      93      832930 :     char       *ptr = pageData->delta + pageData->deltaLen;
      94             : 
      95             :     /* Verify we have enough space for the header fields plus the payload */
      96             :     Assert(pageData->deltaLen + sizeof(offset) +
      97             :            sizeof(length) + length <= sizeof(pageData->delta));
      98             : 
      99             :     /*
                     :      * Write fragment header fields, then the payload.  (The header is
                     :      * written with memcpy rather than by assignment, presumably because
                     :      * ptr is not guaranteed to be aligned for OffsetNumber.)
                     :      */
     100      832930 :     memcpy(ptr, &offset, sizeof(offset));
     101      832930 :     ptr += sizeof(offset);
     102      832930 :     memcpy(ptr, &length, sizeof(length));
     103      832930 :     ptr += sizeof(length);
     104      832930 :     memcpy(ptr, data, length);
     105      832930 :     ptr += length;
     106             : 
     107      832930 :     pageData->deltaLen = ptr - pageData->delta;
     108      832930 : }
     109             : 
     110             : /*
     111             :  * Compute the XLOG fragments needed to transform a region of curpage into the
     112             :  * corresponding region of targetpage, and append them to pageData's delta
     113             :  * field.  The region to transform runs from targetStart to targetEnd-1.
     114             :  * Bytes in curpage outside the range validStart to validEnd-1 should be
     115             :  * considered invalid, and always overwritten with target data.
     116             :  *
                     :  * pageData receives the fragments (appended through writeFragment).
                     :  * curpage and targetpage are only read; all fragment data is taken from
                     :  * targetpage.  All four range arguments are byte offsets within the page.
                     :  *
     117             :  * This function is a hot spot, so it's worth keeping the data-matching
     118             :  * loops as tight as possible.
     119             :  */
     120             : static void
     121      421260 : computeRegionDelta(GenericXLogPageData *pageData,
     122             :                    const char *curpage, const char *targetpage,
     123             :                    int targetStart, int targetEnd,
     124             :                    int validStart, int validEnd)
     125             : {
     126             :     int         i,
     127             :                 loopEnd,
     128      421260 :                 fragmentBegin = -1,
     129      421260 :                 fragmentEnd = -1;
     130             : 
     131             :     /* Deal with any invalid start region by including it in first fragment */
     132      421260 :     if (validStart > targetStart)
     133             :     {
     134           0 :         fragmentBegin = targetStart;
     135           0 :         targetStart = validStart;
     136             :     }
     137             : 
     138             :     /* We'll deal with any invalid end region after the main loop */
     139      421260 :     loopEnd = Min(targetEnd, validEnd);
     140             : 
     141             :     /* Examine all the potentially matchable bytes */
     142      421260 :     i = targetStart;
     143     3382716 :     while (i < loopEnd)
     144             :     {
     145     2962312 :         if (curpage[i] != targetpage[i])
     146             :         {
     147             :             /* On unmatched byte, start new fragment if not already in one */
     148     2750144 :             if (fragmentBegin < 0)
     149      625156 :                 fragmentBegin = i;
     150             :             /* Mark unmatched-data endpoint as uncertain */
     151     2750144 :             fragmentEnd = -1;
     152             :             /* Extend the fragment as far as possible in a tight loop */
     153     2750144 :             i++;
     154     4484838 :             while (i < loopEnd && curpage[i] != targetpage[i])
     155     1734694 :                 i++;
     156     2750144 :             if (i >= loopEnd)
     157         856 :                 break;
     158             :         }
     159             : 
     160             :         /* Found a matched byte, so remember end of unmatched fragment */
     161     2961456 :         fragmentEnd = i;
     162             : 
     163             :         /*
     164             :          * Extend the match as far as possible in a tight loop.  (On typical
     165             :          * workloads, this inner loop is the bulk of this function's runtime.)
     166             :          */
     167     2961456 :         i++;
     168  1185630266 :         while (i < loopEnd && curpage[i] == targetpage[i])
     169  1182668810 :             i++;
     170             : 
     171             :         /*
     172             :          * There are several possible cases at this point:
     173             :          *
     174             :          * 1. We have no unwritten fragment (fragmentBegin < 0).  There's
     175             :          * nothing to write; and it doesn't matter what fragmentEnd is.
     176             :          *
     177             :          * 2. We found more than MATCH_THRESHOLD consecutive matching bytes.
     178             :          * Dump out the unwritten fragment, stopping at fragmentEnd.
     179             :          *
     180             :          * 3. The match extends to loopEnd.  We'll do nothing here, exit the
     181             :          * loop, and then dump the unwritten fragment, after merging it with
     182             :          * the invalid end region if any.  If we don't so merge, fragmentEnd
     183             :          * establishes how much the final writeFragment call needs to write.
     184             :          *
     185             :          * 4. We found an unmatched byte before loopEnd.  The loop will repeat
     186             :          * and will enter the unmatched-byte stanza above.  So in this case
     187             :          * also, it doesn't matter what fragmentEnd is.  The matched bytes
     188             :          * will get merged into the continuing unmatched fragment.
     189             :          *
     190             :          * Only in case 3 do we reach the bottom of the loop with a meaningful
     191             :          * fragmentEnd value, which is why it's OK that we unconditionally
     192             :          * assign "fragmentEnd = i" above.
     193             :          */
     194     2961456 :         if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD)
     195             :         {
     196      623854 :             writeFragment(pageData, fragmentBegin,
     197      623854 :                           fragmentEnd - fragmentBegin,
     198             :                           targetpage + fragmentBegin);
     199      623854 :             fragmentBegin = -1;
     200      623854 :             fragmentEnd = -1;   /* not really necessary */
     201             :         }
     202             :     }
     203             : 
     204             :     /* Deal with any invalid end region by including it in final fragment */
     205      421260 :     if (loopEnd < targetEnd)
     206             :     {
     207      207774 :         if (fragmentBegin < 0)
     208      207774 :             fragmentBegin = loopEnd;
     209      207774 :         fragmentEnd = targetEnd;
     210             :     }
     211             : 
     212             :     /*
                     :      * Write final fragment if any.  A negative fragmentEnd at this point
                     :      * means the fragment was still open when we ran out of bytes to compare
                     :      * (either the unmatched run reached loopEnd, or only an invalid start
                     :      * region was seen), so the fragment extends to targetEnd.
                     :      */
     213      421260 :     if (fragmentBegin >= 0)
     214             :     {
     215      209076 :         if (fragmentEnd < 0)
     216         856 :             fragmentEnd = targetEnd;
     217      209076 :         writeFragment(pageData, fragmentBegin,
     218      209076 :                       fragmentEnd - fragmentBegin,
     219             :                       targetpage + fragmentBegin);
     220             :     }
     221      421260 : }
     222             : 
     223             : /*
     224             :  * Compute the XLOG delta record needed to transform curpage into targetpage,
     225             :  * and store it in pageData's delta field.
                     :  *
                     :  * Any previous delta is discarded (deltaLen is reset to zero).  Two regions
                     :  * are processed separately: bytes 0 .. pd_lower-1 and bytes pd_upper ..
                     :  * BLCKSZ-1 of the target page.  The corresponding pd_lower/pd_upper of
                     :  * curpage bound the part of curpage that is treated as valid for comparison.
                     :  * Bytes between the target's pd_lower and pd_upper are not represented in
                     :  * the delta at all.
     226             :  */
     227             : static void
     228      210630 : computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage)
     229             : {
     230      210630 :     int         targetLower = ((PageHeader) targetpage)->pd_lower,
     231      210630 :                 targetUpper = ((PageHeader) targetpage)->pd_upper,
     232      210630 :                 curLower = ((PageHeader) curpage)->pd_lower,
     233      210630 :                 curUpper = ((PageHeader) curpage)->pd_upper;
     234             : 
     235      210630 :     pageData->deltaLen = 0;
     236             : 
     237             :     /* Compute delta records for lower part of page ... */
     238      210630 :     computeRegionDelta(pageData, curpage, targetpage,
     239             :                        0, targetLower,
     240             :                        0, curLower);
     241             :     /* ... and for upper part, ignoring what's between */
     242      210630 :     computeRegionDelta(pageData, curpage, targetpage,
     243             :                        targetUpper, BLCKSZ,
     244             :                        curUpper, BLCKSZ);
     245             : 
     246             :     /*
     247             :      * If xlog debug is enabled, then check produced delta.  Result of delta
     248             :      * application to curpage should be equivalent to targetpage.  Only the
                     :      * lower and upper regions are compared, since the delta says nothing
                     :      * about the bytes in between; a mismatch is reported with elog(ERROR).
     249             :      */
     250             : #ifdef WAL_DEBUG
     251             :     if (XLOG_DEBUG)
     252             :     {
     253             :         PGAlignedBlock tmp;
     254             : 
     255             :         memcpy(tmp.data, curpage, BLCKSZ);
     256             :         applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
     257             :         if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
     258             :             memcmp(tmp.data + targetUpper, targetpage + targetUpper,
     259             :                    BLCKSZ - targetUpper) != 0)
     260             :             elog(ERROR, "result of generic xlog apply does not match");
     261             :     }
     262             : #endif
     263      210630 : }
     264             : 
     265             : /*
     266             :  * Start new generic xlog record for modifications to specified relation.
                     :  *
                     :  * Allocates a GenericXLogState aligned to PG_IO_ALIGN_SIZE, remembers
                     :  * whether the relation needs WAL, points each page slot's image at the
                     :  * matching entry of images[], and marks every slot as unused.
                     :  *
                     :  * Returns the new state, which is released by GenericXLogFinish or
                     :  * GenericXLogAbort.
     267             :  */
     268             : GenericXLogState *
     269      210954 : GenericXLogStart(Relation relation)
     270             : {
     271             :     GenericXLogState *state;
     272             :     int         i;
     273             : 
     274      210954 :     state = (GenericXLogState *) palloc_aligned(sizeof(GenericXLogState),
     275             :                                                 PG_IO_ALIGN_SIZE,
     276             :                                                 0);
     277      210954 :     state->isLogged = RelationNeedsWAL(relation);
     278             : 
     279     1054770 :     for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     280             :     {
     281      843816 :         state->pages[i].image = state->images[i].data;
     282      843816 :         state->pages[i].buffer = InvalidBuffer;
     283             :     }
     284             : 
     285      210954 :     return state;
     286             : }
     287             : 
     288             : /*
     289             :  * Register new buffer for generic xlog record.
     290             :  *
     291             :  * Returns pointer to the page's image in the GenericXLogState, which
     292             :  * is what the caller should modify.
     293             :  *
     294             :  * If the buffer is already registered, just return its existing entry.
     295             :  * (It's not very clear what to do with the flags in such a case, but
     296             :  * for now we stay with the original flags.)
                     :  *
                     :  * On first registration the buffer's whole page (BLCKSZ bytes) is copied
                     :  * into the image.  If every one of the MAX_GENERIC_XLOG_PAGES slots is
                     :  * already occupied by some other buffer, an ERROR is raised.
     297             :  */
     298             : Page
     299      212470 : GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags)
     300             : {
     301             :     int         block_id;
     302             : 
     303             :     /* Search array for existing entry or first unused slot */
     304      213986 :     for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++)
     305             :     {
     306      213986 :         GenericXLogPageData *page = &state->pages[block_id];
     307             : 
     308      213986 :         if (BufferIsInvalid(page->buffer))
     309             :         {
     310             :             /* Empty slot, so use it (there cannot be a match later) */
     311      212470 :             page->buffer = buffer;
     312      212470 :             page->flags = flags;
     313      212470 :             memcpy(page->image, BufferGetPage(buffer), BLCKSZ);
     314      212470 :             return (Page) page->image;
     315             :         }
     316        1516 :         else if (page->buffer == buffer)
     317             :         {
     318             :             /*
     319             :              * Buffer is already registered.  Just return the image, which is
     320             :              * already prepared.  The flags argument is ignored in this case.
     321             :              */
     322           0 :             return (Page) page->image;
     323             :         }
     324             :     }
     325             : 
     326           0 :     elog(ERROR, "maximum number %d of generic xlog buffers is exceeded",
     327             :          MAX_GENERIC_XLOG_PAGES);
     328             :     /* keep compiler quiet */
     329             :     return NULL;
     330             : }
     331             : 
     332             : /*
     333             :  * Apply changes represented by GenericXLogState to the actual buffers,
     334             :  * and emit a generic xlog record.
                     :  *
                     :  * For a logged relation, each registered page's image is copied into its
                     :  * buffer, the buffer is marked dirty and registered with the record (as a
                     :  * forced full image or as a delta, depending on the page's flags), the
                     :  * record is inserted, and its LSN is set on every registered page.  The
                     :  * function returns that LSN.
                     :  *
                     :  * For an unlogged relation, the images are simply copied into the buffers
                     :  * and the buffers marked dirty; InvalidXLogRecPtr is returned.
                     :  *
                     :  * In both cases the state is freed, so it must not be used afterwards.
     335             :  */
     336             : XLogRecPtr
     337      209424 : GenericXLogFinish(GenericXLogState *state)
     338             : {
     339             :     XLogRecPtr  lsn;
     340             :     int         i;
     341             : 
     342      209424 :     if (state->isLogged)
     343             :     {
     344             :         /* Logged relation: make xlog record in critical section. */
     345      209412 :         XLogBeginInsert();
     346             : 
     347      209412 :         START_CRIT_SECTION();
     348             : 
     349             :         /*
     350             :          * Compute deltas if necessary, write changes to buffers, mark buffers
     351             :          * dirty, and register changes.  The slot index i doubles as the block
                     :          * id under which the buffer is registered in the record.
     352             :          */
     353     1047060 :         for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     354             :         {
     355      837648 :             GenericXLogPageData *pageData = &state->pages[i];
     356             :             Page        page;
     357             :             PageHeader  pageHeader;
     358             : 
     359      837648 :             if (BufferIsInvalid(pageData->buffer))
     360      626720 :                 continue;
     361             : 
     362      210928 :             page = BufferGetPage(pageData->buffer);
     363      210928 :             pageHeader = (PageHeader) pageData->image;
     364             : 
     365             :             /*
     366             :              * Compute delta while we still have both the unmodified page and
     367             :              * the new image. Not needed if we are logging the full image.
     368             :              */
     369      210928 :             if (!(pageData->flags & GENERIC_XLOG_FULL_IMAGE))
     370      210630 :                 computeDelta(pageData, page, (Page) pageData->image);
     371             : 
     372             :             /*
     373             :              * Apply the image, being careful to zero the "hole" between
     374             :              * pd_lower and pd_upper in order to avoid divergence between
     375             :              * actual page state and what replay would produce.  Note that
                     :              * pageHeader refers to the new image, so its pd_lower and
                     :              * pd_upper are the post-modification values.
     376             :              */
     377      210928 :             memcpy(page, pageData->image, pageHeader->pd_lower);
     378      210928 :             memset(page + pageHeader->pd_lower, 0,
     379      210928 :                    pageHeader->pd_upper - pageHeader->pd_lower);
     380      210928 :             memcpy(page + pageHeader->pd_upper,
     381      210928 :                    pageData->image + pageHeader->pd_upper,
     382      210928 :                    BLCKSZ - pageHeader->pd_upper);
     383             : 
     384      210928 :             MarkBufferDirty(pageData->buffer);
     385             : 
     386      210928 :             if (pageData->flags & GENERIC_XLOG_FULL_IMAGE)
     387             :             {
     388         298 :                 XLogRegisterBuffer(i, pageData->buffer,
     389             :                                    REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     390             :             }
     391             :             else
     392             :             {
     393      210630 :                 XLogRegisterBuffer(i, pageData->buffer, REGBUF_STANDARD);
     394      210630 :                 XLogRegisterBufData(i, pageData->delta, pageData->deltaLen);
     395             :             }
     396             :         }
     397             : 
     398             :         /* Insert xlog record */
     399      209412 :         lsn = XLogInsert(RM_GENERIC_ID, 0);
     400             : 
     401             :         /* Set LSN on every page that was registered above */
     402     1047060 :         for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     403             :         {
     404      837648 :             GenericXLogPageData *pageData = &state->pages[i];
     405             : 
     406      837648 :             if (BufferIsInvalid(pageData->buffer))
     407      626720 :                 continue;
     408      210928 :             PageSetLSN(BufferGetPage(pageData->buffer), lsn);
     409             :         }
     410      209412 :         END_CRIT_SECTION();
     411             :     }
     412             :     else
     413             :     {
     414             :         /* Unlogged relation: skip xlog-related stuff */
     415          12 :         START_CRIT_SECTION();
     416          60 :         for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
     417             :         {
     418          48 :             GenericXLogPageData *pageData = &state->pages[i];
     419             : 
     420          48 :             if (BufferIsInvalid(pageData->buffer))
     421          36 :                 continue;
     422          24 :             memcpy(BufferGetPage(pageData->buffer),
     423          12 :                    pageData->image,
     424             :                    BLCKSZ);
     425             :             /* We don't worry about zeroing the "hole" in this case */
     426          12 :             MarkBufferDirty(pageData->buffer);
     427             :         }
     428          12 :         END_CRIT_SECTION();
     429             :         /* We don't have a LSN to return, in this case */
     430          12 :         lsn = InvalidXLogRecPtr;
     431             :     }
     432             : 
     433      209424 :     pfree(state);
     434             : 
     435      209424 :     return lsn;
     436             : }
     437             : 
     438             : /*
     439             :  * Abort generic xlog record construction.  No changes are applied to buffers.
                     :  * The only action taken is to free the state, so any modifications made to
                     :  * the page images it held are discarded with it.
     440             :  *
     441             :  * Note: caller is responsible for releasing locks/pins on buffers, if needed.
     442             :  */
     443             : void
     444        1530 : GenericXLogAbort(GenericXLogState *state)
     445             : {
     446        1530 :     pfree(state);
     447        1530 : }
     448             : 
     449             : /*
     450             :  * Apply delta to given page image.
                     :  *
                     :  * The delta (deltaSize bytes long) is read as a sequence of fragments.  For
                     :  * each one, an OffsetNumber offset and an OffsetNumber length are read,
                     :  * and the 'length' bytes that follow are copied to page + offset.
                     :  *
                     :  * No bounds checking is done on offset or length, nor on whether a
                     :  * fragment runs past the end of the delta; the delta is trusted to be
                     :  * well formed.
     451             :  */
     452             : static void
     453           0 : applyPageRedo(Page page, const char *delta, Size deltaSize)
     454             : {
     455           0 :     const char *ptr = delta;
     456           0 :     const char *end = delta + deltaSize;
     457             : 
     458           0 :     while (ptr < end)
     459             :     {
     460             :         OffsetNumber offset,
     461             :                     length;
     462             : 
     463           0 :         memcpy(&offset, ptr, sizeof(offset));
     464           0 :         ptr += sizeof(offset);
     465           0 :         memcpy(&length, ptr, sizeof(length));
     466           0 :         ptr += sizeof(length);
     467             : 
     468           0 :         memcpy(page + offset, ptr, length);
     469             : 
     470           0 :         ptr += length;
     471             :     }
     472           0 : }
     473             : 
     474             : /*
     475             :  * Redo function for generic xlog record.
                     :  *
                     :  * For every block referenced by the record, the buffer is read for redo;
                     :  * where redo is needed, the block's delta is applied, the page "hole" is
                     :  * zeroed, the page LSN is set to the record's end pointer, and the buffer
                     :  * is marked dirty.  All buffers obtained are unlocked and released only
                     :  * after every block has been processed.
     476             :  */
     477             : void
     478           0 : generic_redo(XLogReaderState *record)
     479             : {
     480           0 :     XLogRecPtr  lsn = record->EndRecPtr;
     481             :     Buffer      buffers[MAX_GENERIC_XLOG_PAGES];
     482             :     uint8       block_id;
     483             : 
     484             :     /* Protect limited size of buffers[] array (checked by Assert only) */
     485             :     Assert(XLogRecMaxBlockId(record) < MAX_GENERIC_XLOG_PAGES);
     486             : 
     487             :     /* Iterate over blocks */
     488           0 :     for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     489             :     {
     490             :         XLogRedoAction action;
     491             : 
     492           0 :         if (!XLogRecHasBlockRef(record, block_id))
     493             :         {
     494           0 :             buffers[block_id] = InvalidBuffer;
     495           0 :             continue;
     496             :         }
     497             : 
     498           0 :         action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]);
     499             : 
     500             :         /* Apply redo to given block if needed */
     501           0 :         if (action == BLK_NEEDS_REDO)
     502             :         {
     503             :             Page        page;
     504             :             PageHeader  pageHeader;
     505             :             char       *blockDelta;
     506             :             Size        blockDeltaSize;
     507             : 
     508           0 :             page = BufferGetPage(buffers[block_id]);
     509           0 :             blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize);
     510           0 :             applyPageRedo(page, blockDelta, blockDeltaSize);
     511             : 
     512             :             /*
     513             :              * Since the delta contains no information about what's in the
     514             :              * "hole" between pd_lower and pd_upper, set that to zero to
     515             :              * ensure we produce the same page state that application of the
     516             :              * logged action by GenericXLogFinish did.
     517             :              */
     518           0 :             pageHeader = (PageHeader) page;
     519           0 :             memset(page + pageHeader->pd_lower, 0,
     520           0 :                    pageHeader->pd_upper - pageHeader->pd_lower);
     521             : 
     522           0 :             PageSetLSN(page, lsn);
     523           0 :             MarkBufferDirty(buffers[block_id]);
     524             :         }
     525             :     }
     526             : 
     527             :     /* Changes are done: unlock and release all buffers */
     528           0 :     for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     529             :     {
     530           0 :         if (BufferIsValid(buffers[block_id]))
     531           0 :             UnlockReleaseBuffer(buffers[block_id]);
     532             :     }
     533           0 : }
     534             : 
     535             : /*
     536             :  * Mask a generic page before performing consistency checks on it.
                     :  *
                     :  * The page's LSN and checksum are masked first, then its unused space.
                     :  * The blkno argument is not used.
     537             :  */
     538             : void
     539           0 : generic_mask(char *page, BlockNumber blkno)
     540             : {
     541           0 :     mask_page_lsn_and_checksum(page);
     542             : 
     543           0 :     mask_unused_space(page);
     544           0 : }

Generated by: LCOV version 1.14