LCOV - code coverage report
Current view: top level - src/backend/access/heap - heapam_xlog.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 504 570 88.4 %
Date: 2025-12-16 08:17:58 Functions: 13 14 92.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heapam_xlog.c
       4             :  *    WAL replay logic for heap access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/heapam_xlog.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/bufmask.h"
      18             : #include "access/heapam.h"
      19             : #include "access/visibilitymap.h"
      20             : #include "access/xlog.h"
      21             : #include "access/xlogutils.h"
      22             : #include "storage/freespace.h"
      23             : #include "storage/standby.h"
      24             : 
      25             : 
      26             : /*
      27             :  * Replay XLOG_HEAP2_PRUNE_* records.
      28             :  */
      29             : static void
      30       30618 : heap_xlog_prune_freeze(XLogReaderState *record)
      31             : {
      32       30618 :     XLogRecPtr  lsn = record->EndRecPtr;
      33       30618 :     char       *maindataptr = XLogRecGetData(record);
      34             :     xl_heap_prune xlrec;
      35             :     Buffer      buffer;
      36             :     RelFileLocator rlocator;
      37             :     BlockNumber blkno;
      38       30618 :     Buffer      vmbuffer = InvalidBuffer;
      39       30618 :     uint8       vmflags = 0;
      40       30618 :     Size        freespace = 0;
      41             : 
      42       30618 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
      43       30618 :     memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
      44       30618 :     maindataptr += SizeOfHeapPrune;
      45             : 
      46             :     /*
      47             :      * We will take an ordinary exclusive lock or a cleanup lock depending on
      48             :      * whether the XLHP_CLEANUP_LOCK flag is set.  With an ordinary exclusive
      49             :      * lock, we better not be doing anything that requires moving existing
      50             :      * tuple data.
      51             :      */
      52             :     Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
      53             :            (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
      54             : 
      55       30618 :     if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
      56             :     {
      57        7648 :         vmflags = VISIBILITYMAP_ALL_VISIBLE;
      58        7648 :         if (xlrec.flags & XLHP_VM_ALL_FROZEN)
      59        6372 :             vmflags |= VISIBILITYMAP_ALL_FROZEN;
      60             :     }
      61             : 
      62             :     /*
      63             :      * After xl_heap_prune is the optional snapshot conflict horizon.
      64             :      *
      65             :      * In Hot Standby mode, we must ensure that there are no running queries
      66             :      * which would conflict with the changes in this record. That means we
      67             :      * can't replay this record if it removes tuples that are still visible to
      68             :      * transactions on the standby, freeze tuples with xids that are still
      69             :      * considered running on the standby, or set a page as all-visible in the
      70             :      * VM if it isn't all-visible to all transactions on the standby.
      71             :      */
      72       30618 :     if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
      73             :     {
      74             :         TransactionId snapshot_conflict_horizon;
      75             : 
      76             :         /* memcpy() because snapshot_conflict_horizon is stored unaligned */
      77       22346 :         memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
      78       22346 :         maindataptr += sizeof(TransactionId);
      79             : 
      80       22346 :         if (InHotStandby)
      81       21882 :             ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
      82       21882 :                                                 (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
      83             :                                                 rlocator);
      84             :     }
      85             : 
      86             :     /*
      87             :      * If we have a full-page image of the heap block, restore it and we're
      88             :      * done with the heap block.
      89             :      */
      90       30618 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
      91       30618 :                                       (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
      92             :                                       &buffer) == BLK_NEEDS_REDO)
      93             :     {
      94       21608 :         Page        page = BufferGetPage(buffer);
      95             :         OffsetNumber *redirected;
      96             :         OffsetNumber *nowdead;
      97             :         OffsetNumber *nowunused;
      98             :         int         nredirected;
      99             :         int         ndead;
     100             :         int         nunused;
     101             :         int         nplans;
     102             :         Size        datalen;
     103             :         xlhp_freeze_plan *plans;
     104             :         OffsetNumber *frz_offsets;
     105       21608 :         char       *dataptr = XLogRecGetBlockData(record, 0, &datalen);
     106             :         bool        do_prune;
     107             : 
     108       21608 :         heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
     109             :                                                &nplans, &plans, &frz_offsets,
     110             :                                                &nredirected, &redirected,
     111             :                                                &ndead, &nowdead,
     112             :                                                &nunused, &nowunused);
     113             : 
     114       21608 :         do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
     115             : 
     116             :         /* Ensure the record does something */
     117             :         Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
     118             : 
     119             :         /*
     120             :          * Update all line pointers per the record, and repair fragmentation
     121             :          * if needed.
     122             :          */
     123       21608 :         if (do_prune)
     124       20514 :             heap_page_prune_execute(buffer,
     125       20514 :                                     (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
     126             :                                     redirected, nredirected,
     127             :                                     nowdead, ndead,
     128             :                                     nowunused, nunused);
     129             : 
     130             :         /* Freeze tuples */
     131       23742 :         for (int p = 0; p < nplans; p++)
     132             :         {
     133             :             HeapTupleFreeze frz;
     134             : 
     135             :             /*
     136             :              * Convert freeze plan representation from WAL record into
     137             :              * per-tuple format used by heap_execute_freeze_tuple
     138             :              */
     139        2134 :             frz.xmax = plans[p].xmax;
     140        2134 :             frz.t_infomask2 = plans[p].t_infomask2;
     141        2134 :             frz.t_infomask = plans[p].t_infomask;
     142        2134 :             frz.frzflags = plans[p].frzflags;
     143        2134 :             frz.offset = InvalidOffsetNumber;   /* unused, but be tidy */
     144             : 
     145       98342 :             for (int i = 0; i < plans[p].ntuples; i++)
     146             :             {
     147       96208 :                 OffsetNumber offset = *(frz_offsets++);
     148             :                 ItemId      lp;
     149             :                 HeapTupleHeader tuple;
     150             : 
     151       96208 :                 lp = PageGetItemId(page, offset);
     152       96208 :                 tuple = (HeapTupleHeader) PageGetItem(page, lp);
     153       96208 :                 heap_execute_freeze_tuple(tuple, &frz);
     154             :             }
     155             :         }
     156             : 
     157             :         /* There should be no more data */
     158             :         Assert((char *) frz_offsets == dataptr + datalen);
     159             : 
     160             :         /*
     161             :          * The critical integrity requirement here is that we must never end
     162             :          * up with the visibility map bit set and the page-level
     163             :          * PD_ALL_VISIBLE bit unset.  If that were to occur, a subsequent page
     164             :          * modification would fail to clear the visibility map bit.
     165             :          */
     166       21608 :         if (vmflags & VISIBILITYMAP_VALID_BITS)
     167        5070 :             PageSetAllVisible(page);
     168             : 
     169       21608 :         MarkBufferDirty(buffer);
     170             : 
     171             :         /*
     172             :          * See log_heap_prune_and_freeze() for commentary on when we set the
     173             :          * heap page LSN.
     174             :          */
     175       21608 :         if (do_prune || nplans > 0 ||
     176           0 :             ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
     177       21608 :             PageSetLSN(page, lsn);
     178             : 
     179             :         /*
     180             :          * Note: we don't worry about updating the page's prunability hints.
     181             :          * At worst this will cause an extra prune cycle to occur soon.
     182             :          */
     183             :     }
     184             : 
     185             :     /*
     186             :      * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
     187             :      * or the VM, update the freespace map.
     188             :      *
     189             :      * Even when no actual space is freed (when only marking the page
     190             :      * all-visible or frozen), we still update the FSM. Because the FSM is
     191             :      * unlogged and maintained heuristically, it often becomes stale on
     192             :      * standbys. If such a standby is later promoted and runs VACUUM, it will
     193             :      * skip recalculating free space for pages that were marked
     194             :      * all-visible/all-forzen. FreeSpaceMapVacuum() can then propagate overly
     195             :      * optimistic free space values upward, causing future insertions to
     196             :      * select pages that turn out to be unusable. In bulk, this can lead to
     197             :      * long stalls.
     198             :      *
     199             :      * To prevent this, always update the FSM even when only marking a page
     200             :      * all-visible/all-frozen.
     201             :      *
     202             :      * Do this regardless of whether a full-page image is logged, since FSM
     203             :      * data is not part of the page itself.
     204             :      */
     205       30618 :     if (BufferIsValid(buffer))
     206             :     {
     207       30618 :         if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
     208             :                             XLHP_HAS_DEAD_ITEMS |
     209        3644 :                             XLHP_HAS_NOW_UNUSED_ITEMS)) ||
     210        3644 :             (vmflags & VISIBILITYMAP_VALID_BITS))
     211       26974 :             freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
     212             : 
     213             :         /*
     214             :          * We want to avoid holding an exclusive lock on the heap buffer while
     215             :          * doing IO (either of the FSM or the VM), so we'll release it now.
     216             :          */
     217       30618 :         UnlockReleaseBuffer(buffer);
     218             :     }
     219             : 
     220             :     /*
     221             :      * Now read and update the VM block.
     222             :      *
     223             :      * We must redo changes to the VM even if the heap page was skipped due to
     224             :      * LSN interlock. See comment in heap_xlog_multi_insert() for more details
     225             :      * on replaying changes to the VM.
     226             :      */
     227       38266 :     if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
     228        7648 :         XLogReadBufferForRedoExtended(record, 1,
     229             :                                       RBM_ZERO_ON_ERROR,
     230             :                                       false,
     231             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     232             :     {
     233        7482 :         Page        vmpage = BufferGetPage(vmbuffer);
     234             : 
     235             :         /* initialize the page if it was read as zeros */
     236        7482 :         if (PageIsNew(vmpage))
     237           0 :             PageInit(vmpage, BLCKSZ, 0);
     238             : 
     239        7482 :         visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
     240             : 
     241             :         Assert(BufferIsDirty(vmbuffer));
     242        7482 :         PageSetLSN(vmpage, lsn);
     243             :     }
     244             : 
     245       30618 :     if (BufferIsValid(vmbuffer))
     246        7648 :         UnlockReleaseBuffer(vmbuffer);
     247             : 
     248       30618 :     if (freespace > 0)
     249       26796 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     250       30618 : }
     251             : 
     252             : /*
     253             :  * Replay XLOG_HEAP2_VISIBLE records.
     254             :  *
     255             :  * The critical integrity requirement here is that we must never end up with
     256             :  * a situation where the visibility map bit is set, and the page-level
     257             :  * PD_ALL_VISIBLE bit is clear.  If that were to occur, then a subsequent
     258             :  * page modification would fail to clear the visibility map bit.
     259             :  */
     260             : static void
     261       10442 : heap_xlog_visible(XLogReaderState *record)
     262             : {
     263       10442 :     XLogRecPtr  lsn = record->EndRecPtr;
     264       10442 :     xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
     265       10442 :     Buffer      vmbuffer = InvalidBuffer;
     266             :     Buffer      buffer;
     267             :     Page        page;
     268             :     RelFileLocator rlocator;
     269             :     BlockNumber blkno;
     270             :     XLogRedoAction action;
     271             : 
     272             :     Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
     273             : 
     274       10442 :     XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
     275             : 
     276             :     /*
     277             :      * If there are any Hot Standby transactions running that have an xmin
     278             :      * horizon old enough that this page isn't all-visible for them, they
     279             :      * might incorrectly decide that an index-only scan can skip a heap fetch.
     280             :      *
     281             :      * NB: It might be better to throw some kind of "soft" conflict here that
     282             :      * forces any index-only scan that is in flight to perform heap fetches,
     283             :      * rather than killing the transaction outright.
     284             :      */
     285       10442 :     if (InHotStandby)
     286       10094 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
     287       10094 :                                             xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
     288             :                                             rlocator);
     289             : 
     290             :     /*
     291             :      * Read the heap page, if it still exists. If the heap file has dropped or
     292             :      * truncated later in recovery, we don't need to update the page, but we'd
     293             :      * better still update the visibility map.
     294             :      */
     295       10442 :     action = XLogReadBufferForRedo(record, 1, &buffer);
     296       10442 :     if (action == BLK_NEEDS_REDO)
     297             :     {
     298             :         /*
     299             :          * We don't bump the LSN of the heap page when setting the visibility
     300             :          * map bit (unless checksums or wal_hint_bits is enabled, in which
     301             :          * case we must). This exposes us to torn page hazards, but since
     302             :          * we're not inspecting the existing page contents in any way, we
     303             :          * don't care.
     304             :          */
     305        6126 :         page = BufferGetPage(buffer);
     306             : 
     307        6126 :         PageSetAllVisible(page);
     308             : 
     309        6126 :         if (XLogHintBitIsNeeded())
     310        6126 :             PageSetLSN(page, lsn);
     311             : 
     312        6126 :         MarkBufferDirty(buffer);
     313             :     }
     314             :     else if (action == BLK_RESTORED)
     315             :     {
     316             :         /*
     317             :          * If heap block was backed up, we already restored it and there's
     318             :          * nothing more to do. (This can only happen with checksums or
     319             :          * wal_log_hints enabled.)
     320             :          */
     321             :     }
     322             : 
     323       10442 :     if (BufferIsValid(buffer))
     324             :     {
     325       10442 :         Size        space = PageGetFreeSpace(BufferGetPage(buffer));
     326             : 
     327       10442 :         UnlockReleaseBuffer(buffer);
     328             : 
     329             :         /*
     330             :          * Since FSM is not WAL-logged and only updated heuristically, it
     331             :          * easily becomes stale in standbys.  If the standby is later promoted
     332             :          * and runs VACUUM, it will skip updating individual free space
     333             :          * figures for pages that became all-visible (or all-frozen, depending
     334             :          * on the vacuum mode,) which is troublesome when FreeSpaceMapVacuum
     335             :          * propagates too optimistic free space values to upper FSM layers;
     336             :          * later inserters try to use such pages only to find out that they
     337             :          * are unusable.  This can cause long stalls when there are many such
     338             :          * pages.
     339             :          *
     340             :          * Forestall those problems by updating FSM's idea about a page that
     341             :          * is becoming all-visible or all-frozen.
     342             :          *
     343             :          * Do this regardless of a full-page image being applied, since the
     344             :          * FSM data is not in the page anyway.
     345             :          */
     346       10442 :         if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
     347       10442 :             XLogRecordPageWithFreeSpace(rlocator, blkno, space);
     348             :     }
     349             : 
     350             :     /*
     351             :      * Even if we skipped the heap page update due to the LSN interlock, it's
     352             :      * still safe to update the visibility map.  Any WAL record that clears
     353             :      * the visibility map bit does so before checking the page LSN, so any
     354             :      * bits that need to be cleared will still be cleared.
     355             :      */
     356       10442 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
     357             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     358             :     {
     359       10016 :         Page        vmpage = BufferGetPage(vmbuffer);
     360             :         Relation    reln;
     361             :         uint8       vmbits;
     362             : 
     363             :         /* initialize the page if it was read as zeros */
     364       10016 :         if (PageIsNew(vmpage))
     365           0 :             PageInit(vmpage, BLCKSZ, 0);
     366             : 
     367             :         /* remove VISIBILITYMAP_XLOG_* */
     368       10016 :         vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
     369             : 
     370             :         /*
     371             :          * XLogReadBufferForRedoExtended locked the buffer. But
     372             :          * visibilitymap_set will handle locking itself.
     373             :          */
     374       10016 :         LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
     375             : 
     376       10016 :         reln = CreateFakeRelcacheEntry(rlocator);
     377             : 
     378       10016 :         visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
     379             :                           xlrec->snapshotConflictHorizon, vmbits);
     380             : 
     381       10016 :         ReleaseBuffer(vmbuffer);
     382       10016 :         FreeFakeRelcacheEntry(reln);
     383             :     }
     384         426 :     else if (BufferIsValid(vmbuffer))
     385         426 :         UnlockReleaseBuffer(vmbuffer);
     386       10442 : }
     387             : 
     388             : /*
     389             :  * Given an "infobits" field from an XLog record, set the correct bits in the
     390             :  * given infomask and infomask2 for the tuple touched by the record.
     391             :  *
     392             :  * (This is the reverse of compute_infobits).
     393             :  */
     394             : static void
     395      914420 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
     396             : {
     397      914420 :     *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
     398             :                    HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
     399      914420 :     *infomask2 &= ~HEAP_KEYS_UPDATED;
     400             : 
     401      914420 :     if (infobits & XLHL_XMAX_IS_MULTI)
     402           4 :         *infomask |= HEAP_XMAX_IS_MULTI;
     403      914420 :     if (infobits & XLHL_XMAX_LOCK_ONLY)
     404      111284 :         *infomask |= HEAP_XMAX_LOCK_ONLY;
     405      914420 :     if (infobits & XLHL_XMAX_EXCL_LOCK)
     406      110466 :         *infomask |= HEAP_XMAX_EXCL_LOCK;
     407             :     /* note HEAP_XMAX_SHR_LOCK isn't considered here */
     408      914420 :     if (infobits & XLHL_XMAX_KEYSHR_LOCK)
     409         842 :         *infomask |= HEAP_XMAX_KEYSHR_LOCK;
     410             : 
     411      914420 :     if (infobits & XLHL_KEYS_UPDATED)
     412      615470 :         *infomask2 |= HEAP_KEYS_UPDATED;
     413      914420 : }
     414             : 
     415             : /*
     416             :  * Replay XLOG_HEAP_DELETE records.
     417             :  */
     418             : static void
     419      617010 : heap_xlog_delete(XLogReaderState *record)
     420             : {
     421      617010 :     XLogRecPtr  lsn = record->EndRecPtr;
     422      617010 :     xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
     423             :     Buffer      buffer;
     424             :     Page        page;
     425             :     ItemId      lp;
     426             :     HeapTupleHeader htup;
     427             :     BlockNumber blkno;
     428             :     RelFileLocator target_locator;
     429             :     ItemPointerData target_tid;
     430             : 
     431      617010 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     432      617010 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     433      617010 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     434             : 
     435             :     /*
     436             :      * The visibility map may need to be fixed even if the heap page is
     437             :      * already up-to-date.
     438             :      */
     439      617010 :     if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
     440             :     {
     441          36 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     442          36 :         Buffer      vmbuffer = InvalidBuffer;
     443             : 
     444          36 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     445          36 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     446          36 :         ReleaseBuffer(vmbuffer);
     447          36 :         FreeFakeRelcacheEntry(reln);
     448             :     }
     449             : 
     450      617010 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     451             :     {
     452      612698 :         page = BufferGetPage(buffer);
     453             : 
     454      612698 :         if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
     455           0 :             elog(PANIC, "offnum out of range");
     456      612698 :         lp = PageGetItemId(page, xlrec->offnum);
     457      612698 :         if (!ItemIdIsNormal(lp))
     458           0 :             elog(PANIC, "invalid lp");
     459             : 
     460      612698 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     461             : 
     462      612698 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     463      612698 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     464      612698 :         HeapTupleHeaderClearHotUpdated(htup);
     465      612698 :         fix_infomask_from_infobits(xlrec->infobits_set,
     466             :                                    &htup->t_infomask, &htup->t_infomask2);
     467      612698 :         if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
     468      612698 :             HeapTupleHeaderSetXmax(htup, xlrec->xmax);
     469             :         else
     470           0 :             HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
     471      612698 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     472             : 
     473             :         /* Mark the page as a candidate for pruning */
     474      612698 :         PageSetPrunable(page, XLogRecGetXid(record));
     475             : 
     476      612698 :         if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
     477          22 :             PageClearAllVisible(page);
     478             : 
     479             :         /* Make sure t_ctid is set correctly */
     480      612698 :         if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
     481         288 :             HeapTupleHeaderSetMovedPartitions(htup);
     482             :         else
     483      612410 :             htup->t_ctid = target_tid;
     484      612698 :         PageSetLSN(page, lsn);
     485      612698 :         MarkBufferDirty(buffer);
     486             :     }
     487      617010 :     if (BufferIsValid(buffer))
     488      617010 :         UnlockReleaseBuffer(buffer);
     489      617010 : }
     490             : 
     491             : /*
     492             :  * Replay XLOG_HEAP_INSERT records.
     493             :  */
     494             : static void
     495     2590110 : heap_xlog_insert(XLogReaderState *record)
     496             : {
     497     2590110 :     XLogRecPtr  lsn = record->EndRecPtr;
     498     2590110 :     xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
     499             :     Buffer      buffer;
     500             :     Page        page;
     501             :     union
     502             :     {
     503             :         HeapTupleHeaderData hdr;
     504             :         char        data[MaxHeapTupleSize];
     505             :     }           tbuf;
     506             :     HeapTupleHeader htup;
     507             :     xl_heap_header xlhdr;
     508             :     uint32      newlen;
     509     2590110 :     Size        freespace = 0;
     510             :     RelFileLocator target_locator;
     511             :     BlockNumber blkno;
     512             :     ItemPointerData target_tid;
     513             :     XLogRedoAction action;
     514             : 
     515     2590110 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     516     2590110 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     517     2590110 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     518             : 
     519             :     /* No freezing in the heap_insert() code path */
     520             :     Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
     521             : 
     522             :     /*
     523             :      * The visibility map may need to be fixed even if the heap page is
     524             :      * already up-to-date.
     525             :      */
     526     2590110 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     527             :     {
     528        1890 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     529        1890 :         Buffer      vmbuffer = InvalidBuffer;
     530             : 
     531        1890 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     532        1890 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     533        1890 :         ReleaseBuffer(vmbuffer);
     534        1890 :         FreeFakeRelcacheEntry(reln);
     535             :     }
     536             : 
     537             :     /*
     538             :      * If we inserted the first and only tuple on the page, re-initialize the
     539             :      * page from scratch.
     540             :      */
     541     2590110 :     if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     542             :     {
     543       34862 :         buffer = XLogInitBufferForRedo(record, 0);
     544       34862 :         page = BufferGetPage(buffer);
     545       34862 :         PageInit(page, BufferGetPageSize(buffer), 0);
     546       34862 :         action = BLK_NEEDS_REDO;
     547             :     }
     548             :     else
     549     2555248 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     550     2590110 :     if (action == BLK_NEEDS_REDO)
     551             :     {
     552             :         Size        datalen;
     553             :         char       *data;
     554             : 
     555     2584392 :         page = BufferGetPage(buffer);
     556             : 
     557     2584392 :         if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
     558           0 :             elog(PANIC, "invalid max offset number");
     559             : 
     560     2584392 :         data = XLogRecGetBlockData(record, 0, &datalen);
     561             : 
     562     2584392 :         newlen = datalen - SizeOfHeapHeader;
     563             :         Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
     564     2584392 :         memcpy(&xlhdr, data, SizeOfHeapHeader);
     565     2584392 :         data += SizeOfHeapHeader;
     566             : 
     567     2584392 :         htup = &tbuf.hdr;
     568     2584392 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     569             :         /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     570     2584392 :         memcpy((char *) htup + SizeofHeapTupleHeader,
     571             :                data,
     572             :                newlen);
     573     2584392 :         newlen += SizeofHeapTupleHeader;
     574     2584392 :         htup->t_infomask2 = xlhdr.t_infomask2;
     575     2584392 :         htup->t_infomask = xlhdr.t_infomask;
     576     2584392 :         htup->t_hoff = xlhdr.t_hoff;
     577     2584392 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     578     2584392 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
     579     2584392 :         htup->t_ctid = target_tid;
     580             : 
     581     2584392 :         if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
     582           0 :             elog(PANIC, "failed to add tuple");
     583             : 
     584     2584392 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     585             : 
     586     2584392 :         PageSetLSN(page, lsn);
     587             : 
     588     2584392 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     589         626 :             PageClearAllVisible(page);
     590             : 
     591     2584392 :         MarkBufferDirty(buffer);
     592             :     }
     593     2590110 :     if (BufferIsValid(buffer))
     594     2590110 :         UnlockReleaseBuffer(buffer);
     595             : 
     596             :     /*
     597             :      * If the page is running low on free space, update the FSM as well.
     598             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     599             :      * better than that without knowing the fill-factor for the table.
     600             :      *
     601             :      * XXX: Don't do this if the page was restored from full page image. We
     602             :      * don't bother to update the FSM in that case, it doesn't need to be
     603             :      * totally accurate anyway.
     604             :      */
     605     2590110 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     606      508070 :         XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
     607     2590110 : }
     608             : 
     609             : /*
     610             :  * Replay XLOG_HEAP2_MULTI_INSERT records.
     611             :  */
     612             : static void
     613      122668 : heap_xlog_multi_insert(XLogReaderState *record)
     614             : {
     615      122668 :     XLogRecPtr  lsn = record->EndRecPtr;
     616             :     xl_heap_multi_insert *xlrec;
     617             :     RelFileLocator rlocator;
     618             :     BlockNumber blkno;
     619             :     Buffer      buffer;
     620             :     Page        page;
     621             :     union
     622             :     {
     623             :         HeapTupleHeaderData hdr;
     624             :         char        data[MaxHeapTupleSize];
     625             :     }           tbuf;
     626             :     HeapTupleHeader htup;
     627             :     uint32      newlen;
     628      122668 :     Size        freespace = 0;
     629             :     int         i;
     630      122668 :     bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
     631             :     XLogRedoAction action;
     632      122668 :     Buffer      vmbuffer = InvalidBuffer;
     633             : 
     634             :     /*
     635             :      * Insertion doesn't overwrite MVCC data, so no conflict processing is
     636             :      * required.
     637             :      */
     638      122668 :     xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
     639             : 
     640      122668 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
     641             : 
     642             :     /* check that the mutually exclusive flags are not both set */
     643             :     Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
     644             :              (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
     645             : 
     646             :     /*
     647             :      * The visibility map may need to be fixed even if the heap page is
     648             :      * already up-to-date.
     649             :      */
     650      122668 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     651             :     {
     652        1788 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     653             : 
     654        1788 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     655        1788 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     656        1788 :         ReleaseBuffer(vmbuffer);
     657        1788 :         vmbuffer = InvalidBuffer;
     658        1788 :         FreeFakeRelcacheEntry(reln);
     659             :     }
     660             : 
     661      122668 :     if (isinit)
     662             :     {
     663        3770 :         buffer = XLogInitBufferForRedo(record, 0);
     664        3770 :         page = BufferGetPage(buffer);
     665        3770 :         PageInit(page, BufferGetPageSize(buffer), 0);
     666        3770 :         action = BLK_NEEDS_REDO;
     667             :     }
     668             :     else
     669      118898 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     670      122668 :     if (action == BLK_NEEDS_REDO)
     671             :     {
     672             :         char       *tupdata;
     673             :         char       *endptr;
     674             :         Size        len;
     675             : 
     676             :         /* Tuples are stored as block data */
     677      119442 :         tupdata = XLogRecGetBlockData(record, 0, &len);
     678      119442 :         endptr = tupdata + len;
     679             : 
     680      119442 :         page = BufferGetPage(buffer);
     681             : 
     682      544536 :         for (i = 0; i < xlrec->ntuples; i++)
     683             :         {
     684             :             OffsetNumber offnum;
     685             :             xl_multi_insert_tuple *xlhdr;
     686             : 
     687             :             /*
     688             :              * If we're reinitializing the page, the tuples are stored in
     689             :              * order from FirstOffsetNumber. Otherwise there's an array of
     690             :              * offsets in the WAL record, and the tuples come after that.
     691             :              */
     692      425094 :             if (isinit)
     693      198964 :                 offnum = FirstOffsetNumber + i;
     694             :             else
     695      226130 :                 offnum = xlrec->offsets[i];
     696      425094 :             if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     697           0 :                 elog(PANIC, "invalid max offset number");
     698             : 
     699      425094 :             xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
     700      425094 :             tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
     701             : 
     702      425094 :             newlen = xlhdr->datalen;
     703             :             Assert(newlen <= MaxHeapTupleSize);
     704      425094 :             htup = &tbuf.hdr;
     705      425094 :             MemSet(htup, 0, SizeofHeapTupleHeader);
     706             :             /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     707      425094 :             memcpy((char *) htup + SizeofHeapTupleHeader,
     708             :                    tupdata,
     709             :                    newlen);
     710      425094 :             tupdata += newlen;
     711             : 
     712      425094 :             newlen += SizeofHeapTupleHeader;
     713      425094 :             htup->t_infomask2 = xlhdr->t_infomask2;
     714      425094 :             htup->t_infomask = xlhdr->t_infomask;
     715      425094 :             htup->t_hoff = xlhdr->t_hoff;
     716      425094 :             HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     717      425094 :             HeapTupleHeaderSetCmin(htup, FirstCommandId);
     718      425094 :             ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
     719      425094 :             ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
     720             : 
     721      425094 :             offnum = PageAddItem(page, htup, newlen, offnum, true, true);
     722      425094 :             if (offnum == InvalidOffsetNumber)
     723           0 :                 elog(PANIC, "failed to add tuple");
     724             :         }
     725      119442 :         if (tupdata != endptr)
     726           0 :             elog(PANIC, "total tuple length mismatch");
     727             : 
     728      119442 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     729             : 
     730      119442 :         PageSetLSN(page, lsn);
     731             : 
     732      119442 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     733         200 :             PageClearAllVisible(page);
     734             : 
     735             :         /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
     736      119442 :         if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
     737           8 :             PageSetAllVisible(page);
     738             : 
     739      119442 :         MarkBufferDirty(buffer);
     740             :     }
     741      122668 :     if (BufferIsValid(buffer))
     742      122668 :         UnlockReleaseBuffer(buffer);
     743             : 
     744      122668 :     buffer = InvalidBuffer;
     745             : 
     746             :     /*
     747             :      * Read and update the visibility map (VM) block.
     748             :      *
     749             :      * We must always redo VM changes, even if the corresponding heap page
     750             :      * update was skipped due to the LSN interlock. Each VM block covers
     751             :      * multiple heap pages, so later WAL records may update other bits in the
     752             :      * same block. If this record includes an FPI (full-page image),
     753             :      * subsequent WAL records may depend on it to guard against torn pages.
     754             :      *
     755             :      * Heap page changes are replayed first to preserve the invariant:
     756             :      * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
     757             :      *
     758             :      * Note that we released the heap page lock above. During normal
     759             :      * operation, this would be unsafe — a concurrent modification could
     760             :      * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
     761             :      * invariant.
     762             :      *
     763             :      * During recovery, however, no concurrent writers exist. Therefore,
     764             :      * updating the VM without holding the heap page lock is safe enough. This
     765             :      * same approach is taken when replaying xl_heap_visible records (see
     766             :      * heap_xlog_visible()).
     767             :      */
     768      122676 :     if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
     769           8 :         XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
     770             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     771             :     {
     772           0 :         Page        vmpage = BufferGetPage(vmbuffer);
     773             : 
     774             :         /* initialize the page if it was read as zeros */
     775           0 :         if (PageIsNew(vmpage))
     776           0 :             PageInit(vmpage, BLCKSZ, 0);
     777             : 
     778           0 :         visibilitymap_set_vmbits(blkno,
     779             :                                  vmbuffer,
     780             :                                  VISIBILITYMAP_ALL_VISIBLE |
     781             :                                  VISIBILITYMAP_ALL_FROZEN,
     782             :                                  rlocator);
     783             : 
     784             :         Assert(BufferIsDirty(vmbuffer));
     785           0 :         PageSetLSN(vmpage, lsn);
     786             :     }
     787             : 
     788      122668 :     if (BufferIsValid(vmbuffer))
     789           8 :         UnlockReleaseBuffer(vmbuffer);
     790             : 
     791             :     /*
     792             :      * If the page is running low on free space, update the FSM as well.
     793             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     794             :      * better than that without knowing the fill-factor for the table.
     795             :      *
     796             :      * XXX: Don't do this if the page was restored from full page image. We
     797             :      * don't bother to update the FSM in that case, it doesn't need to be
     798             :      * totally accurate anyway.
     799             :      */
     800      122668 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     801       33372 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     802      122668 : }
     803             : 
     804             : /*
     805             :  * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
     806             :  */
     807             : static void
     808      191126 : heap_xlog_update(XLogReaderState *record, bool hot_update)
     809             : {
     810      191126 :     XLogRecPtr  lsn = record->EndRecPtr;
     811      191126 :     xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
     812             :     RelFileLocator rlocator;
     813             :     BlockNumber oldblk;
     814             :     BlockNumber newblk;
     815             :     ItemPointerData newtid;
     816             :     Buffer      obuffer,
     817             :                 nbuffer;
     818             :     Page        page;
     819             :     OffsetNumber offnum;
     820             :     ItemId      lp;
     821             :     HeapTupleData oldtup;
     822             :     HeapTupleHeader htup;
     823      191126 :     uint16      prefixlen = 0,
     824      191126 :                 suffixlen = 0;
     825             :     char       *newp;
     826             :     union
     827             :     {
     828             :         HeapTupleHeaderData hdr;
     829             :         char        data[MaxHeapTupleSize];
     830             :     }           tbuf;
     831             :     xl_heap_header xlhdr;
     832             :     uint32      newlen;
     833      191126 :     Size        freespace = 0;
     834             :     XLogRedoAction oldaction;
     835             :     XLogRedoAction newaction;
     836             : 
     837             :     /* initialize to keep the compiler quiet */
     838      191126 :     oldtup.t_data = NULL;
     839      191126 :     oldtup.t_len = 0;
     840             : 
     841      191126 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
     842      191126 :     if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
     843             :     {
     844             :         /* HOT updates are never done across pages */
     845             :         Assert(!hot_update);
     846             :     }
     847             :     else
     848       81238 :         oldblk = newblk;
     849             : 
     850      191126 :     ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
     851             : 
     852             :     /*
     853             :      * The visibility map may need to be fixed even if the heap page is
     854             :      * already up-to-date.
     855             :      */
     856      191126 :     if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     857             :     {
     858         528 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     859         528 :         Buffer      vmbuffer = InvalidBuffer;
     860             : 
     861         528 :         visibilitymap_pin(reln, oldblk, &vmbuffer);
     862         528 :         visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     863         528 :         ReleaseBuffer(vmbuffer);
     864         528 :         FreeFakeRelcacheEntry(reln);
     865             :     }
     866             : 
     867             :     /*
     868             :      * In normal operation, it is important to lock the two pages in
     869             :      * page-number order, to avoid possible deadlocks against other update
     870             :      * operations going the other way.  However, during WAL replay there can
     871             :      * be no other update happening, so we don't need to worry about that. But
     872             :      * we *do* need to worry that we don't expose an inconsistent state to Hot
     873             :      * Standby queries --- so the original page can't be unlocked before we've
     874             :      * added the new tuple to the new page.
     875             :      */
     876             : 
     877             :     /* Deal with old tuple version */
     878      191126 :     oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
     879             :                                       &obuffer);
     880      191126 :     if (oldaction == BLK_NEEDS_REDO)
     881             :     {
     882      190438 :         page = BufferGetPage(obuffer);
     883      190438 :         offnum = xlrec->old_offnum;
     884      190438 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
     885           0 :             elog(PANIC, "offnum out of range");
     886      190438 :         lp = PageGetItemId(page, offnum);
     887      190438 :         if (!ItemIdIsNormal(lp))
     888           0 :             elog(PANIC, "invalid lp");
     889             : 
     890      190438 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     891             : 
     892      190438 :         oldtup.t_data = htup;
     893      190438 :         oldtup.t_len = ItemIdGetLength(lp);
     894             : 
     895      190438 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     896      190438 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     897      190438 :         if (hot_update)
     898       74532 :             HeapTupleHeaderSetHotUpdated(htup);
     899             :         else
     900      115906 :             HeapTupleHeaderClearHotUpdated(htup);
     901      190438 :         fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
     902             :                                    &htup->t_infomask2);
     903      190438 :         HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
     904      190438 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     905             :         /* Set forward chain link in t_ctid */
     906      190438 :         htup->t_ctid = newtid;
     907             : 
     908             :         /* Mark the page as a candidate for pruning */
     909      190438 :         PageSetPrunable(page, XLogRecGetXid(record));
     910             : 
     911      190438 :         if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     912         444 :             PageClearAllVisible(page);
     913             : 
     914      190438 :         PageSetLSN(page, lsn);
     915      190438 :         MarkBufferDirty(obuffer);
     916             :     }
     917             : 
     918             :     /*
     919             :      * Read the page the new tuple goes into, if different from old.
     920             :      */
     921      191126 :     if (oldblk == newblk)
     922             :     {
     923       81238 :         nbuffer = obuffer;
     924       81238 :         newaction = oldaction;
     925             :     }
     926      109888 :     else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     927             :     {
     928        1302 :         nbuffer = XLogInitBufferForRedo(record, 0);
     929        1302 :         page = BufferGetPage(nbuffer);
     930        1302 :         PageInit(page, BufferGetPageSize(nbuffer), 0);
     931        1302 :         newaction = BLK_NEEDS_REDO;
     932             :     }
     933             :     else
     934      108586 :         newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
     935             : 
     936             :     /*
     937             :      * The visibility map may need to be fixed even if the heap page is
     938             :      * already up-to-date.
     939             :      */
     940      191126 :     if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
     941             :     {
     942         402 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     943         402 :         Buffer      vmbuffer = InvalidBuffer;
     944             : 
     945         402 :         visibilitymap_pin(reln, newblk, &vmbuffer);
     946         402 :         visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     947         402 :         ReleaseBuffer(vmbuffer);
     948         402 :         FreeFakeRelcacheEntry(reln);
     949             :     }
     950             : 
     951             :     /* Deal with new tuple */
     952      191126 :     if (newaction == BLK_NEEDS_REDO)
     953             :     {
     954             :         char       *recdata;
     955             :         char       *recdata_end;
     956             :         Size        datalen;
     957             :         Size        tuplen;
     958             : 
     959      190014 :         recdata = XLogRecGetBlockData(record, 0, &datalen);
     960      190014 :         recdata_end = recdata + datalen;
     961             : 
     962      190014 :         page = BufferGetPage(nbuffer);
     963             : 
     964      190014 :         offnum = xlrec->new_offnum;
     965      190014 :         if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     966           0 :             elog(PANIC, "invalid max offset number");
     967             : 
     968      190014 :         if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
     969             :         {
     970             :             Assert(newblk == oldblk);
     971       33458 :             memcpy(&prefixlen, recdata, sizeof(uint16));
     972       33458 :             recdata += sizeof(uint16);
     973             :         }
     974      190014 :         if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
     975             :         {
     976             :             Assert(newblk == oldblk);
     977       69488 :             memcpy(&suffixlen, recdata, sizeof(uint16));
     978       69488 :             recdata += sizeof(uint16);
     979             :         }
     980             : 
     981      190014 :         memcpy(&xlhdr, recdata, SizeOfHeapHeader);
     982      190014 :         recdata += SizeOfHeapHeader;
     983             : 
     984      190014 :         tuplen = recdata_end - recdata;
     985             :         Assert(tuplen <= MaxHeapTupleSize);
     986             : 
     987      190014 :         htup = &tbuf.hdr;
     988      190014 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     989             : 
     990             :         /*
     991             :          * Reconstruct the new tuple using the prefix and/or suffix from the
     992             :          * old tuple, and the data stored in the WAL record.
     993             :          */
     994      190014 :         newp = (char *) htup + SizeofHeapTupleHeader;
     995      190014 :         if (prefixlen > 0)
     996             :         {
     997             :             int         len;
     998             : 
     999             :             /* copy bitmap [+ padding] [+ oid] from WAL record */
    1000       33458 :             len = xlhdr.t_hoff - SizeofHeapTupleHeader;
    1001       33458 :             memcpy(newp, recdata, len);
    1002       33458 :             recdata += len;
    1003       33458 :             newp += len;
    1004             : 
    1005             :             /* copy prefix from old tuple */
    1006       33458 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
    1007       33458 :             newp += prefixlen;
    1008             : 
    1009             :             /* copy new tuple data from WAL record */
    1010       33458 :             len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
    1011       33458 :             memcpy(newp, recdata, len);
    1012       33458 :             recdata += len;
    1013       33458 :             newp += len;
    1014             :         }
    1015             :         else
    1016             :         {
    1017             :             /*
    1018             :              * copy bitmap [+ padding] [+ oid] + data from record, all in one
    1019             :              * go
    1020             :              */
    1021      156556 :             memcpy(newp, recdata, tuplen);
    1022      156556 :             recdata += tuplen;
    1023      156556 :             newp += tuplen;
    1024             :         }
    1025             :         Assert(recdata == recdata_end);
    1026             : 
    1027             :         /* copy suffix from old tuple */
    1028      190014 :         if (suffixlen > 0)
    1029       69488 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
    1030             : 
    1031      190014 :         newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
    1032      190014 :         htup->t_infomask2 = xlhdr.t_infomask2;
    1033      190014 :         htup->t_infomask = xlhdr.t_infomask;
    1034      190014 :         htup->t_hoff = xlhdr.t_hoff;
    1035             : 
    1036      190014 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
    1037      190014 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
    1038      190014 :         HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
    1039             :         /* Make sure there is no forward chain link in t_ctid */
    1040      190014 :         htup->t_ctid = newtid;
    1041             : 
    1042      190014 :         offnum = PageAddItem(page, htup, newlen, offnum, true, true);
    1043      190014 :         if (offnum == InvalidOffsetNumber)
    1044           0 :             elog(PANIC, "failed to add tuple");
    1045             : 
    1046      190014 :         if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
    1047         234 :             PageClearAllVisible(page);
    1048             : 
    1049      190014 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
    1050             : 
    1051      190014 :         PageSetLSN(page, lsn);
    1052      190014 :         MarkBufferDirty(nbuffer);
    1053             :     }
    1054             : 
    1055      191126 :     if (BufferIsValid(nbuffer) && nbuffer != obuffer)
    1056      109888 :         UnlockReleaseBuffer(nbuffer);
    1057      191126 :     if (BufferIsValid(obuffer))
    1058      191126 :         UnlockReleaseBuffer(obuffer);
    1059             : 
    1060             :     /*
    1061             :      * If the new page is running low on free space, update the FSM as well.
    1062             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
    1063             :      * better than that without knowing the fill-factor for the table.
    1064             :      *
    1065             :      * However, don't update the FSM on HOT updates, because after crash
    1066             :      * recovery, either the old or the new tuple will certainly be dead and
    1067             :      * prunable. After pruning, the page will have roughly as much free space
    1068             :      * as it did before the update, assuming the new tuple is about the same
    1069             :      * size as the old one.
    1070             :      *
    1071             :      * XXX: Don't do this if the page was restored from full page image. We
    1072             :      * don't bother to update the FSM in that case, it doesn't need to be
    1073             :      * totally accurate anyway.
    1074             :      */
    1075      191126 :     if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
    1076       23746 :         XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
    1077      191126 : }
    1078             : 
    1079             : /*
    1080             :  * Replay XLOG_HEAP_CONFIRM records.
    1081             :  */
    1082             : static void
    1083         166 : heap_xlog_confirm(XLogReaderState *record)
    1084             : {
    1085         166 :     XLogRecPtr  lsn = record->EndRecPtr;
    1086         166 :     xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
    1087             :     Buffer      buffer;
    1088             :     Page        page;
    1089             :     OffsetNumber offnum;
    1090             :     ItemId      lp;
    1091             :     HeapTupleHeader htup;
    1092             : 
    1093         166 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1094             :     {
    1095         166 :         page = BufferGetPage(buffer);
    1096             : 
    1097         166 :         offnum = xlrec->offnum;
    1098         166 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1099           0 :             elog(PANIC, "offnum out of range");
    1100         166 :         lp = PageGetItemId(page, offnum);
    1101         166 :         if (!ItemIdIsNormal(lp))
    1102           0 :             elog(PANIC, "invalid lp");
    1103             : 
    1104         166 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1105             : 
    1106             :         /*
    1107             :          * Confirm tuple as actually inserted
    1108             :          */
    1109         166 :         ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
    1110             : 
    1111         166 :         PageSetLSN(page, lsn);
    1112         166 :         MarkBufferDirty(buffer);
    1113             :     }
    1114         166 :     if (BufferIsValid(buffer))
    1115         166 :         UnlockReleaseBuffer(buffer);
    1116         166 : }
    1117             : 
    1118             : /*
    1119             :  * Replay XLOG_HEAP_LOCK records.
    1120             :  */
    1121             : static void
    1122      111724 : heap_xlog_lock(XLogReaderState *record)
    1123             : {
    1124      111724 :     XLogRecPtr  lsn = record->EndRecPtr;
    1125      111724 :     xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
    1126             :     Buffer      buffer;
    1127             :     Page        page;
    1128             :     OffsetNumber offnum;
    1129             :     ItemId      lp;
    1130             :     HeapTupleHeader htup;
    1131             : 
    1132             :     /*
    1133             :      * The visibility map may need to be fixed even if the heap page is
    1134             :      * already up-to-date.
    1135             :      */
    1136      111724 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1137             :     {
    1138             :         RelFileLocator rlocator;
    1139         128 :         Buffer      vmbuffer = InvalidBuffer;
    1140             :         BlockNumber block;
    1141             :         Relation    reln;
    1142             : 
    1143         128 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1144         128 :         reln = CreateFakeRelcacheEntry(rlocator);
    1145             : 
    1146         128 :         visibilitymap_pin(reln, block, &vmbuffer);
    1147         128 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1148             : 
    1149         128 :         ReleaseBuffer(vmbuffer);
    1150         128 :         FreeFakeRelcacheEntry(reln);
    1151             :     }
    1152             : 
    1153      111724 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1154             :     {
    1155      111284 :         page = BufferGetPage(buffer);
    1156             : 
    1157      111284 :         offnum = xlrec->offnum;
    1158      111284 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1159           0 :             elog(PANIC, "offnum out of range");
    1160      111284 :         lp = PageGetItemId(page, offnum);
    1161      111284 :         if (!ItemIdIsNormal(lp))
    1162           0 :             elog(PANIC, "invalid lp");
    1163             : 
    1164      111284 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1165             : 
    1166      111284 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1167      111284 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1168      111284 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1169             :                                    &htup->t_infomask2);
    1170             : 
    1171             :         /*
    1172             :          * Clear relevant update flags, but only if the modified infomask says
    1173             :          * there's no update.
    1174             :          */
    1175      111284 :         if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
    1176             :         {
    1177      111284 :             HeapTupleHeaderClearHotUpdated(htup);
    1178             :             /* Make sure there is no forward chain link in t_ctid */
    1179      111284 :             ItemPointerSet(&htup->t_ctid,
    1180             :                            BufferGetBlockNumber(buffer),
    1181             :                            offnum);
    1182             :         }
    1183      111284 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1184      111284 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
    1185      111284 :         PageSetLSN(page, lsn);
    1186      111284 :         MarkBufferDirty(buffer);
    1187             :     }
    1188      111724 :     if (BufferIsValid(buffer))
    1189      111724 :         UnlockReleaseBuffer(buffer);
    1190      111724 : }
    1191             : 
    1192             : /*
    1193             :  * Replay XLOG_HEAP2_LOCK_UPDATED records.
    1194             :  */
    1195             : static void
    1196           0 : heap_xlog_lock_updated(XLogReaderState *record)
    1197             : {
    1198           0 :     XLogRecPtr  lsn = record->EndRecPtr;
    1199             :     xl_heap_lock_updated *xlrec;
    1200             :     Buffer      buffer;
    1201             :     Page        page;
    1202             :     OffsetNumber offnum;
    1203             :     ItemId      lp;
    1204             :     HeapTupleHeader htup;
    1205             : 
    1206           0 :     xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
    1207             : 
    1208             :     /*
    1209             :      * The visibility map may need to be fixed even if the heap page is
    1210             :      * already up-to-date.
    1211             :      */
    1212           0 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1213             :     {
    1214             :         RelFileLocator rlocator;
    1215           0 :         Buffer      vmbuffer = InvalidBuffer;
    1216             :         BlockNumber block;
    1217             :         Relation    reln;
    1218             : 
    1219           0 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1220           0 :         reln = CreateFakeRelcacheEntry(rlocator);
    1221             : 
    1222           0 :         visibilitymap_pin(reln, block, &vmbuffer);
    1223           0 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1224             : 
    1225           0 :         ReleaseBuffer(vmbuffer);
    1226           0 :         FreeFakeRelcacheEntry(reln);
    1227             :     }
    1228             : 
    1229           0 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1230             :     {
    1231           0 :         page = BufferGetPage(buffer);
    1232             : 
    1233           0 :         offnum = xlrec->offnum;
    1234           0 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1235           0 :             elog(PANIC, "offnum out of range");
    1236           0 :         lp = PageGetItemId(page, offnum);
    1237           0 :         if (!ItemIdIsNormal(lp))
    1238           0 :             elog(PANIC, "invalid lp");
    1239             : 
    1240           0 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1241             : 
    1242           0 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1243           0 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1244           0 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1245             :                                    &htup->t_infomask2);
    1246           0 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1247             : 
    1248           0 :         PageSetLSN(page, lsn);
    1249           0 :         MarkBufferDirty(buffer);
    1250             :     }
    1251           0 :     if (BufferIsValid(buffer))
    1252           0 :         UnlockReleaseBuffer(buffer);
    1253           0 : }
    1254             : 
    1255             : /*
    1256             :  * Replay XLOG_HEAP_INPLACE records.
    1257             :  */
    1258             : static void
    1259       15964 : heap_xlog_inplace(XLogReaderState *record)
    1260             : {
    1261       15964 :     XLogRecPtr  lsn = record->EndRecPtr;
    1262       15964 :     xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
    1263             :     Buffer      buffer;
    1264             :     Page        page;
    1265             :     OffsetNumber offnum;
    1266             :     ItemId      lp;
    1267             :     HeapTupleHeader htup;
    1268             :     uint32      oldlen;
    1269             :     Size        newlen;
    1270             : 
    1271       15964 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1272             :     {
    1273       15544 :         char       *newtup = XLogRecGetBlockData(record, 0, &newlen);
    1274             : 
    1275       15544 :         page = BufferGetPage(buffer);
    1276             : 
    1277       15544 :         offnum = xlrec->offnum;
    1278       15544 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1279           0 :             elog(PANIC, "offnum out of range");
    1280       15544 :         lp = PageGetItemId(page, offnum);
    1281       15544 :         if (!ItemIdIsNormal(lp))
    1282           0 :             elog(PANIC, "invalid lp");
    1283             : 
    1284       15544 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1285             : 
    1286       15544 :         oldlen = ItemIdGetLength(lp) - htup->t_hoff;
    1287       15544 :         if (oldlen != newlen)
    1288           0 :             elog(PANIC, "wrong tuple length");
    1289             : 
    1290       15544 :         memcpy((char *) htup + htup->t_hoff, newtup, newlen);
    1291             : 
    1292       15544 :         PageSetLSN(page, lsn);
    1293       15544 :         MarkBufferDirty(buffer);
    1294             :     }
    1295       15964 :     if (BufferIsValid(buffer))
    1296       15964 :         UnlockReleaseBuffer(buffer);
    1297             : 
    1298       15964 :     ProcessCommittedInvalidationMessages(xlrec->msgs,
    1299             :                                          xlrec->nmsgs,
    1300       15964 :                                          xlrec->relcacheInitFileInval,
    1301             :                                          xlrec->dbId,
    1302             :                                          xlrec->tsId);
    1303       15964 : }
    1304             : 
    1305             : void
    1306     3526104 : heap_redo(XLogReaderState *record)
    1307             : {
    1308     3526104 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1309             : 
    1310             :     /*
    1311             :      * These operations don't overwrite MVCC data so no conflict processing is
    1312             :      * required. The ones in heap2 rmgr do.
    1313             :      */
    1314             : 
    1315     3526104 :     switch (info & XLOG_HEAP_OPMASK)
    1316             :     {
    1317     2590110 :         case XLOG_HEAP_INSERT:
    1318     2590110 :             heap_xlog_insert(record);
    1319     2590110 :             break;
    1320      617010 :         case XLOG_HEAP_DELETE:
    1321      617010 :             heap_xlog_delete(record);
    1322      617010 :             break;
    1323      115972 :         case XLOG_HEAP_UPDATE:
    1324      115972 :             heap_xlog_update(record, false);
    1325      115972 :             break;
    1326           4 :         case XLOG_HEAP_TRUNCATE:
    1327             : 
    1328             :             /*
    1329             :              * TRUNCATE is a no-op because the actions are already logged as
    1330             :              * SMGR WAL records.  TRUNCATE WAL record only exists for logical
    1331             :              * decoding.
    1332             :              */
    1333           4 :             break;
    1334       75154 :         case XLOG_HEAP_HOT_UPDATE:
    1335       75154 :             heap_xlog_update(record, true);
    1336       75154 :             break;
    1337         166 :         case XLOG_HEAP_CONFIRM:
    1338         166 :             heap_xlog_confirm(record);
    1339         166 :             break;
    1340      111724 :         case XLOG_HEAP_LOCK:
    1341      111724 :             heap_xlog_lock(record);
    1342      111724 :             break;
    1343       15964 :         case XLOG_HEAP_INPLACE:
    1344       15964 :             heap_xlog_inplace(record);
    1345       15964 :             break;
    1346           0 :         default:
    1347           0 :             elog(PANIC, "heap_redo: unknown op code %u", info);
    1348             :     }
    1349     3526104 : }
    1350             : 
    1351             : void
    1352      165692 : heap2_redo(XLogReaderState *record)
    1353             : {
    1354      165692 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1355             : 
    1356      165692 :     switch (info & XLOG_HEAP_OPMASK)
    1357             :     {
    1358       30618 :         case XLOG_HEAP2_PRUNE_ON_ACCESS:
    1359             :         case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
    1360             :         case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
    1361       30618 :             heap_xlog_prune_freeze(record);
    1362       30618 :             break;
    1363       10442 :         case XLOG_HEAP2_VISIBLE:
    1364       10442 :             heap_xlog_visible(record);
    1365       10442 :             break;
    1366      122668 :         case XLOG_HEAP2_MULTI_INSERT:
    1367      122668 :             heap_xlog_multi_insert(record);
    1368      122668 :             break;
    1369           0 :         case XLOG_HEAP2_LOCK_UPDATED:
    1370           0 :             heap_xlog_lock_updated(record);
    1371           0 :             break;
    1372        1964 :         case XLOG_HEAP2_NEW_CID:
    1373             : 
    1374             :             /*
    1375             :              * Nothing to do on a real replay, only used during logical
    1376             :              * decoding.
    1377             :              */
    1378        1964 :             break;
    1379           0 :         case XLOG_HEAP2_REWRITE:
    1380           0 :             heap_xlog_logical_rewrite(record);
    1381           0 :             break;
    1382           0 :         default:
    1383           0 :             elog(PANIC, "heap2_redo: unknown op code %u", info);
    1384             :     }
    1385      165692 : }
    1386             : 
    1387             : /*
    1388             :  * Mask a heap page before performing consistency checks on it.
    1389             :  */
    1390             : void
    1391     5926696 : heap_mask(char *pagedata, BlockNumber blkno)
    1392             : {
    1393     5926696 :     Page        page = (Page) pagedata;
    1394             :     OffsetNumber off;
    1395             : 
    1396     5926696 :     mask_page_lsn_and_checksum(page);
    1397             : 
    1398     5926696 :     mask_page_hint_bits(page);
    1399     5926696 :     mask_unused_space(page);
    1400             : 
    1401   484809756 :     for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
    1402             :     {
    1403   478883060 :         ItemId      iid = PageGetItemId(page, off);
    1404             :         char       *page_item;
    1405             : 
    1406   478883060 :         page_item = (char *) (page + ItemIdGetOffset(iid));
    1407             : 
    1408   478883060 :         if (ItemIdIsNormal(iid))
    1409             :         {
    1410   451242252 :             HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
    1411             : 
    1412             :             /*
    1413             :              * If xmin of a tuple is not yet frozen, we should ignore
    1414             :              * differences in hint bits, since they can be set without
    1415             :              * emitting WAL.
    1416             :              */
    1417   451242252 :             if (!HeapTupleHeaderXminFrozen(page_htup))
    1418   446577916 :                 page_htup->t_infomask &= ~HEAP_XACT_MASK;
    1419             :             else
    1420             :             {
    1421             :                 /* Still we need to mask xmax hint bits. */
    1422     4664336 :                 page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
    1423     4664336 :                 page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
    1424             :             }
    1425             : 
    1426             :             /*
    1427             :              * During replay, we set Command Id to FirstCommandId. Hence, mask
    1428             :              * it. See heap_xlog_insert() for details.
    1429             :              */
    1430   451242252 :             page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
    1431             : 
    1432             :             /*
    1433             :              * For a speculative tuple, heap_insert() does not set ctid in the
    1434             :              * caller-passed heap tuple itself, leaving the ctid field to
    1435             :              * contain a speculative token value - a per-backend monotonically
    1436             :              * increasing identifier. Besides, it does not WAL-log ctid under
    1437             :              * any circumstances.
    1438             :              *
    1439             :              * During redo, heap_xlog_insert() sets t_ctid to current block
    1440             :              * number and self offset number. It doesn't care about any
    1441             :              * speculative insertions on the primary. Hence, we set t_ctid to
    1442             :              * current block number and self offset number to ignore any
    1443             :              * inconsistency.
    1444             :              */
    1445   451242252 :             if (HeapTupleHeaderIsSpeculative(page_htup))
    1446         168 :                 ItemPointerSet(&page_htup->t_ctid, blkno, off);
    1447             : 
    1448             :             /*
    1449             :              * NB: Not ignoring ctid changes due to the tuple having moved
    1450             :              * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
    1451             :              * important information that needs to be in-sync between primary
    1452             :              * and standby, and thus is WAL logged.
    1453             :              */
    1454             :         }
    1455             : 
    1456             :         /*
    1457             :          * Ignore any padding bytes after the tuple, when the length of the
    1458             :          * item is not MAXALIGNed.
    1459             :          */
    1460   478883060 :         if (ItemIdHasStorage(iid))
    1461             :         {
    1462   451242252 :             int         len = ItemIdGetLength(iid);
    1463   451242252 :             int         padlen = MAXALIGN(len) - len;
    1464             : 
    1465   451242252 :             if (padlen > 0)
    1466   241354068 :                 memset(page_item + len, MASK_MARKER, padlen);
    1467             :         }
    1468             :     }
    1469     5926696 : }

Generated by: LCOV version 1.16