LCOV - code coverage report
Current view: top level - src/backend/access/heap - heapam_xlog.c
Test: PostgreSQL 19devel
Date: 2025-11-05 04:18:35

                 Hit    Total    Coverage
    Lines:       509      570      89.3 %
    Functions:    13       14      92.9 %

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heapam_xlog.c
       4             :  *    WAL replay logic for heap access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/heapam_xlog.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/bufmask.h"
      18             : #include "access/heapam.h"
      19             : #include "access/visibilitymap.h"
      20             : #include "access/xlog.h"
      21             : #include "access/xlogutils.h"
      22             : #include "storage/freespace.h"
      23             : #include "storage/standby.h"
      24             : 
      25             : 
      26             : /*
      27             :  * Replay XLOG_HEAP2_PRUNE_* records.
      28             :  */
      29             : static void
      30       27968 : heap_xlog_prune_freeze(XLogReaderState *record)
      31             : {
      32       27968 :     XLogRecPtr  lsn = record->EndRecPtr;
      33       27968 :     char       *maindataptr = XLogRecGetData(record);
      34             :     xl_heap_prune xlrec;
      35             :     Buffer      buffer;
      36             :     RelFileLocator rlocator;
      37             :     BlockNumber blkno;
      38       27968 :     Buffer      vmbuffer = InvalidBuffer;
      39       27968 :     uint8       vmflags = 0;
      40       27968 :     Size        freespace = 0;
      41             : 
      42       27968 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
      43       27968 :     memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
      44       27968 :     maindataptr += SizeOfHeapPrune;
      45             : 
      46             :     /*
      47             :      * We will take an ordinary exclusive lock or a cleanup lock depending on
      48             :      * whether the XLHP_CLEANUP_LOCK flag is set.  With an ordinary exclusive
       49             :  * lock, we'd better not be doing anything that requires moving existing
      50             :      * tuple data.
      51             :      */
      52             :     Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
      53             :            (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
      54             : 
      55       27968 :     if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
      56             :     {
      57        5856 :         vmflags = VISIBILITYMAP_ALL_VISIBLE;
      58        5856 :         if (xlrec.flags & XLHP_VM_ALL_FROZEN)
      59        5086 :             vmflags |= VISIBILITYMAP_ALL_FROZEN;
      60             :     }
      61             : 
      62             :     /*
      63             :      * After xl_heap_prune is the optional snapshot conflict horizon.
      64             :      *
      65             :      * In Hot Standby mode, we must ensure that there are no running queries
      66             :      * which would conflict with the changes in this record. That means we
      67             :      * can't replay this record if it removes tuples that are still visible to
      68             :      * transactions on the standby, freeze tuples with xids that are still
      69             :      * considered running on the standby, or set a page as all-visible in the
      70             :      * VM if it isn't all-visible to all transactions on the standby.
      71             :      */
      72       27968 :     if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
      73             :     {
      74             :         TransactionId snapshot_conflict_horizon;
      75             : 
      76             :         /* memcpy() because snapshot_conflict_horizon is stored unaligned */
      77       20954 :         memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
      78       20954 :         maindataptr += sizeof(TransactionId);
      79             : 
      80       20954 :         if (InHotStandby)
      81       20490 :             ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
      82       20490 :                                                 (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
      83             :                                                 rlocator);
      84             :     }
      85             : 
      86             :     /*
      87             :      * If we have a full-page image of the heap block, restore it and we're
      88             :      * done with the heap block.
      89             :      */
      90       27968 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
      91       27968 :                                       (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
      92             :                                       &buffer) == BLK_NEEDS_REDO)
      93             :     {
      94       19620 :         Page        page = BufferGetPage(buffer);
      95             :         OffsetNumber *redirected;
      96             :         OffsetNumber *nowdead;
      97             :         OffsetNumber *nowunused;
      98             :         int         nredirected;
      99             :         int         ndead;
     100             :         int         nunused;
     101             :         int         nplans;
     102             :         Size        datalen;
     103             :         xlhp_freeze_plan *plans;
     104             :         OffsetNumber *frz_offsets;
     105       19620 :         char       *dataptr = XLogRecGetBlockData(record, 0, &datalen);
     106             :         bool        do_prune;
     107             : 
     108       19620 :         heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
     109             :                                                &nplans, &plans, &frz_offsets,
     110             :                                                &nredirected, &redirected,
     111             :                                                &ndead, &nowdead,
     112             :                                                &nunused, &nowunused);
     113             : 
     114       19620 :         do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
     115             : 
     116             :         /* Ensure the record does something */
     117             :         Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
     118             : 
     119             :         /*
     120             :          * Update all line pointers per the record, and repair fragmentation
     121             :          * if needed.
     122             :          */
     123       19620 :         if (do_prune)
     124       18470 :             heap_page_prune_execute(buffer,
     125       18470 :                                     (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
     126             :                                     redirected, nredirected,
     127             :                                     nowdead, ndead,
     128             :                                     nowunused, nunused);
     129             : 
     130             :         /* Freeze tuples */
     131       22042 :         for (int p = 0; p < nplans; p++)
     132             :         {
     133             :             HeapTupleFreeze frz;
     134             : 
     135             :             /*
     136             :              * Convert freeze plan representation from WAL record into
     137             :              * per-tuple format used by heap_execute_freeze_tuple
     138             :              */
     139        2422 :             frz.xmax = plans[p].xmax;
     140        2422 :             frz.t_infomask2 = plans[p].t_infomask2;
     141        2422 :             frz.t_infomask = plans[p].t_infomask;
     142        2422 :             frz.frzflags = plans[p].frzflags;
     143        2422 :             frz.offset = InvalidOffsetNumber;   /* unused, but be tidy */
     144             : 
     145      108130 :             for (int i = 0; i < plans[p].ntuples; i++)
     146             :             {
     147      105708 :                 OffsetNumber offset = *(frz_offsets++);
     148             :                 ItemId      lp;
     149             :                 HeapTupleHeader tuple;
     150             : 
     151      105708 :                 lp = PageGetItemId(page, offset);
     152      105708 :                 tuple = (HeapTupleHeader) PageGetItem(page, lp);
     153      105708 :                 heap_execute_freeze_tuple(tuple, &frz);
     154             :             }
     155             :         }
     156             : 
     157             :         /* There should be no more data */
     158             :         Assert((char *) frz_offsets == dataptr + datalen);
     159             : 
     160       19620 :         if (vmflags & VISIBILITYMAP_VALID_BITS)
     161        3036 :             PageSetAllVisible(page);
     162             : 
     163       19620 :         MarkBufferDirty(buffer);
     164             : 
     165             :         /*
     166             :          * See log_heap_prune_and_freeze() for commentary on when we set the
     167             :          * heap page LSN.
     168             :          */
     169       19620 :         if (do_prune || nplans > 0 ||
     170           0 :             ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
     171       19620 :             PageSetLSN(page, lsn);
     172             : 
     173             :         /*
     174             :          * Note: we don't worry about updating the page's prunability hints.
     175             :          * At worst this will cause an extra prune cycle to occur soon.
     176             :          */
     177             :     }
     178             : 
     179             :     /*
     180             :      * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
     181             :      * or the VM, update the freespace map.
     182             :      *
     183             :      * Even when no actual space is freed (when only marking the page
     184             :      * all-visible or frozen), we still update the FSM. Because the FSM is
     185             :      * unlogged and maintained heuristically, it often becomes stale on
     186             :      * standbys. If such a standby is later promoted and runs VACUUM, it will
     187             :      * skip recalculating free space for pages that were marked
      188             :      * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
     189             :      * optimistic free space values upward, causing future insertions to
     190             :      * select pages that turn out to be unusable. In bulk, this can lead to
     191             :      * long stalls.
     192             :      *
     193             :      * To prevent this, always update the FSM even when only marking a page
     194             :      * all-visible/all-frozen.
     195             :      *
     196             :      * Do this regardless of whether a full-page image is logged, since FSM
     197             :      * data is not part of the page itself.
     198             :      */
     199       27968 :     if (BufferIsValid(buffer))
     200             :     {
     201       27968 :         if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
     202             :                             XLHP_HAS_DEAD_ITEMS |
     203        3700 :                             XLHP_HAS_NOW_UNUSED_ITEMS)) ||
     204        3700 :             (vmflags & VISIBILITYMAP_VALID_BITS))
     205       24268 :             freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
     206             : 
     207             :         /*
     208             :          * We want to avoid holding an exclusive lock on the heap buffer while
      209             :          * doing IO (on either the FSM or the VM), so we'll release it now.
     210             :          */
     211       27968 :         UnlockReleaseBuffer(buffer);
     212             :     }
     213             : 
     214             :     /*
     215             :      * Now read and update the VM block.
     216             :      *
     217             :      * We must redo changes to the VM even if the heap page was skipped due to
     218             :      * LSN interlock. See comment in heap_xlog_multi_insert() for more details
     219             :      * on replaying changes to the VM.
     220             :      */
     221       33824 :     if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
     222        5856 :         XLogReadBufferForRedoExtended(record, 1,
     223             :                                       RBM_ZERO_ON_ERROR,
     224             :                                       false,
     225             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     226             :     {
     227        5696 :         Page        vmpage = BufferGetPage(vmbuffer);
     228             : 
     229             :         /* initialize the page if it was read as zeros */
     230        5696 :         if (PageIsNew(vmpage))
     231           0 :             PageInit(vmpage, BLCKSZ, 0);
     232             : 
     233        5696 :         visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
     234             : 
     235             :         Assert(BufferIsDirty(vmbuffer));
     236        5696 :         PageSetLSN(vmpage, lsn);
     237             :     }
     238             : 
     239       27968 :     if (BufferIsValid(vmbuffer))
     240        5856 :         UnlockReleaseBuffer(vmbuffer);
     241             : 
     242       27968 :     if (freespace > 0)
     243       24088 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     244       27968 : }
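
The memcpy() used above to read snapshot_conflict_horizon is the standard
idiom for fetching a value from a buffer position that is not guaranteed to
be naturally aligned: casting and dereferencing the pointer instead would be
undefined behavior and can fault outright on strict-alignment platforms. A
minimal standalone sketch of the idiom (not PostgreSQL code; the buffer
layout here is made up):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int
    main(void)
    {
        /* A byte buffer where a uint32 happens to start at an odd offset. */
        unsigned char buf[8] = {0};
        uint32_t    in = 0xDEADBEEF;
        uint32_t    out;

        memcpy(buf + 1, &in, sizeof(in));   /* value stored unaligned */

        /*
         * Wrong: *(uint32_t *) (buf + 1) may crash on strict-alignment CPUs.
         * Right: let memcpy() do a byte-wise (or compiler-folded) copy.
         */
        memcpy(&out, buf + 1, sizeof(out));

        assert(out == in);
        return 0;
    }
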
     245             : 
     246             : /*
     247             :  * Replay XLOG_HEAP2_VISIBLE records.
     248             :  *
     249             :  * The critical integrity requirement here is that we must never end up with
     250             :  * a situation where the visibility map bit is set, and the page-level
     251             :  * PD_ALL_VISIBLE bit is clear.  If that were to occur, then a subsequent
     252             :  * page modification would fail to clear the visibility map bit.
     253             :  */
     254             : static void
     255        9674 : heap_xlog_visible(XLogReaderState *record)
     256             : {
     257        9674 :     XLogRecPtr  lsn = record->EndRecPtr;
     258        9674 :     xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
     259        9674 :     Buffer      vmbuffer = InvalidBuffer;
     260             :     Buffer      buffer;
     261             :     Page        page;
     262             :     RelFileLocator rlocator;
     263             :     BlockNumber blkno;
     264             :     XLogRedoAction action;
     265             : 
     266             :     Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
     267             : 
     268        9674 :     XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
     269             : 
     270             :     /*
     271             :      * If there are any Hot Standby transactions running that have an xmin
     272             :      * horizon old enough that this page isn't all-visible for them, they
     273             :      * might incorrectly decide that an index-only scan can skip a heap fetch.
     274             :      *
     275             :      * NB: It might be better to throw some kind of "soft" conflict here that
     276             :      * forces any index-only scan that is in flight to perform heap fetches,
     277             :      * rather than killing the transaction outright.
     278             :      */
     279        9674 :     if (InHotStandby)
     280        9326 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
     281        9326 :                                             xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
     282             :                                             rlocator);
     283             : 
     284             :     /*
     285             :      * Read the heap page, if it still exists. If the heap file has dropped or
     286             :      * truncated later in recovery, we don't need to update the page, but we'd
     287             :      * better still update the visibility map.
     288             :      */
     289        9674 :     action = XLogReadBufferForRedo(record, 1, &buffer);
     290        9674 :     if (action == BLK_NEEDS_REDO)
     291             :     {
     292             :         /*
     293             :          * We don't bump the LSN of the heap page when setting the visibility
      294             :          * map bit (unless checksums or wal_log_hints is enabled, in which
     295             :          * case we must). This exposes us to torn page hazards, but since
     296             :          * we're not inspecting the existing page contents in any way, we
     297             :          * don't care.
     298             :          */
     299        6148 :         page = BufferGetPage(buffer);
     300             : 
     301        6148 :         PageSetAllVisible(page);
     302             : 
     303        6148 :         if (XLogHintBitIsNeeded())
     304        6148 :             PageSetLSN(page, lsn);
     305             : 
     306        6148 :         MarkBufferDirty(buffer);
     307             :     }
     308             :     else if (action == BLK_RESTORED)
     309             :     {
     310             :         /*
     311             :          * If heap block was backed up, we already restored it and there's
     312             :          * nothing more to do. (This can only happen with checksums or
     313             :          * wal_log_hints enabled.)
     314             :          */
     315             :     }
     316             : 
     317        9674 :     if (BufferIsValid(buffer))
     318             :     {
     319        9674 :         Size        space = PageGetFreeSpace(BufferGetPage(buffer));
     320             : 
     321        9674 :         UnlockReleaseBuffer(buffer);
     322             : 
     323             :         /*
      324             :          * Since the FSM is not WAL-logged and only updated heuristically, it
      325             :          * easily becomes stale on standbys.  If the standby is later promoted
     326             :          * and runs VACUUM, it will skip updating individual free space
     327             :          * figures for pages that became all-visible (or all-frozen, depending
      328             :          * on the vacuum mode), which is troublesome when FreeSpaceMapVacuum
      329             :          * propagates overly optimistic free space values to upper FSM layers;
     330             :          * later inserters try to use such pages only to find out that they
     331             :          * are unusable.  This can cause long stalls when there are many such
     332             :          * pages.
     333             :          *
      334             :          * Forestall those problems by updating the FSM's idea about a page that
     335             :          * is becoming all-visible or all-frozen.
     336             :          *
     337             :          * Do this regardless of a full-page image being applied, since the
     338             :          * FSM data is not in the page anyway.
     339             :          */
     340        9674 :         if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
     341        9674 :             XLogRecordPageWithFreeSpace(rlocator, blkno, space);
     342             :     }
     343             : 
     344             :     /*
     345             :      * Even if we skipped the heap page update due to the LSN interlock, it's
     346             :      * still safe to update the visibility map.  Any WAL record that clears
     347             :      * the visibility map bit does so before checking the page LSN, so any
     348             :      * bits that need to be cleared will still be cleared.
     349             :      */
     350        9674 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
     351             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     352             :     {
     353        9256 :         Page        vmpage = BufferGetPage(vmbuffer);
     354             :         Relation    reln;
     355             :         uint8       vmbits;
     356             : 
     357             :         /* initialize the page if it was read as zeros */
     358        9256 :         if (PageIsNew(vmpage))
     359           0 :             PageInit(vmpage, BLCKSZ, 0);
     360             : 
     361             :         /* remove VISIBILITYMAP_XLOG_* */
     362        9256 :         vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
     363             : 
     364             :         /*
     365             :          * XLogReadBufferForRedoExtended locked the buffer. But
     366             :          * visibilitymap_set will handle locking itself.
     367             :          */
     368        9256 :         LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
     369             : 
     370        9256 :         reln = CreateFakeRelcacheEntry(rlocator);
     371             : 
     372        9256 :         visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
     373             :                           xlrec->snapshotConflictHorizon, vmbits);
     374             : 
     375        9256 :         ReleaseBuffer(vmbuffer);
     376        9256 :         FreeFakeRelcacheEntry(reln);
     377             :     }
     378         418 :     else if (BufferIsValid(vmbuffer))
     379         418 :         UnlockReleaseBuffer(vmbuffer);
     380        9674 : }
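
The integrity rule stated in the comment above heap_xlog_visible() (the VM
bit must never be set while the page-level PD_ALL_VISIBLE bit is clear)
reduces to an ordering discipline: set the heap-page bit before the VM bit,
and clear them in the opposite order. A toy model of that discipline (purely
illustrative; none of these types exist in PostgreSQL):

    #include <assert.h>
    #include <stdbool.h>

    struct toy_page { bool pd_all_visible; };  /* stand-in for the heap page bit */
    struct toy_vm   { bool all_visible; };     /* stand-in for the VM bit */

    static void
    set_all_visible(struct toy_page *page, struct toy_vm *vm)
    {
        page->pd_all_visible = true;    /* heap page first... */
        vm->all_visible = true;         /* ...then the VM bit */
    }

    static void
    clear_all_visible(struct toy_page *page, struct toy_vm *vm)
    {
        vm->all_visible = false;        /* VM bit first... */
        page->pd_all_visible = false;   /* ...then the heap page */
    }

    int
    main(void)
    {
        struct toy_page page = {false};
        struct toy_vm   vm = {false};

        set_all_visible(&page, &vm);
        assert(!vm.all_visible || page.pd_all_visible);     /* the invariant */
        clear_all_visible(&page, &vm);
        assert(!vm.all_visible || page.pd_all_visible);
        return 0;
    }
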
     381             : 
     382             : /*
     383             :  * Given an "infobits" field from an XLog record, set the correct bits in the
     384             :  * given infomask and infomask2 for the tuple touched by the record.
     385             :  *
     386             :  * (This is the reverse of compute_infobits).
     387             :  */
     388             : static void
     389      894764 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
     390             : {
     391      894764 :     *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
     392             :                    HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
     393      894764 :     *infomask2 &= ~HEAP_KEYS_UPDATED;
     394             : 
     395      894764 :     if (infobits & XLHL_XMAX_IS_MULTI)
     396           4 :         *infomask |= HEAP_XMAX_IS_MULTI;
     397      894764 :     if (infobits & XLHL_XMAX_LOCK_ONLY)
     398      110774 :         *infomask |= HEAP_XMAX_LOCK_ONLY;
     399      894764 :     if (infobits & XLHL_XMAX_EXCL_LOCK)
     400      109970 :         *infomask |= HEAP_XMAX_EXCL_LOCK;
     401             :     /* note HEAP_XMAX_SHR_LOCK isn't considered here */
     402      894764 :     if (infobits & XLHL_XMAX_KEYSHR_LOCK)
     403         828 :         *infomask |= HEAP_XMAX_KEYSHR_LOCK;
     404             : 
     405      894764 :     if (infobits & XLHL_KEYS_UPDATED)
     406      599590 :         *infomask2 |= HEAP_KEYS_UPDATED;
     407      894764 : }
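
fix_infomask_from_infobits() and its counterpart compute_infobits() (in
heapam.c) form a pack/unpack pair: a handful of infomask bits are condensed
into one uint8 for the WAL record and expanded again at replay. A
self-contained sketch of that round trip, with made-up flag values standing
in for the real HEAP_* and XLHL_* constants:

    #include <assert.h>
    #include <stdint.h>

    #define MASK_IS_MULTI   0x0040  /* stand-in for HEAP_XMAX_IS_MULTI */
    #define MASK_LOCK_ONLY  0x0080  /* stand-in for HEAP_XMAX_LOCK_ONLY */
    #define BIT_IS_MULTI    0x01    /* stand-in for XLHL_XMAX_IS_MULTI */
    #define BIT_LOCK_ONLY   0x02    /* stand-in for XLHL_XMAX_LOCK_ONLY */

    /* Condense selected infomask flags into a byte (cf. compute_infobits) */
    static uint8_t
    pack_infobits(uint16_t infomask)
    {
        return ((infomask & MASK_IS_MULTI) ? BIT_IS_MULTI : 0) |
               ((infomask & MASK_LOCK_ONLY) ? BIT_LOCK_ONLY : 0);
    }

    /* Expand the byte back into the infomask (cf. fix_infomask_from_infobits) */
    static void
    unpack_infobits(uint8_t bits, uint16_t *infomask)
    {
        *infomask &= ~(MASK_IS_MULTI | MASK_LOCK_ONLY);     /* clear first */
        if (bits & BIT_IS_MULTI)
            *infomask |= MASK_IS_MULTI;
        if (bits & BIT_LOCK_ONLY)
            *infomask |= MASK_LOCK_ONLY;
    }

    int
    main(void)
    {
        uint16_t    before = MASK_LOCK_ONLY | 0x0001;   /* plus an unrelated bit */
        uint16_t    after = 0x0001;

        unpack_infobits(pack_infobits(before), &after);
        assert(after == before);    /* covered bits survive the round trip */
        return 0;
    }
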
     408             : 
     409             : /*
     410             :  * Replay XLOG_HEAP_DELETE records.
     411             :  */
     412             : static void
     413      601046 : heap_xlog_delete(XLogReaderState *record)
     414             : {
     415      601046 :     XLogRecPtr  lsn = record->EndRecPtr;
     416      601046 :     xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
     417             :     Buffer      buffer;
     418             :     Page        page;
     419      601046 :     ItemId      lp = NULL;
     420             :     HeapTupleHeader htup;
     421             :     BlockNumber blkno;
     422             :     RelFileLocator target_locator;
     423             :     ItemPointerData target_tid;
     424             : 
     425      601046 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     426      601046 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     427      601046 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     428             : 
     429             :     /*
     430             :      * The visibility map may need to be fixed even if the heap page is
     431             :      * already up-to-date.
     432             :      */
     433      601046 :     if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
     434             :     {
     435          70 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     436          70 :         Buffer      vmbuffer = InvalidBuffer;
     437             : 
     438          70 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     439          70 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     440          70 :         ReleaseBuffer(vmbuffer);
     441          70 :         FreeFakeRelcacheEntry(reln);
     442             :     }
     443             : 
     444      601046 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     445             :     {
     446      596874 :         page = BufferGetPage(buffer);
     447             : 
     448      596874 :         if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
     449      596874 :             lp = PageGetItemId(page, xlrec->offnum);
     450             : 
     451      596874 :         if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
     452           0 :             elog(PANIC, "invalid lp");
     453             : 
     454      596874 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     455             : 
     456      596874 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     457      596874 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     458      596874 :         HeapTupleHeaderClearHotUpdated(htup);
     459      596874 :         fix_infomask_from_infobits(xlrec->infobits_set,
     460             :                                    &htup->t_infomask, &htup->t_infomask2);
     461      596874 :         if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
     462      596874 :             HeapTupleHeaderSetXmax(htup, xlrec->xmax);
     463             :         else
     464           0 :             HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
     465      596874 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     466             : 
     467             :         /* Mark the page as a candidate for pruning */
     468      596874 :         PageSetPrunable(page, XLogRecGetXid(record));
     469             : 
     470      596874 :         if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
     471          22 :             PageClearAllVisible(page);
     472             : 
     473             :         /* Make sure t_ctid is set correctly */
     474      596874 :         if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
     475         288 :             HeapTupleHeaderSetMovedPartitions(htup);
     476             :         else
     477      596586 :             htup->t_ctid = target_tid;
     478      596874 :         PageSetLSN(page, lsn);
     479      596874 :         MarkBufferDirty(buffer);
     480             :     }
     481      601046 :     if (BufferIsValid(buffer))
     482      601046 :         UnlockReleaseBuffer(buffer);
     483      601046 : }
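
The BLK_NEEDS_REDO branch above follows the standard redo shape:
XLogReadBufferForRedo() compares the page LSN against the record's end LSN
and requests redo only when the page has not yet absorbed this record, which
keeps replay idempotent if recovery is restarted. A one-function model of
that interlock (a sketch of the idea, not the real xlogutils.c logic):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t XLogRecPtr;    /* WAL position, monotonically increasing */

    /*
     * A page whose LSN already covers the record must not be modified again;
     * replaying the same record twice would corrupt it.
     */
    static bool
    needs_redo(XLogRecPtr page_lsn, XLogRecPtr record_end_lsn)
    {
        return page_lsn < record_end_lsn;
    }

    int
    main(void)
    {
        assert(needs_redo(0x1000, 0x2000));     /* page is older: replay */
        assert(!needs_redo(0x2000, 0x2000));    /* already applied: skip */
        return 0;
    }
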
     484             : 
     485             : /*
     486             :  * Replay XLOG_HEAP_INSERT records.
     487             :  */
     488             : static void
     489     2582212 : heap_xlog_insert(XLogReaderState *record)
     490             : {
     491     2582212 :     XLogRecPtr  lsn = record->EndRecPtr;
     492     2582212 :     xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
     493             :     Buffer      buffer;
     494             :     Page        page;
     495             :     union
     496             :     {
     497             :         HeapTupleHeaderData hdr;
     498             :         char        data[MaxHeapTupleSize];
     499             :     }           tbuf;
     500             :     HeapTupleHeader htup;
     501             :     xl_heap_header xlhdr;
     502             :     uint32      newlen;
     503     2582212 :     Size        freespace = 0;
     504             :     RelFileLocator target_locator;
     505             :     BlockNumber blkno;
     506             :     ItemPointerData target_tid;
     507             :     XLogRedoAction action;
     508             : 
     509     2582212 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     510     2582212 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     511     2582212 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     512             : 
     513             :     /* No freezing in the heap_insert() code path */
     514             :     Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
     515             : 
     516             :     /*
     517             :      * The visibility map may need to be fixed even if the heap page is
     518             :      * already up-to-date.
     519             :      */
     520     2582212 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     521             :     {
     522        2044 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     523        2044 :         Buffer      vmbuffer = InvalidBuffer;
     524             : 
     525        2044 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     526        2044 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     527        2044 :         ReleaseBuffer(vmbuffer);
     528        2044 :         FreeFakeRelcacheEntry(reln);
     529             :     }
     530             : 
     531             :     /*
     532             :      * If we inserted the first and only tuple on the page, re-initialize the
     533             :      * page from scratch.
     534             :      */
     535     2582212 :     if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     536             :     {
     537       34298 :         buffer = XLogInitBufferForRedo(record, 0);
     538       34298 :         page = BufferGetPage(buffer);
     539       34298 :         PageInit(page, BufferGetPageSize(buffer), 0);
     540       34298 :         action = BLK_NEEDS_REDO;
     541             :     }
     542             :     else
     543     2547914 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     544     2582212 :     if (action == BLK_NEEDS_REDO)
     545             :     {
     546             :         Size        datalen;
     547             :         char       *data;
     548             : 
     549     2576504 :         page = BufferGetPage(buffer);
     550             : 
     551     2576504 :         if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
     552           0 :             elog(PANIC, "invalid max offset number");
     553             : 
     554     2576504 :         data = XLogRecGetBlockData(record, 0, &datalen);
     555             : 
     556     2576504 :         newlen = datalen - SizeOfHeapHeader;
     557             :         Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
     558     2576504 :         memcpy(&xlhdr, data, SizeOfHeapHeader);
     559     2576504 :         data += SizeOfHeapHeader;
     560             : 
     561     2576504 :         htup = &tbuf.hdr;
     562     2576504 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     563             :         /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     564     2576504 :         memcpy((char *) htup + SizeofHeapTupleHeader,
     565             :                data,
     566             :                newlen);
     567     2576504 :         newlen += SizeofHeapTupleHeader;
     568     2576504 :         htup->t_infomask2 = xlhdr.t_infomask2;
     569     2576504 :         htup->t_infomask = xlhdr.t_infomask;
     570     2576504 :         htup->t_hoff = xlhdr.t_hoff;
     571     2576504 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     572     2576504 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
     573     2576504 :         htup->t_ctid = target_tid;
     574             : 
     575     2576504 :         if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
     576           0 :             elog(PANIC, "failed to add tuple");
     577             : 
     578     2576504 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     579             : 
     580     2576504 :         PageSetLSN(page, lsn);
     581             : 
     582     2576504 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     583         620 :             PageClearAllVisible(page);
     584             : 
     585     2576504 :         MarkBufferDirty(buffer);
     586             :     }
     587     2582212 :     if (BufferIsValid(buffer))
     588     2582212 :         UnlockReleaseBuffer(buffer);
     589             : 
     590             :     /*
     591             :      * If the page is running low on free space, update the FSM as well.
     592             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     593             :      * better than that without knowing the fill-factor for the table.
     594             :      *
     595             :      * XXX: Don't do this if the page was restored from full page image. We
     596             :      * don't bother to update the FSM in that case, it doesn't need to be
     597             :      * totally accurate anyway.
     598             :      */
     599     2582212 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     600      507614 :         XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
     601     2582212 : }
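
For the default 8 kB block size, the "less than 20%" cutoff above works out
to BLCKSZ / 5 = 8192 / 5 = 1638 bytes: only pages left with less free space
than that get their FSM entry refreshed during replay. A trivial check of
the arithmetic:

    #include <assert.h>

    #define BLCKSZ 8192     /* PostgreSQL's default block size */

    int
    main(void)
    {
        assert(BLCKSZ / 5 == 1638);     /* integer division truncates 1638.4 */
        return 0;
    }
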
     602             : 
     603             : /*
     604             :  * Replay XLOG_HEAP2_MULTI_INSERT records.
     605             :  */
     606             : static void
     607      115778 : heap_xlog_multi_insert(XLogReaderState *record)
     608             : {
     609      115778 :     XLogRecPtr  lsn = record->EndRecPtr;
     610             :     xl_heap_multi_insert *xlrec;
     611             :     RelFileLocator rlocator;
     612             :     BlockNumber blkno;
     613             :     Buffer      buffer;
     614             :     Page        page;
     615             :     union
     616             :     {
     617             :         HeapTupleHeaderData hdr;
     618             :         char        data[MaxHeapTupleSize];
     619             :     }           tbuf;
     620             :     HeapTupleHeader htup;
     621             :     uint32      newlen;
     622      115778 :     Size        freespace = 0;
     623             :     int         i;
     624      115778 :     bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
     625             :     XLogRedoAction action;
     626      115778 :     Buffer      vmbuffer = InvalidBuffer;
     627             : 
     628             :     /*
     629             :      * Insertion doesn't overwrite MVCC data, so no conflict processing is
     630             :      * required.
     631             :      */
     632      115778 :     xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
     633             : 
     634      115778 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
     635             : 
     636             :     /* check that the mutually exclusive flags are not both set */
     637             :     Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
     638             :              (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
     639             : 
     640             :     /*
     641             :      * The visibility map may need to be fixed even if the heap page is
     642             :      * already up-to-date.
     643             :      */
     644      115778 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     645             :     {
     646        2052 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     647             : 
     648        2052 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     649        2052 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     650        2052 :         ReleaseBuffer(vmbuffer);
     651        2052 :         vmbuffer = InvalidBuffer;
     652        2052 :         FreeFakeRelcacheEntry(reln);
     653             :     }
     654             : 
     655      115778 :     if (isinit)
     656             :     {
     657        3454 :         buffer = XLogInitBufferForRedo(record, 0);
     658        3454 :         page = BufferGetPage(buffer);
     659        3454 :         PageInit(page, BufferGetPageSize(buffer), 0);
     660        3454 :         action = BLK_NEEDS_REDO;
     661             :     }
     662             :     else
     663      112324 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     664      115778 :     if (action == BLK_NEEDS_REDO)
     665             :     {
     666             :         char       *tupdata;
     667             :         char       *endptr;
     668             :         Size        len;
     669             : 
     670             :         /* Tuples are stored as block data */
     671      112422 :         tupdata = XLogRecGetBlockData(record, 0, &len);
     672      112422 :         endptr = tupdata + len;
     673             : 
     674      112422 :         page = BufferGetPage(buffer);
     675             : 
     676      525286 :         for (i = 0; i < xlrec->ntuples; i++)
     677             :         {
     678             :             OffsetNumber offnum;
     679             :             xl_multi_insert_tuple *xlhdr;
     680             : 
     681             :             /*
     682             :              * If we're reinitializing the page, the tuples are stored in
     683             :              * order from FirstOffsetNumber. Otherwise there's an array of
     684             :              * offsets in the WAL record, and the tuples come after that.
     685             :              */
     686      412864 :             if (isinit)
     687      198380 :                 offnum = FirstOffsetNumber + i;
     688             :             else
     689      214484 :                 offnum = xlrec->offsets[i];
     690      412864 :             if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     691           0 :                 elog(PANIC, "invalid max offset number");
     692             : 
     693      412864 :             xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
     694      412864 :             tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
     695             : 
     696      412864 :             newlen = xlhdr->datalen;
     697             :             Assert(newlen <= MaxHeapTupleSize);
     698      412864 :             htup = &tbuf.hdr;
     699      412864 :             MemSet(htup, 0, SizeofHeapTupleHeader);
     700             :             /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     701      412864 :             memcpy((char *) htup + SizeofHeapTupleHeader,
     702             :                    tupdata,
     703             :                    newlen);
     704      412864 :             tupdata += newlen;
     705             : 
     706      412864 :             newlen += SizeofHeapTupleHeader;
     707      412864 :             htup->t_infomask2 = xlhdr->t_infomask2;
     708      412864 :             htup->t_infomask = xlhdr->t_infomask;
     709      412864 :             htup->t_hoff = xlhdr->t_hoff;
     710      412864 :             HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     711      412864 :             HeapTupleHeaderSetCmin(htup, FirstCommandId);
     712      412864 :             ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
     713      412864 :             ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
     714             : 
     715      412864 :             offnum = PageAddItem(page, htup, newlen, offnum, true, true);
     716      412864 :             if (offnum == InvalidOffsetNumber)
     717           0 :                 elog(PANIC, "failed to add tuple");
     718             :         }
     719      112422 :         if (tupdata != endptr)
     720           0 :             elog(PANIC, "total tuple length mismatch");
     721             : 
     722      112422 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     723             : 
     724      112422 :         PageSetLSN(page, lsn);
     725             : 
     726      112422 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     727         184 :             PageClearAllVisible(page);
     728             : 
     729             :         /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
     730      112422 :         if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
     731           8 :             PageSetAllVisible(page);
     732             : 
     733      112422 :         MarkBufferDirty(buffer);
     734             :     }
     735      115778 :     if (BufferIsValid(buffer))
     736      115778 :         UnlockReleaseBuffer(buffer);
     737             : 
     738      115778 :     buffer = InvalidBuffer;
     739             : 
     740             :     /*
     741             :      * Read and update the visibility map (VM) block.
     742             :      *
     743             :      * We must always redo VM changes, even if the corresponding heap page
     744             :      * update was skipped due to the LSN interlock. Each VM block covers
     745             :      * multiple heap pages, so later WAL records may update other bits in the
     746             :      * same block. If this record includes an FPI (full-page image),
     747             :      * subsequent WAL records may depend on it to guard against torn pages.
     748             :      *
     749             :      * Heap page changes are replayed first to preserve the invariant:
     750             :      * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
     751             :      *
     752             :      * Note that we released the heap page lock above. During normal
      753             :      * operation, this would be unsafe: a concurrent modification could
     754             :      * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
     755             :      * invariant.
     756             :      *
     757             :      * During recovery, however, no concurrent writers exist. Therefore,
     758             :      * updating the VM without holding the heap page lock is safe enough. This
     759             :      * same approach is taken when replaying xl_heap_visible records (see
     760             :      * heap_xlog_visible()).
     761             :      */
     762      115786 :     if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
     763           8 :         XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
     764             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     765             :     {
     766           0 :         Page        vmpage = BufferGetPage(vmbuffer);
     767             : 
     768             :         /* initialize the page if it was read as zeros */
     769           0 :         if (PageIsNew(vmpage))
     770           0 :             PageInit(vmpage, BLCKSZ, 0);
     771             : 
     772           0 :         visibilitymap_set_vmbits(blkno,
     773             :                                  vmbuffer,
     774             :                                  VISIBILITYMAP_ALL_VISIBLE |
     775             :                                  VISIBILITYMAP_ALL_FROZEN,
     776             :                                  rlocator);
     777             : 
     778             :         Assert(BufferIsDirty(vmbuffer));
     779           0 :         PageSetLSN(vmpage, lsn);
     780             :     }
     781             : 
     782      115778 :     if (BufferIsValid(vmbuffer))
     783           8 :         UnlockReleaseBuffer(vmbuffer);
     784             : 
     785             :     /*
     786             :      * If the page is running low on free space, update the FSM as well.
     787             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     788             :      * better than that without knowing the fill-factor for the table.
     789             :      *
     790             :      * XXX: Don't do this if the page was restored from full page image. We
     791             :      * don't bother to update the FSM in that case, it doesn't need to be
     792             :      * totally accurate anyway.
     793             :      */
     794      115778 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     795       34034 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     796      115778 : }
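
The SHORTALIGN() in the tuple loop above rounds tupdata up to the next
2-byte boundary before it is reinterpreted as an xl_multi_insert_tuple,
since each variable-length tuple image can leave the cursor at an odd
address. A sketch of the underlying round-up-to-a-power-of-two pattern
(modeled on PostgreSQL's TYPEALIGN() macro; the addresses are made up):

    #include <assert.h>
    #include <stdint.h>

    /* Round addr up to the next multiple of align (a power of two). */
    static uintptr_t
    align_up(uintptr_t addr, uintptr_t align)
    {
        return (addr + align - 1) & ~(align - 1);
    }

    int
    main(void)
    {
        assert(align_up(100, 2) == 100);    /* already aligned: unchanged */
        assert(align_up(101, 2) == 102);    /* odd address: rounded up */
        return 0;
    }
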
     797             : 
     798             : /*
     799             :  * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
     800             :  */
     801             : static void
     802      187732 : heap_xlog_update(XLogReaderState *record, bool hot_update)
     803             : {
     804      187732 :     XLogRecPtr  lsn = record->EndRecPtr;
     805      187732 :     xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
     806             :     RelFileLocator rlocator;
     807             :     BlockNumber oldblk;
     808             :     BlockNumber newblk;
     809             :     ItemPointerData newtid;
     810             :     Buffer      obuffer,
     811             :                 nbuffer;
     812             :     Page        page;
     813             :     OffsetNumber offnum;
     814      187732 :     ItemId      lp = NULL;
     815             :     HeapTupleData oldtup;
     816             :     HeapTupleHeader htup;
     817      187732 :     uint16      prefixlen = 0,
     818      187732 :                 suffixlen = 0;
     819             :     char       *newp;
     820             :     union
     821             :     {
     822             :         HeapTupleHeaderData hdr;
     823             :         char        data[MaxHeapTupleSize];
     824             :     }           tbuf;
     825             :     xl_heap_header xlhdr;
     826             :     uint32      newlen;
     827      187732 :     Size        freespace = 0;
     828             :     XLogRedoAction oldaction;
     829             :     XLogRedoAction newaction;
     830             : 
     831             :     /* initialize to keep the compiler quiet */
     832      187732 :     oldtup.t_data = NULL;
     833      187732 :     oldtup.t_len = 0;
     834             : 
     835      187732 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
     836      187732 :     if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
     837             :     {
     838             :         /* HOT updates are never done across pages */
     839             :         Assert(!hot_update);
     840             :     }
     841             :     else
     842       78304 :         oldblk = newblk;
     843             : 
     844      187732 :     ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
     845             : 
     846             :     /*
     847             :      * The visibility map may need to be fixed even if the heap page is
     848             :      * already up-to-date.
     849             :      */
     850      187732 :     if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     851             :     {
     852         486 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     853         486 :         Buffer      vmbuffer = InvalidBuffer;
     854             : 
     855         486 :         visibilitymap_pin(reln, oldblk, &vmbuffer);
     856         486 :         visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     857         486 :         ReleaseBuffer(vmbuffer);
     858         486 :         FreeFakeRelcacheEntry(reln);
     859             :     }
     860             : 
     861             :     /*
     862             :      * In normal operation, it is important to lock the two pages in
     863             :      * page-number order, to avoid possible deadlocks against other update
     864             :      * operations going the other way.  However, during WAL replay there can
     865             :      * be no other update happening, so we don't need to worry about that. But
     866             :      * we *do* need to worry that we don't expose an inconsistent state to Hot
     867             :      * Standby queries --- so the original page can't be unlocked before we've
     868             :      * added the new tuple to the new page.
     869             :      */
     870             : 
     871             :     /* Deal with old tuple version */
     872      187732 :     oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
     873             :                                       &obuffer);
     874      187732 :     if (oldaction == BLK_NEEDS_REDO)
     875             :     {
     876      187116 :         page = BufferGetPage(obuffer);
     877      187116 :         offnum = xlrec->old_offnum;
     878      187116 :         if (PageGetMaxOffsetNumber(page) >= offnum)
     879      187116 :             lp = PageGetItemId(page, offnum);
     880             : 
     881      187116 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
     882           0 :             elog(PANIC, "invalid lp");
     883             : 
     884      187116 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     885             : 
     886      187116 :         oldtup.t_data = htup;
     887      187116 :         oldtup.t_len = ItemIdGetLength(lp);
     888             : 
     889      187116 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     890      187116 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     891      187116 :         if (hot_update)
     892       72152 :             HeapTupleHeaderSetHotUpdated(htup);
     893             :         else
     894      114964 :             HeapTupleHeaderClearHotUpdated(htup);
     895      187116 :         fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
     896             :                                    &htup->t_infomask2);
     897      187116 :         HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
     898      187116 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     899             :         /* Set forward chain link in t_ctid */
     900      187116 :         htup->t_ctid = newtid;
     901             : 
     902             :         /* Mark the page as a candidate for pruning */
     903      187116 :         PageSetPrunable(page, XLogRecGetXid(record));
     904             : 
     905      187116 :         if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     906         450 :             PageClearAllVisible(page);
     907             : 
     908      187116 :         PageSetLSN(page, lsn);
     909      187116 :         MarkBufferDirty(obuffer);
     910             :     }
     911             : 
     912             :     /*
     913             :      * Read the page the new tuple goes into, if different from old.
     914             :      */
     915      187732 :     if (oldblk == newblk)
     916             :     {
     917       78304 :         nbuffer = obuffer;
     918       78304 :         newaction = oldaction;
     919             :     }
     920      109428 :     else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     921             :     {
     922        1160 :         nbuffer = XLogInitBufferForRedo(record, 0);
     923        1160 :         page = BufferGetPage(nbuffer);
     924        1160 :         PageInit(page, BufferGetPageSize(nbuffer), 0);
     925        1160 :         newaction = BLK_NEEDS_REDO;
     926             :     }
     927             :     else
     928      108268 :         newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
     929             : 
     930             :     /*
     931             :      * The visibility map may need to be fixed even if the heap page is
     932             :      * already up-to-date.
     933             :      */
     934      187732 :     if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
     935             :     {
     936         398 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     937         398 :         Buffer      vmbuffer = InvalidBuffer;
     938             : 
     939         398 :         visibilitymap_pin(reln, newblk, &vmbuffer);
     940         398 :         visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     941         398 :         ReleaseBuffer(vmbuffer);
     942         398 :         FreeFakeRelcacheEntry(reln);
     943             :     }
     944             : 
     945             :     /* Deal with new tuple */
     946      187732 :     if (newaction == BLK_NEEDS_REDO)
     947             :     {
     948             :         char       *recdata;
     949             :         char       *recdata_end;
     950             :         Size        datalen;
     951             :         Size        tuplen;
     952             : 
     953      186600 :         recdata = XLogRecGetBlockData(record, 0, &datalen);
     954      186600 :         recdata_end = recdata + datalen;
     955             : 
     956      186600 :         page = BufferGetPage(nbuffer);
     957             : 
     958      186600 :         offnum = xlrec->new_offnum;
     959      186600 :         if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     960           0 :             elog(PANIC, "invalid max offset number");
     961             : 
     962      186600 :         if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
     963             :         {
     964             :             Assert(newblk == oldblk);
     965       30536 :             memcpy(&prefixlen, recdata, sizeof(uint16));
     966       30536 :             recdata += sizeof(uint16);
     967             :         }
     968      186600 :         if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
     969             :         {
     970             :             Assert(newblk == oldblk);
     971       67212 :             memcpy(&suffixlen, recdata, sizeof(uint16));
     972       67212 :             recdata += sizeof(uint16);
     973             :         }
     974             : 
     975      186600 :         memcpy(&xlhdr, recdata, SizeOfHeapHeader);
     976      186600 :         recdata += SizeOfHeapHeader;
     977             : 
     978      186600 :         tuplen = recdata_end - recdata;
     979             :         Assert(tuplen <= MaxHeapTupleSize);
     980             : 
     981      186600 :         htup = &tbuf.hdr;
     982      186600 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     983             : 
     984             :         /*
     985             :          * Reconstruct the new tuple using the prefix and/or suffix from the
     986             :          * old tuple, and the data stored in the WAL record.
     987             :          */
     988      186600 :         newp = (char *) htup + SizeofHeapTupleHeader;
     989      186600 :         if (prefixlen > 0)
     990             :         {
     991             :             int         len;
     992             : 
     993             :             /* copy bitmap [+ padding] [+ oid] from WAL record */
     994       30536 :             len = xlhdr.t_hoff - SizeofHeapTupleHeader;
     995       30536 :             memcpy(newp, recdata, len);
     996       30536 :             recdata += len;
     997       30536 :             newp += len;
     998             : 
     999             :             /* copy prefix from old tuple */
    1000       30536 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
    1001       30536 :             newp += prefixlen;
    1002             : 
    1003             :             /* copy new tuple data from WAL record */
    1004       30536 :             len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
    1005       30536 :             memcpy(newp, recdata, len);
    1006       30536 :             recdata += len;
    1007       30536 :             newp += len;
    1008             :         }
    1009             :         else
    1010             :         {
    1011             :             /*
    1012             :              * copy bitmap [+ padding] [+ oid] + data from record, all in one
    1013             :              * go
    1014             :              */
    1015      156064 :             memcpy(newp, recdata, tuplen);
    1016      156064 :             recdata += tuplen;
    1017      156064 :             newp += tuplen;
    1018             :         }
    1019             :         Assert(recdata == recdata_end);
    1020             : 
    1021             :         /* copy suffix from old tuple */
    1022      186600 :         if (suffixlen > 0)
    1023       67212 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
    1024             : 
    1025      186600 :         newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
    1026      186600 :         htup->t_infomask2 = xlhdr.t_infomask2;
    1027      186600 :         htup->t_infomask = xlhdr.t_infomask;
    1028      186600 :         htup->t_hoff = xlhdr.t_hoff;
    1029             : 
    1030      186600 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
    1031      186600 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
    1032      186600 :         HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
    1033             :         /* Make sure there is no forward chain link in t_ctid */
    1034      186600 :         htup->t_ctid = newtid;
    1035             : 
    1036      186600 :         offnum = PageAddItem(page, htup, newlen, offnum, true, true);
    1037      186600 :         if (offnum == InvalidOffsetNumber)
    1038           0 :             elog(PANIC, "failed to add tuple");
    1039             : 
    1040      186600 :         if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
    1041         162 :             PageClearAllVisible(page);
    1042             : 
    1043      186600 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
    1044             : 
    1045      186600 :         PageSetLSN(page, lsn);
    1046      186600 :         MarkBufferDirty(nbuffer);
    1047             :     }
    1048             : 
    1049      187732 :     if (BufferIsValid(nbuffer) && nbuffer != obuffer)
    1050      109428 :         UnlockReleaseBuffer(nbuffer);
    1051      187732 :     if (BufferIsValid(obuffer))
    1052      187732 :         UnlockReleaseBuffer(obuffer);
    1053             : 
    1054             :     /*
    1055             :      * If the new page is running low on free space, update the FSM as well.
    1056             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
    1057             :      * better than that without knowing the fill-factor for the table.
    1058             :      *
    1059             :      * However, don't update the FSM on HOT updates, because after crash
    1060             :      * recovery, either the old or the new tuple will certainly be dead and
    1061             :      * prunable. After pruning, the page will have roughly as much free space
    1062             :      * as it did before the update, assuming the new tuple is about the same
    1063             :      * size as the old one.
    1064             :      *
    1065             :      * XXX: Don't do this if the page was restored from full page image. We
    1066             :      * don't bother to update the FSM in that case, it doesn't need to be
    1067             :      * totally accurate anyway.
    1068             :      */
    1069      187732 :     if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
    1070       23574 :         XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
    1071      187732 : }
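
The reconstruction above is a prefix/suffix delta encoding: when the old and new tuple versions sit on the same page (note the Assert(newblk == oldblk) guards) and share leading or trailing bytes, the WAL record carries only the changed middle plus two uint16 lengths. A minimal, self-contained sketch of the same assembly idea; reconstruct() and the test strings are illustrative, not PostgreSQL code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Sketch of prefix/suffix delta decoding: rebuild the new payload from the
 * shared prefix of the old payload, the middle bytes stored in the record,
 * and the shared suffix of the old payload.
 */
static size_t
reconstruct(char *dst,
            const char *old, size_t oldlen,
            const char *mid, size_t midlen,   /* bytes carried in the record */
            uint16_t prefixlen, uint16_t suffixlen)
{
    memcpy(dst, old, prefixlen);                      /* shared prefix */
    memcpy(dst + prefixlen, mid, midlen);             /* changed middle */
    memcpy(dst + prefixlen + midlen,
           old + oldlen - suffixlen, suffixlen);      /* shared suffix */
    return prefixlen + midlen + suffixlen;
}

int
main(void)
{
    const char old[] = "aaaa-OLD-zzzz";
    const char mid[] = "NEWDATA";
    char       buf[64];
    size_t     n = reconstruct(buf, old, sizeof(old) - 1,
                               mid, sizeof(mid) - 1, 5, 5);

    printf("%.*s\n", (int) n, buf);   /* prints "aaaa-NEWDATA-zzzz" */
    return 0;
}

The payoff is the same as in the replay path: the record stays small for UPDATEs that modify only part of a wide tuple, at the cost of reading the old version during redo.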
    1072             : 
    1073             : /*
    1074             :  * Replay XLOG_HEAP_CONFIRM records.
    1075             :  */
    1076             : static void
    1077         164 : heap_xlog_confirm(XLogReaderState *record)
    1078             : {
    1079         164 :     XLogRecPtr  lsn = record->EndRecPtr;
    1080         164 :     xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
    1081             :     Buffer      buffer;
    1082             :     Page        page;
    1083             :     OffsetNumber offnum;
    1084         164 :     ItemId      lp = NULL;
    1085             :     HeapTupleHeader htup;
    1086             : 
    1087         164 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1088             :     {
    1089         164 :         page = BufferGetPage(buffer);
    1090             : 
    1091         164 :         offnum = xlrec->offnum;
    1092         164 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1093         164 :             lp = PageGetItemId(page, offnum);
    1094             : 
    1095         164 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1096           0 :             elog(PANIC, "invalid lp");
    1097             : 
    1098         164 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1099             : 
    1100             :         /*
    1101             :          * Confirm tuple as actually inserted
    1102             :          */
    1103         164 :         ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
    1104             : 
    1105         164 :         PageSetLSN(page, lsn);
    1106         164 :         MarkBufferDirty(buffer);
    1107             :     }
    1108         164 :     if (BufferIsValid(buffer))
    1109         164 :         UnlockReleaseBuffer(buffer);
    1110         164 : }
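
The confirm record's only change is pointing t_ctid back at the tuple itself, which is how a speculatively inserted tuple (INSERT ... ON CONFLICT) is marked as definitely inserted. A toy sketch of the "self-pointing ctid" idea, using a hypothetical TidSketch type rather than the real ItemPointerData:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical miniature of an item pointer, for illustration only. */
typedef struct
{
    uint32_t    block;
    uint16_t    offset;
} TidSketch;

/* A confirmed tuple's ctid names its own (block, offset) location. */
static bool
tid_is_self(TidSketch ctid, uint32_t blkno, uint16_t offnum)
{
    return ctid.block == blkno && ctid.offset == offnum;
}

static void
confirm(TidSketch *ctid, uint32_t blkno, uint16_t offnum)
{
    /* Overwrite the speculative token with the real self-address. */
    ctid->block = blkno;
    ctid->offset = offnum;
}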
    1111             : 
    1112             : /*
    1113             :  * Replay XLOG_HEAP_LOCK records.
    1114             :  */
    1115             : static void
    1116      111256 : heap_xlog_lock(XLogReaderState *record)
    1117             : {
    1118      111256 :     XLogRecPtr  lsn = record->EndRecPtr;
    1119      111256 :     xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
    1120             :     Buffer      buffer;
    1121             :     Page        page;
    1122             :     OffsetNumber offnum;
    1123      111256 :     ItemId      lp = NULL;
    1124             :     HeapTupleHeader htup;
    1125             : 
    1126             :     /*
    1127             :      * The visibility map may need to be fixed even if the heap page is
    1128             :      * already up-to-date.
    1129             :      */
    1130      111256 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1131             :     {
    1132             :         RelFileLocator rlocator;
    1133         122 :         Buffer      vmbuffer = InvalidBuffer;
    1134             :         BlockNumber block;
    1135             :         Relation    reln;
    1136             : 
    1137         122 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1138         122 :         reln = CreateFakeRelcacheEntry(rlocator);
    1139             : 
    1140         122 :         visibilitymap_pin(reln, block, &vmbuffer);
    1141         122 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1142             : 
    1143         122 :         ReleaseBuffer(vmbuffer);
    1144         122 :         FreeFakeRelcacheEntry(reln);
    1145             :     }
    1146             : 
    1147      111256 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1148             :     {
    1149      110774 :         page = BufferGetPage(buffer);
    1150             : 
    1151      110774 :         offnum = xlrec->offnum;
    1152      110774 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1153      110774 :             lp = PageGetItemId(page, offnum);
    1154             : 
    1155      110774 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1156           0 :             elog(PANIC, "invalid lp");
    1157             : 
    1158      110774 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1159             : 
    1160      110774 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1161      110774 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1162      110774 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1163             :                                    &htup->t_infomask2);
    1164             : 
    1165             :         /*
    1166             :          * Clear relevant update flags, but only if the modified infomask says
    1167             :          * there's no update.
    1168             :          */
    1169      110774 :         if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
    1170             :         {
    1171      110774 :             HeapTupleHeaderClearHotUpdated(htup);
    1172             :             /* Make sure there is no forward chain link in t_ctid */
    1173      110774 :             ItemPointerSet(&htup->t_ctid,
    1174             :                            BufferGetBlockNumber(buffer),
    1175             :                            offnum);
    1176             :         }
    1177      110774 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1178      110774 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
    1179      110774 :         PageSetLSN(page, lsn);
    1180      110774 :         MarkBufferDirty(buffer);
    1181             :     }
    1182      111256 :     if (BufferIsValid(buffer))
    1183      111256 :         UnlockReleaseBuffer(buffer);
    1184      111256 : }
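
Both lock-replay routines use the same clear-then-apply pattern on the infomask: wipe every bit describing the previous xmax, then install the bits carried in the record via fix_infomask_from_infobits(). A sketch of that pattern on a bare uint16; the SK_* flag values are made up (the real HEAP_XMAX_* bits live in htup_details.h):

#include <stdint.h>

/* Made-up flag values, for illustration only. */
#define SK_XMAX_BITS     0x00F0    /* all bits describing the old locker */
#define SK_XMAX_EXCL     0x0080
#define SK_XMAX_KEYSHR   0x0010

static uint16_t
apply_lock_bits(uint16_t infomask, uint16_t bits_from_record)
{
    infomask &= ~SK_XMAX_BITS;       /* forget the previous locker entirely */
    infomask |= bits_from_record;    /* then install the replayed state */
    return infomask;
}

Clearing first makes replay idempotent with respect to whatever hint bits the page accumulated before the crash: the result depends only on the WAL record, not on the page's prior state.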
    1185             : 
    1186             : /*
    1187             :  * Replay XLOG_HEAP2_LOCK_UPDATED records.
    1188             :  */
    1189             : static void
    1190           0 : heap_xlog_lock_updated(XLogReaderState *record)
    1191             : {
    1192           0 :     XLogRecPtr  lsn = record->EndRecPtr;
    1193             :     xl_heap_lock_updated *xlrec;
    1194             :     Buffer      buffer;
    1195             :     Page        page;
    1196             :     OffsetNumber offnum;
    1197           0 :     ItemId      lp = NULL;
    1198             :     HeapTupleHeader htup;
    1199             : 
    1200           0 :     xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
    1201             : 
    1202             :     /*
    1203             :      * The visibility map may need to be fixed even if the heap page is
    1204             :      * already up-to-date.
    1205             :      */
    1206           0 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1207             :     {
    1208             :         RelFileLocator rlocator;
    1209           0 :         Buffer      vmbuffer = InvalidBuffer;
    1210             :         BlockNumber block;
    1211             :         Relation    reln;
    1212             : 
    1213           0 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1214           0 :         reln = CreateFakeRelcacheEntry(rlocator);
    1215             : 
    1216           0 :         visibilitymap_pin(reln, block, &vmbuffer);
    1217           0 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1218             : 
    1219           0 :         ReleaseBuffer(vmbuffer);
    1220           0 :         FreeFakeRelcacheEntry(reln);
    1221             :     }
    1222             : 
    1223           0 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1224             :     {
    1225           0 :         page = BufferGetPage(buffer);
    1226             : 
    1227           0 :         offnum = xlrec->offnum;
    1228           0 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1229           0 :             lp = PageGetItemId(page, offnum);
    1230             : 
    1231           0 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1232           0 :             elog(PANIC, "invalid lp");
    1233             : 
    1234           0 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1235             : 
    1236           0 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1237           0 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1238           0 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1239             :                                    &htup->t_infomask2);
    1240           0 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1241             : 
    1242           0 :         PageSetLSN(page, lsn);
    1243           0 :         MarkBufferDirty(buffer);
    1244             :     }
    1245           0 :     if (BufferIsValid(buffer))
    1246           0 :         UnlockReleaseBuffer(buffer);
    1247           0 : }
    1248             : 
    1249             : /*
    1250             :  * Replay XLOG_HEAP_INPLACE records.
    1251             :  */
    1252             : static void
    1253       15444 : heap_xlog_inplace(XLogReaderState *record)
    1254             : {
    1255       15444 :     XLogRecPtr  lsn = record->EndRecPtr;
    1256       15444 :     xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
    1257             :     Buffer      buffer;
    1258             :     Page        page;
    1259             :     OffsetNumber offnum;
    1260       15444 :     ItemId      lp = NULL;
    1261             :     HeapTupleHeader htup;
    1262             :     uint32      oldlen;
    1263             :     Size        newlen;
    1264             : 
    1265       15444 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1266             :     {
    1267       14948 :         char       *newtup = XLogRecGetBlockData(record, 0, &newlen);
    1268             : 
    1269       14948 :         page = BufferGetPage(buffer);
    1270             : 
    1271       14948 :         offnum = xlrec->offnum;
    1272       14948 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1273       14948 :             lp = PageGetItemId(page, offnum);
    1274             : 
    1275       14948 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1276           0 :             elog(PANIC, "invalid lp");
    1277             : 
    1278       14948 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1279             : 
    1280       14948 :         oldlen = ItemIdGetLength(lp) - htup->t_hoff;
    1281       14948 :         if (oldlen != newlen)
    1282           0 :             elog(PANIC, "wrong tuple length");
    1283             : 
    1284       14948 :         memcpy((char *) htup + htup->t_hoff, newtup, newlen);
    1285             : 
    1286       14948 :         PageSetLSN(page, lsn);
    1287       14948 :         MarkBufferDirty(buffer);
    1288             :     }
    1289       15444 :     if (BufferIsValid(buffer))
    1290       15444 :         UnlockReleaseBuffer(buffer);
    1291             : 
    1292       15444 :     ProcessCommittedInvalidationMessages(xlrec->msgs,
    1293             :                                          xlrec->nmsgs,
    1294       15444 :                                          xlrec->relcacheInitFileInval,
    1295             :                                          xlrec->dbId,
    1296             :                                          xlrec->tsId);
    1297       15444 : }
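
An inplace update overwrites tuple data without creating a new version, so the replacement must be exactly as long as what it replaces; that is why the replay PANICs on a length mismatch, and why the record also carries the catalog invalidation messages replayed at the end. A sketch of the length invariant, with hypothetical names:

#include <assert.h>
#include <stddef.h>
#include <string.h>

/*
 * Overwrite an existing payload in place.  The replacement must be exactly
 * as long as the bytes it replaces, so no item offsets on the page move.
 */
static void
overwrite_in_place(char *dst, size_t oldlen, const char *src, size_t newlen)
{
    assert(oldlen == newlen);        /* layout must not change */
    memcpy(dst, src, newlen);
}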
    1298             : 
    1299             : void
    1300     3497858 : heap_redo(XLogReaderState *record)
    1301             : {
    1302     3497858 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1303             : 
    1304             :     /*
    1305             :      * These operations don't overwrite MVCC data, so no conflict processing
    1306             :      * is required.  The ones in the heap2 rmgr do.
    1307             :      */
    1308             : 
    1309     3497858 :     switch (info & XLOG_HEAP_OPMASK)
    1310             :     {
    1311     2582212 :         case XLOG_HEAP_INSERT:
    1312     2582212 :             heap_xlog_insert(record);
    1313     2582212 :             break;
    1314      601046 :         case XLOG_HEAP_DELETE:
    1315      601046 :             heap_xlog_delete(record);
    1316      601046 :             break;
    1317      115032 :         case XLOG_HEAP_UPDATE:
    1318      115032 :             heap_xlog_update(record, false);
    1319      115032 :             break;
    1320           4 :         case XLOG_HEAP_TRUNCATE:
    1321             : 
    1322             :             /*
    1323             :              * TRUNCATE is a no-op because the actions are already logged as
    1324             :              * SMGR WAL records.  The TRUNCATE WAL record exists only for
    1325             :              * logical decoding.
    1326             :              */
    1327           4 :             break;
    1328       72700 :         case XLOG_HEAP_HOT_UPDATE:
    1329       72700 :             heap_xlog_update(record, true);
    1330       72700 :             break;
    1331         164 :         case XLOG_HEAP_CONFIRM:
    1332         164 :             heap_xlog_confirm(record);
    1333         164 :             break;
    1334      111256 :         case XLOG_HEAP_LOCK:
    1335      111256 :             heap_xlog_lock(record);
    1336      111256 :             break;
    1337       15444 :         case XLOG_HEAP_INPLACE:
    1338       15444 :             heap_xlog_inplace(record);
    1339       15444 :             break;
    1340           0 :         default:
    1341           0 :             elog(PANIC, "heap_redo: unknown op code %u", info);
    1342             :     }
    1343     3497858 : }
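
The dispatch above recovers the opcode by stripping the xlog-level flag bits from the record's info byte and masking with XLOG_HEAP_OPMASK; the top bit stays out of the opcode mask so XLOG_HEAP_INIT_PAGE can be OR'd onto an insert or update opcode, as heap_xlog_update checked earlier. A sketch of the same decoding with stand-in mask values (the SK_* names are illustrative; the real masks are defined in xlogrecord.h and heapam_xlog.h):

#include <stdint.h>

/* Stand-in mask values mirroring the layout described above. */
#define SK_XLR_INFO_MASK   0x0F    /* bits reserved for the xlog layer */
#define SK_OPMASK          0x70    /* rmgr opcode bits                 */
#define SK_INIT_PAGE       0x80    /* flag OR'able onto an opcode      */

static uint8_t
decode_opcode(uint8_t info)
{
    return (uint8_t) ((info & ~SK_XLR_INFO_MASK) & SK_OPMASK);
}

static int
wants_init_page(uint8_t info)
{
    return (info & SK_INIT_PAGE) != 0;
}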
    1344             : 
    1345             : void
    1346      155420 : heap2_redo(XLogReaderState *record)
    1347             : {
    1348      155420 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1349             : 
    1350      155420 :     switch (info & XLOG_HEAP_OPMASK)
    1351             :     {
    1352       27968 :         case XLOG_HEAP2_PRUNE_ON_ACCESS:
    1353             :         case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
    1354             :         case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
    1355       27968 :             heap_xlog_prune_freeze(record);
    1356       27968 :             break;
    1357        9674 :         case XLOG_HEAP2_VISIBLE:
    1358        9674 :             heap_xlog_visible(record);
    1359        9674 :             break;
    1360      115778 :         case XLOG_HEAP2_MULTI_INSERT:
    1361      115778 :             heap_xlog_multi_insert(record);
    1362      115778 :             break;
    1363           0 :         case XLOG_HEAP2_LOCK_UPDATED:
    1364           0 :             heap_xlog_lock_updated(record);
    1365           0 :             break;
    1366        2000 :         case XLOG_HEAP2_NEW_CID:
    1367             : 
    1368             :             /*
    1369             :              * Nothing to do on a real replay, only used during logical
    1370             :              * decoding.
    1371             :              */
    1372        2000 :             break;
    1373           0 :         case XLOG_HEAP2_REWRITE:
    1374           0 :             heap_xlog_logical_rewrite(record);
    1375           0 :             break;
    1376           0 :         default:
    1377           0 :             elog(PANIC, "heap2_redo: unknown op code %u", info);
    1378             :     }
    1379      155420 : }
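
heap and heap2 are two distinct resource managers that happen to share this file; recovery routes each WAL record to its rmgr's redo callback through a function-pointer table (the real table is generated from rmgrlist.h). A hedged sketch of such a dispatch table, not the actual RmgrTable layout:

typedef struct XLogReaderState XLogReaderState;    /* opaque here */

typedef struct
{
    const char *rm_name;
    void      (*rm_redo) (XLogReaderState *record);
} RmgrSketch;

extern void heap_redo(XLogReaderState *record);
extern void heap2_redo(XLogReaderState *record);

/* Illustration only: each record's rmgr ID indexes a table like this. */
static const RmgrSketch rmgr_sketch[] = {
    {"Heap2", heap2_redo},
    {"Heap", heap_redo},
};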
    1380             : 
    1381             : /*
    1382             :  * Mask a heap page before performing consistency checks on it.
    1383             :  */
    1384             : void
    1385     5846152 : heap_mask(char *pagedata, BlockNumber blkno)
    1386             : {
    1387     5846152 :     Page        page = (Page) pagedata;
    1388             :     OffsetNumber off;
    1389             : 
    1390     5846152 :     mask_page_lsn_and_checksum(page);
    1391             : 
    1392     5846152 :     mask_page_hint_bits(page);
    1393     5846152 :     mask_unused_space(page);
    1394             : 
    1395   481546412 :     for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
    1396             :     {
    1397   475700260 :         ItemId      iid = PageGetItemId(page, off);
    1398             :         char       *page_item;
    1399             : 
    1400   475700260 :         page_item = (char *) (page + ItemIdGetOffset(iid));
    1401             : 
    1402   475700260 :         if (ItemIdIsNormal(iid))
    1403             :         {
    1404   448871652 :             HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
    1405             : 
    1406             :             /*
    1407             :              * If xmin of a tuple is not yet frozen, we should ignore
    1408             :              * differences in hint bits, since they can be set without
    1409             :              * emitting WAL.
    1410             :              */
    1411   448871652 :             if (!HeapTupleHeaderXminFrozen(page_htup))
    1412   442547020 :                 page_htup->t_infomask &= ~HEAP_XACT_MASK;
    1413             :             else
    1414             :             {
    1415             :                 /* We still need to mask the xmax hint bits. */
    1416     6324632 :                 page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
    1417     6324632 :                 page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
    1418             :             }
    1419             : 
    1420             :             /*
    1421             :              * During replay, we set Command Id to FirstCommandId. Hence, mask
    1422             :              * it. See heap_xlog_insert() for details.
    1423             :              */
    1424   448871652 :             page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
    1425             : 
    1426             :             /*
    1427             :              * For a speculative tuple, heap_insert() does not set ctid in the
    1428             :              * caller-passed heap tuple itself, leaving the ctid field to
    1429             :              * contain a speculative token value - a per-backend monotonically
    1430             :              * increasing identifier.  Nor does heap_insert() WAL-log the
    1431             :              * ctid under any circumstances.
    1432             :              *
    1433             :              * During redo, heap_xlog_insert() sets t_ctid to the current
    1434             :              * block number and self offset number; it knows nothing about
    1435             :              * speculative insertions on the primary.  Hence, we likewise set
    1436             :              * t_ctid to the current block number and self offset number, so
    1437             :              * that this expected inconsistency is ignored.
    1438             :              */
    1439   448871652 :             if (HeapTupleHeaderIsSpeculative(page_htup))
    1440         166 :                 ItemPointerSet(&page_htup->t_ctid, blkno, off);
    1441             : 
    1442             :             /*
    1443             :              * NB: Not ignoring ctid changes due to the tuple having moved
    1444             :              * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
    1445             :              * important information that needs to be in-sync between primary
    1446             :              * and standby, and thus is WAL logged.
    1447             :              */
    1448             :         }
    1449             : 
    1450             :         /*
    1451             :          * Ignore any padding bytes after the tuple, when the length of the
    1452             :          * item is not MAXALIGNed.
    1453             :          */
    1454   475700260 :         if (ItemIdHasStorage(iid))
    1455             :         {
    1456   448871652 :             int         len = ItemIdGetLength(iid);
    1457   448871652 :             int         padlen = MAXALIGN(len) - len;
    1458             : 
    1459   448871652 :             if (padlen > 0)
    1460   239151740 :                 memset(page_item + len, MASK_MARKER, padlen);
    1461             :         }
    1462             :     }
    1463     5846152 : }
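
The padding mask at the end relies on MAXALIGN rounding a length up to the platform's maximal alignment, so padlen is just the rounded length minus the raw one. A self-contained sketch of that computation, assuming 8-byte alignment and a stand-in value for MASK_MARKER:

#include <stddef.h>
#include <string.h>

#define SK_ALIGN     8             /* assume MAXALIGN rounds to 8 bytes */
#define SK_ALIGN_UP(len) \
    (((len) + (SK_ALIGN - 1)) & ~((size_t) (SK_ALIGN - 1)))
#define SK_MASK_BYTE 0x40          /* stand-in for MASK_MARKER */

/* Blank out the alignment padding that follows an item of length len. */
static void
mask_item_padding(char *item, size_t len)
{
    size_t      padlen = SK_ALIGN_UP(len) - len;

    if (padlen > 0)
        memset(item + len, SK_MASK_BYTE, padlen);
}

For example, a 13-byte item rounds up to 16, so the 3 trailing padding bytes are overwritten before primary and standby pages are compared.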

Generated by: LCOV version 1.16