LCOV - code coverage report
Current view: top level - src/backend/access/heap - heapam_xlog.c

Test: PostgreSQL 19devel            Date: 2025-10-16 02:17:52

                   Hit      Total    Coverage
       Lines:      509        570      89.3 %
       Functions:   13         14      92.9 %

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heapam_xlog.c
       4             :  *    WAL replay logic for heap access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/heapam_xlog.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/bufmask.h"
      18             : #include "access/heapam.h"
      19             : #include "access/visibilitymap.h"
      20             : #include "access/xlog.h"
      21             : #include "access/xlogutils.h"
      22             : #include "storage/freespace.h"
      23             : #include "storage/standby.h"
      24             : 
      25             : 
      26             : /*
      27             :  * Replay XLOG_HEAP2_PRUNE_* records.
      28             :  */
      29             : static void
      30       28428 : heap_xlog_prune_freeze(XLogReaderState *record)
      31             : {
      32       28428 :     XLogRecPtr  lsn = record->EndRecPtr;
      33       28428 :     char       *maindataptr = XLogRecGetData(record);
      34             :     xl_heap_prune xlrec;
      35             :     Buffer      buffer;
      36             :     RelFileLocator rlocator;
      37             :     BlockNumber blkno;
      38       28428 :     Buffer      vmbuffer = InvalidBuffer;
      39       28428 :     uint8       vmflags = 0;
      40       28428 :     Size        freespace = 0;
      41             : 
      42       28428 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
      43       28428 :     memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
      44       28428 :     maindataptr += SizeOfHeapPrune;
      45             : 
      46             :     /*
      47             :      * We will take an ordinary exclusive lock or a cleanup lock depending on
      48             :      * whether the XLHP_CLEANUP_LOCK flag is set.  With an ordinary exclusive
      49             :  * lock, we'd better not be doing anything that requires moving existing
      50             :      * tuple data.
      51             :      */
      52             :     Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
      53             :            (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
      54             : 
      55       28428 :     if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
      56             :     {
      57        6714 :         vmflags = VISIBILITYMAP_ALL_VISIBLE;
      58        6714 :         if (xlrec.flags & XLHP_VM_ALL_FROZEN)
      59        5840 :             vmflags |= VISIBILITYMAP_ALL_FROZEN;
      60             :     }
      61             : 
      62             :     /*
      63             :      * After xl_heap_prune is the optional snapshot conflict horizon.
      64             :      *
      65             :      * In Hot Standby mode, we must ensure that there are no running queries
      66             :      * which would conflict with the changes in this record. That means we
      67             :      * can't replay this record if it removes tuples that are still visible to
      68             :      * transactions on the standby, freeze tuples with xids that are still
      69             :      * considered running on the standby, or set a page as all-visible in the
      70             :      * VM if it isn't all-visible to all transactions on the standby.
      71             :      */
      72       28428 :     if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
      73             :     {
      74             :         TransactionId snapshot_conflict_horizon;
      75             : 
      76             :         /* memcpy() because snapshot_conflict_horizon is stored unaligned */
      77       20876 :         memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
      78       20876 :         maindataptr += sizeof(TransactionId);
      79             : 
      80       20876 :         if (InHotStandby)
      81       20414 :             ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
      82       20414 :                                                 (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
      83             :                                                 rlocator);
      84             :     }
      85             : 
      86             :     /*
      87             :      * If we have a full-page image of the heap block, restore it and we're
      88             :      * done with the heap block.
      89             :      */
      90       28428 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
      91       28428 :                                       (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
      92             :                                       &buffer) == BLK_NEEDS_REDO)
      93             :     {
      94       20914 :         Page        page = BufferGetPage(buffer);
      95             :         OffsetNumber *redirected;
      96             :         OffsetNumber *nowdead;
      97             :         OffsetNumber *nowunused;
      98             :         int         nredirected;
      99             :         int         ndead;
     100             :         int         nunused;
     101             :         int         nplans;
     102             :         Size        datalen;
     103             :         xlhp_freeze_plan *plans;
     104             :         OffsetNumber *frz_offsets;
     105       20914 :         char       *dataptr = XLogRecGetBlockData(record, 0, &datalen);
     106             :         bool        do_prune;
     107             : 
     108       20914 :         heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
     109             :                                                &nplans, &plans, &frz_offsets,
     110             :                                                &nredirected, &redirected,
     111             :                                                &ndead, &nowdead,
     112             :                                                &nunused, &nowunused);
     113             : 
     114       20914 :         do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
     115             : 
     116             :         /* Ensure the record does something */
     117             :         Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
     118             : 
     119             :         /*
     120             :          * Update all line pointers per the record, and repair fragmentation
     121             :          * if needed.
     122             :          */
     123       20914 :         if (do_prune)
     124       19848 :             heap_page_prune_execute(buffer,
     125       19848 :                                     (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
     126             :                                     redirected, nredirected,
     127             :                                     nowdead, ndead,
     128             :                                     nowunused, nunused);
     129             : 
     130             :         /* Freeze tuples */
     131       23188 :         for (int p = 0; p < nplans; p++)
     132             :         {
     133             :             HeapTupleFreeze frz;
     134             : 
     135             :             /*
     136             :              * Convert freeze plan representation from WAL record into
     137             :              * per-tuple format used by heap_execute_freeze_tuple
     138             :              */
     139        2274 :             frz.xmax = plans[p].xmax;
     140        2274 :             frz.t_infomask2 = plans[p].t_infomask2;
     141        2274 :             frz.t_infomask = plans[p].t_infomask;
     142        2274 :             frz.frzflags = plans[p].frzflags;
     143        2274 :             frz.offset = InvalidOffsetNumber;   /* unused, but be tidy */
     144             : 
     145      100536 :             for (int i = 0; i < plans[p].ntuples; i++)
     146             :             {
     147       98262 :                 OffsetNumber offset = *(frz_offsets++);
     148             :                 ItemId      lp;
     149             :                 HeapTupleHeader tuple;
     150             : 
     151       98262 :                 lp = PageGetItemId(page, offset);
     152       98262 :                 tuple = (HeapTupleHeader) PageGetItem(page, lp);
     153       98262 :                 heap_execute_freeze_tuple(tuple, &frz);
     154             :             }
     155             :         }
     156             : 
     157             :         /* There should be no more data */
     158             :         Assert((char *) frz_offsets == dataptr + datalen);
     159             : 
     160       20914 :         if (vmflags & VISIBILITYMAP_VALID_BITS)
     161        4674 :             PageSetAllVisible(page);
     162             : 
     163       20914 :         MarkBufferDirty(buffer);
     164             : 
     165             :         /*
     166             :          * See log_heap_prune_and_freeze() for commentary on when we set the
     167             :          * heap page LSN.
     168             :          */
     169       20914 :         if (do_prune || nplans > 0 ||
     170           0 :             ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
     171       20914 :             PageSetLSN(page, lsn);
     172             : 
     173             :         /*
     174             :          * Note: we don't worry about updating the page's prunability hints.
     175             :          * At worst this will cause an extra prune cycle to occur soon.
     176             :          */
     177             :     }
     178             : 
     179             :     /*
     180             :      * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
     181             :      * or the VM, update the freespace map.
     182             :      *
     183             :      * Even when no actual space is freed (when only marking the page
     184             :      * all-visible or frozen), we still update the FSM. Because the FSM is
     185             :      * unlogged and maintained heuristically, it often becomes stale on
     186             :      * standbys. If such a standby is later promoted and runs VACUUM, it will
     187             :      * skip recalculating free space for pages that were marked
      188             :  * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
     189             :      * optimistic free space values upward, causing future insertions to
     190             :      * select pages that turn out to be unusable. In bulk, this can lead to
     191             :      * long stalls.
     192             :      *
     193             :      * To prevent this, always update the FSM even when only marking a page
     194             :      * all-visible/all-frozen.
     195             :      *
     196             :      * Do this regardless of whether a full-page image is logged, since FSM
     197             :      * data is not part of the page itself.
     198             :      */
     199       28428 :     if (BufferIsValid(buffer))
     200             :     {
     201       28428 :         if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
     202             :                             XLHP_HAS_DEAD_ITEMS |
     203        3300 :                             XLHP_HAS_NOW_UNUSED_ITEMS)) ||
     204        3300 :             (vmflags & VISIBILITYMAP_VALID_BITS))
     205       25128 :             freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
     206             : 
     207             :         /*
     208             :          * We want to avoid holding an exclusive lock on the heap buffer while
     209             :          * doing IO (either of the FSM or the VM), so we'll release it now.
     210             :          */
     211       28428 :         UnlockReleaseBuffer(buffer);
     212             :     }
     213             : 
     214             :     /*
     215             :      * Now read and update the VM block.
     216             :      *
     217             :      * We must redo changes to the VM even if the heap page was skipped due to
     218             :      * LSN interlock. See comment in heap_xlog_multi_insert() for more details
     219             :      * on replaying changes to the VM.
     220             :      */
     221       35142 :     if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
     222        6714 :         XLogReadBufferForRedoExtended(record, 1,
     223             :                                       RBM_ZERO_ON_ERROR,
     224             :                                       false,
     225             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     226             :     {
     227        6556 :         Page        vmpage = BufferGetPage(vmbuffer);
     228             : 
     229             :         /* initialize the page if it was read as zeros */
     230        6556 :         if (PageIsNew(vmpage))
     231           0 :             PageInit(vmpage, BLCKSZ, 0);
     232             : 
     233        6556 :         visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
     234             : 
     235             :         Assert(BufferIsDirty(vmbuffer));
     236        6556 :         PageSetLSN(vmpage, lsn);
     237             :     }
     238             : 
     239       28428 :     if (BufferIsValid(vmbuffer))
     240        6714 :         UnlockReleaseBuffer(vmbuffer);
     241             : 
     242       28428 :     if (freespace > 0)
     243       25042 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     244       28428 : }
     245             : 
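The memcpy() of xl_heap_prune and of snapshot_conflict_horizon above is deliberate: WAL main data is packed, so multi-byte fields can land at unaligned addresses, and reading them through a cast pointer would be undefined behavior (and a hard fault on strict-alignment platforms). A minimal standalone sketch of the pattern, with TransactionId mocked as uint32_t (an assumption; the real typedef comes from the PostgreSQL headers):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef uint32_t TransactionId; /* stand-in for PostgreSQL's typedef */

    int
    main(void)
    {
        /* One flag byte followed by a packed, unaligned TransactionId. */
        unsigned char maindata[1 + sizeof(TransactionId)];
        TransactionId horizon = 742;
        TransactionId copy;

        maindata[0] = 0x01;     /* pretend flag byte, forcing misalignment */
        memcpy(maindata + 1, &horizon, sizeof(TransactionId));

        /* memcpy() is safe at any alignment; *(TransactionId *) is not. */
        memcpy(&copy, maindata + 1, sizeof(TransactionId));
        printf("conflict horizon = %u\n", (unsigned) copy);
        return 0;
    }
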
     246             : /*
     247             :  * Replay XLOG_HEAP2_VISIBLE records.
     248             :  *
     249             :  * The critical integrity requirement here is that we must never end up with
     250             :  * a situation where the visibility map bit is set, and the page-level
     251             :  * PD_ALL_VISIBLE bit is clear.  If that were to occur, then a subsequent
     252             :  * page modification would fail to clear the visibility map bit.
     253             :  */
     254             : static void
     255        9320 : heap_xlog_visible(XLogReaderState *record)
     256             : {
     257        9320 :     XLogRecPtr  lsn = record->EndRecPtr;
     258        9320 :     xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
     259        9320 :     Buffer      vmbuffer = InvalidBuffer;
     260             :     Buffer      buffer;
     261             :     Page        page;
     262             :     RelFileLocator rlocator;
     263             :     BlockNumber blkno;
     264             :     XLogRedoAction action;
     265             : 
     266             :     Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
     267             : 
     268        9320 :     XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
     269             : 
     270             :     /*
     271             :      * If there are any Hot Standby transactions running that have an xmin
     272             :      * horizon old enough that this page isn't all-visible for them, they
     273             :      * might incorrectly decide that an index-only scan can skip a heap fetch.
     274             :      *
     275             :      * NB: It might be better to throw some kind of "soft" conflict here that
     276             :      * forces any index-only scan that is in flight to perform heap fetches,
     277             :      * rather than killing the transaction outright.
     278             :      */
     279        9320 :     if (InHotStandby)
     280        8972 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
     281        8972 :                                             xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
     282             :                                             rlocator);
     283             : 
     284             :     /*
     285             :      * Read the heap page, if it still exists. If the heap file has dropped or
      286             :      * Read the heap page, if it still exists. If the heap file has been
      287             :      * dropped or truncated later in recovery, we don't need to update the
      288             :      * page, but we'd better still update the visibility map.
     289        9320 :     action = XLogReadBufferForRedo(record, 1, &buffer);
     290        9320 :     if (action == BLK_NEEDS_REDO)
     291             :     {
     292             :         /*
     293             :          * We don't bump the LSN of the heap page when setting the visibility
      294             :          * map bit (unless checksums or wal_log_hints is enabled, in which
     295             :          * case we must). This exposes us to torn page hazards, but since
     296             :          * we're not inspecting the existing page contents in any way, we
     297             :          * don't care.
     298             :          */
     299        5516 :         page = BufferGetPage(buffer);
     300             : 
     301        5516 :         PageSetAllVisible(page);
     302             : 
     303        5516 :         if (XLogHintBitIsNeeded())
     304        5516 :             PageSetLSN(page, lsn);
     305             : 
     306        5516 :         MarkBufferDirty(buffer);
     307             :     }
     308             :     else if (action == BLK_RESTORED)
     309             :     {
     310             :         /*
     311             :          * If heap block was backed up, we already restored it and there's
     312             :          * nothing more to do. (This can only happen with checksums or
     313             :          * wal_log_hints enabled.)
     314             :          */
     315             :     }
     316             : 
     317        9320 :     if (BufferIsValid(buffer))
     318             :     {
     319        9320 :         Size        space = PageGetFreeSpace(BufferGetPage(buffer));
     320             : 
     321        9320 :         UnlockReleaseBuffer(buffer);
     322             : 
     323             :         /*
      324             :          * Since the FSM is not WAL-logged and only updated heuristically, it
      325             :          * easily becomes stale on standbys.  If the standby is later promoted
      326             :          * and runs VACUUM, it will skip updating individual free space
      327             :          * figures for pages that became all-visible (or all-frozen, depending
      328             :          * on the vacuum mode), which is troublesome when FreeSpaceMapVacuum
      329             :          * propagates overly optimistic free space values to upper FSM layers;
      330             :          * later inserters try to use such pages only to find out that they
      331             :          * are unusable.  This can cause long stalls when there are many such
      332             :          * pages.
      333             :          *
      334             :          * Forestall those problems by updating the FSM's idea about a page that
     335             :          * is becoming all-visible or all-frozen.
     336             :          *
     337             :          * Do this regardless of a full-page image being applied, since the
     338             :          * FSM data is not in the page anyway.
     339             :          */
     340        9320 :         if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
     341        9320 :             XLogRecordPageWithFreeSpace(rlocator, blkno, space);
     342             :     }
     343             : 
     344             :     /*
     345             :      * Even if we skipped the heap page update due to the LSN interlock, it's
     346             :      * still safe to update the visibility map.  Any WAL record that clears
     347             :      * the visibility map bit does so before checking the page LSN, so any
     348             :      * bits that need to be cleared will still be cleared.
     349             :      */
     350        9320 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
     351             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     352             :     {
     353        8910 :         Page        vmpage = BufferGetPage(vmbuffer);
     354             :         Relation    reln;
     355             :         uint8       vmbits;
     356             : 
     357             :         /* initialize the page if it was read as zeros */
     358        8910 :         if (PageIsNew(vmpage))
     359           0 :             PageInit(vmpage, BLCKSZ, 0);
     360             : 
     361             :         /* remove VISIBILITYMAP_XLOG_* */
     362        8910 :         vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
     363             : 
     364             :         /*
     365             :          * XLogReadBufferForRedoExtended locked the buffer. But
     366             :          * visibilitymap_set will handle locking itself.
     367             :          */
     368        8910 :         LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
     369             : 
     370        8910 :         reln = CreateFakeRelcacheEntry(rlocator);
     371             : 
     372        8910 :         visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
     373             :                           xlrec->snapshotConflictHorizon, vmbits);
     374             : 
     375        8910 :         ReleaseBuffer(vmbuffer);
     376        8910 :         FreeFakeRelcacheEntry(reln);
     377             :     }
     378         410 :     else if (BufferIsValid(vmbuffer))
     379         410 :         UnlockReleaseBuffer(vmbuffer);
     380        9320 : }
     381             : 
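For reference, the "checksums or wal_log_hints" test used twice above is the XLogHintBitIsNeeded() macro; in current sources it is defined in src/include/access/xlog.h roughly as follows (quoted from memory, so verify against your tree):

    #define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints)

When either condition holds, setting PD_ALL_VISIBLE must also bump the page LSN, so that a torn write of the hinted page can be repaired from the full-page image during a later replay.
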
     382             : /*
     383             :  * Given an "infobits" field from an XLog record, set the correct bits in the
     384             :  * given infomask and infomask2 for the tuple touched by the record.
     385             :  *
     386             :  * (This is the reverse of compute_infobits).
     387             :  */
     388             : static void
     389      893152 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
     390             : {
     391      893152 :     *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
     392             :                    HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
     393      893152 :     *infomask2 &= ~HEAP_KEYS_UPDATED;
     394             : 
     395      893152 :     if (infobits & XLHL_XMAX_IS_MULTI)
     396           4 :         *infomask |= HEAP_XMAX_IS_MULTI;
     397      893152 :     if (infobits & XLHL_XMAX_LOCK_ONLY)
     398      110422 :         *infomask |= HEAP_XMAX_LOCK_ONLY;
     399      893152 :     if (infobits & XLHL_XMAX_EXCL_LOCK)
     400      109626 :         *infomask |= HEAP_XMAX_EXCL_LOCK;
     401             :     /* note HEAP_XMAX_SHR_LOCK isn't considered here */
     402      893152 :     if (infobits & XLHL_XMAX_KEYSHR_LOCK)
     403         818 :         *infomask |= HEAP_XMAX_KEYSHR_LOCK;
     404             : 
     405      893152 :     if (infobits & XLHL_KEYS_UPDATED)
     406      598728 :         *infomask2 |= HEAP_KEYS_UPDATED;
     407      893152 : }
     408             : 
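The forward direction mentioned in the comment, compute_infobits(), lives in heapam.c rather than in this file, which is why it contributes nothing to this coverage report. For orientation, it reads approximately as follows in current sources (verify against your tree):

    static uint8
    compute_infobits(uint16 infomask, uint16 infomask2)
    {
        return
            ((infomask & HEAP_XMAX_IS_MULTI) != 0 ? XLHL_XMAX_IS_MULTI : 0) |
            ((infomask & HEAP_XMAX_LOCK_ONLY) != 0 ? XLHL_XMAX_LOCK_ONLY : 0) |
            ((infomask & HEAP_XMAX_EXCL_LOCK) != 0 ? XLHL_XMAX_EXCL_LOCK : 0) |
            /* note we ignore HEAP_XMAX_SHR_LOCK here */
            ((infomask & HEAP_XMAX_KEYSHR_LOCK) != 0 ? XLHL_XMAX_KEYSHR_LOCK : 0) |
            ((infomask2 & HEAP_KEYS_UPDATED) != 0 ? XLHL_KEYS_UPDATED : 0);
    }

Note the asymmetry called out in both functions: HEAP_XMAX_SHR_LOCK is neither saved nor restored explicitly, but since it is defined as the combination of the EXCL and KEYSHR lock bits, a shared lock still survives the round trip.
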
     409             : /*
     410             :  * Replay XLOG_HEAP_DELETE records.
     411             :  */
     412             : static void
     413      600072 : heap_xlog_delete(XLogReaderState *record)
     414             : {
     415      600072 :     XLogRecPtr  lsn = record->EndRecPtr;
     416      600072 :     xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
     417             :     Buffer      buffer;
     418             :     Page        page;
     419      600072 :     ItemId      lp = NULL;
     420             :     HeapTupleHeader htup;
     421             :     BlockNumber blkno;
     422             :     RelFileLocator target_locator;
     423             :     ItemPointerData target_tid;
     424             : 
     425      600072 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     426      600072 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     427      600072 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     428             : 
     429             :     /*
     430             :      * The visibility map may need to be fixed even if the heap page is
     431             :      * already up-to-date.
     432             :      */
     433      600072 :     if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
     434             :     {
     435          66 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     436          66 :         Buffer      vmbuffer = InvalidBuffer;
     437             : 
     438          66 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     439          66 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     440          66 :         ReleaseBuffer(vmbuffer);
     441          66 :         FreeFakeRelcacheEntry(reln);
     442             :     }
     443             : 
     444      600072 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     445             :     {
     446      596000 :         page = BufferGetPage(buffer);
     447             : 
     448      596000 :         if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
     449      596000 :             lp = PageGetItemId(page, xlrec->offnum);
     450             : 
     451      596000 :         if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
     452           0 :             elog(PANIC, "invalid lp");
     453             : 
     454      596000 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     455             : 
     456      596000 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     457      596000 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     458      596000 :         HeapTupleHeaderClearHotUpdated(htup);
     459      596000 :         fix_infomask_from_infobits(xlrec->infobits_set,
     460             :                                    &htup->t_infomask, &htup->t_infomask2);
     461      596000 :         if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
     462      596000 :             HeapTupleHeaderSetXmax(htup, xlrec->xmax);
     463             :         else
     464           0 :             HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
     465      596000 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     466             : 
     467             :         /* Mark the page as a candidate for pruning */
     468      596000 :         PageSetPrunable(page, XLogRecGetXid(record));
     469             : 
     470      596000 :         if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
     471          12 :             PageClearAllVisible(page);
     472             : 
     473             :         /* Make sure t_ctid is set correctly */
     474      596000 :         if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
     475         284 :             HeapTupleHeaderSetMovedPartitions(htup);
     476             :         else
     477      595716 :             htup->t_ctid = target_tid;
     478      596000 :         PageSetLSN(page, lsn);
     479      596000 :         MarkBufferDirty(buffer);
     480             :     }
     481      600072 :     if (BufferIsValid(buffer))
     482      600072 :         UnlockReleaseBuffer(buffer);
     483      600072 : }
     484             : 
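The "candidate for pruning" marking is the PageSetPrunable() macro from src/include/storage/bufpage.h: it stores the deleting XID in the page header's pd_prune_xid field, keeping the oldest such XID seen, so opportunistic pruning can cheaply judge whether a visit might pay off. Approximately, from current sources (verify locally):

    #define PageSetPrunable(page, xid) \
    do { \
        Assert(TransactionIdIsNormal(xid)); \
        if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
            TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
            ((PageHeader) (page))->pd_prune_xid = (xid); \
    } while (0)
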
     485             : /*
     486             :  * Replay XLOG_HEAP_INSERT records.
     487             :  */
     488             : static void
     489     2581762 : heap_xlog_insert(XLogReaderState *record)
     490             : {
     491     2581762 :     XLogRecPtr  lsn = record->EndRecPtr;
     492     2581762 :     xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
     493             :     Buffer      buffer;
     494             :     Page        page;
     495             :     union
     496             :     {
     497             :         HeapTupleHeaderData hdr;
     498             :         char        data[MaxHeapTupleSize];
     499             :     }           tbuf;
     500             :     HeapTupleHeader htup;
     501             :     xl_heap_header xlhdr;
     502             :     uint32      newlen;
     503     2581762 :     Size        freespace = 0;
     504             :     RelFileLocator target_locator;
     505             :     BlockNumber blkno;
     506             :     ItemPointerData target_tid;
     507             :     XLogRedoAction action;
     508             : 
     509     2581762 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     510     2581762 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     511     2581762 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     512             : 
     513             :     /* No freezing in the heap_insert() code path */
     514             :     Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
     515             : 
     516             :     /*
     517             :      * The visibility map may need to be fixed even if the heap page is
     518             :      * already up-to-date.
     519             :      */
     520     2581762 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     521             :     {
     522        2168 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     523        2168 :         Buffer      vmbuffer = InvalidBuffer;
     524             : 
     525        2168 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     526        2168 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     527        2168 :         ReleaseBuffer(vmbuffer);
     528        2168 :         FreeFakeRelcacheEntry(reln);
     529             :     }
     530             : 
     531             :     /*
     532             :      * If we inserted the first and only tuple on the page, re-initialize the
     533             :      * page from scratch.
     534             :      */
     535     2581762 :     if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     536             :     {
     537       34248 :         buffer = XLogInitBufferForRedo(record, 0);
     538       34248 :         page = BufferGetPage(buffer);
     539       34248 :         PageInit(page, BufferGetPageSize(buffer), 0);
     540       34248 :         action = BLK_NEEDS_REDO;
     541             :     }
     542             :     else
     543     2547514 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     544     2581762 :     if (action == BLK_NEEDS_REDO)
     545             :     {
     546             :         Size        datalen;
     547             :         char       *data;
     548             : 
     549     2576076 :         page = BufferGetPage(buffer);
     550             : 
     551     2576076 :         if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
     552           0 :             elog(PANIC, "invalid max offset number");
     553             : 
     554     2576076 :         data = XLogRecGetBlockData(record, 0, &datalen);
     555             : 
     556     2576076 :         newlen = datalen - SizeOfHeapHeader;
     557             :         Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
     558     2576076 :         memcpy(&xlhdr, data, SizeOfHeapHeader);
     559     2576076 :         data += SizeOfHeapHeader;
     560             : 
     561     2576076 :         htup = &tbuf.hdr;
     562     2576076 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     563             :         /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     564     2576076 :         memcpy((char *) htup + SizeofHeapTupleHeader,
     565             :                data,
     566             :                newlen);
     567     2576076 :         newlen += SizeofHeapTupleHeader;
     568     2576076 :         htup->t_infomask2 = xlhdr.t_infomask2;
     569     2576076 :         htup->t_infomask = xlhdr.t_infomask;
     570     2576076 :         htup->t_hoff = xlhdr.t_hoff;
     571     2576076 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     572     2576076 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
     573     2576076 :         htup->t_ctid = target_tid;
     574             : 
     575     2576076 :         if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
     576             :                         true, true) == InvalidOffsetNumber)
     577           0 :             elog(PANIC, "failed to add tuple");
     578             : 
     579     2576076 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     580             : 
     581     2576076 :         PageSetLSN(page, lsn);
     582             : 
     583     2576076 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     584         676 :             PageClearAllVisible(page);
     585             : 
     586     2576076 :         MarkBufferDirty(buffer);
     587             :     }
     588     2581762 :     if (BufferIsValid(buffer))
     589     2581762 :         UnlockReleaseBuffer(buffer);
     590             : 
     591             :     /*
     592             :      * If the page is running low on free space, update the FSM as well.
     593             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     594             :      * better than that without knowing the fill-factor for the table.
     595             :      *
     596             :      * XXX: Don't do this if the page was restored from full page image. We
     597             :      * don't bother to update the FSM in that case, it doesn't need to be
     598             :      * totally accurate anyway.
     599             :      */
     600     2581762 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     601      507454 :         XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
     602     2581762 : }
     603             : 
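The "less than 20%" rule above is literal integer arithmetic on the page size. A trivial standalone check of the numbers, assuming the default BLCKSZ of 8192 bytes (BLCKSZ is a configure-time option):

    #include <stdio.h>

    #define BLCKSZ 8192         /* default PostgreSQL page size */

    int
    main(void)
    {
        /* Replay reports to the FSM only when freespace < BLCKSZ / 5. */
        printf("FSM update threshold: %d bytes\n", BLCKSZ / 5); /* 1638 */
        return 0;
    }
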
     604             : /*
     605             :  * Replay XLOG_HEAP2_MULTI_INSERT records.
     606             :  */
     607             : static void
     608      115418 : heap_xlog_multi_insert(XLogReaderState *record)
     609             : {
     610      115418 :     XLogRecPtr  lsn = record->EndRecPtr;
     611             :     xl_heap_multi_insert *xlrec;
     612             :     RelFileLocator rlocator;
     613             :     BlockNumber blkno;
     614             :     Buffer      buffer;
     615             :     Page        page;
     616             :     union
     617             :     {
     618             :         HeapTupleHeaderData hdr;
     619             :         char        data[MaxHeapTupleSize];
     620             :     }           tbuf;
     621             :     HeapTupleHeader htup;
     622             :     uint32      newlen;
     623      115418 :     Size        freespace = 0;
     624             :     int         i;
     625      115418 :     bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
     626             :     XLogRedoAction action;
     627      115418 :     Buffer      vmbuffer = InvalidBuffer;
     628             : 
     629             :     /*
     630             :      * Insertion doesn't overwrite MVCC data, so no conflict processing is
     631             :      * required.
     632             :      */
     633      115418 :     xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
     634             : 
     635      115418 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
     636             : 
     637             :     /* check that the mutually exclusive flags are not both set */
     638             :     Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
     639             :              (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
     640             : 
     641             :     /*
     642             :      * The visibility map may need to be fixed even if the heap page is
     643             :      * already up-to-date.
     644             :      */
     645      115418 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     646             :     {
     647        2302 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     648             : 
     649        2302 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     650        2302 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     651        2302 :         ReleaseBuffer(vmbuffer);
     652        2302 :         vmbuffer = InvalidBuffer;
     653        2302 :         FreeFakeRelcacheEntry(reln);
     654             :     }
     655             : 
     656      115418 :     if (isinit)
     657             :     {
     658        3290 :         buffer = XLogInitBufferForRedo(record, 0);
     659        3290 :         page = BufferGetPage(buffer);
     660        3290 :         PageInit(page, BufferGetPageSize(buffer), 0);
     661        3290 :         action = BLK_NEEDS_REDO;
     662             :     }
     663             :     else
     664      112128 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     665      115418 :     if (action == BLK_NEEDS_REDO)
     666             :     {
     667             :         char       *tupdata;
     668             :         char       *endptr;
     669             :         Size        len;
     670             : 
     671             :         /* Tuples are stored as block data */
     672      111766 :         tupdata = XLogRecGetBlockData(record, 0, &len);
     673      111766 :         endptr = tupdata + len;
     674             : 
     675      111766 :         page = BufferGetPage(buffer);
     676             : 
     677      523250 :         for (i = 0; i < xlrec->ntuples; i++)
     678             :         {
     679             :             OffsetNumber offnum;
     680             :             xl_multi_insert_tuple *xlhdr;
     681             : 
     682             :             /*
     683             :              * If we're reinitializing the page, the tuples are stored in
     684             :              * order from FirstOffsetNumber. Otherwise there's an array of
     685             :              * offsets in the WAL record, and the tuples come after that.
     686             :              */
     687      411484 :             if (isinit)
     688      197674 :                 offnum = FirstOffsetNumber + i;
     689             :             else
     690      213810 :                 offnum = xlrec->offsets[i];
     691      411484 :             if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     692           0 :                 elog(PANIC, "invalid max offset number");
     693             : 
     694      411484 :             xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
     695      411484 :             tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
     696             : 
     697      411484 :             newlen = xlhdr->datalen;
     698             :             Assert(newlen <= MaxHeapTupleSize);
     699      411484 :             htup = &tbuf.hdr;
     700      411484 :             MemSet(htup, 0, SizeofHeapTupleHeader);
     701             :             /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     702      411484 :             memcpy((char *) htup + SizeofHeapTupleHeader,
     703             :                    tupdata,
     704             :                    newlen);
     705      411484 :             tupdata += newlen;
     706             : 
     707      411484 :             newlen += SizeofHeapTupleHeader;
     708      411484 :             htup->t_infomask2 = xlhdr->t_infomask2;
     709      411484 :             htup->t_infomask = xlhdr->t_infomask;
     710      411484 :             htup->t_hoff = xlhdr->t_hoff;
     711      411484 :             HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     712      411484 :             HeapTupleHeaderSetCmin(htup, FirstCommandId);
     713      411484 :             ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
     714      411484 :             ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
     715             : 
     716      411484 :             offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
     717      411484 :             if (offnum == InvalidOffsetNumber)
     718           0 :                 elog(PANIC, "failed to add tuple");
     719             :         }
     720      111766 :         if (tupdata != endptr)
     721           0 :             elog(PANIC, "total tuple length mismatch");
     722             : 
     723      111766 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     724             : 
     725      111766 :         PageSetLSN(page, lsn);
     726             : 
     727      111766 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     728         180 :             PageClearAllVisible(page);
     729             : 
     730             :         /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
     731      111766 :         if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
     732           8 :             PageSetAllVisible(page);
     733             : 
     734      111766 :         MarkBufferDirty(buffer);
     735             :     }
     736      115418 :     if (BufferIsValid(buffer))
     737      115418 :         UnlockReleaseBuffer(buffer);
     738             : 
     739      115418 :     buffer = InvalidBuffer;
     740             : 
     741             :     /*
     742             :      * Read and update the visibility map (VM) block.
     743             :      *
     744             :      * We must always redo VM changes, even if the corresponding heap page
     745             :      * update was skipped due to the LSN interlock. Each VM block covers
     746             :      * multiple heap pages, so later WAL records may update other bits in the
     747             :      * same block. If this record includes an FPI (full-page image),
     748             :      * subsequent WAL records may depend on it to guard against torn pages.
     749             :      *
     750             :      * Heap page changes are replayed first to preserve the invariant:
     751             :      * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
     752             :      *
     753             :      * Note that we released the heap page lock above. During normal
      754             :      * operation, this would be unsafe --- a concurrent modification could
     755             :      * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
     756             :      * invariant.
     757             :      *
     758             :      * During recovery, however, no concurrent writers exist. Therefore,
     759             :      * updating the VM without holding the heap page lock is safe enough. This
     760             :      * same approach is taken when replaying xl_heap_visible records (see
     761             :      * heap_xlog_visible()).
     762             :      */
     763      115426 :     if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
     764           8 :         XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
     765             :                                       &vmbuffer) == BLK_NEEDS_REDO)
     766             :     {
     767           0 :         Page        vmpage = BufferGetPage(vmbuffer);
     768             : 
     769             :         /* initialize the page if it was read as zeros */
     770           0 :         if (PageIsNew(vmpage))
     771           0 :             PageInit(vmpage, BLCKSZ, 0);
     772             : 
     773           0 :         visibilitymap_set_vmbits(blkno,
     774             :                                  vmbuffer,
     775             :                                  VISIBILITYMAP_ALL_VISIBLE |
     776             :                                  VISIBILITYMAP_ALL_FROZEN,
     777             :                                  rlocator);
     778             : 
     779             :         Assert(BufferIsDirty(vmbuffer));
     780           0 :         PageSetLSN(vmpage, lsn);
     781             :     }
     782             : 
     783      115418 :     if (BufferIsValid(vmbuffer))
     784           8 :         UnlockReleaseBuffer(vmbuffer);
     785             : 
     786             :     /*
     787             :      * If the page is running low on free space, update the FSM as well.
     788             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     789             :      * better than that without knowing the fill-factor for the table.
     790             :      *
     791             :      * XXX: Don't do this if the page was restored from full page image. We
     792             :      * don't bother to update the FSM in that case, it doesn't need to be
     793             :      * totally accurate anyway.
     794             :      */
     795      115418 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     796       33866 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     797      115418 : }
     798             : 
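The SHORTALIGN() in the tuple loop above exists because each xl_multi_insert_tuple header is stored 2-byte aligned within the block data, while the variable-length tuple payload before it can end on any byte boundary, so the replay loop must realign its read cursor before every header. A self-contained illustration of the alignment macro, assuming 2-byte short alignment (PostgreSQL derives the real value from ALIGNOF_SHORT in c.h):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors TYPEALIGN()/SHORTALIGN() from src/include/c.h. */
    #define TYPEALIGN(ALIGNVAL, LEN) \
        (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1)))
    #define SHORTALIGN(LEN) TYPEALIGN(2, (LEN))

    int
    main(void)
    {
        /* Odd offsets round up to the next even address; even ones stay. */
        for (unsigned off = 100; off < 104; off++)
            printf("%u -> %u\n", off, (unsigned) SHORTALIGN(off));
        return 0;
    }
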
     799             : /*
     800             :  * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
     801             :  */
     802             : static void
     803      187376 : heap_xlog_update(XLogReaderState *record, bool hot_update)
     804             : {
     805      187376 :     XLogRecPtr  lsn = record->EndRecPtr;
     806      187376 :     xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
     807             :     RelFileLocator rlocator;
     808             :     BlockNumber oldblk;
     809             :     BlockNumber newblk;
     810             :     ItemPointerData newtid;
     811             :     Buffer      obuffer,
     812             :                 nbuffer;
     813             :     Page        page;
     814             :     OffsetNumber offnum;
     815      187376 :     ItemId      lp = NULL;
     816             :     HeapTupleData oldtup;
     817             :     HeapTupleHeader htup;
     818      187376 :     uint16      prefixlen = 0,
     819      187376 :                 suffixlen = 0;
     820             :     char       *newp;
     821             :     union
     822             :     {
     823             :         HeapTupleHeaderData hdr;
     824             :         char        data[MaxHeapTupleSize];
     825             :     }           tbuf;
     826             :     xl_heap_header xlhdr;
     827             :     uint32      newlen;
     828      187376 :     Size        freespace = 0;
     829             :     XLogRedoAction oldaction;
     830             :     XLogRedoAction newaction;
     831             : 
     832             :     /* initialize to keep the compiler quiet */
     833      187376 :     oldtup.t_data = NULL;
     834      187376 :     oldtup.t_len = 0;
     835             : 
     836      187376 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
     837      187376 :     if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
     838             :     {
     839             :         /* HOT updates are never done across pages */
     840             :         Assert(!hot_update);
     841             :     }
     842             :     else
     843       78310 :         oldblk = newblk;
     844             : 
     845      187376 :     ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
     846             : 
     847             :     /*
     848             :      * The visibility map may need to be fixed even if the heap page is
     849             :      * already up-to-date.
     850             :      */
     851      187376 :     if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     852             :     {
     853         462 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     854         462 :         Buffer      vmbuffer = InvalidBuffer;
     855             : 
     856         462 :         visibilitymap_pin(reln, oldblk, &vmbuffer);
     857         462 :         visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     858         462 :         ReleaseBuffer(vmbuffer);
     859         462 :         FreeFakeRelcacheEntry(reln);
     860             :     }
     861             : 
     862             :     /*
     863             :      * In normal operation, it is important to lock the two pages in
     864             :      * page-number order, to avoid possible deadlocks against other update
     865             :      * operations going the other way.  However, during WAL replay there can
     866             :      * be no other update happening, so we don't need to worry about that. But
     867             :      * we *do* need to worry that we don't expose an inconsistent state to Hot
     868             :      * Standby queries --- so the original page can't be unlocked before we've
     869             :      * added the new tuple to the new page.
     870             :      */
     871             : 
     872             :     /* Deal with old tuple version */
     873      187376 :     oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
     874             :                                       &obuffer);
     875      187376 :     if (oldaction == BLK_NEEDS_REDO)
     876             :     {
     877      186730 :         page = BufferGetPage(obuffer);
     878      186730 :         offnum = xlrec->old_offnum;
     879      186730 :         if (PageGetMaxOffsetNumber(page) >= offnum)
     880      186730 :             lp = PageGetItemId(page, offnum);
     881             : 
     882      186730 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
     883           0 :             elog(PANIC, "invalid lp");
     884             : 
     885      186730 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     886             : 
     887      186730 :         oldtup.t_data = htup;
     888      186730 :         oldtup.t_len = ItemIdGetLength(lp);
     889             : 
     890      186730 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     891      186730 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     892      186730 :         if (hot_update)
     893       72072 :             HeapTupleHeaderSetHotUpdated(htup);
     894             :         else
     895      114658 :             HeapTupleHeaderClearHotUpdated(htup);
     896      186730 :         fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
     897             :                                    &htup->t_infomask2);
     898      186730 :         HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
     899      186730 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     900             :         /* Set forward chain link in t_ctid */
     901      186730 :         htup->t_ctid = newtid;
     902             : 
     903             :         /* Mark the page as a candidate for pruning */
     904      186730 :         PageSetPrunable(page, XLogRecGetXid(record));
     905             : 
     906      186730 :         if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     907         434 :             PageClearAllVisible(page);
     908             : 
     909      186730 :         PageSetLSN(page, lsn);
     910      186730 :         MarkBufferDirty(obuffer);
     911             :     }
     912             : 
     913             :     /*
     914             :      * Read the page the new tuple goes into, if different from old.
     915             :      */
     916      187376 :     if (oldblk == newblk)
     917             :     {
     918       78310 :         nbuffer = obuffer;
     919       78310 :         newaction = oldaction;
     920             :     }
     921      109066 :     else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     922             :     {
     923        1124 :         nbuffer = XLogInitBufferForRedo(record, 0);
     924        1124 :         page = BufferGetPage(nbuffer);
     925        1124 :         PageInit(page, BufferGetPageSize(nbuffer), 0);
     926        1124 :         newaction = BLK_NEEDS_REDO;
     927             :     }
     928             :     else
     929      107942 :         newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
     930             : 
     931             :     /*
     932             :      * The visibility map may need to be fixed even if the heap page is
     933             :      * already up-to-date.
     934             :      */
     935      187376 :     if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
     936             :     {
     937         486 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     938         486 :         Buffer      vmbuffer = InvalidBuffer;
     939             : 
     940         486 :         visibilitymap_pin(reln, newblk, &vmbuffer);
     941         486 :         visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     942         486 :         ReleaseBuffer(vmbuffer);
     943         486 :         FreeFakeRelcacheEntry(reln);
     944             :     }
     945             : 
     946             :     /* Deal with new tuple */
     947      187376 :     if (newaction == BLK_NEEDS_REDO)
     948             :     {
     949             :         char       *recdata;
     950             :         char       *recdata_end;
     951             :         Size        datalen;
     952             :         Size        tuplen;
     953             : 
     954      186234 :         recdata = XLogRecGetBlockData(record, 0, &datalen);
     955      186234 :         recdata_end = recdata + datalen;
     956             : 
     957      186234 :         page = BufferGetPage(nbuffer);
     958             : 
     959      186234 :         offnum = xlrec->new_offnum;
     960      186234 :         if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     961           0 :             elog(PANIC, "invalid max offset number");
     962             : 
     963      186234 :         if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
     964             :         {
     965             :             Assert(newblk == oldblk);
     966       30506 :             memcpy(&prefixlen, recdata, sizeof(uint16));
     967       30506 :             recdata += sizeof(uint16);
     968             :         }
     969      186234 :         if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
     970             :         {
     971             :             Assert(newblk == oldblk);
     972       67252 :             memcpy(&suffixlen, recdata, sizeof(uint16));
     973       67252 :             recdata += sizeof(uint16);
     974             :         }
     975             : 
     976      186234 :         memcpy(&xlhdr, recdata, SizeOfHeapHeader);
     977      186234 :         recdata += SizeOfHeapHeader;
     978             : 
     979      186234 :         tuplen = recdata_end - recdata;
     980             :         Assert(tuplen <= MaxHeapTupleSize);
     981             : 
     982      186234 :         htup = &tbuf.hdr;
     983      186234 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     984             : 
     985             :         /*
     986             :          * Reconstruct the new tuple using the prefix and/or suffix from the
     987             :          * old tuple, and the data stored in the WAL record.
     988             :          */
     989      186234 :         newp = (char *) htup + SizeofHeapTupleHeader;
     990      186234 :         if (prefixlen > 0)
     991             :         {
     992             :             int         len;
     993             : 
     994             :             /* copy bitmap [+ padding] [+ oid] from WAL record */
     995       30506 :             len = xlhdr.t_hoff - SizeofHeapTupleHeader;
     996       30506 :             memcpy(newp, recdata, len);
     997       30506 :             recdata += len;
     998       30506 :             newp += len;
     999             : 
    1000             :             /* copy prefix from old tuple */
    1001       30506 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
    1002       30506 :             newp += prefixlen;
    1003             : 
    1004             :             /* copy new tuple data from WAL record */
    1005       30506 :             len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
    1006       30506 :             memcpy(newp, recdata, len);
    1007       30506 :             recdata += len;
    1008       30506 :             newp += len;
    1009             :         }
    1010             :         else
    1011             :         {
    1012             :             /*
    1013             :              * copy bitmap [+ padding] [+ oid] + data from record, all in one
    1014             :              * go
    1015             :              */
    1016      155728 :             memcpy(newp, recdata, tuplen);
    1017      155728 :             recdata += tuplen;
    1018      155728 :             newp += tuplen;
    1019             :         }
    1020             :         Assert(recdata == recdata_end);
    1021             : 
    1022             :         /* copy suffix from old tuple */
    1023      186234 :         if (suffixlen > 0)
    1024       67252 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
    1025             : 
    1026      186234 :         newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
    1027      186234 :         htup->t_infomask2 = xlhdr.t_infomask2;
    1028      186234 :         htup->t_infomask = xlhdr.t_infomask;
    1029      186234 :         htup->t_hoff = xlhdr.t_hoff;
    1030             : 
    1031      186234 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
    1032      186234 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
    1033      186234 :         HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
    1034             :         /* Make sure there is no forward chain link in t_ctid */
    1035      186234 :         htup->t_ctid = newtid;
    1036             : 
    1037      186234 :         offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
    1038      186234 :         if (offnum == InvalidOffsetNumber)
    1039           0 :             elog(PANIC, "failed to add tuple");
    1040             : 
    1041      186234 :         if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
    1042         210 :             PageClearAllVisible(page);
    1043             : 
    1044      186234 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
    1045             : 
    1046      186234 :         PageSetLSN(page, lsn);
    1047      186234 :         MarkBufferDirty(nbuffer);
    1048             :     }
    1049             : 
    1050      187376 :     if (BufferIsValid(nbuffer) && nbuffer != obuffer)
    1051      109066 :         UnlockReleaseBuffer(nbuffer);
    1052      187376 :     if (BufferIsValid(obuffer))
    1053      187376 :         UnlockReleaseBuffer(obuffer);
    1054             : 
    1055             :     /*
    1056             :      * If the new page is running low on free space, update the FSM as well.
    1057             :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
    1058             :      * better than that without knowing the fill-factor for the table.
    1059             :      *
    1060             :      * However, don't update the FSM on HOT updates, because after crash
    1061             :      * recovery, either the old or the new tuple will certainly be dead and
    1062             :      * prunable. After pruning, the page will have roughly as much free space
    1063             :      * as it did before the update, assuming the new tuple is about the same
    1064             :      * size as the old one.
    1065             :      *
     1066             :      * XXX: Don't do this if the page was restored from a full page image.
     1067             :      * We don't bother to update the FSM in that case; it doesn't need to
     1068             :      * be totally accurate anyway.
    1069             :      */
    1070      187376 :     if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
    1071       23486 :         XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
    1072      187376 : }
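
A worked, standalone sketch of the prefix/suffix reconstruction above may help. When the new tuple shares leading and/or trailing bytes with the old version on the same page, the WAL record stores only the changed middle plus the two lengths; replay stitches the pieces back together in the order header bits, prefix, WAL payload, suffix. The buffers and lengths below are hypothetical and use plain char arrays rather than heap tuples; only the copy order mirrors the replay code.

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        /* old on-page data and the changed middle carried in WAL (made up) */
        const char  old_data[] = "AAAA-WORLD-BBBB";
        const char  wal_data[] = "HELLO";
        unsigned short prefixlen = 5;   /* "AAAA-" shared with old tuple */
        unsigned short suffixlen = 5;   /* "-BBBB" shared with old tuple */
        char        new_data[64];
        char       *newp = new_data;

        /* copy prefix from the old tuple */
        memcpy(newp, old_data, prefixlen);
        newp += prefixlen;

        /* copy the new middle from the WAL record */
        memcpy(newp, wal_data, strlen(wal_data));
        newp += strlen(wal_data);

        /* copy suffix from the old tuple */
        memcpy(newp, old_data + sizeof(old_data) - 1 - suffixlen, suffixlen);
        newp += suffixlen;
        *newp = '\0';

        assert(strcmp(new_data, "AAAA-HELLO-BBBB") == 0);
        printf("%s\n", new_data);
        return 0;
    }

As for the FSM threshold at the end of the function: with the default 8 kB block size, BLCKSZ / 5 works out to 1638 bytes, so the FSM is only told about the page when less than roughly 1.6 kB remains free.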
    1073             : 
    1074             : /*
    1075             :  * Replay XLOG_HEAP_CONFIRM records.
    1076             :  */
    1077             : static void
    1078         158 : heap_xlog_confirm(XLogReaderState *record)
    1079             : {
    1080         158 :     XLogRecPtr  lsn = record->EndRecPtr;
    1081         158 :     xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
    1082             :     Buffer      buffer;
    1083             :     Page        page;
    1084             :     OffsetNumber offnum;
    1085         158 :     ItemId      lp = NULL;
    1086             :     HeapTupleHeader htup;
    1087             : 
    1088         158 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1089             :     {
    1090         156 :         page = BufferGetPage(buffer);
    1091             : 
    1092         156 :         offnum = xlrec->offnum;
    1093         156 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1094         156 :             lp = PageGetItemId(page, offnum);
    1095             : 
    1096         156 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1097           0 :             elog(PANIC, "invalid lp");
    1098             : 
    1099         156 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1100             : 
    1101             :         /*
     1102             :          * Confirm the tuple as actually inserted.
    1103             :          */
    1104         156 :         ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
    1105             : 
    1106         156 :         PageSetLSN(page, lsn);
    1107         156 :         MarkBufferDirty(buffer);
    1108             :     }
    1109         158 :     if (BufferIsValid(buffer))
    1110         158 :         UnlockReleaseBuffer(buffer);
    1111         158 : }
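
For context, XLOG_HEAP_CONFIRM completes a speculative insertion (INSERT ... ON CONFLICT): until the record is replayed, t_ctid holds a speculative token rather than the tuple's own address. A minimal sketch of the check, mirroring HeapTupleHeaderIsSpeculative() and assuming the definitions in access/htup_details.h:

    #include "postgres.h"
    #include "access/htup_details.h"

    /*
     * True while an inserted tuple still awaits its CONFIRM record: the
     * offset field of t_ctid carries SpecTokenOffsetNumber instead of
     * the tuple's own offset number.
     */
    static bool
    tuple_awaits_confirm(HeapTupleHeader htup)
    {
        return ItemPointerGetOffsetNumberNoCheck(&htup->t_ctid) ==
            SpecTokenOffsetNumber;
    }

This also explains the special-casing of speculative tuples in heap_mask() further down.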
    1112             : 
    1113             : /*
    1114             :  * Replay XLOG_HEAP_LOCK records.
    1115             :  */
    1116             : static void
    1117      110844 : heap_xlog_lock(XLogReaderState *record)
    1118             : {
    1119      110844 :     XLogRecPtr  lsn = record->EndRecPtr;
    1120      110844 :     xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
    1121             :     Buffer      buffer;
    1122             :     Page        page;
    1123             :     OffsetNumber offnum;
    1124      110844 :     ItemId      lp = NULL;
    1125             :     HeapTupleHeader htup;
    1126             : 
    1127             :     /*
    1128             :      * The visibility map may need to be fixed even if the heap page is
    1129             :      * already up-to-date.
    1130             :      */
    1131      110844 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1132             :     {
    1133             :         RelFileLocator rlocator;
    1134         132 :         Buffer      vmbuffer = InvalidBuffer;
    1135             :         BlockNumber block;
    1136             :         Relation    reln;
    1137             : 
    1138         132 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1139         132 :         reln = CreateFakeRelcacheEntry(rlocator);
    1140             : 
    1141         132 :         visibilitymap_pin(reln, block, &vmbuffer);
    1142         132 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1143             : 
    1144         132 :         ReleaseBuffer(vmbuffer);
    1145         132 :         FreeFakeRelcacheEntry(reln);
    1146             :     }
    1147             : 
    1148      110844 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1149             :     {
    1150      110422 :         page = BufferGetPage(buffer);
    1151             : 
    1152      110422 :         offnum = xlrec->offnum;
    1153      110422 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1154      110422 :             lp = PageGetItemId(page, offnum);
    1155             : 
    1156      110422 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1157           0 :             elog(PANIC, "invalid lp");
    1158             : 
    1159      110422 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1160             : 
    1161      110422 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1162      110422 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1163      110422 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1164             :                                    &htup->t_infomask2);
    1165             : 
    1166             :         /*
    1167             :          * Clear relevant update flags, but only if the modified infomask says
    1168             :          * there's no update.
    1169             :          */
    1170      110422 :         if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
    1171             :         {
    1172      110422 :             HeapTupleHeaderClearHotUpdated(htup);
    1173             :             /* Make sure there is no forward chain link in t_ctid */
    1174      110422 :             ItemPointerSet(&htup->t_ctid,
    1175             :                            BufferGetBlockNumber(buffer),
    1176             :                            offnum);
    1177             :         }
    1178      110422 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1179      110422 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
    1180      110422 :         PageSetLSN(page, lsn);
    1181      110422 :         MarkBufferDirty(buffer);
    1182             :     }
    1183      110844 :     if (BufferIsValid(buffer))
    1184      110844 :         UnlockReleaseBuffer(buffer);
    1185      110844 : }
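
fix_infomask_from_infobits() is defined earlier in this file, above the excerpt shown here. As a sketch of what it is expected to do, assuming the XLHL_* bits from access/heapam_xlog.h: it translates the compact infobits carried in the WAL record back into the on-page HEAP_XMAX_* infomask bits.

    #include "postgres.h"
    #include "access/heapam_xlog.h"
    #include "access/htup_details.h"

    /* Sketch only; see the actual definition earlier in heapam_xlog.c. */
    static void
    fix_infomask_from_infobits_sketch(uint8 infobits,
                                      uint16 *infomask, uint16 *infomask2)
    {
        *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
                       HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
        *infomask2 &= ~HEAP_KEYS_UPDATED;

        if (infobits & XLHL_XMAX_IS_MULTI)
            *infomask |= HEAP_XMAX_IS_MULTI;
        if (infobits & XLHL_XMAX_LOCK_ONLY)
            *infomask |= HEAP_XMAX_LOCK_ONLY;
        if (infobits & XLHL_XMAX_EXCL_LOCK)
            *infomask |= HEAP_XMAX_EXCL_LOCK;
        if (infobits & XLHL_XMAX_KEYSHR_LOCK)
            *infomask |= HEAP_XMAX_KEYSHR_LOCK;
        if (infobits & XLHL_KEYS_UPDATED)
            *infomask2 |= HEAP_KEYS_UPDATED;
    }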
    1186             : 
    1187             : /*
    1188             :  * Replay XLOG_HEAP2_LOCK_UPDATED records.
    1189             :  */
    1190             : static void
    1191           0 : heap_xlog_lock_updated(XLogReaderState *record)
    1192             : {
    1193           0 :     XLogRecPtr  lsn = record->EndRecPtr;
    1194             :     xl_heap_lock_updated *xlrec;
    1195             :     Buffer      buffer;
    1196             :     Page        page;
    1197             :     OffsetNumber offnum;
    1198           0 :     ItemId      lp = NULL;
    1199             :     HeapTupleHeader htup;
    1200             : 
    1201           0 :     xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
    1202             : 
    1203             :     /*
    1204             :      * The visibility map may need to be fixed even if the heap page is
    1205             :      * already up-to-date.
    1206             :      */
    1207           0 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1208             :     {
    1209             :         RelFileLocator rlocator;
    1210           0 :         Buffer      vmbuffer = InvalidBuffer;
    1211             :         BlockNumber block;
    1212             :         Relation    reln;
    1213             : 
    1214           0 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1215           0 :         reln = CreateFakeRelcacheEntry(rlocator);
    1216             : 
    1217           0 :         visibilitymap_pin(reln, block, &vmbuffer);
    1218           0 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1219             : 
    1220           0 :         ReleaseBuffer(vmbuffer);
    1221           0 :         FreeFakeRelcacheEntry(reln);
    1222             :     }
    1223             : 
    1224           0 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1225             :     {
    1226           0 :         page = BufferGetPage(buffer);
    1227             : 
    1228           0 :         offnum = xlrec->offnum;
    1229           0 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1230           0 :             lp = PageGetItemId(page, offnum);
    1231             : 
    1232           0 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1233           0 :             elog(PANIC, "invalid lp");
    1234             : 
    1235           0 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1236             : 
    1237           0 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1238           0 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1239           0 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1240             :                                    &htup->t_infomask2);
    1241           0 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1242             : 
    1243           0 :         PageSetLSN(page, lsn);
    1244           0 :         MarkBufferDirty(buffer);
    1245             :     }
    1246           0 :     if (BufferIsValid(buffer))
    1247           0 :         UnlockReleaseBuffer(buffer);
    1248           0 : }
    1249             : 
    1250             : /*
    1251             :  * Replay XLOG_HEAP_INPLACE records.
    1252             :  */
    1253             : static void
    1254       15366 : heap_xlog_inplace(XLogReaderState *record)
    1255             : {
    1256       15366 :     XLogRecPtr  lsn = record->EndRecPtr;
    1257       15366 :     xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
    1258             :     Buffer      buffer;
    1259             :     Page        page;
    1260             :     OffsetNumber offnum;
    1261       15366 :     ItemId      lp = NULL;
    1262             :     HeapTupleHeader htup;
    1263             :     uint32      oldlen;
    1264             :     Size        newlen;
    1265             : 
    1266       15366 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1267             :     {
    1268       14974 :         char       *newtup = XLogRecGetBlockData(record, 0, &newlen);
    1269             : 
    1270       14974 :         page = BufferGetPage(buffer);
    1271             : 
    1272       14974 :         offnum = xlrec->offnum;
    1273       14974 :         if (PageGetMaxOffsetNumber(page) >= offnum)
    1274       14974 :             lp = PageGetItemId(page, offnum);
    1275             : 
    1276       14974 :         if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
    1277           0 :             elog(PANIC, "invalid lp");
    1278             : 
    1279       14974 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1280             : 
    1281       14974 :         oldlen = ItemIdGetLength(lp) - htup->t_hoff;
    1282       14974 :         if (oldlen != newlen)
    1283           0 :             elog(PANIC, "wrong tuple length");
    1284             : 
    1285       14974 :         memcpy((char *) htup + htup->t_hoff, newtup, newlen);
    1286             : 
    1287       14974 :         PageSetLSN(page, lsn);
    1288       14974 :         MarkBufferDirty(buffer);
    1289             :     }
    1290       15366 :     if (BufferIsValid(buffer))
    1291       15366 :         UnlockReleaseBuffer(buffer);
    1292             : 
    1293       15366 :     ProcessCommittedInvalidationMessages(xlrec->msgs,
    1294             :                                          xlrec->nmsgs,
    1295       15366 :                                          xlrec->relcacheInitFileInval,
    1296             :                                          xlrec->dbId,
    1297             :                                          xlrec->tsId);
    1298       15366 : }
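
The length check above enforces the key invariant of in-place updates: only the data portion of the tuple (everything past t_hoff) may be overwritten, and the WAL payload must match it byte for byte, since line pointers and neighbouring tuples are not moved. A tiny arithmetic sketch with made-up numbers:

    #include <assert.h>

    int
    main(void)
    {
        unsigned    item_len = 176;  /* ItemIdGetLength(lp), hypothetical */
        unsigned    t_hoff = 24;     /* header size incl. null bitmap */
        unsigned    oldlen = item_len - t_hoff;  /* 152 data bytes on page */
        unsigned    newlen = 152;    /* length of the WAL block data */

        assert(oldlen == newlen);    /* otherwise replay PANICs */
        return 0;
    }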
    1299             : 
    1300             : void
    1301     3495582 : heap_redo(XLogReaderState *record)
    1302             : {
    1303     3495582 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1304             : 
    1305             :     /*
     1306             :      * These operations don't overwrite MVCC data, so no conflict
     1307             :      * processing is required.  The ones in the heap2 rmgr do.
    1308             :      */
    1309             : 
    1310     3495582 :     switch (info & XLOG_HEAP_OPMASK)
    1311             :     {
    1312     2581762 :         case XLOG_HEAP_INSERT:
    1313     2581762 :             heap_xlog_insert(record);
    1314     2581762 :             break;
    1315      600072 :         case XLOG_HEAP_DELETE:
    1316      600072 :             heap_xlog_delete(record);
    1317      600072 :             break;
    1318      114748 :         case XLOG_HEAP_UPDATE:
    1319      114748 :             heap_xlog_update(record, false);
    1320      114748 :             break;
    1321           4 :         case XLOG_HEAP_TRUNCATE:
    1322             : 
    1323             :             /*
    1324             :              * TRUNCATE is a no-op because the actions are already logged as
     1325             :              * SMGR WAL records.  The TRUNCATE WAL record only exists
     1326             :              * for logical decoding.
    1327             :              */
    1328           4 :             break;
    1329       72628 :         case XLOG_HEAP_HOT_UPDATE:
    1330       72628 :             heap_xlog_update(record, true);
    1331       72628 :             break;
    1332         158 :         case XLOG_HEAP_CONFIRM:
    1333         158 :             heap_xlog_confirm(record);
    1334         158 :             break;
    1335      110844 :         case XLOG_HEAP_LOCK:
    1336      110844 :             heap_xlog_lock(record);
    1337      110844 :             break;
    1338       15366 :         case XLOG_HEAP_INPLACE:
    1339       15366 :             heap_xlog_inplace(record);
    1340       15366 :             break;
    1341           0 :         default:
    1342           0 :             elog(PANIC, "heap_redo: unknown op code %u", info);
    1343             :     }
    1344     3495582 : }
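
The dispatch above picks the opcode out of the record's info byte. As a sketch of how that byte decomposes, assuming the constants in access/xlogrecord.h and access/heapam_xlog.h: the low nibble is reserved for the WAL machinery (XLR_INFO_MASK), bits 4-6 carry the heap opcode (XLOG_HEAP_OPMASK), and the top bit (XLOG_HEAP_INIT_PAGE) asks redo to initialize the page rather than read it, as seen in heap_xlog_update() above.

    #include "postgres.h"
    #include "access/heapam_xlog.h"
    #include "access/xlogrecord.h"

    /* Sketch: decompose a heap rmgr info byte into its parts. */
    static void
    decompose_heap_info(uint8 info, uint8 *opcode, bool *init_page)
    {
        *opcode = (info & ~XLR_INFO_MASK) & XLOG_HEAP_OPMASK;
        *init_page = (info & XLOG_HEAP_INIT_PAGE) != 0;
    }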
    1345             : 
    1346             : void
    1347      155166 : heap2_redo(XLogReaderState *record)
    1348             : {
    1349      155166 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1350             : 
    1351      155166 :     switch (info & XLOG_HEAP_OPMASK)
    1352             :     {
    1353       28428 :         case XLOG_HEAP2_PRUNE_ON_ACCESS:
    1354             :         case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
    1355             :         case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
    1356       28428 :             heap_xlog_prune_freeze(record);
    1357       28428 :             break;
    1358        9320 :         case XLOG_HEAP2_VISIBLE:
    1359        9320 :             heap_xlog_visible(record);
    1360        9320 :             break;
    1361      115418 :         case XLOG_HEAP2_MULTI_INSERT:
    1362      115418 :             heap_xlog_multi_insert(record);
    1363      115418 :             break;
    1364           0 :         case XLOG_HEAP2_LOCK_UPDATED:
    1365           0 :             heap_xlog_lock_updated(record);
    1366           0 :             break;
    1367        2000 :         case XLOG_HEAP2_NEW_CID:
    1368             : 
    1369             :             /*
    1370             :              * Nothing to do on a real replay, only used during logical
    1371             :              * decoding.
    1372             :              */
    1373        2000 :             break;
    1374           0 :         case XLOG_HEAP2_REWRITE:
    1375           0 :             heap_xlog_logical_rewrite(record);
    1376           0 :             break;
    1377           0 :         default:
    1378           0 :             elog(PANIC, "heap2_redo: unknown op code %u", info);
    1379             :     }
    1380      155166 : }
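
Neither heap_redo() nor heap2_redo() is called directly; recovery reaches them through the resource-manager table. The corresponding rows, abridged from src/include/access/rmgrlist.h as of recent releases (the header remains authoritative), also register heap_mask() below as the consistency-check mask routine for both rmgrs:

    /* PG_RMGR(symbol, name, redo, desc, identify, startup, cleanup,
     *         mask, decode) */
    PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify,
            NULL, NULL, heap_mask, heap2_decode)
    PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify,
            NULL, NULL, heap_mask, heap_decode)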
    1381             : 
    1382             : /*
    1383             :  * Mask a heap page before performing consistency checks on it.
    1384             :  */
    1385             : void
    1386     5843248 : heap_mask(char *pagedata, BlockNumber blkno)
    1387             : {
    1388     5843248 :     Page        page = (Page) pagedata;
    1389             :     OffsetNumber off;
    1390             : 
    1391     5843248 :     mask_page_lsn_and_checksum(page);
    1392             : 
    1393     5843248 :     mask_page_hint_bits(page);
    1394     5843248 :     mask_unused_space(page);
    1395             : 
    1396   481168904 :     for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
    1397             :     {
    1398   475325656 :         ItemId      iid = PageGetItemId(page, off);
    1399             :         char       *page_item;
    1400             : 
    1401   475325656 :         page_item = (char *) (page + ItemIdGetOffset(iid));
    1402             : 
    1403   475325656 :         if (ItemIdIsNormal(iid))
    1404             :         {
    1405   448765308 :             HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
    1406             : 
    1407             :             /*
    1408             :              * If xmin of a tuple is not yet frozen, we should ignore
    1409             :              * differences in hint bits, since they can be set without
    1410             :              * emitting WAL.
    1411             :              */
    1412   448765308 :             if (!HeapTupleHeaderXminFrozen(page_htup))
    1413   441508488 :                 page_htup->t_infomask &= ~HEAP_XACT_MASK;
    1414             :             else
    1415             :             {
     1416             :                 /* We still need to mask the xmax hint bits, though. */
    1417     7256820 :                 page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
    1418     7256820 :                 page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
    1419             :             }
    1420             : 
    1421             :             /*
    1422             :              * During replay, we set Command Id to FirstCommandId. Hence, mask
    1423             :              * it. See heap_xlog_insert() for details.
    1424             :              */
    1425   448765308 :             page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
    1426             : 
    1427             :             /*
    1428             :              * For a speculative tuple, heap_insert() does not set ctid in the
    1429             :              * caller-passed heap tuple itself, leaving the ctid field to
     1430             :              * contain a speculative token value - a per-backend,
     1431             :              * monotonically increasing identifier.  Moreover, it never
     1432             :              * WAL-logs the ctid.
    1433             :              *
    1434             :              * During redo, heap_xlog_insert() sets t_ctid to current block
    1435             :              * number and self offset number. It doesn't care about any
    1436             :              * speculative insertions on the primary. Hence, we set t_ctid to
    1437             :              * current block number and self offset number to ignore any
    1438             :              * inconsistency.
    1439             :              */
    1440   448765308 :             if (HeapTupleHeaderIsSpeculative(page_htup))
    1441         160 :                 ItemPointerSet(&page_htup->t_ctid, blkno, off);
    1442             : 
    1443             :             /*
    1444             :              * NB: Not ignoring ctid changes due to the tuple having moved
    1445             :              * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
    1446             :              * important information that needs to be in-sync between primary
    1447             :              * and standby, and thus is WAL logged.
    1448             :              */
    1449             :         }
    1450             : 
    1451             :         /*
     1452             :          * Ignore any padding bytes after the tuple when the length of the
     1453             :          * item is not MAXALIGNed.
    1454             :          */
    1455   475325656 :         if (ItemIdHasStorage(iid))
    1456             :         {
    1457   448765308 :             int         len = ItemIdGetLength(iid);
    1458   448765308 :             int         padlen = MAXALIGN(len) - len;
    1459             : 
    1460   448765308 :             if (padlen > 0)
    1461   239065244 :                 memset(page_item + len, MASK_MARKER, padlen);
    1462             :         }
    1463             :     }
    1464     5843248 : }
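
A standalone sketch of the padding computation at the end of heap_mask(): MAXALIGN is re-derived here for illustration under the assumption of 8-byte maximum alignment; the real macro lives in c.h and uses MAXIMUM_ALIGNOF.

    #include <assert.h>
    #include <stdint.h>

    /* round len up to the next multiple of 8 (illustrative stand-in) */
    #define MAXALIGN_SKETCH(len) (((uintptr_t) (len) + 7) & ~(uintptr_t) 7)

    int
    main(void)
    {
        int         len = 61;                  /* hypothetical item length */
        int         padlen = MAXALIGN_SKETCH(len) - len;

        assert(MAXALIGN_SKETCH(len) == 64);
        assert(padlen == 3);   /* these three trailing bytes get masked */
        return 0;
    }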

Generated by: LCOV version 1.16