LCOV - code coverage report
Current view: top level - src/backend/access/heap - heapam_xlog.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 88.5 % 573 507
Test Date: 2026-03-05 16:15:32 Functions: 92.9 % 14 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * heapam_xlog.c
       4              :  *    WAL replay logic for heap access method.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/heap/heapam_xlog.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/bufmask.h"
      18              : #include "access/heapam.h"
      19              : #include "access/visibilitymap.h"
      20              : #include "access/xlog.h"
      21              : #include "access/xlogutils.h"
      22              : #include "storage/freespace.h"
      23              : #include "storage/standby.h"
      24              : 
      25              : 
/*
 * Replay XLOG_HEAP2_PRUNE_* records.
 *
 * A single record may describe pruning (redirecting / marking dead /
 * marking unused line pointers), freezing of tuples, and/or setting the
 * page's visibility-map bits.  Replay proceeds in a fixed order: resolve
 * any Hot Standby snapshot conflict, apply the heap page changes (unless a
 * full-page image already restored them), release the heap buffer, replay
 * the VM page change, and finally refresh the FSM entry for the block.
 */
static void
heap_xlog_prune_freeze(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    char       *maindataptr = XLogRecGetData(record);
    xl_heap_prune xlrec;
    Buffer      buffer;
    RelFileLocator rlocator;
    BlockNumber blkno;
    Buffer      vmbuffer = InvalidBuffer;
    uint8       vmflags = 0;
    Size        freespace = 0;

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
    /* memcpy() because the main record data may be stored unaligned */
    memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
    maindataptr += SizeOfHeapPrune;

    /*
     * We will take an ordinary exclusive lock or a cleanup lock depending on
     * whether the XLHP_CLEANUP_LOCK flag is set.  With an ordinary exclusive
     * lock, we better not be doing anything that requires moving existing
     * tuple data.
     */
    Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
           (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);

    /* Decode which visibility-map bits the record wants set, if any */
    if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
    {
        vmflags = VISIBILITYMAP_ALL_VISIBLE;
        if (xlrec.flags & XLHP_VM_ALL_FROZEN)
            vmflags |= VISIBILITYMAP_ALL_FROZEN;
    }

    /*
     * After xl_heap_prune is the optional snapshot conflict horizon.
     *
     * In Hot Standby mode, we must ensure that there are no running queries
     * which would conflict with the changes in this record. That means we
     * can't replay this record if it removes tuples that are still visible to
     * transactions on the standby, freeze tuples with xids that are still
     * considered running on the standby, or set a page as all-visible in the
     * VM if it isn't all-visible to all transactions on the standby.
     */
    if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
    {
        TransactionId snapshot_conflict_horizon;

        /* memcpy() because snapshot_conflict_horizon is stored unaligned */
        memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
        maindataptr += sizeof(TransactionId);

        if (InHotStandby)
            ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
                                                (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
                                                rlocator);
    }

    /*
     * If we have a full-page image of the heap block, restore it and we're
     * done with the heap block.
     */
    if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
                                      (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
                                      &buffer) == BLK_NEEDS_REDO)
    {
        Page        page = BufferGetPage(buffer);
        OffsetNumber *redirected;
        OffsetNumber *nowdead;
        OffsetNumber *nowunused;
        int         nredirected;
        int         ndead;
        int         nunused;
        int         nplans;
        Size        datalen;
        xlhp_freeze_plan *plans;
        OffsetNumber *frz_offsets;
        char       *dataptr = XLogRecGetBlockData(record, 0, &datalen);
        bool        do_prune;

        /* Unpack the variable-length arrays from the block data */
        heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
                                               &nplans, &plans, &frz_offsets,
                                               &nredirected, &redirected,
                                               &ndead, &nowdead,
                                               &nunused, &nowunused);

        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;

        /* Ensure the record does something */
        Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);

        /*
         * Update all line pointers per the record, and repair fragmentation
         * if needed.
         */
        if (do_prune)
            heap_page_prune_execute(buffer,
                                    (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
                                    redirected, nredirected,
                                    nowdead, ndead,
                                    nowunused, nunused);

        /* Freeze tuples */
        for (int p = 0; p < nplans; p++)
        {
            HeapTupleFreeze frz;

            /*
             * Convert freeze plan representation from WAL record into
             * per-tuple format used by heap_execute_freeze_tuple
             */
            frz.xmax = plans[p].xmax;
            frz.t_infomask2 = plans[p].t_infomask2;
            frz.t_infomask = plans[p].t_infomask;
            frz.frzflags = plans[p].frzflags;
            frz.offset = InvalidOffsetNumber;   /* unused, but be tidy */

            /* Apply this plan to each of its target offsets in turn */
            for (int i = 0; i < plans[p].ntuples; i++)
            {
                OffsetNumber offset = *(frz_offsets++);
                ItemId      lp;
                HeapTupleHeader tuple;

                lp = PageGetItemId(page, offset);
                tuple = (HeapTupleHeader) PageGetItem(page, lp);
                heap_execute_freeze_tuple(tuple, &frz);
            }
        }

        /* There should be no more data */
        Assert((char *) frz_offsets == dataptr + datalen);

        /*
         * The critical integrity requirement here is that we must never end
         * up with the visibility map bit set and the page-level
         * PD_ALL_VISIBLE bit unset.  If that were to occur, a subsequent page
         * modification would fail to clear the visibility map bit.
         */
        if (vmflags & VISIBILITYMAP_VALID_BITS)
        {
            PageSetAllVisible(page);
            PageClearPrunable(page);
        }

        MarkBufferDirty(buffer);

        /*
         * See log_heap_prune_and_freeze() for commentary on when we set the
         * heap page LSN.
         */
        if (do_prune || nplans > 0 ||
            ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
            PageSetLSN(page, lsn);

        /*
         * Note: we don't worry about updating the page's prunability hints.
         * At worst this will cause an extra prune cycle to occur soon.
         */
    }

    /*
     * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
     * or the VM, update the freespace map.
     *
     * Even when no actual space is freed (when only marking the page
     * all-visible or frozen), we still update the FSM. Because the FSM is
     * unlogged and maintained heuristically, it often becomes stale on
     * standbys. If such a standby is later promoted and runs VACUUM, it will
     * skip recalculating free space for pages that were marked
     * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
     * optimistic free space values upward, causing future insertions to
     * select pages that turn out to be unusable. In bulk, this can lead to
     * long stalls.
     *
     * To prevent this, always update the FSM even when only marking a page
     * all-visible/all-frozen.
     *
     * Do this regardless of whether a full-page image is logged, since FSM
     * data is not part of the page itself.
     */
    if (BufferIsValid(buffer))
    {
        if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
                            XLHP_HAS_DEAD_ITEMS |
                            XLHP_HAS_NOW_UNUSED_ITEMS)) ||
            (vmflags & VISIBILITYMAP_VALID_BITS))
            freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));

        /*
         * We want to avoid holding an exclusive lock on the heap buffer while
         * doing IO (either of the FSM or the VM), so we'll release it now.
         */
        UnlockReleaseBuffer(buffer);
    }

    /*
     * Now read and update the VM block.
     *
     * We must redo changes to the VM even if the heap page was skipped due to
     * LSN interlock. See comment in heap_xlog_multi_insert() for more details
     * on replaying changes to the VM.
     */
    if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
        XLogReadBufferForRedoExtended(record, 1,
                                      RBM_ZERO_ON_ERROR,
                                      false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);

        /* visibilitymap_set_vmbits() is expected to have dirtied the buffer */
        Assert(BufferIsDirty(vmbuffer));
        PageSetLSN(vmpage, lsn);
    }

    if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);

    if (freespace > 0)
        XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}
     254              : 
/*
 * Replay XLOG_HEAP2_VISIBLE records.
 *
 * The critical integrity requirement here is that we must never end up with
 * a situation where the visibility map bit is set, and the page-level
 * PD_ALL_VISIBLE bit is clear.  If that were to occur, then a subsequent
 * page modification would fail to clear the visibility map bit.
 *
 * Replay order: resolve any Hot Standby conflict, redo the heap page
 * (block 1), release it, update the FSM, then redo the VM page (block 0).
 * The VM update must happen even if the heap page redo was skipped by the
 * LSN interlock; see the comment above the VM read below.
 */
static void
heap_xlog_visible(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
    Buffer      vmbuffer = InvalidBuffer;
    Buffer      buffer;
    Page        page;
    RelFileLocator rlocator;
    BlockNumber blkno;
    XLogRedoAction action;

    Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);

    XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);

    /*
     * If there are any Hot Standby transactions running that have an xmin
     * horizon old enough that this page isn't all-visible for them, they
     * might incorrectly decide that an index-only scan can skip a heap fetch.
     *
     * NB: It might be better to throw some kind of "soft" conflict here that
     * forces any index-only scan that is in flight to perform heap fetches,
     * rather than killing the transaction outright.
     */
    if (InHotStandby)
        ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
                                            xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
                                            rlocator);

    /*
     * Read the heap page, if it still exists. If the heap file has dropped or
     * truncated later in recovery, we don't need to update the page, but we'd
     * better still update the visibility map.
     */
    action = XLogReadBufferForRedo(record, 1, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        /*
         * We don't bump the LSN of the heap page when setting the visibility
         * map bit (unless checksums or wal_hint_bits is enabled, in which
         * case we must). This exposes us to torn page hazards, but since
         * we're not inspecting the existing page contents in any way, we
         * don't care.
         */
        page = BufferGetPage(buffer);

        PageSetAllVisible(page);
        PageClearPrunable(page);

        if (XLogHintBitIsNeeded())
            PageSetLSN(page, lsn);

        MarkBufferDirty(buffer);
    }
    else if (action == BLK_RESTORED)
    {
        /*
         * If heap block was backed up, we already restored it and there's
         * nothing more to do. (This can only happen with checksums or
         * wal_log_hints enabled.)
         */
    }

    if (BufferIsValid(buffer))
    {
        /* Capture free space before releasing the lock; used for FSM below */
        Size        space = PageGetFreeSpace(BufferGetPage(buffer));

        UnlockReleaseBuffer(buffer);

        /*
         * Since FSM is not WAL-logged and only updated heuristically, it
         * easily becomes stale in standbys.  If the standby is later promoted
         * and runs VACUUM, it will skip updating individual free space
         * figures for pages that became all-visible (or all-frozen, depending
         * on the vacuum mode,) which is troublesome when FreeSpaceMapVacuum
         * propagates too optimistic free space values to upper FSM layers;
         * later inserters try to use such pages only to find out that they
         * are unusable.  This can cause long stalls when there are many such
         * pages.
         *
         * Forestall those problems by updating FSM's idea about a page that
         * is becoming all-visible or all-frozen.
         *
         * Do this regardless of a full-page image being applied, since the
         * FSM data is not in the page anyway.
         */
        if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
            XLogRecordPageWithFreeSpace(rlocator, blkno, space);
    }

    /*
     * Even if we skipped the heap page update due to the LSN interlock, it's
     * still safe to update the visibility map.  Any WAL record that clears
     * the visibility map bit does so before checking the page LSN, so any
     * bits that need to be cleared will still be cleared.
     */
    if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);
        Relation    reln;
        uint8       vmbits;

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        /* remove VISIBILITYMAP_XLOG_* */
        vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;

        /*
         * XLogReadBufferForRedoExtended locked the buffer. But
         * visibilitymap_set will handle locking itself.
         */
        LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);

        /* visibilitymap_set() needs a Relation; fake one from the locator */
        reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
                          xlrec->snapshotConflictHorizon, vmbits);

        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }
    else if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);
}
     391              : 
     392              : /*
     393              :  * Given an "infobits" field from an XLog record, set the correct bits in the
     394              :  * given infomask and infomask2 for the tuple touched by the record.
     395              :  *
     396              :  * (This is the reverse of compute_infobits).
     397              :  */
     398              : static void
     399       457416 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
     400              : {
     401       457416 :     *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
     402              :                    HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
     403       457416 :     *infomask2 &= ~HEAP_KEYS_UPDATED;
     404              : 
     405       457416 :     if (infobits & XLHL_XMAX_IS_MULTI)
     406            3 :         *infomask |= HEAP_XMAX_IS_MULTI;
     407       457416 :     if (infobits & XLHL_XMAX_LOCK_ONLY)
     408        55229 :         *infomask |= HEAP_XMAX_LOCK_ONLY;
     409       457416 :     if (infobits & XLHL_XMAX_EXCL_LOCK)
     410        54820 :         *infomask |= HEAP_XMAX_EXCL_LOCK;
     411              :     /* note HEAP_XMAX_SHR_LOCK isn't considered here */
     412       457416 :     if (infobits & XLHL_XMAX_KEYSHR_LOCK)
     413          423 :         *infomask |= HEAP_XMAX_KEYSHR_LOCK;
     414              : 
     415       457416 :     if (infobits & XLHL_KEYS_UPDATED)
     416       308631 :         *infomask2 |= HEAP_KEYS_UPDATED;
     417       457416 : }
     418              : 
/*
 * Replay XLOG_HEAP_DELETE records.
 *
 * Re-applies a single-tuple delete: fixes the VM if the record says the
 * page's all-visible bit was cleared, then (unless a full-page image or the
 * LSN interlock makes it unnecessary) stamps the target tuple's xmax,
 * infomask bits and ctid exactly as the original delete did.
 */
static void
heap_xlog_delete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    ItemId      lp;
    HeapTupleHeader htup;
    BlockNumber blkno;
    RelFileLocator target_locator;
    ItemPointerData target_tid;

    XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
    ItemPointerSetBlockNumber(&target_tid, blkno);
    ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(target_locator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        /* The target line pointer must exist and be normal, else PANIC */
        if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, xlrec->offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        HeapTupleHeaderClearHotUpdated(htup);
        fix_infomask_from_infobits(xlrec->infobits_set,
                                   &htup->t_infomask, &htup->t_infomask2);
        /* A "super" delete abandons the tuple by invalidating xmin instead */
        if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
            HeapTupleHeaderSetXmax(htup, xlrec->xmax);
        else
            HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);

        /* Mark the page as a candidate for pruning */
        PageSetPrunable(page, XLogRecGetXid(record));

        if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        /* Make sure t_ctid is set correctly */
        if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
            HeapTupleHeaderSetMovedPartitions(htup);
        else
            htup->t_ctid = target_tid;
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}
     494              : 
     495              : /*
     496              :  * Replay XLOG_HEAP_INSERT records.
     497              :  */
     498              : static void
     499      1295651 : heap_xlog_insert(XLogReaderState *record)
     500              : {
     501      1295651 :     XLogRecPtr  lsn = record->EndRecPtr;
     502      1295651 :     xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
     503              :     Buffer      buffer;
     504              :     Page        page;
     505              :     union
     506              :     {
     507              :         HeapTupleHeaderData hdr;
     508              :         char        data[MaxHeapTupleSize];
     509              :     }           tbuf;
     510              :     HeapTupleHeader htup;
     511              :     xl_heap_header xlhdr;
     512              :     uint32      newlen;
     513      1295651 :     Size        freespace = 0;
     514              :     RelFileLocator target_locator;
     515              :     BlockNumber blkno;
     516              :     ItemPointerData target_tid;
     517              :     XLogRedoAction action;
     518              : 
     519      1295651 :     XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
     520      1295651 :     ItemPointerSetBlockNumber(&target_tid, blkno);
     521      1295651 :     ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
     522              : 
     523              :     /* No freezing in the heap_insert() code path */
     524              :     Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
     525              : 
     526              :     /*
     527              :      * The visibility map may need to be fixed even if the heap page is
     528              :      * already up-to-date.
     529              :      */
     530      1295651 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     531              :     {
     532          894 :         Relation    reln = CreateFakeRelcacheEntry(target_locator);
     533          894 :         Buffer      vmbuffer = InvalidBuffer;
     534              : 
     535          894 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     536          894 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     537          894 :         ReleaseBuffer(vmbuffer);
     538          894 :         FreeFakeRelcacheEntry(reln);
     539              :     }
     540              : 
     541              :     /*
     542              :      * If we inserted the first and only tuple on the page, re-initialize the
     543              :      * page from scratch.
     544              :      */
     545      1295651 :     if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     546              :     {
     547        17560 :         buffer = XLogInitBufferForRedo(record, 0);
     548        17560 :         page = BufferGetPage(buffer);
     549        17560 :         PageInit(page, BufferGetPageSize(buffer), 0);
     550        17560 :         action = BLK_NEEDS_REDO;
     551              :     }
     552              :     else
     553      1278091 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     554      1295651 :     if (action == BLK_NEEDS_REDO)
     555              :     {
     556              :         Size        datalen;
     557              :         char       *data;
     558              : 
     559      1292795 :         page = BufferGetPage(buffer);
     560              : 
     561      1292795 :         if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
     562            0 :             elog(PANIC, "invalid max offset number");
     563              : 
     564      1292795 :         data = XLogRecGetBlockData(record, 0, &datalen);
     565              : 
     566      1292795 :         newlen = datalen - SizeOfHeapHeader;
     567              :         Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
     568      1292795 :         memcpy(&xlhdr, data, SizeOfHeapHeader);
     569      1292795 :         data += SizeOfHeapHeader;
     570              : 
     571      1292795 :         htup = &tbuf.hdr;
     572      1292795 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     573              :         /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     574      1292795 :         memcpy((char *) htup + SizeofHeapTupleHeader,
     575              :                data,
     576              :                newlen);
     577      1292795 :         newlen += SizeofHeapTupleHeader;
     578      1292795 :         htup->t_infomask2 = xlhdr.t_infomask2;
     579      1292795 :         htup->t_infomask = xlhdr.t_infomask;
     580      1292795 :         htup->t_hoff = xlhdr.t_hoff;
     581      1292795 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     582      1292795 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
     583      1292795 :         htup->t_ctid = target_tid;
     584              : 
     585      1292795 :         if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
     586            0 :             elog(PANIC, "failed to add tuple");
     587              : 
     588      1292795 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     589              : 
     590      1292795 :         PageSetLSN(page, lsn);
     591              : 
     592      1292795 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     593          313 :             PageClearAllVisible(page);
     594              : 
     595      1292795 :         MarkBufferDirty(buffer);
     596              :     }
     597      1295651 :     if (BufferIsValid(buffer))
     598      1295651 :         UnlockReleaseBuffer(buffer);
     599              : 
     600              :     /*
     601              :      * If the page is running low on free space, update the FSM as well.
     602              :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     603              :      * better than that without knowing the fill-factor for the table.
     604              :      *
     605              :      * XXX: Don't do this if the page was restored from full page image. We
     606              :      * don't bother to update the FSM in that case, it doesn't need to be
     607              :      * totally accurate anyway.
     608              :      */
     609      1295651 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     610       254188 :         XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
     611      1295651 : }
     612              : 
     613              : /*
     614              :  * Replay XLOG_HEAP2_MULTI_INSERT records.
     615              :  */
     616              : static void
     617        61687 : heap_xlog_multi_insert(XLogReaderState *record)
     618              : {
     619        61687 :     XLogRecPtr  lsn = record->EndRecPtr;
     620              :     xl_heap_multi_insert *xlrec;
     621              :     RelFileLocator rlocator;
     622              :     BlockNumber blkno;
     623              :     Buffer      buffer;
     624              :     Page        page;
     625              :     union
     626              :     {
     627              :         HeapTupleHeaderData hdr;
     628              :         char        data[MaxHeapTupleSize];
     629              :     }           tbuf;
     630              :     HeapTupleHeader htup;
     631              :     uint32      newlen;
     632        61687 :     Size        freespace = 0;
     633              :     int         i;
     634        61687 :     bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
     635              :     XLogRedoAction action;
     636        61687 :     Buffer      vmbuffer = InvalidBuffer;
     637              : 
     638              :     /*
     639              :      * Insertion doesn't overwrite MVCC data, so no conflict processing is
     640              :      * required.
     641              :      */
     642        61687 :     xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
     643              : 
     644        61687 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
     645              : 
     646              :     /* check that the mutually exclusive flags are not both set */
     647              :     Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
     648              :              (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
     649              : 
     650              :     /*
     651              :      * The visibility map may need to be fixed even if the heap page is
     652              :      * already up-to-date.
     653              :      */
     654        61687 :     if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     655              :     {
     656          907 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     657              : 
     658          907 :         visibilitymap_pin(reln, blkno, &vmbuffer);
     659          907 :         visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
     660          907 :         ReleaseBuffer(vmbuffer);
     661          907 :         vmbuffer = InvalidBuffer;
     662          907 :         FreeFakeRelcacheEntry(reln);
     663              :     }
     664              : 
     665        61687 :     if (isinit)
     666              :     {
     667         1834 :         buffer = XLogInitBufferForRedo(record, 0);
     668         1834 :         page = BufferGetPage(buffer);
     669         1834 :         PageInit(page, BufferGetPageSize(buffer), 0);
     670         1834 :         action = BLK_NEEDS_REDO;
     671              :     }
     672              :     else
     673        59853 :         action = XLogReadBufferForRedo(record, 0, &buffer);
     674        61687 :     if (action == BLK_NEEDS_REDO)
     675              :     {
     676              :         char       *tupdata;
     677              :         char       *endptr;
     678              :         Size        len;
     679              : 
     680              :         /* Tuples are stored as block data */
     681        60068 :         tupdata = XLogRecGetBlockData(record, 0, &len);
     682        60068 :         endptr = tupdata + len;
     683              : 
     684        60068 :         page = BufferGetPage(buffer);
     685              : 
     686       273282 :         for (i = 0; i < xlrec->ntuples; i++)
     687              :         {
     688              :             OffsetNumber offnum;
     689              :             xl_multi_insert_tuple *xlhdr;
     690              : 
     691              :             /*
     692              :              * If we're reinitializing the page, the tuples are stored in
     693              :              * order from FirstOffsetNumber. Otherwise there's an array of
     694              :              * offsets in the WAL record, and the tuples come after that.
     695              :              */
     696       213214 :             if (isinit)
     697        99454 :                 offnum = FirstOffsetNumber + i;
     698              :             else
     699       113760 :                 offnum = xlrec->offsets[i];
     700       213214 :             if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     701            0 :                 elog(PANIC, "invalid max offset number");
     702              : 
     703       213214 :             xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
     704       213214 :             tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
     705              : 
     706       213214 :             newlen = xlhdr->datalen;
     707              :             Assert(newlen <= MaxHeapTupleSize);
     708       213214 :             htup = &tbuf.hdr;
     709       213214 :             MemSet(htup, 0, SizeofHeapTupleHeader);
     710              :             /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
     711       213214 :             memcpy((char *) htup + SizeofHeapTupleHeader,
     712              :                    tupdata,
     713              :                    newlen);
     714       213214 :             tupdata += newlen;
     715              : 
     716       213214 :             newlen += SizeofHeapTupleHeader;
     717       213214 :             htup->t_infomask2 = xlhdr->t_infomask2;
     718       213214 :             htup->t_infomask = xlhdr->t_infomask;
     719       213214 :             htup->t_hoff = xlhdr->t_hoff;
     720       213214 :             HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
     721       213214 :             HeapTupleHeaderSetCmin(htup, FirstCommandId);
     722       213214 :             ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
     723       213214 :             ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
     724              : 
     725       213214 :             offnum = PageAddItem(page, htup, newlen, offnum, true, true);
     726       213214 :             if (offnum == InvalidOffsetNumber)
     727            0 :                 elog(PANIC, "failed to add tuple");
     728              :         }
     729        60068 :         if (tupdata != endptr)
     730            0 :             elog(PANIC, "total tuple length mismatch");
     731              : 
     732        60068 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
     733              : 
     734        60068 :         PageSetLSN(page, lsn);
     735              : 
     736        60068 :         if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
     737          107 :             PageClearAllVisible(page);
     738              : 
     739              :         /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
     740        60068 :         if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
     741              :         {
     742            4 :             PageSetAllVisible(page);
     743            4 :             PageClearPrunable(page);
     744              :         }
     745              : 
     746        60068 :         MarkBufferDirty(buffer);
     747              :     }
     748        61687 :     if (BufferIsValid(buffer))
     749        61687 :         UnlockReleaseBuffer(buffer);
     750              : 
     751        61687 :     buffer = InvalidBuffer;
     752              : 
     753              :     /*
     754              :      * Read and update the visibility map (VM) block.
     755              :      *
     756              :      * We must always redo VM changes, even if the corresponding heap page
     757              :      * update was skipped due to the LSN interlock. Each VM block covers
     758              :      * multiple heap pages, so later WAL records may update other bits in the
     759              :      * same block. If this record includes an FPI (full-page image),
     760              :      * subsequent WAL records may depend on it to guard against torn pages.
     761              :      *
     762              :      * Heap page changes are replayed first to preserve the invariant:
     763              :      * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
     764              :      *
     765              :      * Note that we released the heap page lock above. During normal
     766              :      * operation, this would be unsafe — a concurrent modification could
     767              :      * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
     768              :      * invariant.
     769              :      *
     770              :      * During recovery, however, no concurrent writers exist. Therefore,
     771              :      * updating the VM without holding the heap page lock is safe enough. This
     772              :      * same approach is taken when replaying xl_heap_visible records (see
     773              :      * heap_xlog_visible()).
     774              :      */
     775        61691 :     if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
     776            4 :         XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
     777              :                                       &vmbuffer) == BLK_NEEDS_REDO)
     778              :     {
     779            0 :         Page        vmpage = BufferGetPage(vmbuffer);
     780              : 
     781              :         /* initialize the page if it was read as zeros */
     782            0 :         if (PageIsNew(vmpage))
     783            0 :             PageInit(vmpage, BLCKSZ, 0);
     784              : 
     785            0 :         visibilitymap_set_vmbits(blkno,
     786              :                                  vmbuffer,
     787              :                                  VISIBILITYMAP_ALL_VISIBLE |
     788              :                                  VISIBILITYMAP_ALL_FROZEN,
     789              :                                  rlocator);
     790              : 
     791              :         Assert(BufferIsDirty(vmbuffer));
     792            0 :         PageSetLSN(vmpage, lsn);
     793              :     }
     794              : 
     795        61687 :     if (BufferIsValid(vmbuffer))
     796            4 :         UnlockReleaseBuffer(vmbuffer);
     797              : 
     798              :     /*
     799              :      * If the page is running low on free space, update the FSM as well.
     800              :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     801              :      * better than that without knowing the fill-factor for the table.
     802              :      *
     803              :      * XXX: Don't do this if the page was restored from full page image. We
     804              :      * don't bother to update the FSM in that case, it doesn't need to be
     805              :      * totally accurate anyway.
     806              :      */
     807        61687 :     if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
     808        16847 :         XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
     809        61687 : }
     810              : 
     811              : /*
     812              :  * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
     813              :  */
     814              : static void
     815        95286 : heap_xlog_update(XLogReaderState *record, bool hot_update)
     816              : {
     817        95286 :     XLogRecPtr  lsn = record->EndRecPtr;
     818        95286 :     xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
     819              :     RelFileLocator rlocator;
     820              :     BlockNumber oldblk;
     821              :     BlockNumber newblk;
     822              :     ItemPointerData newtid;
     823              :     Buffer      obuffer,
     824              :                 nbuffer;
     825              :     Page        page;
     826              :     OffsetNumber offnum;
     827              :     ItemId      lp;
     828              :     HeapTupleData oldtup;
     829              :     HeapTupleHeader htup;
     830        95286 :     uint16      prefixlen = 0,
     831        95286 :                 suffixlen = 0;
     832              :     char       *newp;
     833              :     union
     834              :     {
     835              :         HeapTupleHeaderData hdr;
     836              :         char        data[MaxHeapTupleSize];
     837              :     }           tbuf;
     838              :     xl_heap_header xlhdr;
     839              :     uint32      newlen;
     840        95286 :     Size        freespace = 0;
     841              :     XLogRedoAction oldaction;
     842              :     XLogRedoAction newaction;
     843              : 
     844              :     /* initialize to keep the compiler quiet */
     845        95286 :     oldtup.t_data = NULL;
     846        95286 :     oldtup.t_len = 0;
     847              : 
     848        95286 :     XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
     849        95286 :     if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
     850              :     {
     851              :         /* HOT updates are never done across pages */
     852              :         Assert(!hot_update);
     853              :     }
     854              :     else
     855        40783 :         oldblk = newblk;
     856              : 
     857        95286 :     ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
     858              : 
     859              :     /*
     860              :      * The visibility map may need to be fixed even if the heap page is
     861              :      * already up-to-date.
     862              :      */
     863        95286 :     if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     864              :     {
     865          207 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     866          207 :         Buffer      vmbuffer = InvalidBuffer;
     867              : 
     868          207 :         visibilitymap_pin(reln, oldblk, &vmbuffer);
     869          207 :         visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     870          207 :         ReleaseBuffer(vmbuffer);
     871          207 :         FreeFakeRelcacheEntry(reln);
     872              :     }
     873              : 
     874              :     /*
     875              :      * In normal operation, it is important to lock the two pages in
     876              :      * page-number order, to avoid possible deadlocks against other update
     877              :      * operations going the other way.  However, during WAL replay there can
     878              :      * be no other update happening, so we don't need to worry about that. But
     879              :      * we *do* need to worry that we don't expose an inconsistent state to Hot
     880              :      * Standby queries --- so the original page can't be unlocked before we've
     881              :      * added the new tuple to the new page.
     882              :      */
     883              : 
     884              :     /* Deal with old tuple version */
     885        95286 :     oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
     886              :                                       &obuffer);
     887        95286 :     if (oldaction == BLK_NEEDS_REDO)
     888              :     {
     889        94949 :         page = BufferGetPage(obuffer);
     890        94949 :         offnum = xlrec->old_offnum;
     891        94949 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
     892            0 :             elog(PANIC, "offnum out of range");
     893        94949 :         lp = PageGetItemId(page, offnum);
     894        94949 :         if (!ItemIdIsNormal(lp))
     895            0 :             elog(PANIC, "invalid lp");
     896              : 
     897        94949 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
     898              : 
     899        94949 :         oldtup.t_data = htup;
     900        94949 :         oldtup.t_len = ItemIdGetLength(lp);
     901              : 
     902        94949 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
     903        94949 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
     904        94949 :         if (hot_update)
     905        37515 :             HeapTupleHeaderSetHotUpdated(htup);
     906              :         else
     907        57434 :             HeapTupleHeaderClearHotUpdated(htup);
     908        94949 :         fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
     909              :                                    &htup->t_infomask2);
     910        94949 :         HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
     911        94949 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
     912              :         /* Set forward chain link in t_ctid */
     913        94949 :         htup->t_ctid = newtid;
     914              : 
     915              :         /* Mark the page as a candidate for pruning */
     916        94949 :         PageSetPrunable(page, XLogRecGetXid(record));
     917              : 
     918        94949 :         if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
     919          200 :             PageClearAllVisible(page);
     920              : 
     921        94949 :         PageSetLSN(page, lsn);
     922        94949 :         MarkBufferDirty(obuffer);
     923              :     }
     924              : 
     925              :     /*
     926              :      * Read the page the new tuple goes into, if different from old.
     927              :      */
     928        95286 :     if (oldblk == newblk)
     929              :     {
     930        40783 :         nbuffer = obuffer;
     931        40783 :         newaction = oldaction;
     932              :     }
     933        54503 :     else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
     934              :     {
     935          589 :         nbuffer = XLogInitBufferForRedo(record, 0);
     936          589 :         page = BufferGetPage(nbuffer);
     937          589 :         PageInit(page, BufferGetPageSize(nbuffer), 0);
     938          589 :         newaction = BLK_NEEDS_REDO;
     939              :     }
     940              :     else
     941        53914 :         newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
     942              : 
     943              :     /*
     944              :      * The visibility map may need to be fixed even if the heap page is
     945              :      * already up-to-date.
     946              :      */
     947        95286 :     if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
     948              :     {
     949          170 :         Relation    reln = CreateFakeRelcacheEntry(rlocator);
     950          170 :         Buffer      vmbuffer = InvalidBuffer;
     951              : 
     952          170 :         visibilitymap_pin(reln, newblk, &vmbuffer);
     953          170 :         visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
     954          170 :         ReleaseBuffer(vmbuffer);
     955          170 :         FreeFakeRelcacheEntry(reln);
     956              :     }
     957              : 
     958              :     /* Deal with new tuple */
     959        95286 :     if (newaction == BLK_NEEDS_REDO)
     960              :     {
     961              :         char       *recdata;
     962              :         char       *recdata_end;
     963              :         Size        datalen;
     964              :         Size        tuplen;
     965              : 
     966        94737 :         recdata = XLogRecGetBlockData(record, 0, &datalen);
     967        94737 :         recdata_end = recdata + datalen;
     968              : 
     969        94737 :         page = BufferGetPage(nbuffer);
     970              : 
     971        94737 :         offnum = xlrec->new_offnum;
     972        94737 :         if (PageGetMaxOffsetNumber(page) + 1 < offnum)
     973            0 :             elog(PANIC, "invalid max offset number");
     974              : 
     975        94737 :         if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
     976              :         {
     977              :             Assert(newblk == oldblk);
     978        16874 :             memcpy(&prefixlen, recdata, sizeof(uint16));
     979        16874 :             recdata += sizeof(uint16);
     980              :         }
     981        94737 :         if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
     982              :         {
     983              :             Assert(newblk == oldblk);
     984        34866 :             memcpy(&suffixlen, recdata, sizeof(uint16));
     985        34866 :             recdata += sizeof(uint16);
     986              :         }
     987              : 
     988        94737 :         memcpy(&xlhdr, recdata, SizeOfHeapHeader);
     989        94737 :         recdata += SizeOfHeapHeader;
     990              : 
     991        94737 :         tuplen = recdata_end - recdata;
     992              :         Assert(tuplen <= MaxHeapTupleSize);
     993              : 
     994        94737 :         htup = &tbuf.hdr;
     995        94737 :         MemSet(htup, 0, SizeofHeapTupleHeader);
     996              : 
     997              :         /*
     998              :          * Reconstruct the new tuple using the prefix and/or suffix from the
     999              :          * old tuple, and the data stored in the WAL record.
    1000              :          */
    1001        94737 :         newp = (char *) htup + SizeofHeapTupleHeader;
    1002        94737 :         if (prefixlen > 0)
    1003              :         {
    1004              :             int         len;
    1005              : 
    1006              :             /* copy bitmap [+ padding] [+ oid] from WAL record */
    1007        16874 :             len = xlhdr.t_hoff - SizeofHeapTupleHeader;
    1008        16874 :             memcpy(newp, recdata, len);
    1009        16874 :             recdata += len;
    1010        16874 :             newp += len;
    1011              : 
    1012              :             /* copy prefix from old tuple */
    1013        16874 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
    1014        16874 :             newp += prefixlen;
    1015              : 
    1016              :             /* copy new tuple data from WAL record */
    1017        16874 :             len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
    1018        16874 :             memcpy(newp, recdata, len);
    1019        16874 :             recdata += len;
    1020        16874 :             newp += len;
    1021              :         }
    1022              :         else
    1023              :         {
    1024              :             /*
    1025              :              * copy bitmap [+ padding] [+ oid] + data from record, all in one
    1026              :              * go
    1027              :              */
    1028        77863 :             memcpy(newp, recdata, tuplen);
    1029        77863 :             recdata += tuplen;
    1030        77863 :             newp += tuplen;
    1031              :         }
    1032              :         Assert(recdata == recdata_end);
    1033              : 
    1034              :         /* copy suffix from old tuple */
    1035        94737 :         if (suffixlen > 0)
    1036        34866 :             memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
    1037              : 
    1038        94737 :         newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
    1039        94737 :         htup->t_infomask2 = xlhdr.t_infomask2;
    1040        94737 :         htup->t_infomask = xlhdr.t_infomask;
    1041        94737 :         htup->t_hoff = xlhdr.t_hoff;
    1042              : 
    1043        94737 :         HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
    1044        94737 :         HeapTupleHeaderSetCmin(htup, FirstCommandId);
    1045        94737 :         HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
    1046              :         /* Make sure there is no forward chain link in t_ctid */
    1047        94737 :         htup->t_ctid = newtid;
    1048              : 
    1049        94737 :         offnum = PageAddItem(page, htup, newlen, offnum, true, true);
    1050        94737 :         if (offnum == InvalidOffsetNumber)
    1051            0 :             elog(PANIC, "failed to add tuple");
    1052              : 
    1053        94737 :         if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
    1054           86 :             PageClearAllVisible(page);
    1055              : 
    1056        94737 :         freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
    1057              : 
    1058        94737 :         PageSetLSN(page, lsn);
    1059        94737 :         MarkBufferDirty(nbuffer);
    1060              :     }
    1061              : 
    1062        95286 :     if (BufferIsValid(nbuffer) && nbuffer != obuffer)
    1063        54503 :         UnlockReleaseBuffer(nbuffer);
    1064        95286 :     if (BufferIsValid(obuffer))
    1065        95286 :         UnlockReleaseBuffer(obuffer);
    1066              : 
    1067              :     /*
    1068              :      * If the new page is running low on free space, update the FSM as well.
    1069              :      * Arbitrarily, our definition of "low" is less than 20%. We can't do much
    1070              :      * better than that without knowing the fill-factor for the table.
    1071              :      *
    1072              :      * However, don't update the FSM on HOT updates, because after crash
    1073              :      * recovery, either the old or the new tuple will certainly be dead and
    1074              :      * prunable. After pruning, the page will have roughly as much free space
    1075              :      * as it did before the update, assuming the new tuple is about the same
    1076              :      * size as the old one.
    1077              :      *
    1078              :      * XXX: Don't do this if the page was restored from full page image. We
    1079              :      * don't bother to update the FSM in that case, it doesn't need to be
    1080              :      * totally accurate anyway.
    1081              :      */
    1082        95286 :     if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
    1083        11683 :         XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
    1084        95286 : }
    1085              : 
    1086              : /*
    1087              :  * Replay XLOG_HEAP_CONFIRM records.
    1088              :  */
    1089              : static void
    1090           93 : heap_xlog_confirm(XLogReaderState *record)
    1091              : {
    1092           93 :     XLogRecPtr  lsn = record->EndRecPtr;
    1093           93 :     xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
    1094              :     Buffer      buffer;
    1095              :     Page        page;
    1096              :     OffsetNumber offnum;
    1097              :     ItemId      lp;
    1098              :     HeapTupleHeader htup;
    1099              : 
    1100           93 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1101              :     {
    1102           93 :         page = BufferGetPage(buffer);
    1103              : 
    1104           93 :         offnum = xlrec->offnum;
    1105           93 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1106            0 :             elog(PANIC, "offnum out of range");
    1107           93 :         lp = PageGetItemId(page, offnum);
    1108           93 :         if (!ItemIdIsNormal(lp))
    1109            0 :             elog(PANIC, "invalid lp");
    1110              : 
    1111           93 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1112              : 
    1113              :         /*
    1114              :          * Confirm tuple as actually inserted
    1115              :          */
    1116           93 :         ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
    1117              : 
    1118           93 :         PageSetLSN(page, lsn);
    1119           93 :         MarkBufferDirty(buffer);
    1120              :     }
    1121           93 :     if (BufferIsValid(buffer))
    1122           93 :         UnlockReleaseBuffer(buffer);
    1123           93 : }
    1124              : 
    1125              : /*
    1126              :  * Replay XLOG_HEAP_LOCK records.
    1127              :  */
    1128              : static void
    1129        55434 : heap_xlog_lock(XLogReaderState *record)
    1130              : {
    1131        55434 :     XLogRecPtr  lsn = record->EndRecPtr;
    1132        55434 :     xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
    1133              :     Buffer      buffer;
    1134              :     Page        page;
    1135              :     OffsetNumber offnum;
    1136              :     ItemId      lp;
    1137              :     HeapTupleHeader htup;
    1138              : 
    1139              :     /*
    1140              :      * The visibility map may need to be fixed even if the heap page is
    1141              :      * already up-to-date.
    1142              :      */
    1143        55434 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1144              :     {
    1145              :         RelFileLocator rlocator;
    1146           46 :         Buffer      vmbuffer = InvalidBuffer;
    1147              :         BlockNumber block;
    1148              :         Relation    reln;
    1149              : 
    1150           46 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1151           46 :         reln = CreateFakeRelcacheEntry(rlocator);
    1152              : 
    1153           46 :         visibilitymap_pin(reln, block, &vmbuffer);
    1154           46 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1155              : 
    1156           46 :         ReleaseBuffer(vmbuffer);
    1157           46 :         FreeFakeRelcacheEntry(reln);
    1158              :     }
    1159              : 
    1160        55434 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1161              :     {
    1162        55229 :         page = BufferGetPage(buffer);
    1163              : 
    1164        55229 :         offnum = xlrec->offnum;
    1165        55229 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1166            0 :             elog(PANIC, "offnum out of range");
    1167        55229 :         lp = PageGetItemId(page, offnum);
    1168        55229 :         if (!ItemIdIsNormal(lp))
    1169            0 :             elog(PANIC, "invalid lp");
    1170              : 
    1171        55229 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1172              : 
    1173        55229 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1174        55229 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1175        55229 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1176              :                                    &htup->t_infomask2);
    1177              : 
    1178              :         /*
    1179              :          * Clear relevant update flags, but only if the modified infomask says
    1180              :          * there's no update.
    1181              :          */
    1182        55229 :         if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
    1183              :         {
    1184        55229 :             HeapTupleHeaderClearHotUpdated(htup);
    1185              :             /* Make sure there is no forward chain link in t_ctid */
    1186        55229 :             ItemPointerSet(&htup->t_ctid,
    1187              :                            BufferGetBlockNumber(buffer),
    1188              :                            offnum);
    1189              :         }
    1190        55229 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1191        55229 :         HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
    1192        55229 :         PageSetLSN(page, lsn);
    1193        55229 :         MarkBufferDirty(buffer);
    1194              :     }
    1195        55434 :     if (BufferIsValid(buffer))
    1196        55434 :         UnlockReleaseBuffer(buffer);
    1197        55434 : }
    1198              : 
    1199              : /*
    1200              :  * Replay XLOG_HEAP2_LOCK_UPDATED records.
    1201              :  */
    1202              : static void
    1203            0 : heap_xlog_lock_updated(XLogReaderState *record)
    1204              : {
    1205            0 :     XLogRecPtr  lsn = record->EndRecPtr;
    1206              :     xl_heap_lock_updated *xlrec;
    1207              :     Buffer      buffer;
    1208              :     Page        page;
    1209              :     OffsetNumber offnum;
    1210              :     ItemId      lp;
    1211              :     HeapTupleHeader htup;
    1212              : 
    1213            0 :     xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
    1214              : 
    1215              :     /*
    1216              :      * The visibility map may need to be fixed even if the heap page is
    1217              :      * already up-to-date.
    1218              :      */
    1219            0 :     if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    1220              :     {
    1221              :         RelFileLocator rlocator;
    1222            0 :         Buffer      vmbuffer = InvalidBuffer;
    1223              :         BlockNumber block;
    1224              :         Relation    reln;
    1225              : 
    1226            0 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
    1227            0 :         reln = CreateFakeRelcacheEntry(rlocator);
    1228              : 
    1229            0 :         visibilitymap_pin(reln, block, &vmbuffer);
    1230            0 :         visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
    1231              : 
    1232            0 :         ReleaseBuffer(vmbuffer);
    1233            0 :         FreeFakeRelcacheEntry(reln);
    1234              :     }
    1235              : 
    1236            0 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1237              :     {
    1238            0 :         page = BufferGetPage(buffer);
    1239              : 
    1240            0 :         offnum = xlrec->offnum;
    1241            0 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1242            0 :             elog(PANIC, "offnum out of range");
    1243            0 :         lp = PageGetItemId(page, offnum);
    1244            0 :         if (!ItemIdIsNormal(lp))
    1245            0 :             elog(PANIC, "invalid lp");
    1246              : 
    1247            0 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1248              : 
    1249            0 :         htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
    1250            0 :         htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
    1251            0 :         fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
    1252              :                                    &htup->t_infomask2);
    1253            0 :         HeapTupleHeaderSetXmax(htup, xlrec->xmax);
    1254              : 
    1255            0 :         PageSetLSN(page, lsn);
    1256            0 :         MarkBufferDirty(buffer);
    1257              :     }
    1258            0 :     if (BufferIsValid(buffer))
    1259            0 :         UnlockReleaseBuffer(buffer);
    1260            0 : }
    1261              : 
    1262              : /*
    1263              :  * Replay XLOG_HEAP_INPLACE records.
    1264              :  */
    1265              : static void
    1266         7880 : heap_xlog_inplace(XLogReaderState *record)
    1267              : {
    1268         7880 :     XLogRecPtr  lsn = record->EndRecPtr;
    1269         7880 :     xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
    1270              :     Buffer      buffer;
    1271              :     Page        page;
    1272              :     OffsetNumber offnum;
    1273              :     ItemId      lp;
    1274              :     HeapTupleHeader htup;
    1275              :     uint32      oldlen;
    1276              :     Size        newlen;
    1277              : 
    1278         7880 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    1279              :     {
    1280         7691 :         char       *newtup = XLogRecGetBlockData(record, 0, &newlen);
    1281              : 
    1282         7691 :         page = BufferGetPage(buffer);
    1283              : 
    1284         7691 :         offnum = xlrec->offnum;
    1285         7691 :         if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    1286            0 :             elog(PANIC, "offnum out of range");
    1287         7691 :         lp = PageGetItemId(page, offnum);
    1288         7691 :         if (!ItemIdIsNormal(lp))
    1289            0 :             elog(PANIC, "invalid lp");
    1290              : 
    1291         7691 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1292              : 
    1293         7691 :         oldlen = ItemIdGetLength(lp) - htup->t_hoff;
    1294         7691 :         if (oldlen != newlen)
    1295            0 :             elog(PANIC, "wrong tuple length");
    1296              : 
    1297         7691 :         memcpy((char *) htup + htup->t_hoff, newtup, newlen);
    1298              : 
    1299         7691 :         PageSetLSN(page, lsn);
    1300         7691 :         MarkBufferDirty(buffer);
    1301              :     }
    1302         7880 :     if (BufferIsValid(buffer))
    1303         7880 :         UnlockReleaseBuffer(buffer);
    1304              : 
    1305         7880 :     ProcessCommittedInvalidationMessages(xlrec->msgs,
    1306              :                                          xlrec->nmsgs,
    1307         7880 :                                          xlrec->relcacheInitFileInval,
    1308              :                                          xlrec->dbId,
    1309              :                                          xlrec->tsId);
    1310         7880 : }
    1311              : 
    1312              : void
    1313      1763740 : heap_redo(XLogReaderState *record)
    1314              : {
    1315      1763740 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1316              : 
    1317              :     /*
    1318              :      * These operations don't overwrite MVCC data so no conflict processing is
    1319              :      * required. The ones in heap2 rmgr do.
    1320              :      */
    1321              : 
    1322      1763740 :     switch (info & XLOG_HEAP_OPMASK)
    1323              :     {
    1324      1295651 :         case XLOG_HEAP_INSERT:
    1325      1295651 :             heap_xlog_insert(record);
    1326      1295651 :             break;
    1327       309394 :         case XLOG_HEAP_DELETE:
    1328       309394 :             heap_xlog_delete(record);
    1329       309394 :             break;
    1330        57473 :         case XLOG_HEAP_UPDATE:
    1331        57473 :             heap_xlog_update(record, false);
    1332        57473 :             break;
    1333            2 :         case XLOG_HEAP_TRUNCATE:
    1334              : 
    1335              :             /*
    1336              :              * TRUNCATE is a no-op because the actions are already logged as
    1337              :              * SMGR WAL records.  TRUNCATE WAL record only exists for logical
    1338              :              * decoding.
    1339              :              */
    1340            2 :             break;
    1341        37813 :         case XLOG_HEAP_HOT_UPDATE:
    1342        37813 :             heap_xlog_update(record, true);
    1343        37813 :             break;
    1344           93 :         case XLOG_HEAP_CONFIRM:
    1345           93 :             heap_xlog_confirm(record);
    1346           93 :             break;
    1347        55434 :         case XLOG_HEAP_LOCK:
    1348        55434 :             heap_xlog_lock(record);
    1349        55434 :             break;
    1350         7880 :         case XLOG_HEAP_INPLACE:
    1351         7880 :             heap_xlog_inplace(record);
    1352         7880 :             break;
    1353            0 :         default:
    1354            0 :             elog(PANIC, "heap_redo: unknown op code %u", info);
    1355              :     }
    1356      1763740 : }
    1357              : 
    1358              : void
    1359        82703 : heap2_redo(XLogReaderState *record)
    1360              : {
    1361        82703 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1362              : 
    1363        82703 :     switch (info & XLOG_HEAP_OPMASK)
    1364              :     {
    1365        14786 :         case XLOG_HEAP2_PRUNE_ON_ACCESS:
    1366              :         case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
    1367              :         case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
    1368        14786 :             heap_xlog_prune_freeze(record);
    1369        14786 :             break;
    1370         5182 :         case XLOG_HEAP2_VISIBLE:
    1371         5182 :             heap_xlog_visible(record);
    1372         5182 :             break;
    1373        61687 :         case XLOG_HEAP2_MULTI_INSERT:
    1374        61687 :             heap_xlog_multi_insert(record);
    1375        61687 :             break;
    1376            0 :         case XLOG_HEAP2_LOCK_UPDATED:
    1377            0 :             heap_xlog_lock_updated(record);
    1378            0 :             break;
    1379         1048 :         case XLOG_HEAP2_NEW_CID:
    1380              : 
    1381              :             /*
    1382              :              * Nothing to do on a real replay, only used during logical
    1383              :              * decoding.
    1384              :              */
    1385         1048 :             break;
    1386            0 :         case XLOG_HEAP2_REWRITE:
    1387            0 :             heap_xlog_logical_rewrite(record);
    1388            0 :             break;
    1389            0 :         default:
    1390            0 :             elog(PANIC, "heap2_redo: unknown op code %u", info);
    1391              :     }
    1392        82703 : }
    1393              : 
    1394              : /*
    1395              :  * Mask a heap page before performing consistency checks on it.
    1396              :  */
    1397              : void
    1398      2963238 : heap_mask(char *pagedata, BlockNumber blkno)
    1399              : {
    1400      2963238 :     Page        page = (Page) pagedata;
    1401              :     OffsetNumber off;
    1402              : 
    1403      2963238 :     mask_page_lsn_and_checksum(page);
    1404              : 
    1405      2963238 :     mask_page_hint_bits(page);
    1406      2963238 :     mask_unused_space(page);
    1407              : 
    1408    244637596 :     for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
    1409              :     {
    1410    241674358 :         ItemId      iid = PageGetItemId(page, off);
    1411              :         char       *page_item;
    1412              : 
    1413    241674358 :         page_item = (char *) (page + ItemIdGetOffset(iid));
    1414              : 
    1415    241674358 :         if (ItemIdIsNormal(iid))
    1416              :         {
    1417    225649244 :             HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
    1418              : 
    1419              :             /*
    1420              :              * If xmin of a tuple is not yet frozen, we should ignore
    1421              :              * differences in hint bits, since they can be set without
    1422              :              * emitting WAL.
    1423              :              */
    1424    225649244 :             if (!HeapTupleHeaderXminFrozen(page_htup))
    1425    223522740 :                 page_htup->t_infomask &= ~HEAP_XACT_MASK;
    1426              :             else
    1427              :             {
    1428              :                 /* Still we need to mask xmax hint bits. */
    1429      2126504 :                 page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
    1430      2126504 :                 page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
    1431              :             }
    1432              : 
    1433              :             /*
    1434              :              * During replay, we set Command Id to FirstCommandId. Hence, mask
    1435              :              * it. See heap_xlog_insert() for details.
    1436              :              */
    1437    225649244 :             page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
    1438              : 
    1439              :             /*
    1440              :              * For a speculative tuple, heap_insert() does not set ctid in the
    1441              :              * caller-passed heap tuple itself, leaving the ctid field to
    1442              :              * contain a speculative token value - a per-backend monotonically
    1443              :              * increasing identifier. Besides, it does not WAL-log ctid under
    1444              :              * any circumstances.
    1445              :              *
    1446              :              * During redo, heap_xlog_insert() sets t_ctid to current block
    1447              :              * number and self offset number. It doesn't care about any
    1448              :              * speculative insertions on the primary. Hence, we set t_ctid to
    1449              :              * current block number and self offset number to ignore any
    1450              :              * inconsistency.
    1451              :              */
    1452    225649244 :             if (HeapTupleHeaderIsSpeculative(page_htup))
    1453           94 :                 ItemPointerSet(&page_htup->t_ctid, blkno, off);
    1454              : 
    1455              :             /*
    1456              :              * NB: Not ignoring ctid changes due to the tuple having moved
    1457              :              * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
    1458              :              * important information that needs to be in-sync between primary
    1459              :              * and standby, and thus is WAL logged.
    1460              :              */
    1461              :         }
    1462              : 
    1463              :         /*
    1464              :          * Ignore any padding bytes after the tuple, when the length of the
    1465              :          * item is not MAXALIGNed.
    1466              :          */
    1467    241674358 :         if (ItemIdHasStorage(iid))
    1468              :         {
    1469    225649244 :             int         len = ItemIdGetLength(iid);
    1470    225649244 :             int         padlen = MAXALIGN(len) - len;
    1471              : 
    1472    225649244 :             if (padlen > 0)
    1473    120736210 :                 memset(page_item + len, MASK_MARKER, padlen);
    1474              :         }
    1475              :     }
    1476      2963238 : }
        

Generated by: LCOV version 2.0-1