LCOV - code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 505 532 94.9 %
Date: 2025-11-28 17:17:43 Functions: 25 25 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pruneheap.c
       4             :  *    heap page pruning and HOT-chain management code
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/pruneheap.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/heapam.h"
      18             : #include "access/heapam_xlog.h"
      19             : #include "access/htup_details.h"
      20             : #include "access/multixact.h"
      21             : #include "access/transam.h"
      22             : #include "access/visibilitymapdefs.h"
      23             : #include "access/xlog.h"
      24             : #include "access/xloginsert.h"
      25             : #include "commands/vacuum.h"
      26             : #include "executor/instrument.h"
      27             : #include "miscadmin.h"
      28             : #include "pgstat.h"
      29             : #include "storage/bufmgr.h"
      30             : #include "utils/rel.h"
      31             : #include "utils/snapmgr.h"
      32             : 
      33             : /* Working data for heap_page_prune_and_freeze() and subroutines */
      34             : typedef struct
      35             : {
      36             :     /*-------------------------------------------------------
      37             :      * Arguments passed to heap_page_prune_and_freeze()
      38             :      *-------------------------------------------------------
      39             :      */
      40             : 
      41             :     /* tuple visibility test, initialized for the relation */
      42             :     GlobalVisState *vistest;
      43             :     /* whether or not dead items can be set LP_UNUSED during pruning */
      44             :     bool        mark_unused_now;
      45             :     /* whether to attempt freezing tuples */
      46             :     bool        attempt_freeze;
      47             :     struct VacuumCutoffs *cutoffs;
      48             : 
      49             :     /*-------------------------------------------------------
      50             :      * Fields describing what to do to the page
      51             :      *-------------------------------------------------------
      52             :      */
      53             :     TransactionId new_prune_xid;    /* new prune hint value */
      54             :     TransactionId latest_xid_removed;
      55             :     int         nredirected;    /* numbers of entries in arrays below */
      56             :     int         ndead;
      57             :     int         nunused;
      58             :     int         nfrozen;
      59             :     /* arrays that accumulate indexes of items to be changed */
      60             :     OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
      61             :     OffsetNumber nowdead[MaxHeapTuplesPerPage];
      62             :     OffsetNumber nowunused[MaxHeapTuplesPerPage];
      63             :     HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
      64             : 
      65             :     /*-------------------------------------------------------
      66             :      * Working state for HOT chain processing
      67             :      *-------------------------------------------------------
      68             :      */
      69             : 
      70             :     /*
      71             :      * 'root_items' contains offsets of all LP_REDIRECT line pointers and
      72             :      * normal non-HOT tuples.  They can be stand-alone items or the first item
      73             :      * in a HOT chain.  'heaponly_items' contains heap-only tuples which can
      74             :      * only be removed as part of a HOT chain.
      75             :      */
      76             :     int         nroot_items;
      77             :     OffsetNumber root_items[MaxHeapTuplesPerPage];
      78             :     int         nheaponly_items;
      79             :     OffsetNumber heaponly_items[MaxHeapTuplesPerPage];
      80             : 
      81             :     /*
      82             :      * processed[offnum] is true if item at offnum has been processed.
      83             :      *
      84             :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
      85             :      * 1. Otherwise every access would need to subtract 1.
      86             :      */
      87             :     bool        processed[MaxHeapTuplesPerPage + 1];
      88             : 
      89             :     /*
      90             :      * Tuple visibility is only computed once for each tuple, for correctness
      91             :      * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
      92             :      * details.  This is of type int8[], instead of HTSV_Result[], so we can
      93             :      * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
      94             :      * items.
      95             :      *
      96             :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
      97             :      * 1. Otherwise every access would need to subtract 1.
      98             :      */
      99             :     int8        htsv[MaxHeapTuplesPerPage + 1];
     100             : 
     101             :     /*
     102             :      * Freezing-related state.
     103             :      */
     104             :     HeapPageFreeze pagefrz;
     105             : 
     106             :     /*-------------------------------------------------------
     107             :      * Information about what was done
     108             :      *
     109             :      * These fields are not used by pruning itself for the most part, but are
     110             :      * used to collect information about what was pruned and what state the
     111             :      * page is in after pruning, for the benefit of the caller.  They are
     112             :      * copied to the caller's PruneFreezeResult at the end.
     113             :      * -------------------------------------------------------
     114             :      */
     115             : 
     116             :     int         ndeleted;       /* Number of tuples deleted from the page */
     117             : 
     118             :     /* Number of live and recently dead tuples, after pruning */
     119             :     int         live_tuples;
     120             :     int         recently_dead_tuples;
     121             : 
     122             :     /* Whether or not the page makes rel truncation unsafe */
     123             :     bool        hastup;
     124             : 
     125             :     /*
     126             :      * LP_DEAD items on the page after pruning.  Includes existing LP_DEAD
     127             :      * items
     128             :      */
     129             :     int         lpdead_items;   /* number of items in the array */
     130             :     OffsetNumber *deadoffsets;  /* points directly to presult->deadoffsets */
     131             : 
     132             :     /*
     133             :      * The snapshot conflict horizon used when freezing tuples. The final
     134             :      * snapshot conflict horizon for the record may be newer if pruning
     135             :      * removes newer transaction IDs.
     136             :      */
     137             :     TransactionId frz_conflict_horizon;
     138             : 
     139             :     /*
     140             :      * all_visible and all_frozen indicate if the all-visible and all-frozen
     141             :      * bits in the visibility map can be set for this page after pruning.
     142             :      *
     143             :      * visibility_cutoff_xid is the newest xmin of live tuples on the page.
     144             :      * The caller can use it as the conflict horizon, when setting the VM
     145             :      * bits.  It is only valid if we froze some tuples, and all_frozen is
     146             :      * true.
     147             :      *
     148             :      * NOTE: all_visible and all_frozen initially don't include LP_DEAD items.
     149             :      * That's convenient for heap_page_prune_and_freeze() to use them to
     150             :      * decide whether to freeze the page or not.  The all_visible and
     151             :      * all_frozen values returned to the caller are adjusted to include
     152             :      * LP_DEAD items after we determine whether to opportunistically freeze.
     153             :      */
     154             :     bool        all_visible;
     155             :     bool        all_frozen;
     156             :     TransactionId visibility_cutoff_xid;
     157             : } PruneState;
     158             : 
     159             : /* Local functions */
     160             : static void prune_freeze_setup(PruneFreezeParams *params,
     161             :                                TransactionId new_relfrozen_xid,
     162             :                                MultiXactId new_relmin_mxid,
     163             :                                const PruneFreezeResult *presult,
     164             :                                PruneState *prstate);
     165             : static void prune_freeze_plan(Oid reloid, Buffer buffer,
     166             :                               PruneState *prstate,
     167             :                               OffsetNumber *off_loc);
     168             : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
     169             :                                                HeapTuple tup,
     170             :                                                Buffer buffer);
     171             : static inline HTSV_Result htsv_get_valid_status(int status);
     172             : static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
     173             :                              OffsetNumber rootoffnum, PruneState *prstate);
     174             : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
     175             : static void heap_prune_record_redirect(PruneState *prstate,
     176             :                                        OffsetNumber offnum, OffsetNumber rdoffnum,
     177             :                                        bool was_normal);
     178             : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
     179             :                                    bool was_normal);
     180             : static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
     181             :                                              bool was_normal);
     182             : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
     183             : 
     184             : static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum);
     185             : static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum);
     186             : static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum);
     187             : static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
     188             : 
     189             : static void page_verify_redirects(Page page);
     190             : 
     191             : static bool heap_page_will_freeze(Relation relation, Buffer buffer,
     192             :                                   bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
     193             :                                   PruneState *prstate);
     194             : 
     195             : 
     196             : /*
     197             :  * Optionally prune and repair fragmentation in the specified page.
     198             :  *
     199             :  * This is an opportunistic function.  It will perform housekeeping
     200             :  * only if the page heuristically looks like a candidate for pruning and we
     201             :  * can acquire buffer cleanup lock without blocking.
     202             :  *
     203             :  * Note: this is called quite often.  It's important that it fall out quickly
     204             :  * if there's not any use in pruning.
     205             :  *
     206             :  * Caller must have pin on the buffer, and must *not* have a lock on it.
     207             :  */
     208             : void
     209    32608568 : heap_page_prune_opt(Relation relation, Buffer buffer)
     210             : {
     211    32608568 :     Page        page = BufferGetPage(buffer);
     212             :     TransactionId prune_xid;
     213             :     GlobalVisState *vistest;
     214             :     Size        minfree;
     215             : 
     216             :     /*
     217             :      * We can't write WAL in recovery mode, so there's no point trying to
     218             :      * clean the page. The primary will likely issue a cleaning WAL record
     219             :      * soon anyway, so this is no particular loss.
     220             :      */
     221    32608568 :     if (RecoveryInProgress())
     222      442864 :         return;
     223             : 
     224             :     /*
     225             :      * First check whether there's any chance there's something to prune,
     226             :      * determining the appropriate horizon is a waste if there's no prune_xid
     227             :      * (i.e. no updates/deletes left potentially dead tuples around).
     228             :      */
     229    32165704 :     prune_xid = ((PageHeader) page)->pd_prune_xid;
     230    32165704 :     if (!TransactionIdIsValid(prune_xid))
     231    16649106 :         return;
     232             : 
     233             :     /*
     234             :      * Check whether prune_xid indicates that there may be dead rows that can
     235             :      * be cleaned up.
     236             :      */
     237    15516598 :     vistest = GlobalVisTestFor(relation);
     238             : 
     239    15516598 :     if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
     240    12993524 :         return;
     241             : 
     242             :     /*
     243             :      * We prune when a previous UPDATE failed to find enough space on the page
     244             :      * for a new tuple version, or when free space falls below the relation's
     245             :      * fill-factor target (but not less than 10%).
     246             :      *
     247             :      * Checking free space here is questionable since we aren't holding any
     248             :      * lock on the buffer; in the worst case we could get a bogus answer. It's
     249             :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
     250             :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     251             :      * important than sometimes getting a wrong answer in what is after all
     252             :      * just a heuristic estimate.
     253             :      */
     254     2523074 :     minfree = RelationGetTargetPageFreeSpace(relation,
     255             :                                              HEAP_DEFAULT_FILLFACTOR);
     256     2523074 :     minfree = Max(minfree, BLCKSZ / 10);
     257             : 
     258     2523074 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     259             :     {
     260             :         /* OK, try to get exclusive buffer lock */
     261       84450 :         if (!ConditionalLockBufferForCleanup(buffer))
     262        1036 :             return;
     263             : 
     264             :         /*
     265             :          * Now that we have buffer lock, get accurate information about the
     266             :          * page's free space, and recheck the heuristic about whether to
     267             :          * prune.
     268             :          */
     269       83414 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     270             :         {
     271             :             OffsetNumber dummy_off_loc;
     272             :             PruneFreezeResult presult;
     273             : 
     274             :             /*
     275             :              * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
     276             :              * regardless of whether or not the relation has indexes, since we
     277             :              * cannot safely determine that during on-access pruning with the
     278             :              * current implementation.
     279             :              */
     280       83414 :             PruneFreezeParams params = {
     281             :                 .relation = relation,
     282             :                 .buffer = buffer,
     283             :                 .reason = PRUNE_ON_ACCESS,
     284             :                 .options = 0,
     285             :                 .vistest = vistest,
     286             :                 .cutoffs = NULL,
     287             :             };
     288             : 
     289       83414 :             heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
     290             :                                        NULL, NULL);
     291             : 
     292             :             /*
     293             :              * Report the number of tuples reclaimed to pgstats.  This is
     294             :              * presult.ndeleted minus the number of newly-LP_DEAD-set items.
     295             :              *
     296             :              * We derive the number of dead tuples like this to avoid totally
     297             :              * forgetting about items that were set to LP_DEAD, since they
     298             :              * still need to be cleaned up by VACUUM.  We only want to count
     299             :              * heap-only tuples that just became LP_UNUSED in our report,
     300             :              * which don't.
     301             :              *
     302             :              * VACUUM doesn't have to compensate in the same way when it
     303             :              * tracks ndeleted, since it will set the same LP_DEAD items to
     304             :              * LP_UNUSED separately.
     305             :              */
     306       83414 :             if (presult.ndeleted > presult.nnewlpdead)
     307       37852 :                 pgstat_update_heap_dead_tuples(relation,
     308       37852 :                                                presult.ndeleted - presult.nnewlpdead);
     309             :         }
     310             : 
     311             :         /* And release buffer lock */
     312       83414 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     313             : 
     314             :         /*
     315             :          * We avoid reuse of any free space created on the page by unrelated
     316             :          * UPDATEs/INSERTs by opting to not update the FSM at this point.  The
     317             :          * free space should be reused by UPDATEs to *this* page.
     318             :          */
     319             :     }
     320             : }
     321             : 
     322             : /*
     323             :  * Helper for heap_page_prune_and_freeze() to initialize the PruneState using
     324             :  * the provided parameters.
     325             :  */
     326             : static void
     327     1084810 : prune_freeze_setup(PruneFreezeParams *params,
     328             :                    TransactionId new_relfrozen_xid,
     329             :                    MultiXactId new_relmin_mxid,
     330             :                    const PruneFreezeResult *presult,
     331             :                    PruneState *prstate)
     332             : {
     333             :     /* Copy parameters to prstate */
     334     1084810 :     prstate->vistest = params->vistest;
     335     1084810 :     prstate->mark_unused_now =
     336     1084810 :         (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
     337             : 
     338             :     /* cutoffs must be provided if we will attempt freezing */
     339             :     Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
     340     1084810 :     prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
     341     1084810 :     prstate->cutoffs = params->cutoffs;
     342             : 
     343             :     /*
     344             :      * Our strategy is to scan the page and make lists of items to change,
     345             :      * then apply the changes within a critical section.  This keeps as much
     346             :      * logic as possible out of the critical section, and also ensures that
     347             :      * WAL replay will work the same as the normal case.
     348             :      *
     349             :      * First, initialize the new pd_prune_xid value to zero (indicating no
     350             :      * prunable tuples).  If we find any tuples which may soon become
     351             :      * prunable, we will save the lowest relevant XID in new_prune_xid. Also
     352             :      * initialize the rest of our working state.
     353             :      */
     354     1084810 :     prstate->new_prune_xid = InvalidTransactionId;
     355     1084810 :     prstate->latest_xid_removed = InvalidTransactionId;
     356     1084810 :     prstate->nredirected = prstate->ndead = prstate->nunused = 0;
     357     1084810 :     prstate->nfrozen = 0;
     358     1084810 :     prstate->nroot_items = 0;
     359     1084810 :     prstate->nheaponly_items = 0;
     360             : 
     361             :     /* initialize page freezing working state */
     362     1084810 :     prstate->pagefrz.freeze_required = false;
     363     1084810 :     if (prstate->attempt_freeze)
     364             :     {
     365     1001396 :         prstate->pagefrz.FreezePageRelfrozenXid = new_relfrozen_xid;
     366     1001396 :         prstate->pagefrz.NoFreezePageRelfrozenXid = new_relfrozen_xid;
     367     1001396 :         prstate->pagefrz.FreezePageRelminMxid = new_relmin_mxid;
     368     1001396 :         prstate->pagefrz.NoFreezePageRelminMxid = new_relmin_mxid;
     369             :     }
     370             :     else
     371             :     {
     372             :         Assert(new_relfrozen_xid == InvalidTransactionId &&
     373             :                new_relmin_mxid == InvalidMultiXactId);
     374       83414 :         prstate->pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
     375       83414 :         prstate->pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
     376       83414 :         prstate->pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
     377       83414 :         prstate->pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
     378             :     }
     379             : 
     380     1084810 :     prstate->ndeleted = 0;
     381     1084810 :     prstate->live_tuples = 0;
     382     1084810 :     prstate->recently_dead_tuples = 0;
     383     1084810 :     prstate->hastup = false;
     384     1084810 :     prstate->lpdead_items = 0;
     385     1084810 :     prstate->deadoffsets = (OffsetNumber *) presult->deadoffsets;
     386     1084810 :     prstate->frz_conflict_horizon = InvalidTransactionId;
     387             : 
     388             :     /*
     389             :      * Vacuum may update the VM after we're done.  We can keep track of
     390             :      * whether the page will be all-visible and all-frozen after pruning and
     391             :      * freezing to help the caller to do that.
     392             :      *
     393             :      * Currently, only VACUUM sets the VM bits.  To save the effort, only do
     394             :      * the bookkeeping if the caller needs it.  Currently, that's tied to
     395             :      * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
     396             :      * to update the VM bits without also freezing or freeze without also
     397             :      * setting the VM bits.
     398             :      *
     399             :      * In addition to telling the caller whether it can set the VM bit, we
     400             :      * also use 'all_visible' and 'all_frozen' for our own decision-making. If
     401             :      * the whole page would become frozen, we consider opportunistically
     402             :      * freezing tuples.  We will not be able to freeze the whole page if there
     403             :      * are tuples present that are not visible to everyone or if there are
     404             :      * dead tuples which are not yet removable.  However, dead tuples which
     405             :      * will be removed by the end of vacuuming should not preclude us from
     406             :      * opportunistically freezing.  Because of that, we do not immediately
     407             :      * clear all_visible and all_frozen when we see LP_DEAD items.  We fix
     408             :      * that after scanning the line pointers. We must correct all_visible and
     409             :      * all_frozen before we return them to the caller, so that the caller
     410             :      * doesn't set the VM bits incorrectly.
     411             :      */
     412     1084810 :     if (prstate->attempt_freeze)
     413             :     {
     414     1001396 :         prstate->all_visible = true;
     415     1001396 :         prstate->all_frozen = true;
     416             :     }
     417             :     else
     418             :     {
     419             :         /*
     420             :          * Initializing to false allows skipping the work to update them in
     421             :          * heap_prune_record_unchanged_lp_normal().
     422             :          */
     423       83414 :         prstate->all_visible = false;
     424       83414 :         prstate->all_frozen = false;
     425             :     }
     426             : 
     427             :     /*
     428             :      * The visibility cutoff xid is the newest xmin of live tuples on the
     429             :      * page.  In the common case, this will be set as the conflict horizon the
     430             :      * caller can use for updating the VM.  If, at the end of freezing and
     431             :      * pruning, the page is all-frozen, there is no possibility that any
     432             :      * running transaction on the standby does not see tuples on the page as
     433             :      * all-visible, so the conflict horizon remains InvalidTransactionId.
     434             :      */
     435     1084810 :     prstate->visibility_cutoff_xid = InvalidTransactionId;
     436     1084810 : }
     437             : 
     438             : /*
     439             :  * Helper for heap_page_prune_and_freeze(). Iterates over every tuple on the
     440             :  * page, examines its visibility information, and determines the appropriate
     441             :  * action for each tuple. All tuples are processed and classified during this
     442             :  * phase, but no modifications are made to the page until the later execution
     443             :  * stage.
     444             :  *
     445             :  * *off_loc is used for error callback and cleared before returning.
     446             :  */
     447             : static void
     448     1084810 : prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate,
     449             :                   OffsetNumber *off_loc)
     450             : {
     451     1084810 :     Page        page = BufferGetPage(buffer);
     452     1084810 :     BlockNumber blockno = BufferGetBlockNumber(buffer);
     453     1084810 :     OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     454             :     OffsetNumber offnum;
     455             :     HeapTupleData tup;
     456             : 
     457     1084810 :     tup.t_tableOid = reloid;
     458             : 
     459             :     /*
     460             :      * Determine HTSV for all tuples, and queue them up for processing as HOT
     461             :      * chain roots or as heap-only items.
     462             :      *
     463             :      * Determining HTSV only once for each tuple is required for correctness,
     464             :      * to deal with cases where running HTSV twice could result in different
     465             :      * results.  For example, RECENTLY_DEAD can turn to DEAD if another
     466             :      * checked item causes GlobalVisTestIsRemovableFullXid() to update the
     467             :      * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
     468             :      * transaction aborts.
     469             :      *
     470             :      * It's also good for performance. Most commonly tuples within a page are
     471             :      * stored at decreasing offsets (while the items are stored at increasing
     472             :      * offsets). When processing all tuples on a page this leads to reading
     473             :      * memory at decreasing offsets within a page, with a variable stride.
     474             :      * That's hard for CPU prefetchers to deal with. Processing the items in
     475             :      * reverse order (and thus the tuples in increasing order) increases
     476             :      * prefetching efficiency significantly / decreases the number of cache
     477             :      * misses.
     478             :      */
     479     1084810 :     for (offnum = maxoff;
     480    54514926 :          offnum >= FirstOffsetNumber;
     481    53430116 :          offnum = OffsetNumberPrev(offnum))
     482             :     {
     483    53430116 :         ItemId      itemid = PageGetItemId(page, offnum);
     484             :         HeapTupleHeader htup;
     485             : 
     486             :         /*
     487             :          * Set the offset number so that we can display it along with any
     488             :          * error that occurred while processing this tuple.
     489             :          */
     490    53430116 :         *off_loc = offnum;
     491             : 
     492    53430116 :         prstate->processed[offnum] = false;
     493    53430116 :         prstate->htsv[offnum] = -1;
     494             : 
     495             :         /* Nothing to do if slot doesn't contain a tuple */
     496    53430116 :         if (!ItemIdIsUsed(itemid))
     497             :         {
     498      586464 :             heap_prune_record_unchanged_lp_unused(page, prstate, offnum);
     499      586464 :             continue;
     500             :         }
     501             : 
     502    52843652 :         if (ItemIdIsDead(itemid))
     503             :         {
     504             :             /*
     505             :              * If the caller set mark_unused_now true, we can set dead line
     506             :              * pointers LP_UNUSED now.
     507             :              */
     508     2126528 :             if (unlikely(prstate->mark_unused_now))
     509        1128 :                 heap_prune_record_unused(prstate, offnum, false);
     510             :             else
     511     2125400 :                 heap_prune_record_unchanged_lp_dead(page, prstate, offnum);
     512     2126528 :             continue;
     513             :         }
     514             : 
     515    50717124 :         if (ItemIdIsRedirected(itemid))
     516             :         {
     517             :             /* This is the start of a HOT chain */
     518      619490 :             prstate->root_items[prstate->nroot_items++] = offnum;
     519      619490 :             continue;
     520             :         }
     521             : 
     522             :         Assert(ItemIdIsNormal(itemid));
     523             : 
     524             :         /*
     525             :          * Get the tuple's visibility status and queue it up for processing.
     526             :          */
     527    50097634 :         htup = (HeapTupleHeader) PageGetItem(page, itemid);
     528    50097634 :         tup.t_data = htup;
     529    50097634 :         tup.t_len = ItemIdGetLength(itemid);
     530    50097634 :         ItemPointerSet(&tup.t_self, blockno, offnum);
     531             : 
     532    50097634 :         prstate->htsv[offnum] = heap_prune_satisfies_vacuum(prstate, &tup,
     533             :                                                             buffer);
     534             : 
     535    50097634 :         if (!HeapTupleHeaderIsHeapOnly(htup))
     536    49242544 :             prstate->root_items[prstate->nroot_items++] = offnum;
     537             :         else
     538      855090 :             prstate->heaponly_items[prstate->nheaponly_items++] = offnum;
     539             :     }
     540             : 
     541             :     /*
     542             :      * Process HOT chains.
     543             :      *
     544             :      * We added the items to the array starting from 'maxoff', so by
     545             :      * processing the array in reverse order, we process the items in
     546             :      * ascending offset number order.  The order doesn't matter for
     547             :      * correctness, but some quick micro-benchmarking suggests that this is
     548             :      * faster.  (Earlier PostgreSQL versions, which scanned all the items on
     549             :      * the page instead of using the root_items array, also did it in
     550             :      * ascending offset number order.)
     551             :      */
     552    50946844 :     for (int i = prstate->nroot_items - 1; i >= 0; i--)
     553             :     {
     554    49862034 :         offnum = prstate->root_items[i];
     555             : 
     556             :         /* Ignore items already processed as part of an earlier chain */
     557    49862034 :         if (prstate->processed[offnum])
     558           0 :             continue;
     559             : 
     560             :         /* see preceding loop */
     561    49862034 :         *off_loc = offnum;
     562             : 
     563             :         /* Process this item or chain of items */
     564    49862034 :         heap_prune_chain(page, blockno, maxoff, offnum, prstate);
     565             :     }
     566             : 
     567             :     /*
     568             :      * Process any heap-only tuples that were not already processed as part of
     569             :      * a HOT chain.
     570             :      */
     571     1939900 :     for (int i = prstate->nheaponly_items - 1; i >= 0; i--)
     572             :     {
     573      855090 :         offnum = prstate->heaponly_items[i];
     574             : 
     575      855090 :         if (prstate->processed[offnum])
     576      827314 :             continue;
     577             : 
     578             :         /* see preceding loop */
     579       27776 :         *off_loc = offnum;
     580             : 
     581             :         /*
     582             :          * If the tuple is DEAD and doesn't chain to anything else, mark it
     583             :          * unused.  (If it does chain, we can only remove it as part of
     584             :          * pruning its chain.)
     585             :          *
     586             :          * We need this primarily to handle aborted HOT updates, that is,
     587             :          * XMIN_INVALID heap-only tuples.  Those might not be linked to by any
     588             :          * chain, since the parent tuple might be re-updated before any
     589             :          * pruning occurs.  So we have to be able to reap them separately from
     590             :          * chain-pruning.  (Note that HeapTupleHeaderIsHotUpdated will never
     591             :          * return true for an XMIN_INVALID tuple, so this code will work even
     592             :          * when there were sequential updates within the aborted transaction.)
     593             :          */
     594       27776 :         if (prstate->htsv[offnum] == HEAPTUPLE_DEAD)
     595             :         {
     596        4182 :             ItemId      itemid = PageGetItemId(page, offnum);
     597        4182 :             HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
     598             : 
     599        4182 :             if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
     600             :             {
     601        4182 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
     602             :                                                       &prstate->latest_xid_removed);
     603        4182 :                 heap_prune_record_unused(prstate, offnum, true);
     604             :             }
     605             :             else
     606             :             {
     607             :                 /*
     608             :                  * This tuple should've been processed and removed as part of
     609             :                  * a HOT chain, so something's wrong.  To preserve evidence,
     610             :                  * we don't dare to remove it.  We cannot leave behind a DEAD
     611             :                  * tuple either, because that will cause VACUUM to error out.
     612             :                  * Throwing an error with a distinct error message seems like
     613             :                  * the least bad option.
     614             :                  */
     615           0 :                 elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
     616             :                      blockno, offnum);
     617             :             }
     618             :         }
     619             :         else
     620       23594 :             heap_prune_record_unchanged_lp_normal(page, prstate, offnum);
     621             :     }
     622             : 
     623             :     /* We should now have processed every tuple exactly once  */
     624             : #ifdef USE_ASSERT_CHECKING
     625             :     for (offnum = FirstOffsetNumber;
     626             :          offnum <= maxoff;
     627             :          offnum = OffsetNumberNext(offnum))
     628             :     {
     629             :         *off_loc = offnum;
     630             : 
     631             :         Assert(prstate->processed[offnum]);
     632             :     }
     633             : #endif
     634             : 
     635             :     /* Clear the offset information once we have processed the given page. */
     636     1084810 :     *off_loc = InvalidOffsetNumber;
     637     1084810 : }
     638             : 
     639             : /*
     640             :  * Decide whether to proceed with freezing according to the freeze plans
     641             :  * prepared for the given heap buffer. If freezing is chosen, this function
     642             :  * performs several pre-freeze checks.
     643             :  *
     644             :  * The values of do_prune, do_hint_prune, and did_tuple_hint_fpi must be
     645             :  * determined before calling this function.
     646             :  *
     647             :  * prstate is both an input and output parameter.
     648             :  *
     649             :  * Returns true if we should apply the freeze plans and freeze tuples on the
     650             :  * page, and false otherwise.
     651             :  */
     652             : static bool
     653     1084810 : heap_page_will_freeze(Relation relation, Buffer buffer,
     654             :                       bool did_tuple_hint_fpi,
     655             :                       bool do_prune,
     656             :                       bool do_hint_prune,
     657             :                       PruneState *prstate)
     658             : {
     659     1084810 :     bool        do_freeze = false;
     660             : 
     661             :     /*
     662             :      * If the caller specified we should not attempt to freeze any tuples,
     663             :      * validate that everything is in the right state and return.
     664             :      */
     665     1084810 :     if (!prstate->attempt_freeze)
     666             :     {
     667             :         Assert(!prstate->all_frozen && prstate->nfrozen == 0);
     668             :         Assert(prstate->lpdead_items == 0 || !prstate->all_visible);
     669       83414 :         return false;
     670             :     }
     671             : 
     672     1001396 :     if (prstate->pagefrz.freeze_required)
     673             :     {
     674             :         /*
     675             :          * heap_prepare_freeze_tuple indicated that at least one XID/MXID from
     676             :          * before FreezeLimit/MultiXactCutoff is present.  Must freeze to
     677             :          * advance relfrozenxid/relminmxid.
     678             :          */
     679       41644 :         do_freeze = true;
     680             :     }
     681             :     else
     682             :     {
     683             :         /*
     684             :          * Opportunistically freeze the page if we are generating an FPI
     685             :          * anyway and if doing so means that we can set the page all-frozen
     686             :          * afterwards (might not happen until VACUUM's final heap pass).
     687             :          *
     688             :          * XXX: Previously, we knew if pruning emitted an FPI by checking
     689             :          * pgWalUsage.wal_fpi before and after pruning.  Once the freeze and
     690             :          * prune records were combined, this heuristic couldn't be used
     691             :          * anymore.  The opportunistic freeze heuristic must be improved;
     692             :          * however, for now, try to approximate the old logic.
     693             :          */
     694      959752 :         if (prstate->all_frozen && prstate->nfrozen > 0)
     695             :         {
     696             :             Assert(prstate->all_visible);
     697             : 
     698             :             /*
     699             :              * Freezing would make the page all-frozen.  Have already emitted
     700             :              * an FPI or will do so anyway?
     701             :              */
     702       30072 :             if (RelationNeedsWAL(relation))
     703             :             {
     704       26758 :                 if (did_tuple_hint_fpi)
     705        2376 :                     do_freeze = true;
     706       24382 :                 else if (do_prune)
     707             :                 {
     708        2686 :                     if (XLogCheckBufferNeedsBackup(buffer))
     709        1528 :                         do_freeze = true;
     710             :                 }
     711       21696 :                 else if (do_hint_prune)
     712             :                 {
     713          12 :                     if (XLogHintBitIsNeeded() && XLogCheckBufferNeedsBackup(buffer))
     714           4 :                         do_freeze = true;
     715             :                 }
     716             :             }
     717             :         }
     718             :     }
     719             : 
     720     1001396 :     if (do_freeze)
     721             :     {
     722             :         /*
     723             :          * Validate the tuples we will be freezing before entering the
     724             :          * critical section.
     725             :          */
     726       45552 :         heap_pre_freeze_checks(buffer, prstate->frozen, prstate->nfrozen);
     727             : 
     728             :         /*
     729             :          * Calculate what the snapshot conflict horizon should be for a record
     730             :          * freezing tuples. We can use the visibility_cutoff_xid as our cutoff
     731             :          * for conflicts when the whole page is eligible to become all-frozen
     732             :          * in the VM once we're done with it. Otherwise, we generate a
     733             :          * conservative cutoff by stepping back from OldestXmin.
     734             :          */
     735       45552 :         if (prstate->all_frozen)
     736       41006 :             prstate->frz_conflict_horizon = prstate->visibility_cutoff_xid;
     737             :         else
     738             :         {
     739             :             /* Avoids false conflicts when hot_standby_feedback in use */
     740        4546 :             prstate->frz_conflict_horizon = prstate->cutoffs->OldestXmin;
     741        4546 :             TransactionIdRetreat(prstate->frz_conflict_horizon);
     742             :         }
     743             :     }
     744      955844 :     else if (prstate->nfrozen > 0)
     745             :     {
     746             :         /*
     747             :          * The page contained some tuples that were not already frozen, and we
     748             :          * chose not to freeze them now.  The page won't be all-frozen then.
     749             :          */
     750             :         Assert(!prstate->pagefrz.freeze_required);
     751             : 
     752       26984 :         prstate->all_frozen = false;
     753       26984 :         prstate->nfrozen = 0;    /* avoid miscounts in instrumentation */
     754             :     }
     755             :     else
     756             :     {
     757             :         /*
     758             :          * We have no freeze plans to execute.  The page might already be
     759             :          * all-frozen (perhaps only following pruning), though.  Such pages
     760             :          * can be marked all-frozen in the VM by our caller, even though none
     761             :          * of its tuples were newly frozen here.
     762             :          */
     763             :     }
     764             : 
     765     1001396 :     return do_freeze;
     766             : }
     767             : 
     768             : 
     769             : /*
     770             :  * Prune and repair fragmentation and potentially freeze tuples on the
     771             :  * specified page.
     772             :  *
     773             :  * Caller must have pin and buffer cleanup lock on the page.  Note that we
     774             :  * don't update the FSM information for page on caller's behalf.  Caller might
     775             :  * also need to account for a reduction in the length of the line pointer
     776             :  * array following array truncation by us.
     777             :  *
     778             :  * params contains the input parameters used to control freezing and pruning
     779             :  * behavior. See the definition of PruneFreezeParams for more on what each
     780             :  * parameter does.
     781             :  *
     782             :  * If the HEAP_PAGE_PRUNE_FREEZE option is set in params, we will freeze
     783             :  * tuples if it's required in order to advance relfrozenxid / relminmxid, or
     784             :  * if it's considered advantageous for overall system performance to do so
     785             :  * now.  The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
     786             :  * 'new_relmin_mxid' arguments are required when freezing.  When
     787             :  * HEAP_PAGE_PRUNE_FREEZE option is passed, we also set presult->all_visible
     788             :  * and presult->all_frozen after determining whether or not to
     789             :  * opportunistically freeze, to indicate if the VM bits can be set.  They are
     790             :  * always set to false when the HEAP_PAGE_PRUNE_FREEZE option is not passed,
     791             :  * because at the moment only callers that also freeze need that information.
     792             :  *
     793             :  * presult contains output parameters needed by callers, such as the number of
     794             :  * tuples removed and the offsets of dead items on the page after pruning.
     795             :  * heap_page_prune_and_freeze() is responsible for initializing it.  Required
     796             :  * by all callers.
     797             :  *
     798             :  * off_loc is the offset location required by the caller to use in error
     799             :  * callback.
     800             :  *
     801             :  * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
     802             :  * HEAP_PAGE_PRUNE_FREEZE option is set in params.  On entry, they contain the
     803             :  * oldest XID and multi-XID seen on the relation so far.  They will be updated
     804             :  * with the oldest values present on the page after pruning.  After processing
     805             :  * the whole relation, VACUUM can use these values as the new
     806             :  * relfrozenxid/relminmxid for the relation.
     807             :  */
     808             : void
     809     1084810 : heap_page_prune_and_freeze(PruneFreezeParams *params,
     810             :                            PruneFreezeResult *presult,
     811             :                            OffsetNumber *off_loc,
     812             :                            TransactionId *new_relfrozen_xid,
     813             :                            MultiXactId *new_relmin_mxid)
     814             : {
     815     1084810 :     Buffer      buffer = params->buffer;
     816     1084810 :     Page        page = BufferGetPage(buffer);
     817             :     PruneState  prstate;
     818             :     bool        do_freeze;
     819             :     bool        do_prune;
     820             :     bool        do_hint_prune;
     821             :     bool        did_tuple_hint_fpi;
     822     1084810 :     int64       fpi_before = pgWalUsage.wal_fpi;
     823             : 
     824             :     /* Initialize prstate */
     825     1084810 :     prune_freeze_setup(params,
     826             :                        new_relfrozen_xid ?
     827             :                        *new_relfrozen_xid : InvalidTransactionId,
     828             :                        new_relmin_mxid ?
     829             :                        *new_relmin_mxid : InvalidMultiXactId,
     830             :                        presult,
     831             :                        &prstate);
     832             : 
     833             :     /*
     834             :      * Examine all line pointers and tuple visibility information to determine
     835             :      * which line pointers should change state and which tuples may be frozen.
     836             :      * Prepare queue of state changes to later be executed in a critical
     837             :      * section.
     838             :      */
     839     1084810 :     prune_freeze_plan(RelationGetRelid(params->relation),
     840             :                       buffer, &prstate, off_loc);
     841             : 
     842             :     /*
     843             :      * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
     844             :      * checking tuple visibility information in prune_freeze_plan() may have
     845             :      * caused an FPI to be emitted.
     846             :      */
     847     1084810 :     did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;
     848             : 
     849     3221080 :     do_prune = prstate.nredirected > 0 ||
     850     2064782 :         prstate.ndead > 0 ||
     851      979972 :         prstate.nunused > 0;
     852             : 
     853             :     /*
     854             :      * Even if we don't prune anything, if we found a new value for the
     855             :      * pd_prune_xid field or the page was marked full, we will update the hint
     856             :      * bit.
     857             :      */
     858     2064334 :     do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
     859      979524 :         PageIsFull(page);
     860             : 
     861             :     /*
     862             :      * Decide if we want to go ahead with freezing according to the freeze
     863             :      * plans we prepared, or not.
     864             :      */
     865     1084810 :     do_freeze = heap_page_will_freeze(params->relation, buffer,
     866             :                                       did_tuple_hint_fpi,
     867             :                                       do_prune,
     868             :                                       do_hint_prune,
     869             :                                       &prstate);
     870             : 
     871             :     /*
     872             :      * While scanning the line pointers, we did not clear
     873             :      * all_visible/all_frozen when encountering LP_DEAD items because we
     874             :      * wanted the decision whether or not to freeze the page to be unaffected
     875             :      * by the short-term presence of LP_DEAD items.  These LP_DEAD items are
     876             :      * effectively assumed to be LP_UNUSED items in the making.  It doesn't
     877             :      * matter which vacuum heap pass (initial pass or final pass) ends up
     878             :      * setting the page all-frozen, as long as the ongoing VACUUM does it.
     879             :      *
     880             :      * Now that we finished determining whether or not to freeze the page,
     881             :      * update all_visible and all_frozen so that they reflect the true state
     882             :      * of the page for setting PD_ALL_VISIBLE and VM bits.
     883             :      */
     884     1084810 :     if (prstate.lpdead_items > 0)
     885      108404 :         prstate.all_visible = prstate.all_frozen = false;
     886             : 
     887             :     Assert(!prstate.all_frozen || prstate.all_visible);
     888             : 
     889             :     /* Any error while applying the changes is critical */
     890     1084810 :     START_CRIT_SECTION();
     891             : 
     892     1084810 :     if (do_hint_prune)
     893             :     {
     894             :         /*
     895             :          * Update the page's pd_prune_xid field to either zero, or the lowest
     896             :          * XID of any soon-prunable tuple.
     897             :          */
     898      105526 :         ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
     899             : 
     900             :         /*
     901             :          * Also clear the "page is full" flag, since there's no point in
     902             :          * repeating the prune/defrag process until something else happens to
     903             :          * the page.
     904             :          */
     905      105526 :         PageClearFull(page);
     906             : 
     907             :         /*
     908             :          * If that's all we had to do to the page, this is a non-WAL-logged
     909             :          * hint.  If we are going to freeze or prune the page, we will mark
     910             :          * the buffer dirty below.
     911             :          */
     912      105526 :         if (!do_freeze && !do_prune)
     913         414 :             MarkBufferDirtyHint(buffer, true);
     914             :     }
     915             : 
     916     1084810 :     if (do_prune || do_freeze)
     917             :     {
     918             :         /* Apply the planned item changes and repair page fragmentation. */
     919      147548 :         if (do_prune)
     920             :         {
     921      105616 :             heap_page_prune_execute(buffer, false,
     922             :                                     prstate.redirected, prstate.nredirected,
     923             :                                     prstate.nowdead, prstate.ndead,
     924             :                                     prstate.nowunused, prstate.nunused);
     925             :         }
     926             : 
     927      147548 :         if (do_freeze)
     928       45552 :             heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
     929             : 
     930      147548 :         MarkBufferDirty(buffer);
     931             : 
     932             :         /*
     933             :          * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
     934             :          */
     935      147548 :         if (RelationNeedsWAL(params->relation))
     936             :         {
     937             :             /*
     938             :              * The snapshotConflictHorizon for the whole record should be the
     939             :              * most conservative of all the horizons calculated for any of the
     940             :              * possible modifications.  If this record will prune tuples, any
     941             :              * transactions on the standby older than the youngest xmax of the
     942             :              * most recently removed tuple this record will prune will
     943             :              * conflict.  If this record will freeze tuples, any transactions
     944             :              * on the standby with xids older than the youngest tuple this
     945             :              * record will freeze will conflict.
     946             :              */
     947             :             TransactionId conflict_xid;
     948             : 
     949      145794 :             if (TransactionIdFollows(prstate.frz_conflict_horizon,
     950             :                                      prstate.latest_xid_removed))
     951       42544 :                 conflict_xid = prstate.frz_conflict_horizon;
     952             :             else
     953      103250 :                 conflict_xid = prstate.latest_xid_removed;
     954             : 
     955      145794 :             log_heap_prune_and_freeze(params->relation, buffer,
     956             :                                       InvalidBuffer,    /* vmbuffer */
     957             :                                       0,    /* vmflags */
     958             :                                       conflict_xid,
     959             :                                       true, params->reason,
     960             :                                       prstate.frozen, prstate.nfrozen,
     961             :                                       prstate.redirected, prstate.nredirected,
     962             :                                       prstate.nowdead, prstate.ndead,
     963             :                                       prstate.nowunused, prstate.nunused);
     964             :         }
     965             :     }
     966             : 
     967     1084810 :     END_CRIT_SECTION();
     968             : 
     969             :     /* Copy information back for caller */
     970     1084810 :     presult->ndeleted = prstate.ndeleted;
     971     1084810 :     presult->nnewlpdead = prstate.ndead;
     972     1084810 :     presult->nfrozen = prstate.nfrozen;
     973     1084810 :     presult->live_tuples = prstate.live_tuples;
     974     1084810 :     presult->recently_dead_tuples = prstate.recently_dead_tuples;
     975     1084810 :     presult->all_visible = prstate.all_visible;
     976     1084810 :     presult->all_frozen = prstate.all_frozen;
     977     1084810 :     presult->hastup = prstate.hastup;
     978             : 
     979             :     /*
     980             :      * For callers planning to update the visibility map, the conflict horizon
     981             :      * for that record must be the newest xmin on the page.  However, if the
     982             :      * page is completely frozen, there can be no conflict and the
     983             :      * vm_conflict_horizon should remain InvalidTransactionId.  This includes
     984             :      * the case that we just froze all the tuples; the prune-freeze record
     985             :      * included the conflict XID already so the caller doesn't need it.
     986             :      */
     987     1084810 :     if (presult->all_frozen)
     988      407024 :         presult->vm_conflict_horizon = InvalidTransactionId;
     989             :     else
     990      677786 :         presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
     991             : 
     992     1084810 :     presult->lpdead_items = prstate.lpdead_items;
     993             :     /* the presult->deadoffsets array was already filled in */
     994             : 
     995     1084810 :     if (prstate.attempt_freeze)
     996             :     {
     997     1001396 :         if (presult->nfrozen > 0)
     998             :         {
     999       45552 :             *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
    1000       45552 :             *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
    1001             :         }
    1002             :         else
    1003             :         {
    1004      955844 :             *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
    1005      955844 :             *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
    1006             :         }
    1007             :     }
    1008     1084810 : }
    1009             : 
    1010             : 
    1011             : /*
    1012             :  * Perform visibility checks for heap pruning.
    1013             :  */
    1014             : static HTSV_Result
    1015    50097634 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
    1016             : {
    1017             :     HTSV_Result res;
    1018             :     TransactionId dead_after;
    1019             : 
    1020    50097634 :     res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
    1021             : 
    1022    50097634 :     if (res != HEAPTUPLE_RECENTLY_DEAD)
    1023    46786282 :         return res;
    1024             : 
    1025             :     /*
    1026             :      * For VACUUM, we must be sure to prune tuples with xmax older than
    1027             :      * OldestXmin -- a visibility cutoff determined at the beginning of
    1028             :      * vacuuming the relation. OldestXmin is used for freezing determination
    1029             :      * and we cannot freeze dead tuples' xmaxes.
    1030             :      */
    1031     3311352 :     if (prstate->cutoffs &&
    1032     1735570 :         TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
    1033     1735570 :         NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
    1034     1278952 :         return HEAPTUPLE_DEAD;
    1035             : 
    1036             :     /*
    1037             :      * Determine whether or not the tuple is considered dead when compared
    1038             :      * with the provided GlobalVisState. On-access pruning does not provide
    1039             :      * VacuumCutoffs. And for vacuum, even if the tuple's xmax is not older
    1040             :      * than OldestXmin, GlobalVisTestIsRemovableXid() could find the row dead
    1041             :      * if the GlobalVisState has been updated since the beginning of vacuuming
    1042             :      * the relation.
    1043             :      */
    1044     2032400 :     if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
    1045     1522106 :         return HEAPTUPLE_DEAD;
    1046             : 
    1047      510294 :     return res;
    1048             : }
    1049             : 
    1050             : 
    1051             : /*
    1052             :  * Pruning calculates tuple visibility once and saves the results in an array
    1053             :  * of int8.  See PruneState.htsv for details.  This helper function is meant
    1054             :  * to guard against examining visibility status array members which have not
    1055             :  * yet been computed.
    1056             :  */
    1057             : static inline HTSV_Result
    1058    50069858 : htsv_get_valid_status(int status)
    1059             : {
    1060             :     Assert(status >= HEAPTUPLE_DEAD &&
    1061             :            status <= HEAPTUPLE_DELETE_IN_PROGRESS);
    1062    50069858 :     return (HTSV_Result) status;
    1063             : }
    1064             : 
    1065             : /*
    1066             :  * Prune specified line pointer or a HOT chain originating at line pointer.
    1067             :  *
    1068             :  * Tuple visibility information is provided in prstate->htsv.
    1069             :  *
    1070             :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
    1071             :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
    1072             :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
    1073             :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
    1074             :  * DEAD, our visibility test is just too coarse to detect it.
    1075             :  *
    1076             :  * Pruning must never leave behind a DEAD tuple that still has tuple storage.
    1077             :  * VACUUM isn't prepared to deal with that case.
    1078             :  *
    1079             :  * The root line pointer is redirected to the tuple immediately after the
    1080             :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
    1081             :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
    1082             :  * tuple, which we treat as a chain of length 1.)
    1083             :  *
    1084             :  * We don't actually change the page here. We just add entries to the arrays in
    1085             :  * prstate showing the changes to be made.  Items to be redirected are added
    1086             :  * to the redirected[] array (two entries per redirection); items to be set to
    1087             :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
    1088             :  * state are added to nowunused[].  We perform bookkeeping of live tuples,
    1089             :  * visibility etc. based on what the page will look like after the changes
    1090             :  * applied.  All that bookkeeping is performed in the heap_prune_record_*()
    1091             :  * subroutines.  The division of labor is that heap_prune_chain() decides the
    1092             :  * fate of each tuple, ie. whether it's going to be removed, redirected or
    1093             :  * left unchanged, and the heap_prune_record_*() subroutines update PruneState
    1094             :  * based on that outcome.
    1095             :  */
    1096             : static void
    1097    49862034 : heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
    1098             :                  OffsetNumber rootoffnum, PruneState *prstate)
    1099             : {
    1100    49862034 :     TransactionId priorXmax = InvalidTransactionId;
    1101             :     ItemId      rootlp;
    1102             :     OffsetNumber offnum;
    1103             :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
    1104             : 
    1105             :     /*
    1106             :      * After traversing the HOT chain, ndeadchain is the index in chainitems
    1107             :      * of the first live successor after the last dead item.
    1108             :      */
    1109    49862034 :     int         ndeadchain = 0,
    1110    49862034 :                 nchain = 0;
    1111             : 
    1112    49862034 :     rootlp = PageGetItemId(page, rootoffnum);
    1113             : 
    1114             :     /* Start from the root tuple */
    1115    49862034 :     offnum = rootoffnum;
    1116             : 
    1117             :     /* while not end of the chain */
    1118             :     for (;;)
    1119      827314 :     {
    1120             :         HeapTupleHeader htup;
    1121             :         ItemId      lp;
    1122             : 
    1123             :         /* Sanity check (pure paranoia) */
    1124    50689348 :         if (offnum < FirstOffsetNumber)
    1125           0 :             break;
    1126             : 
    1127             :         /*
    1128             :          * An offset past the end of page's line pointer array is possible
    1129             :          * when the array was truncated (original item must have been unused)
    1130             :          */
    1131    50689348 :         if (offnum > maxoff)
    1132           0 :             break;
    1133             : 
    1134             :         /* If item is already processed, stop --- it must not be same chain */
    1135    50689348 :         if (prstate->processed[offnum])
    1136           0 :             break;
    1137             : 
    1138    50689348 :         lp = PageGetItemId(page, offnum);
    1139             : 
    1140             :         /*
    1141             :          * Unused item obviously isn't part of the chain. Likewise, a dead
    1142             :          * line pointer can't be part of the chain.  Both of those cases were
    1143             :          * already marked as processed.
    1144             :          */
    1145             :         Assert(ItemIdIsUsed(lp));
    1146             :         Assert(!ItemIdIsDead(lp));
    1147             : 
    1148             :         /*
    1149             :          * If we are looking at the redirected root line pointer, jump to the
    1150             :          * first normal tuple in the chain.  If we find a redirect somewhere
    1151             :          * else, stop --- it must not be same chain.
    1152             :          */
    1153    50689348 :         if (ItemIdIsRedirected(lp))
    1154             :         {
    1155      619490 :             if (nchain > 0)
    1156           0 :                 break;          /* not at start of chain */
    1157      619490 :             chainitems[nchain++] = offnum;
    1158      619490 :             offnum = ItemIdGetRedirect(rootlp);
    1159      619490 :             continue;
    1160             :         }
    1161             : 
    1162             :         Assert(ItemIdIsNormal(lp));
    1163             : 
    1164    50069858 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1165             : 
    1166             :         /*
    1167             :          * Check the tuple XMIN against prior XMAX, if any
    1168             :          */
    1169    50277682 :         if (TransactionIdIsValid(priorXmax) &&
    1170      207824 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
    1171           0 :             break;
    1172             : 
    1173             :         /*
    1174             :          * OK, this tuple is indeed a member of the chain.
    1175             :          */
    1176    50069858 :         chainitems[nchain++] = offnum;
    1177             : 
    1178    50069858 :         switch (htsv_get_valid_status(prstate->htsv[offnum]))
    1179             :         {
    1180     2884628 :             case HEAPTUPLE_DEAD:
    1181             : 
    1182             :                 /* Remember the last DEAD tuple seen */
    1183     2884628 :                 ndeadchain = nchain;
    1184     2884628 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
    1185             :                                                       &prstate->latest_xid_removed);
    1186             :                 /* Advance to next chain member */
    1187     2884628 :                 break;
    1188             : 
    1189      510294 :             case HEAPTUPLE_RECENTLY_DEAD:
    1190             : 
    1191             :                 /*
    1192             :                  * We don't need to advance the conflict horizon for
    1193             :                  * RECENTLY_DEAD tuples, even if we are removing them.  This
    1194             :                  * is because we only remove RECENTLY_DEAD tuples if they
    1195             :                  * precede a DEAD tuple, and the DEAD tuple must have been
    1196             :                  * inserted by a newer transaction than the RECENTLY_DEAD
    1197             :                  * tuple by virtue of being later in the chain.  We will have
    1198             :                  * advanced the conflict horizon for the DEAD tuple.
    1199             :                  */
    1200             : 
    1201             :                 /*
    1202             :                  * Advance past RECENTLY_DEAD tuples just in case there's a
    1203             :                  * DEAD one after them.  We have to make sure that we don't
    1204             :                  * miss any DEAD tuples, since DEAD tuples that still have
    1205             :                  * tuple storage after pruning will confuse VACUUM.
    1206             :                  */
    1207      510294 :                 break;
    1208             : 
    1209    46674936 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1210             :             case HEAPTUPLE_LIVE:
    1211             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1212    46674936 :                 goto process_chain;
    1213             : 
    1214           0 :             default:
    1215           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1216             :                 goto process_chain;
    1217             :         }
    1218             : 
    1219             :         /*
    1220             :          * If the tuple is not HOT-updated, then we are at the end of this
    1221             :          * HOT-update chain.
    1222             :          */
    1223     3394922 :         if (!HeapTupleHeaderIsHotUpdated(htup))
    1224     3187098 :             goto process_chain;
    1225             : 
    1226             :         /* HOT implies it can't have moved to different partition */
    1227             :         Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
    1228             : 
    1229             :         /*
    1230             :          * Advance to next chain member.
    1231             :          */
    1232             :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blockno);
    1233      207824 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1234      207824 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1235             :     }
    1236             : 
    1237           0 :     if (ItemIdIsRedirected(rootlp) && nchain < 2)
    1238             :     {
    1239             :         /*
    1240             :          * We found a redirect item that doesn't point to a valid follow-on
    1241             :          * item.  This can happen if the loop in heap_page_prune_and_freeze()
    1242             :          * caused us to visit the dead successor of a redirect item before
    1243             :          * visiting the redirect item.  We can clean up by setting the
    1244             :          * redirect item to LP_DEAD state or LP_UNUSED if the caller
    1245             :          * indicated.
    1246             :          */
    1247           0 :         heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
    1248           0 :         return;
    1249             :     }
    1250             : 
    1251           0 : process_chain:
    1252             : 
    1253    49862034 :     if (ndeadchain == 0)
    1254             :     {
    1255             :         /*
    1256             :          * No DEAD tuple was found, so the chain is entirely composed of
    1257             :          * normal, unchanged tuples.  Leave it alone.
    1258             :          */
    1259    47045644 :         int         i = 0;
    1260             : 
    1261    47045644 :         if (ItemIdIsRedirected(rootlp))
    1262             :         {
    1263      585032 :             heap_prune_record_unchanged_lp_redirect(prstate, rootoffnum);
    1264      585032 :             i++;
    1265             :         }
    1266    94099360 :         for (; i < nchain; i++)
    1267    47053716 :             heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
    1268             :     }
    1269     2816390 :     else if (ndeadchain == nchain)
    1270             :     {
    1271             :         /*
    1272             :          * The entire chain is dead.  Mark the root line pointer LP_DEAD, and
    1273             :          * fully remove the other tuples in the chain.
    1274             :          */
    1275     2686484 :         heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
    1276     2751312 :         for (int i = 1; i < nchain; i++)
    1277       64828 :             heap_prune_record_unused(prstate, chainitems[i], true);
    1278             :     }
    1279             :     else
    1280             :     {
    1281             :         /*
    1282             :          * We found a DEAD tuple in the chain.  Redirect the root line pointer
    1283             :          * to the first non-DEAD tuple, and mark as unused each intermediate
    1284             :          * item that we are able to remove from the chain.
    1285             :          */
    1286      129906 :         heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
    1287      129906 :                                    ItemIdIsNormal(rootlp));
    1288      167774 :         for (int i = 1; i < ndeadchain; i++)
    1289       37868 :             heap_prune_record_unused(prstate, chainitems[i], true);
    1290             : 
    1291             :         /* the rest of tuples in the chain are normal, unchanged tuples */
    1292      261420 :         for (int i = ndeadchain; i < nchain; i++)
    1293      131514 :             heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
    1294             :     }
    1295             : }
    1296             : 
    1297             : /* Record lowest soon-prunable XID */
    1298             : static void
    1299    13761630 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
    1300             : {
    1301             :     /*
    1302             :      * This should exactly match the PageSetPrunable macro.  We can't store
    1303             :      * directly into the page header yet, so we update working state.
    1304             :      */
    1305             :     Assert(TransactionIdIsNormal(xid));
    1306    26978744 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
    1307    13217114 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
    1308      546776 :         prstate->new_prune_xid = xid;
    1309    13761630 : }
    1310             : 
    1311             : /* Record line pointer to be redirected */
    1312             : static void
    1313      129906 : heap_prune_record_redirect(PruneState *prstate,
    1314             :                            OffsetNumber offnum, OffsetNumber rdoffnum,
    1315             :                            bool was_normal)
    1316             : {
    1317             :     Assert(!prstate->processed[offnum]);
    1318      129906 :     prstate->processed[offnum] = true;
    1319             : 
    1320             :     /*
    1321             :      * Do not mark the redirect target here.  It needs to be counted
    1322             :      * separately as an unchanged tuple.
    1323             :      */
    1324             : 
    1325             :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
    1326      129906 :     prstate->redirected[prstate->nredirected * 2] = offnum;
    1327      129906 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
    1328             : 
    1329      129906 :     prstate->nredirected++;
    1330             : 
    1331             :     /*
    1332             :      * If the root entry had been a normal tuple, we are deleting it, so count
    1333             :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1334             :      * count.
    1335             :      */
    1336      129906 :     if (was_normal)
    1337      114406 :         prstate->ndeleted++;
    1338             : 
    1339      129906 :     prstate->hastup = true;
    1340      129906 : }
    1341             : 
    1342             : /* Record line pointer to be marked dead */
    1343             : static void
    1344     2617472 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
    1345             :                        bool was_normal)
    1346             : {
    1347             :     Assert(!prstate->processed[offnum]);
    1348     2617472 :     prstate->processed[offnum] = true;
    1349             : 
    1350             :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
    1351     2617472 :     prstate->nowdead[prstate->ndead] = offnum;
    1352     2617472 :     prstate->ndead++;
    1353             : 
    1354             :     /*
    1355             :      * Deliberately delay unsetting all_visible and all_frozen until later
    1356             :      * during pruning. Removable dead tuples shouldn't preclude freezing the
    1357             :      * page.
    1358             :      */
    1359             : 
    1360             :     /* Record the dead offset for vacuum */
    1361     2617472 :     prstate->deadoffsets[prstate->lpdead_items++] = offnum;
    1362             : 
    1363             :     /*
    1364             :      * If the root entry had been a normal tuple, we are deleting it, so count
    1365             :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1366             :      * count.
    1367             :      */
    1368     2617472 :     if (was_normal)
    1369     2598514 :         prstate->ndeleted++;
    1370     2617472 : }
    1371             : 
    1372             : /*
    1373             :  * Depending on whether or not the caller set mark_unused_now to true, record that a
    1374             :  * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
    1375             :  * which we will mark line pointers LP_UNUSED, but we will not mark line
    1376             :  * pointers LP_DEAD if mark_unused_now is true.
    1377             :  */
    1378             : static void
    1379     2686484 : heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
    1380             :                                  bool was_normal)
    1381             : {
    1382             :     /*
    1383             :      * If the caller set mark_unused_now to true, we can remove dead tuples
    1384             :      * during pruning instead of marking their line pointers dead. Set this
    1385             :      * tuple's line pointer LP_UNUSED. We hint that this option is less
    1386             :      * likely.
    1387             :      */
    1388     2686484 :     if (unlikely(prstate->mark_unused_now))
    1389       69012 :         heap_prune_record_unused(prstate, offnum, was_normal);
    1390             :     else
    1391     2617472 :         heap_prune_record_dead(prstate, offnum, was_normal);
    1392     2686484 : }
    1393             : 
    1394             : /* Record line pointer to be marked unused */
    1395             : static void
    1396      177018 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
    1397             : {
    1398             :     Assert(!prstate->processed[offnum]);
    1399      177018 :     prstate->processed[offnum] = true;
    1400             : 
    1401             :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
    1402      177018 :     prstate->nowunused[prstate->nunused] = offnum;
    1403      177018 :     prstate->nunused++;
    1404             : 
    1405             :     /*
    1406             :      * If the root entry had been a normal tuple, we are deleting it, so count
    1407             :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1408             :      * count.
    1409             :      */
    1410      177018 :     if (was_normal)
    1411      175890 :         prstate->ndeleted++;
    1412      177018 : }
    1413             : 
    1414             : /*
    1415             :  * Record an unused line pointer that is left unchanged.
    1416             :  */
    1417             : static void
    1418      586464 : heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
    1419             : {
    1420             :     Assert(!prstate->processed[offnum]);
    1421      586464 :     prstate->processed[offnum] = true;
    1422      586464 : }
    1423             : 
    1424             : /*
    1425             :  * Record line pointer that is left unchanged.  We consider freezing it, and
    1426             :  * update bookkeeping of tuple counts and page visibility.
    1427             :  */
    1428             : static void
    1429    47208824 : heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
    1430             : {
    1431             :     HeapTupleHeader htup;
    1432             : 
    1433             :     Assert(!prstate->processed[offnum]);
    1434    47208824 :     prstate->processed[offnum] = true;
    1435             : 
    1436    47208824 :     prstate->hastup = true;      /* the page is not empty */
    1437             : 
    1438             :     /*
    1439             :      * The criteria for counting a tuple as live in this block need to match
    1440             :      * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
    1441             :      * ANALYZE may produce wildly different reltuples values, e.g. when there
    1442             :      * are many recently-dead tuples.
    1443             :      *
    1444             :      * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
    1445             :      * can't run inside a transaction block, which makes some cases impossible
    1446             :      * (e.g. in-progress insert from the same transaction).
    1447             :      *
    1448             :      * HEAPTUPLE_DEAD are handled by the other heap_prune_record_*()
    1449             :      * subroutines.  They don't count dead items like acquire_sample_rows()
    1450             :      * does, because we assume that all dead items will become LP_UNUSED
    1451             :      * before VACUUM finishes.  This difference is only superficial.  VACUUM
    1452             :      * effectively agrees with ANALYZE about DEAD items, in the end.  VACUUM
    1453             :      * won't remember LP_DEAD items, but only because they're not supposed to
    1454             :      * be left behind when it is done. (Cases where we bypass index vacuuming
    1455             :      * will violate this optimistic assumption, but the overall impact of that
    1456             :      * should be negligible.)
    1457             :      */
    1458    47208824 :     htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
    1459             : 
    1460    47208824 :     switch (prstate->htsv[offnum])
    1461             :     {
    1462    33294776 :         case HEAPTUPLE_LIVE:
    1463             : 
    1464             :             /*
    1465             :              * Count it as live.  Not only is this natural, but it's also what
    1466             :              * acquire_sample_rows() does.
    1467             :              */
    1468    33294776 :             prstate->live_tuples++;
    1469             : 
    1470             :             /*
    1471             :              * Is the tuple definitely visible to all transactions?
    1472             :              *
    1473             :              * NB: Like with per-tuple hint bits, we can't set the
    1474             :              * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
    1475             :              * See SetHintBits for more info.  Check that the tuple is hinted
    1476             :              * xmin-committed because of that.
    1477             :              */
    1478    33294776 :             if (prstate->all_visible)
    1479             :             {
    1480             :                 TransactionId xmin;
    1481             : 
    1482    24737904 :                 if (!HeapTupleHeaderXminCommitted(htup))
    1483             :                 {
    1484         340 :                     prstate->all_visible = false;
    1485         340 :                     prstate->all_frozen = false;
    1486         340 :                     break;
    1487             :                 }
    1488             : 
    1489             :                 /*
    1490             :                  * The inserter definitely committed.  But is it old enough
    1491             :                  * that everyone sees it as committed?  A FrozenTransactionId
    1492             :                  * is seen as committed to everyone.  Otherwise, we check if
    1493             :                  * there is a snapshot that considers this xid to still be
    1494             :                  * running, and if so, we don't consider the page all-visible.
    1495             :                  */
    1496    24737564 :                 xmin = HeapTupleHeaderGetXmin(htup);
    1497             : 
    1498             :                 /*
    1499             :                  * For now always use prstate->cutoffs for this test, because
    1500             :                  * we only update 'all_visible' and 'all_frozen' when freezing
    1501             :                  * is requested. We could use GlobalVisTestIsRemovableXid
    1502             :                  * instead, if a non-freezing caller wanted to set the VM bit.
    1503             :                  */
    1504             :                 Assert(prstate->cutoffs);
    1505    24737564 :                 if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin))
    1506             :                 {
    1507        6334 :                     prstate->all_visible = false;
    1508        6334 :                     prstate->all_frozen = false;
    1509        6334 :                     break;
    1510             :                 }
    1511             : 
    1512             :                 /* Track newest xmin on page. */
    1513    24731230 :                 if (TransactionIdFollows(xmin, prstate->visibility_cutoff_xid) &&
    1514             :                     TransactionIdIsNormal(xmin))
    1515      224158 :                     prstate->visibility_cutoff_xid = xmin;
    1516             :             }
    1517    33288102 :             break;
    1518             : 
    1519      510294 :         case HEAPTUPLE_RECENTLY_DEAD:
    1520      510294 :             prstate->recently_dead_tuples++;
    1521      510294 :             prstate->all_visible = false;
    1522      510294 :             prstate->all_frozen = false;
    1523             : 
    1524             :             /*
    1525             :              * This tuple will soon become DEAD.  Update the hint field so
    1526             :              * that the page is reconsidered for pruning in future.
    1527             :              */
    1528      510294 :             heap_prune_record_prunable(prstate,
    1529             :                                        HeapTupleHeaderGetUpdateXid(htup));
    1530      510294 :             break;
    1531             : 
    1532      152418 :         case HEAPTUPLE_INSERT_IN_PROGRESS:
    1533             : 
    1534             :             /*
    1535             :              * We do not count these rows as live, because we expect the
    1536             :              * inserting transaction to update the counters at commit, and we
    1537             :              * assume that will happen only after we report our results.  This
    1538             :              * assumption is a bit shaky, but it is what acquire_sample_rows()
    1539             :              * does, so be consistent.
    1540             :              */
    1541      152418 :             prstate->all_visible = false;
    1542      152418 :             prstate->all_frozen = false;
    1543             : 
    1544             :             /*
    1545             :              * If we wanted to optimize for aborts, we might consider marking
    1546             :              * the page prunable when we see INSERT_IN_PROGRESS.  But we
    1547             :              * don't.  See related decisions about when to mark the page
    1548             :              * prunable in heapam.c.
    1549             :              */
    1550      152418 :             break;
    1551             : 
    1552    13251336 :         case HEAPTUPLE_DELETE_IN_PROGRESS:
    1553             : 
    1554             :             /*
    1555             :              * This an expected case during concurrent vacuum.  Count such
    1556             :              * rows as live.  As above, we assume the deleting transaction
    1557             :              * will commit and update the counters after we report.
    1558             :              */
    1559    13251336 :             prstate->live_tuples++;
    1560    13251336 :             prstate->all_visible = false;
    1561    13251336 :             prstate->all_frozen = false;
    1562             : 
    1563             :             /*
    1564             :              * This tuple may soon become DEAD.  Update the hint field so that
    1565             :              * the page is reconsidered for pruning in future.
    1566             :              */
    1567    13251336 :             heap_prune_record_prunable(prstate,
    1568             :                                        HeapTupleHeaderGetUpdateXid(htup));
    1569    13251336 :             break;
    1570             : 
    1571           0 :         default:
    1572             : 
    1573             :             /*
    1574             :              * DEAD tuples should've been passed to heap_prune_record_dead()
    1575             :              * or heap_prune_record_unused() instead.
    1576             :              */
    1577           0 :             elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
    1578             :                  prstate->htsv[offnum]);
    1579             :             break;
    1580             :     }
    1581             : 
    1582             :     /* Consider freezing any normal tuples which will not be removed */
    1583    47208824 :     if (prstate->attempt_freeze)
    1584             :     {
    1585             :         bool        totally_frozen;
    1586             : 
    1587    44266688 :         if ((heap_prepare_freeze_tuple(htup,
    1588    44266688 :                                        prstate->cutoffs,
    1589             :                                        &prstate->pagefrz,
    1590    44266688 :                                        &prstate->frozen[prstate->nfrozen],
    1591             :                                        &totally_frozen)))
    1592             :         {
    1593             :             /* Save prepared freeze plan for later */
    1594     4291270 :             prstate->frozen[prstate->nfrozen++].offset = offnum;
    1595             :         }
    1596             : 
    1597             :         /*
    1598             :          * If any tuple isn't either totally frozen already or eligible to
    1599             :          * become totally frozen (according to its freeze plan), then the page
    1600             :          * definitely cannot be set all-frozen in the visibility map later on.
    1601             :          */
    1602    44266688 :         if (!totally_frozen)
    1603    14410810 :             prstate->all_frozen = false;
    1604             :     }
    1605    47208824 : }
    1606             : 
    1607             : 
    1608             : /*
    1609             :  * Record line pointer that was already LP_DEAD and is left unchanged.
    1610             :  */
    1611             : static void
    1612     2125400 : heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
    1613             : {
    1614             :     Assert(!prstate->processed[offnum]);
    1615     2125400 :     prstate->processed[offnum] = true;
    1616             : 
    1617             :     /*
    1618             :      * Deliberately don't set hastup for LP_DEAD items.  We make the soft
    1619             :      * assumption that any LP_DEAD items encountered here will become
    1620             :      * LP_UNUSED later on, before count_nondeletable_pages is reached.  If we
    1621             :      * don't make this assumption then rel truncation will only happen every
    1622             :      * other VACUUM, at most.  Besides, VACUUM must treat
    1623             :      * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
    1624             :      * handled (handled here, or handled later on).
    1625             :      *
    1626             :      * Similarly, don't unset all_visible and all_frozen until later, at the
    1627             :      * end of heap_page_prune_and_freeze().  This will allow us to attempt to
    1628             :      * freeze the page after pruning.  As long as we unset it before updating
    1629             :      * the visibility map, this will be correct.
    1630             :      */
    1631             : 
    1632             :     /* Record the dead offset for vacuum */
    1633     2125400 :     prstate->deadoffsets[prstate->lpdead_items++] = offnum;
    1634     2125400 : }
    1635             : 
    1636             : /*
    1637             :  * Record LP_REDIRECT that is left unchanged.
    1638             :  */
    1639             : static void
    1640      585032 : heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
    1641             : {
    1642             :     /*
    1643             :      * A redirect line pointer doesn't count as a live tuple.
    1644             :      *
    1645             :      * If we leave a redirect line pointer in place, there will be another
    1646             :      * tuple on the page that it points to.  We will do the bookkeeping for
    1647             :      * that separately.  So we have nothing to do here, except remember that
    1648             :      * we processed this item.
    1649             :      */
    1650             :     Assert(!prstate->processed[offnum]);
    1651      585032 :     prstate->processed[offnum] = true;
    1652      585032 : }
    1653             : 
    1654             : /*
    1655             :  * Perform the actual page changes needed by heap_page_prune_and_freeze().
    1656             :  *
    1657             :  * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
    1658             :  * as unused, not redirecting or removing anything else.  The
    1659             :  * PageRepairFragmentation() call is skipped in that case.
    1660             :  *
    1661             :  * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
    1662             :  * the buffer.  If it is set, an ordinary exclusive lock suffices.
    1663             :  */
    1664             : void
    1665      125628 : heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
    1666             :                         OffsetNumber *redirected, int nredirected,
    1667             :                         OffsetNumber *nowdead, int ndead,
    1668             :                         OffsetNumber *nowunused, int nunused)
    1669             : {
    1670      125628 :     Page        page = BufferGetPage(buffer);
    1671             :     OffsetNumber *offnum;
    1672             :     HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
    1673             : 
    1674             :     /* Shouldn't be called unless there's something to do */
    1675             :     Assert(nredirected > 0 || ndead > 0 || nunused > 0);
    1676             : 
    1677             :     /* If 'lp_truncate_only', we can only remove already-dead line pointers */
    1678             :     Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
    1679             : 
    1680             :     /* Update all redirected line pointers */
    1681      125628 :     offnum = redirected;
    1682      292992 :     for (int i = 0; i < nredirected; i++)
    1683             :     {
    1684      167364 :         OffsetNumber fromoff = *offnum++;
    1685      167364 :         OffsetNumber tooff = *offnum++;
    1686      167364 :         ItemId      fromlp = PageGetItemId(page, fromoff);
    1687             :         ItemId      tolp PG_USED_FOR_ASSERTS_ONLY;
    1688             : 
    1689             : #ifdef USE_ASSERT_CHECKING
    1690             : 
    1691             :         /*
    1692             :          * Any existing item that we set as an LP_REDIRECT (any 'from' item)
    1693             :          * must be the first item from a HOT chain.  If the item has tuple
    1694             :          * storage then it can't be a heap-only tuple.  Otherwise we are just
    1695             :          * maintaining an existing LP_REDIRECT from an existing HOT chain that
    1696             :          * has been pruned at least once before now.
    1697             :          */
    1698             :         if (!ItemIdIsRedirected(fromlp))
    1699             :         {
    1700             :             Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
    1701             : 
    1702             :             htup = (HeapTupleHeader) PageGetItem(page, fromlp);
    1703             :             Assert(!HeapTupleHeaderIsHeapOnly(htup));
    1704             :         }
    1705             :         else
    1706             :         {
    1707             :             /* We shouldn't need to redundantly set the redirect */
    1708             :             Assert(ItemIdGetRedirect(fromlp) != tooff);
    1709             :         }
    1710             : 
    1711             :         /*
    1712             :          * The item that we're about to set as an LP_REDIRECT (the 'from'
    1713             :          * item) will point to an existing item (the 'to' item) that is
    1714             :          * already a heap-only tuple.  There can be at most one LP_REDIRECT
    1715             :          * item per HOT chain.
    1716             :          *
    1717             :          * We need to keep around an LP_REDIRECT item (after original
    1718             :          * non-heap-only root tuple gets pruned away) so that it's always
    1719             :          * possible for VACUUM to easily figure out what TID to delete from
    1720             :          * indexes when an entire HOT chain becomes dead.  A heap-only tuple
    1721             :          * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
    1722             :          * tuple can.
    1723             :          *
    1724             :          * This check may miss problems, e.g. the target of a redirect could
    1725             :          * be marked as unused subsequently. The page_verify_redirects() check
    1726             :          * below will catch such problems.
    1727             :          */
    1728             :         tolp = PageGetItemId(page, tooff);
    1729             :         Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
    1730             :         htup = (HeapTupleHeader) PageGetItem(page, tolp);
    1731             :         Assert(HeapTupleHeaderIsHeapOnly(htup));
    1732             : #endif
    1733             : 
    1734      167364 :         ItemIdSetRedirect(fromlp, tooff);
    1735             :     }
    1736             : 
    1737             :     /* Update all now-dead line pointers */
    1738      125628 :     offnum = nowdead;
    1739     3292936 :     for (int i = 0; i < ndead; i++)
    1740             :     {
    1741     3167308 :         OffsetNumber off = *offnum++;
    1742     3167308 :         ItemId      lp = PageGetItemId(page, off);
    1743             : 
    1744             : #ifdef USE_ASSERT_CHECKING
    1745             : 
    1746             :         /*
    1747             :          * An LP_DEAD line pointer must be left behind when the original item
    1748             :          * (which is dead to everybody) could still be referenced by a TID in
    1749             :          * an index.  This should never be necessary with any individual
    1750             :          * heap-only tuple item, though. (It's not clear how much of a problem
    1751             :          * that would be, but there is no reason to allow it.)
    1752             :          */
    1753             :         if (ItemIdHasStorage(lp))
    1754             :         {
    1755             :             Assert(ItemIdIsNormal(lp));
    1756             :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    1757             :             Assert(!HeapTupleHeaderIsHeapOnly(htup));
    1758             :         }
    1759             :         else
    1760             :         {
    1761             :             /* Whole HOT chain becomes dead */
    1762             :             Assert(ItemIdIsRedirected(lp));
    1763             :         }
    1764             : #endif
    1765             : 
    1766     3167308 :         ItemIdSetDead(lp);
    1767             :     }
    1768             : 
    1769             :     /* Update all now-unused line pointers */
    1770      125628 :     offnum = nowunused;
    1771      804958 :     for (int i = 0; i < nunused; i++)
    1772             :     {
    1773      679330 :         OffsetNumber off = *offnum++;
    1774      679330 :         ItemId      lp = PageGetItemId(page, off);
    1775             : 
    1776             : #ifdef USE_ASSERT_CHECKING
    1777             : 
    1778             :         if (lp_truncate_only)
    1779             :         {
    1780             :             /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
    1781             :             Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
    1782             :         }
    1783             :         else
    1784             :         {
    1785             :             /*
    1786             :              * When heap_page_prune_and_freeze() was called, mark_unused_now
    1787             :              * may have been passed as true, which allows would-be LP_DEAD
    1788             :              * items to be made LP_UNUSED instead.  This is only possible if
    1789             :              * the relation has no indexes.  If there are any dead items, then
    1790             :              * mark_unused_now was not true and every item being marked
    1791             :              * LP_UNUSED must refer to a heap-only tuple.
    1792             :              */
    1793             :             if (ndead > 0)
    1794             :             {
    1795             :                 Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
    1796             :                 htup = (HeapTupleHeader) PageGetItem(page, lp);
    1797             :                 Assert(HeapTupleHeaderIsHeapOnly(htup));
    1798             :             }
    1799             :             else
    1800             :                 Assert(ItemIdIsUsed(lp));
    1801             :         }
    1802             : 
    1803             : #endif
    1804             : 
    1805      679330 :         ItemIdSetUnused(lp);
    1806             :     }
    1807             : 
    1808      125628 :     if (lp_truncate_only)
    1809        5528 :         PageTruncateLinePointerArray(page);
    1810             :     else
    1811             :     {
    1812             :         /*
    1813             :          * Finally, repair any fragmentation, and update the page's hint bit
    1814             :          * about whether it has free pointers.
    1815             :          */
    1816      120100 :         PageRepairFragmentation(page);
    1817             : 
    1818             :         /*
    1819             :          * Now that the page has been modified, assert that redirect items
    1820             :          * still point to valid targets.
    1821             :          */
    1822      120100 :         page_verify_redirects(page);
    1823             :     }
    1824      125628 : }
    1825             : 
    1826             : 
    1827             : /*
    1828             :  * If built with assertions, verify that all LP_REDIRECT items point to a
    1829             :  * valid item.
    1830             :  *
    1831             :  * One way that bugs related to HOT pruning show is redirect items pointing to
    1832             :  * removed tuples. It's not trivial to reliably check that marking an item
    1833             :  * unused will not orphan a redirect item during heap_prune_chain() /
    1834             :  * heap_page_prune_execute(), so we additionally check the whole page after
    1835             :  * pruning. Without this check such bugs would typically only cause asserts
    1836             :  * later, potentially well after the corruption has been introduced.
    1837             :  *
    1838             :  * Also check comments in heap_page_prune_execute()'s redirection loop.
    1839             :  */
    1840             : static void
    1841      120100 : page_verify_redirects(Page page)
    1842             : {
    1843             : #ifdef USE_ASSERT_CHECKING
    1844             :     OffsetNumber offnum;
    1845             :     OffsetNumber maxoff;
    1846             : 
    1847             :     maxoff = PageGetMaxOffsetNumber(page);
    1848             :     for (offnum = FirstOffsetNumber;
    1849             :          offnum <= maxoff;
    1850             :          offnum = OffsetNumberNext(offnum))
    1851             :     {
    1852             :         ItemId      itemid = PageGetItemId(page, offnum);
    1853             :         OffsetNumber targoff;
    1854             :         ItemId      targitem;
    1855             :         HeapTupleHeader htup;
    1856             : 
    1857             :         if (!ItemIdIsRedirected(itemid))
    1858             :             continue;
    1859             : 
    1860             :         targoff = ItemIdGetRedirect(itemid);
    1861             :         targitem = PageGetItemId(page, targoff);
    1862             : 
    1863             :         Assert(ItemIdIsUsed(targitem));
    1864             :         Assert(ItemIdIsNormal(targitem));
    1865             :         Assert(ItemIdHasStorage(targitem));
    1866             :         htup = (HeapTupleHeader) PageGetItem(page, targitem);
    1867             :         Assert(HeapTupleHeaderIsHeapOnly(htup));
    1868             :     }
    1869             : #endif
    1870      120100 : }
    1871             : 
    1872             : 
    1873             : /*
    1874             :  * For all items in this page, find their respective root line pointers.
    1875             :  * If item k is part of a HOT-chain with root at item j, then we set
    1876             :  * root_offsets[k - 1] = j.
    1877             :  *
    1878             :  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
    1879             :  * Unused entries are filled with InvalidOffsetNumber (zero).
    1880             :  *
    1881             :  * The function must be called with at least share lock on the buffer, to
    1882             :  * prevent concurrent prune operations.
    1883             :  *
    1884             :  * Note: The information collected here is valid only as long as the caller
    1885             :  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
    1886             :  * and reused by a completely unrelated tuple.
    1887             :  */
    1888             : void
    1889      224766 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
    1890             : {
    1891             :     OffsetNumber offnum,
    1892             :                 maxoff;
    1893             : 
    1894      224766 :     MemSet(root_offsets, InvalidOffsetNumber,
    1895             :            MaxHeapTuplesPerPage * sizeof(OffsetNumber));
    1896             : 
    1897      224766 :     maxoff = PageGetMaxOffsetNumber(page);
    1898    18115978 :     for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    1899             :     {
    1900    17891212 :         ItemId      lp = PageGetItemId(page, offnum);
    1901             :         HeapTupleHeader htup;
    1902             :         OffsetNumber nextoffnum;
    1903             :         TransactionId priorXmax;
    1904             : 
    1905             :         /* skip unused and dead items */
    1906    17891212 :         if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
    1907       22134 :             continue;
    1908             : 
    1909    17869078 :         if (ItemIdIsNormal(lp))
    1910             :         {
    1911    17860570 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    1912             : 
    1913             :             /*
    1914             :              * Check if this tuple is part of a HOT-chain rooted at some other
    1915             :              * tuple. If so, skip it for now; we'll process it when we find
    1916             :              * its root.
    1917             :              */
    1918    17860570 :             if (HeapTupleHeaderIsHeapOnly(htup))
    1919        9000 :                 continue;
    1920             : 
    1921             :             /*
    1922             :              * This is either a plain tuple or the root of a HOT-chain.
    1923             :              * Remember it in the mapping.
    1924             :              */
    1925    17851570 :             root_offsets[offnum - 1] = offnum;
    1926             : 
    1927             :             /* If it's not the start of a HOT-chain, we're done with it */
    1928    17851570 :             if (!HeapTupleHeaderIsHotUpdated(htup))
    1929    17851176 :                 continue;
    1930             : 
    1931             :             /* Set up to scan the HOT-chain */
    1932         394 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1933         394 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1934             :         }
    1935             :         else
    1936             :         {
    1937             :             /* Must be a redirect item. We do not set its root_offsets entry */
    1938             :             Assert(ItemIdIsRedirected(lp));
    1939             :             /* Set up to scan the HOT-chain */
    1940        8508 :             nextoffnum = ItemIdGetRedirect(lp);
    1941        8508 :             priorXmax = InvalidTransactionId;
    1942             :         }
    1943             : 
    1944             :         /*
    1945             :          * Now follow the HOT-chain and collect other tuples in the chain.
    1946             :          *
    1947             :          * Note: Even though this is a nested loop, the complexity of the
    1948             :          * function is O(N) because a tuple in the page should be visited not
    1949             :          * more than twice, once in the outer loop and once in HOT-chain
    1950             :          * chases.
    1951             :          */
    1952             :         for (;;)
    1953             :         {
    1954             :             /* Sanity check (pure paranoia) */
    1955        8994 :             if (offnum < FirstOffsetNumber)
    1956           0 :                 break;
    1957             : 
    1958             :             /*
    1959             :              * An offset past the end of page's line pointer array is possible
    1960             :              * when the array was truncated
    1961             :              */
    1962        8994 :             if (offnum > maxoff)
    1963           0 :                 break;
    1964             : 
    1965        8994 :             lp = PageGetItemId(page, nextoffnum);
    1966             : 
    1967             :             /* Check for broken chains */
    1968        8994 :             if (!ItemIdIsNormal(lp))
    1969           0 :                 break;
    1970             : 
    1971        8994 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    1972             : 
    1973        9480 :             if (TransactionIdIsValid(priorXmax) &&
    1974         486 :                 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
    1975           0 :                 break;
    1976             : 
    1977             :             /* Remember the root line pointer for this item */
    1978        8994 :             root_offsets[nextoffnum - 1] = offnum;
    1979             : 
    1980             :             /* Advance to next chain member, if any */
    1981        8994 :             if (!HeapTupleHeaderIsHotUpdated(htup))
    1982        8902 :                 break;
    1983             : 
    1984             :             /* HOT implies it can't have moved to different partition */
    1985             :             Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
    1986             : 
    1987          92 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1988          92 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1989             :         }
    1990             :     }
    1991      224766 : }
    1992             : 
    1993             : 
    1994             : /*
    1995             :  * Compare fields that describe actions required to freeze tuple with caller's
    1996             :  * open plan.  If everything matches then the frz tuple plan is equivalent to
    1997             :  * caller's plan.
    1998             :  */
    1999             : static inline bool
    2000     1863648 : heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
    2001             : {
    2002     1863648 :     if (plan->xmax == frz->xmax &&
    2003     1861060 :         plan->t_infomask2 == frz->t_infomask2 &&
    2004     1859352 :         plan->t_infomask == frz->t_infomask &&
    2005     1854216 :         plan->frzflags == frz->frzflags)
    2006     1854216 :         return true;
    2007             : 
    2008             :     /* Caller must call heap_log_freeze_new_plan again for frz */
    2009        9432 :     return false;
    2010             : }
    2011             : 
    2012             : /*
    2013             :  * Comparator used to deduplicate the freeze plans used in WAL records.
    2014             :  */
    2015             : static int
    2016     2597952 : heap_log_freeze_cmp(const void *arg1, const void *arg2)
    2017             : {
    2018     2597952 :     HeapTupleFreeze *frz1 = (HeapTupleFreeze *) arg1;
    2019     2597952 :     HeapTupleFreeze *frz2 = (HeapTupleFreeze *) arg2;
    2020             : 
    2021     2597952 :     if (frz1->xmax < frz2->xmax)
    2022       26496 :         return -1;
    2023     2571456 :     else if (frz1->xmax > frz2->xmax)
    2024       27172 :         return 1;
    2025             : 
    2026     2544284 :     if (frz1->t_infomask2 < frz2->t_infomask2)
    2027        9158 :         return -1;
    2028     2535126 :     else if (frz1->t_infomask2 > frz2->t_infomask2)
    2029        9604 :         return 1;
    2030             : 
    2031     2525522 :     if (frz1->t_infomask < frz2->t_infomask)
    2032       22754 :         return -1;
    2033     2502768 :     else if (frz1->t_infomask > frz2->t_infomask)
    2034       31348 :         return 1;
    2035             : 
    2036     2471420 :     if (frz1->frzflags < frz2->frzflags)
    2037           0 :         return -1;
    2038     2471420 :     else if (frz1->frzflags > frz2->frzflags)
    2039           0 :         return 1;
    2040             : 
    2041             :     /*
    2042             :      * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
    2043             :      * (So the tuples will share a single canonical freeze plan.)
    2044             :      *
    2045             :      * We tiebreak on page offset number to keep each freeze plan's page
    2046             :      * offset number array individually sorted. (Unnecessary, but be tidy.)
    2047             :      */
    2048     2471420 :     if (frz1->offset < frz2->offset)
    2049     2078564 :         return -1;
    2050      392856 :     else if (frz1->offset > frz2->offset)
    2051      392856 :         return 1;
    2052             : 
    2053             :     Assert(false);
    2054           0 :     return 0;
    2055             : }
    2056             : 
    2057             : /*
    2058             :  * Start new plan initialized using tuple-level actions.  At least one tuple
    2059             :  * will have steps required to freeze described by caller's plan during REDO.
    2060             :  */
    2061             : static inline void
    2062       54980 : heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
    2063             : {
    2064       54980 :     plan->xmax = frz->xmax;
    2065       54980 :     plan->t_infomask2 = frz->t_infomask2;
    2066       54980 :     plan->t_infomask = frz->t_infomask;
    2067       54980 :     plan->frzflags = frz->frzflags;
    2068       54980 :     plan->ntuples = 1;           /* for now */
    2069       54980 : }
    2070             : 
    2071             : /*
    2072             :  * Deduplicate tuple-based freeze plans so that each distinct set of
    2073             :  * processing steps is only stored once in the WAL record.
    2074             :  * Called during original execution of freezing (for logged relations).
    2075             :  *
    2076             :  * Return value is number of plans set in *plans_out for caller.  Also writes
    2077             :  * an array of offset numbers into *offsets_out output argument for caller
    2078             :  * (actually there is one array per freeze plan, but that's not of immediate
    2079             :  * concern to our caller).
    2080             :  */
    2081             : static int
    2082       45548 : heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
    2083             :                      xlhp_freeze_plan *plans_out,
    2084             :                      OffsetNumber *offsets_out)
    2085             : {
    2086       45548 :     int         nplans = 0;
    2087             : 
    2088             :     /* Sort tuple-based freeze plans in the order required to deduplicate */
    2089       45548 :     qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
    2090             : 
    2091     1954744 :     for (int i = 0; i < ntuples; i++)
    2092             :     {
    2093     1909196 :         HeapTupleFreeze *frz = tuples + i;
    2094             : 
    2095     1909196 :         if (i == 0)
    2096             :         {
    2097             :             /* New canonical freeze plan starting with first tup */
    2098       45548 :             heap_log_freeze_new_plan(plans_out, frz);
    2099       45548 :             nplans++;
    2100             :         }
    2101     1863648 :         else if (heap_log_freeze_eq(plans_out, frz))
    2102             :         {
    2103             :             /* tup matches open canonical plan -- include tup in it */
    2104             :             Assert(offsets_out[i - 1] < frz->offset);
    2105     1854216 :             plans_out->ntuples++;
    2106             :         }
    2107             :         else
    2108             :         {
    2109             :             /* Tup doesn't match current plan -- done with it now */
    2110        9432 :             plans_out++;
    2111             : 
    2112             :             /* New canonical freeze plan starting with this tup */
    2113        9432 :             heap_log_freeze_new_plan(plans_out, frz);
    2114        9432 :             nplans++;
    2115             :         }
    2116             : 
    2117             :         /*
    2118             :          * Save page offset number in dedicated buffer in passing.
    2119             :          *
    2120             :          * REDO routine relies on the record's offset numbers array grouping
    2121             :          * offset numbers by freeze plan.  The sort order within each grouping
    2122             :          * is ascending offset number order, just to keep things tidy.
    2123             :          */
    2124     1909196 :         offsets_out[i] = frz->offset;
    2125             :     }
    2126             : 
    2127             :     Assert(nplans > 0 && nplans <= ntuples);
    2128             : 
    2129       45548 :     return nplans;
    2130             : }
    2131             : 
    2132             : /*
    2133             :  * Write an XLOG_HEAP2_PRUNE* WAL record
    2134             :  *
    2135             :  * This is used for several different page maintenance operations:
    2136             :  *
    2137             :  * - Page pruning, in VACUUM's 1st pass or on access: Some items are
    2138             :  *   redirected, some marked dead, and some removed altogether.
    2139             :  *
    2140             :  * - Freezing: Items are marked as 'frozen'.
    2141             :  *
    2142             :  * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
    2143             :  *
    2144             :  * They have enough commonalities that we use a single WAL record for them
    2145             :  * all.
    2146             :  *
    2147             :  * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
    2148             :  * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
    2149             :  * replaying 'unused' items depends on whether they were all previously marked
    2150             :  * as dead.
    2151             :  *
    2152             :  * If the VM is being updated, vmflags will contain the bits to set. In this
    2153             :  * case, vmbuffer should already have been updated and marked dirty and should
    2154             :  * still be pinned and locked.
    2155             :  *
    2156             :  * Note: This function scribbles on the 'frozen' array.
    2157             :  *
    2158             :  * Note: This is called in a critical section, so careful what you do here.
    2159             :  */
    2160             : void
    2161      173588 : log_heap_prune_and_freeze(Relation relation, Buffer buffer,
    2162             :                           Buffer vmbuffer, uint8 vmflags,
    2163             :                           TransactionId conflict_xid,
    2164             :                           bool cleanup_lock,
    2165             :                           PruneReason reason,
    2166             :                           HeapTupleFreeze *frozen, int nfrozen,
    2167             :                           OffsetNumber *redirected, int nredirected,
    2168             :                           OffsetNumber *dead, int ndead,
    2169             :                           OffsetNumber *unused, int nunused)
    2170             : {
    2171             :     xl_heap_prune xlrec;
    2172             :     XLogRecPtr  recptr;
    2173             :     uint8       info;
    2174             :     uint8       regbuf_flags_heap;
    2175             : 
    2176             :     /* The following local variables hold data registered in the WAL record: */
    2177             :     xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
    2178             :     xlhp_freeze_plans freeze_plans;
    2179             :     xlhp_prune_items redirect_items;
    2180             :     xlhp_prune_items dead_items;
    2181             :     xlhp_prune_items unused_items;
    2182             :     OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
    2183      173588 :     bool        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
    2184      173588 :     bool        do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
    2185             : 
    2186             :     Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
    2187             : 
    2188      173588 :     xlrec.flags = 0;
    2189      173588 :     regbuf_flags_heap = REGBUF_STANDARD;
    2190             : 
    2191             :     /*
    2192             :      * We can avoid an FPI of the heap page if the only modification we are
    2193             :      * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
    2194             :      * disabled. Note that if we explicitly skip an FPI, we must not stamp the
    2195             :      * heap page with this record's LSN. Recovery skips records <= the stamped
    2196             :      * LSN, so this could lead to skipping an earlier FPI needed to repair a
    2197             :      * torn page.
    2198             :      */
    2199      173588 :     if (!do_prune &&
    2200           0 :         nfrozen == 0 &&
    2201           0 :         (!do_set_vm || !XLogHintBitIsNeeded()))
    2202           0 :         regbuf_flags_heap |= REGBUF_NO_IMAGE;
    2203             : 
    2204             :     /*
    2205             :      * Prepare data for the buffer.  The arrays are not actually in the
    2206             :      * buffer, but we pretend that they are.  When XLogInsert stores a full
    2207             :      * page image, the arrays can be omitted.
    2208             :      */
    2209      173588 :     XLogBeginInsert();
    2210      173588 :     XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
    2211             : 
    2212      173588 :     if (do_set_vm)
    2213       27478 :         XLogRegisterBuffer(1, vmbuffer, 0);
    2214             : 
    2215      173588 :     if (nfrozen > 0)
    2216             :     {
    2217             :         int         nplans;
    2218             : 
    2219       45548 :         xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
    2220             : 
    2221             :         /*
    2222             :          * Prepare deduplicated representation for use in the WAL record. This
    2223             :          * destructively sorts frozen tuples array in-place.
    2224             :          */
    2225       45548 :         nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
    2226             : 
    2227       45548 :         freeze_plans.nplans = nplans;
    2228       45548 :         XLogRegisterBufData(0, &freeze_plans,
    2229             :                             offsetof(xlhp_freeze_plans, plans));
    2230       45548 :         XLogRegisterBufData(0, plans,
    2231             :                             sizeof(xlhp_freeze_plan) * nplans);
    2232             :     }
    2233      173588 :     if (nredirected > 0)
    2234             :     {
    2235       33344 :         xlrec.flags |= XLHP_HAS_REDIRECTIONS;
    2236             : 
    2237       33344 :         redirect_items.ntargets = nredirected;
    2238       33344 :         XLogRegisterBufData(0, &redirect_items,
    2239             :                             offsetof(xlhp_prune_items, data));
    2240       33344 :         XLogRegisterBufData(0, redirected,
    2241             :                             sizeof(OffsetNumber[2]) * nredirected);
    2242             :     }
    2243      173588 :     if (ndead > 0)
    2244             :     {
    2245       77942 :         xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
    2246             : 
    2247       77942 :         dead_items.ntargets = ndead;
    2248       77942 :         XLogRegisterBufData(0, &dead_items,
    2249             :                             offsetof(xlhp_prune_items, data));
    2250       77942 :         XLogRegisterBufData(0, dead,
    2251             :                             sizeof(OffsetNumber) * ndead);
    2252             :     }
    2253      173588 :     if (nunused > 0)
    2254             :     {
    2255       50186 :         xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
    2256             : 
    2257       50186 :         unused_items.ntargets = nunused;
    2258       50186 :         XLogRegisterBufData(0, &unused_items,
    2259             :                             offsetof(xlhp_prune_items, data));
    2260       50186 :         XLogRegisterBufData(0, unused,
    2261             :                             sizeof(OffsetNumber) * nunused);
    2262             :     }
    2263      173588 :     if (nfrozen > 0)
    2264       45548 :         XLogRegisterBufData(0, frz_offsets,
    2265             :                             sizeof(OffsetNumber) * nfrozen);
    2266             : 
    2267             :     /*
    2268             :      * Prepare the main xl_heap_prune record.  We already set the XLHP_HAS_*
    2269             :      * flag above.
    2270             :      */
    2271      173588 :     if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
    2272             :     {
    2273       27478 :         xlrec.flags |= XLHP_VM_ALL_VISIBLE;
    2274       27478 :         if (vmflags & VISIBILITYMAP_ALL_FROZEN)
    2275       21248 :             xlrec.flags |= XLHP_VM_ALL_FROZEN;
    2276             :     }
    2277      173588 :     if (RelationIsAccessibleInLogicalDecoding(relation))
    2278        1286 :         xlrec.flags |= XLHP_IS_CATALOG_REL;
    2279      173588 :     if (TransactionIdIsValid(conflict_xid))
    2280      142128 :         xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
    2281      173588 :     if (cleanup_lock)
    2282      145794 :         xlrec.flags |= XLHP_CLEANUP_LOCK;
    2283             :     else
    2284             :     {
    2285             :         Assert(nredirected == 0 && ndead == 0);
    2286             :         /* also, any items in 'unused' must've been LP_DEAD previously */
    2287             :     }
    2288      173588 :     XLogRegisterData(&xlrec, SizeOfHeapPrune);
    2289      173588 :     if (TransactionIdIsValid(conflict_xid))
    2290      142128 :         XLogRegisterData(&conflict_xid, sizeof(TransactionId));
    2291             : 
    2292      173588 :     switch (reason)
    2293             :     {
    2294       82968 :         case PRUNE_ON_ACCESS:
    2295       82968 :             info = XLOG_HEAP2_PRUNE_ON_ACCESS;
    2296       82968 :             break;
    2297       62826 :         case PRUNE_VACUUM_SCAN:
    2298       62826 :             info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
    2299       62826 :             break;
    2300       27794 :         case PRUNE_VACUUM_CLEANUP:
    2301       27794 :             info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
    2302       27794 :             break;
    2303           0 :         default:
    2304           0 :             elog(ERROR, "unrecognized prune reason: %d", (int) reason);
    2305             :             break;
    2306             :     }
    2307      173588 :     recptr = XLogInsert(RM_HEAP2_ID, info);
    2308             : 
    2309      173588 :     if (do_set_vm)
    2310             :     {
    2311             :         Assert(BufferIsDirty(vmbuffer));
    2312       27478 :         PageSetLSN(BufferGetPage(vmbuffer), recptr);
    2313             :     }
    2314             : 
    2315             :     /*
    2316             :      * See comment at the top of the function about regbuf_flags_heap for
    2317             :      * details on when we can advance the page LSN.
    2318             :      */
    2319      173588 :     if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
    2320             :     {
    2321             :         Assert(BufferIsDirty(buffer));
    2322      173588 :         PageSetLSN(BufferGetPage(buffer), recptr);
    2323             :     }
    2324      173588 : }

Generated by: LCOV version 1.16