LCOV - code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 94.9 % 532 505
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 25 25
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pruneheap.c
       4              :  *    heap page pruning and HOT-chain management code
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/heap/pruneheap.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/heapam.h"
      18              : #include "access/heapam_xlog.h"
      19              : #include "access/htup_details.h"
      20              : #include "access/multixact.h"
      21              : #include "access/transam.h"
      22              : #include "access/visibilitymapdefs.h"
      23              : #include "access/xlog.h"
      24              : #include "access/xloginsert.h"
      25              : #include "commands/vacuum.h"
      26              : #include "executor/instrument.h"
      27              : #include "miscadmin.h"
      28              : #include "pgstat.h"
      29              : #include "storage/bufmgr.h"
      30              : #include "utils/rel.h"
      31              : #include "utils/snapmgr.h"
      32              : 
      33              : /* Working data for heap_page_prune_and_freeze() and subroutines */
      34              : typedef struct
      35              : {
      36              :     /*-------------------------------------------------------
      37              :      * Arguments passed to heap_page_prune_and_freeze()
      38              :      *-------------------------------------------------------
      39              :      */
      40              : 
      41              :     /* tuple visibility test, initialized for the relation */
      42              :     GlobalVisState *vistest;
      43              :     /* whether or not dead items can be set LP_UNUSED during pruning */
      44              :     bool        mark_unused_now;
      45              :     /* whether to attempt freezing tuples */
      46              :     bool        attempt_freeze;
      47              :     struct VacuumCutoffs *cutoffs;
      48              : 
      49              :     /*-------------------------------------------------------
      50              :      * Fields describing what to do to the page
      51              :      *-------------------------------------------------------
      52              :      */
      53              :     TransactionId new_prune_xid;    /* new prune hint value */
      54              :     TransactionId latest_xid_removed;
      55              :     int         nredirected;    /* numbers of entries in arrays below */
      56              :     int         ndead;
      57              :     int         nunused;
      58              :     int         nfrozen;
      59              :     /* arrays that accumulate indexes of items to be changed */
      60              :     OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
      61              :     OffsetNumber nowdead[MaxHeapTuplesPerPage];
      62              :     OffsetNumber nowunused[MaxHeapTuplesPerPage];
      63              :     HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
      64              : 
      65              :     /*-------------------------------------------------------
      66              :      * Working state for HOT chain processing
      67              :      *-------------------------------------------------------
      68              :      */
      69              : 
      70              :     /*
      71              :      * 'root_items' contains offsets of all LP_REDIRECT line pointers and
      72              :      * normal non-HOT tuples.  They can be stand-alone items or the first item
      73              :      * in a HOT chain.  'heaponly_items' contains heap-only tuples which can
      74              :      * only be removed as part of a HOT chain.
      75              :      */
      76              :     int         nroot_items;
      77              :     OffsetNumber root_items[MaxHeapTuplesPerPage];
      78              :     int         nheaponly_items;
      79              :     OffsetNumber heaponly_items[MaxHeapTuplesPerPage];
      80              : 
      81              :     /*
      82              :      * processed[offnum] is true if item at offnum has been processed.
      83              :      *
      84              :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
      85              :      * 1. Otherwise every access would need to subtract 1.
      86              :      */
      87              :     bool        processed[MaxHeapTuplesPerPage + 1];
      88              : 
      89              :     /*
      90              :      * Tuple visibility is only computed once for each tuple, for correctness
      91              :      * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
      92              :      * details.  This is of type int8[], instead of HTSV_Result[], so we can
      93              :      * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
      94              :      * items.
      95              :      *
      96              :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
      97              :      * 1. Otherwise every access would need to subtract 1.
      98              :      */
      99              :     int8        htsv[MaxHeapTuplesPerPage + 1];
     100              : 
     101              :     /*
     102              :      * Freezing-related state.
     103              :      */
     104              :     HeapPageFreeze pagefrz;
     105              : 
     106              :     /*-------------------------------------------------------
     107              :      * Information about what was done
     108              :      *
     109              :      * These fields are not used by pruning itself for the most part, but are
     110              :      * used to collect information about what was pruned and what state the
     111              :      * page is in after pruning, for the benefit of the caller.  They are
     112              :      * copied to the caller's PruneFreezeResult at the end.
     113              :      * -------------------------------------------------------
     114              :      */
     115              : 
     116              :     int         ndeleted;       /* Number of tuples deleted from the page */
     117              : 
     118              :     /* Number of live and recently dead tuples, after pruning */
     119              :     int         live_tuples;
     120              :     int         recently_dead_tuples;
     121              : 
     122              :     /* Whether or not the page makes rel truncation unsafe */
     123              :     bool        hastup;
     124              : 
     125              :     /*
     126              :      * LP_DEAD items on the page after pruning.  Includes existing LP_DEAD
     127              :      * items
     128              :      */
     129              :     int         lpdead_items;   /* number of items in the array */
     130              :     OffsetNumber *deadoffsets;  /* points directly to presult->deadoffsets */
     131              : 
     132              :     /*
     133              :      * The snapshot conflict horizon used when freezing tuples. The final
     134              :      * snapshot conflict horizon for the record may be newer if pruning
     135              :      * removes newer transaction IDs.
     136              :      */
     137              :     TransactionId frz_conflict_horizon;
     138              : 
     139              :     /*
     140              :      * all_visible and all_frozen indicate if the all-visible and all-frozen
     141              :      * bits in the visibility map can be set for this page after pruning.
     142              :      *
     143              :      * visibility_cutoff_xid is the newest xmin of live tuples on the page.
     144              :      * The caller can use it as the conflict horizon, when setting the VM
     145              :      * bits.  It is only valid if we froze some tuples, and all_frozen is
     146              :      * true.
     147              :      *
     148              :      * NOTE: all_visible and all_frozen initially don't include LP_DEAD items.
     149              :      * That's convenient for heap_page_prune_and_freeze() to use them to
     150              :      * decide whether to freeze the page or not.  The all_visible and
     151              :      * all_frozen values returned to the caller are adjusted to include
     152              :      * LP_DEAD items after we determine whether to opportunistically freeze.
     153              :      */
     154              :     bool        all_visible;
     155              :     bool        all_frozen;
     156              :     TransactionId visibility_cutoff_xid;
     157              : } PruneState;
     158              : 
     159              : /* Local functions */
     160              : static void prune_freeze_setup(PruneFreezeParams *params,
     161              :                                TransactionId *new_relfrozen_xid,
     162              :                                MultiXactId *new_relmin_mxid,
     163              :                                PruneFreezeResult *presult,
     164              :                                PruneState *prstate);
     165              : static void prune_freeze_plan(Oid reloid, Buffer buffer,
     166              :                               PruneState *prstate,
     167              :                               OffsetNumber *off_loc);
     168              : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
     169              :                                                HeapTuple tup,
     170              :                                                Buffer buffer);
     171              : static inline HTSV_Result htsv_get_valid_status(int status);
     172              : static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
     173              :                              OffsetNumber rootoffnum, PruneState *prstate);
     174              : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
     175              : static void heap_prune_record_redirect(PruneState *prstate,
     176              :                                        OffsetNumber offnum, OffsetNumber rdoffnum,
     177              :                                        bool was_normal);
     178              : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
     179              :                                    bool was_normal);
     180              : static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
     181              :                                              bool was_normal);
     182              : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
     183              : 
     184              : static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum);
     185              : static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum);
     186              : static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum);
     187              : static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
     188              : 
     189              : static void page_verify_redirects(Page page);
     190              : 
     191              : static bool heap_page_will_freeze(Relation relation, Buffer buffer,
     192              :                                   bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
     193              :                                   PruneState *prstate);
     194              : 
     195              : 
     196              : /*
     197              :  * Optionally prune and repair fragmentation in the specified page.
     198              :  *
     199              :  * This is an opportunistic function.  It will perform housekeeping
     200              :  * only if the page heuristically looks like a candidate for pruning and we
     201              :  * can acquire buffer cleanup lock without blocking.
     202              :  *
     203              :  * Note: this is called quite often.  It's important that it fall out quickly
     204              :  * if there's not any use in pruning.
     205              :  *
     206              :  * Caller must have pin on the buffer, and must *not* have a lock on it.
     207              :  */
     208              : void
     209     16745504 : heap_page_prune_opt(Relation relation, Buffer buffer)
     210              : {
     211     16745504 :     Page        page = BufferGetPage(buffer);
     212              :     TransactionId prune_xid;
     213              :     GlobalVisState *vistest;
     214              :     Size        minfree;
     215              : 
     216              :     /*
     217              :      * We can't write WAL in recovery mode, so there's no point trying to
     218              :      * clean the page. The primary will likely issue a cleaning WAL record
     219              :      * soon anyway, so this is no particular loss.
     220              :      */
     221     16745504 :     if (RecoveryInProgress())
     222       226080 :         return;
     223              : 
     224              :     /*
     225              :      * First check whether there's any chance there's something to prune,
     226              :      * determining the appropriate horizon is a waste if there's no prune_xid
     227              :      * (i.e. no updates/deletes left potentially dead tuples around).
     228              :      */
     229     16519424 :     prune_xid = ((PageHeader) page)->pd_prune_xid;
     230     16519424 :     if (!TransactionIdIsValid(prune_xid))
     231      8631476 :         return;
     232              : 
     233              :     /*
     234              :      * Check whether prune_xid indicates that there may be dead rows that can
     235              :      * be cleaned up.
     236              :      */
     237      7887948 :     vistest = GlobalVisTestFor(relation);
     238              : 
     239      7887948 :     if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
     240      6548356 :         return;
     241              : 
     242              :     /*
     243              :      * We prune when a previous UPDATE failed to find enough space on the page
     244              :      * for a new tuple version, or when free space falls below the relation's
     245              :      * fill-factor target (but not less than 10%).
     246              :      *
     247              :      * Checking free space here is questionable since we aren't holding any
     248              :      * lock on the buffer; in the worst case we could get a bogus answer. It's
     249              :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
     250              :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     251              :      * important than sometimes getting a wrong answer in what is after all
     252              :      * just a heuristic estimate.
     253              :      */
     254      1339592 :     minfree = RelationGetTargetPageFreeSpace(relation,
     255              :                                              HEAP_DEFAULT_FILLFACTOR);
     256      1339592 :     minfree = Max(minfree, BLCKSZ / 10);
     257              : 
     258      1339592 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     259              :     {
     260              :         /* OK, try to get exclusive buffer lock */
     261        43188 :         if (!ConditionalLockBufferForCleanup(buffer))
     262          464 :             return;
     263              : 
     264              :         /*
     265              :          * Now that we have buffer lock, get accurate information about the
     266              :          * page's free space, and recheck the heuristic about whether to
     267              :          * prune.
     268              :          */
     269        42724 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     270              :         {
     271              :             OffsetNumber dummy_off_loc;
     272              :             PruneFreezeResult presult;
     273              : 
     274              :             /*
     275              :              * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
     276              :              * regardless of whether or not the relation has indexes, since we
     277              :              * cannot safely determine that during on-access pruning with the
     278              :              * current implementation.
     279              :              */
     280        42724 :             PruneFreezeParams params = {
     281              :                 .relation = relation,
     282              :                 .buffer = buffer,
     283              :                 .reason = PRUNE_ON_ACCESS,
     284              :                 .options = 0,
     285              :                 .vistest = vistest,
     286              :                 .cutoffs = NULL,
     287              :             };
     288              : 
     289        42724 :             heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
     290              :                                        NULL, NULL);
     291              : 
     292              :             /*
     293              :              * Report the number of tuples reclaimed to pgstats.  This is
     294              :              * presult.ndeleted minus the number of newly-LP_DEAD-set items.
     295              :              *
     296              :              * We derive the number of dead tuples like this to avoid totally
     297              :              * forgetting about items that were set to LP_DEAD, since they
     298              :              * still need to be cleaned up by VACUUM.  We only want to count
     299              :              * heap-only tuples that just became LP_UNUSED in our report,
     300              :              * which don't.
     301              :              *
     302              :              * VACUUM doesn't have to compensate in the same way when it
     303              :              * tracks ndeleted, since it will set the same LP_DEAD items to
     304              :              * LP_UNUSED separately.
     305              :              */
     306        42724 :             if (presult.ndeleted > presult.nnewlpdead)
     307        19523 :                 pgstat_update_heap_dead_tuples(relation,
     308        19523 :                                                presult.ndeleted - presult.nnewlpdead);
     309              :         }
     310              : 
     311              :         /* And release buffer lock */
     312        42724 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     313              : 
     314              :         /*
     315              :          * We avoid reuse of any free space created on the page by unrelated
     316              :          * UPDATEs/INSERTs by opting to not update the FSM at this point.  The
     317              :          * free space should be reused by UPDATEs to *this* page.
     318              :          */
     319              :     }
     320              : }
     321              : 
     322              : /*
     323              :  * Helper for heap_page_prune_and_freeze() to initialize the PruneState using
     324              :  * the provided parameters.
     325              :  *
     326              :  * params, new_relfrozen_xid, new_relmin_mxid, and presult are input
     327              :  * parameters and are not modified by this function. Only prstate is modified.
     328              :  */
     329              : static void
     330       518853 : prune_freeze_setup(PruneFreezeParams *params,
     331              :                    TransactionId *new_relfrozen_xid,
     332              :                    MultiXactId *new_relmin_mxid,
     333              :                    PruneFreezeResult *presult,
     334              :                    PruneState *prstate)
     335              : {
     336              :     /* Copy parameters to prstate */
     337       518853 :     prstate->vistest = params->vistest;
     338       518853 :     prstate->mark_unused_now =
     339       518853 :         (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
     340              : 
     341              :     /* cutoffs must be provided if we will attempt freezing */
     342              :     Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
     343       518853 :     prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
     344       518853 :     prstate->cutoffs = params->cutoffs;
     345              : 
     346              :     /*
     347              :      * Our strategy is to scan the page and make lists of items to change,
     348              :      * then apply the changes within a critical section.  This keeps as much
     349              :      * logic as possible out of the critical section, and also ensures that
     350              :      * WAL replay will work the same as the normal case.
     351              :      *
     352              :      * First, initialize the new pd_prune_xid value to zero (indicating no
     353              :      * prunable tuples).  If we find any tuples which may soon become
     354              :      * prunable, we will save the lowest relevant XID in new_prune_xid. Also
     355              :      * initialize the rest of our working state.
     356              :      */
     357       518853 :     prstate->new_prune_xid = InvalidTransactionId;
     358       518853 :     prstate->latest_xid_removed = InvalidTransactionId;
     359       518853 :     prstate->nredirected = prstate->ndead = prstate->nunused = 0;
     360       518853 :     prstate->nfrozen = 0;
     361       518853 :     prstate->nroot_items = 0;
     362       518853 :     prstate->nheaponly_items = 0;
     363              : 
     364              :     /* initialize page freezing working state */
     365       518853 :     prstate->pagefrz.freeze_required = false;
     366       518853 :     if (prstate->attempt_freeze)
     367              :     {
     368              :         Assert(new_relfrozen_xid && new_relmin_mxid);
     369       476129 :         prstate->pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
     370       476129 :         prstate->pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
     371       476129 :         prstate->pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
     372       476129 :         prstate->pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
     373              :     }
     374              :     else
     375              :     {
     376              :         Assert(!new_relfrozen_xid && !new_relmin_mxid);
     377        42724 :         prstate->pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
     378        42724 :         prstate->pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
     379        42724 :         prstate->pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
     380        42724 :         prstate->pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
     381              :     }
     382              : 
     383       518853 :     prstate->ndeleted = 0;
     384       518853 :     prstate->live_tuples = 0;
     385       518853 :     prstate->recently_dead_tuples = 0;
     386       518853 :     prstate->hastup = false;
     387       518853 :     prstate->lpdead_items = 0;
     388              : 
     389              :     /*
     390              :      * deadoffsets are filled in during pruning but are only used to populate
     391              :      * PruneFreezeResult->deadoffsets. To avoid needing two copies of the
     392              :      * array, just save a pointer to the result offsets array in the
     393              :      * PruneState.
     394              :      */
     395       518853 :     prstate->deadoffsets = presult->deadoffsets;
     396       518853 :     prstate->frz_conflict_horizon = InvalidTransactionId;
     397              : 
     398              :     /*
     399              :      * Vacuum may update the VM after we're done.  We can keep track of
     400              :      * whether the page will be all-visible and all-frozen after pruning and
     401              :      * freezing to help the caller to do that.
     402              :      *
     403              :      * Currently, only VACUUM sets the VM bits.  To save the effort, only do
     404              :      * the bookkeeping if the caller needs it.  Currently, that's tied to
     405              :      * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
     406              :      * to update the VM bits without also freezing or freeze without also
     407              :      * setting the VM bits.
     408              :      *
     409              :      * In addition to telling the caller whether it can set the VM bit, we
     410              :      * also use 'all_visible' and 'all_frozen' for our own decision-making. If
     411              :      * the whole page would become frozen, we consider opportunistically
     412              :      * freezing tuples.  We will not be able to freeze the whole page if there
     413              :      * are tuples present that are not visible to everyone or if there are
     414              :      * dead tuples which are not yet removable.  However, dead tuples which
     415              :      * will be removed by the end of vacuuming should not preclude us from
     416              :      * opportunistically freezing.  Because of that, we do not immediately
     417              :      * clear all_visible and all_frozen when we see LP_DEAD items.  We fix
     418              :      * that after scanning the line pointers. We must correct all_visible and
     419              :      * all_frozen before we return them to the caller, so that the caller
     420              :      * doesn't set the VM bits incorrectly.
     421              :      */
     422       518853 :     if (prstate->attempt_freeze)
     423              :     {
     424       476129 :         prstate->all_visible = true;
     425       476129 :         prstate->all_frozen = true;
     426              :     }
     427              :     else
     428              :     {
     429              :         /*
     430              :          * Initializing to false allows skipping the work to update them in
     431              :          * heap_prune_record_unchanged_lp_normal().
     432              :          */
     433        42724 :         prstate->all_visible = false;
     434        42724 :         prstate->all_frozen = false;
     435              :     }
     436              : 
     437              :     /*
     438              :      * The visibility cutoff xid is the newest xmin of live tuples on the
     439              :      * page.  In the common case, this will be set as the conflict horizon the
     440              :      * caller can use for updating the VM.  If, at the end of freezing and
     441              :      * pruning, the page is all-frozen, there is no possibility that any
     442              :      * running transaction on the standby does not see tuples on the page as
     443              :      * all-visible, so the conflict horizon remains InvalidTransactionId.
     444              :      */
     445       518853 :     prstate->visibility_cutoff_xid = InvalidTransactionId;
     446       518853 : }
     447              : 
     448              : /*
     449              :  * Helper for heap_page_prune_and_freeze(). Iterates over every tuple on the
     450              :  * page, examines its visibility information, and determines the appropriate
     451              :  * action for each tuple. All tuples are processed and classified during this
     452              :  * phase, but no modifications are made to the page until the later execution
     453              :  * stage.
     454              :  *
     455              :  * *off_loc is used for error callback and cleared before returning.
     456              :  */
     457              : static void
     458       518853 : prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate,
     459              :                   OffsetNumber *off_loc)
     460              : {
     461       518853 :     Page        page = BufferGetPage(buffer);
     462       518853 :     BlockNumber blockno = BufferGetBlockNumber(buffer);
     463       518853 :     OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     464              :     OffsetNumber offnum;
     465              :     HeapTupleData tup;
     466              : 
     467       518853 :     tup.t_tableOid = reloid;
     468              : 
     469              :     /*
     470              :      * Determine HTSV for all tuples, and queue them up for processing as HOT
     471              :      * chain roots or as heap-only items.
     472              :      *
     473              :      * Determining HTSV only once for each tuple is required for correctness,
     474              :      * to deal with cases where running HTSV twice could result in different
     475              :      * results.  For example, RECENTLY_DEAD can turn to DEAD if another
     476              :      * checked item causes GlobalVisTestIsRemovableFullXid() to update the
     477              :      * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
     478              :      * transaction aborts.
     479              :      *
     480              :      * It's also good for performance. Most commonly tuples within a page are
     481              :      * stored at decreasing offsets (while the items are stored at increasing
     482              :      * offsets). When processing all tuples on a page this leads to reading
     483              :      * memory at decreasing offsets within a page, with a variable stride.
     484              :      * That's hard for CPU prefetchers to deal with. Processing the items in
     485              :      * reverse order (and thus the tuples in increasing order) increases
     486              :      * prefetching efficiency significantly / decreases the number of cache
     487              :      * misses.
     488              :      */
     489       518853 :     for (offnum = maxoff;
     490     26613309 :          offnum >= FirstOffsetNumber;
     491     26094456 :          offnum = OffsetNumberPrev(offnum))
     492              :     {
     493     26094456 :         ItemId      itemid = PageGetItemId(page, offnum);
     494              :         HeapTupleHeader htup;
     495              : 
     496              :         /*
     497              :          * Set the offset number so that we can display it along with any
     498              :          * error that occurred while processing this tuple.
     499              :          */
     500     26094456 :         *off_loc = offnum;
     501              : 
     502     26094456 :         prstate->processed[offnum] = false;
     503     26094456 :         prstate->htsv[offnum] = -1;
     504              : 
     505              :         /* Nothing to do if slot doesn't contain a tuple */
     506     26094456 :         if (!ItemIdIsUsed(itemid))
     507              :         {
     508       335807 :             heap_prune_record_unchanged_lp_unused(page, prstate, offnum);
     509       335807 :             continue;
     510              :         }
     511              : 
     512     25758649 :         if (ItemIdIsDead(itemid))
     513              :         {
     514              :             /*
     515              :              * If the caller set mark_unused_now true, we can set dead line
     516              :              * pointers LP_UNUSED now.
     517              :              */
     518      1014805 :             if (unlikely(prstate->mark_unused_now))
     519         1026 :                 heap_prune_record_unused(prstate, offnum, false);
     520              :             else
     521      1013779 :                 heap_prune_record_unchanged_lp_dead(page, prstate, offnum);
     522      1014805 :             continue;
     523              :         }
     524              : 
     525     24743844 :         if (ItemIdIsRedirected(itemid))
     526              :         {
     527              :             /* This is the start of a HOT chain */
     528       316320 :             prstate->root_items[prstate->nroot_items++] = offnum;
     529       316320 :             continue;
     530              :         }
     531              : 
     532              :         Assert(ItemIdIsNormal(itemid));
     533              : 
     534              :         /*
     535              :          * Get the tuple's visibility status and queue it up for processing.
     536              :          */
     537     24427524 :         htup = (HeapTupleHeader) PageGetItem(page, itemid);
     538     24427524 :         tup.t_data = htup;
     539     24427524 :         tup.t_len = ItemIdGetLength(itemid);
     540     24427524 :         ItemPointerSet(&tup.t_self, blockno, offnum);
     541              : 
     542     24427524 :         prstate->htsv[offnum] = heap_prune_satisfies_vacuum(prstate, &tup,
     543              :                                                             buffer);
     544              : 
     545     24427524 :         if (!HeapTupleHeaderIsHeapOnly(htup))
     546     23986881 :             prstate->root_items[prstate->nroot_items++] = offnum;
     547              :         else
     548       440643 :             prstate->heaponly_items[prstate->nheaponly_items++] = offnum;
     549              :     }
     550              : 
     551              :     /*
     552              :      * Process HOT chains.
     553              :      *
     554              :      * We added the items to the array starting from 'maxoff', so by
     555              :      * processing the array in reverse order, we process the items in
     556              :      * ascending offset number order.  The order doesn't matter for
     557              :      * correctness, but some quick micro-benchmarking suggests that this is
     558              :      * faster.  (Earlier PostgreSQL versions, which scanned all the items on
     559              :      * the page instead of using the root_items array, also did it in
     560              :      * ascending offset number order.)
     561              :      */
     562     24822054 :     for (int i = prstate->nroot_items - 1; i >= 0; i--)
     563              :     {
     564     24303201 :         offnum = prstate->root_items[i];
     565              : 
     566              :         /* Ignore items already processed as part of an earlier chain */
     567     24303201 :         if (prstate->processed[offnum])
     568            0 :             continue;
     569              : 
     570              :         /* see preceding loop */
     571     24303201 :         *off_loc = offnum;
     572              : 
     573              :         /* Process this item or chain of items */
     574     24303201 :         heap_prune_chain(page, blockno, maxoff, offnum, prstate);
     575              :     }
     576              : 
     577              :     /*
     578              :      * Process any heap-only tuples that were not already processed as part of
     579              :      * a HOT chain.
     580              :      */
     581       959496 :     for (int i = prstate->nheaponly_items - 1; i >= 0; i--)
     582              :     {
     583       440643 :         offnum = prstate->heaponly_items[i];
     584              : 
     585       440643 :         if (prstate->processed[offnum])
     586       426780 :             continue;
     587              : 
     588              :         /* see preceding loop */
     589        13863 :         *off_loc = offnum;
     590              : 
     591              :         /*
     592              :          * If the tuple is DEAD and doesn't chain to anything else, mark it
     593              :          * unused.  (If it does chain, we can only remove it as part of
     594              :          * pruning its chain.)
     595              :          *
     596              :          * We need this primarily to handle aborted HOT updates, that is,
     597              :          * XMIN_INVALID heap-only tuples.  Those might not be linked to by any
     598              :          * chain, since the parent tuple might be re-updated before any
     599              :          * pruning occurs.  So we have to be able to reap them separately from
     600              :          * chain-pruning.  (Note that HeapTupleHeaderIsHotUpdated will never
     601              :          * return true for an XMIN_INVALID tuple, so this code will work even
     602              :          * when there were sequential updates within the aborted transaction.)
     603              :          */
     604        13863 :         if (prstate->htsv[offnum] == HEAPTUPLE_DEAD)
     605              :         {
     606         2242 :             ItemId      itemid = PageGetItemId(page, offnum);
     607         2242 :             HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
     608              : 
     609         2242 :             if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
     610              :             {
     611         2242 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
     612              :                                                       &prstate->latest_xid_removed);
     613         2242 :                 heap_prune_record_unused(prstate, offnum, true);
     614              :             }
     615              :             else
     616              :             {
     617              :                 /*
     618              :                  * This tuple should've been processed and removed as part of
     619              :                  * a HOT chain, so something's wrong.  To preserve evidence,
     620              :                  * we don't dare to remove it.  We cannot leave behind a DEAD
     621              :                  * tuple either, because that will cause VACUUM to error out.
     622              :                  * Throwing an error with a distinct error message seems like
     623              :                  * the least bad option.
     624              :                  */
     625            0 :                 elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
     626              :                      blockno, offnum);
     627              :             }
     628              :         }
     629              :         else
     630        11621 :             heap_prune_record_unchanged_lp_normal(page, prstate, offnum);
     631              :     }
     632              : 
     633              :     /* We should now have processed every tuple exactly once  */
     634              : #ifdef USE_ASSERT_CHECKING
     635              :     for (offnum = FirstOffsetNumber;
     636              :          offnum <= maxoff;
     637              :          offnum = OffsetNumberNext(offnum))
     638              :     {
     639              :         *off_loc = offnum;
     640              : 
     641              :         Assert(prstate->processed[offnum]);
     642              :     }
     643              : #endif
     644              : 
     645              :     /* Clear the offset information once we have processed the given page. */
     646       518853 :     *off_loc = InvalidOffsetNumber;
     647       518853 : }
     648              : 
     649              : /*
     650              :  * Decide whether to proceed with freezing according to the freeze plans
     651              :  * prepared for the given heap buffer. If freezing is chosen, this function
     652              :  * performs several pre-freeze checks.
     653              :  *
     654              :  * The values of do_prune, do_hint_prune, and did_tuple_hint_fpi must be
     655              :  * determined before calling this function.
     656              :  *
     657              :  * prstate is both an input and output parameter.
     658              :  *
     659              :  * Returns true if we should apply the freeze plans and freeze tuples on the
     660              :  * page, and false otherwise.
     661              :  */
     662              : static bool
     663       518853 : heap_page_will_freeze(Relation relation, Buffer buffer,
     664              :                       bool did_tuple_hint_fpi,
     665              :                       bool do_prune,
     666              :                       bool do_hint_prune,
     667              :                       PruneState *prstate)
     668              : {
     669       518853 :     bool        do_freeze = false;
     670              : 
     671              :     /*
     672              :      * If the caller specified we should not attempt to freeze any tuples,
     673              :      * validate that everything is in the right state and return.
     674              :      */
     675       518853 :     if (!prstate->attempt_freeze)
     676              :     {
     677              :         Assert(!prstate->all_frozen && prstate->nfrozen == 0);
     678              :         Assert(prstate->lpdead_items == 0 || !prstate->all_visible);
     679        42724 :         return false;
     680              :     }
     681              : 
     682       476129 :     if (prstate->pagefrz.freeze_required)
     683              :     {
     684              :         /*
     685              :          * heap_prepare_freeze_tuple indicated that at least one XID/MXID from
     686              :          * before FreezeLimit/MultiXactCutoff is present.  Must freeze to
     687              :          * advance relfrozenxid/relminmxid.
     688              :          */
     689        21341 :         do_freeze = true;
     690              :     }
     691              :     else
     692              :     {
     693              :         /*
     694              :          * Opportunistically freeze the page if we are generating an FPI
     695              :          * anyway and if doing so means that we can set the page all-frozen
     696              :          * afterwards (might not happen until VACUUM's final heap pass).
     697              :          *
     698              :          * XXX: Previously, we knew if pruning emitted an FPI by checking
     699              :          * pgWalUsage.wal_fpi before and after pruning.  Once the freeze and
     700              :          * prune records were combined, this heuristic couldn't be used
     701              :          * anymore.  The opportunistic freeze heuristic must be improved;
     702              :          * however, for now, try to approximate the old logic.
     703              :          */
     704       454788 :         if (prstate->all_frozen && prstate->nfrozen > 0)
     705              :         {
     706              :             Assert(prstate->all_visible);
     707              : 
     708              :             /*
     709              :              * Freezing would make the page all-frozen.  Have already emitted
     710              :              * an FPI or will do so anyway?
     711              :              */
     712        15343 :             if (RelationNeedsWAL(relation))
     713              :             {
     714        13688 :                 if (did_tuple_hint_fpi)
     715         1036 :                     do_freeze = true;
     716        12652 :                 else if (do_prune)
     717              :                 {
     718         1426 :                     if (XLogCheckBufferNeedsBackup(buffer))
     719          494 :                         do_freeze = true;
     720              :                 }
     721        11226 :                 else if (do_hint_prune)
     722              :                 {
     723            5 :                     if (XLogHintBitIsNeeded() && XLogCheckBufferNeedsBackup(buffer))
     724            2 :                         do_freeze = true;
     725              :                 }
     726              :             }
     727              :         }
     728              :     }
     729              : 
     730       476129 :     if (do_freeze)
     731              :     {
     732              :         /*
     733              :          * Validate the tuples we will be freezing before entering the
     734              :          * critical section.
     735              :          */
     736        22873 :         heap_pre_freeze_checks(buffer, prstate->frozen, prstate->nfrozen);
     737              : 
     738              :         /*
     739              :          * Calculate what the snapshot conflict horizon should be for a record
     740              :          * freezing tuples. We can use the visibility_cutoff_xid as our cutoff
     741              :          * for conflicts when the whole page is eligible to become all-frozen
     742              :          * in the VM once we're done with it. Otherwise, we generate a
     743              :          * conservative cutoff by stepping back from OldestXmin.
     744              :          */
     745        22873 :         if (prstate->all_frozen)
     746        20561 :             prstate->frz_conflict_horizon = prstate->visibility_cutoff_xid;
     747              :         else
     748              :         {
     749              :             /* Avoids false conflicts when hot_standby_feedback in use */
     750         2312 :             prstate->frz_conflict_horizon = prstate->cutoffs->OldestXmin;
     751         2312 :             TransactionIdRetreat(prstate->frz_conflict_horizon);
     752              :         }
     753              :     }
     754       453256 :     else if (prstate->nfrozen > 0)
     755              :     {
     756              :         /*
     757              :          * The page contained some tuples that were not already frozen, and we
     758              :          * chose not to freeze them now.  The page won't be all-frozen then.
     759              :          */
     760              :         Assert(!prstate->pagefrz.freeze_required);
     761              : 
     762        14175 :         prstate->all_frozen = false;
     763        14175 :         prstate->nfrozen = 0;    /* avoid miscounts in instrumentation */
     764              :     }
     765              :     else
     766              :     {
     767              :         /*
     768              :          * We have no freeze plans to execute.  The page might already be
     769              :          * all-frozen (perhaps only following pruning), though.  Such pages
     770              :          * can be marked all-frozen in the VM by our caller, even though none
     771              :          * of its tuples were newly frozen here.
     772              :          */
     773              :     }
     774              : 
     775       476129 :     return do_freeze;
     776              : }
     777              : 
     778              : 
     779              : /*
     780              :  * Prune and repair fragmentation and potentially freeze tuples on the
     781              :  * specified page.
     782              :  *
     783              :  * Caller must have pin and buffer cleanup lock on the page.  Note that we
     784              :  * don't update the FSM information for page on caller's behalf.  Caller might
     785              :  * also need to account for a reduction in the length of the line pointer
     786              :  * array following array truncation by us.
     787              :  *
     788              :  * params contains the input parameters used to control freezing and pruning
     789              :  * behavior. See the definition of PruneFreezeParams for more on what each
     790              :  * parameter does.
     791              :  *
     792              :  * If the HEAP_PAGE_PRUNE_FREEZE option is set in params, we will freeze
     793              :  * tuples if it's required in order to advance relfrozenxid / relminmxid, or
     794              :  * if it's considered advantageous for overall system performance to do so
     795              :  * now.  The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
     796              :  * 'new_relmin_mxid' arguments are required when freezing.  When
     797              :  * HEAP_PAGE_PRUNE_FREEZE option is passed, we also set presult->all_visible
     798              :  * and presult->all_frozen after determining whether or not to
     799              :  * opportunistically freeze, to indicate if the VM bits can be set.  They are
     800              :  * always set to false when the HEAP_PAGE_PRUNE_FREEZE option is not passed,
     801              :  * because at the moment only callers that also freeze need that information.
     802              :  *
     803              :  * presult contains output parameters needed by callers, such as the number of
     804              :  * tuples removed and the offsets of dead items on the page after pruning.
     805              :  * heap_page_prune_and_freeze() is responsible for initializing it.  Required
     806              :  * by all callers.
     807              :  *
     808              :  * off_loc is the offset location required by the caller to use in error
     809              :  * callback.
     810              :  *
     811              :  * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
     812              :  * HEAP_PAGE_PRUNE_FREEZE option is set in params.  On entry, they contain the
     813              :  * oldest XID and multi-XID seen on the relation so far.  They will be updated
     814              :  * with the oldest values present on the page after pruning.  After processing
     815              :  * the whole relation, VACUUM can use these values as the new
     816              :  * relfrozenxid/relminmxid for the relation.
     817              :  */
     818              : void
     819       518853 : heap_page_prune_and_freeze(PruneFreezeParams *params,
     820              :                            PruneFreezeResult *presult,
     821              :                            OffsetNumber *off_loc,
     822              :                            TransactionId *new_relfrozen_xid,
     823              :                            MultiXactId *new_relmin_mxid)
     824              : {
     825       518853 :     Buffer      buffer = params->buffer;
     826       518853 :     Page        page = BufferGetPage(buffer);
     827              :     PruneState  prstate;
     828              :     bool        do_freeze;
     829              :     bool        do_prune;
     830              :     bool        do_hint_prune;
     831              :     bool        did_tuple_hint_fpi;
     832       518853 :     int64       fpi_before = pgWalUsage.wal_fpi;
     833              : 
     834              :     /* Initialize prstate */
     835       518853 :     prune_freeze_setup(params,
     836              :                        new_relfrozen_xid, new_relmin_mxid,
     837              :                        presult, &prstate);
     838              : 
     839              :     /*
     840              :      * Examine all line pointers and tuple visibility information to determine
     841              :      * which line pointers should change state and which tuples may be frozen.
     842              :      * Prepare queue of state changes to later be executed in a critical
     843              :      * section.
     844              :      */
     845       518853 :     prune_freeze_plan(RelationGetRelid(params->relation),
     846              :                       buffer, &prstate, off_loc);
     847              : 
     848              :     /*
     849              :      * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
     850              :      * checking tuple visibility information in prune_freeze_plan() may have
     851              :      * caused an FPI to be emitted.
     852              :      */
     853       518853 :     did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;
     854              : 
     855      1539309 :     do_prune = prstate.nredirected > 0 ||
     856       983567 :         prstate.ndead > 0 ||
     857       464714 :         prstate.nunused > 0;
     858              : 
     859              :     /*
     860              :      * Even if we don't prune anything, if we found a new value for the
     861              :      * pd_prune_xid field or the page was marked full, we will update the hint
     862              :      * bit.
     863              :      */
     864       983365 :     do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
     865       464512 :         PageIsFull(page);
     866              : 
     867              :     /*
     868              :      * Decide if we want to go ahead with freezing according to the freeze
     869              :      * plans we prepared, or not.
     870              :      */
     871       518853 :     do_freeze = heap_page_will_freeze(params->relation, buffer,
     872              :                                       did_tuple_hint_fpi,
     873              :                                       do_prune,
     874              :                                       do_hint_prune,
     875              :                                       &prstate);
     876              : 
     877              :     /*
     878              :      * While scanning the line pointers, we did not clear
     879              :      * all_visible/all_frozen when encountering LP_DEAD items because we
     880              :      * wanted the decision whether or not to freeze the page to be unaffected
     881              :      * by the short-term presence of LP_DEAD items.  These LP_DEAD items are
     882              :      * effectively assumed to be LP_UNUSED items in the making.  It doesn't
     883              :      * matter which vacuum heap pass (initial pass or final pass) ends up
     884              :      * setting the page all-frozen, as long as the ongoing VACUUM does it.
     885              :      *
     886              :      * Now that we finished determining whether or not to freeze the page,
     887              :      * update all_visible and all_frozen so that they reflect the true state
     888              :      * of the page for setting PD_ALL_VISIBLE and VM bits.
     889              :      */
     890       518853 :     if (prstate.lpdead_items > 0)
     891        54783 :         prstate.all_visible = prstate.all_frozen = false;
     892              : 
     893              :     Assert(!prstate.all_frozen || prstate.all_visible);
     894              : 
     895              :     /* Any error while applying the changes is critical */
     896       518853 :     START_CRIT_SECTION();
     897              : 
     898       518853 :     if (do_hint_prune)
     899              :     {
     900              :         /*
     901              :          * Update the page's pd_prune_xid field to either zero, or the lowest
     902              :          * XID of any soon-prunable tuple.
     903              :          */
     904        54427 :         ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
     905              : 
     906              :         /*
     907              :          * Also clear the "page is full" flag, since there's no point in
     908              :          * repeating the prune/defrag process until something else happens to
     909              :          * the page.
     910              :          */
     911        54427 :         PageClearFull(page);
     912              : 
     913              :         /*
     914              :          * If that's all we had to do to the page, this is a non-WAL-logged
     915              :          * hint.  If we are going to freeze or prune the page, we will mark
     916              :          * the buffer dirty below.
     917              :          */
     918        54427 :         if (!do_freeze && !do_prune)
     919          194 :             MarkBufferDirtyHint(buffer, true);
     920              :     }
     921              : 
     922       518853 :     if (do_prune || do_freeze)
     923              :     {
     924              :         /* Apply the planned item changes and repair page fragmentation. */
     925        75498 :         if (do_prune)
     926              :         {
     927        54433 :             heap_page_prune_execute(buffer, false,
     928              :                                     prstate.redirected, prstate.nredirected,
     929              :                                     prstate.nowdead, prstate.ndead,
     930              :                                     prstate.nowunused, prstate.nunused);
     931              :         }
     932              : 
     933        75498 :         if (do_freeze)
     934        22873 :             heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
     935              : 
     936        75498 :         MarkBufferDirty(buffer);
     937              : 
     938              :         /*
     939              :          * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
     940              :          */
     941        75498 :         if (RelationNeedsWAL(params->relation))
     942              :         {
     943              :             /*
     944              :              * The snapshotConflictHorizon for the whole record should be the
     945              :              * most conservative of all the horizons calculated for any of the
     946              :              * possible modifications.  If this record will prune tuples, any
     947              :              * transactions on the standby older than the youngest xmax of the
     948              :              * most recently removed tuple this record will prune will
     949              :              * conflict.  If this record will freeze tuples, any transactions
     950              :              * on the standby with xids older than the youngest tuple this
     951              :              * record will freeze will conflict.
     952              :              */
     953              :             TransactionId conflict_xid;
     954              : 
     955        74620 :             if (TransactionIdFollows(prstate.frz_conflict_horizon,
     956              :                                      prstate.latest_xid_removed))
     957        21449 :                 conflict_xid = prstate.frz_conflict_horizon;
     958              :             else
     959        53171 :                 conflict_xid = prstate.latest_xid_removed;
     960              : 
     961        74620 :             log_heap_prune_and_freeze(params->relation, buffer,
     962              :                                       InvalidBuffer,    /* vmbuffer */
     963              :                                       0,    /* vmflags */
     964              :                                       conflict_xid,
     965              :                                       true, params->reason,
     966              :                                       prstate.frozen, prstate.nfrozen,
     967              :                                       prstate.redirected, prstate.nredirected,
     968              :                                       prstate.nowdead, prstate.ndead,
     969              :                                       prstate.nowunused, prstate.nunused);
     970              :         }
     971              :     }
     972              : 
     973       518853 :     END_CRIT_SECTION();
     974              : 
     975              :     /* Copy information back for caller */
     976       518853 :     presult->ndeleted = prstate.ndeleted;
     977       518853 :     presult->nnewlpdead = prstate.ndead;
     978       518853 :     presult->nfrozen = prstate.nfrozen;
     979       518853 :     presult->live_tuples = prstate.live_tuples;
     980       518853 :     presult->recently_dead_tuples = prstate.recently_dead_tuples;
     981       518853 :     presult->all_visible = prstate.all_visible;
     982       518853 :     presult->all_frozen = prstate.all_frozen;
     983       518853 :     presult->hastup = prstate.hastup;
     984              : 
     985              :     /*
     986              :      * For callers planning to update the visibility map, the conflict horizon
     987              :      * for that record must be the newest xmin on the page.  However, if the
     988              :      * page is completely frozen, there can be no conflict and the
     989              :      * vm_conflict_horizon should remain InvalidTransactionId.  This includes
     990              :      * the case that we just froze all the tuples; the prune-freeze record
     991              :      * included the conflict XID already so the caller doesn't need it.
     992              :      */
     993       518853 :     if (presult->all_frozen)
     994       210483 :         presult->vm_conflict_horizon = InvalidTransactionId;
     995              :     else
     996       308370 :         presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
     997              : 
     998       518853 :     presult->lpdead_items = prstate.lpdead_items;
     999              :     /* the presult->deadoffsets array was already filled in */
    1000              : 
    1001       518853 :     if (prstate.attempt_freeze)
    1002              :     {
    1003       476129 :         if (presult->nfrozen > 0)
    1004              :         {
    1005        22873 :             *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
    1006        22873 :             *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
    1007              :         }
    1008              :         else
    1009              :         {
    1010       453256 :             *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
    1011       453256 :             *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
    1012              :         }
    1013              :     }
    1014       518853 : }
    1015              : 
    1016              : 
    1017              : /*
    1018              :  * Perform visibility checks for heap pruning.
    1019              :  */
    1020              : static HTSV_Result
    1021     24427524 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
    1022              : {
    1023              :     HTSV_Result res;
    1024              :     TransactionId dead_after;
    1025              : 
    1026     24427524 :     res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
    1027              : 
    1028     24427524 :     if (res != HEAPTUPLE_RECENTLY_DEAD)
    1029     22728650 :         return res;
    1030              : 
    1031              :     /*
    1032              :      * For VACUUM, we must be sure to prune tuples with xmax older than
    1033              :      * OldestXmin -- a visibility cutoff determined at the beginning of
    1034              :      * vacuuming the relation. OldestXmin is used for freezing determination
    1035              :      * and we cannot freeze dead tuples' xmaxes.
    1036              :      */
    1037      1698874 :     if (prstate->cutoffs &&
    1038       919578 :         TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
    1039       919578 :         NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
    1040       670647 :         return HEAPTUPLE_DEAD;
    1041              : 
    1042              :     /*
    1043              :      * Determine whether or not the tuple is considered dead when compared
    1044              :      * with the provided GlobalVisState. On-access pruning does not provide
    1045              :      * VacuumCutoffs. And for vacuum, even if the tuple's xmax is not older
    1046              :      * than OldestXmin, GlobalVisTestIsRemovableXid() could find the row dead
    1047              :      * if the GlobalVisState has been updated since the beginning of vacuuming
    1048              :      * the relation.
    1049              :      */
    1050      1028227 :     if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
    1051       743983 :         return HEAPTUPLE_DEAD;
    1052              : 
    1053       284244 :     return res;
    1054              : }
    1055              : 
    1056              : 
    1057              : /*
    1058              :  * Pruning calculates tuple visibility once and saves the results in an array
    1059              :  * of int8.  See PruneState.htsv for details.  This helper function is meant
    1060              :  * to guard against examining visibility status array members which have not
    1061              :  * yet been computed.
    1062              :  */
    1063              : static inline HTSV_Result
    1064     24413661 : htsv_get_valid_status(int status)
    1065              : {
    1066              :     Assert(status >= HEAPTUPLE_DEAD &&
    1067              :            status <= HEAPTUPLE_DELETE_IN_PROGRESS);
    1068     24413661 :     return (HTSV_Result) status;
    1069              : }
    1070              : 
    1071              : /*
    1072              :  * Prune specified line pointer or a HOT chain originating at line pointer.
    1073              :  *
    1074              :  * Tuple visibility information is provided in prstate->htsv.
    1075              :  *
    1076              :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
    1077              :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
    1078              :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
    1079              :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
    1080              :  * DEAD, our visibility test is just too coarse to detect it.
    1081              :  *
    1082              :  * Pruning must never leave behind a DEAD tuple that still has tuple storage.
    1083              :  * VACUUM isn't prepared to deal with that case.
    1084              :  *
    1085              :  * The root line pointer is redirected to the tuple immediately after the
    1086              :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
    1087              :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
    1088              :  * tuple, which we treat as a chain of length 1.)
    1089              :  *
    1090              :  * We don't actually change the page here. We just add entries to the arrays in
    1091              :  * prstate showing the changes to be made.  Items to be redirected are added
    1092              :  * to the redirected[] array (two entries per redirection); items to be set to
    1093              :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
    1094              :  * state are added to nowunused[].  We perform bookkeeping of live tuples,
    1095              :  * visibility etc. based on what the page will look like after the changes
    1096              :  * applied.  All that bookkeeping is performed in the heap_prune_record_*()
    1097              :  * subroutines.  The division of labor is that heap_prune_chain() decides the
    1098              :  * fate of each tuple, ie. whether it's going to be removed, redirected or
    1099              :  * left unchanged, and the heap_prune_record_*() subroutines update PruneState
    1100              :  * based on that outcome.
    1101              :  */
    1102              : static void
    1103     24303201 : heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
    1104              :                  OffsetNumber rootoffnum, PruneState *prstate)
    1105              : {
    1106     24303201 :     TransactionId priorXmax = InvalidTransactionId;
    1107              :     ItemId      rootlp;
    1108              :     OffsetNumber offnum;
    1109              :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
    1110              : 
    1111              :     /*
    1112              :      * After traversing the HOT chain, ndeadchain is the index in chainitems
    1113              :      * of the first live successor after the last dead item.
    1114              :      */
    1115     24303201 :     int         ndeadchain = 0,
    1116     24303201 :                 nchain = 0;
    1117              : 
    1118     24303201 :     rootlp = PageGetItemId(page, rootoffnum);
    1119              : 
    1120              :     /* Start from the root tuple */
    1121     24303201 :     offnum = rootoffnum;
    1122              : 
    1123              :     /* while not end of the chain */
    1124              :     for (;;)
    1125       426780 :     {
    1126              :         HeapTupleHeader htup;
    1127              :         ItemId      lp;
    1128              : 
    1129              :         /* Sanity check (pure paranoia) */
    1130     24729981 :         if (offnum < FirstOffsetNumber)
    1131            0 :             break;
    1132              : 
    1133              :         /*
    1134              :          * An offset past the end of page's line pointer array is possible
    1135              :          * when the array was truncated (original item must have been unused)
    1136              :          */
    1137     24729981 :         if (offnum > maxoff)
    1138            0 :             break;
    1139              : 
    1140              :         /* If item is already processed, stop --- it must not be same chain */
    1141     24729981 :         if (prstate->processed[offnum])
    1142            0 :             break;
    1143              : 
    1144     24729981 :         lp = PageGetItemId(page, offnum);
    1145              : 
    1146              :         /*
    1147              :          * Unused item obviously isn't part of the chain. Likewise, a dead
    1148              :          * line pointer can't be part of the chain.  Both of those cases were
    1149              :          * already marked as processed.
    1150              :          */
    1151              :         Assert(ItemIdIsUsed(lp));
    1152              :         Assert(!ItemIdIsDead(lp));
    1153              : 
    1154              :         /*
    1155              :          * If we are looking at the redirected root line pointer, jump to the
    1156              :          * first normal tuple in the chain.  If we find a redirect somewhere
    1157              :          * else, stop --- it must not be same chain.
    1158              :          */
    1159     24729981 :         if (ItemIdIsRedirected(lp))
    1160              :         {
    1161       316320 :             if (nchain > 0)
    1162            0 :                 break;          /* not at start of chain */
    1163       316320 :             chainitems[nchain++] = offnum;
    1164       316320 :             offnum = ItemIdGetRedirect(rootlp);
    1165       316320 :             continue;
    1166              :         }
    1167              : 
    1168              :         Assert(ItemIdIsNormal(lp));
    1169              : 
    1170     24413661 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1171              : 
    1172              :         /*
    1173              :          * Check the tuple XMIN against prior XMAX, if any
    1174              :          */
    1175     24524121 :         if (TransactionIdIsValid(priorXmax) &&
    1176       110460 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
    1177            0 :             break;
    1178              : 
    1179              :         /*
    1180              :          * OK, this tuple is indeed a member of the chain.
    1181              :          */
    1182     24413661 :         chainitems[nchain++] = offnum;
    1183              : 
    1184     24413661 :         switch (htsv_get_valid_status(prstate->htsv[offnum]))
    1185              :         {
    1186      1457333 :             case HEAPTUPLE_DEAD:
    1187              : 
    1188              :                 /* Remember the last DEAD tuple seen */
    1189      1457333 :                 ndeadchain = nchain;
    1190      1457333 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
    1191              :                                                       &prstate->latest_xid_removed);
    1192              :                 /* Advance to next chain member */
    1193      1457333 :                 break;
    1194              : 
    1195       284244 :             case HEAPTUPLE_RECENTLY_DEAD:
    1196              : 
    1197              :                 /*
    1198              :                  * We don't need to advance the conflict horizon for
    1199              :                  * RECENTLY_DEAD tuples, even if we are removing them.  This
    1200              :                  * is because we only remove RECENTLY_DEAD tuples if they
    1201              :                  * precede a DEAD tuple, and the DEAD tuple must have been
    1202              :                  * inserted by a newer transaction than the RECENTLY_DEAD
    1203              :                  * tuple by virtue of being later in the chain.  We will have
    1204              :                  * advanced the conflict horizon for the DEAD tuple.
    1205              :                  */
    1206              : 
    1207              :                 /*
    1208              :                  * Advance past RECENTLY_DEAD tuples just in case there's a
    1209              :                  * DEAD one after them.  We have to make sure that we don't
    1210              :                  * miss any DEAD tuples, since DEAD tuples that still have
    1211              :                  * tuple storage after pruning will confuse VACUUM.
    1212              :                  */
    1213       284244 :                 break;
    1214              : 
    1215     22672084 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1216              :             case HEAPTUPLE_LIVE:
    1217              :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1218     22672084 :                 goto process_chain;
    1219              : 
    1220            0 :             default:
    1221            0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1222              :                 goto process_chain;
    1223              :         }
    1224              : 
    1225              :         /*
    1226              :          * If the tuple is not HOT-updated, then we are at the end of this
    1227              :          * HOT-update chain.
    1228              :          */
    1229      1741577 :         if (!HeapTupleHeaderIsHotUpdated(htup))
    1230      1631117 :             goto process_chain;
    1231              : 
    1232              :         /* HOT implies it can't have moved to different partition */
    1233              :         Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
    1234              : 
    1235              :         /*
    1236              :          * Advance to next chain member.
    1237              :          */
    1238              :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blockno);
    1239       110460 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1240       110460 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1241              :     }
    1242              : 
    1243            0 :     if (ItemIdIsRedirected(rootlp) && nchain < 2)
    1244              :     {
    1245              :         /*
    1246              :          * We found a redirect item that doesn't point to a valid follow-on
    1247              :          * item.  This can happen if the loop in heap_page_prune_and_freeze()
    1248              :          * caused us to visit the dead successor of a redirect item before
    1249              :          * visiting the redirect item.  We can clean up by setting the
    1250              :          * redirect item to LP_DEAD state or LP_UNUSED if the caller
    1251              :          * indicated.
    1252              :          */
    1253            0 :         heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
    1254            0 :         return;
    1255              :     }
    1256              : 
    1257            0 : process_chain:
    1258              : 
    1259     24303201 :     if (ndeadchain == 0)
    1260              :     {
    1261              :         /*
    1262              :          * No DEAD tuple was found, so the chain is entirely composed of
    1263              :          * normal, unchanged tuples.  Leave it alone.
    1264              :          */
    1265     22882409 :         int         i = 0;
    1266              : 
    1267     22882409 :         if (ItemIdIsRedirected(rootlp))
    1268              :         {
    1269       298581 :             heap_prune_record_unchanged_lp_redirect(prstate, rootoffnum);
    1270       298581 :             i++;
    1271              :         }
    1272     45769013 :         for (; i < nchain; i++)
    1273     22886604 :             heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
    1274              :     }
    1275      1420792 :     else if (ndeadchain == nchain)
    1276              :     {
    1277              :         /*
    1278              :          * The entire chain is dead.  Mark the root line pointer LP_DEAD, and
    1279              :          * fully remove the other tuples in the chain.
    1280              :          */
    1281      1353706 :         heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
    1282      1388908 :         for (int i = 1; i < nchain; i++)
    1283        35202 :             heap_prune_record_unused(prstate, chainitems[i], true);
    1284              :     }
    1285              :     else
    1286              :     {
    1287              :         /*
    1288              :          * We found a DEAD tuple in the chain.  Redirect the root line pointer
    1289              :          * to the first non-DEAD tuple, and mark as unused each intermediate
    1290              :          * item that we are able to remove from the chain.
    1291              :          */
    1292        67086 :         heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
    1293        67086 :                                    ItemIdIsNormal(rootlp));
    1294        86164 :         for (int i = 1; i < ndeadchain; i++)
    1295        19078 :             heap_prune_record_unused(prstate, chainitems[i], true);
    1296              : 
    1297              :         /* the rest of tuples in the chain are normal, unchanged tuples */
    1298       136810 :         for (int i = ndeadchain; i < nchain; i++)
    1299        69724 :             heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
    1300              :     }
    1301              : }
    1302              : 
    1303              : /* Record lowest soon-prunable XID */
    1304              : static void
    1305      6119315 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
    1306              : {
    1307              :     /*
    1308              :      * This should exactly match the PageSetPrunable macro.  We can't store
    1309              :      * directly into the page header yet, so we update working state.
    1310              :      */
    1311              :     Assert(TransactionIdIsNormal(xid));
    1312     11997552 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
    1313      5878237 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
    1314       242291 :         prstate->new_prune_xid = xid;
    1315      6119315 : }
    1316              : 
    1317              : /* Record line pointer to be redirected */
    1318              : static void
    1319        67086 : heap_prune_record_redirect(PruneState *prstate,
    1320              :                            OffsetNumber offnum, OffsetNumber rdoffnum,
    1321              :                            bool was_normal)
    1322              : {
    1323              :     Assert(!prstate->processed[offnum]);
    1324        67086 :     prstate->processed[offnum] = true;
    1325              : 
    1326              :     /*
    1327              :      * Do not mark the redirect target here.  It needs to be counted
    1328              :      * separately as an unchanged tuple.
    1329              :      */
    1330              : 
    1331              :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
    1332        67086 :     prstate->redirected[prstate->nredirected * 2] = offnum;
    1333        67086 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
    1334              : 
    1335        67086 :     prstate->nredirected++;
    1336              : 
    1337              :     /*
    1338              :      * If the root entry had been a normal tuple, we are deleting it, so count
    1339              :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1340              :      * count.
    1341              :      */
    1342        67086 :     if (was_normal)
    1343        59222 :         prstate->ndeleted++;
    1344              : 
    1345        67086 :     prstate->hastup = true;
    1346        67086 : }
    1347              : 
    1348              : /* Record line pointer to be marked dead */
    1349              : static void
    1350      1329658 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
    1351              :                        bool was_normal)
    1352              : {
    1353              :     Assert(!prstate->processed[offnum]);
    1354      1329658 :     prstate->processed[offnum] = true;
    1355              : 
    1356              :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
    1357      1329658 :     prstate->nowdead[prstate->ndead] = offnum;
    1358      1329658 :     prstate->ndead++;
    1359              : 
    1360              :     /*
    1361              :      * Deliberately delay unsetting all_visible and all_frozen until later
    1362              :      * during pruning. Removable dead tuples shouldn't preclude freezing the
    1363              :      * page.
    1364              :      */
    1365              : 
    1366              :     /* Record the dead offset for vacuum */
    1367      1329658 :     prstate->deadoffsets[prstate->lpdead_items++] = offnum;
    1368              : 
    1369              :     /*
    1370              :      * If the root entry had been a normal tuple, we are deleting it, so count
    1371              :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1372              :      * count.
    1373              :      */
    1374      1329658 :     if (was_normal)
    1375      1319783 :         prstate->ndeleted++;
    1376      1329658 : }
    1377              : 
    1378              : /*
    1379              :  * Depending on whether or not the caller set mark_unused_now to true, record that a
    1380              :  * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
    1381              :  * which we will mark line pointers LP_UNUSED, but we will not mark line
    1382              :  * pointers LP_DEAD if mark_unused_now is true.
    1383              :  */
    1384              : static void
    1385      1353706 : heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
    1386              :                                  bool was_normal)
    1387              : {
    1388              :     /*
    1389              :      * If the caller set mark_unused_now to true, we can remove dead tuples
    1390              :      * during pruning instead of marking their line pointers dead. Set this
    1391              :      * tuple's line pointer LP_UNUSED. We hint that this option is less
    1392              :      * likely.
    1393              :      */
    1394      1353706 :     if (unlikely(prstate->mark_unused_now))
    1395        24048 :         heap_prune_record_unused(prstate, offnum, was_normal);
    1396              :     else
    1397      1329658 :         heap_prune_record_dead(prstate, offnum, was_normal);
    1398      1353706 : }
    1399              : 
    1400              : /* Record line pointer to be marked unused */
    1401              : static void
    1402        81596 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
    1403              : {
    1404              :     Assert(!prstate->processed[offnum]);
    1405        81596 :     prstate->processed[offnum] = true;
    1406              : 
    1407              :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
    1408        81596 :     prstate->nowunused[prstate->nunused] = offnum;
    1409        81596 :     prstate->nunused++;
    1410              : 
    1411              :     /*
    1412              :      * If the root entry had been a normal tuple, we are deleting it, so count
    1413              :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1414              :      * count.
    1415              :      */
    1416        81596 :     if (was_normal)
    1417        80570 :         prstate->ndeleted++;
    1418        81596 : }
    1419              : 
    1420              : /*
    1421              :  * Record an unused line pointer that is left unchanged.
    1422              :  */
    1423              : static void
    1424       335807 : heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
    1425              : {
    1426              :     Assert(!prstate->processed[offnum]);
    1427       335807 :     prstate->processed[offnum] = true;
    1428       335807 : }
    1429              : 
    1430              : /*
    1431              :  * Record line pointer that is left unchanged.  We consider freezing it, and
    1432              :  * update bookkeeping of tuple counts and page visibility.
    1433              :  */
    1434              : static void
    1435     22967949 : heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
    1436              : {
    1437              :     HeapTupleHeader htup;
    1438              : 
    1439              :     Assert(!prstate->processed[offnum]);
    1440     22967949 :     prstate->processed[offnum] = true;
    1441              : 
    1442     22967949 :     prstate->hastup = true;      /* the page is not empty */
    1443              : 
    1444              :     /*
    1445              :      * The criteria for counting a tuple as live in this block need to match
    1446              :      * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
    1447              :      * ANALYZE may produce wildly different reltuples values, e.g. when there
    1448              :      * are many recently-dead tuples.
    1449              :      *
    1450              :      * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
    1451              :      * can't run inside a transaction block, which makes some cases impossible
    1452              :      * (e.g. in-progress insert from the same transaction).
    1453              :      *
    1454              :      * HEAPTUPLE_DEAD are handled by the other heap_prune_record_*()
    1455              :      * subroutines.  They don't count dead items like acquire_sample_rows()
    1456              :      * does, because we assume that all dead items will become LP_UNUSED
    1457              :      * before VACUUM finishes.  This difference is only superficial.  VACUUM
    1458              :      * effectively agrees with ANALYZE about DEAD items, in the end.  VACUUM
    1459              :      * won't remember LP_DEAD items, but only because they're not supposed to
    1460              :      * be left behind when it is done. (Cases where we bypass index vacuuming
    1461              :      * will violate this optimistic assumption, but the overall impact of that
    1462              :      * should be negligible.)
    1463              :      */
    1464     22967949 :     htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
    1465              : 
    1466     22967949 :     switch (prstate->htsv[offnum])
    1467              :     {
    1468     16785239 :         case HEAPTUPLE_LIVE:
    1469              : 
    1470              :             /*
    1471              :              * Count it as live.  Not only is this natural, but it's also what
    1472              :              * acquire_sample_rows() does.
    1473              :              */
    1474     16785239 :             prstate->live_tuples++;
    1475              : 
    1476              :             /*
    1477              :              * Is the tuple definitely visible to all transactions?
    1478              :              *
    1479              :              * NB: Like with per-tuple hint bits, we can't set the
    1480              :              * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
    1481              :              * See SetHintBits for more info.  Check that the tuple is hinted
    1482              :              * xmin-committed because of that.
    1483              :              */
    1484     16785239 :             if (prstate->all_visible)
    1485              :             {
    1486              :                 TransactionId xmin;
    1487              : 
    1488     12800655 :                 if (!HeapTupleHeaderXminCommitted(htup))
    1489              :                 {
    1490          195 :                     prstate->all_visible = false;
    1491          195 :                     prstate->all_frozen = false;
    1492          195 :                     break;
    1493              :                 }
    1494              : 
    1495              :                 /*
    1496              :                  * The inserter definitely committed.  But is it old enough
    1497              :                  * that everyone sees it as committed?  A FrozenTransactionId
    1498              :                  * is seen as committed to everyone.  Otherwise, we check if
    1499              :                  * there is a snapshot that considers this xid to still be
    1500              :                  * running, and if so, we don't consider the page all-visible.
    1501              :                  */
    1502     12800460 :                 xmin = HeapTupleHeaderGetXmin(htup);
    1503              : 
    1504              :                 /*
    1505              :                  * For now always use prstate->cutoffs for this test, because
    1506              :                  * we only update 'all_visible' and 'all_frozen' when freezing
    1507              :                  * is requested. We could use GlobalVisTestIsRemovableXid
    1508              :                  * instead, if a non-freezing caller wanted to set the VM bit.
    1509              :                  */
    1510              :                 Assert(prstate->cutoffs);
    1511     12800460 :                 if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin))
    1512              :                 {
    1513         2839 :                     prstate->all_visible = false;
    1514         2839 :                     prstate->all_frozen = false;
    1515         2839 :                     break;
    1516              :                 }
    1517              : 
    1518              :                 /* Track newest xmin on page. */
    1519     12797621 :                 if (TransactionIdFollows(xmin, prstate->visibility_cutoff_xid) &&
    1520              :                     TransactionIdIsNormal(xmin))
    1521       113016 :                     prstate->visibility_cutoff_xid = xmin;
    1522              :             }
    1523     16782205 :             break;
    1524              : 
    1525       284244 :         case HEAPTUPLE_RECENTLY_DEAD:
    1526       284244 :             prstate->recently_dead_tuples++;
    1527       284244 :             prstate->all_visible = false;
    1528       284244 :             prstate->all_frozen = false;
    1529              : 
    1530              :             /*
    1531              :              * This tuple will soon become DEAD.  Update the hint field so
    1532              :              * that the page is reconsidered for pruning in future.
    1533              :              */
    1534       284244 :             heap_prune_record_prunable(prstate,
    1535              :                                        HeapTupleHeaderGetUpdateXid(htup));
    1536       284244 :             break;
    1537              : 
    1538        63395 :         case HEAPTUPLE_INSERT_IN_PROGRESS:
    1539              : 
    1540              :             /*
    1541              :              * We do not count these rows as live, because we expect the
    1542              :              * inserting transaction to update the counters at commit, and we
    1543              :              * assume that will happen only after we report our results.  This
    1544              :              * assumption is a bit shaky, but it is what acquire_sample_rows()
    1545              :              * does, so be consistent.
    1546              :              */
    1547        63395 :             prstate->all_visible = false;
    1548        63395 :             prstate->all_frozen = false;
    1549              : 
    1550              :             /*
    1551              :              * If we wanted to optimize for aborts, we might consider marking
    1552              :              * the page prunable when we see INSERT_IN_PROGRESS.  But we
    1553              :              * don't.  See related decisions about when to mark the page
    1554              :              * prunable in heapam.c.
    1555              :              */
    1556        63395 :             break;
    1557              : 
    1558      5835071 :         case HEAPTUPLE_DELETE_IN_PROGRESS:
    1559              : 
    1560              :             /*
    1561              :              * This an expected case during concurrent vacuum.  Count such
    1562              :              * rows as live.  As above, we assume the deleting transaction
    1563              :              * will commit and update the counters after we report.
    1564              :              */
    1565      5835071 :             prstate->live_tuples++;
    1566      5835071 :             prstate->all_visible = false;
    1567      5835071 :             prstate->all_frozen = false;
    1568              : 
    1569              :             /*
    1570              :              * This tuple may soon become DEAD.  Update the hint field so that
    1571              :              * the page is reconsidered for pruning in future.
    1572              :              */
    1573      5835071 :             heap_prune_record_prunable(prstate,
    1574              :                                        HeapTupleHeaderGetUpdateXid(htup));
    1575      5835071 :             break;
    1576              : 
    1577            0 :         default:
    1578              : 
    1579              :             /*
    1580              :              * DEAD tuples should've been passed to heap_prune_record_dead()
    1581              :              * or heap_prune_record_unused() instead.
    1582              :              */
    1583            0 :             elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
    1584              :                  prstate->htsv[offnum]);
    1585              :             break;
    1586              :     }
    1587              : 
    1588              :     /* Consider freezing any normal tuples which will not be removed */
    1589     22967949 :     if (prstate->attempt_freeze)
    1590              :     {
    1591              :         bool        totally_frozen;
    1592              : 
    1593     21450921 :         if ((heap_prepare_freeze_tuple(htup,
    1594     21450921 :                                        prstate->cutoffs,
    1595              :                                        &prstate->pagefrz,
    1596     21450921 :                                        &prstate->frozen[prstate->nfrozen],
    1597              :                                        &totally_frozen)))
    1598              :         {
    1599              :             /* Save prepared freeze plan for later */
    1600      2180663 :             prstate->frozen[prstate->nfrozen++].offset = offnum;
    1601              :         }
    1602              : 
    1603              :         /*
    1604              :          * If any tuple isn't either totally frozen already or eligible to
    1605              :          * become totally frozen (according to its freeze plan), then the page
    1606              :          * definitely cannot be set all-frozen in the visibility map later on.
    1607              :          */
    1608     21450921 :         if (!totally_frozen)
    1609      6402463 :             prstate->all_frozen = false;
    1610              :     }
    1611     22967949 : }
    1612              : 
    1613              : 
    1614              : /*
    1615              :  * Record line pointer that was already LP_DEAD and is left unchanged.
    1616              :  */
    1617              : static void
    1618      1013779 : heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
    1619              : {
    1620              :     Assert(!prstate->processed[offnum]);
    1621      1013779 :     prstate->processed[offnum] = true;
    1622              : 
    1623              :     /*
    1624              :      * Deliberately don't set hastup for LP_DEAD items.  We make the soft
    1625              :      * assumption that any LP_DEAD items encountered here will become
    1626              :      * LP_UNUSED later on, before count_nondeletable_pages is reached.  If we
    1627              :      * don't make this assumption then rel truncation will only happen every
    1628              :      * other VACUUM, at most.  Besides, VACUUM must treat
    1629              :      * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
    1630              :      * handled (handled here, or handled later on).
    1631              :      *
    1632              :      * Similarly, don't unset all_visible and all_frozen until later, at the
    1633              :      * end of heap_page_prune_and_freeze().  This will allow us to attempt to
    1634              :      * freeze the page after pruning.  As long as we unset it before updating
    1635              :      * the visibility map, this will be correct.
    1636              :      */
    1637              : 
    1638              :     /* Record the dead offset for vacuum */
    1639      1013779 :     prstate->deadoffsets[prstate->lpdead_items++] = offnum;
    1640      1013779 : }
    1641              : 
    1642              : /*
    1643              :  * Record LP_REDIRECT that is left unchanged.
    1644              :  */
    1645              : static void
    1646       298581 : heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
    1647              : {
    1648              :     /*
    1649              :      * A redirect line pointer doesn't count as a live tuple.
    1650              :      *
    1651              :      * If we leave a redirect line pointer in place, there will be another
    1652              :      * tuple on the page that it points to.  We will do the bookkeeping for
    1653              :      * that separately.  So we have nothing to do here, except remember that
    1654              :      * we processed this item.
    1655              :      */
    1656              :     Assert(!prstate->processed[offnum]);
    1657       298581 :     prstate->processed[offnum] = true;
    1658       298581 : }
    1659              : 
    1660              : /*
    1661              :  * Perform the actual page changes needed by heap_page_prune_and_freeze().
    1662              :  *
    1663              :  * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
    1664              :  * as unused, not redirecting or removing anything else.  The
    1665              :  * PageRepairFragmentation() call is skipped in that case.
    1666              :  *
    1667              :  * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
    1668              :  * the buffer.  If it is set, an ordinary exclusive lock suffices.
    1669              :  */
    1670              : void
    1671        64743 : heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
    1672              :                         OffsetNumber *redirected, int nredirected,
    1673              :                         OffsetNumber *nowdead, int ndead,
    1674              :                         OffsetNumber *nowunused, int nunused)
    1675              : {
    1676        64743 :     Page        page = BufferGetPage(buffer);
    1677              :     OffsetNumber *offnum;
    1678              :     HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
    1679              : 
    1680              :     /* Shouldn't be called unless there's something to do */
    1681              :     Assert(nredirected > 0 || ndead > 0 || nunused > 0);
    1682              : 
    1683              :     /* If 'lp_truncate_only', we can only remove already-dead line pointers */
    1684              :     Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
    1685              : 
    1686              :     /* Update all redirected line pointers */
    1687        64743 :     offnum = redirected;
    1688       150951 :     for (int i = 0; i < nredirected; i++)
    1689              :     {
    1690        86208 :         OffsetNumber fromoff = *offnum++;
    1691        86208 :         OffsetNumber tooff = *offnum++;
    1692        86208 :         ItemId      fromlp = PageGetItemId(page, fromoff);
    1693              :         ItemId      tolp PG_USED_FOR_ASSERTS_ONLY;
    1694              : 
    1695              : #ifdef USE_ASSERT_CHECKING
    1696              : 
    1697              :         /*
    1698              :          * Any existing item that we set as an LP_REDIRECT (any 'from' item)
    1699              :          * must be the first item from a HOT chain.  If the item has tuple
    1700              :          * storage then it can't be a heap-only tuple.  Otherwise we are just
    1701              :          * maintaining an existing LP_REDIRECT from an existing HOT chain that
    1702              :          * has been pruned at least once before now.
    1703              :          */
    1704              :         if (!ItemIdIsRedirected(fromlp))
    1705              :         {
    1706              :             Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
    1707              : 
    1708              :             htup = (HeapTupleHeader) PageGetItem(page, fromlp);
    1709              :             Assert(!HeapTupleHeaderIsHeapOnly(htup));
    1710              :         }
    1711              :         else
    1712              :         {
    1713              :             /* We shouldn't need to redundantly set the redirect */
    1714              :             Assert(ItemIdGetRedirect(fromlp) != tooff);
    1715              :         }
    1716              : 
    1717              :         /*
    1718              :          * The item that we're about to set as an LP_REDIRECT (the 'from'
    1719              :          * item) will point to an existing item (the 'to' item) that is
    1720              :          * already a heap-only tuple.  There can be at most one LP_REDIRECT
    1721              :          * item per HOT chain.
    1722              :          *
    1723              :          * We need to keep around an LP_REDIRECT item (after original
    1724              :          * non-heap-only root tuple gets pruned away) so that it's always
    1725              :          * possible for VACUUM to easily figure out what TID to delete from
    1726              :          * indexes when an entire HOT chain becomes dead.  A heap-only tuple
    1727              :          * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
    1728              :          * tuple can.
    1729              :          *
    1730              :          * This check may miss problems, e.g. the target of a redirect could
    1731              :          * be marked as unused subsequently. The page_verify_redirects() check
    1732              :          * below will catch such problems.
    1733              :          */
    1734              :         tolp = PageGetItemId(page, tooff);
    1735              :         Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
    1736              :         htup = (HeapTupleHeader) PageGetItem(page, tolp);
    1737              :         Assert(HeapTupleHeaderIsHeapOnly(htup));
    1738              : #endif
    1739              : 
    1740        86208 :         ItemIdSetRedirect(fromlp, tooff);
    1741              :     }
    1742              : 
    1743              :     /* Update all now-dead line pointers */
    1744        64743 :     offnum = nowdead;
    1745      1669423 :     for (int i = 0; i < ndead; i++)
    1746              :     {
    1747      1604680 :         OffsetNumber off = *offnum++;
    1748      1604680 :         ItemId      lp = PageGetItemId(page, off);
    1749              : 
    1750              : #ifdef USE_ASSERT_CHECKING
    1751              : 
    1752              :         /*
    1753              :          * An LP_DEAD line pointer must be left behind when the original item
    1754              :          * (which is dead to everybody) could still be referenced by a TID in
    1755              :          * an index.  This should never be necessary with any individual
    1756              :          * heap-only tuple item, though. (It's not clear how much of a problem
    1757              :          * that would be, but there is no reason to allow it.)
    1758              :          */
    1759              :         if (ItemIdHasStorage(lp))
    1760              :         {
    1761              :             Assert(ItemIdIsNormal(lp));
    1762              :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    1763              :             Assert(!HeapTupleHeaderIsHeapOnly(htup));
    1764              :         }
    1765              :         else
    1766              :         {
    1767              :             /* Whole HOT chain becomes dead */
    1768              :             Assert(ItemIdIsRedirected(lp));
    1769              :         }
    1770              : #endif
    1771              : 
    1772      1604680 :         ItemIdSetDead(lp);
    1773              :     }
    1774              : 
    1775              :     /* Update all now-unused line pointers */
    1776        64743 :     offnum = nowunused;
    1777       369611 :     for (int i = 0; i < nunused; i++)
    1778              :     {
    1779       304868 :         OffsetNumber off = *offnum++;
    1780       304868 :         ItemId      lp = PageGetItemId(page, off);
    1781              : 
    1782              : #ifdef USE_ASSERT_CHECKING
    1783              : 
    1784              :         if (lp_truncate_only)
    1785              :         {
    1786              :             /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
    1787              :             Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
    1788              :         }
    1789              :         else
    1790              :         {
    1791              :             /*
    1792              :              * When heap_page_prune_and_freeze() was called, mark_unused_now
    1793              :              * may have been passed as true, which allows would-be LP_DEAD
    1794              :              * items to be made LP_UNUSED instead.  This is only possible if
    1795              :              * the relation has no indexes.  If there are any dead items, then
    1796              :              * mark_unused_now was not true and every item being marked
    1797              :              * LP_UNUSED must refer to a heap-only tuple.
    1798              :              */
    1799              :             if (ndead > 0)
    1800              :             {
    1801              :                 Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
    1802              :                 htup = (HeapTupleHeader) PageGetItem(page, lp);
    1803              :                 Assert(HeapTupleHeaderIsHeapOnly(htup));
    1804              :             }
    1805              :             else
    1806              :                 Assert(ItemIdIsUsed(lp));
    1807              :         }
    1808              : 
    1809              : #endif
    1810              : 
    1811       304868 :         ItemIdSetUnused(lp);
    1812              :     }
    1813              : 
    1814        64743 :     if (lp_truncate_only)
    1815         2473 :         PageTruncateLinePointerArray(page);
    1816              :     else
    1817              :     {
    1818              :         /*
    1819              :          * Finally, repair any fragmentation, and update the page's hint bit
    1820              :          * about whether it has free pointers.
    1821              :          */
    1822        62270 :         PageRepairFragmentation(page);
    1823              : 
    1824              :         /*
    1825              :          * Now that the page has been modified, assert that redirect items
    1826              :          * still point to valid targets.
    1827              :          */
    1828        62270 :         page_verify_redirects(page);
    1829              :     }
    1830        64743 : }
    1831              : 
    1832              : 
    1833              : /*
    1834              :  * If built with assertions, verify that all LP_REDIRECT items point to a
    1835              :  * valid item.
    1836              :  *
    1837              :  * One way that bugs related to HOT pruning show is redirect items pointing to
    1838              :  * removed tuples. It's not trivial to reliably check that marking an item
    1839              :  * unused will not orphan a redirect item during heap_prune_chain() /
    1840              :  * heap_page_prune_execute(), so we additionally check the whole page after
    1841              :  * pruning. Without this check such bugs would typically only cause asserts
    1842              :  * later, potentially well after the corruption has been introduced.
    1843              :  *
    1844              :  * Also check comments in heap_page_prune_execute()'s redirection loop.
    1845              :  */
    1846              : static void
    1847        62270 : page_verify_redirects(Page page)
    1848              : {
    1849              : #ifdef USE_ASSERT_CHECKING
    1850              :     OffsetNumber offnum;
    1851              :     OffsetNumber maxoff;
    1852              : 
    1853              :     maxoff = PageGetMaxOffsetNumber(page);
    1854              :     for (offnum = FirstOffsetNumber;
    1855              :          offnum <= maxoff;
    1856              :          offnum = OffsetNumberNext(offnum))
    1857              :     {
    1858              :         ItemId      itemid = PageGetItemId(page, offnum);
    1859              :         OffsetNumber targoff;
    1860              :         ItemId      targitem;
    1861              :         HeapTupleHeader htup;
    1862              : 
    1863              :         if (!ItemIdIsRedirected(itemid))
    1864              :             continue;
    1865              : 
    1866              :         targoff = ItemIdGetRedirect(itemid);
    1867              :         targitem = PageGetItemId(page, targoff);
    1868              : 
    1869              :         Assert(ItemIdIsUsed(targitem));
    1870              :         Assert(ItemIdIsNormal(targitem));
    1871              :         Assert(ItemIdHasStorage(targitem));
    1872              :         htup = (HeapTupleHeader) PageGetItem(page, targitem);
    1873              :         Assert(HeapTupleHeaderIsHeapOnly(htup));
    1874              :     }
    1875              : #endif
    1876        62270 : }
    1877              : 
    1878              : 
    1879              : /*
    1880              :  * For all items in this page, find their respective root line pointers.
    1881              :  * If item k is part of a HOT-chain with root at item j, then we set
    1882              :  * root_offsets[k - 1] = j.
    1883              :  *
    1884              :  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
    1885              :  * Unused entries are filled with InvalidOffsetNumber (zero).
    1886              :  *
    1887              :  * The function must be called with at least share lock on the buffer, to
    1888              :  * prevent concurrent prune operations.
    1889              :  *
    1890              :  * Note: The information collected here is valid only as long as the caller
    1891              :  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
    1892              :  * and reused by a completely unrelated tuple.
    1893              :  */
    1894              : void
    1895       112486 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
    1896              : {
    1897              :     OffsetNumber offnum,
    1898              :                 maxoff;
    1899              : 
    1900       112486 :     MemSet(root_offsets, InvalidOffsetNumber,
    1901              :            MaxHeapTuplesPerPage * sizeof(OffsetNumber));
    1902              : 
    1903       112486 :     maxoff = PageGetMaxOffsetNumber(page);
    1904      9091260 :     for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    1905              :     {
    1906      8978774 :         ItemId      lp = PageGetItemId(page, offnum);
    1907              :         HeapTupleHeader htup;
    1908              :         OffsetNumber nextoffnum;
    1909              :         TransactionId priorXmax;
    1910              : 
    1911              :         /* skip unused and dead items */
    1912      8978774 :         if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
    1913        11016 :             continue;
    1914              : 
    1915      8967758 :         if (ItemIdIsNormal(lp))
    1916              :         {
    1917      8963571 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    1918              : 
    1919              :             /*
    1920              :              * Check if this tuple is part of a HOT-chain rooted at some other
    1921              :              * tuple. If so, skip it for now; we'll process it when we find
    1922              :              * its root.
    1923              :              */
    1924      8963571 :             if (HeapTupleHeaderIsHeapOnly(htup))
    1925         4493 :                 continue;
    1926              : 
    1927              :             /*
    1928              :              * This is either a plain tuple or the root of a HOT-chain.
    1929              :              * Remember it in the mapping.
    1930              :              */
    1931      8959078 :             root_offsets[offnum - 1] = offnum;
    1932              : 
    1933              :             /* If it's not the start of a HOT-chain, we're done with it */
    1934      8959078 :             if (!HeapTupleHeaderIsHotUpdated(htup))
    1935      8958857 :                 continue;
    1936              : 
    1937              :             /* Set up to scan the HOT-chain */
    1938          221 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1939          221 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1940              :         }
    1941              :         else
    1942              :         {
    1943              :             /* Must be a redirect item. We do not set its root_offsets entry */
    1944              :             Assert(ItemIdIsRedirected(lp));
    1945              :             /* Set up to scan the HOT-chain */
    1946         4187 :             nextoffnum = ItemIdGetRedirect(lp);
    1947         4187 :             priorXmax = InvalidTransactionId;
    1948              :         }
    1949              : 
    1950              :         /*
    1951              :          * Now follow the HOT-chain and collect other tuples in the chain.
    1952              :          *
    1953              :          * Note: Even though this is a nested loop, the complexity of the
    1954              :          * function is O(N) because a tuple in the page should be visited not
    1955              :          * more than twice, once in the outer loop and once in HOT-chain
    1956              :          * chases.
    1957              :          */
    1958              :         for (;;)
    1959              :         {
    1960              :             /* Sanity check (pure paranoia) */
    1961         4490 :             if (offnum < FirstOffsetNumber)
    1962            0 :                 break;
    1963              : 
    1964              :             /*
    1965              :              * An offset past the end of page's line pointer array is possible
    1966              :              * when the array was truncated
    1967              :              */
    1968         4490 :             if (offnum > maxoff)
    1969            0 :                 break;
    1970              : 
    1971         4490 :             lp = PageGetItemId(page, nextoffnum);
    1972              : 
    1973              :             /* Check for broken chains */
    1974         4490 :             if (!ItemIdIsNormal(lp))
    1975            0 :                 break;
    1976              : 
    1977         4490 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    1978              : 
    1979         4793 :             if (TransactionIdIsValid(priorXmax) &&
    1980          303 :                 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
    1981            0 :                 break;
    1982              : 
    1983              :             /* Remember the root line pointer for this item */
    1984         4490 :             root_offsets[nextoffnum - 1] = offnum;
    1985              : 
    1986              :             /* Advance to next chain member, if any */
    1987         4490 :             if (!HeapTupleHeaderIsHotUpdated(htup))
    1988         4408 :                 break;
    1989              : 
    1990              :             /* HOT implies it can't have moved to different partition */
    1991              :             Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
    1992              : 
    1993           82 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1994           82 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1995              :         }
    1996              :     }
    1997       112486 : }
    1998              : 
    1999              : 
    2000              : /*
    2001              :  * Compare fields that describe actions required to freeze tuple with caller's
    2002              :  * open plan.  If everything matches then the frz tuple plan is equivalent to
    2003              :  * caller's plan.
    2004              :  */
    2005              : static inline bool
    2006       943969 : heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
    2007              : {
    2008       943969 :     if (plan->xmax == frz->xmax &&
    2009       942676 :         plan->t_infomask2 == frz->t_infomask2 &&
    2010       941841 :         plan->t_infomask == frz->t_infomask &&
    2011       939245 :         plan->frzflags == frz->frzflags)
    2012       939245 :         return true;
    2013              : 
    2014              :     /* Caller must call heap_log_freeze_new_plan again for frz */
    2015         4724 :     return false;
    2016              : }
    2017              : 
    2018              : /*
    2019              :  * Comparator used to deduplicate the freeze plans used in WAL records.
    2020              :  */
    2021              : static int
    2022      1332571 : heap_log_freeze_cmp(const void *arg1, const void *arg2)
    2023              : {
    2024      1332571 :     const HeapTupleFreeze *frz1 = arg1;
    2025      1332571 :     const HeapTupleFreeze *frz2 = arg2;
    2026              : 
    2027      1332571 :     if (frz1->xmax < frz2->xmax)
    2028        13114 :         return -1;
    2029      1319457 :     else if (frz1->xmax > frz2->xmax)
    2030        14792 :         return 1;
    2031              : 
    2032      1304665 :     if (frz1->t_infomask2 < frz2->t_infomask2)
    2033         4237 :         return -1;
    2034      1300428 :     else if (frz1->t_infomask2 > frz2->t_infomask2)
    2035         4581 :         return 1;
    2036              : 
    2037      1295847 :     if (frz1->t_infomask < frz2->t_infomask)
    2038        10638 :         return -1;
    2039      1285209 :     else if (frz1->t_infomask > frz2->t_infomask)
    2040        18148 :         return 1;
    2041              : 
    2042      1267061 :     if (frz1->frzflags < frz2->frzflags)
    2043            0 :         return -1;
    2044      1267061 :     else if (frz1->frzflags > frz2->frzflags)
    2045            0 :         return 1;
    2046              : 
    2047              :     /*
    2048              :      * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
    2049              :      * (So the tuples will share a single canonical freeze plan.)
    2050              :      *
    2051              :      * We tiebreak on page offset number to keep each freeze plan's page
    2052              :      * offset number array individually sorted. (Unnecessary, but be tidy.)
    2053              :      */
    2054      1267061 :     if (frz1->offset < frz2->offset)
    2055      1064334 :         return -1;
    2056       202727 :     else if (frz1->offset > frz2->offset)
    2057       202727 :         return 1;
    2058              : 
    2059              :     Assert(false);
    2060            0 :     return 0;
    2061              : }
    2062              : 
    2063              : /*
    2064              :  * Start new plan initialized using tuple-level actions.  At least one tuple
    2065              :  * will have steps required to freeze described by caller's plan during REDO.
    2066              :  */
    2067              : static inline void
    2068        27594 : heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
    2069              : {
    2070        27594 :     plan->xmax = frz->xmax;
    2071        27594 :     plan->t_infomask2 = frz->t_infomask2;
    2072        27594 :     plan->t_infomask = frz->t_infomask;
    2073        27594 :     plan->frzflags = frz->frzflags;
    2074        27594 :     plan->ntuples = 1;           /* for now */
    2075        27594 : }
    2076              : 
    2077              : /*
    2078              :  * Deduplicate tuple-based freeze plans so that each distinct set of
    2079              :  * processing steps is only stored once in the WAL record.
    2080              :  * Called during original execution of freezing (for logged relations).
    2081              :  *
    2082              :  * Return value is number of plans set in *plans_out for caller.  Also writes
    2083              :  * an array of offset numbers into *offsets_out output argument for caller
    2084              :  * (actually there is one array per freeze plan, but that's not of immediate
    2085              :  * concern to our caller).
    2086              :  */
    2087              : static int
    2088        22870 : heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
    2089              :                      xlhp_freeze_plan *plans_out,
    2090              :                      OffsetNumber *offsets_out)
    2091              : {
    2092        22870 :     int         nplans = 0;
    2093              : 
    2094              :     /* Sort tuple-based freeze plans in the order required to deduplicate */
    2095        22870 :     qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
    2096              : 
    2097       989709 :     for (int i = 0; i < ntuples; i++)
    2098              :     {
    2099       966839 :         HeapTupleFreeze *frz = tuples + i;
    2100              : 
    2101       966839 :         if (i == 0)
    2102              :         {
    2103              :             /* New canonical freeze plan starting with first tup */
    2104        22870 :             heap_log_freeze_new_plan(plans_out, frz);
    2105        22870 :             nplans++;
    2106              :         }
    2107       943969 :         else if (heap_log_freeze_eq(plans_out, frz))
    2108              :         {
    2109              :             /* tup matches open canonical plan -- include tup in it */
    2110              :             Assert(offsets_out[i - 1] < frz->offset);
    2111       939245 :             plans_out->ntuples++;
    2112              :         }
    2113              :         else
    2114              :         {
    2115              :             /* Tup doesn't match current plan -- done with it now */
    2116         4724 :             plans_out++;
    2117              : 
    2118              :             /* New canonical freeze plan starting with this tup */
    2119         4724 :             heap_log_freeze_new_plan(plans_out, frz);
    2120         4724 :             nplans++;
    2121              :         }
    2122              : 
    2123              :         /*
    2124              :          * Save page offset number in dedicated buffer in passing.
    2125              :          *
    2126              :          * REDO routine relies on the record's offset numbers array grouping
    2127              :          * offset numbers by freeze plan.  The sort order within each grouping
    2128              :          * is ascending offset number order, just to keep things tidy.
    2129              :          */
    2130       966839 :         offsets_out[i] = frz->offset;
    2131              :     }
    2132              : 
    2133              :     Assert(nplans > 0 && nplans <= ntuples);
    2134              : 
    2135        22870 :     return nplans;
    2136              : }
    2137              : 
    2138              : /*
    2139              :  * Write an XLOG_HEAP2_PRUNE* WAL record
    2140              :  *
    2141              :  * This is used for several different page maintenance operations:
    2142              :  *
    2143              :  * - Page pruning, in VACUUM's 1st pass or on access: Some items are
    2144              :  *   redirected, some marked dead, and some removed altogether.
    2145              :  *
    2146              :  * - Freezing: Items are marked as 'frozen'.
    2147              :  *
    2148              :  * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
    2149              :  *
    2150              :  * They have enough commonalities that we use a single WAL record for them
    2151              :  * all.
    2152              :  *
    2153              :  * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
    2154              :  * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
    2155              :  * replaying 'unused' items depends on whether they were all previously marked
    2156              :  * as dead.
    2157              :  *
    2158              :  * If the VM is being updated, vmflags will contain the bits to set. In this
    2159              :  * case, vmbuffer should already have been updated and marked dirty and should
    2160              :  * still be pinned and locked.
    2161              :  *
    2162              :  * Note: This function scribbles on the 'frozen' array.
    2163              :  *
    2164              :  * Note: This is called in a critical section, so careful what you do here.
    2165              :  */
    2166              : void
    2167        88112 : log_heap_prune_and_freeze(Relation relation, Buffer buffer,
    2168              :                           Buffer vmbuffer, uint8 vmflags,
    2169              :                           TransactionId conflict_xid,
    2170              :                           bool cleanup_lock,
    2171              :                           PruneReason reason,
    2172              :                           HeapTupleFreeze *frozen, int nfrozen,
    2173              :                           OffsetNumber *redirected, int nredirected,
    2174              :                           OffsetNumber *dead, int ndead,
    2175              :                           OffsetNumber *unused, int nunused)
    2176              : {
    2177              :     xl_heap_prune xlrec;
    2178              :     XLogRecPtr  recptr;
    2179              :     uint8       info;
    2180              :     uint8       regbuf_flags_heap;
    2181              : 
    2182              :     /* The following local variables hold data registered in the WAL record: */
    2183              :     xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
    2184              :     xlhp_freeze_plans freeze_plans;
    2185              :     xlhp_prune_items redirect_items;
    2186              :     xlhp_prune_items dead_items;
    2187              :     xlhp_prune_items unused_items;
    2188              :     OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
    2189        88112 :     bool        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
    2190        88112 :     bool        do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
    2191              : 
    2192              :     Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
    2193              : 
    2194        88112 :     xlrec.flags = 0;
    2195        88112 :     regbuf_flags_heap = REGBUF_STANDARD;
    2196              : 
    2197              :     /*
    2198              :      * We can avoid an FPI of the heap page if the only modification we are
    2199              :      * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
    2200              :      * disabled. Note that if we explicitly skip an FPI, we must not stamp the
    2201              :      * heap page with this record's LSN. Recovery skips records <= the stamped
    2202              :      * LSN, so this could lead to skipping an earlier FPI needed to repair a
    2203              :      * torn page.
    2204              :      */
    2205        88112 :     if (!do_prune &&
    2206            0 :         nfrozen == 0 &&
    2207            0 :         (!do_set_vm || !XLogHintBitIsNeeded()))
    2208            0 :         regbuf_flags_heap |= REGBUF_NO_IMAGE;
    2209              : 
    2210              :     /*
    2211              :      * Prepare data for the buffer.  The arrays are not actually in the
    2212              :      * buffer, but we pretend that they are.  When XLogInsert stores a full
    2213              :      * page image, the arrays can be omitted.
    2214              :      */
    2215        88112 :     XLogBeginInsert();
    2216        88112 :     XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
    2217              : 
    2218        88112 :     if (do_set_vm)
    2219        13383 :         XLogRegisterBuffer(1, vmbuffer, 0);
    2220              : 
    2221        88112 :     if (nfrozen > 0)
    2222              :     {
    2223              :         int         nplans;
    2224              : 
    2225        22870 :         xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
    2226              : 
    2227              :         /*
    2228              :          * Prepare deduplicated representation for use in the WAL record. This
    2229              :          * destructively sorts frozen tuples array in-place.
    2230              :          */
    2231        22870 :         nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
    2232              : 
    2233        22870 :         freeze_plans.nplans = nplans;
    2234        22870 :         XLogRegisterBufData(0, &freeze_plans,
    2235              :                             offsetof(xlhp_freeze_plans, plans));
    2236        22870 :         XLogRegisterBufData(0, plans,
    2237              :                             sizeof(xlhp_freeze_plan) * nplans);
    2238              :     }
    2239        88112 :     if (nredirected > 0)
    2240              :     {
    2241        17248 :         xlrec.flags |= XLHP_HAS_REDIRECTIONS;
    2242              : 
    2243        17248 :         redirect_items.ntargets = nredirected;
    2244        17248 :         XLogRegisterBufData(0, &redirect_items,
    2245              :                             offsetof(xlhp_prune_items, data));
    2246        17248 :         XLogRegisterBufData(0, redirected,
    2247              :                             sizeof(OffsetNumber[2]) * nredirected);
    2248              :     }
    2249        88112 :     if (ndead > 0)
    2250              :     {
    2251        40346 :         xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
    2252              : 
    2253        40346 :         dead_items.ntargets = ndead;
    2254        40346 :         XLogRegisterBufData(0, &dead_items,
    2255              :                             offsetof(xlhp_prune_items, data));
    2256        40346 :         XLogRegisterBufData(0, dead,
    2257              :                             sizeof(OffsetNumber) * ndead);
    2258              :     }
    2259        88112 :     if (nunused > 0)
    2260              :     {
    2261        25258 :         xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
    2262              : 
    2263        25258 :         unused_items.ntargets = nunused;
    2264        25258 :         XLogRegisterBufData(0, &unused_items,
    2265              :                             offsetof(xlhp_prune_items, data));
    2266        25258 :         XLogRegisterBufData(0, unused,
    2267              :                             sizeof(OffsetNumber) * nunused);
    2268              :     }
    2269        88112 :     if (nfrozen > 0)
    2270        22870 :         XLogRegisterBufData(0, frz_offsets,
    2271              :                             sizeof(OffsetNumber) * nfrozen);
    2272              : 
    2273              :     /*
    2274              :      * Prepare the main xl_heap_prune record.  We already set the XLHP_HAS_*
    2275              :      * flag above.
    2276              :      */
    2277        88112 :     if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
    2278              :     {
    2279        13383 :         xlrec.flags |= XLHP_VM_ALL_VISIBLE;
    2280        13383 :         if (vmflags & VISIBILITYMAP_ALL_FROZEN)
    2281        10386 :             xlrec.flags |= XLHP_VM_ALL_FROZEN;
    2282              :     }
    2283        88112 :     if (RelationIsAccessibleInLogicalDecoding(relation))
    2284          646 :         xlrec.flags |= XLHP_IS_CATALOG_REL;
    2285        88112 :     if (TransactionIdIsValid(conflict_xid))
    2286        72738 :         xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
    2287        88112 :     if (cleanup_lock)
    2288        74620 :         xlrec.flags |= XLHP_CLEANUP_LOCK;
    2289              :     else
    2290              :     {
    2291              :         Assert(nredirected == 0 && ndead == 0);
    2292              :         /* also, any items in 'unused' must've been LP_DEAD previously */
    2293              :     }
    2294        88112 :     XLogRegisterData(&xlrec, SizeOfHeapPrune);
    2295        88112 :     if (TransactionIdIsValid(conflict_xid))
    2296        72738 :         XLogRegisterData(&conflict_xid, sizeof(TransactionId));
    2297              : 
    2298        88112 :     switch (reason)
    2299              :     {
    2300        42503 :         case PRUNE_ON_ACCESS:
    2301        42503 :             info = XLOG_HEAP2_PRUNE_ON_ACCESS;
    2302        42503 :             break;
    2303        32117 :         case PRUNE_VACUUM_SCAN:
    2304        32117 :             info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
    2305        32117 :             break;
    2306        13492 :         case PRUNE_VACUUM_CLEANUP:
    2307        13492 :             info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
    2308        13492 :             break;
    2309            0 :         default:
    2310            0 :             elog(ERROR, "unrecognized prune reason: %d", (int) reason);
    2311              :             break;
    2312              :     }
    2313        88112 :     recptr = XLogInsert(RM_HEAP2_ID, info);
    2314              : 
    2315        88112 :     if (do_set_vm)
    2316              :     {
    2317              :         Assert(BufferIsDirty(vmbuffer));
    2318        13383 :         PageSetLSN(BufferGetPage(vmbuffer), recptr);
    2319              :     }
    2320              : 
    2321              :     /*
    2322              :      * See comment at the top of the function about regbuf_flags_heap for
    2323              :      * details on when we can advance the page LSN.
    2324              :      */
    2325        88112 :     if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
    2326              :     {
    2327              :         Assert(BufferIsDirty(buffer));
    2328        88112 :         PageSetLSN(BufferGetPage(buffer), recptr);
    2329              :     }
    2330        88112 : }
        

Generated by: LCOV version 2.0-1