LCOV - code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 90.7 % 636 577
Test Date: 2026-03-26 09:17:35 Functions: 96.4 % 28 27
Legend: Lines: hit / not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pruneheap.c
       4              :  *    heap page pruning and HOT-chain management code
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/heap/pruneheap.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/heapam.h"
      18              : #include "access/heapam_xlog.h"
      19              : #include "access/htup_details.h"
      20              : #include "access/multixact.h"
      21              : #include "access/transam.h"
      22              : #include "access/visibilitymap.h"
      23              : #include "access/xlog.h"
      24              : #include "access/xloginsert.h"
      25              : #include "commands/vacuum.h"
      26              : #include "executor/instrument.h"
      27              : #include "miscadmin.h"
      28              : #include "pgstat.h"
      29              : #include "storage/bufmgr.h"
      30              : #include "utils/rel.h"
      31              : #include "utils/snapmgr.h"
      32              : 
       33              : /* Working data for heap_page_prune_and_freeze() and subroutines */
       34              : typedef struct
       35              : {
       36              :     /*-------------------------------------------------------
       37              :      * Arguments passed to heap_page_prune_and_freeze()
       38              :      *-------------------------------------------------------
       39              :      */
       40              : 
       41              :     /* tuple visibility test, initialized for the relation */
       42              :     GlobalVisState *vistest;
       43              :     /* whether or not dead items can be set LP_UNUSED during pruning */
       44              :     bool        mark_unused_now;
       45              :     /* whether to attempt freezing tuples */
       46              :     bool        attempt_freeze;
       47              :     struct VacuumCutoffs *cutoffs;  /* freeze cutoffs; required when
                       :                                      * attempt_freeze (see
                       :                                      * prune_freeze_setup()) */
       48              :     Relation    relation;
       49              : 
       50              :     /*
       51              :      * Keep the buffer, block, and page handy so that helpers needing to
       52              :      * access them don't need to make repeated calls to BufferGetBlockNumber()
       53              :      * and BufferGetPage().
       54              :      */
       55              :     BlockNumber block;
       56              :     Buffer      buffer;
       57              :     Page        page;
       58              : 
       59              :     /*-------------------------------------------------------
       60              :      * Fields describing what to do to the page
       61              :      *-------------------------------------------------------
       62              :      */
       63              :     TransactionId new_prune_xid;    /* new prune hint value */
       64              :     TransactionId latest_xid_removed;   /* newest XID among tuples removed */
       65              :     int         nredirected;    /* numbers of entries in arrays below */
       66              :     int         ndead;
       67              :     int         nunused;
       68              :     int         nfrozen;
       69              :     /* arrays that accumulate indexes of items to be changed */
       70              :     OffsetNumber redirected[MaxHeapTuplesPerPage * 2];  /* NOTE(review):
                       :                                                          * presumably (from,
                       :                                                          * to) offset pairs,
                       :                                                          * hence 2x -- confirm
                       :                                                          * in the execute
                       :                                                          * phase */
       71              :     OffsetNumber nowdead[MaxHeapTuplesPerPage];
       72              :     OffsetNumber nowunused[MaxHeapTuplesPerPage];
       73              :     HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
       74              : 
       75              :     /*
       76              :      * set_all_visible and set_all_frozen indicate if the all-visible and
       77              :      * all-frozen bits in the visibility map can be set for this page after
       78              :      * pruning.
       79              :      *
       80              :      * NOTE: set_all_visible and set_all_frozen initially don't include
       81              :      * LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
       82              :      * use them to decide whether to opportunistically freeze the page or not.
       83              :      * The set_all_visible and set_all_frozen values ultimately used to set
       84              :      * the VM are adjusted to include LP_DEAD items after we determine whether
       85              :      * or not to opportunistically freeze.
       86              :      */
       87              :     bool        set_all_visible;
       88              :     bool        set_all_frozen;
       89              : 
       90              :     /*-------------------------------------------------------
       91              :      * Working state for HOT chain processing
       92              :      *-------------------------------------------------------
       93              :      */
       94              : 
       95              :     /*
       96              :      * 'root_items' contains offsets of all LP_REDIRECT line pointers and
       97              :      * normal non-HOT tuples.  They can be stand-alone items or the first item
       98              :      * in a HOT chain.  'heaponly_items' contains heap-only tuples which can
       99              :      * only be removed as part of a HOT chain.
      100              :      */
      101              :     int         nroot_items;
      102              :     OffsetNumber root_items[MaxHeapTuplesPerPage];
      103              :     int         nheaponly_items;
      104              :     OffsetNumber heaponly_items[MaxHeapTuplesPerPage];
      105              : 
      106              :     /*
      107              :      * processed[offnum] is true if item at offnum has been processed.
      108              :      *
      109              :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
      110              :      * 1. Otherwise every access would need to subtract 1.
      111              :      */
      112              :     bool        processed[MaxHeapTuplesPerPage + 1];
      113              : 
      114              :     /*
      115              :      * Tuple visibility is only computed once for each tuple, for correctness
      116              :      * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
      117              :      * details.  This is of type int8[], instead of HTSV_Result[], so we can
      118              :      * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
      119              :      * items.
      120              :      *
      121              :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
      122              :      * 1. Otherwise every access would need to subtract 1.
      123              :      */
      124              :     int8        htsv[MaxHeapTuplesPerPage + 1];
      125              : 
      126              :     /*-------------------------------------------------------
      127              :      * Working state for freezing
      128              :      *-------------------------------------------------------
      129              :      */
      130              :     HeapPageFreeze pagefrz;
      131              : 
      132              :     /*-------------------------------------------------------
      133              :      * Working state for visibility map processing
      134              :      *-------------------------------------------------------
      135              :      */
      136              : 
      137              :     /*
      138              :      * Caller must provide a pinned vmbuffer corresponding to the heap block
      139              :      * passed to heap_page_prune_and_freeze(). We will fix any corruption
      140              :      * found in the VM and set the VM if the page is all-visible/all-frozen.
      141              :      */
      142              :     Buffer      vmbuffer;
      143              : 
      144              :     /*
      145              :      * The state of the VM bits at the beginning of pruning and the state they
      146              :      * will be in at the end.
      147              :      */
      148              :     uint8       old_vmbits;
      149              :     uint8       new_vmbits;
      150              : 
      151              :     /* The newest xmin of live tuples on the page */
      152              :     TransactionId newest_live_xid;
      153              : 
      154              :     /*-------------------------------------------------------
      155              :      * Information about what was done
      156              :      *
      157              :      * These fields are not used by pruning itself for the most part, but are
      158              :      * used to collect information about what was pruned and what state the
      159              :      * page is in after pruning, for the benefit of the caller.  They are
      160              :      * copied to the caller's PruneFreezeResult at the end.
      161              :      * -------------------------------------------------------
      162              :      */
      163              : 
      164              :     int         ndeleted;       /* Number of tuples deleted from the page */
      165              : 
      166              :     /* Number of live and recently dead tuples, after pruning */
      167              :     int         live_tuples;
      168              :     int         recently_dead_tuples;
      169              : 
      170              :     /* Whether or not the page makes rel truncation unsafe */
      171              :     bool        hastup;
      172              : 
      173              :     /*
      174              :      * LP_DEAD items on the page after pruning.  Includes existing LP_DEAD
      175              :      * items
      176              :      */
      177              :     int         lpdead_items;   /* number of items in the array */
      178              :     OffsetNumber *deadoffsets;  /* points directly to presult->deadoffsets */
      179              : } PruneState;
     180              : 
      181              : /*
      182              :  * Type of visibility map corruption detected on a heap page and its
      183              :  * associated VM page. Passed to heap_page_fix_vm_corruption() so the caller
      184              :  * can specify what it found rather than having the function rederive the
      185              :  * corruption from page state.
                       :  *
                       :  * NOTE(review): any new corruption kind added here must also be handled in
                       :  * heap_page_fix_vm_corruption().
      186              :  */
      187              : typedef enum VMCorruptionType
      188              : {
      189              :     /* VM bits are set but the heap page-level PD_ALL_VISIBLE flag is not */
      190              :     VM_CORRUPT_MISSING_PAGE_HINT,
      191              :     /* LP_DEAD line pointers found on a page marked all-visible */
      192              :     VM_CORRUPT_LPDEAD,
      193              :     /* Tuple not visible to all transactions on a page marked all-visible */
      194              :     VM_CORRUPT_TUPLE_VISIBILITY,
      195              : } VMCorruptionType;
     196              : 
     197              : /* Local functions */
     198              : static void prune_freeze_setup(PruneFreezeParams *params,
     199              :                                TransactionId *new_relfrozen_xid,
     200              :                                MultiXactId *new_relmin_mxid,
     201              :                                PruneFreezeResult *presult,
     202              :                                PruneState *prstate);
     203              : static void heap_page_fix_vm_corruption(PruneState *prstate,
     204              :                                         OffsetNumber offnum,
     205              :                                         VMCorruptionType ctype);
     206              : static void prune_freeze_fast_path(PruneState *prstate,
     207              :                                    PruneFreezeResult *presult);
     208              : static void prune_freeze_plan(PruneState *prstate,
     209              :                               OffsetNumber *off_loc);
     210              : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
     211              :                                                HeapTuple tup);
     212              : static inline HTSV_Result htsv_get_valid_status(int status);
     213              : static void heap_prune_chain(OffsetNumber maxoff,
     214              :                              OffsetNumber rootoffnum, PruneState *prstate);
     215              : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid,
     216              :                                        OffsetNumber offnum);
     217              : static void heap_prune_record_redirect(PruneState *prstate,
     218              :                                        OffsetNumber offnum, OffsetNumber rdoffnum,
     219              :                                        bool was_normal);
     220              : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
     221              :                                    bool was_normal);
     222              : static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
     223              :                                              bool was_normal);
     224              : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
     225              : 
     226              : static void heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum);
     227              : static void heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum);
     228              : static void heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum);
     229              : static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
     230              : 
     231              : static void page_verify_redirects(Page page);
     232              : 
     233              : static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
     234              :                                   PruneState *prstate);
     235              : static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
     236              : 
     237              : 
     238              : /*
     239              :  * Optionally prune and repair fragmentation in the specified page.
     240              :  *
     241              :  * This is an opportunistic function.  It will perform housekeeping
     242              :  * only if the page heuristically looks like a candidate for pruning and we
     243              :  * can acquire buffer cleanup lock without blocking.
     244              :  *
     245              :  * Note: this is called quite often.  It's important that it fall out quickly
     246              :  * if there's not any use in pruning.
     247              :  *
     248              :  * Caller must have pin on the buffer, and must *not* have a lock on it.
     249              :  *
     250              :  * This function may pin *vmbuffer. It's passed by reference so the caller can
     251              :  * reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find
     252              :  * VM corruption during pruning, we will fix it. Caller is responsible for
     253              :  * unpinning *vmbuffer.
     254              :  */
     255              : void
     256     21392803 : heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
     257              : {
     258     21392803 :     Page        page = BufferGetPage(buffer);
     259              :     TransactionId prune_xid;
     260              :     GlobalVisState *vistest;
     261              :     Size        minfree;
     262              : 
     263              :     /*
     264              :      * We can't write WAL in recovery mode, so there's no point trying to
     265              :      * clean the page. The primary will likely issue a cleaning WAL record
     266              :      * soon anyway, so this is no particular loss.
     267              :      */
     268     21392803 :     if (RecoveryInProgress())
     269       240445 :         return;
     270              : 
     271              :     /*
     272              :      * First check whether there's any chance there's something to prune,
     273              :      * determining the appropriate horizon is a waste if there's no prune_xid
     274              :      * (i.e. no updates/deletes left potentially dead tuples around).
     275              :      */
     276     21152358 :     prune_xid = PageGetPruneXid(page);
     277     21152358 :     if (!TransactionIdIsValid(prune_xid))
     278     10879194 :         return;
     279              : 
     280              :     /*
     281              :      * Check whether prune_xid indicates that there may be dead rows that can
     282              :      * be cleaned up.
     283              :      */
     284     10273164 :     vistest = GlobalVisTestFor(relation);
     285              : 
     286     10273164 :     if (!GlobalVisTestIsRemovableXid(vistest, prune_xid, true))
     287      8530369 :         return;
     288              : 
     289              :     /*
     290              :      * We prune when a previous UPDATE failed to find enough space on the page
     291              :      * for a new tuple version, or when free space falls below the relation's
     292              :      * fill-factor target (but not less than 10%).
     293              :      *
     294              :      * Checking free space here is questionable since we aren't holding any
     295              :      * lock on the buffer; in the worst case we could get a bogus answer. It's
     296              :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
     297              :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     298              :      * important than sometimes getting a wrong answer in what is after all
     299              :      * just a heuristic estimate.
     300              :      */
     301      1742795 :     minfree = RelationGetTargetPageFreeSpace(relation,
     302              :                                              HEAP_DEFAULT_FILLFACTOR);
     303      1742795 :     minfree = Max(minfree, BLCKSZ / 10);
     304              : 
     305      1742795 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     306              :     {
     307              :         /* OK, try to get exclusive buffer lock */
     308        45480 :         if (!ConditionalLockBufferForCleanup(buffer))
     309          511 :             return;
     310              : 
     311              :         /*
     312              :          * Now that we have buffer lock, get accurate information about the
     313              :          * page's free space, and recheck the heuristic about whether to
     314              :          * prune.
     315              :          */
     316        44969 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     317              :         {
     318              :             OffsetNumber dummy_off_loc;
     319              :             PruneFreezeResult presult;
     320              :             PruneFreezeParams params;
     321              : 
     322        44969 :             visibilitymap_pin(relation, BufferGetBlockNumber(buffer),
     323              :                               vmbuffer);
     324              : 
     325        44969 :             params.relation = relation;
     326        44969 :             params.buffer = buffer;
     327        44969 :             params.vmbuffer = *vmbuffer;
     328        44969 :             params.reason = PRUNE_ON_ACCESS;
     329        44969 :             params.vistest = vistest;
     330        44969 :             params.cutoffs = NULL;
     331              : 
     332              :             /*
     333              :              * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
     334              :              * regardless of whether or not the relation has indexes, since we
     335              :              * cannot safely determine that during on-access pruning with the
     336              :              * current implementation.
     337              :              */
     338        44969 :             params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
     339              : 
     340        44969 :             heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
     341              :                                        NULL, NULL);
     342              : 
     343              :             /*
     344              :              * Report the number of tuples reclaimed to pgstats.  This is
     345              :              * presult.ndeleted minus the number of newly-LP_DEAD-set items.
     346              :              *
     347              :              * We derive the number of dead tuples like this to avoid totally
     348              :              * forgetting about items that were set to LP_DEAD, since they
     349              :              * still need to be cleaned up by VACUUM.  We only want to count
     350              :              * heap-only tuples that just became LP_UNUSED in our report,
     351              :              * which don't.
     352              :              *
     353              :              * VACUUM doesn't have to compensate in the same way when it
     354              :              * tracks ndeleted, since it will set the same LP_DEAD items to
     355              :              * LP_UNUSED separately.
     356              :              */
     357        44969 :             if (presult.ndeleted > presult.nnewlpdead)
     358        20253 :                 pgstat_update_heap_dead_tuples(relation,
     359        20253 :                                                presult.ndeleted - presult.nnewlpdead);
     360              :         }
     361              : 
     362              :         /* And release buffer lock */
     363        44969 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     364              : 
     365              :         /*
     366              :          * We avoid reuse of any free space created on the page by unrelated
     367              :          * UPDATEs/INSERTs by opting to not update the FSM at this point.  The
     368              :          * free space should be reused by UPDATEs to *this* page.
     369              :          */
     370              :     }
     371              : }
     372              : 
     373              : /*
     374              :  * Helper for heap_page_prune_and_freeze() to initialize the PruneState using
     375              :  * the provided parameters.
     376              :  *
     377              :  * params, new_relfrozen_xid, new_relmin_mxid, and presult are input
     378              :  * parameters and are not modified by this function. Only prstate is modified.
     379              :  */
     380              : static void
     381       492708 : prune_freeze_setup(PruneFreezeParams *params,
     382              :                    TransactionId *new_relfrozen_xid,
     383              :                    MultiXactId *new_relmin_mxid,
     384              :                    PruneFreezeResult *presult,
     385              :                    PruneState *prstate)
     386              : {
     387              :     /* Copy parameters to prstate */
     388       492708 :     prstate->vistest = params->vistest;
     389       492708 :     prstate->mark_unused_now =
     390       492708 :         (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
     391              : 
     392              :     /* cutoffs must be provided if we will attempt freezing */
     393              :     Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
     394       492708 :     prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
     395       492708 :     prstate->cutoffs = params->cutoffs;
     396       492708 :     prstate->relation = params->relation;
     397       492708 :     prstate->block = BufferGetBlockNumber(params->buffer);
     398       492708 :     prstate->buffer = params->buffer;
     399       492708 :     prstate->page = BufferGetPage(params->buffer);
     400              : 
     401              :     Assert(BufferIsValid(params->vmbuffer));
     402       492708 :     prstate->vmbuffer = params->vmbuffer;
     403       492708 :     prstate->new_vmbits = 0;
     404       492708 :     prstate->old_vmbits = visibilitymap_get_status(prstate->relation,
     405              :                                                    prstate->block,
     406              :                                                    &prstate->vmbuffer);
     407              : 
     408              :     /*
     409              :      * Our strategy is to scan the page and make lists of items to change,
     410              :      * then apply the changes within a critical section.  This keeps as much
     411              :      * logic as possible out of the critical section, and also ensures that
     412              :      * WAL replay will work the same as the normal case.
     413              :      *
     414              :      * First, initialize the new pd_prune_xid value to zero (indicating no
     415              :      * prunable tuples).  If we find any tuples which may soon become
     416              :      * prunable, we will save the lowest relevant XID in new_prune_xid. Also
     417              :      * initialize the rest of our working state.
     418              :      */
     419       492708 :     prstate->new_prune_xid = InvalidTransactionId;
     420       492708 :     prstate->latest_xid_removed = InvalidTransactionId;
     421       492708 :     prstate->nredirected = prstate->ndead = prstate->nunused = 0;
     422       492708 :     prstate->nfrozen = 0;
     423       492708 :     prstate->nroot_items = 0;
     424       492708 :     prstate->nheaponly_items = 0;
     425              : 
     426              :     /* initialize page freezing working state */
     427       492708 :     prstate->pagefrz.freeze_required = false;
     428       492708 :     prstate->pagefrz.FreezePageConflictXid = InvalidTransactionId;
     429       492708 :     if (prstate->attempt_freeze)
     430              :     {
     431              :         Assert(new_relfrozen_xid && new_relmin_mxid);
     432       447739 :         prstate->pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
     433       447739 :         prstate->pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
     434       447739 :         prstate->pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
     435       447739 :         prstate->pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
     436              :     }
     437              :     else
     438              :     {
     439              :         Assert(!new_relfrozen_xid && !new_relmin_mxid);
     440        44969 :         prstate->pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
     441        44969 :         prstate->pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
     442        44969 :         prstate->pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
     443        44969 :         prstate->pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
     444              :     }
     445              : 
     446       492708 :     prstate->ndeleted = 0;
     447       492708 :     prstate->live_tuples = 0;
     448       492708 :     prstate->recently_dead_tuples = 0;
     449       492708 :     prstate->hastup = false;
     450       492708 :     prstate->lpdead_items = 0;
     451              : 
     452              :     /*
     453              :      * deadoffsets are filled in during pruning but are only used to populate
     454              :      * PruneFreezeResult->deadoffsets. To avoid needing two copies of the
     455              :      * array, just save a pointer to the result offsets array in the
     456              :      * PruneState.
     457              :      */
     458       492708 :     prstate->deadoffsets = presult->deadoffsets;
     459              : 
     460              :     /*
     461              :      * We track whether the page will be all-visible/all-frozen at the end of
     462              :      * pruning and freezing. While examining tuple visibility, we'll set
     463              :      * set_all_visible to false if there are tuples on the page not visible to
     464              :      * all running and future transactions. set_all_visible is always
     465              :      * maintained but only VACUUM will set the VM if the page ends up being
     466              :      * all-visible.
     467              :      *
     468              :      * We also keep track of the newest live XID, which is used to calculate
     469              :      * the snapshot conflict horizon for a WAL record setting the VM.
     470              :      */
     471       492708 :     prstate->set_all_visible = true;
     472       492708 :     prstate->newest_live_xid = InvalidTransactionId;
     473              : 
     474              :     /*
     475              :      * Currently, only VACUUM performs freezing, but other callers may in the
     476              :      * future. We must initialize set_all_frozen based on whether or not the
     477              :      * caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't
     478              :      * call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen
     479              :      * will never be cleared for tuples that need freezing. This would lead to
     480              :      * incorrectly setting the visibility map all-frozen for this page.
     481              :      *
     482              :      * When freezing is not required (no XIDs/MXIDs older than the freeze
     483              :      * cutoff), we may still choose to "opportunistically" freeze if doing so
     484              :      * would make the page all-frozen.
     485              :      *
     486              :      * We will not be able to freeze the whole page at the end of vacuum if
     487              :      * there are tuples present that are not visible to everyone or if there
     488              :      * are dead tuples which will not be removable. However, dead tuples that
     489              :      * will be removed by the end of vacuum should not prevent this
     490              :      * opportunistic freezing.
     491              :      *
     492              :      * Therefore, we do not clear set_all_visible and set_all_frozen when we
     493              :      * encounter LP_DEAD items. Instead, we correct them after deciding
     494              :      * whether to freeze, but before updating the VM, to avoid setting the VM
     495              :      * bits incorrectly.
     496              :      */
     497       492708 :     prstate->set_all_frozen = prstate->attempt_freeze;
     498       492708 : }
     499              : 
     500              : /*
     501              :  * Helper for heap_page_prune_and_freeze(). Iterates over every tuple on the
     502              :  * page, examines its visibility information, and determines the appropriate
     503              :  * action for each tuple. All tuples are processed and classified during this
     504              :  * phase, but no modifications are made to the page until the later execution
     505              :  * stage.
     506              :  *
     507              :  * *off_loc is used for error callback and cleared before returning.
     508              :  */
     509              : static void
     510       308838 : prune_freeze_plan(PruneState *prstate, OffsetNumber *off_loc)
     511              : {
     512       308838 :     Page        page = prstate->page;
     513       308838 :     BlockNumber blockno = prstate->block;
     514       308838 :     OffsetNumber maxoff = PageGetMaxOffsetNumber(prstate->page);
     515              :     OffsetNumber offnum;
     516              :     HeapTupleData tup;
     517              : 
     518       308838 :     tup.t_tableOid = RelationGetRelid(prstate->relation);
     519              : 
     520              :     /*
     521              :      * Determine HTSV for all tuples, and queue them up for processing as HOT
     522              :      * chain roots or as heap-only items.
     523              :      *
     524              :      * Determining HTSV only once for each tuple is required for correctness,
     525              :      * to deal with cases where running HTSV twice could result in different
     526              :      * results.  For example, RECENTLY_DEAD can turn to DEAD if another
     527              :      * checked item causes GlobalVisTestIsRemovableFullXid() to update the
     528              :      * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
     529              :      * transaction aborts.
     530              :      *
     531              :      * It's also good for performance. Most commonly tuples within a page are
     532              :      * stored at decreasing offsets (while the items are stored at increasing
     533              :      * offsets). When processing all tuples on a page this leads to reading
     534              :      * memory at decreasing offsets within a page, with a variable stride.
     535              :      * That's hard for CPU prefetchers to deal with. Processing the items in
     536              :      * reverse order (and thus the tuples in increasing order) increases
     537              :      * prefetching efficiency significantly / decreases the number of cache
     538              :      * misses.
     539              :      */
     540       308838 :     for (offnum = maxoff;
     541     16301542 :          offnum >= FirstOffsetNumber;
     542     15992704 :          offnum = OffsetNumberPrev(offnum))
     543              :     {
     544     15992704 :         ItemId      itemid = PageGetItemId(page, offnum);
     545              :         HeapTupleHeader htup;
     546              : 
     547              :         /*
     548              :          * Set the offset number so that we can display it along with any
     549              :          * error that occurred while processing this tuple.
     550              :          */
     551     15992704 :         *off_loc = offnum;
     552              : 
     553     15992704 :         prstate->processed[offnum] = false;
     554     15992704 :         prstate->htsv[offnum] = -1;
     555              : 
     556              :         /* Nothing to do if slot doesn't contain a tuple */
     557     15992704 :         if (!ItemIdIsUsed(itemid))
     558              :         {
     559        94618 :             heap_prune_record_unchanged_lp_unused(prstate, offnum);
     560        94618 :             continue;
     561              :         }
     562              : 
     563     15898086 :         if (ItemIdIsDead(itemid))
     564              :         {
     565              :             /*
     566              :              * If the caller set mark_unused_now true, we can set dead line
     567              :              * pointers LP_UNUSED now.
     568              :              */
     569      1294951 :             if (unlikely(prstate->mark_unused_now))
     570         1991 :                 heap_prune_record_unused(prstate, offnum, false);
     571              :             else
     572      1292960 :                 heap_prune_record_unchanged_lp_dead(prstate, offnum);
     573      1294951 :             continue;
     574              :         }
     575              : 
     576     14603135 :         if (ItemIdIsRedirected(itemid))
     577              :         {
     578              :             /* This is the start of a HOT chain */
     579       173027 :             prstate->root_items[prstate->nroot_items++] = offnum;
     580       173027 :             continue;
     581              :         }
     582              : 
     583              :         Assert(ItemIdIsNormal(itemid));
     584              : 
     585              :         /*
     586              :          * Get the tuple's visibility status and queue it up for processing.
     587              :          */
     588     14430108 :         htup = (HeapTupleHeader) PageGetItem(page, itemid);
     589     14430108 :         tup.t_data = htup;
     590     14430108 :         tup.t_len = ItemIdGetLength(itemid);
     591     14430108 :         ItemPointerSet(&tup.t_self, blockno, offnum);
     592              : 
     593     14430108 :         prstate->htsv[offnum] = heap_prune_satisfies_vacuum(prstate, &tup);
     594              : 
     595     14430108 :         if (!HeapTupleHeaderIsHeapOnly(htup))
     596     14118157 :             prstate->root_items[prstate->nroot_items++] = offnum;
     597              :         else
     598       311951 :             prstate->heaponly_items[prstate->nheaponly_items++] = offnum;
     599              :     }
     600              : 
     601              :     /*
     602              :      * Process HOT chains.
     603              :      *
     604              :      * We added the items to the array starting from 'maxoff', so by
     605              :      * processing the array in reverse order, we process the items in
     606              :      * ascending offset number order.  The order doesn't matter for
     607              :      * correctness, but some quick micro-benchmarking suggests that this is
     608              :      * faster.  (Earlier PostgreSQL versions, which scanned all the items on
     609              :      * the page instead of using the root_items array, also did it in
     610              :      * ascending offset number order.)
     611              :      */
     612     14600022 :     for (int i = prstate->nroot_items - 1; i >= 0; i--)
     613              :     {
     614     14291184 :         offnum = prstate->root_items[i];
     615              : 
     616              :         /* Ignore items already processed as part of an earlier chain */
     617     14291184 :         if (prstate->processed[offnum])
     618            0 :             continue;
     619              : 
     620              :         /* see preceding loop */
     621     14291184 :         *off_loc = offnum;
     622              : 
     623              :         /* Process this item or chain of items */
     624     14291184 :         heap_prune_chain(maxoff, offnum, prstate);
     625              :     }
     626              : 
     627              :     /*
     628              :      * Process any heap-only tuples that were not already processed as part of
     629              :      * a HOT chain.
     630              :      */
     631       620789 :     for (int i = prstate->nheaponly_items - 1; i >= 0; i--)
     632              :     {
     633       311951 :         offnum = prstate->heaponly_items[i];
     634              : 
     635       311951 :         if (prstate->processed[offnum])
     636       296117 :             continue;
     637              : 
     638              :         /* see preceding loop */
     639        15834 :         *off_loc = offnum;
     640              : 
     641              :         /*
     642              :          * If the tuple is DEAD and doesn't chain to anything else, mark it
     643              :          * unused.  (If it does chain, we can only remove it as part of
     644              :          * pruning its chain.)
     645              :          *
     646              :          * We need this primarily to handle aborted HOT updates, that is,
     647              :          * XMIN_INVALID heap-only tuples.  Those might not be linked to by any
     648              :          * chain, since the parent tuple might be re-updated before any
     649              :          * pruning occurs.  So we have to be able to reap them separately from
     650              :          * chain-pruning.  (Note that HeapTupleHeaderIsHotUpdated will never
     651              :          * return true for an XMIN_INVALID tuple, so this code will work even
     652              :          * when there were sequential updates within the aborted transaction.)
     653              :          */
     654        15834 :         if (prstate->htsv[offnum] == HEAPTUPLE_DEAD)
     655              :         {
     656         3079 :             ItemId      itemid = PageGetItemId(page, offnum);
     657         3079 :             HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
     658              : 
     659         3079 :             if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
     660              :             {
     661         3079 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
     662              :                                                       &prstate->latest_xid_removed);
     663         3079 :                 heap_prune_record_unused(prstate, offnum, true);
     664              :             }
     665              :             else
     666              :             {
     667              :                 /*
     668              :                  * This tuple should've been processed and removed as part of
     669              :                  * a HOT chain, so something's wrong.  To preserve evidence,
     670              :                  * we don't dare to remove it.  We cannot leave behind a DEAD
     671              :                  * tuple either, because that will cause VACUUM to error out.
     672              :                  * Throwing an error with a distinct error message seems like
     673              :                  * the least bad option.
     674              :                  */
     675            0 :                 elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
     676              :                      blockno, offnum);
     677              :             }
     678              :         }
     679              :         else
     680        12755 :             heap_prune_record_unchanged_lp_normal(prstate, offnum);
     681              :     }
     682              : 
     683              :     /* We should now have processed every tuple exactly once  */
     684              : #ifdef USE_ASSERT_CHECKING
     685              :     for (offnum = FirstOffsetNumber;
     686              :          offnum <= maxoff;
     687              :          offnum = OffsetNumberNext(offnum))
     688              :     {
     689              :         *off_loc = offnum;
     690              : 
     691              :         Assert(prstate->processed[offnum]);
     692              :     }
     693              : #endif
     694              : 
     695              :     /* Clear the offset information once we have processed the given page. */
     696       308838 :     *off_loc = InvalidOffsetNumber;
     697       308838 : }
     698              : 
     699              : /*
     700              :  * Decide whether to proceed with freezing according to the freeze plans
     701              :  * prepared for the current heap buffer. If freezing is chosen, this function
     702              :  * performs several pre-freeze checks.
     703              :  *
     704              :  * The values of do_prune, do_hint_prune, and did_tuple_hint_fpi must be
     705              :  * determined before calling this function.
     706              :  *
     707              :  * prstate is both an input and output parameter.
     708              :  *
     709              :  * Returns true if we should apply the freeze plans and freeze tuples on the
     710              :  * page, and false otherwise.
     711              :  */
     712              : static bool
     713       308838 : heap_page_will_freeze(bool did_tuple_hint_fpi,
     714              :                       bool do_prune,
     715              :                       bool do_hint_prune,
     716              :                       PruneState *prstate)
     717              : {
     718       308838 :     bool        do_freeze = false;
     719              : 
     720              :     /*
     721              :      * If the caller specified we should not attempt to freeze any tuples,
     722              :      * validate that everything is in the right state and return.
     723              :      */
     724       308838 :     if (!prstate->attempt_freeze)
     725              :     {
     726              :         Assert(!prstate->set_all_frozen && prstate->nfrozen == 0);
     727        44969 :         return false;
     728              :     }
     729              : 
     730       263869 :     if (prstate->pagefrz.freeze_required)
     731              :     {
     732              :         /*
     733              :          * heap_prepare_freeze_tuple indicated that at least one XID/MXID from
     734              :          * before FreezeLimit/MultiXactCutoff is present.  Must freeze to
     735              :          * advance relfrozenxid/relminmxid.
     736              :          */
     737        22060 :         do_freeze = true;
     738              :     }
     739              :     else
     740              :     {
     741              :         /*
     742              :          * Opportunistically freeze the page if we are generating an FPI
     743              :          * anyway and if doing so means that we can set the page all-frozen
     744              :          * afterwards (might not happen until VACUUM's final heap pass).
     745              :          *
     746              :          * XXX: Previously, we knew if pruning emitted an FPI by checking
     747              :          * pgWalUsage.wal_fpi before and after pruning.  Once the freeze and
     748              :          * prune records were combined, this heuristic couldn't be used
     749              :          * anymore.  The opportunistic freeze heuristic must be improved;
     750              :          * however, for now, try to approximate the old logic.
     751              :          */
     752       241809 :         if (prstate->set_all_frozen && prstate->nfrozen > 0)
     753              :         {
     754              :             Assert(prstate->set_all_visible);
     755              : 
     756              :             /*
     757              :              * Freezing would make the page all-frozen.  Have already emitted
     758              :              * an FPI or will do so anyway?
     759              :              */
     760        20614 :             if (RelationNeedsWAL(prstate->relation))
     761              :             {
     762        18957 :                 if (did_tuple_hint_fpi)
     763         1485 :                     do_freeze = true;
     764        17472 :                 else if (do_prune)
     765              :                 {
     766         1655 :                     if (XLogCheckBufferNeedsBackup(prstate->buffer))
     767          551 :                         do_freeze = true;
     768              :                 }
     769        15817 :                 else if (do_hint_prune)
     770              :                 {
     771           20 :                     if (XLogHintBitIsNeeded() &&
     772           10 :                         XLogCheckBufferNeedsBackup(prstate->buffer))
     773            2 :                         do_freeze = true;
     774              :                 }
     775              :             }
     776              :         }
     777              :     }
     778              : 
     779       263869 :     if (do_freeze)
     780              :     {
     781              :         /*
     782              :          * Validate the tuples we will be freezing before entering the
     783              :          * critical section.
     784              :          */
     785        24098 :         heap_pre_freeze_checks(prstate->buffer, prstate->frozen, prstate->nfrozen);
     786              :         Assert(TransactionIdPrecedes(prstate->pagefrz.FreezePageConflictXid,
     787              :                                      prstate->cutoffs->OldestXmin));
     788              :     }
     789       239771 :     else if (prstate->nfrozen > 0)
     790              :     {
     791              :         /*
     792              :          * The page contained some tuples that were not already frozen, and we
     793              :          * chose not to freeze them now.  The page won't be all-frozen then.
     794              :          */
     795              :         Assert(!prstate->pagefrz.freeze_required);
     796              : 
     797        19129 :         prstate->set_all_frozen = false;
     798        19129 :         prstate->nfrozen = 0;    /* avoid miscounts in instrumentation */
     799              :     }
     800              :     else
     801              :     {
     802              :         /*
     803              :          * We have no freeze plans to execute.  The page might already be
     804              :          * all-frozen (perhaps only following pruning), though.  Such pages
     805              :          * can be marked all-frozen in the VM by our caller, even though none
     806              :          * of its tuples were newly frozen here.
     807              :          */
     808              :     }
     809              : 
     810       263869 :     return do_freeze;
     811              : }
     812              : 
     813              : /*
     814              :  * Emit a warning about and fix visibility map corruption on the given page.
     815              :  *
     816              :  * The caller specifies the type of corruption it has already detected via
     817              :  * corruption_type, so that we can emit the appropriate warning. All cases
     818              :  * result in the VM bits being cleared; corruption types where PD_ALL_VISIBLE
     819              :  * is incorrectly set also clear PD_ALL_VISIBLE.
     820              :  *
     821              :  * Must be called while holding an exclusive lock on the heap buffer. Dead
     822              :  * items and not all-visible tuples must have been discovered under that same
     823              :  * lock. Although we do not hold a lock on the VM buffer, it is pinned, and
     824              :  * the heap buffer is exclusively locked, ensuring that no other backend can
     825              :  * update the VM bits corresponding to this heap page.
     826              :  *
     827              :  * This function makes changes to the VM and, potentially, the heap page, but
     828              :  * it does not need to be done in a critical section.
     829              :  */
     830              : static void
     831            0 : heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum,
     832              :                             VMCorruptionType corruption_type)
     833              : {
     834            0 :     const char *relname = RelationGetRelationName(prstate->relation);
     835            0 :     bool        do_clear_vm = false;
     836            0 :     bool        do_clear_heap = false;
     837              : 
     838              :     Assert(BufferIsLockedByMeInMode(prstate->buffer, BUFFER_LOCK_EXCLUSIVE));
     839              : 
     840            0 :     switch (corruption_type)
     841              :     {
     842            0 :         case VM_CORRUPT_LPDEAD:
     843            0 :             ereport(WARNING,
     844              :                     (errcode(ERRCODE_DATA_CORRUPTED),
     845              :                      errmsg("dead line pointer found on page marked all-visible"),
     846              :                      errcontext("relation \"%s\", page %u, tuple %u",
     847              :                                 relname, prstate->block, offnum)));
     848            0 :             do_clear_vm = true;
     849            0 :             do_clear_heap = true;
     850            0 :             break;
     851              : 
     852            0 :         case VM_CORRUPT_TUPLE_VISIBILITY:
     853              : 
     854              :             /*
     855              :              * A HEAPTUPLE_LIVE tuple on an all-visible page can appear to not
     856              :              * be visible to everyone when
     857              :              * GetOldestNonRemovableTransactionId() returns a conservative
     858              :              * value that's older than the real safe xmin. That is not
     859              :              * corruption -- the PD_ALL_VISIBLE flag is still correct.
     860              :              *
     861              :              * However, dead tuple versions, in-progress inserts, and
     862              :              * in-progress deletes should never appear on a page marked
     863              :              * all-visible. That indicates real corruption. PD_ALL_VISIBLE
     864              :              * should have been cleared by the DML operation that deleted or
     865              :              * inserted the tuple.
     866              :              */
     867            0 :             ereport(WARNING,
     868              :                     (errcode(ERRCODE_DATA_CORRUPTED),
     869              :                      errmsg("tuple not visible to all transactions found on page marked all-visible"),
     870              :                      errcontext("relation \"%s\", page %u, tuple %u",
     871              :                                 relname, prstate->block, offnum)));
     872            0 :             do_clear_vm = true;
     873            0 :             do_clear_heap = true;
     874            0 :             break;
     875              : 
     876            0 :         case VM_CORRUPT_MISSING_PAGE_HINT:
     877              : 
     878              :             /*
     879              :              * As of PostgreSQL 9.2, the visibility map bit should never be
     880              :              * set if the page-level bit is clear. However, for vacuum, it's
     881              :              * possible that the bit got cleared after
     882              :              * heap_vac_scan_next_block() was called, so we must recheck now
     883              :              * that we have the buffer lock before concluding that the VM is
     884              :              * corrupt.
     885              :              */
     886              :             Assert(!PageIsAllVisible(prstate->page));
     887              :             Assert(prstate->old_vmbits & VISIBILITYMAP_VALID_BITS);
     888            0 :             ereport(WARNING,
     889              :                     (errcode(ERRCODE_DATA_CORRUPTED),
     890              :                      errmsg("page is not marked all-visible but visibility map bit is set"),
     891              :                      errcontext("relation \"%s\", page %u",
     892              :                                 relname, prstate->block)));
     893            0 :             do_clear_vm = true;
     894            0 :             break;
     895              :     }
     896              : 
     897              :     Assert(do_clear_heap || do_clear_vm);
     898              : 
     899              :     /* Avoid marking the buffer dirty if PD_ALL_VISIBLE is already clear */
     900            0 :     if (do_clear_heap)
     901              :     {
     902              :         Assert(PageIsAllVisible(prstate->page));
     903            0 :         PageClearAllVisible(prstate->page);
     904            0 :         MarkBufferDirtyHint(prstate->buffer, true);
     905              :     }
     906              : 
     907            0 :     if (do_clear_vm)
     908              :     {
     909            0 :         visibilitymap_clear(prstate->relation, prstate->block,
     910              :                             prstate->vmbuffer,
     911              :                             VISIBILITYMAP_VALID_BITS);
     912            0 :         prstate->old_vmbits = 0;
     913              :     }
     914            0 : }
     915              : 
     916              : /*
     917              :  * Decide whether to set the visibility map bits (all-visible and all-frozen)
     918              :  * for the current page using information from the PruneState and VM.
     919              :  *
     920              :  * This function does not actually set the VM bits or page-level visibility
     921              :  * hint, PD_ALL_VISIBLE.
     922              :  *
     923              :  * Returns true if one or both VM bits should be set and false otherwise.
     924              :  */
     925              : static bool
     926       308838 : heap_page_will_set_vm(PruneState *prstate, PruneReason reason)
     927              : {
     928              :     /*
     929              :      * Though on-access pruning maintains prstate->set_all_visible, we don't
     930              :      * set the VM on-access for now.
     931              :      */
     932       308838 :     if (reason == PRUNE_ON_ACCESS)
     933        44969 :         return false;
     934              : 
     935       263869 :     if (!prstate->set_all_visible)
     936       218379 :         return false;
     937              : 
     938        45490 :     prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
     939              : 
     940        45490 :     if (prstate->set_all_frozen)
     941        30427 :         prstate->new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
     942              : 
     943        45490 :     if (prstate->new_vmbits == prstate->old_vmbits)
     944              :     {
     945         1127 :         prstate->new_vmbits = 0;
     946         1127 :         return false;
     947              :     }
     948              : 
     949        44363 :     return true;
     950              : }
     951              : 
     952              : /*
     953              :  * If the page is already all-frozen, or already all-visible and freezing
     954              :  * won't be attempted, there is no remaining work and we can use the fast path
     955              :  * to avoid the expensive overhead of heap_page_prune_and_freeze().
     956              :  *
     957              :  * This can happen when the page has a stale prune hint, or if VACUUM is
     958              :  * scanning an already all-frozen page due to SKIP_PAGES_THRESHOLD.
     959              :  *
     960              :  * The caller must already have examined the visibility map and saved the
     961              :  * status of the page's VM bits in prstate->old_vmbits. Caller must hold a
     962              :  * content lock on the heap page since it will examine line pointers.
     963              :  *
     964              :  * Before calling prune_freeze_fast_path(), the caller should first
     965              :  * check for and fix any discrepancy between the page-level visibility hint
     966              :  * and the visibility map. Otherwise, the fast path will always prevent us
     967              :  * from getting them in sync. Note that if there are tuples on the page that
     968              :  * are not visible to all but the VM is incorrectly marked
     969              :  * all-visible/all-frozen, we will not get the chance to fix that corruption
     970              :  * when using the fast path.
     971              :  */
     972              : static void
     973       183870 : prune_freeze_fast_path(PruneState *prstate, PruneFreezeResult *presult)
     974              : {
     975       183870 :     OffsetNumber maxoff = PageGetMaxOffsetNumber(prstate->page);
     976       183870 :     Page        page = prstate->page;
     977              : 
     978              :     Assert((prstate->old_vmbits & VISIBILITYMAP_ALL_FROZEN) ||
     979              :            ((prstate->old_vmbits & VISIBILITYMAP_ALL_VISIBLE) &&
     980              :             !prstate->attempt_freeze));
     981              : 
     982              :     /* We'll fill in presult for the caller */
     983       183870 :     memset(presult, 0, sizeof(PruneFreezeResult));
     984              : 
     985              :     /* Clear any stale prune hint */
     986       183870 :     if (TransactionIdIsValid(PageGetPruneXid(page)))
     987              :     {
     988            0 :         PageClearPrunable(page);
     989            0 :         MarkBufferDirtyHint(prstate->buffer, true);
     990              :     }
     991              : 
     992       183870 :     if (PageIsEmpty(page))
     993            0 :         return;
     994              : 
     995              :     /*
     996              :      * Since the page is all-visible, a count of the normal ItemIds on the
     997              :      * page should be sufficient for vacuum's live tuple count.
     998              :      */
     999       183870 :     for (OffsetNumber off = FirstOffsetNumber;
    1000     10661875 :          off <= maxoff;
    1001     10478005 :          off = OffsetNumberNext(off))
    1002              :     {
    1003     10478005 :         ItemId      lp = PageGetItemId(page, off);
    1004              : 
    1005     10478005 :         if (!ItemIdIsUsed(lp))
    1006       172449 :             continue;
    1007              : 
    1008     10305556 :         presult->hastup = true;
    1009              : 
    1010     10305556 :         if (ItemIdIsNormal(lp))
    1011     10123490 :             prstate->live_tuples++;
    1012              :     }
    1013              : 
    1014       183870 :     presult->live_tuples = prstate->live_tuples;
    1015              : }
    1016              : 
    1017              : /*
    1018              :  * Prune and repair fragmentation and potentially freeze tuples on the
    1019              :  * specified page. If the page's visibility status has changed, update it in
    1020              :  * the VM.
    1021              :  *
    1022              :  * Caller must have pin and buffer cleanup lock on the page.  Note that we
    1023              :  * don't update the FSM information for page on caller's behalf.  Caller might
    1024              :  * also need to account for a reduction in the length of the line pointer
    1025              :  * array following array truncation by us.
    1026              :  *
    1027              :  * params contains the input parameters used to control freezing and pruning
    1028              :  * behavior. See the definition of PruneFreezeParams for more on what each
    1029              :  * parameter does.
    1030              :  *
    1031              :  * If the HEAP_PAGE_PRUNE_FREEZE option is set in params, we will freeze
    1032              :  * tuples if it's required in order to advance relfrozenxid / relminmxid, or
    1033              :  * if it's considered advantageous for overall system performance to do so
    1034              :  * now.  The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
    1035              :  * 'new_relmin_mxid' arguments are required when freezing.
    1036              :  *
    1037              :  * A vmbuffer corresponding to the heap page is also passed and if the page is
    1038              :  * found to be all-visible/all-frozen, we will set it in the VM.
    1039              :  *
    1040              :  * presult contains output parameters needed by callers, such as the number of
    1041              :  * tuples removed and the offsets of dead items on the page after pruning.
    1042              :  * heap_page_prune_and_freeze() is responsible for initializing it.  Required
    1043              :  * by all callers.
    1044              :  *
    1045              :  * off_loc is the offset location required by the caller to use in error
    1046              :  * callback.
    1047              :  *
    1048              :  * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
    1049              :  * HEAP_PAGE_PRUNE_FREEZE option is set in params.  On entry, they contain the
    1050              :  * oldest XID and multi-XID seen on the relation so far.  They will be updated
    1051              :  * with the oldest values present on the page after pruning.  After processing
    1052              :  * the whole relation, VACUUM can use these values as the new
    1053              :  * relfrozenxid/relminmxid for the relation.
    1054              :  */
void
heap_page_prune_and_freeze(PruneFreezeParams *params,
                           PruneFreezeResult *presult,
                           OffsetNumber *off_loc,
                           TransactionId *new_relfrozen_xid,
                           MultiXactId *new_relmin_mxid)
{
    PruneState  prstate;
    bool        do_freeze;
    bool        do_prune;
    bool        do_hint_prune;
    bool        do_set_vm;
    bool        did_tuple_hint_fpi;
    /* WAL FPI count before planning; used to detect hint-bit-induced FPIs */
    int64       fpi_before = pgWalUsage.wal_fpi;
    /* snapshot conflict horizon covering every change in the WAL record */
    TransactionId conflict_xid;

    /* Initialize prstate */
    prune_freeze_setup(params,
                       new_relfrozen_xid, new_relmin_mxid,
                       presult, &prstate);

    /*
     * If the VM is set but PD_ALL_VISIBLE is clear, fix that corruption
     * before pruning and freezing so that the page and VM start out in a
     * consistent state.
     */
    if ((prstate.old_vmbits & VISIBILITYMAP_VALID_BITS) &&
        !PageIsAllVisible(prstate.page))
        heap_page_fix_vm_corruption(&prstate, InvalidOffsetNumber,
                                    VM_CORRUPT_MISSING_PAGE_HINT);

    /*
     * If the page is already all-frozen, or already all-visible when freezing
     * is not being attempted, take the fast path, skipping pruning and
     * freezing code entirely. This must be done after fixing any discrepancy
     * between the page-level visibility hint and the VM, since that may have
     * cleared old_vmbits.
     */
    if ((params->options & HEAP_PAGE_PRUNE_ALLOW_FAST_PATH) != 0 &&
        ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) ||
         ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) &&
          !prstate.attempt_freeze)))
    {
        prune_freeze_fast_path(&prstate, presult);
        return;
    }

    /*
     * Examine all line pointers and tuple visibility information to determine
     * which line pointers should change state and which tuples may be frozen.
     * Prepare queue of state changes to later be executed in a critical
     * section.
     */
    prune_freeze_plan(&prstate, off_loc);

    /*
     * After processing all the live tuples on the page, if the newest xmin
     * amongst them may be considered running by any snapshot, the page cannot
     * be all-visible. This should be done before determining whether or not
     * to opportunistically freeze.
     */
    if (prstate.set_all_visible &&
        TransactionIdIsNormal(prstate.newest_live_xid) &&
        GlobalVisTestXidConsideredRunning(prstate.vistest,
                                          prstate.newest_live_xid,
                                          true))
        prstate.set_all_visible = prstate.set_all_frozen = false;

    /*
     * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
     * checking tuple visibility information in prune_freeze_plan() may have
     * caused an FPI to be emitted.
     */
    did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;

    /* Did planning queue up any line pointer state changes at all? */
    do_prune = prstate.nredirected > 0 ||
        prstate.ndead > 0 ||
        prstate.nunused > 0;

    /*
     * Even if we don't prune anything, if we found a new value for the
     * pd_prune_xid field or the page was marked full, we will update the hint
     * bit.
     */
    do_hint_prune = PageGetPruneXid(prstate.page) != prstate.new_prune_xid ||
        PageIsFull(prstate.page);

    /*
     * Decide if we want to go ahead with freezing according to the freeze
     * plans we prepared, or not.
     */
    do_freeze = heap_page_will_freeze(did_tuple_hint_fpi,
                                      do_prune,
                                      do_hint_prune,
                                      &prstate);

    /*
     * While scanning the line pointers, we did not clear
     * set_all_visible/set_all_frozen when encountering LP_DEAD items because
     * we wanted the decision whether or not to freeze the page to be
     * unaffected by the short-term presence of LP_DEAD items.  These LP_DEAD
     * items are effectively assumed to be LP_UNUSED items in the making.  It
     * doesn't matter which vacuum heap pass (initial pass or final pass) ends
     * up setting the page all-frozen, as long as the ongoing VACUUM does it.
     *
     * Now that we finished determining whether or not to freeze the page,
     * update set_all_visible and set_all_frozen so that they reflect the true
     * state of the page for setting PD_ALL_VISIBLE and VM bits.
     */
    if (prstate.lpdead_items > 0)
        prstate.set_all_visible = prstate.set_all_frozen = false;

    Assert(!prstate.set_all_frozen || prstate.set_all_visible);
    Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));

    do_set_vm = heap_page_will_set_vm(&prstate, params->reason);

    /*
     * new_vmbits should be 0 regardless of whether or not the page is
     * all-visible if we do not intend to set the VM.
     */
    Assert(do_set_vm || prstate.new_vmbits == 0);

    /*
     * The snapshot conflict horizon for the whole record is the most
     * conservative (newest) horizon required by any change in the record.
     */
    conflict_xid = InvalidTransactionId;
    if (do_set_vm)
        conflict_xid = prstate.newest_live_xid;
    if (do_freeze && TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid, conflict_xid))
        conflict_xid = prstate.pagefrz.FreezePageConflictXid;
    if (do_prune && TransactionIdFollows(prstate.latest_xid_removed, conflict_xid))
        conflict_xid = prstate.latest_xid_removed;

    /* Lock vmbuffer before entering a critical section */
    if (do_set_vm)
        LockBuffer(prstate.vmbuffer, BUFFER_LOCK_EXCLUSIVE);

    /* Any error while applying the changes is critical */
    START_CRIT_SECTION();

    if (do_hint_prune)
    {
        /*
         * Update the page's pd_prune_xid field to either zero, or the lowest
         * XID of any soon-prunable tuple.
         */
        ((PageHeader) prstate.page)->pd_prune_xid = prstate.new_prune_xid;

        /*
         * Also clear the "page is full" flag, since there's no point in
         * repeating the prune/defrag process until something else happens to
         * the page.
         */
        PageClearFull(prstate.page);

        /*
         * If that's all we had to do to the page, this is a non-WAL-logged
         * hint. If we are going to freeze or prune the page or set
         * PD_ALL_VISIBLE, we will mark the buffer dirty below.
         *
         * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
         * for the VM to be set and PD_ALL_VISIBLE to be clear.
         */
        if (!do_freeze && !do_prune && !do_set_vm)
            MarkBufferDirtyHint(prstate.buffer, true);
    }

    if (do_prune || do_freeze || do_set_vm)
    {
        /* Apply the planned item changes and repair page fragmentation. */
        if (do_prune)
        {
            heap_page_prune_execute(prstate.buffer, false,
                                    prstate.redirected, prstate.nredirected,
                                    prstate.nowdead, prstate.ndead,
                                    prstate.nowunused, prstate.nunused);
        }

        if (do_freeze)
            heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);

        /* Set the visibility map and page visibility hint */
        if (do_set_vm)
        {
            /*
             * While it is valid for PD_ALL_VISIBLE to be set when the
             * corresponding VM bit is clear, we strongly prefer to keep them
             * in sync.
             *
             * The heap buffer must be marked dirty before adding it to the
             * WAL chain when setting the VM. We don't worry about
             * unnecessarily dirtying the heap buffer if PD_ALL_VISIBLE is
             * already set, though. It is extremely rare to have a clean heap
             * buffer with PD_ALL_VISIBLE already set and the VM bits clear,
             * so there is no point in optimizing it.
             */
            PageSetAllVisible(prstate.page);
            PageClearPrunable(prstate.page);
            visibilitymap_set(prstate.block, prstate.vmbuffer, prstate.new_vmbits,
                              prstate.relation->rd_locator);
        }

        MarkBufferDirty(prstate.buffer);

        /*
         * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
         */
        if (RelationNeedsWAL(prstate.relation))
        {
            log_heap_prune_and_freeze(prstate.relation, prstate.buffer,
                                      do_set_vm ? prstate.vmbuffer : InvalidBuffer,
                                      do_set_vm ? prstate.new_vmbits : 0,
                                      conflict_xid,
                                      true, /* cleanup lock */
                                      params->reason,
                                      prstate.frozen, prstate.nfrozen,
                                      prstate.redirected, prstate.nredirected,
                                      prstate.nowdead, prstate.ndead,
                                      prstate.nowunused, prstate.nunused);
        }
    }

    END_CRIT_SECTION();

    if (do_set_vm)
        LockBuffer(prstate.vmbuffer, BUFFER_LOCK_UNLOCK);

    /*
     * During its second pass over the heap, VACUUM calls
     * heap_page_would_be_all_visible() to determine whether a page is
     * all-visible and all-frozen. The logic here is similar. After completing
     * pruning and freezing, use an assertion to verify that our results
     * remain consistent with heap_page_would_be_all_visible(). It's also a
     * valuable cross-check of the page state after pruning and freezing.
     */
#ifdef USE_ASSERT_CHECKING
    if (prstate.set_all_visible)
    {
        TransactionId debug_cutoff;
        bool        debug_all_frozen;

        Assert(prstate.lpdead_items == 0);

        Assert(heap_page_is_all_visible(prstate.relation, prstate.buffer,
                                        prstate.vistest,
                                        &debug_all_frozen,
                                        &debug_cutoff, off_loc));

        Assert(!TransactionIdIsValid(debug_cutoff) ||
               debug_cutoff == prstate.newest_live_xid);

        /*
         * It's possible the page is composed entirely of frozen tuples but is
         * not set all-frozen in the VM and did not pass
         * HEAP_PAGE_PRUNE_FREEZE. In this case, it's possible
         * heap_page_is_all_visible() finds the page completely frozen, even
         * though prstate.set_all_frozen is false.
         */
        Assert(!prstate.set_all_frozen || debug_all_frozen);
    }
#endif

    /* Copy information back for caller */
    presult->ndeleted = prstate.ndeleted;
    presult->nnewlpdead = prstate.ndead;
    presult->nfrozen = prstate.nfrozen;
    presult->live_tuples = prstate.live_tuples;
    presult->recently_dead_tuples = prstate.recently_dead_tuples;
    presult->hastup = prstate.hastup;

    presult->lpdead_items = prstate.lpdead_items;
    /* the presult->deadoffsets array was already filled in */

    presult->newly_all_visible = false;
    presult->newly_all_frozen = false;
    presult->newly_all_visible_frozen = false;
    if (do_set_vm)
    {
        if ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
        {
            presult->newly_all_visible = true;
            if (prstate.set_all_frozen)
                presult->newly_all_visible_frozen = true;
        }
        else if ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
                 prstate.set_all_frozen)
            presult->newly_all_frozen = true;
    }

    /*
     * Hand back the oldest XID/MXID that will remain on the page, so the
     * caller can track candidate relfrozenxid/relminmxid values.  Which set
     * of values applies depends on whether tuples were actually frozen.
     */
    if (prstate.attempt_freeze)
    {
        if (presult->nfrozen > 0)
        {
            *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
            *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
        }
        else
        {
            *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
            *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
        }
    }
}
    1360              : 
    1361              : 
    1362              : /*
    1363              :  * Perform visibility checks for heap pruning.
    1364              :  */
    1365              : static HTSV_Result
    1366     14430108 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup)
    1367              : {
    1368              :     HTSV_Result res;
    1369              :     TransactionId dead_after;
    1370              : 
    1371     14430108 :     res = HeapTupleSatisfiesVacuumHorizon(tup, prstate->buffer, &dead_after);
    1372              : 
    1373     14430108 :     if (res != HEAPTUPLE_RECENTLY_DEAD)
    1374     12364299 :         return res;
    1375              : 
    1376              :     /*
    1377              :      * For VACUUM, we must be sure to prune tuples with xmax older than
    1378              :      * OldestXmin -- a visibility cutoff determined at the beginning of
    1379              :      * vacuuming the relation. OldestXmin is used for freezing determination
    1380              :      * and we cannot freeze dead tuples' xmaxes.
    1381              :      */
    1382      2065809 :     if (prstate->cutoffs &&
    1383      1122151 :         TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
    1384      1122151 :         NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
    1385       799950 :         return HEAPTUPLE_DEAD;
    1386              : 
    1387              :     /*
    1388              :      * Determine whether or not the tuple is considered dead when compared
    1389              :      * with the provided GlobalVisState. On-access pruning does not provide
    1390              :      * VacuumCutoffs. And for vacuum, even if the tuple's xmax is not older
    1391              :      * than OldestXmin, GlobalVisTestIsRemovableXid() could find the row dead
    1392              :      * if the GlobalVisState has been updated since the beginning of vacuuming
    1393              :      * the relation.
    1394              :      */
    1395      1265859 :     if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after, true))
    1396       904655 :         return HEAPTUPLE_DEAD;
    1397              : 
    1398       361204 :     return res;
    1399              : }
    1400              : 
    1401              : 
    1402              : /*
    1403              :  * Pruning calculates tuple visibility once and saves the results in an array
    1404              :  * of int8.  See PruneState.htsv for details.  This helper function is meant
    1405              :  * to guard against examining visibility status array members which have not
    1406              :  * yet been computed.
    1407              :  */
    1408              : static inline HTSV_Result
    1409     14414274 : htsv_get_valid_status(int status)
    1410              : {
    1411              :     Assert(status >= HEAPTUPLE_DEAD &&
    1412              :            status <= HEAPTUPLE_DELETE_IN_PROGRESS);
    1413     14414274 :     return (HTSV_Result) status;
    1414              : }
    1415              : 
    1416              : /*
    1417              :  * Prune specified line pointer or a HOT chain originating at line pointer.
    1418              :  *
    1419              :  * Tuple visibility information is provided in prstate->htsv.
    1420              :  *
    1421              :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
    1422              :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
    1423              :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
    1424              :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
    1425              :  * DEAD, our visibility test is just too coarse to detect it.
    1426              :  *
    1427              :  * Pruning must never leave behind a DEAD tuple that still has tuple storage.
    1428              :  * VACUUM isn't prepared to deal with that case.
    1429              :  *
    1430              :  * The root line pointer is redirected to the tuple immediately after the
    1431              :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
    1432              :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
    1433              :  * tuple, which we treat as a chain of length 1.)
    1434              :  *
    1435              :  * We don't actually change the page here. We just add entries to the arrays in
    1436              :  * prstate showing the changes to be made.  Items to be redirected are added
    1437              :  * to the redirected[] array (two entries per redirection); items to be set to
    1438              :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
    1439              :  * state are added to nowunused[].  We perform bookkeeping of live tuples,
    1440              :  * visibility etc. based on what the page will look like after the changes
    1441              :  * applied.  All that bookkeeping is performed in the heap_prune_record_*()
    1442              :  * subroutines.  The division of labor is that heap_prune_chain() decides the
    1443              :  * fate of each tuple, ie. whether it's going to be removed, redirected or
    1444              :  * left unchanged, and the heap_prune_record_*() subroutines update PruneState
    1445              :  * based on that outcome.
    1446              :  */
    1447              : static void
    1448     14291184 : heap_prune_chain(OffsetNumber maxoff, OffsetNumber rootoffnum,
    1449              :                  PruneState *prstate)
    1450              : {
    1451     14291184 :     TransactionId priorXmax = InvalidTransactionId;
    1452              :     ItemId      rootlp;
    1453              :     OffsetNumber offnum;
    1454              :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
    1455     14291184 :     Page        page = prstate->page;
    1456              : 
    1457              :     /*
    1458              :      * After traversing the HOT chain, ndeadchain is the index in chainitems
    1459              :      * of the first live successor after the last dead item.
    1460              :      */
    1461     14291184 :     int         ndeadchain = 0,
    1462     14291184 :                 nchain = 0;
    1463              : 
    1464     14291184 :     rootlp = PageGetItemId(page, rootoffnum);
    1465              : 
    1466              :     /* Start from the root tuple */
    1467     14291184 :     offnum = rootoffnum;
    1468              : 
    1469              :     /* while not end of the chain */
    1470              :     for (;;)
    1471       296117 :     {
    1472              :         HeapTupleHeader htup;
    1473              :         ItemId      lp;
    1474              : 
    1475              :         /* Sanity check (pure paranoia) */
    1476     14587301 :         if (offnum < FirstOffsetNumber)
    1477            0 :             break;
    1478              : 
    1479              :         /*
    1480              :          * An offset past the end of page's line pointer array is possible
    1481              :          * when the array was truncated (original item must have been unused)
    1482              :          */
    1483     14587301 :         if (offnum > maxoff)
    1484            0 :             break;
    1485              : 
    1486              :         /* If item is already processed, stop --- it must not be same chain */
    1487     14587301 :         if (prstate->processed[offnum])
    1488            0 :             break;
    1489              : 
    1490     14587301 :         lp = PageGetItemId(page, offnum);
    1491              : 
    1492              :         /*
    1493              :          * Unused item obviously isn't part of the chain. Likewise, a dead
    1494              :          * line pointer can't be part of the chain.  Both of those cases were
    1495              :          * already marked as processed.
    1496              :          */
    1497              :         Assert(ItemIdIsUsed(lp));
    1498              :         Assert(!ItemIdIsDead(lp));
    1499              : 
    1500              :         /*
    1501              :          * If we are looking at the redirected root line pointer, jump to the
    1502              :          * first normal tuple in the chain.  If we find a redirect somewhere
    1503              :          * else, stop --- it must not be same chain.
    1504              :          */
    1505     14587301 :         if (ItemIdIsRedirected(lp))
    1506              :         {
    1507       173027 :             if (nchain > 0)
    1508            0 :                 break;          /* not at start of chain */
    1509       173027 :             chainitems[nchain++] = offnum;
    1510       173027 :             offnum = ItemIdGetRedirect(rootlp);
    1511       173027 :             continue;
    1512              :         }
    1513              : 
    1514              :         Assert(ItemIdIsNormal(lp));
    1515              : 
    1516     14414274 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
    1517              : 
    1518              :         /*
    1519              :          * Check the tuple XMIN against prior XMAX, if any
    1520              :          */
    1521     14537364 :         if (TransactionIdIsValid(priorXmax) &&
    1522       123090 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
    1523            0 :             break;
    1524              : 
    1525              :         /*
    1526              :          * OK, this tuple is indeed a member of the chain.
    1527              :          */
    1528     14414274 :         chainitems[nchain++] = offnum;
    1529              : 
    1530     14414274 :         switch (htsv_get_valid_status(prstate->htsv[offnum]))
    1531              :         {
    1532      1762798 :             case HEAPTUPLE_DEAD:
    1533              : 
    1534              :                 /* Remember the last DEAD tuple seen */
    1535      1762798 :                 ndeadchain = nchain;
    1536      1762798 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
    1537              :                                                       &prstate->latest_xid_removed);
    1538              :                 /* Advance to next chain member */
    1539      1762798 :                 break;
    1540              : 
    1541       361204 :             case HEAPTUPLE_RECENTLY_DEAD:
    1542              : 
    1543              :                 /*
    1544              :                  * We don't need to advance the conflict horizon for
    1545              :                  * RECENTLY_DEAD tuples, even if we are removing them.  This
    1546              :                  * is because we only remove RECENTLY_DEAD tuples if they
    1547              :                  * precede a DEAD tuple, and the DEAD tuple must have been
    1548              :                  * inserted by a newer transaction than the RECENTLY_DEAD
    1549              :                  * tuple by virtue of being later in the chain.  We will have
    1550              :                  * advanced the conflict horizon for the DEAD tuple.
    1551              :                  */
    1552              : 
    1553              :                 /*
    1554              :                  * Advance past RECENTLY_DEAD tuples just in case there's a
    1555              :                  * DEAD one after them.  We have to make sure that we don't
    1556              :                  * miss any DEAD tuples, since DEAD tuples that still have
    1557              :                  * tuple storage after pruning will confuse VACUUM.
    1558              :                  */
    1559       361204 :                 break;
    1560              : 
    1561     12290272 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1562              :             case HEAPTUPLE_LIVE:
    1563              :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1564     12290272 :                 goto process_chain;
    1565              : 
    1566            0 :             default:
    1567            0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1568              :                 goto process_chain;
    1569              :         }
    1570              : 
    1571              :         /*
    1572              :          * If the tuple is not HOT-updated, then we are at the end of this
    1573              :          * HOT-update chain.
    1574              :          */
    1575      2124002 :         if (!HeapTupleHeaderIsHotUpdated(htup))
    1576      2000912 :             goto process_chain;
    1577              : 
    1578              :         /* HOT implies it can't have moved to different partition */
    1579              :         Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
    1580              : 
    1581              :         /*
    1582              :          * Advance to next chain member.
    1583              :          */
    1584              :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == prstate->block);
    1585       123090 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    1586       123090 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    1587              :     }
    1588              : 
    1589            0 :     if (ItemIdIsRedirected(rootlp) && nchain < 2)
    1590              :     {
    1591              :         /*
    1592              :          * We found a redirect item that doesn't point to a valid follow-on
    1593              :          * item.  This can happen if the loop in heap_page_prune_and_freeze()
    1594              :          * caused us to visit the dead successor of a redirect item before
    1595              :          * visiting the redirect item.  We can clean up by setting the
    1596              :          * redirect item to LP_DEAD state or LP_UNUSED if the caller
    1597              :          * indicated.
    1598              :          */
    1599            0 :         heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
    1600            0 :         return;
    1601              :     }
    1602              : 
    1603            0 : process_chain:
    1604              : 
    1605     14291184 :     if (ndeadchain == 0)
    1606              :     {
    1607              :         /*
    1608              :          * No DEAD tuple was found, so the chain is entirely composed of
    1609              :          * normal, unchanged tuples.  Leave it alone.
    1610              :          */
    1611     12573714 :         int         i = 0;
    1612              : 
    1613     12573714 :         if (ItemIdIsRedirected(rootlp))
    1614              :         {
    1615       152842 :             heap_prune_record_unchanged_lp_redirect(prstate, rootoffnum);
    1616       152842 :             i++;
    1617              :         }
    1618     25152416 :         for (; i < nchain; i++)
    1619     12578702 :             heap_prune_record_unchanged_lp_normal(prstate, chainitems[i]);
    1620              :     }
    1621      1717470 :     else if (ndeadchain == nchain)
    1622              :     {
    1623              :         /*
    1624              :          * The entire chain is dead.  Mark the root line pointer LP_DEAD, and
    1625              :          * fully remove the other tuples in the chain.
    1626              :          */
    1627      1647310 :         heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
    1628      1691383 :         for (int i = 1; i < nchain; i++)
    1629        44073 :             heap_prune_record_unused(prstate, chainitems[i], true);
    1630              :     }
    1631              :     else
    1632              :     {
    1633              :         /*
    1634              :          * We found a DEAD tuple in the chain.  Redirect the root line pointer
    1635              :          * to the first non-DEAD tuple, and mark as unused each intermediate
    1636              :          * item that we are able to remove from the chain.
    1637              :          */
    1638        70160 :         heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
    1639        70160 :                                    ItemIdIsNormal(rootlp));
    1640        91600 :         for (int i = 1; i < ndeadchain; i++)
    1641        21440 :             heap_prune_record_unused(prstate, chainitems[i], true);
    1642              : 
    1643              :         /* the rest of tuples in the chain are normal, unchanged tuples */
    1644       142934 :         for (int i = ndeadchain; i < nchain; i++)
    1645        72774 :             heap_prune_record_unchanged_lp_normal(prstate, chainitems[i]);
    1646              :     }
    1647              : }
    1648              : 
    1649              : /* Record lowest soon-prunable XID */
    1650              : static void
    1651      5237642 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid,
    1652              :                            OffsetNumber offnum)
    1653              : {
    1654              :     /*
    1655              :      * This should exactly match the PageSetPrunable macro.  We can't store
    1656              :      * directly into the page header yet, so we update working state.
    1657              :      */
    1658              :     Assert(TransactionIdIsNormal(xid));
    1659     10270526 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
    1660      5032884 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
    1661       206103 :         prstate->new_prune_xid = xid;
    1662              : 
    1663              :     /*
    1664              :      * It's incorrect for a page to be marked all-visible if it contains
    1665              :      * prunable items.
    1666              :      */
    1667      5237642 :     if (PageIsAllVisible(prstate->page))
    1668            0 :         heap_page_fix_vm_corruption(prstate, offnum,
    1669              :                                     VM_CORRUPT_TUPLE_VISIBILITY);
    1670      5237642 : }
    1671              : 
    1672              : /* Record line pointer to be redirected */
    1673              : static void
    1674        70160 : heap_prune_record_redirect(PruneState *prstate,
    1675              :                            OffsetNumber offnum, OffsetNumber rdoffnum,
    1676              :                            bool was_normal)
    1677              : {
    1678              :     Assert(!prstate->processed[offnum]);
    1679        70160 :     prstate->processed[offnum] = true;
    1680              : 
    1681              :     /*
    1682              :      * Do not mark the redirect target here.  It needs to be counted
    1683              :      * separately as an unchanged tuple.
    1684              :      */
    1685              : 
    1686              :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
    1687        70160 :     prstate->redirected[prstate->nredirected * 2] = offnum;
    1688        70160 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
    1689              : 
    1690        70160 :     prstate->nredirected++;
    1691              : 
    1692              :     /*
    1693              :      * If the root entry had been a normal tuple, we are deleting it, so count
    1694              :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1695              :      * count.
    1696              :      */
    1697        70160 :     if (was_normal)
    1698        61948 :         prstate->ndeleted++;
    1699              : 
    1700        70160 :     prstate->hastup = true;
    1701        70160 : }
    1702              : 
    1703              : /* Record line pointer to be marked dead */
    1704              : static void
    1705      1613413 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
    1706              :                        bool was_normal)
    1707              : {
    1708              :     Assert(!prstate->processed[offnum]);
    1709      1613413 :     prstate->processed[offnum] = true;
    1710              : 
    1711              :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
    1712      1613413 :     prstate->nowdead[prstate->ndead] = offnum;
    1713      1613413 :     prstate->ndead++;
    1714              : 
    1715              :     /*
    1716              :      * Deliberately delay unsetting set_all_visible and set_all_frozen until
    1717              :      * later during pruning. Removable dead tuples shouldn't preclude freezing
    1718              :      * the page.
    1719              :      */
    1720              : 
    1721              :     /* Record the dead offset for vacuum */
    1722      1613413 :     prstate->deadoffsets[prstate->lpdead_items++] = offnum;
    1723              : 
    1724              :     /*
    1725              :      * If the root entry had been a normal tuple, we are deleting it, so count
    1726              :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1727              :      * count.
    1728              :      */
    1729      1613413 :     if (was_normal)
    1730      1601440 :         prstate->ndeleted++;
    1731      1613413 : }
    1732              : 
    1733              : /*
    1734              :  * Depending on whether or not the caller set mark_unused_now to true, record that a
    1735              :  * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
    1736              :  * which we will mark line pointers LP_UNUSED, but we will not mark line
    1737              :  * pointers LP_DEAD if mark_unused_now is true.
    1738              :  */
    1739              : static void
    1740      1647310 : heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
    1741              :                                  bool was_normal)
    1742              : {
    1743              :     /*
    1744              :      * If the caller set mark_unused_now to true, we can remove dead tuples
    1745              :      * during pruning instead of marking their line pointers dead. Set this
    1746              :      * tuple's line pointer LP_UNUSED. We hint that this option is less
    1747              :      * likely.
    1748              :      */
    1749      1647310 :     if (unlikely(prstate->mark_unused_now))
    1750        33897 :         heap_prune_record_unused(prstate, offnum, was_normal);
    1751              :     else
    1752      1613413 :         heap_prune_record_dead(prstate, offnum, was_normal);
    1753              : 
    1754              :     /*
    1755              :      * It's incorrect for the page to be set all-visible if it contains dead
    1756              :      * items. Fix that on the heap page and check the VM for corruption as
    1757              :      * well. Do that here rather than in heap_prune_record_dead() so we also
    1758              :      * cover tuples that are directly marked LP_UNUSED via mark_unused_now.
    1759              :      */
    1760      1647310 :     if (PageIsAllVisible(prstate->page))
    1761            0 :         heap_page_fix_vm_corruption(prstate, offnum, VM_CORRUPT_LPDEAD);
    1762      1647310 : }
    1763              : 
    1764              : /* Record line pointer to be marked unused */
    1765              : static void
    1766       104480 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
    1767              : {
    1768              :     Assert(!prstate->processed[offnum]);
    1769       104480 :     prstate->processed[offnum] = true;
    1770              : 
    1771              :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
    1772       104480 :     prstate->nowunused[prstate->nunused] = offnum;
    1773       104480 :     prstate->nunused++;
    1774              : 
    1775              :     /*
    1776              :      * If the root entry had been a normal tuple, we are deleting it, so count
    1777              :      * it in the result.  But changing a redirect (even to DEAD state) doesn't
    1778              :      * count.
    1779              :      */
    1780       104480 :     if (was_normal)
    1781       102489 :         prstate->ndeleted++;
    1782       104480 : }
    1783              : 
    1784              : /*
    1785              :  * Record an unused line pointer that is left unchanged.
    1786              :  */
    1787              : static void
    1788        94618 : heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum)
    1789              : {
    1790              :     Assert(!prstate->processed[offnum]);
    1791        94618 :     prstate->processed[offnum] = true;
    1792        94618 : }
    1793              : 
/*
 * Record line pointer that is left unchanged.  We consider freezing it, and
 * update bookkeeping of tuple counts and page visibility.
 *
 * Side effects on prstate: marks the item processed, sets hastup, updates
 * live_tuples/recently_dead_tuples, may clear set_all_visible/set_all_frozen,
 * may advance new_prune_xid (via heap_prune_record_prunable), may track
 * newest_live_xid, and may append a freeze plan to prstate->frozen.
 */
static void
heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum)
{
    HeapTupleHeader htup;
    TransactionId xmin;
    Page        page = prstate->page;

    Assert(!prstate->processed[offnum]);
    prstate->processed[offnum] = true;

    prstate->hastup = true;      /* the page is not empty */

    /*
     * The criteria for counting a tuple as live in this block need to match
     * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
     * ANALYZE may produce wildly different reltuples values, e.g. when there
     * are many recently-dead tuples.
     *
     * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
     * can't run inside a transaction block, which makes some cases impossible
     * (e.g. in-progress insert from the same transaction).
     *
     * HEAPTUPLE_DEAD are handled by the other heap_prune_record_*()
     * subroutines.  They don't count dead items like acquire_sample_rows()
     * does, because we assume that all dead items will become LP_UNUSED
     * before VACUUM finishes.  This difference is only superficial.  VACUUM
     * effectively agrees with ANALYZE about DEAD items, in the end.  VACUUM
     * won't remember LP_DEAD items, but only because they're not supposed to
     * be left behind when it is done. (Cases where we bypass index vacuuming
     * will violate this optimistic assumption, but the overall impact of that
     * should be negligible.)
     */
    htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));

    switch (prstate->htsv[offnum])
    {
        case HEAPTUPLE_LIVE:

            /*
             * Count it as live.  Not only is this natural, but it's also what
             * acquire_sample_rows() does.
             */
            prstate->live_tuples++;

            /*
             * Is the tuple definitely visible to all transactions?
             *
             * NB: Like with per-tuple hint bits, we can't set the
             * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
             * See SetHintBits for more info.  Check that the tuple is hinted
             * xmin-committed because of that.
             */
            if (!HeapTupleHeaderXminCommitted(htup))
            {
                /* Not hinted committed: page can't be all-visible/frozen */
                prstate->set_all_visible = false;
                prstate->set_all_frozen = false;
                break;
            }

            /*
             * The inserter definitely committed. But we don't know if it is
             * old enough that everyone sees it as committed. Later, after
             * processing all the tuples on the page, we'll check if there is
             * any snapshot that still considers the newest xid on the page to
             * be running. If so, we don't consider the page all-visible.
             */
            xmin = HeapTupleHeaderGetXmin(htup);

            /* Track newest xmin on page. */
            if (TransactionIdFollows(xmin, prstate->newest_live_xid) &&
                TransactionIdIsNormal(xmin))
                prstate->newest_live_xid = xmin;

            break;

        case HEAPTUPLE_RECENTLY_DEAD:
            prstate->recently_dead_tuples++;
            prstate->set_all_visible = false;
            prstate->set_all_frozen = false;

            /*
             * This tuple will soon become DEAD.  Update the hint field so
             * that the page is reconsidered for pruning in future.
             */
            heap_prune_record_prunable(prstate,
                                       HeapTupleHeaderGetUpdateXid(htup),
                                       offnum);
            break;

        case HEAPTUPLE_INSERT_IN_PROGRESS:

            /*
             * We do not count these rows as live, because we expect the
             * inserting transaction to update the counters at commit, and we
             * assume that will happen only after we report our results.  This
             * assumption is a bit shaky, but it is what acquire_sample_rows()
             * does, so be consistent.
             */
            prstate->set_all_visible = false;
            prstate->set_all_frozen = false;

            /* The page should not be marked all-visible */
            if (PageIsAllVisible(page))
                heap_page_fix_vm_corruption(prstate, offnum,
                                            VM_CORRUPT_TUPLE_VISIBILITY);

            /*
             * If we wanted to optimize for aborts, we might consider marking
             * the page prunable when we see INSERT_IN_PROGRESS.  But we
             * don't.  See related decisions about when to mark the page
             * prunable in heapam.c.
             */
            break;

        case HEAPTUPLE_DELETE_IN_PROGRESS:

            /*
             * This an expected case during concurrent vacuum.  Count such
             * rows as live.  As above, we assume the deleting transaction
             * will commit and update the counters after we report.
             */
            prstate->live_tuples++;
            prstate->set_all_visible = false;
            prstate->set_all_frozen = false;

            /*
             * This tuple may soon become DEAD.  Update the hint field so that
             * the page is reconsidered for pruning in future.
             */
            heap_prune_record_prunable(prstate,
                                       HeapTupleHeaderGetUpdateXid(htup),
                                       offnum);
            break;

        default:

            /*
             * DEAD tuples should've been passed to heap_prune_record_dead()
             * or heap_prune_record_unused() instead.
             */
            elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
                 prstate->htsv[offnum]);
            break;
    }

    /* Consider freezing any normal tuples which will not be removed */
    if (prstate->attempt_freeze)
    {
        bool        totally_frozen;

        if ((heap_prepare_freeze_tuple(htup,
                                       prstate->cutoffs,
                                       &prstate->pagefrz,
                                       &prstate->frozen[prstate->nfrozen],
                                       &totally_frozen)))
        {
            /* Save prepared freeze plan for later */
            prstate->frozen[prstate->nfrozen++].offset = offnum;
        }

        /*
         * If any tuple isn't either totally frozen already or eligible to
         * become totally frozen (according to its freeze plan), then the page
         * definitely cannot be set all-frozen in the visibility map later on.
         */
        if (!totally_frozen)
            prstate->set_all_frozen = false;
    }
}
    1967              : 
    1968              : 
    1969              : /*
    1970              :  * Record line pointer that was already LP_DEAD and is left unchanged.
    1971              :  */
    1972              : static void
    1973      1292960 : heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum)
    1974              : {
    1975              :     Assert(!prstate->processed[offnum]);
    1976      1292960 :     prstate->processed[offnum] = true;
    1977              : 
    1978              :     /*
    1979              :      * Deliberately don't set hastup for LP_DEAD items.  We make the soft
    1980              :      * assumption that any LP_DEAD items encountered here will become
    1981              :      * LP_UNUSED later on, before count_nondeletable_pages is reached.  If we
    1982              :      * don't make this assumption then rel truncation will only happen every
    1983              :      * other VACUUM, at most.  Besides, VACUUM must treat
    1984              :      * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
    1985              :      * handled (handled here, or handled later on).
    1986              :      *
    1987              :      * Similarly, don't unset set_all_visible and set_all_frozen until later,
    1988              :      * at the end of heap_page_prune_and_freeze().  This will allow us to
    1989              :      * attempt to freeze the page after pruning.  As long as we unset it
    1990              :      * before updating the visibility map, this will be correct.
    1991              :      */
    1992              : 
    1993              :     /* Record the dead offset for vacuum */
    1994      1292960 :     prstate->deadoffsets[prstate->lpdead_items++] = offnum;
    1995              : 
    1996              :     /*
    1997              :      * It's incorrect for a page to be marked all-visible if it contains dead
    1998              :      * items.
    1999              :      */
    2000      1292960 :     if (PageIsAllVisible(prstate->page))
    2001            0 :         heap_page_fix_vm_corruption(prstate, offnum, VM_CORRUPT_LPDEAD);
    2002      1292960 : }
    2003              : 
    2004              : /*
    2005              :  * Record LP_REDIRECT that is left unchanged.
    2006              :  */
    2007              : static void
    2008       152842 : heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
    2009              : {
    2010              :     /*
    2011              :      * A redirect line pointer doesn't count as a live tuple.
    2012              :      *
    2013              :      * If we leave a redirect line pointer in place, there will be another
    2014              :      * tuple on the page that it points to.  We will do the bookkeeping for
    2015              :      * that separately.  So we have nothing to do here, except remember that
    2016              :      * we processed this item.
    2017              :      */
    2018              :     Assert(!prstate->processed[offnum]);
    2019       152842 :     prstate->processed[offnum] = true;
    2020       152842 : }
    2021              : 
    2022              : /*
    2023              :  * Perform the actual page changes needed by heap_page_prune_and_freeze().
    2024              :  *
    2025              :  * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
    2026              :  * as unused, not redirecting or removing anything else.  The
    2027              :  * PageRepairFragmentation() call is skipped in that case.
    2028              :  *
    2029              :  * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
    2030              :  * the buffer.  If it is set, an ordinary exclusive lock suffices.
    2031              :  */
void
heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
                        OffsetNumber *redirected, int nredirected,
                        OffsetNumber *nowdead, int ndead,
                        OffsetNumber *nowunused, int nunused)
{
    Page        page = BufferGetPage(buffer);
    OffsetNumber *offnum;
    /* htup is only referenced inside USE_ASSERT_CHECKING blocks below */
    HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;

    /* Shouldn't be called unless there's something to do */
    Assert(nredirected > 0 || ndead > 0 || nunused > 0);

    /* If 'lp_truncate_only', we can only remove already-dead line pointers */
    Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));

    /*
     * Update all redirected line pointers.  'redirected' holds nredirected
     * pairs of offset numbers: each iteration consumes a 'from' offset (the
     * item being turned into an LP_REDIRECT) followed by its 'to' offset
     * (the redirect target).
     */
    offnum = redirected;
    for (int i = 0; i < nredirected; i++)
    {
        OffsetNumber fromoff = *offnum++;
        OffsetNumber tooff = *offnum++;
        ItemId      fromlp = PageGetItemId(page, fromoff);
        ItemId      tolp PG_USED_FOR_ASSERTS_ONLY;

#ifdef USE_ASSERT_CHECKING

        /*
         * Any existing item that we set as an LP_REDIRECT (any 'from' item)
         * must be the first item from a HOT chain.  If the item has tuple
         * storage then it can't be a heap-only tuple.  Otherwise we are just
         * maintaining an existing LP_REDIRECT from an existing HOT chain that
         * has been pruned at least once before now.
         */
        if (!ItemIdIsRedirected(fromlp))
        {
            Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));

            htup = (HeapTupleHeader) PageGetItem(page, fromlp);
            Assert(!HeapTupleHeaderIsHeapOnly(htup));
        }
        else
        {
            /* We shouldn't need to redundantly set the redirect */
            Assert(ItemIdGetRedirect(fromlp) != tooff);
        }

        /*
         * The item that we're about to set as an LP_REDIRECT (the 'from'
         * item) will point to an existing item (the 'to' item) that is
         * already a heap-only tuple.  There can be at most one LP_REDIRECT
         * item per HOT chain.
         *
         * We need to keep around an LP_REDIRECT item (after original
         * non-heap-only root tuple gets pruned away) so that it's always
         * possible for VACUUM to easily figure out what TID to delete from
         * indexes when an entire HOT chain becomes dead.  A heap-only tuple
         * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
         * tuple can.
         *
         * This check may miss problems, e.g. the target of a redirect could
         * be marked as unused subsequently. The page_verify_redirects() check
         * below will catch such problems.
         */
        tolp = PageGetItemId(page, tooff);
        Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
        htup = (HeapTupleHeader) PageGetItem(page, tolp);
        Assert(HeapTupleHeaderIsHeapOnly(htup));
#endif

        ItemIdSetRedirect(fromlp, tooff);
    }

    /* Update all now-dead line pointers (each becomes LP_DEAD) */
    offnum = nowdead;
    for (int i = 0; i < ndead; i++)
    {
        OffsetNumber off = *offnum++;
        ItemId      lp = PageGetItemId(page, off);

#ifdef USE_ASSERT_CHECKING

        /*
         * An LP_DEAD line pointer must be left behind when the original item
         * (which is dead to everybody) could still be referenced by a TID in
         * an index.  This should never be necessary with any individual
         * heap-only tuple item, though. (It's not clear how much of a problem
         * that would be, but there is no reason to allow it.)
         */
        if (ItemIdHasStorage(lp))
        {
            Assert(ItemIdIsNormal(lp));
            htup = (HeapTupleHeader) PageGetItem(page, lp);
            Assert(!HeapTupleHeaderIsHeapOnly(htup));
        }
        else
        {
            /* Whole HOT chain becomes dead */
            Assert(ItemIdIsRedirected(lp));
        }
#endif

        ItemIdSetDead(lp);
    }

    /* Update all now-unused line pointers (each becomes LP_UNUSED) */
    offnum = nowunused;
    for (int i = 0; i < nunused; i++)
    {
        OffsetNumber off = *offnum++;
        ItemId      lp = PageGetItemId(page, off);

#ifdef USE_ASSERT_CHECKING

        if (lp_truncate_only)
        {
            /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
            Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
        }
        else
        {
            /*
             * When heap_page_prune_and_freeze() was called, mark_unused_now
             * may have been passed as true, which allows would-be LP_DEAD
             * items to be made LP_UNUSED instead.  This is only possible if
             * the relation has no indexes.  If there are any dead items, then
             * mark_unused_now was not true and every item being marked
             * LP_UNUSED must refer to a heap-only tuple.
             */
            if (ndead > 0)
            {
                Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
                htup = (HeapTupleHeader) PageGetItem(page, lp);
                Assert(HeapTupleHeaderIsHeapOnly(htup));
            }
            else
                Assert(ItemIdIsUsed(lp));
        }

#endif

        ItemIdSetUnused(lp);
    }

    if (lp_truncate_only)
        PageTruncateLinePointerArray(page);
    else
    {
        /*
         * Finally, repair any fragmentation, and update the page's hint bit
         * about whether it has free pointers.
         */
        PageRepairFragmentation(page);

        /*
         * Now that the page has been modified, assert that redirect items
         * still point to valid targets.
         */
        page_verify_redirects(page);
    }
}
    2193              : 
    2194              : 
    2195              : /*
    2196              :  * If built with assertions, verify that all LP_REDIRECT items point to a
    2197              :  * valid item.
    2198              :  *
    2199              :  * One way that bugs related to HOT pruning show is redirect items pointing to
    2200              :  * removed tuples. It's not trivial to reliably check that marking an item
    2201              :  * unused will not orphan a redirect item during heap_prune_chain() /
    2202              :  * heap_page_prune_execute(), so we additionally check the whole page after
    2203              :  * pruning. Without this check such bugs would typically only cause asserts
    2204              :  * later, potentially well after the corruption has been introduced.
    2205              :  *
    2206              :  * Also check comments in heap_page_prune_execute()'s redirection loop.
    2207              :  */
    2208              : static void
    2209        66153 : page_verify_redirects(Page page)
    2210              : {
    2211              : #ifdef USE_ASSERT_CHECKING
    2212              :     OffsetNumber offnum;
    2213              :     OffsetNumber maxoff;
    2214              : 
    2215              :     maxoff = PageGetMaxOffsetNumber(page);
    2216              :     for (offnum = FirstOffsetNumber;
    2217              :          offnum <= maxoff;
    2218              :          offnum = OffsetNumberNext(offnum))
    2219              :     {
    2220              :         ItemId      itemid = PageGetItemId(page, offnum);
    2221              :         OffsetNumber targoff;
    2222              :         ItemId      targitem;
    2223              :         HeapTupleHeader htup;
    2224              : 
    2225              :         if (!ItemIdIsRedirected(itemid))
    2226              :             continue;
    2227              : 
    2228              :         targoff = ItemIdGetRedirect(itemid);
    2229              :         targitem = PageGetItemId(page, targoff);
    2230              : 
    2231              :         Assert(ItemIdIsUsed(targitem));
    2232              :         Assert(ItemIdIsNormal(targitem));
    2233              :         Assert(ItemIdHasStorage(targitem));
    2234              :         htup = (HeapTupleHeader) PageGetItem(page, targitem);
    2235              :         Assert(HeapTupleHeaderIsHeapOnly(htup));
    2236              :     }
    2237              : #endif
    2238        66153 : }
    2239              : 
    2240              : 
    2241              : /*
    2242              :  * For all items in this page, find their respective root line pointers.
    2243              :  * If item k is part of a HOT-chain with root at item j, then we set
    2244              :  * root_offsets[k - 1] = j.
    2245              :  *
    2246              :  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
    2247              :  * Unused entries are filled with InvalidOffsetNumber (zero).
    2248              :  *
    2249              :  * The function must be called with at least share lock on the buffer, to
    2250              :  * prevent concurrent prune operations.
    2251              :  *
    2252              :  * Note: The information collected here is valid only as long as the caller
    2253              :  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
    2254              :  * and reused by a completely unrelated tuple.
    2255              :  */
    2256              : void
    2257       133140 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
    2258              : {
    2259              :     OffsetNumber offnum,
    2260              :                 maxoff;
    2261              : 
    2262       133140 :     MemSet(root_offsets, InvalidOffsetNumber,
    2263              :            MaxHeapTuplesPerPage * sizeof(OffsetNumber));
    2264              : 
    2265       133140 :     maxoff = PageGetMaxOffsetNumber(page);
    2266     10782711 :     for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    2267              :     {
    2268     10649571 :         ItemId      lp = PageGetItemId(page, offnum);
    2269              :         HeapTupleHeader htup;
    2270              :         OffsetNumber nextoffnum;
    2271              :         TransactionId priorXmax;
    2272              : 
    2273              :         /* skip unused and dead items */
    2274     10649571 :         if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
    2275         9467 :             continue;
    2276              : 
    2277     10640104 :         if (ItemIdIsNormal(lp))
    2278              :         {
    2279     10636286 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    2280              : 
    2281              :             /*
    2282              :              * Check if this tuple is part of a HOT-chain rooted at some other
    2283              :              * tuple. If so, skip it for now; we'll process it when we find
    2284              :              * its root.
    2285              :              */
    2286     10636286 :             if (HeapTupleHeaderIsHeapOnly(htup))
    2287         4158 :                 continue;
    2288              : 
    2289              :             /*
    2290              :              * This is either a plain tuple or the root of a HOT-chain.
    2291              :              * Remember it in the mapping.
    2292              :              */
    2293     10632128 :             root_offsets[offnum - 1] = offnum;
    2294              : 
    2295              :             /* If it's not the start of a HOT-chain, we're done with it */
    2296     10632128 :             if (!HeapTupleHeaderIsHotUpdated(htup))
    2297     10631864 :                 continue;
    2298              : 
    2299              :             /* Set up to scan the HOT-chain */
    2300          264 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    2301          264 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    2302              :         }
    2303              :         else
    2304              :         {
    2305              :             /* Must be a redirect item. We do not set its root_offsets entry */
    2306              :             Assert(ItemIdIsRedirected(lp));
    2307              :             /* Set up to scan the HOT-chain */
    2308         3818 :             nextoffnum = ItemIdGetRedirect(lp);
    2309         3818 :             priorXmax = InvalidTransactionId;
    2310              :         }
    2311              : 
    2312              :         /*
    2313              :          * Now follow the HOT-chain and collect other tuples in the chain.
    2314              :          *
    2315              :          * Note: Even though this is a nested loop, the complexity of the
    2316              :          * function is O(N) because a tuple in the page should be visited not
    2317              :          * more than twice, once in the outer loop and once in HOT-chain
    2318              :          * chases.
    2319              :          */
    2320              :         for (;;)
    2321              :         {
    2322              :             /* Sanity check (pure paranoia) */
    2323         4154 :             if (offnum < FirstOffsetNumber)
    2324            0 :                 break;
    2325              : 
    2326              :             /*
    2327              :              * An offset past the end of page's line pointer array is possible
    2328              :              * when the array was truncated
    2329              :              */
    2330         4154 :             if (offnum > maxoff)
    2331            0 :                 break;
    2332              : 
    2333         4154 :             lp = PageGetItemId(page, nextoffnum);
    2334              : 
    2335              :             /* Check for broken chains */
    2336         4154 :             if (!ItemIdIsNormal(lp))
    2337            0 :                 break;
    2338              : 
    2339         4154 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
    2340              : 
    2341         4490 :             if (TransactionIdIsValid(priorXmax) &&
    2342          336 :                 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
    2343            0 :                 break;
    2344              : 
    2345              :             /* Remember the root line pointer for this item */
    2346         4154 :             root_offsets[nextoffnum - 1] = offnum;
    2347              : 
    2348              :             /* Advance to next chain member, if any */
    2349         4154 :             if (!HeapTupleHeaderIsHotUpdated(htup))
    2350         4082 :                 break;
    2351              : 
    2352              :             /* HOT implies it can't have moved to different partition */
    2353              :             Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
    2354              : 
    2355           72 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
    2356           72 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    2357              :         }
    2358              :     }
    2359       133140 : }
    2360              : 
    2361              : 
    2362              : /*
    2363              :  * Compare fields that describe actions required to freeze tuple with caller's
    2364              :  * open plan.  If everything matches then the frz tuple plan is equivalent to
    2365              :  * caller's plan.
    2366              :  */
    2367              : static inline bool
    2368      1064066 : heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
    2369              : {
    2370      1064066 :     if (plan->xmax == frz->xmax &&
    2371      1062770 :         plan->t_infomask2 == frz->t_infomask2 &&
    2372      1062026 :         plan->t_infomask == frz->t_infomask &&
    2373      1059235 :         plan->frzflags == frz->frzflags)
    2374      1059235 :         return true;
    2375              : 
    2376              :     /* Caller must call heap_log_freeze_new_plan again for frz */
    2377         4831 :     return false;
    2378              : }
    2379              : 
    2380              : /*
    2381              :  * Comparator used to deduplicate the freeze plans used in WAL records.
    2382              :  */
    2383              : static int
    2384      1461280 : heap_log_freeze_cmp(const void *arg1, const void *arg2)
    2385              : {
    2386      1461280 :     const HeapTupleFreeze *frz1 = arg1;
    2387      1461280 :     const HeapTupleFreeze *frz2 = arg2;
    2388              : 
    2389      1461280 :     if (frz1->xmax < frz2->xmax)
    2390        13152 :         return -1;
    2391      1448128 :     else if (frz1->xmax > frz2->xmax)
    2392        14762 :         return 1;
    2393              : 
    2394      1433366 :     if (frz1->t_infomask2 < frz2->t_infomask2)
    2395         5238 :         return -1;
    2396      1428128 :     else if (frz1->t_infomask2 > frz2->t_infomask2)
    2397         4369 :         return 1;
    2398              : 
    2399      1423759 :     if (frz1->t_infomask < frz2->t_infomask)
    2400        13194 :         return -1;
    2401      1410565 :     else if (frz1->t_infomask > frz2->t_infomask)
    2402        15790 :         return 1;
    2403              : 
    2404      1394775 :     if (frz1->frzflags < frz2->frzflags)
    2405            0 :         return -1;
    2406      1394775 :     else if (frz1->frzflags > frz2->frzflags)
    2407            0 :         return 1;
    2408              : 
    2409              :     /*
    2410              :      * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
    2411              :      * (So the tuples will share a single canonical freeze plan.)
    2412              :      *
    2413              :      * We tiebreak on page offset number to keep each freeze plan's page
    2414              :      * offset number array individually sorted. (Unnecessary, but be tidy.)
    2415              :      */
    2416      1394775 :     if (frz1->offset < frz2->offset)
    2417      1183915 :         return -1;
    2418       210860 :     else if (frz1->offset > frz2->offset)
    2419       210860 :         return 1;
    2420              : 
    2421              :     Assert(false);
    2422            0 :     return 0;
    2423              : }
    2424              : 
    2425              : /*
    2426              :  * Start new plan initialized using tuple-level actions.  At least one tuple
    2427              :  * will have steps required to freeze described by caller's plan during REDO.
    2428              :  */
    2429              : static inline void
    2430        28926 : heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
    2431              : {
    2432        28926 :     plan->xmax = frz->xmax;
    2433        28926 :     plan->t_infomask2 = frz->t_infomask2;
    2434        28926 :     plan->t_infomask = frz->t_infomask;
    2435        28926 :     plan->frzflags = frz->frzflags;
    2436        28926 :     plan->ntuples = 1;           /* for now */
    2437        28926 : }
    2438              : 
    2439              : /*
    2440              :  * Deduplicate tuple-based freeze plans so that each distinct set of
    2441              :  * processing steps is only stored once in the WAL record.
    2442              :  * Called during original execution of freezing (for logged relations).
    2443              :  *
    2444              :  * Return value is number of plans set in *plans_out for caller.  Also writes
    2445              :  * an array of offset numbers into *offsets_out output argument for caller
    2446              :  * (actually there is one array per freeze plan, but that's not of immediate
    2447              :  * concern to our caller).
    2448              :  */
    2449              : static int
    2450        24095 : heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
    2451              :                      xlhp_freeze_plan *plans_out,
    2452              :                      OffsetNumber *offsets_out)
    2453              : {
    2454        24095 :     int         nplans = 0;
    2455              : 
    2456              :     /* Sort tuple-based freeze plans in the order required to deduplicate */
    2457        24095 :     qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
    2458              : 
    2459      1112256 :     for (int i = 0; i < ntuples; i++)
    2460              :     {
    2461      1088161 :         HeapTupleFreeze *frz = tuples + i;
    2462              : 
    2463      1088161 :         if (i == 0)
    2464              :         {
    2465              :             /* New canonical freeze plan starting with first tup */
    2466        24095 :             heap_log_freeze_new_plan(plans_out, frz);
    2467        24095 :             nplans++;
    2468              :         }
    2469      1064066 :         else if (heap_log_freeze_eq(plans_out, frz))
    2470              :         {
    2471              :             /* tup matches open canonical plan -- include tup in it */
    2472              :             Assert(offsets_out[i - 1] < frz->offset);
    2473      1059235 :             plans_out->ntuples++;
    2474              :         }
    2475              :         else
    2476              :         {
    2477              :             /* Tup doesn't match current plan -- done with it now */
    2478         4831 :             plans_out++;
    2479              : 
    2480              :             /* New canonical freeze plan starting with this tup */
    2481         4831 :             heap_log_freeze_new_plan(plans_out, frz);
    2482         4831 :             nplans++;
    2483              :         }
    2484              : 
    2485              :         /*
    2486              :          * Save page offset number in dedicated buffer in passing.
    2487              :          *
    2488              :          * REDO routine relies on the record's offset numbers array grouping
    2489              :          * offset numbers by freeze plan.  The sort order within each grouping
    2490              :          * is ascending offset number order, just to keep things tidy.
    2491              :          */
    2492      1088161 :         offsets_out[i] = frz->offset;
    2493              :     }
    2494              : 
    2495              :     Assert(nplans > 0 && nplans <= ntuples);
    2496              : 
    2497        24095 :     return nplans;
    2498              : }
    2499              : 
    2500              : /*
    2501              :  * Write an XLOG_HEAP2_PRUNE* WAL record
    2502              :  *
    2503              :  * This is used for several different page maintenance operations:
    2504              :  *
    2505              :  * - Page pruning, in VACUUM's 1st pass or on access: Some items are
    2506              :  *   redirected, some marked dead, and some removed altogether.
    2507              :  *
    2508              :  * - Freezing: Items are marked as 'frozen'.
    2509              :  *
    2510              :  * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
    2511              :  *
    2512              :  * They have enough commonalities that we use a single WAL record for them
    2513              :  * all.
    2514              :  *
    2515              :  * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
    2516              :  * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
    2517              :  * replaying 'unused' items depends on whether they were all previously marked
    2518              :  * as dead.
    2519              :  *
    2520              :  * If the VM is being updated, vmflags will contain the bits to set. In this
    2521              :  * case, vmbuffer should already have been updated and marked dirty and should
    2522              :  * still be pinned and locked.
    2523              :  *
    2524              :  * Note: This function scribbles on the 'frozen' array.
    2525              :  *
    2526              :  * Note: This is called in a critical section, so careful what you do here.
    2527              :  */
    2528              : void
    2529       115604 : log_heap_prune_and_freeze(Relation relation, Buffer buffer,
    2530              :                           Buffer vmbuffer, uint8 vmflags,
    2531              :                           TransactionId conflict_xid,
    2532              :                           bool cleanup_lock,
    2533              :                           PruneReason reason,
    2534              :                           HeapTupleFreeze *frozen, int nfrozen,
    2535              :                           OffsetNumber *redirected, int nredirected,
    2536              :                           OffsetNumber *dead, int ndead,
    2537              :                           OffsetNumber *unused, int nunused)
    2538              : {
    2539              :     xl_heap_prune xlrec;
    2540              :     XLogRecPtr  recptr;
    2541              :     uint8       info;
    2542              :     uint8       regbuf_flags_heap;
    2543              : 
    2544       115604 :     Page        heap_page = BufferGetPage(buffer);
    2545              : 
    2546              :     /* The following local variables hold data registered in the WAL record: */
    2547              :     xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
    2548              :     xlhp_freeze_plans freeze_plans;
    2549              :     xlhp_prune_items redirect_items;
    2550              :     xlhp_prune_items dead_items;
    2551              :     xlhp_prune_items unused_items;
    2552              :     OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
    2553       115604 :     bool        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
    2554       115604 :     bool        do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
    2555       115604 :     bool        heap_fpi_allowed = true;
    2556              : 
    2557              :     Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
    2558              : 
    2559       115604 :     xlrec.flags = 0;
    2560       115604 :     regbuf_flags_heap = REGBUF_STANDARD;
    2561              : 
    2562              :     /*
    2563              :      * We can avoid an FPI of the heap page if the only modification we are
    2564              :      * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
    2565              :      * disabled.
    2566              :      *
    2567              :      * However, if the page has never been WAL-logged (LSN is invalid), we
    2568              :      * must force an FPI regardless.  This can happen when another backend
    2569              :      * extends the heap, initializes the page, and then fails before WAL-
    2570              :      * logging it.  Since heap extension is not WAL-logged, recovery might try
    2571              :      * to replay our record and find that the page isn't initialized, which
    2572              :      * would cause a PANIC.
    2573              :      */
    2574       115604 :     if (!XLogRecPtrIsValid(PageGetLSN(heap_page)))
    2575            0 :         regbuf_flags_heap |= REGBUF_FORCE_IMAGE;
    2576       115604 :     else if (!do_prune && nfrozen == 0 && (!do_set_vm || !XLogHintBitIsNeeded()))
    2577              :     {
    2578         1128 :         regbuf_flags_heap |= REGBUF_NO_IMAGE;
    2579         1128 :         heap_fpi_allowed = false;
    2580              :     }
    2581              : 
    2582              :     /*
    2583              :      * Prepare data for the buffer.  The arrays are not actually in the
    2584              :      * buffer, but we pretend that they are.  When XLogInsert stores a full
    2585              :      * page image, the arrays can be omitted.
    2586              :      */
    2587       115604 :     XLogBeginInsert();
    2588       115604 :     XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
    2589              : 
    2590       115604 :     if (do_set_vm)
    2591        56647 :         XLogRegisterBuffer(1, vmbuffer, 0);
    2592              : 
    2593       115604 :     if (nfrozen > 0)
    2594              :     {
    2595              :         int         nplans;
    2596              : 
    2597        24095 :         xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
    2598              : 
    2599              :         /*
    2600              :          * Prepare deduplicated representation for use in the WAL record. This
    2601              :          * destructively sorts frozen tuples array in-place.
    2602              :          */
    2603        24095 :         nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
    2604              : 
    2605        24095 :         freeze_plans.nplans = nplans;
    2606        24095 :         XLogRegisterBufData(0, &freeze_plans,
    2607              :                             offsetof(xlhp_freeze_plans, plans));
    2608        24095 :         XLogRegisterBufData(0, plans,
    2609              :                             sizeof(xlhp_freeze_plan) * nplans);
    2610              :     }
    2611       115604 :     if (nredirected > 0)
    2612              :     {
    2613        17146 :         xlrec.flags |= XLHP_HAS_REDIRECTIONS;
    2614              : 
    2615        17146 :         redirect_items.ntargets = nredirected;
    2616        17146 :         XLogRegisterBufData(0, &redirect_items,
    2617              :                             offsetof(xlhp_prune_items, data));
    2618        17146 :         XLogRegisterBufData(0, redirected,
    2619              :                             sizeof(OffsetNumber[2]) * nredirected);
    2620              :     }
    2621       115604 :     if (ndead > 0)
    2622              :     {
    2623        44524 :         xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
    2624              : 
    2625        44524 :         dead_items.ntargets = ndead;
    2626        44524 :         XLogRegisterBufData(0, &dead_items,
    2627              :                             offsetof(xlhp_prune_items, data));
    2628        44524 :         XLogRegisterBufData(0, dead,
    2629              :                             sizeof(OffsetNumber) * ndead);
    2630              :     }
    2631       115604 :     if (nunused > 0)
    2632              :     {
    2633        27638 :         xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
    2634              : 
    2635        27638 :         unused_items.ntargets = nunused;
    2636        27638 :         XLogRegisterBufData(0, &unused_items,
    2637              :                             offsetof(xlhp_prune_items, data));
    2638        27638 :         XLogRegisterBufData(0, unused,
    2639              :                             sizeof(OffsetNumber) * nunused);
    2640              :     }
    2641       115604 :     if (nfrozen > 0)
    2642        24095 :         XLogRegisterBufData(0, frz_offsets,
    2643              :                             sizeof(OffsetNumber) * nfrozen);
    2644              : 
    2645              :     /*
    2646              :      * Prepare the main xl_heap_prune record.  We already set the XLHP_HAS_*
    2647              :      * flag above.
    2648              :      */
    2649       115604 :     if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
    2650              :     {
    2651        56647 :         xlrec.flags |= XLHP_VM_ALL_VISIBLE;
    2652        56647 :         if (vmflags & VISIBILITYMAP_ALL_FROZEN)
    2653        40393 :             xlrec.flags |= XLHP_VM_ALL_FROZEN;
    2654              :     }
    2655       115604 :     if (RelationIsAccessibleInLogicalDecoding(relation))
    2656          599 :         xlrec.flags |= XLHP_IS_CATALOG_REL;
    2657       115604 :     if (TransactionIdIsValid(conflict_xid))
    2658        89983 :         xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
    2659       115604 :     if (cleanup_lock)
    2660       101678 :         xlrec.flags |= XLHP_CLEANUP_LOCK;
    2661              :     else
    2662              :     {
    2663              :         Assert(nredirected == 0 && ndead == 0);
    2664              :         /* also, any items in 'unused' must've been LP_DEAD previously */
    2665              :     }
    2666       115604 :     XLogRegisterData(&xlrec, SizeOfHeapPrune);
    2667       115604 :     if (TransactionIdIsValid(conflict_xid))
    2668        89983 :         XLogRegisterData(&conflict_xid, sizeof(TransactionId));
    2669              : 
    2670       115604 :     switch (reason)
    2671              :     {
    2672        44718 :         case PRUNE_ON_ACCESS:
    2673        44718 :             info = XLOG_HEAP2_PRUNE_ON_ACCESS;
    2674        44718 :             break;
    2675        56960 :         case PRUNE_VACUUM_SCAN:
    2676        56960 :             info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
    2677        56960 :             break;
    2678        13926 :         case PRUNE_VACUUM_CLEANUP:
    2679        13926 :             info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
    2680        13926 :             break;
    2681            0 :         default:
    2682            0 :             elog(ERROR, "unrecognized prune reason: %d", (int) reason);
    2683              :             break;
    2684              :     }
    2685       115604 :     recptr = XLogInsert(RM_HEAP2_ID, info);
    2686              : 
    2687       115604 :     if (do_set_vm)
    2688              :     {
    2689              :         Assert(BufferIsDirty(vmbuffer));
    2690        56647 :         PageSetLSN(BufferGetPage(vmbuffer), recptr);
    2691              :     }
    2692              : 
    2693              :     /*
    2694              :      * If we explicitly skip an FPI, we must not stamp the heap page with this
    2695              :      * record's LSN. Recovery skips records <= the stamped LSN, so this could
    2696              :      * lead to skipping an earlier FPI needed to repair a torn page.
    2697              :      */
    2698       115604 :     if (heap_fpi_allowed)
    2699              :     {
    2700              :         Assert(BufferIsDirty(buffer));
    2701       114476 :         PageSetLSN(heap_page, recptr);
    2702              :     }
    2703       115604 : }
        

Generated by: LCOV version 2.0-1