LCOV - code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 198 205 96.6 %
Date: 2019-11-15 23:07:02 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pruneheap.c
       4             :  *    heap page pruning and HOT-chain management code
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/pruneheap.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/heapam.h"
      18             : #include "access/heapam_xlog.h"
      19             : #include "access/htup_details.h"
      20             : #include "access/transam.h"
      21             : #include "access/xlog.h"
      22             : #include "catalog/catalog.h"
      23             : #include "miscadmin.h"
      24             : #include "pgstat.h"
      25             : #include "storage/bufmgr.h"
      26             : #include "utils/rel.h"
      27             : #include "utils/snapmgr.h"
      28             : 
      29             : /* Working data for heap_page_prune and subroutines */
      30             : typedef struct
      31             : {
      32             :     TransactionId new_prune_xid;    /* new prune hint value for page */
      33             :     TransactionId latestRemovedXid; /* latest xid to be removed by this prune */
      34             :     int         nredirected;    /* numbers of entries in arrays below */
      35             :     int         ndead;
      36             :     int         nunused;
      37             :     /* arrays that accumulate indexes of items to be changed */
      38             :     OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
      39             :     OffsetNumber nowdead[MaxHeapTuplesPerPage];
      40             :     OffsetNumber nowunused[MaxHeapTuplesPerPage];
      41             :     /* marked[i] is true if item i is entered in one of the above arrays */
      42             :     bool        marked[MaxHeapTuplesPerPage + 1];
      43             : } PruneState;
      44             : 
      45             : /* Local functions */
      46             : static int  heap_prune_chain(Relation relation, Buffer buffer,
      47             :                              OffsetNumber rootoffnum,
      48             :                              TransactionId OldestXmin,
      49             :                              PruneState *prstate);
      50             : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
      51             : static void heap_prune_record_redirect(PruneState *prstate,
      52             :                                        OffsetNumber offnum, OffsetNumber rdoffnum);
      53             : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
      54             : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
      55             : 
      56             : 
      57             : /*
      58             :  * Optionally prune and repair fragmentation in the specified page.
      59             :  *
      60             :  * This is an opportunistic function.  It will perform housekeeping
      61             :  * only if the page heuristically looks like a candidate for pruning and we
      62             :  * can acquire buffer cleanup lock without blocking.
      63             :  *
      64             :  * Note: this is called quite often.  It's important that it fall out quickly
      65             :  * if there's not any use in pruning.
      66             :  *
      67             :  * Caller must have pin on the buffer, and must *not* have a lock on it.
      68             :  *
      69             :  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
      70             :  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
      71             :  */
      72             : void
      73    13488166 : heap_page_prune_opt(Relation relation, Buffer buffer)
      74             : {
      75    13488166 :     Page        page = BufferGetPage(buffer);
      76             :     Size        minfree;
      77             :     TransactionId OldestXmin;
      78             : 
      79             :     /*
      80             :      * We can't write WAL in recovery mode, so there's no point trying to
      81             :      * clean the page. The master will likely issue a cleaning WAL record soon
      82             :      * anyway, so this is no particular loss.
      83             :      */
      84    13488166 :     if (RecoveryInProgress())
      85       52168 :         return;
      86             : 
      87             :     /*
      88             :      * Use the appropriate xmin horizon for this relation. If it's a proper
      89             :      * catalog relation or a user defined, additional, catalog relation, we
      90             :      * need to use the horizon that includes slots, otherwise the data-only
      91             :      * horizon can be used. Note that the toast relation of user defined
      92             :      * relations are *not* considered catalog relations.
      93             :      *
      94             :      * It is OK to apply the old snapshot limit before acquiring the cleanup
      95             :      * lock because the worst that can happen is that we are not quite as
      96             :      * aggressive about the cleanup (by however many transaction IDs are
      97             :      * consumed between this point and acquiring the lock).  This allows us to
      98             :      * save significant overhead in the case where the page is found not to be
      99             :      * prunable.
     100             :      */
     101    14609836 :     if (IsCatalogRelation(relation) ||
     102     1199300 :         RelationIsAccessibleInLogicalDecoding(relation))
     103    12262160 :         OldestXmin = RecentGlobalXmin;
     104             :     else
     105     1173838 :         OldestXmin =
     106     1173838 :             TransactionIdLimitedForOldSnapshots(RecentGlobalDataXmin,
     107             :                                                 relation);
     108             : 
     109             :     Assert(TransactionIdIsValid(OldestXmin));
     110             : 
     111             :     /*
     112             :      * Let's see if we really need pruning.
     113             :      *
     114             :      * Forget it if page is not hinted to contain something prunable that's
     115             :      * older than OldestXmin.
     116             :      */
     117    13435998 :     if (!PageIsPrunable(page, OldestXmin))
     118    12168838 :         return;
     119             : 
     120             :     /*
     121             :      * We prune when a previous UPDATE failed to find enough space on the page
     122             :      * for a new tuple version, or when free space falls below the relation's
     123             :      * fill-factor target (but not less than 10%).
     124             :      *
     125             :      * Checking free space here is questionable since we aren't holding any
     126             :      * lock on the buffer; in the worst case we could get a bogus answer. It's
     127             :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
     128             :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     129             :      * important than sometimes getting a wrong answer in what is after all
     130             :      * just a heuristic estimate.
     131             :      */
     132     1267160 :     minfree = RelationGetTargetPageFreeSpace(relation,
     133             :                                              HEAP_DEFAULT_FILLFACTOR);
     134     1267160 :     minfree = Max(minfree, BLCKSZ / 10);
     135             : 
     136     1267160 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     137             :     {
     138             :         /* OK, try to get exclusive buffer lock */
     139       66472 :         if (!ConditionalLockBufferForCleanup(buffer))
     140         146 :             return;
     141             : 
     142             :         /*
     143             :          * Now that we have buffer lock, get accurate information about the
     144             :          * page's free space, and recheck the heuristic about whether to
     145             :          * prune. (We needn't recheck PageIsPrunable, since no one else could
     146             :          * have pruned while we hold pin.)
     147             :          */
     148       66326 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     149             :         {
     150       66326 :             TransactionId ignore = InvalidTransactionId;    /* return value not
     151             :                                                              * needed */
     152             : 
     153             :             /* OK to prune */
     154       66326 :             (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
     155             :         }
     156             : 
     157             :         /* And release buffer lock */
     158       66326 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     159             :     }
     160             : }
     161             : 
     162             : 
     163             : /*
     164             :  * Prune and repair fragmentation in the specified page.
     165             :  *
     166             :  * Caller must have pin and buffer cleanup lock on the page.
     167             :  *
     168             :  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
     169             :  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
     170             :  *
     171             :  * If report_stats is true then we send the number of reclaimed heap-only
     172             :  * tuples to pgstats.  (This must be false during vacuum, since vacuum will
     173             :  * send its own new total to pgstats, and we don't want this delta applied
     174             :  * on top of that.)
     175             :  *
     176             :  * Returns the number of tuples deleted from the page and sets
     177             :  * latestRemovedXid.
     178             :  */
     179             : int
     180      237012 : heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
     181             :                 bool report_stats, TransactionId *latestRemovedXid)
     182             : {
     183      237012 :     int         ndeleted = 0;
     184      237012 :     Page        page = BufferGetPage(buffer);
     185             :     OffsetNumber offnum,
     186             :                 maxoff;
     187             :     PruneState  prstate;
     188             : 
     189             :     /*
     190             :      * Our strategy is to scan the page and make lists of items to change,
     191             :      * then apply the changes within a critical section.  This keeps as much
     192             :      * logic as possible out of the critical section, and also ensures that
     193             :      * WAL replay will work the same as the normal case.
     194             :      *
     195             :      * First, initialize the new pd_prune_xid value to zero (indicating no
     196             :      * prunable tuples).  If we find any tuples which may soon become
     197             :      * prunable, we will save the lowest relevant XID in new_prune_xid. Also
     198             :      * initialize the rest of our working state.
     199             :      */
     200      237012 :     prstate.new_prune_xid = InvalidTransactionId;
     201      237012 :     prstate.latestRemovedXid = *latestRemovedXid;
     202      237012 :     prstate.nredirected = prstate.ndead = prstate.nunused = 0;
     203      237012 :     memset(prstate.marked, 0, sizeof(prstate.marked));
     204             : 
     205             :     /* Scan the page */
     206      237012 :     maxoff = PageGetMaxOffsetNumber(page);
     207    16597098 :     for (offnum = FirstOffsetNumber;
     208             :          offnum <= maxoff;
     209    16123074 :          offnum = OffsetNumberNext(offnum))
     210             :     {
     211             :         ItemId      itemid;
     212             : 
     213             :         /* Ignore items already processed as part of an earlier chain */
     214    16123074 :         if (prstate.marked[offnum])
     215      185928 :             continue;
     216             : 
     217             :         /* Nothing to do if slot is empty or already dead */
     218    15937146 :         itemid = PageGetItemId(page, offnum);
     219    15937146 :         if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
     220     1657188 :             continue;
     221             : 
     222             :         /* Process this item or chain of items */
     223    14279958 :         ndeleted += heap_prune_chain(relation, buffer, offnum,
     224             :                                      OldestXmin,
     225             :                                      &prstate);
     226             :     }
     227             : 
     228             :     /* Any error while applying the changes is critical */
     229      237012 :     START_CRIT_SECTION();
     230             : 
     231             :     /* Have we found any prunable items? */
     232      237012 :     if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
     233             :     {
     234             :         /*
     235             :          * Apply the planned item changes, then repair page fragmentation, and
     236             :          * update the page's hint bit about whether it has free line pointers.
     237             :          */
     238       76054 :         heap_page_prune_execute(buffer,
     239             :                                 prstate.redirected, prstate.nredirected,
     240             :                                 prstate.nowdead, prstate.ndead,
     241             :                                 prstate.nowunused, prstate.nunused);
     242             : 
     243             :         /*
     244             :          * Update the page's pd_prune_xid field to either zero, or the lowest
     245             :          * XID of any soon-prunable tuple.
     246             :          */
     247       76054 :         ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
     248             : 
     249             :         /*
     250             :          * Also clear the "page is full" flag, since there's no point in
     251             :          * repeating the prune/defrag process until something else happens to
     252             :          * the page.
     253             :          */
     254       76054 :         PageClearFull(page);
     255             : 
     256       76054 :         MarkBufferDirty(buffer);
     257             : 
     258             :         /*
     259             :          * Emit a WAL XLOG_HEAP2_CLEAN record showing what we did
     260             :          */
     261      152108 :         if (RelationNeedsWAL(relation))
     262             :         {
     263             :             XLogRecPtr  recptr;
     264             : 
     265       75936 :             recptr = log_heap_clean(relation, buffer,
     266             :                                     prstate.redirected, prstate.nredirected,
     267             :                                     prstate.nowdead, prstate.ndead,
     268             :                                     prstate.nowunused, prstate.nunused,
     269             :                                     prstate.latestRemovedXid);
     270             : 
     271       75936 :             PageSetLSN(BufferGetPage(buffer), recptr);
     272             :         }
     273             :     }
     274             :     else
     275             :     {
     276             :         /*
     277             :          * If we didn't prune anything, but have found a new value for the
     278             :          * pd_prune_xid field, update it and mark the buffer dirty. This is
     279             :          * treated as a non-WAL-logged hint.
     280             :          *
     281             :          * Also clear the "page is full" flag if it is set, since there's no
     282             :          * point in repeating the prune/defrag process until something else
     283             :          * happens to the page.
     284             :          */
     285      321848 :         if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
     286      160890 :             PageIsFull(page))
     287             :         {
     288         132 :             ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
     289         132 :             PageClearFull(page);
     290         132 :             MarkBufferDirtyHint(buffer, true);
     291             :         }
     292             :     }
     293             : 
     294      237012 :     END_CRIT_SECTION();
     295             : 
     296             :     /*
     297             :      * If requested, report the number of tuples reclaimed to pgstats. This is
     298             :      * ndeleted minus ndead, because we don't want to count a now-DEAD root
     299             :      * item as a deletion for this purpose.
     300             :      */
     301      237012 :     if (report_stats && ndeleted > prstate.ndead)
     302       33202 :         pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
     303             : 
     304      237012 :     *latestRemovedXid = prstate.latestRemovedXid;
     305             : 
     306             :     /*
     307             :      * XXX Should we update the FSM information of this page ?
     308             :      *
     309             :      * There are two schools of thought here. We may not want to update FSM
     310             :      * information so that the page is not used for unrelated UPDATEs/INSERTs
     311             :      * and any free space in this page will remain available for further
     312             :      * UPDATEs in *this* page, thus improving chances for doing HOT updates.
     313             :      *
     314             :      * But for a large table and where a page does not receive further UPDATEs
     315             :      * for a long time, we might waste this space by not updating the FSM
     316             :      * information. The relation may get extended and fragmented further.
     317             :      *
     318             :      * One possibility is to leave "fillfactor" worth of space in this page
     319             :      * and update FSM with the remaining space.
     320             :      */
     321             : 
     322      237012 :     return ndeleted;
     323             : }
     324             : 
     325             : 
     326             : /*
     327             :  * Prune specified line pointer or a HOT chain originating at line pointer.
     328             :  *
     329             :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
     330             :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
     331             :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
     332             :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
     333             :  * DEAD, the OldestXmin test is just too coarse to detect it.
     334             :  *
     335             :  * The root line pointer is redirected to the tuple immediately after the
     336             :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
     337             :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
     338             :  * tuple, which we treat as a chain of length 1.)
     339             :  *
     340             :  * OldestXmin is the cutoff XID used to identify dead tuples.
     341             :  *
     342             :  * We don't actually change the page here, except perhaps for hint-bit updates
     343             :  * caused by HeapTupleSatisfiesVacuum.  We just add entries to the arrays in
     344             :  * prstate showing the changes to be made.  Items to be redirected are added
     345             :  * to the redirected[] array (two entries per redirection); items to be set to
     346             :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
     347             :  * state are added to nowunused[].
     348             :  *
     349             :  * Returns the number of tuples (to be) deleted from the page.
     350             :  */
     351             : static int
     352    14279958 : heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
     353             :                  TransactionId OldestXmin,
     354             :                  PruneState *prstate)
     355             : {
     356    14279958 :     int         ndeleted = 0;
     357    14279958 :     Page        dp = (Page) BufferGetPage(buffer);
     358    14279958 :     TransactionId priorXmax = InvalidTransactionId;
     359             :     ItemId      rootlp;
     360             :     HeapTupleHeader htup;
     361    14279958 :     OffsetNumber latestdead = InvalidOffsetNumber,
     362    14279958 :                 maxoff = PageGetMaxOffsetNumber(dp),
     363             :                 offnum;
     364             :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
     365    14279958 :     int         nchain = 0,
     366             :                 i;
     367             :     HeapTupleData tup;
     368             : 
     369    14279958 :     tup.t_tableOid = RelationGetRelid(relation);
     370             : 
     371    14279958 :     rootlp = PageGetItemId(dp, rootoffnum);
     372             : 
     373             :     /*
     374             :      * If it's a heap-only tuple, then it is not the start of a HOT chain.
     375             :      */
     376    14279958 :     if (ItemIdIsNormal(rootlp))
     377             :     {
     378    13843938 :         htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
     379             : 
     380    13843938 :         tup.t_data = htup;
     381    13843938 :         tup.t_len = ItemIdGetLength(rootlp);
     382    13843938 :         ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
     383             : 
     384    13843938 :         if (HeapTupleHeaderIsHeapOnly(htup))
     385             :         {
     386             :             /*
     387             :              * If the tuple is DEAD and doesn't chain to anything else, mark
     388             :              * it unused immediately.  (If it does chain, we can only remove
     389             :              * it as part of pruning its chain.)
     390             :              *
     391             :              * We need this primarily to handle aborted HOT updates, that is,
     392             :              * XMIN_INVALID heap-only tuples.  Those might not be linked to by
     393             :              * any chain, since the parent tuple might be re-updated before
     394             :              * any pruning occurs.  So we have to be able to reap them
     395             :              * separately from chain-pruning.  (Note that
     396             :              * HeapTupleHeaderIsHotUpdated will never return true for an
     397             :              * XMIN_INVALID tuple, so this code will work even when there were
     398             :              * sequential updates within the aborted transaction.)
     399             :              *
     400             :              * Note that we might first arrive at a dead heap-only tuple
     401             :              * either here or while following a chain below.  Whichever path
     402             :              * gets there first will mark the tuple unused.
     403             :              */
     404      422712 :             if (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer)
     405        8072 :                 == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
     406             :             {
     407        2660 :                 heap_prune_record_unused(prstate, rootoffnum);
     408        2660 :                 HeapTupleHeaderAdvanceLatestRemovedXid(htup,
     409             :                                                        &prstate->latestRemovedXid);
     410        2660 :                 ndeleted++;
     411             :             }
     412             : 
     413             :             /* Nothing more to do */
     414      422712 :             return ndeleted;
     415             :         }
     416             :     }
     417             : 
     418             :     /* Start from the root tuple */
     419    13857246 :     offnum = rootoffnum;
     420             : 
     421             :     /* while not end of the chain */
     422             :     for (;;)
     423      597166 :     {
     424             :         ItemId      lp;
     425             :         bool        tupdead,
     426             :                     recent_dead;
     427             : 
     428             :         /* Some sanity checks */
     429    14454412 :         if (offnum < FirstOffsetNumber || offnum > maxoff)
     430             :             break;
     431             : 
     432             :         /* If item is already processed, stop --- it must not be same chain */
     433    14454412 :         if (prstate->marked[offnum])
     434        1366 :             break;
     435             : 
     436    14453046 :         lp = PageGetItemId(dp, offnum);
     437             : 
     438             :         /* Unused item obviously isn't part of the chain */
     439    14453046 :         if (!ItemIdIsUsed(lp))
     440           0 :             break;
     441             : 
     442             :         /*
     443             :          * If we are looking at the redirected root line pointer, jump to the
     444             :          * first normal tuple in the chain.  If we find a redirect somewhere
     445             :          * else, stop --- it must not be same chain.
     446             :          */
     447    14453046 :         if (ItemIdIsRedirected(lp))
     448             :         {
     449      436020 :             if (nchain > 0)
     450           0 :                 break;          /* not at start of chain */
     451      436020 :             chainitems[nchain++] = offnum;
     452      436020 :             offnum = ItemIdGetRedirect(rootlp);
     453      436020 :             continue;
     454             :         }
     455             : 
     456             :         /*
     457             :          * Likewise, a dead line pointer can't be part of the chain. (We
     458             :          * already eliminated the case of dead root tuple outside this
     459             :          * function.)
     460             :          */
     461    14017026 :         if (ItemIdIsDead(lp))
     462           0 :             break;
     463             : 
     464             :         Assert(ItemIdIsNormal(lp));
     465    14017026 :         htup = (HeapTupleHeader) PageGetItem(dp, lp);
     466             : 
     467    14017026 :         tup.t_data = htup;
     468    14017026 :         tup.t_len = ItemIdGetLength(lp);
     469    14017026 :         ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
     470             : 
     471             :         /*
     472             :          * Check the tuple XMIN against prior XMAX, if any
     473             :          */
     474    14177190 :         if (TransactionIdIsValid(priorXmax) &&
     475      160164 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
     476           0 :             break;
     477             : 
     478             :         /*
     479             :          * OK, this tuple is indeed a member of the chain.
     480             :          */
     481    14017026 :         chainitems[nchain++] = offnum;
     482             : 
     483             :         /*
     484             :          * Check tuple's visibility status.
     485             :          */
     486    14017026 :         tupdead = recent_dead = false;
     487             : 
     488    14017026 :         switch (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer))
     489             :         {
     490             :             case HEAPTUPLE_DEAD:
     491     1419616 :                 tupdead = true;
     492     1419616 :                 break;
     493             : 
     494             :             case HEAPTUPLE_RECENTLY_DEAD:
     495      717680 :                 recent_dead = true;
     496             : 
     497             :                 /*
     498             :                  * This tuple may soon become DEAD.  Update the hint field so
     499             :                  * that the page is reconsidered for pruning in future.
     500             :                  */
     501      717680 :                 heap_prune_record_prunable(prstate,
     502     1435360 :                                            HeapTupleHeaderGetUpdateXid(htup));
     503      717680 :                 break;
     504             : 
     505             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
     506             : 
     507             :                 /*
     508             :                  * This tuple may soon become DEAD.  Update the hint field so
     509             :                  * that the page is reconsidered for pruning in future.
     510             :                  */
     511       12608 :                 heap_prune_record_prunable(prstate,
     512       25216 :                                            HeapTupleHeaderGetUpdateXid(htup));
     513       12608 :                 break;
     514             : 
     515             :             case HEAPTUPLE_LIVE:
     516             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
     517             : 
     518             :                 /*
     519             :                  * If we wanted to optimize for aborts, we might consider
     520             :                  * marking the page prunable when we see INSERT_IN_PROGRESS.
     521             :                  * But we don't.  See related decisions about when to mark the
     522             :                  * page prunable in heapam.c.
     523             :                  */
     524    11867122 :                 break;
     525             : 
     526             :             default:
     527           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
     528             :                 break;
     529             :         }
     530             : 
     531             :         /*
     532             :          * Remember the last DEAD tuple seen.  We will advance past
     533             :          * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
     534             :          * but we can't advance past anything else.  (XXX is it really worth
     535             :          * continuing to scan beyond RECENTLY_DEAD?  The case where we will
     536             :          * find another DEAD tuple is a fairly unusual corner case.)
     537             :          */
     538    14017026 :         if (tupdead)
     539             :         {
     540     1419616 :             latestdead = offnum;
     541     1419616 :             HeapTupleHeaderAdvanceLatestRemovedXid(htup,
     542             :                                                    &prstate->latestRemovedXid);
     543             :         }
     544    12597410 :         else if (!recent_dead)
     545    11879730 :             break;
     546             : 
     547             :         /*
     548             :          * If the tuple is not HOT-updated, then we are at the end of this
     549             :          * HOT-update chain.
     550             :          */
     551     2137296 :         if (!HeapTupleHeaderIsHotUpdated(htup))
     552             :             break;
     553             : 
     554             :         /* HOT implies it can't have moved to different partition */
     555             :         Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
     556             : 
     557             :         /*
     558             :          * Advance to next chain member.
     559             :          */
     560             :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
     561             :                BufferGetBlockNumber(buffer));
     562      161146 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     563      161146 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     564             :     }
     565             : 
     566             :     /*
     567             :      * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
     568             :      * the DEAD tuples at the start of the chain are removed and the root line
     569             :      * pointer is appropriately redirected.
     570             :      */
     571    13857246 :     if (OffsetNumberIsValid(latestdead))
     572             :     {
     573             :         /*
     574             :          * Mark as unused each intermediate item that we are able to remove
     575             :          * from the chain.
     576             :          *
     577             :          * When the previous item is the last dead tuple seen, we are at the
     578             :          * right candidate for redirection.
     579             :          */
     580     1459130 :         for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
     581             :         {
     582       80058 :             heap_prune_record_unused(prstate, chainitems[i]);
     583       80058 :             ndeleted++;
     584             :         }
     585             : 
     586             :         /*
     587             :          * If the root entry had been a normal tuple, we are deleting it, so
     588             :          * count it in the result.  But changing a redirect (even to DEAD
     589             :          * state) doesn't count.
     590             :          */
     591     1379072 :         if (ItemIdIsNormal(rootlp))
     592     1339558 :             ndeleted++;
     593             : 
     594             :         /*
     595             :          * If the DEAD tuple is at the end of the chain, the entire chain is
     596             :          * dead and the root line pointer can be marked dead.  Otherwise just
     597             :          * redirect the root to the correct chain member.
     598             :          */
     599     1379072 :         if (i >= nchain)
     600     1262608 :             heap_prune_record_dead(prstate, rootoffnum);
     601             :         else
     602      116464 :             heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
     603             :     }
     604    12478174 :     else if (nchain < 2 && ItemIdIsRedirected(rootlp))
     605             :     {
     606             :         /*
     607             :          * We found a redirect item that doesn't point to a valid follow-on
     608             :          * item.  This can happen if the loop in heap_page_prune caused us to
     609             :          * visit the dead successor of a redirect item before visiting the
     610             :          * redirect item.  We can clean up by setting the redirect item to
     611             :          * DEAD state.
     612             :          */
     613         384 :         heap_prune_record_dead(prstate, rootoffnum);
     614             :     }
     615             : 
     616    13857246 :     return ndeleted;
     617             : }
     618             : 
     619             : /* Record lowest soon-prunable XID */
     620             : static void
     621      730288 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
     622             : {
     623             :     /*
     624             :      * This should exactly match the PageSetPrunable macro.  We can't store
     625             :      * directly into the page header yet, so we update working state.
     626             :      */
     627             :     Assert(TransactionIdIsNormal(xid));
     628     1445938 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
     629      715650 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
     630       15680 :         prstate->new_prune_xid = xid;
     631      730288 : }
     632             : 
     633             : /* Record line pointer to be redirected */
     634             : static void
     635      116464 : heap_prune_record_redirect(PruneState *prstate,
     636             :                            OffsetNumber offnum, OffsetNumber rdoffnum)
     637             : {
     638             :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
     639      116464 :     prstate->redirected[prstate->nredirected * 2] = offnum;
     640      116464 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
     641      116464 :     prstate->nredirected++;
     642             :     Assert(!prstate->marked[offnum]);
     643      116464 :     prstate->marked[offnum] = true;
     644             :     Assert(!prstate->marked[rdoffnum]);
     645      116464 :     prstate->marked[rdoffnum] = true;
     646      116464 : }
     647             : 
     648             : /* Record line pointer to be marked dead */
     649             : static void
     650     1262992 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
     651             : {
     652             :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
     653     1262992 :     prstate->nowdead[prstate->ndead] = offnum;
     654     1262992 :     prstate->ndead++;
     655             :     Assert(!prstate->marked[offnum]);
     656     1262992 :     prstate->marked[offnum] = true;
     657     1262992 : }
     658             : 
     659             : /* Record line pointer to be marked unused */
     660             : static void
     661       82718 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
     662             : {
     663             :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
     664       82718 :     prstate->nowunused[prstate->nunused] = offnum;
     665       82718 :     prstate->nunused++;
     666             :     Assert(!prstate->marked[offnum]);
     667       82718 :     prstate->marked[offnum] = true;
     668       82718 : }
     669             : 
     670             : 
     671             : /*
     672             :  * Perform the actual page changes needed by heap_page_prune.
     673             :  * It is expected that the caller has suitable pin and lock on the
     674             :  * buffer, and is inside a critical section.
     675             :  *
     676             :  * This is split out because it is also used by heap_xlog_clean()
     677             :  * to replay the WAL record when needed after a crash.  Note that the
     678             :  * arguments are identical to those of log_heap_clean().
     679             :  */
     680             : void
     681       77482 : heap_page_prune_execute(Buffer buffer,
     682             :                         OffsetNumber *redirected, int nredirected,
     683             :                         OffsetNumber *nowdead, int ndead,
     684             :                         OffsetNumber *nowunused, int nunused)
     685             : {
     686       77482 :     Page        page = (Page) BufferGetPage(buffer);
     687             :     OffsetNumber *offnum;
     688             :     int         i;
     689             : 
     690             :     /* Update all redirected line pointers */
     691       77482 :     offnum = redirected;
     692      213946 :     for (i = 0; i < nredirected; i++)
     693             :     {
     694      136464 :         OffsetNumber fromoff = *offnum++;
     695      136464 :         OffsetNumber tooff = *offnum++;
     696      136464 :         ItemId      fromlp = PageGetItemId(page, fromoff);
     697             : 
     698      136464 :         ItemIdSetRedirect(fromlp, tooff);
     699             :     }
     700             : 
     701             :     /* Update all now-dead line pointers */
     702       77482 :     offnum = nowdead;
     703     1365528 :     for (i = 0; i < ndead; i++)
     704             :     {
     705     1288046 :         OffsetNumber off = *offnum++;
     706     1288046 :         ItemId      lp = PageGetItemId(page, off);
     707             : 
     708     1288046 :         ItemIdSetDead(lp);
     709             :     }
     710             : 
     711             :     /* Update all now-unused line pointers */
     712       77482 :     offnum = nowunused;
     713      185252 :     for (i = 0; i < nunused; i++)
     714             :     {
     715      107770 :         OffsetNumber off = *offnum++;
     716      107770 :         ItemId      lp = PageGetItemId(page, off);
     717             : 
     718      107770 :         ItemIdSetUnused(lp);
     719             :     }
     720             : 
     721             :     /*
     722             :      * Finally, repair any fragmentation, and update the page's hint bit about
     723             :      * whether it has free pointers.
     724             :      */
     725       77482 :     PageRepairFragmentation(page);
     726       77482 : }
     727             : 
     728             : 
     729             : /*
     730             :  * For all items in this page, find their respective root line pointers.
     731             :  * If item k is part of a HOT-chain with root at item j, then we set
     732             :  * root_offsets[k - 1] = j.
     733             :  *
     734             :  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
     735             :  * We zero out all unused entries.
     736             :  *
     737             :  * The function must be called with at least share lock on the buffer, to
     738             :  * prevent concurrent prune operations.
     739             :  *
     740             :  * Note: The information collected here is valid only as long as the caller
     741             :  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
     742             :  * and reused by a completely unrelated tuple.
     743             :  */
     744             : void
     745      241084 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
     746             : {
     747             :     OffsetNumber offnum,
     748             :                 maxoff;
     749             : 
     750      241084 :     MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
     751             : 
     752      241084 :     maxoff = PageGetMaxOffsetNumber(page);
     753    17183030 :     for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
     754             :     {
     755    16941946 :         ItemId      lp = PageGetItemId(page, offnum);
     756             :         HeapTupleHeader htup;
     757             :         OffsetNumber nextoffnum;
     758             :         TransactionId priorXmax;
     759             : 
     760             :         /* skip unused and dead items */
     761    16941946 :         if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
     762       30396 :             continue;
     763             : 
     764    16911550 :         if (ItemIdIsNormal(lp))
     765             :         {
     766    16901000 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
     767             : 
     768             :             /*
     769             :              * Check if this tuple is part of a HOT-chain rooted at some other
     770             :              * tuple. If so, skip it for now; we'll process it when we find
     771             :              * its root.
     772             :              */
     773    16901000 :             if (HeapTupleHeaderIsHeapOnly(htup))
     774       12372 :                 continue;
     775             : 
     776             :             /*
     777             :              * This is either a plain tuple or the root of a HOT-chain.
     778             :              * Remember it in the mapping.
     779             :              */
     780    16888628 :             root_offsets[offnum - 1] = offnum;
     781             : 
     782             :             /* If it's not the start of a HOT-chain, we're done with it */
     783    16888628 :             if (!HeapTupleHeaderIsHotUpdated(htup))
     784    16886862 :                 continue;
     785             : 
     786             :             /* Set up to scan the HOT-chain */
     787        1766 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     788        1766 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     789             :         }
     790             :         else
     791             :         {
     792             :             /* Must be a redirect item. We do not set its root_offsets entry */
     793             :             Assert(ItemIdIsRedirected(lp));
     794             :             /* Set up to scan the HOT-chain */
     795       10550 :             nextoffnum = ItemIdGetRedirect(lp);
     796       10550 :             priorXmax = InvalidTransactionId;
     797             :         }
     798             : 
     799             :         /*
     800             :          * Now follow the HOT-chain and collect other tuples in the chain.
     801             :          *
     802             :          * Note: Even though this is a nested loop, the complexity of the
     803             :          * function is O(N) because a tuple in the page should be visited not
     804             :          * more than twice, once in the outer loop and once in HOT-chain
     805             :          * chases.
     806             :          */
     807             :         for (;;)
     808             :         {
     809       12428 :             lp = PageGetItemId(page, nextoffnum);
     810             : 
     811             :             /* Check for broken chains */
     812       12372 :             if (!ItemIdIsNormal(lp))
     813           0 :                 break;
     814             : 
     815       12372 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
     816             : 
     817       14194 :             if (TransactionIdIsValid(priorXmax) &&
     818        1822 :                 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
     819           0 :                 break;
     820             : 
     821             :             /* Remember the root line pointer for this item */
     822       12372 :             root_offsets[nextoffnum - 1] = offnum;
     823             : 
     824             :             /* Advance to next chain member, if any */
     825       12372 :             if (!HeapTupleHeaderIsHotUpdated(htup))
     826             :                 break;
     827             : 
     828             :             /* HOT implies it can't have moved to different partition */
     829             :             Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
     830             : 
     831          56 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     832          56 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     833             :         }
     834             :     }
     835      241084 : }

Generated by: LCOV version 1.13