LCOV - code coverage report
Current view: top level - src/backend/access/spgist - spgvacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18beta1 Lines: 301 328 91.8 %
Date: 2025-05-12 21:15:27 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * spgvacuum.c
       4             :  *    vacuum for SP-GiST
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/spgist/spgvacuum.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #include "postgres.h"
      17             : 
      18             : #include "access/genam.h"
      19             : #include "access/spgist_private.h"
      20             : #include "access/spgxlog.h"
      21             : #include "access/transam.h"
      22             : #include "access/xloginsert.h"
      23             : #include "commands/vacuum.h"
      24             : #include "miscadmin.h"
      25             : #include "storage/bufmgr.h"
      26             : #include "storage/indexfsm.h"
      27             : #include "storage/lmgr.h"
      28             : #include "storage/read_stream.h"
      29             : #include "utils/snapmgr.h"
      30             : 
      31             : 
      32             : /* Entry in pending-list of TIDs we need to revisit */
      33             : typedef struct spgVacPendingItem
      34             : {
      35             :     ItemPointerData tid;        /* redirection target to visit */
      36             :     bool        done;           /* have we dealt with this? */
      37             :     struct spgVacPendingItem *next; /* list link */
      38             : } spgVacPendingItem;
      39             : 
      40             : /* Local state for vacuum operations */
      41             : typedef struct spgBulkDeleteState
      42             : {
      43             :     /* Parameters passed in to spgvacuumscan */
      44             :     IndexVacuumInfo *info;
      45             :     IndexBulkDeleteResult *stats;
      46             :     IndexBulkDeleteCallback callback;
      47             :     void       *callback_state;
      48             : 
      49             :     /* Additional working state */
      50             :     SpGistState spgstate;       /* for SPGiST operations that need one */
      51             :     spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
      52             :     TransactionId myXmin;       /* for detecting newly-added redirects */
      53             :     BlockNumber lastFilledBlock;    /* last non-deletable block */
      54             : } spgBulkDeleteState;
      55             : 
      56             : 
      57             : /*
      58             :  * Add TID to pendingList, but only if not already present.
      59             :  *
      60             :  * Note that new items are always appended at the end of the list; this
      61             :  * ensures that scans of the list don't miss items added during the scan.
      62             :  */
      63             : static void
      64       51882 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
      65             : {
      66             :     spgVacPendingItem *pitem;
      67             :     spgVacPendingItem **listLink;
      68             : 
      69             :     /* search the list for pre-existing entry */
      70       51882 :     listLink = &bds->pendingList;
      71     4582914 :     while (*listLink != NULL)
      72             :     {
      73     4548062 :         pitem = *listLink;
      74     4548062 :         if (ItemPointerEquals(tid, &pitem->tid))
      75       17030 :             return;             /* already in list, do nothing */
      76     4531032 :         listLink = &pitem->next;
      77             :     }
      78             :     /* not there, so append new entry */
      79       34852 :     pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
      80       34852 :     pitem->tid = *tid;
      81       34852 :     pitem->done = false;
      82       34852 :     pitem->next = NULL;
      83       34852 :     *listLink = pitem;
      84             : }
      85             : 
      86             : /*
      87             :  * Clear pendingList
      88             :  */
      89             : static void
      90         264 : spgClearPendingList(spgBulkDeleteState *bds)
      91             : {
      92             :     spgVacPendingItem *pitem;
      93             :     spgVacPendingItem *nitem;
      94             : 
      95       35116 :     for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
      96             :     {
      97       34852 :         nitem = pitem->next;
      98             :         /* All items in list should have been dealt with */
      99             :         Assert(pitem->done);
     100       34852 :         pfree(pitem);
     101             :     }
     102         264 :     bds->pendingList = NULL;
     103         264 : }
     104             : 
     105             : /*
     106             :  * Vacuum a regular (non-root) leaf page
     107             :  *
     108             :  * We must delete tuples that are targeted for deletion by the VACUUM,
     109             :  * but not move any tuples that are referenced by outside links; we assume
     110             :  * those are the ones that are heads of chains.
     111             :  *
     112             :  * If we find a REDIRECT that was made by a concurrently-running transaction,
     113             :  * we must add its target TID to pendingList.  (We don't try to visit the
     114             :  * target immediately, first because we don't want VACUUM locking more than
     115             :  * one buffer at a time, and second because the duplicate-filtering logic
     116             :  * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
     117             :  * loop in the face of continuous concurrent insertions.)
     118             :  *
     119             :  * If forPending is true, we are examining the page as a consequence of
     120             :  * chasing a redirect link, not as part of the normal sequential scan.
     121             :  * We still vacuum the page normally, but we don't increment the stats
     122             :  * about live tuples; else we'd double-count those tuples, since the page
     123             :  * has been or will be visited in the sequential scan as well.
     124             :  */
     125             : static void
     126       21998 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
     127             :                bool forPending)
     128             : {
     129       21998 :     Page        page = BufferGetPage(buffer);
     130             :     spgxlogVacuumLeaf xlrec;
     131             :     OffsetNumber toDead[MaxIndexTuplesPerPage];
     132             :     OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
     133             :     OffsetNumber moveSrc[MaxIndexTuplesPerPage];
     134             :     OffsetNumber moveDest[MaxIndexTuplesPerPage];
     135             :     OffsetNumber chainSrc[MaxIndexTuplesPerPage];
     136             :     OffsetNumber chainDest[MaxIndexTuplesPerPage];
     137             :     OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
     138             :     bool        deletable[MaxIndexTuplesPerPage + 1];
     139             :     int         nDeletable;
     140             :     OffsetNumber i,
     141       21998 :                 max = PageGetMaxOffsetNumber(page);
     142             : 
     143       21998 :     memset(predecessor, 0, sizeof(predecessor));
     144       21998 :     memset(deletable, 0, sizeof(deletable));
     145       21998 :     nDeletable = 0;
     146             : 
     147             :     /* Scan page, identify tuples to delete, accumulate stats */
     148     3541740 :     for (i = FirstOffsetNumber; i <= max; i++)
     149             :     {
     150             :         SpGistLeafTuple lt;
     151             : 
     152     3519742 :         lt = (SpGistLeafTuple) PageGetItem(page,
     153     3519742 :                                            PageGetItemId(page, i));
     154     3519742 :         if (lt->tupstate == SPGIST_LIVE)
     155             :         {
     156             :             Assert(ItemPointerIsValid(&lt->heapPtr));
     157             : 
     158     3262982 :             if (bds->callback(&lt->heapPtr, bds->callback_state))
     159             :             {
     160       69910 :                 bds->stats->tuples_removed += 1;
     161       69910 :                 deletable[i] = true;
     162       69910 :                 nDeletable++;
     163             :             }
     164             :             else
     165             :             {
     166     3193072 :                 if (!forPending)
     167      571964 :                     bds->stats->num_index_tuples += 1;
     168             :             }
     169             : 
     170             :             /* Form predecessor map, too */
     171     3262982 :             if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
     172             :             {
     173             :                 /* paranoia about corrupted chain links */
     174     3231090 :                 if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
     175     3231090 :                     SGLT_GET_NEXTOFFSET(lt) > max ||
     176     3231090 :                     predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
     177           0 :                     elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
     178             :                          BufferGetBlockNumber(buffer),
     179             :                          RelationGetRelationName(index));
     180     3231090 :                 predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
     181             :             }
     182             :         }
     183      256760 :         else if (lt->tupstate == SPGIST_REDIRECT)
     184             :         {
     185       18432 :             SpGistDeadTuple dt = (SpGistDeadTuple) lt;
     186             : 
     187             :             Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
     188             :             Assert(ItemPointerIsValid(&dt->pointer));
     189             : 
     190             :             /*
     191             :              * Add target TID to pending list if the redirection could have
     192             :              * happened since VACUUM started.  (If xid is invalid, assume it
     193             :              * must have happened before VACUUM started, since REINDEX
     194             :              * CONCURRENTLY locks out VACUUM.)
     195             :              *
     196             :              * Note: we could make a tighter test by seeing if the xid is
     197             :              * "running" according to the active snapshot; but snapmgr.c
     198             :              * doesn't currently export a suitable API, and it's not entirely
     199             :              * clear that a tighter test is worth the cycles anyway.
     200             :              */
     201       18432 :             if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
     202       17294 :                 spgAddPendingTID(bds, &dt->pointer);
     203             :         }
     204             :         else
     205             :         {
     206             :             Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
     207             :         }
     208             :     }
     209             : 
     210       21998 :     if (nDeletable == 0)
     211       21350 :         return;                 /* nothing more to do */
     212             : 
     213             :     /*----------
     214             :      * Figure out exactly what we have to do.  We do this separately from
     215             :      * actually modifying the page, mainly so that we have a representation
     216             :      * that can be dumped into WAL and then the replay code can do exactly
     217             :      * the same thing.  The output of this step consists of six arrays
     218             :      * describing four kinds of operations, to be performed in this order:
     219             :      *
     220             :      * toDead[]: tuple numbers to be replaced with DEAD tuples
     221             :      * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
     222             :      * moveSrc[]: tuple numbers that need to be relocated to another offset
     223             :      * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
     224             :      * moveDest[]: new locations for moveSrc tuples
     225             :      * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
     226             :      * chainDest[]: new values of nextOffset for chainSrc members
     227             :      *
     228             :      * It's easiest to figure out what we have to do by processing tuple
     229             :      * chains, so we iterate over all the tuples (not just the deletable
     230             :      * ones!) to identify chain heads, then chase down each chain and make
     231             :      * work item entries for deletable tuples within the chain.
     232             :      *----------
     233             :      */
     234         648 :     xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
     235             : 
     236      142216 :     for (i = FirstOffsetNumber; i <= max; i++)
     237             :     {
     238             :         SpGistLeafTuple head;
     239             :         bool        interveningDeletable;
     240             :         OffsetNumber prevLive;
     241             :         OffsetNumber j;
     242             : 
     243      141568 :         head = (SpGistLeafTuple) PageGetItem(page,
     244      141568 :                                              PageGetItemId(page, i));
     245      141568 :         if (head->tupstate != SPGIST_LIVE)
     246       43710 :             continue;           /* can't be a chain member */
     247       97858 :         if (predecessor[i] != 0)
     248       97164 :             continue;           /* not a chain head */
     249             : 
     250             :         /* initialize ... */
     251         694 :         interveningDeletable = false;
     252         694 :         prevLive = deletable[i] ? InvalidOffsetNumber : i;
     253             : 
     254             :         /* scan down the chain ... */
     255         694 :         j = SGLT_GET_NEXTOFFSET(head);
     256       97858 :         while (j != InvalidOffsetNumber)
     257             :         {
     258             :             SpGistLeafTuple lt;
     259             : 
     260       97164 :             lt = (SpGistLeafTuple) PageGetItem(page,
     261       97164 :                                                PageGetItemId(page, j));
     262       97164 :             if (lt->tupstate != SPGIST_LIVE)
     263             :             {
     264             :                 /* all tuples in chain should be live */
     265           0 :                 elog(ERROR, "unexpected SPGiST tuple state: %d",
     266             :                      lt->tupstate);
     267             :             }
     268             : 
     269       97164 :             if (deletable[j])
     270             :             {
     271             :                 /* This tuple should be replaced by a placeholder */
     272       69284 :                 toPlaceholder[xlrec.nPlaceholder] = j;
     273       69284 :                 xlrec.nPlaceholder++;
     274             :                 /* previous live tuple's chain link will need an update */
     275       69284 :                 interveningDeletable = true;
     276             :             }
     277       27880 :             else if (prevLive == InvalidOffsetNumber)
     278             :             {
     279             :                 /*
     280             :                  * This is the first live tuple in the chain.  It has to move
     281             :                  * to the head position.
     282             :                  */
     283         626 :                 moveSrc[xlrec.nMove] = j;
     284         626 :                 moveDest[xlrec.nMove] = i;
     285         626 :                 xlrec.nMove++;
     286             :                 /* Chain updates will be applied after the move */
     287         626 :                 prevLive = i;
     288         626 :                 interveningDeletable = false;
     289             :             }
     290             :             else
     291             :             {
     292             :                 /*
     293             :                  * Second or later live tuple.  Arrange to re-chain it to the
     294             :                  * previous live one, if there was a gap.
     295             :                  */
     296       27254 :                 if (interveningDeletable)
     297             :                 {
     298       19502 :                     chainSrc[xlrec.nChain] = prevLive;
     299       19502 :                     chainDest[xlrec.nChain] = j;
     300       19502 :                     xlrec.nChain++;
     301             :                 }
     302       27254 :                 prevLive = j;
     303       27254 :                 interveningDeletable = false;
     304             :             }
     305             : 
     306       97164 :             j = SGLT_GET_NEXTOFFSET(lt);
     307             :         }
     308             : 
     309         694 :         if (prevLive == InvalidOffsetNumber)
     310             :         {
     311             :             /* The chain is entirely removable, so we need a DEAD tuple */
     312           0 :             toDead[xlrec.nDead] = i;
     313           0 :             xlrec.nDead++;
     314             :         }
     315         694 :         else if (interveningDeletable)
     316             :         {
     317             :             /* One or more deletions at end of chain, so close it off */
     318         664 :             chainSrc[xlrec.nChain] = prevLive;
     319         664 :             chainDest[xlrec.nChain] = InvalidOffsetNumber;
     320         664 :             xlrec.nChain++;
     321             :         }
     322             :     }
     323             : 
     324             :     /* sanity check ... */
     325         648 :     if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
     326           0 :         elog(ERROR, "inconsistent counts of deletable tuples");
     327             : 
     328             :     /* Do the updates */
     329         648 :     START_CRIT_SECTION();
     330             : 
     331         648 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     332         648 :                             toDead, xlrec.nDead,
     333             :                             SPGIST_DEAD, SPGIST_DEAD,
     334             :                             InvalidBlockNumber, InvalidOffsetNumber);
     335             : 
     336         648 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     337         648 :                             toPlaceholder, xlrec.nPlaceholder,
     338             :                             SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
     339             :                             InvalidBlockNumber, InvalidOffsetNumber);
     340             : 
     341             :     /*
     342             :      * We implement the move step by swapping the line pointers of the source
     343             :      * and target tuples, then replacing the newly-source tuples with
     344             :      * placeholders.  This is perhaps unduly friendly with the page data
     345             :      * representation, but it's fast and doesn't risk page overflow when a
     346             :      * tuple to be relocated is large.
     347             :      */
     348        1274 :     for (i = 0; i < xlrec.nMove; i++)
     349             :     {
     350         626 :         ItemId      idSrc = PageGetItemId(page, moveSrc[i]);
     351         626 :         ItemId      idDest = PageGetItemId(page, moveDest[i]);
     352             :         ItemIdData  tmp;
     353             : 
     354         626 :         tmp = *idSrc;
     355         626 :         *idSrc = *idDest;
     356         626 :         *idDest = tmp;
     357             :     }
     358             : 
     359         648 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     360         648 :                             moveSrc, xlrec.nMove,
     361             :                             SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
     362             :                             InvalidBlockNumber, InvalidOffsetNumber);
     363             : 
     364       20814 :     for (i = 0; i < xlrec.nChain; i++)
     365             :     {
     366             :         SpGistLeafTuple lt;
     367             : 
     368       20166 :         lt = (SpGistLeafTuple) PageGetItem(page,
     369       20166 :                                            PageGetItemId(page, chainSrc[i]));
     370             :         Assert(lt->tupstate == SPGIST_LIVE);
     371       20166 :         SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
     372             :     }
     373             : 
     374         648 :     MarkBufferDirty(buffer);
     375             : 
     376         648 :     if (RelationNeedsWAL(index))
     377             :     {
     378             :         XLogRecPtr  recptr;
     379             : 
     380         648 :         XLogBeginInsert();
     381             : 
     382         648 :         STORE_STATE(&bds->spgstate, xlrec.stateSrc);
     383             : 
     384         648 :         XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumLeaf);
     385             :         /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
     386         648 :         XLogRegisterData(toDead, sizeof(OffsetNumber) * xlrec.nDead);
     387         648 :         XLogRegisterData(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
     388         648 :         XLogRegisterData(moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
     389         648 :         XLogRegisterData(moveDest, sizeof(OffsetNumber) * xlrec.nMove);
     390         648 :         XLogRegisterData(chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
     391         648 :         XLogRegisterData(chainDest, sizeof(OffsetNumber) * xlrec.nChain);
     392             : 
     393         648 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     394             : 
     395         648 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
     396             : 
     397         648 :         PageSetLSN(page, recptr);
     398             :     }
     399             : 
     400         648 :     END_CRIT_SECTION();
     401             : }
     402             : 
     403             : /*
     404             :  * Vacuum a root page when it is also a leaf
     405             :  *
     406             :  * On the root, we just delete any dead leaf tuples; no fancy business
     407             :  */
     408             : static void
     409          32 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
     410             : {
     411          32 :     Page        page = BufferGetPage(buffer);
     412             :     spgxlogVacuumRoot xlrec;
     413             :     OffsetNumber toDelete[MaxIndexTuplesPerPage];
     414             :     OffsetNumber i,
     415          32 :                 max = PageGetMaxOffsetNumber(page);
     416             : 
     417          32 :     xlrec.nDelete = 0;
     418             : 
     419             :     /* Scan page, identify tuples to delete, accumulate stats */
     420         164 :     for (i = FirstOffsetNumber; i <= max; i++)
     421             :     {
     422             :         SpGistLeafTuple lt;
     423             : 
     424         132 :         lt = (SpGistLeafTuple) PageGetItem(page,
     425         132 :                                            PageGetItemId(page, i));
     426         132 :         if (lt->tupstate == SPGIST_LIVE)
     427             :         {
     428             :             Assert(ItemPointerIsValid(&lt->heapPtr));
     429             : 
     430         132 :             if (bds->callback(&lt->heapPtr, bds->callback_state))
     431             :             {
     432           0 :                 bds->stats->tuples_removed += 1;
     433           0 :                 toDelete[xlrec.nDelete] = i;
     434           0 :                 xlrec.nDelete++;
     435             :             }
     436             :             else
     437             :             {
     438         132 :                 bds->stats->num_index_tuples += 1;
     439             :             }
     440             :         }
     441             :         else
     442             :         {
     443             :             /* all tuples on root should be live */
     444           0 :             elog(ERROR, "unexpected SPGiST tuple state: %d",
     445             :                  lt->tupstate);
     446             :         }
     447             :     }
     448             : 
     449          32 :     if (xlrec.nDelete == 0)
     450          32 :         return;                 /* nothing more to do */
     451             : 
     452             :     /* Do the update */
     453           0 :     START_CRIT_SECTION();
     454             : 
     455             :     /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
     456           0 :     PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
     457             : 
     458           0 :     MarkBufferDirty(buffer);
     459             : 
     460           0 :     if (RelationNeedsWAL(index))
     461             :     {
     462             :         XLogRecPtr  recptr;
     463             : 
     464           0 :         XLogBeginInsert();
     465             : 
     466             :         /* Prepare WAL record */
     467           0 :         STORE_STATE(&bds->spgstate, xlrec.stateSrc);
     468             : 
     469           0 :         XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRoot);
     470             :         /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
     471           0 :         XLogRegisterData(toDelete,
     472           0 :                          sizeof(OffsetNumber) * xlrec.nDelete);
     473             : 
     474           0 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     475             : 
     476           0 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
     477             : 
     478           0 :         PageSetLSN(page, recptr);
     479             :     }
     480             : 
     481           0 :     END_CRIT_SECTION();
     482             : }
     483             : 
     484             : /*
     485             :  * Clean up redirect and placeholder tuples on the given page
     486             :  *
     487             :  * Redirect tuples can be marked placeholder once they're old enough.
     488             :  * Placeholder tuples can be removed if it won't change the offsets of
     489             :  * non-placeholder ones.
     490             :  *
     491             :  * Unlike the routines above, this works on both leaf and inner pages.
     492             :  */
     493             : static void
     494       22172 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
     495             : {
     496       22172 :     Page        page = BufferGetPage(buffer);
     497       22172 :     SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
     498             :     OffsetNumber i,
     499       22172 :                 max = PageGetMaxOffsetNumber(page),
     500       22172 :                 firstPlaceholder = InvalidOffsetNumber;
     501       22172 :     bool        hasNonPlaceholder = false;
     502       22172 :     bool        hasUpdate = false;
     503             :     OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
     504             :     OffsetNumber itemnos[MaxIndexTuplesPerPage];
     505             :     spgxlogVacuumRedirect xlrec;
     506             :     GlobalVisState *vistest;
     507             : 
     508       22172 :     xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
     509       22172 :     xlrec.nToPlaceholder = 0;
     510       22172 :     xlrec.snapshotConflictHorizon = InvalidTransactionId;
     511             : 
     512       22172 :     vistest = GlobalVisTestFor(heaprel);
     513             : 
     514       22172 :     START_CRIT_SECTION();
     515             : 
     516             :     /*
     517             :      * Scan backwards to convert old redirection tuples to placeholder tuples,
     518             :      * and identify location of last non-placeholder tuple while at it.
     519             :      */
     520     2909316 :     for (i = max;
     521     2891620 :          i >= FirstOffsetNumber &&
     522     2891620 :          (opaque->nRedirection > 0 || !hasNonPlaceholder);
     523     2887144 :          i--)
     524             :     {
     525             :         SpGistDeadTuple dt;
     526             : 
     527     2887144 :         dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
     528             : 
     529             :         /*
     530             :          * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
     531             :          * be any index scans "in flight" to it.  Such an index scan would
     532             :          * have to be in a transaction whose snapshot sees the REDIRECT's XID
     533             :          * as still running, so comparing the XID against global xmin is a
     534             :          * conservatively safe test.  If the XID is invalid, it must have been
     535             :          * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
     536             :          */
     537     2887144 :         if (dt->tupstate == SPGIST_REDIRECT &&
     538       36872 :             (!TransactionIdIsValid(dt->xid) ||
     539       18436 :              GlobalVisTestIsRemovableXid(vistest, dt->xid)))
     540             :         {
     541         838 :             dt->tupstate = SPGIST_PLACEHOLDER;
     542             :             Assert(opaque->nRedirection > 0);
     543         838 :             opaque->nRedirection--;
     544         838 :             opaque->nPlaceholder++;
     545             : 
     546             :             /* remember newest XID among the removed redirects */
     547        1264 :             if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
     548         426 :                 TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
     549         412 :                 xlrec.snapshotConflictHorizon = dt->xid;
     550             : 
     551         838 :             ItemPointerSetInvalid(&dt->pointer);
     552             : 
     553         838 :             itemToPlaceholder[xlrec.nToPlaceholder] = i;
     554         838 :             xlrec.nToPlaceholder++;
     555             : 
     556         838 :             hasUpdate = true;
     557             :         }
     558             : 
     559     2887144 :         if (dt->tupstate == SPGIST_PLACEHOLDER)
     560             :         {
     561      189886 :             if (!hasNonPlaceholder)
     562      182800 :                 firstPlaceholder = i;
     563             :         }
     564             :         else
     565             :         {
     566     2697258 :             hasNonPlaceholder = true;
     567             :         }
     568             :     }
     569             : 
     570             :     /*
     571             :      * Any placeholder tuples at the end of page can safely be removed.  We
     572             :      * can't remove ones before the last non-placeholder, though, because we
     573             :      * can't alter the offset numbers of non-placeholder tuples.
     574             :      */
     575       22172 :     if (firstPlaceholder != InvalidOffsetNumber)
     576             :     {
     577             :         /*
     578             :          * We do not store this array to rdata because it's easy to recreate.
     579             :          */
     580      185668 :         for (i = firstPlaceholder; i <= max; i++)
     581      182800 :             itemnos[i - firstPlaceholder] = i;
     582             : 
     583        2868 :         i = max - firstPlaceholder + 1;
     584             :         Assert(opaque->nPlaceholder >= i);
     585        2868 :         opaque->nPlaceholder -= i;
     586             : 
     587             :         /* The array is surely sorted, so can use PageIndexMultiDelete */
     588        2868 :         PageIndexMultiDelete(page, itemnos, i);
     589             : 
     590        2868 :         hasUpdate = true;
     591             :     }
     592             : 
     593       22172 :     xlrec.firstPlaceholder = firstPlaceholder;
     594             : 
     595       22172 :     if (hasUpdate)
     596        2878 :         MarkBufferDirty(buffer);
     597             : 
     598       22172 :     if (hasUpdate && RelationNeedsWAL(index))
     599             :     {
     600             :         XLogRecPtr  recptr;
     601             : 
     602        2878 :         XLogBeginInsert();
     603             : 
     604        2878 :         XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRedirect);
     605        2878 :         XLogRegisterData(itemToPlaceholder,
     606        2878 :                          sizeof(OffsetNumber) * xlrec.nToPlaceholder);
     607             : 
     608        2878 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     609             : 
     610        2878 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
     611             : 
     612        2878 :         PageSetLSN(page, recptr);
     613             :     }
     614             : 
     615       22172 :     END_CRIT_SECTION();
     616       22172 : }
     617             : 
     618             : /*
     619             :  * Process one page during a bulkdelete scan
     620             :  */
     621             : static void
     622        5024 : spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
     623             : {
     624        5024 :     Relation    index = bds->info->index;
     625        5024 :     BlockNumber blkno = BufferGetBlockNumber(buffer);
     626             :     Page        page;
     627             : 
     628        5024 :     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     629        5024 :     page = (Page) BufferGetPage(buffer);
     630             : 
     631        5024 :     if (PageIsNew(page))
     632             :     {
     633             :         /*
     634             :          * We found an all-zero page, which could happen if the database
     635             :          * crashed just after extending the file.  Recycle it.
     636             :          */
     637             :     }
     638        5010 :     else if (PageIsEmpty(page))
     639             :     {
     640             :         /* nothing to do */
     641             :     }
     642        4910 :     else if (SpGistPageIsLeaf(page))
     643             :     {
     644        4736 :         if (SpGistBlockIsRoot(blkno))
     645             :         {
     646          32 :             vacuumLeafRoot(bds, index, buffer);
     647             :             /* no need for vacuumRedirectAndPlaceholder */
     648             :         }
     649             :         else
     650             :         {
     651        4704 :             vacuumLeafPage(bds, index, buffer, false);
     652        4704 :             vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     653             :         }
     654             :     }
     655             :     else
     656             :     {
     657             :         /* inner page */
     658         174 :         vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     659             :     }
     660             : 
     661             :     /*
     662             :      * The root pages must never be deleted, nor marked as available in FSM,
     663             :      * because we don't want them ever returned by a search for a place to put
     664             :      * a new tuple.  Otherwise, check for empty page, and make sure the FSM
     665             :      * knows about it.
     666             :      */
     667        5024 :     if (!SpGistBlockIsRoot(blkno))
     668             :     {
     669        4860 :         if (PageIsNew(page) || PageIsEmpty(page))
     670             :         {
     671          82 :             RecordFreeIndexPage(index, blkno);
     672          82 :             bds->stats->pages_deleted++;
     673             :         }
     674             :         else
     675             :         {
     676        4778 :             SpGistSetLastUsedPage(index, buffer);
     677        4778 :             bds->lastFilledBlock = blkno;
     678             :         }
     679             :     }
     680             : 
     681        5024 :     UnlockReleaseBuffer(buffer);
     682        5024 : }
     683             : 
     684             : /*
     685             :  * Process the pending-TID list between pages of the main scan
     686             :  */
     687             : static void
     688         264 : spgprocesspending(spgBulkDeleteState *bds)
     689             : {
     690         264 :     Relation    index = bds->info->index;
     691             :     spgVacPendingItem *pitem;
     692             :     spgVacPendingItem *nitem;
     693             :     BlockNumber blkno;
     694             :     Buffer      buffer;
     695             :     Page        page;
     696             : 
     697       35116 :     for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
     698             :     {
     699       34852 :         if (pitem->done)
     700       17294 :             continue;           /* ignore already-done items */
     701             : 
     702             :         /* call vacuum_delay_point while not holding any buffer lock */
     703       17558 :         vacuum_delay_point(false);
     704             : 
     705             :         /* examine the referenced page */
     706       17558 :         blkno = ItemPointerGetBlockNumber(&pitem->tid);
     707       17558 :         buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
     708       17558 :                                     RBM_NORMAL, bds->info->strategy);
     709       17558 :         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     710       17558 :         page = (Page) BufferGetPage(buffer);
     711             : 
     712       17558 :         if (PageIsNew(page) || SpGistPageIsDeleted(page))
     713             :         {
     714             :             /* Probably shouldn't happen, but ignore it */
     715             :         }
     716       17558 :         else if (SpGistPageIsLeaf(page))
     717             :         {
     718       17294 :             if (SpGistBlockIsRoot(blkno))
     719             :             {
     720             :                 /* this should definitely not happen */
     721           0 :                 elog(ERROR, "redirection leads to root page of index \"%s\"",
     722             :                      RelationGetRelationName(index));
     723             :             }
     724             : 
     725             :             /* deal with any deletable tuples */
     726       17294 :             vacuumLeafPage(bds, index, buffer, true);
     727             :             /* might as well do this while we are here */
     728       17294 :             vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     729             : 
     730       17294 :             SpGistSetLastUsedPage(index, buffer);
     731             : 
     732             :             /*
     733             :              * We can mark as done not only this item, but any later ones
     734             :              * pointing at the same page, since we vacuumed the whole page.
     735             :              */
     736       17294 :             pitem->done = true;
     737     1516458 :             for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
     738             :             {
     739     1499164 :                 if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
     740         264 :                     nitem->done = true;
     741             :             }
     742             :         }
     743             :         else
     744             :         {
     745             :             /*
     746             :              * On an inner page, visit the referenced inner tuple and add all
     747             :              * its downlinks to the pending list.  We might have pending items
     748             :              * for more than one inner tuple on the same page (in fact this is
     749             :              * pretty likely given the way space allocation works), so get
     750             :              * them all while we are here.
     751             :              */
     752       35116 :             for (nitem = pitem; nitem != NULL; nitem = nitem->next)
     753             :             {
     754       34852 :                 if (nitem->done)
     755           0 :                     continue;
     756       34852 :                 if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
     757             :                 {
     758             :                     OffsetNumber offset;
     759             :                     SpGistInnerTuple innerTuple;
     760             : 
     761       17294 :                     offset = ItemPointerGetOffsetNumber(&nitem->tid);
     762       17294 :                     innerTuple = (SpGistInnerTuple) PageGetItem(page,
     763       17294 :                                                                 PageGetItemId(page, offset));
     764       17294 :                     if (innerTuple->tupstate == SPGIST_LIVE)
     765             :                     {
     766             :                         SpGistNodeTuple node;
     767             :                         int         i;
     768             : 
     769       86470 :                         SGITITERATE(innerTuple, i, node)
     770             :                         {
     771       69176 :                             if (ItemPointerIsValid(&node->t_tid))
     772       34588 :                                 spgAddPendingTID(bds, &node->t_tid);
     773             :                         }
     774             :                     }
     775           0 :                     else if (innerTuple->tupstate == SPGIST_REDIRECT)
     776             :                     {
     777             :                         /* transfer attention to redirect point */
     778           0 :                         spgAddPendingTID(bds,
     779             :                                          &((SpGistDeadTuple) innerTuple)->pointer);
     780             :                     }
     781             :                     else
     782           0 :                         elog(ERROR, "unexpected SPGiST tuple state: %d",
     783             :                              innerTuple->tupstate);
     784             : 
     785       17294 :                     nitem->done = true;
     786             :                 }
     787             :             }
     788             :         }
     789             : 
     790       17558 :         UnlockReleaseBuffer(buffer);
     791             :     }
     792             : 
     793         264 :     spgClearPendingList(bds);
     794         264 : }
     795             : 
     796             : /*
     797             :  * Perform a bulkdelete scan
     798             :  */
     799             : static void
     800          82 : spgvacuumscan(spgBulkDeleteState *bds)
     801             : {
     802          82 :     Relation    index = bds->info->index;
     803             :     bool        needLock;
     804             :     BlockNumber num_pages;
     805             :     BlockRangeReadStreamPrivate p;
     806          82 :     ReadStream *stream = NULL;
     807             : 
     808             :     /* Finish setting up spgBulkDeleteState */
     809          82 :     initSpGistState(&bds->spgstate, index);
     810          82 :     bds->pendingList = NULL;
     811          82 :     bds->myXmin = GetActiveSnapshot()->xmin;
     812          82 :     bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
     813             : 
     814             :     /*
     815             :      * Reset counts that will be incremented during the scan; needed in case
     816             :      * of multiple scans during a single VACUUM command
     817             :      */
     818          82 :     bds->stats->estimated_count = false;
     819          82 :     bds->stats->num_index_tuples = 0;
     820          82 :     bds->stats->pages_deleted = 0;
     821             : 
     822             :     /* We can skip locking for new or temp relations */
     823          82 :     needLock = !RELATION_IS_LOCAL(index);
     824          82 :     p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;
     825             : 
     826             :     /*
     827             :      * It is safe to use batchmode as block_range_read_stream_cb takes no
     828             :      * locks.
     829             :      */
     830          82 :     stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
     831             :                                         READ_STREAM_FULL |
     832             :                                         READ_STREAM_USE_BATCHING,
     833          82 :                                         bds->info->strategy,
     834             :                                         index,
     835             :                                         MAIN_FORKNUM,
     836             :                                         block_range_read_stream_cb,
     837             :                                         &p,
     838             :                                         0);
     839             : 
     840             :     /*
     841             :      * The outer loop iterates over all index pages except the metapage, in
     842             :      * physical order (we hope the kernel will cooperate in providing
     843             :      * read-ahead for speed).  It is critical that we visit all leaf pages,
     844             :      * including ones added after we start the scan, else we might fail to
     845             :      * delete some deletable tuples.  See more extensive comments about this
     846             :      * in btvacuumscan().
     847             :      */
     848             :     for (;;)
     849             :     {
     850             :         /* Get the current relation length */
     851         164 :         if (needLock)
     852         164 :             LockRelationForExtension(index, ExclusiveLock);
     853         164 :         num_pages = RelationGetNumberOfBlocks(index);
     854         164 :         if (needLock)
     855         164 :             UnlockRelationForExtension(index, ExclusiveLock);
     856             : 
     857             :         /* Quit if we've scanned the whole relation */
     858         164 :         if (p.current_blocknum >= num_pages)
     859          82 :             break;
     860             : 
     861          82 :         p.last_exclusive = num_pages;
     862             : 
     863             :         /* Iterate over pages, then loop back to recheck length */
     864             :         while (true)
     865        5024 :         {
     866             :             Buffer      buf;
     867             : 
     868             :             /* call vacuum_delay_point while not holding any buffer lock */
     869        5106 :             vacuum_delay_point(false);
     870             : 
     871        5106 :             buf = read_stream_next_buffer(stream, NULL);
     872             : 
     873        5106 :             if (!BufferIsValid(buf))
     874          82 :                 break;
     875             : 
     876        5024 :             spgvacuumpage(bds, buf);
     877             : 
     878             :             /* empty the pending-list after each page */
     879        5024 :             if (bds->pendingList != NULL)
     880         264 :                 spgprocesspending(bds);
     881             :         }
     882             : 
     883             :         /*
     884             :          * We have to reset the read stream to use it again. After returning
     885             :          * InvalidBuffer, the read stream API won't invoke our callback again
     886             :          * until the stream has been reset.
     887             :          */
     888          82 :         read_stream_reset(stream);
     889             :     }
     890             : 
     891          82 :     read_stream_end(stream);
     892             : 
     893             :     /* Propagate local lastUsedPages cache to metablock */
     894          82 :     SpGistUpdateMetaPage(index);
     895             : 
     896             :     /*
     897             :      * If we found any empty pages (and recorded them in the FSM), then
     898             :      * forcibly update the upper-level FSM pages to ensure that searchers can
     899             :      * find them.  It's possible that the pages were also found during
     900             :      * previous scans and so this is a waste of time, but it's cheap enough
     901             :      * relative to scanning the index that it shouldn't matter much, and
     902             :      * making sure that free pages are available sooner not later seems
     903             :      * worthwhile.
     904             :      *
     905             :      * Note that if no empty pages exist, we don't bother vacuuming the FSM at
     906             :      * all.
     907             :      */
     908          82 :     if (bds->stats->pages_deleted > 0)
     909          50 :         IndexFreeSpaceMapVacuum(index);
     910             : 
     911             :     /*
     912             :      * Truncate index if possible
     913             :      *
     914             :      * XXX disabled because it's unsafe due to possible concurrent inserts.
     915             :      * We'd have to rescan the pages to make sure they're still empty, and it
     916             :      * doesn't seem worth it.  Note that btree doesn't do this either.
     917             :      *
     918             :      * Another reason not to truncate is that it could invalidate the cached
     919             :      * pages-with-freespace pointers in the metapage and other backends'
     920             :      * relation caches, that is leave them pointing to nonexistent pages.
     921             :      * Adding RelationGetNumberOfBlocks calls to protect the places that use
     922             :      * those pointers would be unduly expensive.
     923             :      */
     924             : #ifdef NOT_USED
     925             :     if (num_pages > bds->lastFilledBlock + 1)
     926             :     {
     927             :         BlockNumber lastBlock = num_pages - 1;
     928             : 
     929             :         num_pages = bds->lastFilledBlock + 1;
     930             :         RelationTruncate(index, num_pages);
     931             :         bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
     932             :         bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
     933             :     }
     934             : #endif
     935             : 
     936             :     /* Report final stats */
     937          82 :     bds->stats->num_pages = num_pages;
     938          82 :     bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
     939          82 :     bds->stats->pages_free = bds->stats->pages_deleted;
     940          82 : }
     941             : 
     942             : /*
     943             :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     944             :  * The set of target tuples is specified via a callback routine that tells
     945             :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     946             :  *
     947             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     948             :  */
     949             : IndexBulkDeleteResult *
     950          10 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     951             :               IndexBulkDeleteCallback callback, void *callback_state)
     952             : {
     953             :     spgBulkDeleteState bds;
     954             : 
     955             :     /* allocate stats if first time through, else re-use existing struct */
     956          10 :     if (stats == NULL)
     957          10 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     958          10 :     bds.info = info;
     959          10 :     bds.stats = stats;
     960          10 :     bds.callback = callback;
     961          10 :     bds.callback_state = callback_state;
     962             : 
     963          10 :     spgvacuumscan(&bds);
     964             : 
     965          10 :     return stats;
     966             : }
     967             : 
     968             : /* Dummy callback to delete no tuples during spgvacuumcleanup */
     969             : static bool
     970     3165256 : dummy_callback(ItemPointer itemptr, void *state)
     971             : {
     972     3165256 :     return false;
     973             : }
     974             : 
     975             : /*
     976             :  * Post-VACUUM cleanup.
     977             :  *
     978             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     979             :  */
     980             : IndexBulkDeleteResult *
     981         106 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     982             : {
     983             :     spgBulkDeleteState bds;
     984             : 
     985             :     /* No-op in ANALYZE ONLY mode */
     986         106 :     if (info->analyze_only)
     987          24 :         return stats;
     988             : 
     989             :     /*
     990             :      * We don't need to scan the index if there was a preceding bulkdelete
     991             :      * pass.  Otherwise, make a pass that won't delete any live tuples, but
     992             :      * might still accomplish useful stuff with redirect/placeholder cleanup
     993             :      * and/or FSM housekeeping, and in any case will provide stats.
     994             :      */
     995          82 :     if (stats == NULL)
     996             :     {
     997          72 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     998          72 :         bds.info = info;
     999          72 :         bds.stats = stats;
    1000          72 :         bds.callback = dummy_callback;
    1001          72 :         bds.callback_state = NULL;
    1002             : 
    1003          72 :         spgvacuumscan(&bds);
    1004             :     }
    1005             : 
    1006             :     /*
    1007             :      * It's quite possible for us to be fooled by concurrent tuple moves into
    1008             :      * double-counting some index tuples, so disbelieve any total that exceeds
    1009             :      * the underlying heap's count ... if we know that accurately.  Otherwise
    1010             :      * this might just make matters worse.
    1011             :      */
    1012          82 :     if (!info->estimated_count)
    1013             :     {
    1014          82 :         if (stats->num_index_tuples > info->num_heap_tuples)
    1015           4 :             stats->num_index_tuples = info->num_heap_tuples;
    1016             :     }
    1017             : 
    1018          82 :     return stats;
    1019             : }

Generated by: LCOV version 1.14