LCOV - code coverage report
Current view: top level - src/backend/access/spgist - spgvacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 287 315 91.1 %
Date: 2025-01-18 04:15:08 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * spgvacuum.c
       4             :  *    vacuum for SP-GiST
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/spgist/spgvacuum.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #include "postgres.h"
      17             : 
      18             : #include "access/genam.h"
      19             : #include "access/spgist_private.h"
      20             : #include "access/spgxlog.h"
      21             : #include "access/transam.h"
      22             : #include "access/xloginsert.h"
      23             : #include "commands/vacuum.h"
      24             : #include "miscadmin.h"
      25             : #include "storage/bufmgr.h"
      26             : #include "storage/indexfsm.h"
      27             : #include "storage/lmgr.h"
      28             : #include "utils/snapmgr.h"
      29             : 
      30             : 
      31             : /* Entry in pending-list of TIDs we need to revisit */
      32             : typedef struct spgVacPendingItem
      33             : {
      34             :     ItemPointerData tid;        /* redirection target to visit */
      35             :     bool        done;           /* have we dealt with this? */
      36             :     struct spgVacPendingItem *next; /* list link */
      37             : } spgVacPendingItem;
      38             : 
      39             : /* Local state for vacuum operations */
      40             : typedef struct spgBulkDeleteState
      41             : {
      42             :     /* Parameters passed in to spgvacuumscan */
      43             :     IndexVacuumInfo *info;
      44             :     IndexBulkDeleteResult *stats;
      45             :     IndexBulkDeleteCallback callback;
      46             :     void       *callback_state;
      47             : 
      48             :     /* Additional working state */
      49             :     SpGistState spgstate;       /* for SPGiST operations that need one */
      50             :     spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
      51             :     TransactionId myXmin;       /* for detecting newly-added redirects */
      52             :     BlockNumber lastFilledBlock;    /* last non-deletable block */
      53             : } spgBulkDeleteState;
      54             : 
      55             : 
      56             : /*
      57             :  * Add TID to pendingList, but only if not already present.
      58             :  *
      59             :  * Note that new items are always appended at the end of the list; this
      60             :  * ensures that scans of the list don't miss items added during the scan.
      61             :  */
      62             : static void
      63      155628 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
      64             : {
      65             :     spgVacPendingItem *pitem;
      66             :     spgVacPendingItem **listLink;
      67             : 
      68             :     /* search the list for pre-existing entry */
      69      155628 :     listLink = &bds->pendingList;
      70    13748706 :     while (*listLink != NULL)
      71             :     {
      72    13644168 :         pitem = *listLink;
      73    13644168 :         if (ItemPointerEquals(tid, &pitem->tid))
      74       51090 :             return;             /* already in list, do nothing */
      75    13593078 :         listLink = &pitem->next;
      76             :     }
      77             :     /* not there, so append new entry */
      78      104538 :     pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
      79      104538 :     pitem->tid = *tid;
      80      104538 :     pitem->done = false;
      81      104538 :     pitem->next = NULL;
      82      104538 :     *listLink = pitem;
      83             : }
      84             : 
      85             : /*
      86             :  * Clear pendingList
      87             :  */
      88             : static void
      89         786 : spgClearPendingList(spgBulkDeleteState *bds)
      90             : {
      91             :     spgVacPendingItem *pitem;
      92             :     spgVacPendingItem *nitem;
      93             : 
      94      105324 :     for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
      95             :     {
      96      104538 :         nitem = pitem->next;
      97             :         /* All items in list should have been dealt with */
      98             :         Assert(pitem->done);
      99      104538 :         pfree(pitem);
     100             :     }
     101         786 :     bds->pendingList = NULL;
     102         786 : }
     103             : 
     104             : /*
     105             :  * Vacuum a regular (non-root) leaf page
     106             :  *
     107             :  * We must delete tuples that are targeted for deletion by the VACUUM,
     108             :  * but not move any tuples that are referenced by outside links; we assume
     109             :  * those are the ones that are heads of chains.
     110             :  *
     111             :  * If we find a REDIRECT that was made by a concurrently-running transaction,
     112             :  * we must add its target TID to pendingList.  (We don't try to visit the
     113             :  * target immediately, first because we don't want VACUUM locking more than
     114             :  * one buffer at a time, and second because the duplicate-filtering logic
     115             :  * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
     116             :  * loop in the face of continuous concurrent insertions.)
     117             :  *
     118             :  * If forPending is true, we are examining the page as a consequence of
     119             :  * chasing a redirect link, not as part of the normal sequential scan.
     120             :  * We still vacuum the page normally, but we don't increment the stats
     121             :  * about live tuples; else we'd double-count those tuples, since the page
     122             :  * has been or will be visited in the sequential scan as well.
     123             :  */
     124             : static void
     125       56154 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
     126             :                bool forPending)
     127             : {
     128       56154 :     Page        page = BufferGetPage(buffer);
     129             :     spgxlogVacuumLeaf xlrec;
     130             :     OffsetNumber toDead[MaxIndexTuplesPerPage];
     131             :     OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
     132             :     OffsetNumber moveSrc[MaxIndexTuplesPerPage];
     133             :     OffsetNumber moveDest[MaxIndexTuplesPerPage];
     134             :     OffsetNumber chainSrc[MaxIndexTuplesPerPage];
     135             :     OffsetNumber chainDest[MaxIndexTuplesPerPage];
     136             :     OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
     137             :     bool        deletable[MaxIndexTuplesPerPage + 1];
     138             :     int         nDeletable;
     139             :     OffsetNumber i,
     140       56154 :                 max = PageGetMaxOffsetNumber(page);
     141             : 
     142       56154 :     memset(predecessor, 0, sizeof(predecessor));
     143       56154 :     memset(deletable, 0, sizeof(deletable));
     144       56154 :     nDeletable = 0;
     145             : 
     146             :     /* Scan page, identify tuples to delete, accumulate stats */
     147     8759514 :     for (i = FirstOffsetNumber; i <= max; i++)
     148             :     {
     149             :         SpGistLeafTuple lt;
     150             : 
     151     8703360 :         lt = (SpGistLeafTuple) PageGetItem(page,
     152             :                                            PageGetItemId(page, i));
     153     8703360 :         if (lt->tupstate == SPGIST_LIVE)
     154             :         {
     155             :             Assert(ItemPointerIsValid(&lt->heapPtr));
     156             : 
     157     8434006 :             if (bds->callback(&lt->heapPtr, bds->callback_state))
     158             :             {
     159       39900 :                 bds->stats->tuples_removed += 1;
     160       39900 :                 deletable[i] = true;
     161       39900 :                 nDeletable++;
     162             :             }
     163             :             else
     164             :             {
     165     8394106 :                 if (!forPending)
     166      531694 :                     bds->stats->num_index_tuples += 1;
     167             :             }
     168             : 
     169             :             /* Form predecessor map, too */
     170     8434006 :             if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
     171             :             {
     172             :                 /* paranoia about corrupted chain links */
     173     8368494 :                 if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
     174     8368494 :                     SGLT_GET_NEXTOFFSET(lt) > max ||
     175     8368494 :                     predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
     176           0 :                     elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
     177             :                          BufferGetBlockNumber(buffer),
     178             :                          RelationGetRelationName(index));
     179     8368494 :                 predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
     180             :             }
     181             :         }
     182      269354 :         else if (lt->tupstate == SPGIST_REDIRECT)
     183             :         {
     184       52288 :             SpGistDeadTuple dt = (SpGistDeadTuple) lt;
     185             : 
     186             :             Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
     187             :             Assert(ItemPointerIsValid(&dt->pointer));
     188             : 
     189             :             /*
     190             :              * Add target TID to pending list if the redirection could have
     191             :              * happened since VACUUM started.  (If xid is invalid, assume it
     192             :              * must have happened before VACUUM started, since REINDEX
     193             :              * CONCURRENTLY locks out VACUUM.)
     194             :              *
     195             :              * Note: we could make a tighter test by seeing if the xid is
     196             :              * "running" according to the active snapshot; but snapmgr.c
     197             :              * doesn't currently export a suitable API, and it's not entirely
     198             :              * clear that a tighter test is worth the cycles anyway.
     199             :              */
     200       52288 :             if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
     201       51876 :                 spgAddPendingTID(bds, &dt->pointer);
     202             :         }
     203             :         else
     204             :         {
     205             :             Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
     206             :         }
     207             :     }
     208             : 
     209       56154 :     if (nDeletable == 0)
     210       55770 :         return;                 /* nothing more to do */
     211             : 
     212             :     /*----------
     213             :      * Figure out exactly what we have to do.  We do this separately from
     214             :      * actually modifying the page, mainly so that we have a representation
     215             :      * that can be dumped into WAL and then the replay code can do exactly
     216             :      * the same thing.  The output of this step consists of six arrays
     217             :      * describing four kinds of operations, to be performed in this order:
     218             :      *
     219             :      * toDead[]: tuple numbers to be replaced with DEAD tuples
     220             :      * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
     221             :      * moveSrc[]: tuple numbers that need to be relocated to another offset
     222             :      * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
     223             :      * moveDest[]: new locations for moveSrc tuples
     224             :      * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
     225             :      * chainDest[]: new values of nextOffset for chainSrc members
     226             :      *
     227             :      * It's easiest to figure out what we have to do by processing tuple
     228             :      * chains, so we iterate over all the tuples (not just the deletable
     229             :      * ones!) to identify chain heads, then chase down each chain and make
     230             :      * work item entries for deletable tuples within the chain.
     231             :      *----------
     232             :      */
     233         384 :     xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
     234             : 
     235       82436 :     for (i = FirstOffsetNumber; i <= max; i++)
     236             :     {
     237             :         SpGistLeafTuple head;
     238             :         bool        interveningDeletable;
     239             :         OffsetNumber prevLive;
     240             :         OffsetNumber j;
     241             : 
     242       82052 :         head = (SpGistLeafTuple) PageGetItem(page,
     243             :                                              PageGetItemId(page, i));
     244       82052 :         if (head->tupstate != SPGIST_LIVE)
     245       24206 :             continue;           /* can't be a chain member */
     246       57846 :         if (predecessor[i] != 0)
     247       57418 :             continue;           /* not a chain head */
     248             : 
     249             :         /* initialize ... */
     250         428 :         interveningDeletable = false;
     251         428 :         prevLive = deletable[i] ? InvalidOffsetNumber : i;
     252             : 
     253             :         /* scan down the chain ... */
     254         428 :         j = SGLT_GET_NEXTOFFSET(head);
     255       57846 :         while (j != InvalidOffsetNumber)
     256             :         {
     257             :             SpGistLeafTuple lt;
     258             : 
     259       57418 :             lt = (SpGistLeafTuple) PageGetItem(page,
     260             :                                                PageGetItemId(page, j));
     261       57418 :             if (lt->tupstate != SPGIST_LIVE)
     262             :             {
     263             :                 /* all tuples in chain should be live */
     264           0 :                 elog(ERROR, "unexpected SPGiST tuple state: %d",
     265             :                      lt->tupstate);
     266             :             }
     267             : 
     268       57418 :             if (deletable[j])
     269             :             {
     270             :                 /* This tuple should be replaced by a placeholder */
     271       39540 :                 toPlaceholder[xlrec.nPlaceholder] = j;
     272       39540 :                 xlrec.nPlaceholder++;
     273             :                 /* previous live tuple's chain link will need an update */
     274       39540 :                 interveningDeletable = true;
     275             :             }
     276       17878 :             else if (prevLive == InvalidOffsetNumber)
     277             :             {
     278             :                 /*
     279             :                  * This is the first live tuple in the chain.  It has to move
     280             :                  * to the head position.
     281             :                  */
     282         360 :                 moveSrc[xlrec.nMove] = j;
     283         360 :                 moveDest[xlrec.nMove] = i;
     284         360 :                 xlrec.nMove++;
     285             :                 /* Chain updates will be applied after the move */
     286         360 :                 prevLive = i;
     287         360 :                 interveningDeletable = false;
     288             :             }
     289             :             else
     290             :             {
     291             :                 /*
     292             :                  * Second or later live tuple.  Arrange to re-chain it to the
     293             :                  * previous live one, if there was a gap.
     294             :                  */
     295       17518 :                 if (interveningDeletable)
     296             :                 {
     297        9766 :                     chainSrc[xlrec.nChain] = prevLive;
     298        9766 :                     chainDest[xlrec.nChain] = j;
     299        9766 :                     xlrec.nChain++;
     300             :                 }
     301       17518 :                 prevLive = j;
     302       17518 :                 interveningDeletable = false;
     303             :             }
     304             : 
     305       57418 :             j = SGLT_GET_NEXTOFFSET(lt);
     306             :         }
     307             : 
     308         428 :         if (prevLive == InvalidOffsetNumber)
     309             :         {
     310             :             /* The chain is entirely removable, so we need a DEAD tuple */
     311           0 :             toDead[xlrec.nDead] = i;
     312           0 :             xlrec.nDead++;
     313             :         }
     314         428 :         else if (interveningDeletable)
     315             :         {
     316             :             /* One or more deletions at end of chain, so close it off */
     317         400 :             chainSrc[xlrec.nChain] = prevLive;
     318         400 :             chainDest[xlrec.nChain] = InvalidOffsetNumber;
     319         400 :             xlrec.nChain++;
     320             :         }
     321             :     }
     322             : 
     323             :     /* sanity check ... */
     324         384 :     if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
     325           0 :         elog(ERROR, "inconsistent counts of deletable tuples");
     326             : 
     327             :     /* Do the updates */
     328         384 :     START_CRIT_SECTION();
     329             : 
     330         384 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     331         384 :                             toDead, xlrec.nDead,
     332             :                             SPGIST_DEAD, SPGIST_DEAD,
     333             :                             InvalidBlockNumber, InvalidOffsetNumber);
     334             : 
     335         384 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     336         384 :                             toPlaceholder, xlrec.nPlaceholder,
     337             :                             SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
     338             :                             InvalidBlockNumber, InvalidOffsetNumber);
     339             : 
     340             :     /*
     341             :      * We implement the move step by swapping the line pointers of the source
     342             :      * and target tuples, then replacing the newly-source tuples with
     343             :      * placeholders.  This is perhaps unduly friendly with the page data
     344             :      * representation, but it's fast and doesn't risk page overflow when a
     345             :      * tuple to be relocated is large.
     346             :      */
     347         744 :     for (i = 0; i < xlrec.nMove; i++)
     348             :     {
     349         360 :         ItemId      idSrc = PageGetItemId(page, moveSrc[i]);
     350         360 :         ItemId      idDest = PageGetItemId(page, moveDest[i]);
     351             :         ItemIdData  tmp;
     352             : 
     353         360 :         tmp = *idSrc;
     354         360 :         *idSrc = *idDest;
     355         360 :         *idDest = tmp;
     356             :     }
     357             : 
     358         384 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     359         384 :                             moveSrc, xlrec.nMove,
     360             :                             SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
     361             :                             InvalidBlockNumber, InvalidOffsetNumber);
     362             : 
     363       10550 :     for (i = 0; i < xlrec.nChain; i++)
     364             :     {
     365             :         SpGistLeafTuple lt;
     366             : 
     367       10166 :         lt = (SpGistLeafTuple) PageGetItem(page,
     368       10166 :                                            PageGetItemId(page, chainSrc[i]));
     369             :         Assert(lt->tupstate == SPGIST_LIVE);
     370       10166 :         SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
     371             :     }
     372             : 
     373         384 :     MarkBufferDirty(buffer);
     374             : 
     375         384 :     if (RelationNeedsWAL(index))
     376             :     {
     377             :         XLogRecPtr  recptr;
     378             : 
     379         384 :         XLogBeginInsert();
     380             : 
     381         384 :         STORE_STATE(&bds->spgstate, xlrec.stateSrc);
     382             : 
     383         384 :         XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
     384             :         /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
     385         384 :         XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
     386         384 :         XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
     387         384 :         XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
     388         384 :         XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
     389         384 :         XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
     390         384 :         XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
     391             : 
     392         384 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     393             : 
     394         384 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
     395             : 
     396         384 :         PageSetLSN(page, recptr);
     397             :     }
     398             : 
     399         384 :     END_CRIT_SECTION();
     400             : }
     401             : 
     402             : /*
     403             :  * Vacuum a root page when it is also a leaf
     404             :  *
     405             :  * On the root, we just delete any dead leaf tuples; no fancy business
     406             :  */
     407             : static void
     408          32 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
     409             : {
     410          32 :     Page        page = BufferGetPage(buffer);
     411             :     spgxlogVacuumRoot xlrec;
     412             :     OffsetNumber toDelete[MaxIndexTuplesPerPage];
     413             :     OffsetNumber i,
     414          32 :                 max = PageGetMaxOffsetNumber(page);
     415             : 
     416          32 :     xlrec.nDelete = 0;
     417             : 
     418             :     /* Scan page, identify tuples to delete, accumulate stats */
     419         164 :     for (i = FirstOffsetNumber; i <= max; i++)
     420             :     {
     421             :         SpGistLeafTuple lt;
     422             : 
     423         132 :         lt = (SpGistLeafTuple) PageGetItem(page,
     424             :                                            PageGetItemId(page, i));
     425         132 :         if (lt->tupstate == SPGIST_LIVE)
     426             :         {
     427             :             Assert(ItemPointerIsValid(&lt->heapPtr));
     428             : 
     429         132 :             if (bds->callback(&lt->heapPtr, bds->callback_state))
     430             :             {
     431           0 :                 bds->stats->tuples_removed += 1;
     432           0 :                 toDelete[xlrec.nDelete] = i;
     433           0 :                 xlrec.nDelete++;
     434             :             }
     435             :             else
     436             :             {
     437         132 :                 bds->stats->num_index_tuples += 1;
     438             :             }
     439             :         }
     440             :         else
     441             :         {
     442             :             /* all tuples on root should be live */
     443           0 :             elog(ERROR, "unexpected SPGiST tuple state: %d",
     444             :                  lt->tupstate);
     445             :         }
     446             :     }
     447             : 
     448          32 :     if (xlrec.nDelete == 0)
     449          32 :         return;                 /* nothing more to do */
     450             : 
     451             :     /* Do the update */
     452           0 :     START_CRIT_SECTION();
     453             : 
     454             :     /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
     455           0 :     PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
     456             : 
     457           0 :     MarkBufferDirty(buffer);
     458             : 
     459           0 :     if (RelationNeedsWAL(index))
     460             :     {
     461             :         XLogRecPtr  recptr;
     462             : 
     463           0 :         XLogBeginInsert();
     464             : 
     465             :         /* Prepare WAL record */
     466           0 :         STORE_STATE(&bds->spgstate, xlrec.stateSrc);
     467             : 
     468           0 :         XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
     469             :         /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
     470           0 :         XLogRegisterData((char *) toDelete,
     471           0 :                          sizeof(OffsetNumber) * xlrec.nDelete);
     472             : 
     473           0 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     474             : 
     475           0 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
     476             : 
     477           0 :         PageSetLSN(page, recptr);
     478             :     }
     479             : 
     480           0 :     END_CRIT_SECTION();
     481             : }
     482             : 
     483             : /*
     484             :  * Clean up redirect and placeholder tuples on the given page
     485             :  *
     486             :  * Redirect tuples can be marked placeholder once they're old enough.
     487             :  * Placeholder tuples can be removed if it won't change the offsets of
     488             :  * non-placeholder ones.
     489             :  *
     490             :  * Unlike the routines above, this works on both leaf and inner pages.
     491             :  */
     492             : static void
     493       56312 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
     494             : {
     495       56312 :     Page        page = BufferGetPage(buffer);
     496       56312 :     SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
     497             :     OffsetNumber i,
     498       56312 :                 max = PageGetMaxOffsetNumber(page),
     499       56312 :                 firstPlaceholder = InvalidOffsetNumber;
     500       56312 :     bool        hasNonPlaceholder = false;
     501       56312 :     bool        hasUpdate = false;
     502             :     OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
     503             :     OffsetNumber itemnos[MaxIndexTuplesPerPage];
     504             :     spgxlogVacuumRedirect xlrec;
     505             :     GlobalVisState *vistest;
     506             : 
     507       56312 :     xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
     508       56312 :     xlrec.nToPlaceholder = 0;
     509       56312 :     xlrec.snapshotConflictHorizon = InvalidTransactionId;
     510             : 
     511       56312 :     vistest = GlobalVisTestFor(heaprel);
     512             : 
     513       56312 :     START_CRIT_SECTION();
     514             : 
     515             :     /*
     516             :      * Scan backwards to convert old redirection tuples to placeholder tuples,
     517             :      * and identify location of last non-placeholder tuple while at it.
     518             :      */
     519     8152756 :     for (i = max;
     520     8100748 :          i >= FirstOffsetNumber &&
     521     8100748 :          (opaque->nRedirection > 0 || !hasNonPlaceholder);
     522     8096444 :          i--)
     523             :     {
     524             :         SpGistDeadTuple dt;
     525             : 
     526     8096444 :         dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
     527             : 
     528             :         /*
     529             :          * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
     530             :          * be any index scans "in flight" to it.  Such an index scan would
     531             :          * have to be in a transaction whose snapshot sees the REDIRECT's XID
     532             :          * as still running, so comparing the XID against global xmin is a
     533             :          * conservatively safe test.  If the XID is invalid, it must have been
     534             :          * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
     535             :          */
     536     8096444 :         if (dt->tupstate == SPGIST_REDIRECT &&
     537      104580 :             (!TransactionIdIsValid(dt->xid) ||
     538       52290 :              GlobalVisTestIsRemovableXid(vistest, dt->xid)))
     539             :         {
     540         372 :             dt->tupstate = SPGIST_PLACEHOLDER;
     541             :             Assert(opaque->nRedirection > 0);
     542         372 :             opaque->nRedirection--;
     543         372 :             opaque->nPlaceholder++;
     544             : 
     545             :             /* remember newest XID among the removed redirects */
     546         618 :             if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
     547         246 :                 TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
     548         126 :                 xlrec.snapshotConflictHorizon = dt->xid;
     549             : 
     550         372 :             ItemPointerSetInvalid(&dt->pointer);
     551             : 
     552         372 :             itemToPlaceholder[xlrec.nToPlaceholder] = i;
     553         372 :             xlrec.nToPlaceholder++;
     554             : 
     555         372 :             hasUpdate = true;
     556             :         }
     557             : 
     558     8096444 :         if (dt->tupstate == SPGIST_PLACEHOLDER)
     559             :         {
     560      169072 :             if (!hasNonPlaceholder)
     561      162564 :                 firstPlaceholder = i;
     562             :         }
     563             :         else
     564             :         {
     565     7927372 :             hasNonPlaceholder = true;
     566             :         }
     567             :     }
     568             : 
     569             :     /*
     570             :      * Any placeholder tuples at the end of page can safely be removed.  We
     571             :      * can't remove ones before the last non-placeholder, though, because we
     572             :      * can't alter the offset numbers of non-placeholder tuples.
     573             :      */
     574       56312 :     if (firstPlaceholder != InvalidOffsetNumber)
     575             :     {
     576             :         /*
     577             :          * We do not store this array to rdata because it's easy to recreate.
     578             :          */
     579      165102 :         for (i = firstPlaceholder; i <= max; i++)
     580      162564 :             itemnos[i - firstPlaceholder] = i;
     581             : 
     582        2538 :         i = max - firstPlaceholder + 1;
     583             :         Assert(opaque->nPlaceholder >= i);
     584        2538 :         opaque->nPlaceholder -= i;
     585             : 
     586             :         /* The array is surely sorted, so can use PageIndexMultiDelete */
     587        2538 :         PageIndexMultiDelete(page, itemnos, i);
     588             : 
     589        2538 :         hasUpdate = true;
     590             :     }
     591             : 
     592       56312 :     xlrec.firstPlaceholder = firstPlaceholder;
     593             : 
     594       56312 :     if (hasUpdate)
     595        2548 :         MarkBufferDirty(buffer);
     596             : 
     597       56312 :     if (hasUpdate && RelationNeedsWAL(index))
     598             :     {
     599             :         XLogRecPtr  recptr;
     600             : 
     601        2548 :         XLogBeginInsert();
     602             : 
     603        2548 :         XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
     604        2548 :         XLogRegisterData((char *) itemToPlaceholder,
     605        2548 :                          sizeof(OffsetNumber) * xlrec.nToPlaceholder);
     606             : 
     607        2548 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     608             : 
     609        2548 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
     610             : 
     611        2548 :         PageSetLSN(page, recptr);
     612             :     }
     613             : 
     614       56312 :     END_CRIT_SECTION();
     615       56312 : }
     616             : 
     617             : /*
     618             :  * Process one page during a bulkdelete scan
     619             :  */
     620             : static void
     621        4568 : spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
     622             : {
     623        4568 :     Relation    index = bds->info->index;
     624             :     Buffer      buffer;
     625             :     Page        page;
     626             : 
     627             :     /* call vacuum_delay_point while not holding any buffer lock */
     628        4568 :     vacuum_delay_point();
     629             : 
     630        4568 :     buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
     631        4568 :                                 RBM_NORMAL, bds->info->strategy);
     632        4568 :     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     633        4568 :     page = (Page) BufferGetPage(buffer);
     634             : 
     635        4568 :     if (PageIsNew(page))
     636             :     {
     637             :         /*
     638             :          * We found an all-zero page, which could happen if the database
     639             :          * crashed just after extending the file.  Recycle it.
     640             :          */
     641             :     }
     642        4558 :     else if (PageIsEmpty(page))
     643             :     {
     644             :         /* nothing to do */
     645             :     }
     646        4468 :     else if (SpGistPageIsLeaf(page))
     647             :     {
     648        4310 :         if (SpGistBlockIsRoot(blkno))
     649             :         {
     650          32 :             vacuumLeafRoot(bds, index, buffer);
     651             :             /* no need for vacuumRedirectAndPlaceholder */
     652             :         }
     653             :         else
     654             :         {
     655        4278 :             vacuumLeafPage(bds, index, buffer, false);
     656        4278 :             vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     657             :         }
     658             :     }
     659             :     else
     660             :     {
     661             :         /* inner page */
     662         158 :         vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     663             :     }
     664             : 
     665             :     /*
     666             :      * The root pages must never be deleted, nor marked as available in FSM,
     667             :      * because we don't want them ever returned by a search for a place to put
     668             :      * a new tuple.  Otherwise, check for empty page, and make sure the FSM
     669             :      * knows about it.
     670             :      */
     671        4568 :     if (!SpGistBlockIsRoot(blkno))
     672             :     {
     673        4416 :         if (PageIsNew(page) || PageIsEmpty(page))
     674             :         {
     675          74 :             RecordFreeIndexPage(index, blkno);
     676          74 :             bds->stats->pages_deleted++;
     677             :         }
     678             :         else
     679             :         {
     680        4342 :             SpGistSetLastUsedPage(index, buffer);
     681        4342 :             bds->lastFilledBlock = blkno;
     682             :         }
     683             :     }
     684             : 
     685        4568 :     UnlockReleaseBuffer(buffer);
     686        4568 : }
     687             : 
     688             : /*
     689             :  * Process the pending-TID list between pages of the main scan
     690             :  */
     691             : static void
     692         786 : spgprocesspending(spgBulkDeleteState *bds)
     693             : {
     694         786 :     Relation    index = bds->info->index;
     695             :     spgVacPendingItem *pitem;
     696             :     spgVacPendingItem *nitem;
     697             :     BlockNumber blkno;
     698             :     Buffer      buffer;
     699             :     Page        page;
     700             : 
     701      105324 :     for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
     702             :     {
     703      104538 :         if (pitem->done)
     704       51876 :             continue;           /* ignore already-done items */
     705             : 
     706             :         /* call vacuum_delay_point while not holding any buffer lock */
     707       52662 :         vacuum_delay_point();
     708             : 
     709             :         /* examine the referenced page */
     710       52662 :         blkno = ItemPointerGetBlockNumber(&pitem->tid);
     711       52662 :         buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
     712       52662 :                                     RBM_NORMAL, bds->info->strategy);
     713       52662 :         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     714       52662 :         page = (Page) BufferGetPage(buffer);
     715             : 
     716       52662 :         if (PageIsNew(page) || SpGistPageIsDeleted(page))
     717             :         {
     718             :             /* Probably shouldn't happen, but ignore it */
     719             :         }
     720       52662 :         else if (SpGistPageIsLeaf(page))
     721             :         {
     722       51876 :             if (SpGistBlockIsRoot(blkno))
     723             :             {
     724             :                 /* this should definitely not happen */
     725           0 :                 elog(ERROR, "redirection leads to root page of index \"%s\"",
     726             :                      RelationGetRelationName(index));
     727             :             }
     728             : 
     729             :             /* deal with any deletable tuples */
     730       51876 :             vacuumLeafPage(bds, index, buffer, true);
     731             :             /* might as well do this while we are here */
     732       51876 :             vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     733             : 
     734       51876 :             SpGistSetLastUsedPage(index, buffer);
     735             : 
     736             :             /*
     737             :              * We can mark as done not only this item, but any later ones
     738             :              * pointing at the same page, since we vacuumed the whole page.
     739             :              */
     740       51876 :             pitem->done = true;
     741     4549362 :             for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
     742             :             {
     743     4497486 :                 if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
     744         786 :                     nitem->done = true;
     745             :             }
     746             :         }
     747             :         else
     748             :         {
     749             :             /*
     750             :              * On an inner page, visit the referenced inner tuple and add all
     751             :              * its downlinks to the pending list.  We might have pending items
     752             :              * for more than one inner tuple on the same page (in fact this is
     753             :              * pretty likely given the way space allocation works), so get
     754             :              * them all while we are here.
     755             :              */
     756      105324 :             for (nitem = pitem; nitem != NULL; nitem = nitem->next)
     757             :             {
     758      104538 :                 if (nitem->done)
     759           0 :                     continue;
     760      104538 :                 if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
     761             :                 {
     762             :                     OffsetNumber offset;
     763             :                     SpGistInnerTuple innerTuple;
     764             : 
     765       51876 :                     offset = ItemPointerGetOffsetNumber(&nitem->tid);
     766       51876 :                     innerTuple = (SpGistInnerTuple) PageGetItem(page,
     767             :                                                                 PageGetItemId(page, offset));
     768       51876 :                     if (innerTuple->tupstate == SPGIST_LIVE)
     769             :                     {
     770             :                         SpGistNodeTuple node;
     771             :                         int         i;
     772             : 
     773      259380 :                         SGITITERATE(innerTuple, i, node)
     774             :                         {
     775      207504 :                             if (ItemPointerIsValid(&node->t_tid))
     776      103752 :                                 spgAddPendingTID(bds, &node->t_tid);
     777             :                         }
     778             :                     }
     779           0 :                     else if (innerTuple->tupstate == SPGIST_REDIRECT)
     780             :                     {
     781             :                         /* transfer attention to redirect point */
     782           0 :                         spgAddPendingTID(bds,
     783             :                                          &((SpGistDeadTuple) innerTuple)->pointer);
     784             :                     }
     785             :                     else
     786           0 :                         elog(ERROR, "unexpected SPGiST tuple state: %d",
     787             :                              innerTuple->tupstate);
     788             : 
     789       51876 :                     nitem->done = true;
     790             :                 }
     791             :             }
     792             :         }
     793             : 
     794       52662 :         UnlockReleaseBuffer(buffer);
     795             :     }
     796             : 
     797         786 :     spgClearPendingList(bds);
     798         786 : }
     799             : 
     800             : /*
     801             :  * Perform a bulkdelete scan
     802             :  */
     803             : static void
     804          76 : spgvacuumscan(spgBulkDeleteState *bds)
     805             : {
     806          76 :     Relation    index = bds->info->index;
     807             :     bool        needLock;
     808             :     BlockNumber num_pages,
     809             :                 blkno;
     810             : 
     811             :     /* Finish setting up spgBulkDeleteState */
     812          76 :     initSpGistState(&bds->spgstate, index);
     813          76 :     bds->pendingList = NULL;
     814          76 :     bds->myXmin = GetActiveSnapshot()->xmin;
     815          76 :     bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
     816             : 
     817             :     /*
     818             :      * Reset counts that will be incremented during the scan; needed in case
     819             :      * of multiple scans during a single VACUUM command
     820             :      */
     821          76 :     bds->stats->estimated_count = false;
     822          76 :     bds->stats->num_index_tuples = 0;
     823          76 :     bds->stats->pages_deleted = 0;
     824             : 
     825             :     /* We can skip locking for new or temp relations */
     826          76 :     needLock = !RELATION_IS_LOCAL(index);
     827             : 
     828             :     /*
     829             :      * The outer loop iterates over all index pages except the metapage, in
     830             :      * physical order (we hope the kernel will cooperate in providing
     831             :      * read-ahead for speed).  It is critical that we visit all leaf pages,
     832             :      * including ones added after we start the scan, else we might fail to
     833             :      * delete some deletable tuples.  See more extensive comments about this
     834             :      * in btvacuumscan().
     835             :      */
     836          76 :     blkno = SPGIST_METAPAGE_BLKNO + 1;
     837             :     for (;;)
     838             :     {
     839             :         /* Get the current relation length */
     840         152 :         if (needLock)
     841         152 :             LockRelationForExtension(index, ExclusiveLock);
     842         152 :         num_pages = RelationGetNumberOfBlocks(index);
     843         152 :         if (needLock)
     844         152 :             UnlockRelationForExtension(index, ExclusiveLock);
     845             : 
     846             :         /* Quit if we've scanned the whole relation */
     847         152 :         if (blkno >= num_pages)
     848          76 :             break;
     849             :         /* Iterate over pages, then loop back to recheck length */
     850        4644 :         for (; blkno < num_pages; blkno++)
     851             :         {
     852        4568 :             spgvacuumpage(bds, blkno);
     853             :             /* empty the pending-list after each page */
     854        4568 :             if (bds->pendingList != NULL)
     855         786 :                 spgprocesspending(bds);
     856             :         }
     857             :     }
     858             : 
     859             :     /* Propagate local lastUsedPages cache to metablock */
     860          76 :     SpGistUpdateMetaPage(index);
     861             : 
     862             :     /*
     863             :      * If we found any empty pages (and recorded them in the FSM), then
     864             :      * forcibly update the upper-level FSM pages to ensure that searchers can
     865             :      * find them.  It's possible that the pages were also found during
     866             :      * previous scans and so this is a waste of time, but it's cheap enough
     867             :      * relative to scanning the index that it shouldn't matter much, and
     868             :      * making sure that free pages are available sooner not later seems
     869             :      * worthwhile.
     870             :      *
     871             :      * Note that if no empty pages exist, we don't bother vacuuming the FSM at
     872             :      * all.
     873             :      */
     874          76 :     if (bds->stats->pages_deleted > 0)
     875          44 :         IndexFreeSpaceMapVacuum(index);
     876             : 
     877             :     /*
     878             :      * Truncate index if possible
     879             :      *
     880             :      * XXX disabled because it's unsafe due to possible concurrent inserts.
     881             :      * We'd have to rescan the pages to make sure they're still empty, and it
     882             :      * doesn't seem worth it.  Note that btree doesn't do this either.
     883             :      *
     884             :      * Another reason not to truncate is that it could invalidate the cached
     885             :      * pages-with-freespace pointers in the metapage and other backends'
     886             :      * relation caches, that is leave them pointing to nonexistent pages.
     887             :      * Adding RelationGetNumberOfBlocks calls to protect the places that use
     888             :      * those pointers would be unduly expensive.
     889             :      */
     890             : #ifdef NOT_USED
     891             :     if (num_pages > bds->lastFilledBlock + 1)
     892             :     {
     893             :         BlockNumber lastBlock = num_pages - 1;
     894             : 
     895             :         num_pages = bds->lastFilledBlock + 1;
     896             :         RelationTruncate(index, num_pages);
     897             :         bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
     898             :         bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
     899             :     }
     900             : #endif
     901             : 
     902             :     /* Report final stats */
     903          76 :     bds->stats->num_pages = num_pages;
     904          76 :     bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
     905          76 :     bds->stats->pages_free = bds->stats->pages_deleted;
     906          76 : }
     907             : 
     908             : /*
     909             :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     910             :  * The set of target tuples is specified via a callback routine that tells
     911             :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     912             :  *
     913             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     914             :  */
     915             : IndexBulkDeleteResult *
     916           8 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     917             :               IndexBulkDeleteCallback callback, void *callback_state)
     918             : {
     919             :     spgBulkDeleteState bds;
     920             : 
     921             :     /* allocate stats if first time through, else re-use existing struct */
     922           8 :     if (stats == NULL)
     923           8 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     924           8 :     bds.info = info;
     925           8 :     bds.stats = stats;
     926           8 :     bds.callback = callback;
     927           8 :     bds.callback_state = callback_state;
     928             : 
     929           8 :     spgvacuumscan(&bds);
     930             : 
     931           8 :     return stats;
     932             : }
     933             : 
     934             : /* Dummy callback to delete no tuples during spgvacuumcleanup */
     935             : static bool
     936     8376292 : dummy_callback(ItemPointer itemptr, void *state)
     937             : {
     938     8376292 :     return false;
     939             : }
     940             : 
     941             : /*
     942             :  * Post-VACUUM cleanup.
     943             :  *
     944             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     945             :  */
     946             : IndexBulkDeleteResult *
     947          88 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     948             : {
     949             :     spgBulkDeleteState bds;
     950             : 
     951             :     /* No-op in ANALYZE ONLY mode */
     952          88 :     if (info->analyze_only)
     953          12 :         return stats;
     954             : 
     955             :     /*
     956             :      * We don't need to scan the index if there was a preceding bulkdelete
     957             :      * pass.  Otherwise, make a pass that won't delete any live tuples, but
     958             :      * might still accomplish useful stuff with redirect/placeholder cleanup
     959             :      * and/or FSM housekeeping, and in any case will provide stats.
     960             :      */
     961          76 :     if (stats == NULL)
     962             :     {
     963          68 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     964          68 :         bds.info = info;
     965          68 :         bds.stats = stats;
     966          68 :         bds.callback = dummy_callback;
     967          68 :         bds.callback_state = NULL;
     968             : 
     969          68 :         spgvacuumscan(&bds);
     970             :     }
     971             : 
     972             :     /*
     973             :      * It's quite possible for us to be fooled by concurrent tuple moves into
     974             :      * double-counting some index tuples, so disbelieve any total that exceeds
     975             :      * the underlying heap's count ... if we know that accurately.  Otherwise
     976             :      * this might just make matters worse.
     977             :      */
     978          76 :     if (!info->estimated_count)
     979             :     {
     980          76 :         if (stats->num_index_tuples > info->num_heap_tuples)
     981           0 :             stats->num_index_tuples = info->num_heap_tuples;
     982             :     }
     983             : 
     984          76 :     return stats;
     985             : }

Generated by: LCOV version 1.14