LCOV - code coverage report
Current view: top level - src/backend/access/spgist - spgvacuum.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.2 % 329 300
Test Date: 2026-03-02 15:15:07 Functions: 100.0 % 11 11
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * spgvacuum.c
       4              :  *    vacuum for SP-GiST
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *          src/backend/access/spgist/spgvacuum.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "postgres.h"
      17              : 
      18              : #include "access/genam.h"
      19              : #include "access/spgist_private.h"
      20              : #include "access/spgxlog.h"
      21              : #include "access/transam.h"
      22              : #include "access/xloginsert.h"
      23              : #include "commands/vacuum.h"
      24              : #include "miscadmin.h"
      25              : #include "storage/bufmgr.h"
      26              : #include "storage/indexfsm.h"
      27              : #include "storage/lmgr.h"
      28              : #include "storage/read_stream.h"
      29              : #include "utils/snapmgr.h"
      30              : 
      31              : 
      32              : /* Entry in pending-list of TIDs we need to revisit */
      33              : typedef struct spgVacPendingItem
      34              : {
      35              :     ItemPointerData tid;        /* redirection target to visit */
      36              :     bool        done;           /* have we dealt with this? */
      37              :     struct spgVacPendingItem *next; /* list link */
      38              : } spgVacPendingItem;
      39              : 
      40              : /* Local state for vacuum operations */
      41              : typedef struct spgBulkDeleteState
      42              : {
      43              :     /* Parameters passed in to spgvacuumscan */
      44              :     IndexVacuumInfo *info;
      45              :     IndexBulkDeleteResult *stats;
      46              :     IndexBulkDeleteCallback callback;
      47              :     void       *callback_state;
      48              : 
      49              :     /* Additional working state */
      50              :     SpGistState spgstate;       /* for SPGiST operations that need one */
      51              :     spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
      52              :     TransactionId myXmin;       /* for detecting newly-added redirects */
      53              :     BlockNumber lastFilledBlock;    /* last non-deletable block */
      54              : } spgBulkDeleteState;
      55              : 
      56              : 
      57              : /*
      58              :  * Add TID to pendingList, but only if not already present.
      59              :  *
      60              :  * Note that new items are always appended at the end of the list; this
      61              :  * ensures that scans of the list don't miss items added during the scan.
      62              :  */
      63              : static void
      64        51879 : spgAddPendingTID(spgBulkDeleteState *bds, const ItemPointerData *tid)
      65              : {
      66              :     spgVacPendingItem *pitem;
      67              :     spgVacPendingItem **listLink;
      68              : 
      69              :     /* search the list for pre-existing entry */
      70        51879 :     listLink = &bds->pendingList;
      71      4582908 :     while (*listLink != NULL)
      72              :     {
      73      4548059 :         pitem = *listLink;
      74      4548059 :         if (ItemPointerEquals(tid, &pitem->tid))
      75        17030 :             return;             /* already in list, do nothing */
      76      4531029 :         listLink = &pitem->next;
      77              :     }
      78              :     /* not there, so append new entry */
      79        34849 :     pitem = palloc_object(spgVacPendingItem);
      80        34849 :     pitem->tid = *tid;
      81        34849 :     pitem->done = false;
      82        34849 :     pitem->next = NULL;
      83        34849 :     *listLink = pitem;
      84              : }
      85              : 
      86              : /*
      87              :  * Clear pendingList
      88              :  */
      89              : static void
      90          263 : spgClearPendingList(spgBulkDeleteState *bds)
      91              : {
      92              :     spgVacPendingItem *pitem;
      93              :     spgVacPendingItem *nitem;
      94              : 
      95        35112 :     for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
      96              :     {
      97        34849 :         nitem = pitem->next;
      98              :         /* All items in list should have been dealt with */
      99              :         Assert(pitem->done);
     100        34849 :         pfree(pitem);
     101              :     }
     102          263 :     bds->pendingList = NULL;
     103          263 : }
     104              : 
     105              : /*
     106              :  * Vacuum a regular (non-root) leaf page
     107              :  *
     108              :  * We must delete tuples that are targeted for deletion by the VACUUM,
     109              :  * but not move any tuples that are referenced by outside links; we assume
     110              :  * those are the ones that are heads of chains.
     111              :  *
     112              :  * If we find a REDIRECT that was made by a concurrently-running transaction,
     113              :  * we must add its target TID to pendingList.  (We don't try to visit the
     114              :  * target immediately, first because we don't want VACUUM locking more than
     115              :  * one buffer at a time, and second because the duplicate-filtering logic
     116              :  * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
     117              :  * loop in the face of continuous concurrent insertions.)
     118              :  *
     119              :  * If forPending is true, we are examining the page as a consequence of
     120              :  * chasing a redirect link, not as part of the normal sequential scan.
     121              :  * We still vacuum the page normally, but we don't increment the stats
     122              :  * about live tuples; else we'd double-count those tuples, since the page
     123              :  * has been or will be visited in the sequential scan as well.
     124              :  */
     125              : static void
     126        19423 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
     127              :                bool forPending)
     128              : {
     129        19423 :     Page        page = BufferGetPage(buffer);
     130              :     spgxlogVacuumLeaf xlrec;
     131              :     OffsetNumber toDead[MaxIndexTuplesPerPage];
     132              :     OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
     133              :     OffsetNumber moveSrc[MaxIndexTuplesPerPage];
     134              :     OffsetNumber moveDest[MaxIndexTuplesPerPage];
     135              :     OffsetNumber chainSrc[MaxIndexTuplesPerPage];
     136              :     OffsetNumber chainDest[MaxIndexTuplesPerPage];
     137              :     OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
     138              :     bool        deletable[MaxIndexTuplesPerPage + 1];
     139              :     int         nDeletable;
     140              :     OffsetNumber i,
     141        19423 :                 max = PageGetMaxOffsetNumber(page);
     142              : 
     143        19423 :     memset(predecessor, 0, sizeof(predecessor));
     144        19423 :     memset(deletable, 0, sizeof(deletable));
     145        19423 :     nDeletable = 0;
     146              : 
     147              :     /* Scan page, identify tuples to delete, accumulate stats */
     148      3049587 :     for (i = FirstOffsetNumber; i <= max; i++)
     149              :     {
     150              :         SpGistLeafTuple lt;
     151              : 
     152      3030164 :         lt = (SpGistLeafTuple) PageGetItem(page,
     153      3030164 :                                            PageGetItemId(page, i));
     154      3030164 :         if (lt->tupstate == SPGIST_LIVE)
     155              :         {
     156              :             Assert(ItemPointerIsValid(&lt->heapPtr));
     157              : 
     158      2905786 :             if (bds->callback(&lt->heapPtr, bds->callback_state))
     159              :             {
     160        20882 :                 bds->stats->tuples_removed += 1;
     161        20882 :                 deletable[i] = true;
     162        20882 :                 nDeletable++;
     163              :             }
     164              :             else
     165              :             {
     166      2884904 :                 if (!forPending)
     167       263948 :                     bds->stats->num_index_tuples += 1;
     168              :             }
     169              : 
     170              :             /* Form predecessor map, too */
     171      2905786 :             if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
     172              :             {
     173              :                 /* paranoia about corrupted chain links */
     174      2881806 :                 if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
     175      2881806 :                     SGLT_GET_NEXTOFFSET(lt) > max ||
     176      2881806 :                     predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
     177            0 :                     elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
     178              :                          BufferGetBlockNumber(buffer),
     179              :                          RelationGetRelationName(index));
     180      2881806 :                 predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
     181              :             }
     182              :         }
     183       124378 :         else if (lt->tupstate == SPGIST_REDIRECT)
     184              :         {
     185        17621 :             SpGistDeadTuple dt = (SpGistDeadTuple) lt;
     186              : 
     187              :             Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
     188              :             Assert(ItemPointerIsValid(&dt->pointer));
     189              : 
     190              :             /*
     191              :              * Add target TID to pending list if the redirection could have
     192              :              * happened since VACUUM started.  (If xid is invalid, assume it
     193              :              * must have happened before VACUUM started, since REINDEX
     194              :              * CONCURRENTLY locks out VACUUM.)
     195              :              *
     196              :              * Note: we could make a tighter test by seeing if the xid is
     197              :              * "running" according to the active snapshot; but snapmgr.c
     198              :              * doesn't currently export a suitable API, and it's not entirely
     199              :              * clear that a tighter test is worth the cycles anyway.
     200              :              */
     201        17621 :             if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
     202        17293 :                 spgAddPendingTID(bds, &dt->pointer);
     203              :         }
     204              :         else
     205              :         {
     206              :             Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
     207              :         }
     208              :     }
     209              : 
     210        19423 :     if (nDeletable == 0)
     211        19225 :         return;                 /* nothing more to do */
     212              : 
     213              :     /*----------
     214              :      * Figure out exactly what we have to do.  We do this separately from
     215              :      * actually modifying the page, mainly so that we have a representation
     216              :      * that can be dumped into WAL and then the replay code can do exactly
     217              :      * the same thing.  The output of this step consists of six arrays
     218              :      * describing four kinds of operations, to be performed in this order:
     219              :      *
     220              :      * toDead[]: tuple numbers to be replaced with DEAD tuples
     221              :      * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
     222              :      * moveSrc[]: tuple numbers that need to be relocated to another offset
     223              :      * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
     224              :      * moveDest[]: new locations for moveSrc tuples
     225              :      * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
     226              :      * chainDest[]: new values of nextOffset for chainSrc members
     227              :      *
     228              :      * It's easiest to figure out what we have to do by processing tuple
     229              :      * chains, so we iterate over all the tuples (not just the deletable
     230              :      * ones!) to identify chain heads, then chase down each chain and make
     231              :      * work item entries for deletable tuples within the chain.
     232              :      *----------
     233              :      */
     234          198 :     xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
     235              : 
     236        41659 :     for (i = FirstOffsetNumber; i <= max; i++)
     237              :     {
     238              :         SpGistLeafTuple head;
     239              :         bool        interveningDeletable;
     240              :         OffsetNumber prevLive;
     241              :         OffsetNumber j;
     242              : 
     243        41461 :         head = (SpGistLeafTuple) PageGetItem(page,
     244        41461 :                                              PageGetItemId(page, i));
     245        41461 :         if (head->tupstate != SPGIST_LIVE)
     246        11578 :             continue;           /* can't be a chain member */
     247        29883 :         if (predecessor[i] != 0)
     248        29666 :             continue;           /* not a chain head */
     249              : 
     250              :         /* initialize ... */
     251          217 :         interveningDeletable = false;
     252          217 :         prevLive = deletable[i] ? InvalidOffsetNumber : i;
     253              : 
     254              :         /* scan down the chain ... */
     255          217 :         j = SGLT_GET_NEXTOFFSET(head);
     256        29883 :         while (j != InvalidOffsetNumber)
     257              :         {
     258              :             SpGistLeafTuple lt;
     259              : 
     260        29666 :             lt = (SpGistLeafTuple) PageGetItem(page,
     261        29666 :                                                PageGetItemId(page, j));
     262        29666 :             if (lt->tupstate != SPGIST_LIVE)
     263              :             {
     264              :                 /* all tuples in chain should be live */
     265            0 :                 elog(ERROR, "unexpected SPGiST tuple state: %d",
     266              :                      lt->tupstate);
     267              :             }
     268              : 
     269        29666 :             if (deletable[j])
     270              :             {
     271              :                 /* This tuple should be replaced by a placeholder */
     272        20689 :                 toPlaceholder[xlrec.nPlaceholder] = j;
     273        20689 :                 xlrec.nPlaceholder++;
     274              :                 /* previous live tuple's chain link will need an update */
     275        20689 :                 interveningDeletable = true;
     276              :             }
     277         8977 :             else if (prevLive == InvalidOffsetNumber)
     278              :             {
     279              :                 /*
     280              :                  * This is the first live tuple in the chain.  It has to move
     281              :                  * to the head position.
     282              :                  */
     283          193 :                 moveSrc[xlrec.nMove] = j;
     284          193 :                 moveDest[xlrec.nMove] = i;
     285          193 :                 xlrec.nMove++;
     286              :                 /* Chain updates will be applied after the move */
     287          193 :                 prevLive = i;
     288          193 :                 interveningDeletable = false;
     289              :             }
     290              :             else
     291              :             {
     292              :                 /*
     293              :                  * Second or later live tuple.  Arrange to re-chain it to the
     294              :                  * previous live one, if there was a gap.
     295              :                  */
     296         8784 :                 if (interveningDeletable)
     297              :                 {
     298         4880 :                     chainSrc[xlrec.nChain] = prevLive;
     299         4880 :                     chainDest[xlrec.nChain] = j;
     300         4880 :                     xlrec.nChain++;
     301              :                 }
     302         8784 :                 prevLive = j;
     303         8784 :                 interveningDeletable = false;
     304              :             }
     305              : 
     306        29666 :             j = SGLT_GET_NEXTOFFSET(lt);
     307              :         }
     308              : 
     309          217 :         if (prevLive == InvalidOffsetNumber)
     310              :         {
     311              :             /* The chain is entirely removable, so we need a DEAD tuple */
     312            0 :             toDead[xlrec.nDead] = i;
     313            0 :             xlrec.nDead++;
     314              :         }
     315          217 :         else if (interveningDeletable)
     316              :         {
     317              :             /* One or more deletions at end of chain, so close it off */
     318          205 :             chainSrc[xlrec.nChain] = prevLive;
     319          205 :             chainDest[xlrec.nChain] = InvalidOffsetNumber;
     320          205 :             xlrec.nChain++;
     321              :         }
     322              :     }
     323              : 
     324              :     /* sanity check ... */
     325          198 :     if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
     326            0 :         elog(ERROR, "inconsistent counts of deletable tuples");
     327              : 
     328              :     /* Do the updates */
     329          198 :     START_CRIT_SECTION();
     330              : 
     331          198 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     332          198 :                             toDead, xlrec.nDead,
     333              :                             SPGIST_DEAD, SPGIST_DEAD,
     334              :                             InvalidBlockNumber, InvalidOffsetNumber);
     335              : 
     336          198 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     337          198 :                             toPlaceholder, xlrec.nPlaceholder,
     338              :                             SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
     339              :                             InvalidBlockNumber, InvalidOffsetNumber);
     340              : 
     341              :     /*
     342              :      * We implement the move step by swapping the line pointers of the source
     343              :      * and target tuples, then replacing the newly-source tuples with
     344              :      * placeholders.  This is perhaps unduly friendly with the page data
     345              :      * representation, but it's fast and doesn't risk page overflow when a
     346              :      * tuple to be relocated is large.
     347              :      */
     348          391 :     for (i = 0; i < xlrec.nMove; i++)
     349              :     {
     350          193 :         ItemId      idSrc = PageGetItemId(page, moveSrc[i]);
     351          193 :         ItemId      idDest = PageGetItemId(page, moveDest[i]);
     352              :         ItemIdData  tmp;
     353              : 
     354          193 :         tmp = *idSrc;
     355          193 :         *idSrc = *idDest;
     356          193 :         *idDest = tmp;
     357              :     }
     358              : 
     359          198 :     spgPageIndexMultiDelete(&bds->spgstate, page,
     360          198 :                             moveSrc, xlrec.nMove,
     361              :                             SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
     362              :                             InvalidBlockNumber, InvalidOffsetNumber);
     363              : 
     364         5283 :     for (i = 0; i < xlrec.nChain; i++)
     365              :     {
     366              :         SpGistLeafTuple lt;
     367              : 
     368         5085 :         lt = (SpGistLeafTuple) PageGetItem(page,
     369         5085 :                                            PageGetItemId(page, chainSrc[i]));
     370              :         Assert(lt->tupstate == SPGIST_LIVE);
     371         5085 :         SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
     372              :     }
     373              : 
     374          198 :     MarkBufferDirty(buffer);
     375              : 
     376          198 :     if (RelationNeedsWAL(index))
     377              :     {
     378              :         XLogRecPtr  recptr;
     379              : 
     380          198 :         XLogBeginInsert();
     381              : 
     382          198 :         STORE_STATE(&bds->spgstate, xlrec.stateSrc);
     383              : 
     384          198 :         XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumLeaf);
     385              :         /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
     386          198 :         XLogRegisterData(toDead, sizeof(OffsetNumber) * xlrec.nDead);
     387          198 :         XLogRegisterData(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
     388          198 :         XLogRegisterData(moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
     389          198 :         XLogRegisterData(moveDest, sizeof(OffsetNumber) * xlrec.nMove);
     390          198 :         XLogRegisterData(chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
     391          198 :         XLogRegisterData(chainDest, sizeof(OffsetNumber) * xlrec.nChain);
     392              : 
     393          198 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     394              : 
     395          198 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
     396              : 
     397          198 :         PageSetLSN(page, recptr);
     398              :     }
     399              : 
     400          198 :     END_CRIT_SECTION();
     401              : }
     402              : 
     403              : /*
     404              :  * Vacuum a root page when it is also a leaf
     405              :  *
     406              :  * On the root, we just delete any dead leaf tuples; no fancy business
     407              :  */
     408              : static void
     409           16 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
     410              : {
     411           16 :     Page        page = BufferGetPage(buffer);
     412              :     spgxlogVacuumRoot xlrec;
     413              :     OffsetNumber toDelete[MaxIndexTuplesPerPage];
     414              :     OffsetNumber i,
     415           16 :                 max = PageGetMaxOffsetNumber(page);
     416              : 
     417           16 :     xlrec.nDelete = 0;
     418              : 
     419              :     /* Scan page, identify tuples to delete, accumulate stats */
     420           82 :     for (i = FirstOffsetNumber; i <= max; i++)
     421              :     {
     422              :         SpGistLeafTuple lt;
     423              : 
     424           66 :         lt = (SpGistLeafTuple) PageGetItem(page,
     425           66 :                                            PageGetItemId(page, i));
     426           66 :         if (lt->tupstate == SPGIST_LIVE)
     427              :         {
     428              :             Assert(ItemPointerIsValid(&lt->heapPtr));
     429              : 
     430           66 :             if (bds->callback(&lt->heapPtr, bds->callback_state))
     431              :             {
     432            0 :                 bds->stats->tuples_removed += 1;
     433            0 :                 toDelete[xlrec.nDelete] = i;
     434            0 :                 xlrec.nDelete++;
     435              :             }
     436              :             else
     437              :             {
     438           66 :                 bds->stats->num_index_tuples += 1;
     439              :             }
     440              :         }
     441              :         else
     442              :         {
     443              :             /* all tuples on root should be live */
     444            0 :             elog(ERROR, "unexpected SPGiST tuple state: %d",
     445              :                  lt->tupstate);
     446              :         }
     447              :     }
     448              : 
     449           16 :     if (xlrec.nDelete == 0)
     450           16 :         return;                 /* nothing more to do */
     451              : 
     452              :     /* Do the update */
     453            0 :     START_CRIT_SECTION();
     454              : 
     455              :     /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
     456            0 :     PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
     457              : 
     458            0 :     MarkBufferDirty(buffer);
     459              : 
     460            0 :     if (RelationNeedsWAL(index))
     461              :     {
     462              :         XLogRecPtr  recptr;
     463              : 
     464            0 :         XLogBeginInsert();
     465              : 
     466              :         /* Prepare WAL record */
     467            0 :         STORE_STATE(&bds->spgstate, xlrec.stateSrc);
     468              : 
     469            0 :         XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRoot);
     470              :         /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
     471            0 :         XLogRegisterData(toDelete,
     472            0 :                          sizeof(OffsetNumber) * xlrec.nDelete);
     473              : 
     474            0 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     475              : 
     476            0 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
     477              : 
     478            0 :         PageSetLSN(page, recptr);
     479              :     }
     480              : 
     481            0 :     END_CRIT_SECTION();
     482              : }
     483              : 
     484              : /*
     485              :  * Clean up redirect and placeholder tuples on the given page
     486              :  *
     487              :  * Redirect tuples can be marked placeholder once they're old enough.
     488              :  * Placeholder tuples can be removed if it won't change the offsets of
     489              :  * non-placeholder ones.
     490              :  *
     491              :  * Unlike the routines above, this works on both leaf and inner pages.
     492              :  */
     493              : static void
     494        19502 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
     495              : {
     496        19502 :     Page        page = BufferGetPage(buffer);
     497        19502 :     SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
     498              :     OffsetNumber i,
     499        19502 :                 max = PageGetMaxOffsetNumber(page),
     500        19502 :                 firstPlaceholder = InvalidOffsetNumber;
     501        19502 :     bool        hasNonPlaceholder = false;
     502        19502 :     bool        hasUpdate = false;
     503              :     OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
     504              :     OffsetNumber itemnos[MaxIndexTuplesPerPage];
     505              :     spgxlogVacuumRedirect xlrec;
     506              :     GlobalVisState *vistest;
     507              : 
     508        19502 :     xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
     509        19502 :     xlrec.nToPlaceholder = 0;
     510        19502 :     xlrec.snapshotConflictHorizon = InvalidTransactionId;
     511              : 
     512        19502 :     vistest = GlobalVisTestFor(heaprel);
     513              : 
     514        19502 :     START_CRIT_SECTION();
     515              : 
     516              :     /*
     517              :      * Scan backwards to convert old redirection tuples to placeholder tuples,
     518              :      * and identify location of last non-placeholder tuple while at it.
     519              :      */
     520        19502 :     for (i = max;
     521      2766077 :          i >= FirstOffsetNumber &&
     522      2748601 :          (opaque->nRedirection > 0 || !hasNonPlaceholder);
     523      2746575 :          i--)
     524              :     {
     525              :         SpGistDeadTuple dt;
     526              : 
     527      2746575 :         dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
     528              : 
     529              :         /*
     530              :          * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
     531              :          * be any index scans "in flight" to it.  Such an index scan would
     532              :          * have to be in a transaction whose snapshot sees the REDIRECT's XID
     533              :          * as still running, so comparing the XID against global xmin is a
     534              :          * conservatively safe test.  If the XID is invalid, it must have been
     535              :          * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
     536              :          */
     537      2746575 :         if (dt->tupstate == SPGIST_REDIRECT &&
     538        35244 :             (!TransactionIdIsValid(dt->xid) ||
     539        17622 :              GlobalVisTestIsRemovableXid(vistest, dt->xid)))
     540              :         {
     541          193 :             dt->tupstate = SPGIST_PLACEHOLDER;
     542              :             Assert(opaque->nRedirection > 0);
     543          193 :             opaque->nRedirection--;
     544          193 :             opaque->nPlaceholder++;
     545              : 
     546              :             /* remember newest XID among the removed redirects */
     547          322 :             if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
     548          129 :                 TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
     549           64 :                 xlrec.snapshotConflictHorizon = dt->xid;
     550              : 
     551          193 :             ItemPointerSetInvalid(&dt->pointer);
     552              : 
     553          193 :             itemToPlaceholder[xlrec.nToPlaceholder] = i;
     554          193 :             xlrec.nToPlaceholder++;
     555              : 
     556          193 :             hasUpdate = true;
     557              :         }
     558              : 
     559      2746575 :         if (dt->tupstate == SPGIST_PLACEHOLDER)
     560              :         {
     561        82511 :             if (!hasNonPlaceholder)
     562        80925 :                 firstPlaceholder = i;
     563              :         }
     564              :         else
     565              :         {
     566      2664064 :             hasNonPlaceholder = true;
     567              :         }
     568              :     }
     569              : 
     570              :     /*
     571              :      * Any placeholder tuples at the end of page can safely be removed.  We
     572              :      * can't remove ones before the last non-placeholder, though, because we
     573              :      * can't alter the offset numbers of non-placeholder tuples.
     574              :      */
     575        19502 :     if (firstPlaceholder != InvalidOffsetNumber)
     576              :     {
     577              :         /*
     578              :          * We do not store this array to rdata because it's easy to recreate.
     579              :          */
     580        82201 :         for (i = firstPlaceholder; i <= max; i++)
     581        80925 :             itemnos[i - firstPlaceholder] = i;
     582              : 
     583         1276 :         i = max - firstPlaceholder + 1;
     584              :         Assert(opaque->nPlaceholder >= i);
     585         1276 :         opaque->nPlaceholder -= i;
     586              : 
     587              :         /* The array is surely sorted, so can use PageIndexMultiDelete */
     588         1276 :         PageIndexMultiDelete(page, itemnos, i);
     589              : 
     590         1276 :         hasUpdate = true;
     591              :     }
     592              : 
     593        19502 :     xlrec.firstPlaceholder = firstPlaceholder;
     594              : 
     595        19502 :     if (hasUpdate)
     596         1279 :         MarkBufferDirty(buffer);
     597              : 
     598        19502 :     if (hasUpdate && RelationNeedsWAL(index))
     599              :     {
     600              :         XLogRecPtr  recptr;
     601              : 
     602         1279 :         XLogBeginInsert();
     603              : 
     604         1279 :         XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRedirect);
     605         1279 :         XLogRegisterData(itemToPlaceholder,
     606         1279 :                          sizeof(OffsetNumber) * xlrec.nToPlaceholder);
     607              : 
     608         1279 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     609              : 
     610         1279 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
     611              : 
     612         1279 :         PageSetLSN(page, recptr);
     613              :     }
     614              : 
     615        19502 :     END_CRIT_SECTION();
     616        19502 : }
     617              : 
     618              : /*
     619              :  * Process one page during a bulkdelete scan
     620              :  */
     621              : static void
     622         2276 : spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
     623              : {
     624         2276 :     Relation    index = bds->info->index;
     625         2276 :     BlockNumber blkno = BufferGetBlockNumber(buffer);
     626              :     Page        page;
     627              : 
     628         2276 :     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     629         2276 :     page = BufferGetPage(buffer);
     630              : 
     631         2276 :     if (PageIsNew(page))
     632              :     {
     633              :         /*
     634              :          * We found an all-zero page, which could happen if the database
     635              :          * crashed just after extending the file.  Recycle it.
     636              :          */
     637              :     }
     638         2266 :     else if (PageIsEmpty(page))
     639              :     {
     640              :         /* nothing to do */
     641              :     }
     642         2225 :     else if (SpGistPageIsLeaf(page))
     643              :     {
     644         2146 :         if (SpGistBlockIsRoot(blkno))
     645              :         {
     646           16 :             vacuumLeafRoot(bds, index, buffer);
     647              :             /* no need for vacuumRedirectAndPlaceholder */
     648              :         }
     649              :         else
     650              :         {
     651         2130 :             vacuumLeafPage(bds, index, buffer, false);
     652         2130 :             vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     653              :         }
     654              :     }
     655              :     else
     656              :     {
     657              :         /* inner page */
     658           79 :         vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     659              :     }
     660              : 
     661              :     /*
     662              :      * The root pages must never be deleted, nor marked as available in FSM,
     663              :      * because we don't want them ever returned by a search for a place to put
     664              :      * a new tuple.  Otherwise, check for empty page, and make sure the FSM
     665              :      * knows about it.
     666              :      */
     667         2276 :     if (!SpGistBlockIsRoot(blkno))
     668              :     {
     669         2200 :         if (PageIsNew(page) || PageIsEmpty(page))
     670              :         {
     671           38 :             RecordFreeIndexPage(index, blkno);
     672           38 :             bds->stats->pages_deleted++;
     673              :         }
     674              :         else
     675              :         {
     676         2162 :             SpGistSetLastUsedPage(index, buffer);
     677         2162 :             bds->lastFilledBlock = blkno;
     678              :         }
     679              :     }
     680              : 
     681         2276 :     UnlockReleaseBuffer(buffer);
     682         2276 : }
     683              : 
     684              : /*
     685              :  * Process the pending-TID list between pages of the main scan
     686              :  */
     687              : static void
     688          263 : spgprocesspending(spgBulkDeleteState *bds)
     689              : {
     690          263 :     Relation    index = bds->info->index;
     691              :     spgVacPendingItem *pitem;
     692              :     spgVacPendingItem *nitem;
     693              :     BlockNumber blkno;
     694              :     Buffer      buffer;
     695              :     Page        page;
     696              : 
     697        35112 :     for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
     698              :     {
     699        34849 :         if (pitem->done)
     700        17293 :             continue;           /* ignore already-done items */
     701              : 
     702              :         /* call vacuum_delay_point while not holding any buffer lock */
     703        17556 :         vacuum_delay_point(false);
     704              : 
     705              :         /* examine the referenced page */
     706        17556 :         blkno = ItemPointerGetBlockNumber(&pitem->tid);
     707        17556 :         buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
     708        17556 :                                     RBM_NORMAL, bds->info->strategy);
     709        17556 :         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
     710        17556 :         page = BufferGetPage(buffer);
     711              : 
     712        17556 :         if (PageIsNew(page) || SpGistPageIsDeleted(page))
     713              :         {
     714              :             /* Probably shouldn't happen, but ignore it */
     715              :         }
     716        17556 :         else if (SpGistPageIsLeaf(page))
     717              :         {
     718        17293 :             if (SpGistBlockIsRoot(blkno))
     719              :             {
     720              :                 /* this should definitely not happen */
     721            0 :                 elog(ERROR, "redirection leads to root page of index \"%s\"",
     722              :                      RelationGetRelationName(index));
     723              :             }
     724              : 
     725              :             /* deal with any deletable tuples */
     726        17293 :             vacuumLeafPage(bds, index, buffer, true);
     727              :             /* might as well do this while we are here */
     728        17293 :             vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
     729              : 
     730        17293 :             SpGistSetLastUsedPage(index, buffer);
     731              : 
     732              :             /*
     733              :              * We can mark as done not only this item, but any later ones
     734              :              * pointing at the same page, since we vacuumed the whole page.
     735              :              */
     736        17293 :             pitem->done = true;
     737      1516456 :             for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
     738              :             {
     739      1499163 :                 if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
     740          263 :                     nitem->done = true;
     741              :             }
     742              :         }
     743              :         else
     744              :         {
     745              :             /*
     746              :              * On an inner page, visit the referenced inner tuple and add all
     747              :              * its downlinks to the pending list.  We might have pending items
     748              :              * for more than one inner tuple on the same page (in fact this is
     749              :              * pretty likely given the way space allocation works), so get
     750              :              * them all while we are here.
     751              :              */
     752        35112 :             for (nitem = pitem; nitem != NULL; nitem = nitem->next)
     753              :             {
     754        34849 :                 if (nitem->done)
     755            0 :                     continue;
     756        34849 :                 if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
     757              :                 {
     758              :                     OffsetNumber offset;
     759              :                     SpGistInnerTuple innerTuple;
     760              : 
     761        17293 :                     offset = ItemPointerGetOffsetNumber(&nitem->tid);
     762        17293 :                     innerTuple = (SpGistInnerTuple) PageGetItem(page,
     763        17293 :                                                                 PageGetItemId(page, offset));
     764        17293 :                     if (innerTuple->tupstate == SPGIST_LIVE)
     765              :                     {
     766              :                         SpGistNodeTuple node;
     767              :                         int         i;
     768              : 
     769        86465 :                         SGITITERATE(innerTuple, i, node)
     770              :                         {
     771        69172 :                             if (ItemPointerIsValid(&node->t_tid))
     772        34586 :                                 spgAddPendingTID(bds, &node->t_tid);
     773              :                         }
     774              :                     }
     775            0 :                     else if (innerTuple->tupstate == SPGIST_REDIRECT)
     776              :                     {
     777              :                         /* transfer attention to redirect point */
     778            0 :                         spgAddPendingTID(bds,
     779            0 :                                          &((SpGistDeadTuple) innerTuple)->pointer);
     780              :                     }
     781              :                     else
     782            0 :                         elog(ERROR, "unexpected SPGiST tuple state: %d",
     783              :                              innerTuple->tupstate);
     784              : 
     785        17293 :                     nitem->done = true;
     786              :                 }
     787              :             }
     788              :         }
     789              : 
     790        17556 :         UnlockReleaseBuffer(buffer);
     791              :     }
     792              : 
     793          263 :     spgClearPendingList(bds);
     794          263 : }
     795              : 
     796              : /*
     797              :  * Perform a bulkdelete scan
     798              :  */
     799              : static void
     800           38 : spgvacuumscan(spgBulkDeleteState *bds)
     801              : {
     802           38 :     Relation    index = bds->info->index;
     803              :     bool        needLock;
     804              :     BlockNumber num_pages;
     805              :     BlockRangeReadStreamPrivate p;
     806           38 :     ReadStream *stream = NULL;
     807              : 
     808              :     /* Finish setting up spgBulkDeleteState */
     809           38 :     initSpGistState(&bds->spgstate, index);
     810           38 :     bds->pendingList = NULL;
     811           38 :     bds->myXmin = GetActiveSnapshot()->xmin;
     812           38 :     bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
     813              : 
     814              :     /*
     815              :      * Reset counts that will be incremented during the scan; needed in case
     816              :      * of multiple scans during a single VACUUM command
     817              :      */
     818           38 :     bds->stats->estimated_count = false;
     819           38 :     bds->stats->num_index_tuples = 0;
     820           38 :     bds->stats->pages_deleted = 0;
     821              : 
     822              :     /* We can skip locking for new or temp relations */
     823           38 :     needLock = !RELATION_IS_LOCAL(index);
     824           38 :     p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;
     825              : 
     826              :     /*
     827              :      * It is safe to use batchmode as block_range_read_stream_cb takes no
     828              :      * locks.
     829              :      */
     830           38 :     stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
     831              :                                         READ_STREAM_FULL |
     832              :                                         READ_STREAM_USE_BATCHING,
     833           38 :                                         bds->info->strategy,
     834              :                                         index,
     835              :                                         MAIN_FORKNUM,
     836              :                                         block_range_read_stream_cb,
     837              :                                         &p,
     838              :                                         0);
     839              : 
     840              :     /*
     841              :      * The outer loop iterates over all index pages except the metapage, in
     842              :      * physical order (we hope the kernel will cooperate in providing
     843              :      * read-ahead for speed).  It is critical that we visit all leaf pages,
     844              :      * including ones added after we start the scan, else we might fail to
     845              :      * delete some deletable tuples.  See more extensive comments about this
     846              :      * in btvacuumscan().
     847              :      */
     848              :     for (;;)
     849              :     {
     850              :         /* Get the current relation length */
     851           76 :         if (needLock)
     852           76 :             LockRelationForExtension(index, ExclusiveLock);
     853           76 :         num_pages = RelationGetNumberOfBlocks(index);
     854           76 :         if (needLock)
     855           76 :             UnlockRelationForExtension(index, ExclusiveLock);
     856              : 
     857              :         /* Quit if we've scanned the whole relation */
     858           76 :         if (p.current_blocknum >= num_pages)
     859           38 :             break;
     860              : 
     861           38 :         p.last_exclusive = num_pages;
     862              : 
     863              :         /* Iterate over pages, then loop back to recheck length */
     864              :         while (true)
     865         2276 :         {
     866              :             Buffer      buf;
     867              : 
     868              :             /* call vacuum_delay_point while not holding any buffer lock */
     869         2314 :             vacuum_delay_point(false);
     870              : 
     871         2314 :             buf = read_stream_next_buffer(stream, NULL);
     872              : 
     873         2314 :             if (!BufferIsValid(buf))
     874           38 :                 break;
     875              : 
     876         2276 :             spgvacuumpage(bds, buf);
     877              : 
     878              :             /* empty the pending-list after each page */
     879         2276 :             if (bds->pendingList != NULL)
     880          263 :                 spgprocesspending(bds);
     881              :         }
     882              : 
     883              :         /*
     884              :          * We have to reset the read stream to use it again. After returning
     885              :          * InvalidBuffer, the read stream API won't invoke our callback again
     886              :          * until the stream has been reset.
     887              :          */
     888           38 :         read_stream_reset(stream);
     889              :     }
     890              : 
     891           38 :     read_stream_end(stream);
     892              : 
     893              :     /* Propagate local lastUsedPages cache to metablock */
     894           38 :     SpGistUpdateMetaPage(index);
     895              : 
     896              :     /*
     897              :      * If we found any empty pages (and recorded them in the FSM), then
     898              :      * forcibly update the upper-level FSM pages to ensure that searchers can
     899              :      * find them.  It's possible that the pages were also found during
     900              :      * previous scans and so this is a waste of time, but it's cheap enough
     901              :      * relative to scanning the index that it shouldn't matter much, and
     902              :      * making sure that free pages are available sooner not later seems
     903              :      * worthwhile.
     904              :      *
     905              :      * Note that if no empty pages exist, we don't bother vacuuming the FSM at
     906              :      * all.
     907              :      */
     908           38 :     if (bds->stats->pages_deleted > 0)
     909           23 :         IndexFreeSpaceMapVacuum(index);
     910              : 
     911              :     /*
     912              :      * Truncate index if possible
     913              :      *
     914              :      * XXX disabled because it's unsafe due to possible concurrent inserts.
     915              :      * We'd have to rescan the pages to make sure they're still empty, and it
     916              :      * doesn't seem worth it.  Note that btree doesn't do this either.
     917              :      *
     918              :      * Another reason not to truncate is that it could invalidate the cached
     919              :      * pages-with-freespace pointers in the metapage and other backends'
     920              :      * relation caches, that is leave them pointing to nonexistent pages.
     921              :      * Adding RelationGetNumberOfBlocks calls to protect the places that use
     922              :      * those pointers would be unduly expensive.
     923              :      */
     924              : #ifdef NOT_USED
     925              :     if (num_pages > bds->lastFilledBlock + 1)
     926              :     {
     927              :         BlockNumber lastBlock = num_pages - 1;
     928              : 
     929              :         num_pages = bds->lastFilledBlock + 1;
     930              :         RelationTruncate(index, num_pages);
     931              :         bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
     932              :         bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
     933              :     }
     934              : #endif
     935              : 
     936              :     /* Report final stats */
     937           38 :     bds->stats->num_pages = num_pages;
     938           38 :     bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
     939           38 :     bds->stats->pages_free = bds->stats->pages_deleted;
     940           38 : }
     941              : 
     942              : /*
     943              :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     944              :  * The set of target tuples is specified via a callback routine that tells
     945              :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     946              :  *
     947              :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     948              :  */
     949              : IndexBulkDeleteResult *
     950            5 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     951              :               IndexBulkDeleteCallback callback, void *callback_state)
     952              : {
     953              :     spgBulkDeleteState bds;
     954              : 
     955              :     /* allocate stats if first time through, else re-use existing struct */
     956            5 :     if (stats == NULL)
     957            5 :         stats = palloc0_object(IndexBulkDeleteResult);
     958            5 :     bds.info = info;
     959            5 :     bds.stats = stats;
     960            5 :     bds.callback = callback;
     961            5 :     bds.callback_state = callback_state;
     962              : 
     963            5 :     spgvacuumscan(&bds);
     964              : 
     965            5 :     return stats;
     966              : }
     967              : 
     968              : /* Dummy callback to delete no tuples during spgvacuumcleanup */
     969              : static bool
     970      2875969 : dummy_callback(ItemPointer itemptr, void *state)
     971              : {
     972      2875969 :     return false;
     973              : }
     974              : 
     975              : /*
     976              :  * Post-VACUUM cleanup.
     977              :  *
     978              :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     979              :  */
     980              : IndexBulkDeleteResult *
     981           44 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     982              : {
     983              :     spgBulkDeleteState bds;
     984              : 
     985              :     /* No-op in ANALYZE ONLY mode */
     986           44 :     if (info->analyze_only)
     987            6 :         return stats;
     988              : 
     989              :     /*
     990              :      * We don't need to scan the index if there was a preceding bulkdelete
     991              :      * pass.  Otherwise, make a pass that won't delete any live tuples, but
     992              :      * might still accomplish useful stuff with redirect/placeholder cleanup
     993              :      * and/or FSM housekeeping, and in any case will provide stats.
     994              :      */
     995           38 :     if (stats == NULL)
     996              :     {
     997           33 :         stats = palloc0_object(IndexBulkDeleteResult);
     998           33 :         bds.info = info;
     999           33 :         bds.stats = stats;
    1000           33 :         bds.callback = dummy_callback;
    1001           33 :         bds.callback_state = NULL;
    1002              : 
    1003           33 :         spgvacuumscan(&bds);
    1004              :     }
    1005              : 
    1006              :     /*
    1007              :      * It's quite possible for us to be fooled by concurrent tuple moves into
    1008              :      * double-counting some index tuples, so disbelieve any total that exceeds
    1009              :      * the underlying heap's count ... if we know that accurately.  Otherwise
    1010              :      * this might just make matters worse.
    1011              :      */
    1012           38 :     if (!info->estimated_count)
    1013              :     {
    1014           38 :         if (stats->num_index_tuples > info->num_heap_tuples)
    1015            0 :             stats->num_index_tuples = info->num_heap_tuples;
    1016              :     }
    1017              : 
    1018           38 :     return stats;
    1019              : }
        

Generated by: LCOV version 2.0-1