LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistvacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 179 205 87.3 %
Date: 2025-04-01 14:15:22 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gistvacuum.c
       4             :  *    vacuuming routines for the postgres GiST index access method.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/gist/gistvacuum.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/genam.h"
      18             : #include "access/gist_private.h"
      19             : #include "access/transam.h"
      20             : #include "commands/vacuum.h"
      21             : #include "lib/integerset.h"
      22             : #include "miscadmin.h"
      23             : #include "storage/indexfsm.h"
      24             : #include "storage/lmgr.h"
      25             : #include "storage/read_stream.h"
      26             : #include "utils/memutils.h"
      27             : 
      28             : /* Working state needed by gistbulkdelete */
      29             : typedef struct
      30             : {
      31             :     IndexVacuumInfo *info;
      32             :     IndexBulkDeleteResult *stats;
      33             :     IndexBulkDeleteCallback callback;
      34             :     void       *callback_state;
      35             :     GistNSN     startNSN;
      36             : 
      37             :     /*
      38             :      * These are used to memorize all internal and empty leaf pages.  They are
      39             :      * used for deleting all the empty pages.
      40             :      */
      41             :     IntegerSet *internal_page_set;
      42             :     IntegerSet *empty_leaf_set;
      43             :     MemoryContext page_set_context;
      44             : } GistVacState;
      45             : 
      46             : static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      47             :                            IndexBulkDeleteCallback callback, void *callback_state);
      48             : static void gistvacuumpage(GistVacState *vstate, Buffer buffer);
      49             : static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
      50             :                                           GistVacState *vstate);
      51             : static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      52             :                            Buffer parentBuffer, OffsetNumber downlink,
      53             :                            Buffer leafBuffer);
      54             : 
      55             : /*
      56             :  * VACUUM bulkdelete stage: remove index entries.
      57             :  */
      58             : IndexBulkDeleteResult *
      59          42 : gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      60             :                IndexBulkDeleteCallback callback, void *callback_state)
      61             : {
      62             :     /* allocate stats if first time through, else re-use existing struct */
      63          42 :     if (stats == NULL)
      64          42 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
      65             : 
      66          42 :     gistvacuumscan(info, stats, callback, callback_state);
      67             : 
      68          42 :     return stats;
      69             : }
      70             : 
      71             : /*
      72             :  * VACUUM cleanup stage: delete empty pages, and update index statistics.
      73             :  */
      74             : IndexBulkDeleteResult *
      75          82 : gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
      76             : {
      77             :     /* No-op in ANALYZE ONLY mode */
      78          82 :     if (info->analyze_only)
      79          16 :         return stats;
      80             : 
      81             :     /*
      82             :      * If gistbulkdelete was called, we need not do anything, just return the
      83             :      * stats from the latest gistbulkdelete call.  If it wasn't called, we
      84             :      * still need to do a pass over the index, to obtain index statistics.
      85             :      */
      86          66 :     if (stats == NULL)
      87             :     {
      88          42 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
      89          42 :         gistvacuumscan(info, stats, NULL, NULL);
      90             :     }
      91             : 
      92             :     /*
      93             :      * It's quite possible for us to be fooled by concurrent page splits into
      94             :      * double-counting some index tuples, so disbelieve any total that exceeds
      95             :      * the underlying heap's count ... if we know that accurately.  Otherwise
      96             :      * this might just make matters worse.
      97             :      */
      98          66 :     if (!info->estimated_count)
      99             :     {
     100          66 :         if (stats->num_index_tuples > info->num_heap_tuples)
     101           0 :             stats->num_index_tuples = info->num_heap_tuples;
     102             :     }
     103             : 
     104          66 :     return stats;
     105             : }
     106             : 
     107             : /*
     108             :  * gistvacuumscan --- scan the index for VACUUMing purposes
     109             :  *
     110             :  * This scans the index for leaf tuples that are deletable according to the
     111             :  * vacuum callback, and updates the stats.  Both btbulkdelete and
     112             :  * btvacuumcleanup invoke this (the latter only if no btbulkdelete call
     113             :  * occurred).
     114             :  *
     115             :  * This also makes note of any empty leaf pages, as well as all internal
     116             :  * pages while looping over all index pages.  After scanning all the pages, we
     117             :  * remove the empty pages so that they can be reused.  Any deleted pages are
     118             :  * added directly to the free space map.  (They should've been added there
     119             :  * when they were originally deleted, already, but it's possible that the FSM
     120             :  * was lost at a crash, for example.)
     121             :  *
     122             :  * The caller is responsible for initially allocating/zeroing a stats struct.
     123             :  */
     124             : static void
     125          84 : gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     126             :                IndexBulkDeleteCallback callback, void *callback_state)
     127             : {
     128          84 :     Relation    rel = info->index;
     129             :     GistVacState vstate;
     130             :     BlockNumber num_pages;
     131             :     bool        needLock;
     132             :     MemoryContext oldctx;
     133             :     BlockRangeReadStreamPrivate p;
     134          84 :     ReadStream *stream = NULL;
     135             : 
     136             :     /*
     137             :      * Reset fields that track information about the entire index now.  This
     138             :      * avoids double-counting in the case where a single VACUUM command
     139             :      * requires multiple scans of the index.
     140             :      *
     141             :      * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     142             :      * since they track information about the VACUUM command, and so must last
     143             :      * across each call to gistvacuumscan().
     144             :      *
     145             :      * (Note that pages_free is treated as state about the whole index, not
     146             :      * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
     147             :      * calls are idempotent, and get repeated for the same deleted pages in
     148             :      * some scenarios.  The point for us is to track the number of recyclable
     149             :      * pages in the index at the end of the VACUUM command.)
     150             :      */
     151          84 :     stats->num_pages = 0;
     152          84 :     stats->estimated_count = false;
     153          84 :     stats->num_index_tuples = 0;
     154          84 :     stats->pages_deleted = 0;
     155          84 :     stats->pages_free = 0;
     156             : 
     157             :     /*
     158             :      * Create the integer sets to remember all the internal and the empty leaf
     159             :      * pages in page_set_context.  Internally, the integer set will remember
     160             :      * this context so that the subsequent allocations for these integer sets
     161             :      * will be done from the same context.
     162             :      *
     163             :      * XXX the allocation sizes used below pre-date generation context's block
     164             :      * growing code.  These values should likely be benchmarked and set to
     165             :      * more suitable values.
     166             :      */
     167          84 :     vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
     168             :                                                       "GiST VACUUM page set context",
     169             :                                                       16 * 1024,
     170             :                                                       16 * 1024,
     171             :                                                       16 * 1024);
     172          84 :     oldctx = MemoryContextSwitchTo(vstate.page_set_context);
     173          84 :     vstate.internal_page_set = intset_create();
     174          84 :     vstate.empty_leaf_set = intset_create();
     175          84 :     MemoryContextSwitchTo(oldctx);
     176             : 
     177             :     /* Set up info to pass down to gistvacuumpage */
     178          84 :     vstate.info = info;
     179          84 :     vstate.stats = stats;
     180          84 :     vstate.callback = callback;
     181          84 :     vstate.callback_state = callback_state;
     182          84 :     if (RelationNeedsWAL(rel))
     183          84 :         vstate.startNSN = GetInsertRecPtr();
     184             :     else
     185           0 :         vstate.startNSN = gistGetFakeLSN(rel);
     186             : 
     187             :     /*
     188             :      * The outer loop iterates over all index pages, in physical order (we
     189             :      * hope the kernel will cooperate in providing read-ahead for speed).  It
     190             :      * is critical that we visit all leaf pages, including ones added after we
     191             :      * start the scan, else we might fail to delete some deletable tuples.
     192             :      * Hence, we must repeatedly check the relation length.  We must acquire
     193             :      * the relation-extension lock while doing so to avoid a race condition:
     194             :      * if someone else is extending the relation, there is a window where
     195             :      * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     196             :      * write-locked by gistNewBuffer().  If we manage to scan such a page
     197             :      * here, we'll improperly assume it can be recycled.  Taking the lock
     198             :      * synchronizes things enough to prevent a problem: either num_pages won't
     199             :      * include the new page, or gistNewBuffer already has write lock on the
     200             :      * buffer and it will be fully initialized before we can examine it.  (See
     201             :      * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     202             :      * if a page is added immediately after we look; the page splitting code
     203             :      * already has write-lock on the left page before it adds a right page, so
     204             :      * we must already have processed any tuples due to be moved into such a
     205             :      * page.
     206             :      *
     207             :      * We can skip locking for new or temp relations, however, since no one
     208             :      * else could be accessing them.
     209             :      */
     210          84 :     needLock = !RELATION_IS_LOCAL(rel);
     211             : 
     212          84 :     p.current_blocknum = GIST_ROOT_BLKNO;
     213             : 
     214             :     /*
     215             :      * It is safe to use batchmode as block_range_read_stream_cb takes no
     216             :      * locks.
     217             :      */
     218          84 :     stream = read_stream_begin_relation(READ_STREAM_FULL |
     219             :                                         READ_STREAM_USE_BATCHING,
     220             :                                         info->strategy,
     221             :                                         rel,
     222             :                                         MAIN_FORKNUM,
     223             :                                         block_range_read_stream_cb,
     224             :                                         &p,
     225             :                                         0);
     226             :     for (;;)
     227             :     {
     228             :         /* Get the current relation length */
     229         168 :         if (needLock)
     230         168 :             LockRelationForExtension(rel, ExclusiveLock);
     231         168 :         num_pages = RelationGetNumberOfBlocks(rel);
     232         168 :         if (needLock)
     233         168 :             UnlockRelationForExtension(rel, ExclusiveLock);
     234             : 
     235             :         /* Quit if we've scanned the whole relation */
     236         168 :         if (p.current_blocknum >= num_pages)
     237          84 :             break;
     238             : 
     239          84 :         p.last_exclusive = num_pages;
     240             : 
     241             :         /* Iterate over pages, then loop back to recheck relation length */
     242             :         while (true)
     243        2338 :         {
     244             :             Buffer      buf;
     245             : 
     246             :             /* call vacuum_delay_point while not holding any buffer lock */
     247        2422 :             vacuum_delay_point(false);
     248             : 
     249        2422 :             buf = read_stream_next_buffer(stream, NULL);
     250             : 
     251        2422 :             if (!BufferIsValid(buf))
     252          84 :                 break;
     253             : 
     254        2338 :             gistvacuumpage(&vstate, buf);
     255             :         }
     256             : 
     257             :         Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
     258             : 
     259             :         /*
     260             :          * We have to reset the read stream to use it again. After returning
     261             :          * InvalidBuffer, the read stream API won't invoke our callback again
     262             :          * until the stream has been reset.
     263             :          */
     264          84 :         read_stream_reset(stream);
     265             :     }
     266             : 
     267          84 :     read_stream_end(stream);
     268             : 
     269             :     /*
     270             :      * If we found any recyclable pages (and recorded them in the FSM), then
     271             :      * forcibly update the upper-level FSM pages to ensure that searchers can
     272             :      * find them.  It's possible that the pages were also found during
     273             :      * previous scans and so this is a waste of time, but it's cheap enough
     274             :      * relative to scanning the index that it shouldn't matter much, and
     275             :      * making sure that free pages are available sooner not later seems
     276             :      * worthwhile.
     277             :      *
     278             :      * Note that if no recyclable pages exist, we don't bother vacuuming the
     279             :      * FSM at all.
     280             :      */
     281          84 :     if (stats->pages_free > 0)
     282           0 :         IndexFreeSpaceMapVacuum(rel);
     283             : 
     284             :     /* update statistics */
     285          84 :     stats->num_pages = num_pages;
     286             : 
     287             :     /*
     288             :      * If we saw any empty pages, try to unlink them from the tree so that
     289             :      * they can be reused.
     290             :      */
     291          84 :     gistvacuum_delete_empty_pages(info, &vstate);
     292             : 
     293             :     /* we don't need the internal and empty page sets anymore */
     294          84 :     MemoryContextDelete(vstate.page_set_context);
     295          84 :     vstate.page_set_context = NULL;
     296          84 :     vstate.internal_page_set = NULL;
     297          84 :     vstate.empty_leaf_set = NULL;
     298          84 : }
     299             : 
     300             : /*
     301             :  * gistvacuumpage --- VACUUM one page
     302             :  *
     303             :  * This processes a single page for gistbulkdelete(). `buffer` contains the
     304             :  * page to process. In some cases we must go back and reexamine
     305             :  * previously-scanned pages; this routine recurses when necessary to handle
     306             :  * that case.
     307             :  */
     308             : static void
     309        2338 : gistvacuumpage(GistVacState *vstate, Buffer buffer)
     310             : {
     311        2338 :     IndexVacuumInfo *info = vstate->info;
     312        2338 :     IndexBulkDeleteCallback callback = vstate->callback;
     313        2338 :     void       *callback_state = vstate->callback_state;
     314        2338 :     Relation    rel = info->index;
     315        2338 :     BlockNumber orig_blkno = BufferGetBlockNumber(buffer);
     316             :     Page        page;
     317             :     BlockNumber recurse_to;
     318             : 
     319             :     /*
     320             :      * orig_blkno is the highest block number reached by the outer
     321             :      * gistvacuumscan() loop. This will be the same as blkno unless we are
     322             :      * recursing to reexamine a previous page.
     323             :      */
     324        2338 :     BlockNumber blkno = orig_blkno;
     325             : 
     326        2338 : restart:
     327        2338 :     recurse_to = InvalidBlockNumber;
     328             : 
     329             :     /*
     330             :      * We are not going to stay here for a long time, aggressively grab an
     331             :      * exclusive lock.
     332             :      */
     333        2338 :     LockBuffer(buffer, GIST_EXCLUSIVE);
     334        2338 :     page = (Page) BufferGetPage(buffer);
     335             : 
     336        2338 :     if (gistPageRecyclable(page))
     337             :     {
     338             :         /* Okay to recycle this page */
     339           0 :         RecordFreeIndexPage(rel, blkno);
     340           0 :         vstate->stats->pages_deleted++;
     341           0 :         vstate->stats->pages_free++;
     342             :     }
     343        2338 :     else if (GistPageIsDeleted(page))
     344             :     {
     345             :         /* Already deleted, but can't recycle yet */
     346           0 :         vstate->stats->pages_deleted++;
     347             :     }
     348        2338 :     else if (GistPageIsLeaf(page))
     349             :     {
     350             :         OffsetNumber todelete[MaxOffsetNumber];
     351        2286 :         int         ntodelete = 0;
     352             :         int         nremain;
     353        2286 :         GISTPageOpaque opaque = GistPageGetOpaque(page);
     354        2286 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     355             : 
     356             :         /*
     357             :          * Check whether we need to recurse back to earlier pages.  What we
     358             :          * are concerned about is a page split that happened since we started
     359             :          * the vacuum scan.  If the split moved some tuples to a lower page
     360             :          * then we might have missed 'em.  If so, set up for tail recursion.
     361             :          *
     362             :          * This is similar to the checks we do during searches, when following
     363             :          * a downlink, but we don't need to jump to higher-numbered pages,
     364             :          * because we will process them later, anyway.
     365             :          */
     366        4572 :         if ((GistFollowRight(page) ||
     367        2286 :              vstate->startNSN < GistPageGetNSN(page)) &&
     368           0 :             (opaque->rightlink != InvalidBlockNumber) &&
     369           0 :             (opaque->rightlink < orig_blkno))
     370             :         {
     371           0 :             recurse_to = opaque->rightlink;
     372             :         }
     373             : 
     374             :         /*
     375             :          * Scan over all items to see which ones need to be deleted according
     376             :          * to the callback function.
     377             :          */
     378        2286 :         if (callback)
     379             :         {
     380             :             OffsetNumber off;
     381             : 
     382       62308 :             for (off = FirstOffsetNumber;
     383             :                  off <= maxoff;
     384       61780 :                  off = OffsetNumberNext(off))
     385             :             {
     386       61780 :                 ItemId      iid = PageGetItemId(page, off);
     387       61780 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     388             : 
     389       61780 :                 if (callback(&(idxtuple->t_tid), callback_state))
     390       44740 :                     todelete[ntodelete++] = off;
     391             :             }
     392             :         }
     393             : 
     394             :         /*
     395             :          * Apply any needed deletes.  We issue just one WAL record per page,
     396             :          * so as to minimize WAL traffic.
     397             :          */
     398        2286 :         if (ntodelete > 0)
     399             :         {
     400         510 :             START_CRIT_SECTION();
     401             : 
     402         510 :             MarkBufferDirty(buffer);
     403             : 
     404         510 :             PageIndexMultiDelete(page, todelete, ntodelete);
     405         510 :             GistMarkTuplesDeleted(page);
     406             : 
     407         510 :             if (RelationNeedsWAL(rel))
     408         510 :             {
     409             :                 XLogRecPtr  recptr;
     410             : 
     411         510 :                 recptr = gistXLogUpdate(buffer,
     412             :                                         todelete, ntodelete,
     413             :                                         NULL, 0, InvalidBuffer);
     414         510 :                 PageSetLSN(page, recptr);
     415             :             }
     416             :             else
     417           0 :                 PageSetLSN(page, gistGetFakeLSN(rel));
     418             : 
     419         510 :             END_CRIT_SECTION();
     420             : 
     421         510 :             vstate->stats->tuples_removed += ntodelete;
     422             :             /* must recompute maxoff */
     423         510 :             maxoff = PageGetMaxOffsetNumber(page);
     424             :         }
     425             : 
     426        2286 :         nremain = maxoff - FirstOffsetNumber + 1;
     427        2286 :         if (nremain == 0)
     428             :         {
     429             :             /*
     430             :              * The page is now completely empty.  Remember its block number,
     431             :              * so that we will try to delete the page in the second stage.
     432             :              *
     433             :              * Skip this when recursing, because IntegerSet requires that the
     434             :              * values are added in ascending order.  The next VACUUM will pick
     435             :              * it up.
     436             :              */
     437         174 :             if (blkno == orig_blkno)
     438         174 :                 intset_add_member(vstate->empty_leaf_set, blkno);
     439             :         }
     440             :         else
     441        2112 :             vstate->stats->num_index_tuples += nremain;
     442             :     }
     443             :     else
     444             :     {
     445             :         /*
     446             :          * On an internal page, check for "invalid tuples", left behind by an
     447             :          * incomplete page split on PostgreSQL 9.0 or below.  These are not
     448             :          * created by newer PostgreSQL versions, but unfortunately, there is
     449             :          * no version number anywhere in a GiST index, so we don't know
     450             :          * whether this index might still contain invalid tuples or not.
     451             :          */
     452          52 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     453             :         OffsetNumber off;
     454             : 
     455        2306 :         for (off = FirstOffsetNumber;
     456             :              off <= maxoff;
     457        2254 :              off = OffsetNumberNext(off))
     458             :         {
     459        2254 :             ItemId      iid = PageGetItemId(page, off);
     460        2254 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     461             : 
     462        2254 :             if (GistTupleIsInvalid(idxtuple))
     463           0 :                 ereport(LOG,
     464             :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     465             :                                 RelationGetRelationName(rel)),
     466             :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     467             :                          errhint("Please REINDEX it.")));
     468             :         }
     469             : 
     470             :         /*
     471             :          * Remember the block number of this page, so that we can revisit it
     472             :          * later in gistvacuum_delete_empty_pages(), when we search for
     473             :          * parents of empty leaf pages.
     474             :          */
     475          52 :         if (blkno == orig_blkno)
     476          52 :             intset_add_member(vstate->internal_page_set, blkno);
     477             :     }
     478             : 
     479        2338 :     UnlockReleaseBuffer(buffer);
     480             : 
     481             :     /*
     482             :      * This is really tail recursion, but if the compiler is too stupid to
     483             :      * optimize it as such, we'd eat an uncomfortably large amount of stack
     484             :      * space per recursion level (due to the deletable[] array).  A failure is
     485             :      * improbable since the number of levels isn't likely to be large ... but
     486             :      * just in case, let's hand-optimize into a loop.
     487             :      */
     488        2338 :     if (recurse_to != InvalidBlockNumber)
     489             :     {
     490           0 :         blkno = recurse_to;
     491             : 
     492             :         /* check for vacuum delay while not holding any buffer lock */
     493           0 :         vacuum_delay_point(false);
     494             : 
     495           0 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     496             :                                     info->strategy);
     497           0 :         goto restart;
     498             :     }
     499        2338 : }
     500             : 
     501             : /*
     502             :  * Scan all internal pages, and try to delete their empty child pages.
     503             :  */
     504             : static void
     505          84 : gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
     506             : {
     507          84 :     Relation    rel = info->index;
     508             :     BlockNumber empty_pages_remaining;
     509             :     uint64      blkno;
     510             : 
     511             :     /*
     512             :      * Rescan all inner pages to find those that have empty child pages.
     513             :      */
     514          84 :     empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
     515          84 :     intset_begin_iterate(vstate->internal_page_set);
     516         100 :     while (empty_pages_remaining > 0 &&
     517          14 :            intset_iterate_next(vstate->internal_page_set, &blkno))
     518             :     {
     519             :         Buffer      buffer;
     520             :         Page        page;
     521             :         OffsetNumber off,
     522             :                     maxoff;
     523             :         OffsetNumber todelete[MaxOffsetNumber];
     524             :         BlockNumber leafs_to_delete[MaxOffsetNumber];
     525             :         int         ntodelete;
     526             :         int         deleted;
     527             : 
     528           2 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
     529             :                                     RBM_NORMAL, info->strategy);
     530             : 
     531           2 :         LockBuffer(buffer, GIST_SHARE);
     532           2 :         page = (Page) BufferGetPage(buffer);
     533             : 
     534           2 :         if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
     535             :         {
     536             :             /*
     537             :              * This page was an internal page earlier, but now it's something
     538             :              * else. Shouldn't happen...
     539             :              */
     540             :             Assert(false);
     541           0 :             UnlockReleaseBuffer(buffer);
     542           0 :             continue;
     543             :         }
     544             : 
     545             :         /*
     546             :          * Scan all the downlinks, and see if any of them point to empty leaf
     547             :          * pages.
     548             :          */
     549           2 :         maxoff = PageGetMaxOffsetNumber(page);
     550           2 :         ntodelete = 0;
     551         328 :         for (off = FirstOffsetNumber;
     552         326 :              off <= maxoff && ntodelete < maxoff - 1;
     553         326 :              off = OffsetNumberNext(off))
     554             :         {
     555         326 :             ItemId      iid = PageGetItemId(page, off);
     556         326 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     557             :             BlockNumber leafblk;
     558             : 
     559         326 :             leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     560         326 :             if (intset_is_member(vstate->empty_leaf_set, leafblk))
     561             :             {
     562         162 :                 leafs_to_delete[ntodelete] = leafblk;
     563         162 :                 todelete[ntodelete++] = off;
     564             :             }
     565             :         }
     566             : 
     567             :         /*
     568             :          * In order to avoid deadlock, child page must be locked before
     569             :          * parent, so we must release the lock on the parent, lock the child,
     570             :          * and then re-acquire the lock the parent.  (And we wouldn't want to
     571             :          * do I/O, while holding a lock, anyway.)
     572             :          *
     573             :          * At the instant that we're not holding a lock on the parent, the
     574             :          * downlink might get moved by a concurrent insert, so we must
     575             :          * re-check that it still points to the same child page after we have
     576             :          * acquired both locks.  Also, another backend might have inserted a
     577             :          * tuple to the page, so that it is no longer empty.  gistdeletepage()
     578             :          * re-checks all these conditions.
     579             :          */
     580           2 :         LockBuffer(buffer, GIST_UNLOCK);
     581             : 
     582           2 :         deleted = 0;
     583         164 :         for (int i = 0; i < ntodelete; i++)
     584             :         {
     585             :             Buffer      leafbuf;
     586             : 
     587             :             /*
     588             :              * Don't remove the last downlink from the parent.  That would
     589             :              * confuse the insertion code.
     590             :              */
     591         162 :             if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
     592           0 :                 break;
     593             : 
     594         162 :             leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
     595             :                                          RBM_NORMAL, info->strategy);
     596         162 :             LockBuffer(leafbuf, GIST_EXCLUSIVE);
     597         162 :             gistcheckpage(rel, leafbuf);
     598             : 
     599         162 :             LockBuffer(buffer, GIST_EXCLUSIVE);
     600         162 :             if (gistdeletepage(info, vstate->stats,
     601         162 :                                buffer, todelete[i] - deleted,
     602             :                                leafbuf))
     603         162 :                 deleted++;
     604         162 :             LockBuffer(buffer, GIST_UNLOCK);
     605             : 
     606         162 :             UnlockReleaseBuffer(leafbuf);
     607             :         }
     608             : 
     609           2 :         ReleaseBuffer(buffer);
     610             : 
     611             :         /*
     612             :          * We can stop the scan as soon as we have seen the downlinks, even if
     613             :          * we were not able to remove them all.
     614             :          */
     615           2 :         empty_pages_remaining -= ntodelete;
     616             :     }
     617          84 : }
     618             : 
     619             : /*
     620             :  * gistdeletepage takes a leaf page, and its parent, and tries to delete the
     621             :  * leaf.  Both pages must be locked.
     622             :  *
     623             :  * Even if the page was empty when we first saw it, a concurrent inserter might
     624             :  * have added a tuple to it since.  Similarly, the downlink might have moved.
     625             :  * We re-check all the conditions, to make sure the page is still deletable,
     626             :  * before modifying anything.
     627             :  *
     628             :  * Returns true, if the page was deleted, and false if a concurrent update
     629             :  * prevented it.
     630             :  */
     631             : static bool
     632         162 : gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     633             :                Buffer parentBuffer, OffsetNumber downlink,
     634             :                Buffer leafBuffer)
     635             : {
     636         162 :     Page        parentPage = BufferGetPage(parentBuffer);
     637         162 :     Page        leafPage = BufferGetPage(leafBuffer);
     638             :     ItemId      iid;
     639             :     IndexTuple  idxtuple;
     640             :     XLogRecPtr  recptr;
     641             :     FullTransactionId txid;
     642             : 
     643             :     /*
     644             :      * Check that the leaf is still empty and deletable.
     645             :      */
     646         162 :     if (!GistPageIsLeaf(leafPage))
     647             :     {
     648             :         /* a leaf page should never become a non-leaf page */
     649             :         Assert(false);
     650           0 :         return false;
     651             :     }
     652             : 
     653         162 :     if (GistFollowRight(leafPage))
     654           0 :         return false;           /* don't mess with a concurrent page split */
     655             : 
     656         162 :     if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
     657           0 :         return false;           /* not empty anymore */
     658             : 
     659             :     /*
     660             :      * Ok, the leaf is deletable.  Is the downlink in the parent page still
     661             :      * valid?  It might have been moved by a concurrent insert.  We could try
     662             :      * to re-find it by scanning the page again, possibly moving right if the
     663             :      * was split.  But for now, let's keep it simple and just give up.  The
     664             :      * next VACUUM will pick it up.
     665             :      */
     666         162 :     if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
     667         162 :         GistPageIsLeaf(parentPage))
     668             :     {
     669             :         /* shouldn't happen, internal pages are never deleted */
     670             :         Assert(false);
     671           0 :         return false;
     672             :     }
     673             : 
     674         162 :     if (PageGetMaxOffsetNumber(parentPage) < downlink
     675         162 :         || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
     676           0 :         return false;
     677             : 
     678         162 :     iid = PageGetItemId(parentPage, downlink);
     679         162 :     idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
     680         324 :     if (BufferGetBlockNumber(leafBuffer) !=
     681         162 :         ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
     682           0 :         return false;
     683             : 
     684             :     /*
     685             :      * All good, proceed with the deletion.
     686             :      *
     687             :      * The page cannot be immediately recycled, because in-progress scans that
     688             :      * saw the downlink might still visit it.  Mark the page with the current
     689             :      * next-XID counter, so that we know when it can be recycled.  Once that
     690             :      * XID becomes older than GlobalXmin, we know that all scans that are
     691             :      * currently in progress must have ended.  (That's much more conservative
     692             :      * than needed, but let's keep it safe and simple.)
     693             :      */
     694         162 :     txid = ReadNextFullTransactionId();
     695             : 
     696         162 :     START_CRIT_SECTION();
     697             : 
     698             :     /* mark the page as deleted */
     699         162 :     MarkBufferDirty(leafBuffer);
     700         162 :     GistPageSetDeleted(leafPage, txid);
     701         162 :     stats->pages_newly_deleted++;
     702         162 :     stats->pages_deleted++;
     703             : 
     704             :     /* remove the downlink from the parent */
     705         162 :     MarkBufferDirty(parentBuffer);
     706         162 :     PageIndexTupleDelete(parentPage, downlink);
     707             : 
     708         162 :     if (RelationNeedsWAL(info->index))
     709         162 :         recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
     710             :     else
     711           0 :         recptr = gistGetFakeLSN(info->index);
     712         162 :     PageSetLSN(parentPage, recptr);
     713         162 :     PageSetLSN(leafPage, recptr);
     714             : 
     715         162 :     END_CRIT_SECTION();
     716             : 
     717         162 :     return true;
     718             : }

Generated by: LCOV version 1.14