LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistvacuum.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.4 % 207 181
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 6 6
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * gistvacuum.c
       4              :  *    vacuuming routines for the postgres GiST index access method.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/gist/gistvacuum.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/genam.h"
      18              : #include "access/gist_private.h"
      19              : #include "access/transam.h"
      20              : #include "commands/vacuum.h"
      21              : #include "lib/integerset.h"
      22              : #include "miscadmin.h"
      23              : #include "storage/indexfsm.h"
      24              : #include "storage/lmgr.h"
      25              : #include "storage/read_stream.h"
      26              : #include "utils/memutils.h"
      27              : 
      28              : /* Working state needed by gistbulkdelete */
      29              : typedef struct
      30              : {
      31              :     IndexVacuumInfo *info;
      32              :     IndexBulkDeleteResult *stats;
      33              :     IndexBulkDeleteCallback callback;
      34              :     void       *callback_state;
      35              :     GistNSN     startNSN;
      36              : 
      37              :     /*
      38              :      * These are used to memorize all internal and empty leaf pages.  They are
      39              :      * used for deleting all the empty pages.
      40              :      */
      41              :     IntegerSet *internal_page_set;
      42              :     IntegerSet *empty_leaf_set;
      43              :     MemoryContext page_set_context;
      44              : } GistVacState;
      45              : 
      46              : static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      47              :                            IndexBulkDeleteCallback callback, void *callback_state);
      48              : static void gistvacuumpage(GistVacState *vstate, Buffer buffer);
      49              : static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
      50              :                                           GistVacState *vstate);
      51              : static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      52              :                            Buffer parentBuffer, OffsetNumber downlink,
      53              :                            Buffer leafBuffer);
      54              : 
      55              : /*
      56              :  * VACUUM bulkdelete stage: remove index entries.
      57              :  */
      58              : IndexBulkDeleteResult *
      59           21 : gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      60              :                IndexBulkDeleteCallback callback, void *callback_state)
      61              : {
      62              :     /* allocate stats if first time through, else re-use existing struct */
      63           21 :     if (stats == NULL)
      64           21 :         stats = palloc0_object(IndexBulkDeleteResult);
      65              : 
      66           21 :     gistvacuumscan(info, stats, callback, callback_state);
      67              : 
      68           21 :     return stats;
      69              : }
      70              : 
      71              : /*
      72              :  * VACUUM cleanup stage: delete empty pages, and update index statistics.
      73              :  */
      74              : IndexBulkDeleteResult *
      75           43 : gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
      76              : {
      77              :     /* No-op in ANALYZE ONLY mode */
      78           43 :     if (info->analyze_only)
      79            9 :         return stats;
      80              : 
      81              :     /*
      82              :      * If gistbulkdelete was called, we need not do anything, just return the
      83              :      * stats from the latest gistbulkdelete call.  If it wasn't called, we
      84              :      * still need to do a pass over the index, to obtain index statistics.
      85              :      */
      86           34 :     if (stats == NULL)
      87              :     {
      88           22 :         stats = palloc0_object(IndexBulkDeleteResult);
      89           22 :         gistvacuumscan(info, stats, NULL, NULL);
      90              :     }
      91              : 
      92              :     /*
      93              :      * It's quite possible for us to be fooled by concurrent page splits into
      94              :      * double-counting some index tuples, so disbelieve any total that exceeds
      95              :      * the underlying heap's count ... if we know that accurately.  Otherwise
      96              :      * this might just make matters worse.
      97              :      */
      98           34 :     if (!info->estimated_count)
      99              :     {
     100           34 :         if (stats->num_index_tuples > info->num_heap_tuples)
     101            0 :             stats->num_index_tuples = info->num_heap_tuples;
     102              :     }
     103              : 
     104           34 :     return stats;
     105              : }
     106              : 
     107              : /*
     108              :  * gistvacuumscan --- scan the index for VACUUMing purposes
     109              :  *
     110              :  * This scans the index for leaf tuples that are deletable according to the
     111              :  * vacuum callback, and updates the stats.  Both btbulkdelete and
     112              :  * btvacuumcleanup invoke this (the latter only if no btbulkdelete call
     113              :  * occurred).
     114              :  *
     115              :  * This also makes note of any empty leaf pages, as well as all internal
     116              :  * pages while looping over all index pages.  After scanning all the pages, we
     117              :  * remove the empty pages so that they can be reused.  Any deleted pages are
     118              :  * added directly to the free space map.  (They should've been added there
     119              :  * when they were originally deleted, already, but it's possible that the FSM
     120              :  * was lost at a crash, for example.)
     121              :  *
     122              :  * The caller is responsible for initially allocating/zeroing a stats struct.
     123              :  */
     124              : static void
     125           43 : gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     126              :                IndexBulkDeleteCallback callback, void *callback_state)
     127              : {
     128           43 :     Relation    rel = info->index;
     129              :     GistVacState vstate;
     130              :     BlockNumber num_pages;
     131              :     bool        needLock;
     132              :     MemoryContext oldctx;
     133              :     BlockRangeReadStreamPrivate p;
     134           43 :     ReadStream *stream = NULL;
     135              : 
     136              :     /*
     137              :      * Reset fields that track information about the entire index now.  This
     138              :      * avoids double-counting in the case where a single VACUUM command
     139              :      * requires multiple scans of the index.
     140              :      *
     141              :      * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     142              :      * since they track information about the VACUUM command, and so must last
     143              :      * across each call to gistvacuumscan().
     144              :      *
     145              :      * (Note that pages_free is treated as state about the whole index, not
     146              :      * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
     147              :      * calls are idempotent, and get repeated for the same deleted pages in
     148              :      * some scenarios.  The point for us is to track the number of recyclable
     149              :      * pages in the index at the end of the VACUUM command.)
     150              :      */
     151           43 :     stats->num_pages = 0;
     152           43 :     stats->estimated_count = false;
     153           43 :     stats->num_index_tuples = 0;
     154           43 :     stats->pages_deleted = 0;
     155           43 :     stats->pages_free = 0;
     156              : 
     157              :     /*
     158              :      * Create the integer sets to remember all the internal and the empty leaf
     159              :      * pages in page_set_context.  Internally, the integer set will remember
     160              :      * this context so that the subsequent allocations for these integer sets
     161              :      * will be done from the same context.
     162              :      *
     163              :      * XXX the allocation sizes used below pre-date generation context's block
     164              :      * growing code.  These values should likely be benchmarked and set to
     165              :      * more suitable values.
     166              :      */
     167           43 :     vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
     168              :                                                       "GiST VACUUM page set context",
     169              :                                                       16 * 1024,
     170              :                                                       16 * 1024,
     171              :                                                       16 * 1024);
     172           43 :     oldctx = MemoryContextSwitchTo(vstate.page_set_context);
     173           43 :     vstate.internal_page_set = intset_create();
     174           43 :     vstate.empty_leaf_set = intset_create();
     175           43 :     MemoryContextSwitchTo(oldctx);
     176              : 
     177              :     /* Set up info to pass down to gistvacuumpage */
     178           43 :     vstate.info = info;
     179           43 :     vstate.stats = stats;
     180           43 :     vstate.callback = callback;
     181           43 :     vstate.callback_state = callback_state;
     182           43 :     if (RelationNeedsWAL(rel))
     183           43 :         vstate.startNSN = GetInsertRecPtr();
     184              :     else
     185            0 :         vstate.startNSN = gistGetFakeLSN(rel);
     186              : 
     187              :     /*
     188              :      * The outer loop iterates over all index pages, in physical order (we
     189              :      * hope the kernel will cooperate in providing read-ahead for speed).  It
     190              :      * is critical that we visit all leaf pages, including ones added after we
     191              :      * start the scan, else we might fail to delete some deletable tuples.
     192              :      * Hence, we must repeatedly check the relation length.  We must acquire
     193              :      * the relation-extension lock while doing so to avoid a race condition:
     194              :      * if someone else is extending the relation, there is a window where
     195              :      * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     196              :      * write-locked by gistNewBuffer().  If we manage to scan such a page
     197              :      * here, we'll improperly assume it can be recycled.  Taking the lock
     198              :      * synchronizes things enough to prevent a problem: either num_pages won't
     199              :      * include the new page, or gistNewBuffer already has write lock on the
     200              :      * buffer and it will be fully initialized before we can examine it.  (See
     201              :      * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     202              :      * if a page is added immediately after we look; the page splitting code
     203              :      * already has write-lock on the left page before it adds a right page, so
     204              :      * we must already have processed any tuples due to be moved into such a
     205              :      * page.
     206              :      *
     207              :      * We can skip locking for new or temp relations, however, since no one
     208              :      * else could be accessing them.
     209              :      */
     210           43 :     needLock = !RELATION_IS_LOCAL(rel);
     211              : 
     212           43 :     p.current_blocknum = GIST_ROOT_BLKNO;
     213              : 
     214              :     /*
     215              :      * It is safe to use batchmode as block_range_read_stream_cb takes no
     216              :      * locks.
     217              :      */
     218           43 :     stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
     219              :                                         READ_STREAM_FULL |
     220              :                                         READ_STREAM_USE_BATCHING,
     221              :                                         info->strategy,
     222              :                                         rel,
     223              :                                         MAIN_FORKNUM,
     224              :                                         block_range_read_stream_cb,
     225              :                                         &p,
     226              :                                         0);
     227              :     for (;;)
     228              :     {
     229              :         /* Get the current relation length */
     230           86 :         if (needLock)
     231           86 :             LockRelationForExtension(rel, ExclusiveLock);
     232           86 :         num_pages = RelationGetNumberOfBlocks(rel);
     233           86 :         if (needLock)
     234           86 :             UnlockRelationForExtension(rel, ExclusiveLock);
     235              : 
     236              :         /* Quit if we've scanned the whole relation */
     237           86 :         if (p.current_blocknum >= num_pages)
     238           43 :             break;
     239              : 
     240           43 :         p.last_exclusive = num_pages;
     241              : 
     242              :         /* Iterate over pages, then loop back to recheck relation length */
     243              :         while (true)
     244         1319 :         {
     245              :             Buffer      buf;
     246              : 
     247              :             /* call vacuum_delay_point while not holding any buffer lock */
     248         1362 :             vacuum_delay_point(false);
     249              : 
     250         1362 :             buf = read_stream_next_buffer(stream, NULL);
     251              : 
     252         1362 :             if (!BufferIsValid(buf))
     253           43 :                 break;
     254              : 
     255         1319 :             gistvacuumpage(&vstate, buf);
     256              :         }
     257              : 
     258              :         /*
     259              :          * We have to reset the read stream to use it again. After returning
     260              :          * InvalidBuffer, the read stream API won't invoke our callback again
     261              :          * until the stream has been reset.
     262              :          */
     263           43 :         read_stream_reset(stream);
     264              :     }
     265              : 
     266           43 :     read_stream_end(stream);
     267              : 
     268              :     /*
     269              :      * If we found any recyclable pages (and recorded them in the FSM), then
     270              :      * forcibly update the upper-level FSM pages to ensure that searchers can
     271              :      * find them.  It's possible that the pages were also found during
     272              :      * previous scans and so this is a waste of time, but it's cheap enough
     273              :      * relative to scanning the index that it shouldn't matter much, and
     274              :      * making sure that free pages are available sooner not later seems
     275              :      * worthwhile.
     276              :      *
     277              :      * Note that if no recyclable pages exist, we don't bother vacuuming the
     278              :      * FSM at all.
     279              :      */
     280           43 :     if (stats->pages_free > 0)
     281            0 :         IndexFreeSpaceMapVacuum(rel);
     282              : 
     283              :     /* update statistics */
     284           43 :     stats->num_pages = num_pages;
     285              : 
     286              :     /*
     287              :      * If we saw any empty pages, try to unlink them from the tree so that
     288              :      * they can be reused.
     289              :      */
     290           43 :     gistvacuum_delete_empty_pages(info, &vstate);
     291              : 
     292              :     /* we don't need the internal and empty page sets anymore */
     293           43 :     MemoryContextDelete(vstate.page_set_context);
     294           43 :     vstate.page_set_context = NULL;
     295           43 :     vstate.internal_page_set = NULL;
     296           43 :     vstate.empty_leaf_set = NULL;
     297           43 : }
     298              : 
     299              : /*
     300              :  * gistvacuumpage --- VACUUM one page
     301              :  *
     302              :  * This processes a single page for gistbulkdelete(). `buffer` contains the
     303              :  * page to process. In some cases we must go back and reexamine
     304              :  * previously-scanned pages; this routine recurses when necessary to handle
     305              :  * that case.
     306              :  */
     307              : static void
     308         1319 : gistvacuumpage(GistVacState *vstate, Buffer buffer)
     309              : {
     310         1319 :     IndexVacuumInfo *info = vstate->info;
     311         1319 :     IndexBulkDeleteCallback callback = vstate->callback;
     312         1319 :     void       *callback_state = vstate->callback_state;
     313         1319 :     Relation    rel = info->index;
     314         1319 :     BlockNumber orig_blkno = BufferGetBlockNumber(buffer);
     315              :     Page        page;
     316              :     BlockNumber recurse_to;
     317              : 
     318              :     /*
     319              :      * orig_blkno is the highest block number reached by the outer
     320              :      * gistvacuumscan() loop. This will be the same as blkno unless we are
     321              :      * recursing to reexamine a previous page.
     322              :      */
     323         1319 :     BlockNumber blkno = orig_blkno;
     324              : 
     325         1319 : restart:
     326         1319 :     recurse_to = InvalidBlockNumber;
     327              : 
     328              :     /*
     329              :      * We are not going to stay here for a long time, aggressively grab an
     330              :      * exclusive lock.
     331              :      */
     332         1319 :     LockBuffer(buffer, GIST_EXCLUSIVE);
     333         1319 :     page = BufferGetPage(buffer);
     334              : 
     335         1319 :     if (gistPageRecyclable(page))
     336              :     {
     337              :         /* Okay to recycle this page */
     338            0 :         RecordFreeIndexPage(rel, blkno);
     339            0 :         vstate->stats->pages_deleted++;
     340            0 :         vstate->stats->pages_free++;
     341              :     }
     342         1319 :     else if (GistPageIsDeleted(page))
     343              :     {
     344              :         /* Already deleted, but can't recycle yet */
     345            0 :         vstate->stats->pages_deleted++;
     346              :     }
     347         1319 :     else if (GistPageIsLeaf(page))
     348              :     {
     349              :         OffsetNumber todelete[MaxOffsetNumber];
     350         1292 :         int         ntodelete = 0;
     351              :         int         nremain;
     352         1292 :         GISTPageOpaque opaque = GistPageGetOpaque(page);
     353         1292 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     354              : 
     355              :         /*
     356              :          * Check whether we need to recurse back to earlier pages.  What we
     357              :          * are concerned about is a page split that happened since we started
     358              :          * the vacuum scan.  If the split moved some tuples to a lower page
     359              :          * then we might have missed 'em.  If so, set up for tail recursion.
     360              :          *
     361              :          * This is similar to the checks we do during searches, when following
     362              :          * a downlink, but we don't need to jump to higher-numbered pages,
     363              :          * because we will process them later, anyway.
     364              :          */
     365         2584 :         if ((GistFollowRight(page) ||
     366         1292 :              vstate->startNSN < GistPageGetNSN(page)) &&
     367            0 :             (opaque->rightlink != InvalidBlockNumber) &&
     368            0 :             (opaque->rightlink < orig_blkno))
     369              :         {
     370            0 :             recurse_to = opaque->rightlink;
     371              :         }
     372              : 
     373              :         /*
     374              :          * Scan over all items to see which ones need to be deleted according
     375              :          * to the callback function.
     376              :          */
     377         1292 :         if (callback)
     378              :         {
     379              :             OffsetNumber off;
     380              : 
     381          417 :             for (off = FirstOffsetNumber;
     382        50347 :                  off <= maxoff;
     383        49930 :                  off = OffsetNumberNext(off))
     384              :             {
     385        49930 :                 ItemId      iid = PageGetItemId(page, off);
     386        49930 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     387              : 
     388        49930 :                 if (callback(&(idxtuple->t_tid), callback_state))
     389        38938 :                     todelete[ntodelete++] = off;
     390              :             }
     391              :         }
     392              : 
     393              :         /*
     394              :          * Apply any needed deletes.  We issue just one WAL record per page,
     395              :          * so as to minimize WAL traffic.
     396              :          */
     397         1292 :         if (ntodelete > 0)
     398              :         {
     399          408 :             START_CRIT_SECTION();
     400              : 
     401          408 :             MarkBufferDirty(buffer);
     402              : 
     403          408 :             PageIndexMultiDelete(page, todelete, ntodelete);
     404          408 :             GistMarkTuplesDeleted(page);
     405              : 
     406          408 :             if (RelationNeedsWAL(rel))
     407          408 :             {
     408              :                 XLogRecPtr  recptr;
     409              : 
     410          408 :                 recptr = gistXLogUpdate(buffer,
     411              :                                         todelete, ntodelete,
     412              :                                         NULL, 0, InvalidBuffer);
     413          408 :                 PageSetLSN(page, recptr);
     414              :             }
     415              :             else
     416            0 :                 PageSetLSN(page, gistGetFakeLSN(rel));
     417              : 
     418          408 :             END_CRIT_SECTION();
     419              : 
     420          408 :             vstate->stats->tuples_removed += ntodelete;
     421              :             /* must recompute maxoff */
     422          408 :             maxoff = PageGetMaxOffsetNumber(page);
     423              :         }
     424              : 
     425         1292 :         nremain = maxoff - FirstOffsetNumber + 1;
     426         1292 :         if (nremain == 0)
     427              :         {
     428              :             /*
     429              :              * The page is now completely empty.  Remember its block number,
     430              :              * so that we will try to delete the page in the second stage.
     431              :              *
     432              :              * Skip this when recursing, because IntegerSet requires that the
     433              :              * values are added in ascending order.  The next VACUUM will pick
     434              :              * it up.
     435              :              */
     436          168 :             if (blkno == orig_blkno)
     437          168 :                 intset_add_member(vstate->empty_leaf_set, blkno);
     438              :         }
     439              :         else
     440         1124 :             vstate->stats->num_index_tuples += nremain;
     441              :     }
     442              :     else
     443              :     {
     444              :         /*
     445              :          * On an internal page, check for "invalid tuples", left behind by an
     446              :          * incomplete page split on PostgreSQL 9.0 or below.  These are not
     447              :          * created by newer PostgreSQL versions, but unfortunately, there is
     448              :          * no version number anywhere in a GiST index, so we don't know
     449              :          * whether this index might still contain invalid tuples or not.
     450              :          */
     451           27 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     452              :         OffsetNumber off;
     453              : 
     454           27 :         for (off = FirstOffsetNumber;
     455         1303 :              off <= maxoff;
     456         1276 :              off = OffsetNumberNext(off))
     457              :         {
     458         1276 :             ItemId      iid = PageGetItemId(page, off);
     459         1276 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     460              : 
     461         1276 :             if (GistTupleIsInvalid(idxtuple))
     462            0 :                 ereport(LOG,
     463              :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     464              :                                 RelationGetRelationName(rel)),
     465              :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     466              :                          errhint("Please REINDEX it.")));
     467              :         }
     468              : 
     469              :         /*
     470              :          * Remember the block number of this page, so that we can revisit it
     471              :          * later in gistvacuum_delete_empty_pages(), when we search for
     472              :          * parents of empty leaf pages.
     473              :          */
     474           27 :         if (blkno == orig_blkno)
     475           27 :             intset_add_member(vstate->internal_page_set, blkno);
     476              :     }
     477              : 
     478         1319 :     UnlockReleaseBuffer(buffer);
     479              : 
     480              :     /*
     481              :      * This is really tail recursion, but if the compiler is too stupid to
     482              :      * optimize it as such, we'd eat an uncomfortably large amount of stack
     483              :      * space per recursion level (due to the deletable[] array).  A failure is
     484              :      * improbable since the number of levels isn't likely to be large ... but
     485              :      * just in case, let's hand-optimize into a loop.
     486              :      */
     487         1319 :     if (recurse_to != InvalidBlockNumber)
     488              :     {
     489            0 :         blkno = recurse_to;
     490              : 
     491              :         /* check for vacuum delay while not holding any buffer lock */
     492            0 :         vacuum_delay_point(false);
     493              : 
     494            0 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     495              :                                     info->strategy);
     496            0 :         goto restart;
     497              :     }
     498         1319 : }
     499              : 
     500              : /*
     501              :  * Scan all internal pages, and try to delete their empty child pages.
     502              :  */
     503              : static void
     504           43 : gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
     505              : {
     506           43 :     Relation    rel = info->index;
     507              :     BlockNumber empty_pages_remaining;
     508              :     uint64      blkno;
     509              : 
     510              :     /*
     511              :      * Rescan all inner pages to find those that have empty child pages.
     512              :      */
     513           43 :     empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
     514           43 :     intset_begin_iterate(vstate->internal_page_set);
     515           53 :     while (empty_pages_remaining > 0 &&
     516            8 :            intset_iterate_next(vstate->internal_page_set, &blkno))
     517              :     {
     518              :         Buffer      buffer;
     519              :         Page        page;
     520              :         OffsetNumber off,
     521              :                     maxoff;
     522              :         OffsetNumber todelete[MaxOffsetNumber];
     523              :         BlockNumber leafs_to_delete[MaxOffsetNumber];
     524              :         int         ntodelete;
     525              :         int         deleted;
     526              : 
     527            2 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
     528              :                                     RBM_NORMAL, info->strategy);
     529              : 
     530            2 :         LockBuffer(buffer, GIST_SHARE);
     531            2 :         page = BufferGetPage(buffer);
     532              : 
     533            2 :         if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
     534              :         {
     535              :             /*
     536              :              * This page was an internal page earlier, but now it's something
     537              :              * else. Shouldn't happen...
     538              :              */
     539              :             Assert(false);
     540            0 :             UnlockReleaseBuffer(buffer);
     541            0 :             continue;
     542              :         }
     543              : 
     544              :         /*
     545              :          * Scan all the downlinks, and see if any of them point to empty leaf
     546              :          * pages.
     547              :          */
     548            2 :         maxoff = PageGetMaxOffsetNumber(page);
     549            2 :         ntodelete = 0;
     550            2 :         for (off = FirstOffsetNumber;
     551          328 :              off <= maxoff && ntodelete < maxoff - 1;
     552          326 :              off = OffsetNumberNext(off))
     553              :         {
     554          326 :             ItemId      iid = PageGetItemId(page, off);
     555          326 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     556              :             BlockNumber leafblk;
     557              : 
     558          326 :             leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     559          326 :             if (intset_is_member(vstate->empty_leaf_set, leafblk))
     560              :             {
     561          162 :                 leafs_to_delete[ntodelete] = leafblk;
     562          162 :                 todelete[ntodelete++] = off;
     563              :             }
     564              :         }
     565              : 
     566              :         /*
     567              :          * In order to avoid deadlock, child page must be locked before
     568              :          * parent, so we must release the lock on the parent, lock the child,
     569              :          * and then re-acquire the lock the parent.  (And we wouldn't want to
     570              :          * do I/O, while holding a lock, anyway.)
     571              :          *
     572              :          * At the instant that we're not holding a lock on the parent, the
     573              :          * downlink might get moved by a concurrent insert, so we must
     574              :          * re-check that it still points to the same child page after we have
     575              :          * acquired both locks.  Also, another backend might have inserted a
     576              :          * tuple to the page, so that it is no longer empty.  gistdeletepage()
     577              :          * re-checks all these conditions.
     578              :          */
     579            2 :         LockBuffer(buffer, GIST_UNLOCK);
     580              : 
     581            2 :         deleted = 0;
     582          164 :         for (int i = 0; i < ntodelete; i++)
     583              :         {
     584              :             Buffer      leafbuf;
     585              : 
     586              :             /*
     587              :              * Don't remove the last downlink from the parent.  That would
     588              :              * confuse the insertion code.
     589              :              */
     590          162 :             if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
     591            0 :                 break;
     592              : 
     593          162 :             leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
     594              :                                          RBM_NORMAL, info->strategy);
     595          162 :             LockBuffer(leafbuf, GIST_EXCLUSIVE);
     596          162 :             gistcheckpage(rel, leafbuf);
     597              : 
     598          162 :             LockBuffer(buffer, GIST_EXCLUSIVE);
     599          162 :             if (gistdeletepage(info, vstate->stats,
     600          162 :                                buffer, todelete[i] - deleted,
     601              :                                leafbuf))
     602          162 :                 deleted++;
     603          162 :             LockBuffer(buffer, GIST_UNLOCK);
     604              : 
     605          162 :             UnlockReleaseBuffer(leafbuf);
     606              :         }
     607              : 
     608            2 :         ReleaseBuffer(buffer);
     609              : 
     610              :         /*
     611              :          * We can stop the scan as soon as we have seen the downlinks, even if
     612              :          * we were not able to remove them all.
     613              :          */
     614            2 :         empty_pages_remaining -= ntodelete;
     615              :     }
     616           43 : }
     617              : 
     618              : /*
     619              :  * gistdeletepage takes a leaf page, and its parent, and tries to delete the
     620              :  * leaf.  Both pages must be locked.
     621              :  *
     622              :  * Even if the page was empty when we first saw it, a concurrent inserter might
     623              :  * have added a tuple to it since.  Similarly, the downlink might have moved.
     624              :  * We re-check all the conditions, to make sure the page is still deletable,
     625              :  * before modifying anything.
     626              :  *
     627              :  * Returns true, if the page was deleted, and false if a concurrent update
     628              :  * prevented it.
     629              :  */
     630              : static bool
     631          162 : gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     632              :                Buffer parentBuffer, OffsetNumber downlink,
     633              :                Buffer leafBuffer)
     634              : {
     635          162 :     Page        parentPage = BufferGetPage(parentBuffer);
     636          162 :     Page        leafPage = BufferGetPage(leafBuffer);
     637              :     ItemId      iid;
     638              :     IndexTuple  idxtuple;
     639              :     XLogRecPtr  recptr;
     640              :     FullTransactionId txid;
     641              : 
     642              :     /*
     643              :      * Check that the leaf is still empty and deletable.
     644              :      */
     645          162 :     if (!GistPageIsLeaf(leafPage))
     646              :     {
     647              :         /* a leaf page should never become a non-leaf page */
     648              :         Assert(false);
     649            0 :         return false;
     650              :     }
     651              : 
     652          162 :     if (GistFollowRight(leafPage))
     653            0 :         return false;           /* don't mess with a concurrent page split */
     654              : 
     655          162 :     if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
     656            0 :         return false;           /* not empty anymore */
     657              : 
     658              :     /*
     659              :      * Ok, the leaf is deletable.  Is the downlink in the parent page still
     660              :      * valid?  It might have been moved by a concurrent insert.  We could try
     661              :      * to re-find it by scanning the page again, possibly moving right if the
     662              :      * was split.  But for now, let's keep it simple and just give up.  The
     663              :      * next VACUUM will pick it up.
     664              :      */
     665          162 :     if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
     666          162 :         GistPageIsLeaf(parentPage))
     667              :     {
     668              :         /* shouldn't happen, internal pages are never deleted */
     669              :         Assert(false);
     670            0 :         return false;
     671              :     }
     672              : 
     673          162 :     if (PageGetMaxOffsetNumber(parentPage) < downlink
     674          162 :         || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
     675            0 :         return false;
     676              : 
     677          162 :     iid = PageGetItemId(parentPage, downlink);
     678          162 :     idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
     679          324 :     if (BufferGetBlockNumber(leafBuffer) !=
     680          162 :         ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
     681            0 :         return false;
     682              : 
     683              :     /*
     684              :      * All good, proceed with the deletion.
     685              :      *
     686              :      * The page cannot be immediately recycled, because in-progress scans that
     687              :      * saw the downlink might still visit it.  Mark the page with the current
     688              :      * next-XID counter, so that we know when it can be recycled.  Once that
     689              :      * XID becomes older than GlobalXmin, we know that all scans that are
     690              :      * currently in progress must have ended.  (That's much more conservative
     691              :      * than needed, but let's keep it safe and simple.)
     692              :      */
     693          162 :     txid = ReadNextFullTransactionId();
     694              : 
     695          162 :     START_CRIT_SECTION();
     696              : 
     697              :     /* mark the page as deleted */
     698          162 :     MarkBufferDirty(leafBuffer);
     699          162 :     GistPageSetDeleted(leafPage, txid);
     700          162 :     stats->pages_newly_deleted++;
     701          162 :     stats->pages_deleted++;
     702              : 
     703              :     /* remove the downlink from the parent */
     704          162 :     MarkBufferDirty(parentBuffer);
     705          162 :     PageIndexTupleDelete(parentPage, downlink);
     706              : 
     707          162 :     if (RelationNeedsWAL(info->index))
     708          162 :         recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
     709              :     else
     710            0 :         recptr = gistGetFakeLSN(info->index);
     711          162 :     PageSetLSN(parentPage, recptr);
     712          162 :     PageSetLSN(leafPage, recptr);
     713              : 
     714          162 :     END_CRIT_SECTION();
     715              : 
     716          162 :     return true;
     717              : }
        

Generated by: LCOV version 2.0-1