LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistvacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 169 193 87.6 %
Date: 2020-05-31 23:07:13 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gistvacuum.c
       4             :  *    vacuuming routines for the postgres GiST index access method.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/gist/gistvacuum.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/genam.h"
      18             : #include "access/gist_private.h"
      19             : #include "access/transam.h"
      20             : #include "commands/vacuum.h"
      21             : #include "lib/integerset.h"
      22             : #include "miscadmin.h"
      23             : #include "storage/indexfsm.h"
      24             : #include "storage/lmgr.h"
      25             : #include "utils/memutils.h"
      26             : 
/* Working state needed by gistbulkdelete */
typedef struct
{
    IndexVacuumInfo *info;      /* info about the index being vacuumed */
    IndexBulkDeleteResult *stats;   /* statistics accumulated over the scan */
    IndexBulkDeleteCallback callback;   /* decides if a heap TID is dead */
    void       *callback_state; /* opaque state passed through to callback */
    GistNSN     startNSN;       /* insert pointer (or fake LSN) captured at
                                 * scan start; pages with a newer NSN were
                                 * split after we began */

    /*
     * These are used to memorize all internal and empty leaf pages.  They are
     * used for deleting all the empty pages.
     */
    IntegerSet *internal_page_set;
    IntegerSet *empty_leaf_set;
    MemoryContext page_set_context; /* context holding both integer sets */
} GistVacState;
      44             : 
      45             : static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      46             :                            IndexBulkDeleteCallback callback, void *callback_state);
      47             : static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
      48             :                            BlockNumber orig_blkno);
      49             : static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
      50             :                                           GistVacState *vstate);
      51             : static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      52             :                            Buffer buffer, OffsetNumber downlink,
      53             :                            Buffer leafBuffer);
      54             : 
      55             : /*
      56             :  * VACUUM bulkdelete stage: remove index entries.
      57             :  */
      58             : IndexBulkDeleteResult *
      59          20 : gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      60             :                IndexBulkDeleteCallback callback, void *callback_state)
      61             : {
      62             :     /* allocate stats if first time through, else re-use existing struct */
      63          20 :     if (stats == NULL)
      64          20 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
      65             : 
      66          20 :     gistvacuumscan(info, stats, callback, callback_state);
      67             : 
      68          20 :     return stats;
      69             : }
      70             : 
      71             : /*
      72             :  * VACUUM cleanup stage: delete empty pages, and update index statistics.
      73             :  */
      74             : IndexBulkDeleteResult *
      75         114 : gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
      76             : {
      77             :     /* No-op in ANALYZE ONLY mode */
      78         114 :     if (info->analyze_only)
      79          62 :         return stats;
      80             : 
      81             :     /*
      82             :      * If gistbulkdelete was called, we need not do anything, just return the
      83             :      * stats from the latest gistbulkdelete call.  If it wasn't called, we
      84             :      * still need to do a pass over the index, to obtain index statistics.
      85             :      */
      86          52 :     if (stats == NULL)
      87             :     {
      88          36 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
      89          36 :         gistvacuumscan(info, stats, NULL, NULL);
      90             :     }
      91             : 
      92             :     /*
      93             :      * It's quite possible for us to be fooled by concurrent page splits into
      94             :      * double-counting some index tuples, so disbelieve any total that exceeds
      95             :      * the underlying heap's count ... if we know that accurately.  Otherwise
      96             :      * this might just make matters worse.
      97             :      */
      98          52 :     if (!info->estimated_count)
      99             :     {
     100          52 :         if (stats->num_index_tuples > info->num_heap_tuples)
     101           0 :             stats->num_index_tuples = info->num_heap_tuples;
     102             :     }
     103             : 
     104          52 :     return stats;
     105             : }
     106             : 
/*
 * gistvacuumscan --- scan the index for VACUUMing purposes
 *
 * This scans the index for leaf tuples that are deletable according to the
 * vacuum callback, and updates the stats.  Both gistbulkdelete and
 * gistvacuumcleanup invoke this (the latter only if no gistbulkdelete call
 * occurred).
 *
 * This also makes note of any empty leaf pages, as well as all internal
 * pages while looping over all index pages.  After scanning all the pages, we
 * remove the empty pages so that they can be reused.  Any deleted pages are
 * added directly to the free space map.  (They should've been added there
 * when they were originally deleted, already, but it's possible that the FSM
 * was lost at a crash, for example.)
 *
 * The caller is responsible for initially allocating/zeroing a stats struct.
 */
static void
gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    GistVacState vstate;
    BlockNumber num_pages;
    bool        needLock;
    BlockNumber blkno;
    MemoryContext oldctx;

    /*
     * Reset counts that will be incremented during the scan; needed in case
     * of multiple scans during a single VACUUM command.
     */
    stats->estimated_count = false;
    stats->num_index_tuples = 0;
    stats->pages_deleted = 0;
    stats->pages_free = 0;

    /*
     * Create the integer sets to remember all the internal and the empty leaf
     * pages in page_set_context.  Internally, the integer set will remember
     * this context so that the subsequent allocations for these integer sets
     * will be done from the same context.
     */
    vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
                                                      "GiST VACUUM page set context",
                                                      16 * 1024);
    oldctx = MemoryContextSwitchTo(vstate.page_set_context);
    vstate.internal_page_set = intset_create();
    vstate.empty_leaf_set = intset_create();
    MemoryContextSwitchTo(oldctx);

    /* Set up info to pass down to gistvacuumpage */
    vstate.info = info;
    vstate.stats = stats;
    vstate.callback = callback;
    vstate.callback_state = callback_state;
    /* NSN snapshot; pages split after this point carry a newer NSN */
    if (RelationNeedsWAL(rel))
        vstate.startNSN = GetInsertRecPtr();
    else
        vstate.startNSN = gistGetFakeLSN(rel);

    /*
     * The outer loop iterates over all index pages, in physical order (we
     * hope the kernel will cooperate in providing read-ahead for speed).  It
     * is critical that we visit all leaf pages, including ones added after we
     * start the scan, else we might fail to delete some deletable tuples.
     * Hence, we must repeatedly check the relation length.  We must acquire
     * the relation-extension lock while doing so to avoid a race condition:
     * if someone else is extending the relation, there is a window where
     * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     * write-locked by gistNewBuffer().  If we manage to scan such a page
     * here, we'll improperly assume it can be recycled.  Taking the lock
     * synchronizes things enough to prevent a problem: either num_pages won't
     * include the new page, or gistNewBuffer already has write lock on the
     * buffer and it will be fully initialized before we can examine it.  (See
     * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     * if a page is added immediately after we look; the page splitting code
     * already has write-lock on the left page before it adds a right page, so
     * we must already have processed any tuples due to be moved into such a
     * page.
     *
     * We can skip locking for new or temp relations, however, since no one
     * else could be accessing them.
     */
    needLock = !RELATION_IS_LOCAL(rel);

    blkno = GIST_ROOT_BLKNO;
    for (;;)
    {
        /* Get the current relation length */
        if (needLock)
            LockRelationForExtension(rel, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(rel);
        if (needLock)
            UnlockRelationForExtension(rel, ExclusiveLock);

        /* Quit if we've scanned the whole relation */
        if (blkno >= num_pages)
            break;
        /* Iterate over pages, then loop back to recheck length */
        for (; blkno < num_pages; blkno++)
            gistvacuumpage(&vstate, blkno, blkno);
    }

    /*
     * If we found any recyclable pages (and recorded them in the FSM), then
     * forcibly update the upper-level FSM pages to ensure that searchers can
     * find them.  It's possible that the pages were also found during
     * previous scans and so this is a waste of time, but it's cheap enough
     * relative to scanning the index that it shouldn't matter much, and
     * making sure that free pages are available sooner not later seems
     * worthwhile.
     *
     * Note that if no recyclable pages exist, we don't bother vacuuming the
     * FSM at all.
     */
    if (stats->pages_free > 0)
        IndexFreeSpaceMapVacuum(rel);

    /* update statistics (num_pages holds the length from the final check) */
    stats->num_pages = num_pages;

    /*
     * If we saw any empty pages, try to unlink them from the tree so that
     * they can be reused.
     */
    gistvacuum_delete_empty_pages(info, &vstate);

    /* we don't need the internal and empty page sets anymore */
    MemoryContextDelete(vstate.page_set_context);
    vstate.page_set_context = NULL;
    vstate.internal_page_set = NULL;
    vstate.empty_leaf_set = NULL;
}
     241             : 
/*
 * gistvacuumpage --- VACUUM one page
 *
 * This processes a single page for gistbulkdelete().  In some cases we
 * must go back and re-examine previously-scanned pages; this routine
 * recurses when necessary to handle that case.
 *
 * blkno is the page to process.  orig_blkno is the highest block number
 * reached by the outer gistvacuumscan loop (the same as blkno, unless we
 * are recursing to re-examine a previous page).
 */
static void
gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
{
    IndexVacuumInfo *info = vstate->info;
    IndexBulkDeleteCallback callback = vstate->callback;
    void       *callback_state = vstate->callback_state;
    Relation    rel = info->index;
    Buffer      buffer;
    Page        page;
    BlockNumber recurse_to;

restart:
    recurse_to = InvalidBlockNumber;

    /* call vacuum_delay_point while not holding any buffer lock */
    vacuum_delay_point();

    buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                info->strategy);

    /*
     * We are not going to stay here for a long time, aggressively grab an
     * exclusive lock.
     */
    LockBuffer(buffer, GIST_EXCLUSIVE);
    page = (Page) BufferGetPage(buffer);

    if (gistPageRecyclable(page))
    {
        /* Okay to recycle this page */
        RecordFreeIndexPage(rel, blkno);
        vstate->stats->pages_free++;
        vstate->stats->pages_deleted++;
    }
    else if (GistPageIsDeleted(page))
    {
        /* Already deleted, but can't recycle yet */
        vstate->stats->pages_deleted++;
    }
    else if (GistPageIsLeaf(page))
    {
        OffsetNumber todelete[MaxOffsetNumber];
        int         ntodelete = 0;
        int         nremain;
        GISTPageOpaque opaque = GistPageGetOpaque(page);
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

        /*
         * Check whether we need to recurse back to earlier pages.  What we
         * are concerned about is a page split that happened since we started
         * the vacuum scan.  If the split moved some tuples to a lower page
         * then we might have missed 'em.  If so, set up for tail recursion.
         *
         * This is similar to the checks we do during searches, when following
         * a downlink, but we don't need to jump to higher-numbered pages,
         * because we will process them later, anyway.
         */
        if ((GistFollowRight(page) ||
             vstate->startNSN < GistPageGetNSN(page)) &&
            (opaque->rightlink != InvalidBlockNumber) &&
            (opaque->rightlink < orig_blkno))
        {
            recurse_to = opaque->rightlink;
        }

        /*
         * Scan over all items to see which ones need to be deleted according
         * to the callback function.
         */
        if (callback)
        {
            OffsetNumber off;

            for (off = FirstOffsetNumber;
                 off <= maxoff;
                 off = OffsetNumberNext(off))
            {
                ItemId      iid = PageGetItemId(page, off);
                IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

                /* callback says whether the pointed-to heap tuple is dead */
                if (callback(&(idxtuple->t_tid), callback_state))
                    todelete[ntodelete++] = off;
            }
        }

        /*
         * Apply any needed deletes.  We issue just one WAL record per page,
         * so as to minimize WAL traffic.
         */
        if (ntodelete > 0)
        {
            START_CRIT_SECTION();

            MarkBufferDirty(buffer);

            PageIndexMultiDelete(page, todelete, ntodelete);
            GistMarkTuplesDeleted(page);

            if (RelationNeedsWAL(rel))
            {
                XLogRecPtr  recptr;

                recptr = gistXLogUpdate(buffer,
                                        todelete, ntodelete,
                                        NULL, 0, InvalidBuffer);
                PageSetLSN(page, recptr);
            }
            else
                PageSetLSN(page, gistGetFakeLSN(rel));

            END_CRIT_SECTION();

            vstate->stats->tuples_removed += ntodelete;
            /* must recompute maxoff */
            maxoff = PageGetMaxOffsetNumber(page);
        }

        /* number of index tuples still remaining on this leaf page */
        nremain = maxoff - FirstOffsetNumber + 1;
        if (nremain == 0)
        {
            /*
             * The page is now completely empty.  Remember its block number,
             * so that we will try to delete the page in the second stage.
             *
             * Skip this when recursing, because IntegerSet requires that the
             * values are added in ascending order.  The next VACUUM will pick
             * it up.
             */
            if (blkno == orig_blkno)
                intset_add_member(vstate->empty_leaf_set, blkno);
        }
        else
            vstate->stats->num_index_tuples += nremain;
    }
    else
    {
        /*
         * On an internal page, check for "invalid tuples", left behind by an
         * incomplete page split on PostgreSQL 9.0 or below.  These are not
         * created by newer PostgreSQL versions, but unfortunately, there is
         * no version number anywhere in a GiST index, so we don't know
         * whether this index might still contain invalid tuples or not.
         */
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
        OffsetNumber off;

        for (off = FirstOffsetNumber;
             off <= maxoff;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

            if (GistTupleIsInvalid(idxtuple))
                ereport(LOG,
                        (errmsg("index \"%s\" contains an inner tuple marked as invalid",
                                RelationGetRelationName(rel)),
                         errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
                         errhint("Please REINDEX it.")));
        }

        /*
         * Remember the block number of this page, so that we can revisit it
         * later in gistvacuum_delete_empty_pages(), when we search for
         * parents of empty leaf pages.
         */
        if (blkno == orig_blkno)
            intset_add_member(vstate->internal_page_set, blkno);
    }

    UnlockReleaseBuffer(buffer);

    /*
     * This is really tail recursion, but if the compiler is too stupid to
     * optimize it as such, we'd eat an uncomfortably large amount of stack
     * space per recursion level (due to the deletable[] array).  A failure is
     * improbable since the number of levels isn't likely to be large ... but
     * just in case, let's hand-optimize into a loop.
     */
    if (recurse_to != InvalidBlockNumber)
    {
        blkno = recurse_to;
        goto restart;
    }
}
     438             : 
/*
 * Scan all internal pages, and try to delete their empty child pages.
 */
static void
gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
{
    Relation    rel = info->index;
    BlockNumber empty_pages_remaining;
    uint64      blkno;

    /*
     * Rescan all inner pages to find those that have empty child pages.
     */
    empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
    intset_begin_iterate(vstate->internal_page_set);
    while (empty_pages_remaining > 0 &&
           intset_iterate_next(vstate->internal_page_set, &blkno))
    {
        Buffer      buffer;
        Page        page;
        OffsetNumber off,
                    maxoff;
        OffsetNumber todelete[MaxOffsetNumber];
        BlockNumber leafs_to_delete[MaxOffsetNumber];
        int         ntodelete;
        int         deleted;

        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
                                    RBM_NORMAL, info->strategy);

        /* shared lock suffices: we only read the downlinks at this stage */
        LockBuffer(buffer, GIST_SHARE);
        page = (Page) BufferGetPage(buffer);

        if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
        {
            /*
             * This page was an internal page earlier, but now it's something
             * else. Shouldn't happen...
             */
            Assert(false);
            UnlockReleaseBuffer(buffer);
            continue;
        }

        /*
         * Scan all the downlinks, and see if any of them point to empty leaf
         * pages.  (The "ntodelete < maxoff - 1" bound ensures at least one
         * downlink is never collected; see the note below about not removing
         * the last downlink.)
         */
        maxoff = PageGetMaxOffsetNumber(page);
        ntodelete = 0;
        for (off = FirstOffsetNumber;
             off <= maxoff && ntodelete < maxoff - 1;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
            BlockNumber leafblk;

            leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
            if (intset_is_member(vstate->empty_leaf_set, leafblk))
            {
                leafs_to_delete[ntodelete] = leafblk;
                todelete[ntodelete++] = off;
            }
        }

        /*
         * In order to avoid deadlock, child page must be locked before
         * parent, so we must release the lock on the parent, lock the child,
         * and then re-acquire the lock the parent.  (And we wouldn't want to
         * do I/O, while holding a lock, anyway.)
         *
         * At the instant that we're not holding a lock on the parent, the
         * downlink might get moved by a concurrent insert, so we must
         * re-check that it still points to the same child page after we have
         * acquired both locks.  Also, another backend might have inserted a
         * tuple to the page, so that it is no longer empty.  gistdeletepage()
         * re-checks all these conditions.
         */
        LockBuffer(buffer, GIST_UNLOCK);

        deleted = 0;
        for (int i = 0; i < ntodelete; i++)
        {
            Buffer      leafbuf;

            /*
             * Don't remove the last downlink from the parent.  That would
             * confuse the insertion code.
             */
            if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
                break;

            leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
                                         RBM_NORMAL, info->strategy);
            LockBuffer(leafbuf, GIST_EXCLUSIVE);
            gistcheckpage(rel, leafbuf);

            LockBuffer(buffer, GIST_EXCLUSIVE);
            /*
             * Pass "todelete[i] - deleted": presumably each successful
             * deletion shifts later downlink offsets down by one — and
             * gistdeletepage() re-verifies the downlink anyway (NOTE(review):
             * confirm against gistdeletepage's offset re-check).
             */
            if (gistdeletepage(info, vstate->stats,
                               buffer, todelete[i] - deleted,
                               leafbuf))
                deleted++;
            LockBuffer(buffer, GIST_UNLOCK);

            UnlockReleaseBuffer(leafbuf);
        }

        ReleaseBuffer(buffer);

        /* update stats */
        vstate->stats->pages_removed += deleted;

        /*
         * We can stop the scan as soon as we have seen the downlinks, even if
         * we were not able to remove them all.
         */
        empty_pages_remaining -= ntodelete;
    }
}
     559             : 
/*
 * gistdeletepage takes a leaf page, and its parent, and tries to delete the
 * leaf.  Both pages must be locked.
 *
 * Even if the page was empty when we first saw it, a concurrent inserter might
 * have added a tuple to it since.  Similarly, the downlink might have moved.
 * We re-check all the conditions, to make sure the page is still deletable,
 * before modifying anything.
 *
 * 'downlink' is the offset of the leaf's downlink on the parent page, already
 * adjusted by the caller for any downlinks it removed earlier in this pass.
 *
 * Returns true if the page was deleted, and false if a concurrent update
 * prevented it.
 */
static bool
gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               Buffer parentBuffer, OffsetNumber downlink,
               Buffer leafBuffer)
{
    Page        parentPage = BufferGetPage(parentBuffer);
    Page        leafPage = BufferGetPage(leafBuffer);
    ItemId      iid;
    IndexTuple  idxtuple;
    XLogRecPtr  recptr;
    FullTransactionId txid;

    /*
     * Check that the leaf is still empty and deletable.
     */
    if (!GistPageIsLeaf(leafPage))
    {
        /* a leaf page should never become a non-leaf page */
        Assert(false);
        return false;
    }

    if (GistFollowRight(leafPage))
        return false;           /* don't mess with a concurrent page split */

    /* InvalidOffsetNumber (0) max offset means the page holds no tuples */
    if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
        return false;           /* not empty anymore */

    /*
     * Ok, the leaf is deletable.  Is the downlink in the parent page still
     * valid?  It might have been moved by a concurrent insert.  We could try
     * to re-find it by scanning the page again, possibly moving right if the
     * page was split.  But for now, let's keep it simple and just give up.
     * The next VACUUM will pick it up.
     */
    if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
        GistPageIsLeaf(parentPage))
    {
        /* shouldn't happen, internal pages are never deleted */
        Assert(false);
        return false;
    }

    /*
     * Give up if the downlink offset no longer exists, or if removing it
     * would leave the parent empty (the insertion code requires that an
     * internal page always keeps at least one downlink).
     */
    if (PageGetMaxOffsetNumber(parentPage) < downlink
        || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
        return false;

    /* Verify the downlink still points at the leaf we're about to delete */
    iid = PageGetItemId(parentPage, downlink);
    idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
    if (BufferGetBlockNumber(leafBuffer) !=
        ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
        return false;

    /*
     * All good, proceed with the deletion.
     *
     * The page cannot be immediately recycled, because in-progress scans that
     * saw the downlink might still visit it.  Mark the page with the current
     * next-XID counter, so that we know when it can be recycled.  Once that
     * XID becomes older than GlobalXmin, we know that all scans that are
     * currently in progress must have ended.  (That's much more conservative
     * than needed, but let's keep it safe and simple.)
     */
    txid = ReadNextFullTransactionId();

    /* No failure allowed between marking buffers dirty and writing WAL */
    START_CRIT_SECTION();

    /* mark the page as deleted */
    MarkBufferDirty(leafBuffer);
    GistPageSetDeleted(leafPage, txid);
    stats->pages_deleted++;

    /* remove the downlink from the parent */
    MarkBufferDirty(parentBuffer);
    PageIndexTupleDelete(parentPage, downlink);

    /*
     * WAL-log the deletion; unlogged/temp relations get a fake LSN instead,
     * so that page LSNs still advance for the NSN interlock.
     */
    if (RelationNeedsWAL(info->index))
        recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
    else
        recptr = gistGetFakeLSN(info->index);
    PageSetLSN(parentPage, recptr);
    PageSetLSN(leafPage, recptr);

    END_CRIT_SECTION();

    return true;
}

Generated by: LCOV version 1.13