LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistvacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 175 199 87.9 %
Date: 2019-11-15 23:07:02 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gistvacuum.c
       4             :  *    vacuuming routines for the postgres GiST index access method.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/gist/gistvacuum.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/genam.h"
      18             : #include "access/gist_private.h"
      19             : #include "access/transam.h"
      20             : #include "commands/vacuum.h"
      21             : #include "lib/integerset.h"
      22             : #include "miscadmin.h"
      23             : #include "storage/indexfsm.h"
      24             : #include "storage/lmgr.h"
      25             : #include "utils/memutils.h"
      26             : 
      27             : /*
      28             :  * State kept across vacuum stages.
      29             :  */
      30             : typedef struct
      31             : {
      32             :     IndexBulkDeleteResult stats;    /* must be first */
      33             : 
      34             :     /*
      35             :      * These are used to memorize all internal and empty leaf pages in the 1st
      36             :      * vacuum stage.  They are used in the 2nd stage, to delete all the empty
      37             :      * pages.
      38             :      */
      39             :     IntegerSet *internal_page_set;
      40             :     IntegerSet *empty_leaf_set;
      41             :     MemoryContext page_set_context;
      42             : } GistBulkDeleteResult;
      43             : 
      44             : /* Working state needed by gistbulkdelete */
      45             : typedef struct
      46             : {
      47             :     IndexVacuumInfo *info;
      48             :     GistBulkDeleteResult *stats;
      49             :     IndexBulkDeleteCallback callback;
      50             :     void       *callback_state;
      51             :     GistNSN     startNSN;
      52             : } GistVacState;
      53             : 
      54             : static void gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
      55             :                            IndexBulkDeleteCallback callback, void *callback_state);
      56             : static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
      57             :                            BlockNumber orig_blkno);
      58             : static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
      59             :                                           GistBulkDeleteResult *stats);
      60             : static bool gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
      61             :                            Buffer buffer, OffsetNumber downlink,
      62             :                            Buffer leafBuffer);
      63             : 
      64             : /* allocate the 'stats' struct that's kept over vacuum stages */
      65             : static GistBulkDeleteResult *
      66          42 : create_GistBulkDeleteResult(void)
      67             : {
      68             :     GistBulkDeleteResult *gist_stats;
      69             : 
      70          42 :     gist_stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult));
      71          42 :     gist_stats->page_set_context =
      72          42 :         GenerationContextCreate(CurrentMemoryContext,
      73             :                                 "GiST VACUUM page set context",
      74             :                                 16 * 1024);
      75             : 
      76          42 :     return gist_stats;
      77             : }
      78             : 
      79             : /*
      80             :  * VACUUM bulkdelete stage: remove index entries.
      81             :  */
      82             : IndexBulkDeleteResult *
      83          16 : gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      84             :                IndexBulkDeleteCallback callback, void *callback_state)
      85             : {
      86          16 :     GistBulkDeleteResult *gist_stats = (GistBulkDeleteResult *) stats;
      87             : 
      88             :     /* allocate stats if first time through, else re-use existing struct */
      89          16 :     if (gist_stats == NULL)
      90          16 :         gist_stats = create_GistBulkDeleteResult();
      91             : 
      92          16 :     gistvacuumscan(info, gist_stats, callback, callback_state);
      93             : 
      94          16 :     return (IndexBulkDeleteResult *) gist_stats;
      95             : }
      96             : 
      97             : /*
      98             :  * VACUUM cleanup stage: delete empty pages, and update index statistics.
      99             :  */
     100             : IndexBulkDeleteResult *
     101         100 : gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     102             : {
     103         100 :     GistBulkDeleteResult *gist_stats = (GistBulkDeleteResult *) stats;
     104             : 
     105             :     /* No-op in ANALYZE ONLY mode */
     106         100 :     if (info->analyze_only)
     107          62 :         return stats;
     108             : 
     109             :     /*
     110             :      * If gistbulkdelete was called, we need not do anything, just return the
     111             :      * stats from the latest gistbulkdelete call.  If it wasn't called, we
     112             :      * still need to do a pass over the index, to obtain index statistics.
     113             :      */
     114          38 :     if (gist_stats == NULL)
     115             :     {
     116          26 :         gist_stats = create_GistBulkDeleteResult();
     117          26 :         gistvacuumscan(info, gist_stats, NULL, NULL);
     118             :     }
     119             : 
     120             :     /*
     121             :      * If we saw any empty pages, try to unlink them from the tree so that
     122             :      * they can be reused.
     123             :      */
     124          38 :     gistvacuum_delete_empty_pages(info, gist_stats);
     125             : 
     126             :     /* we don't need the internal and empty page sets anymore */
     127          38 :     MemoryContextDelete(gist_stats->page_set_context);
     128          38 :     gist_stats->page_set_context = NULL;
     129          38 :     gist_stats->internal_page_set = NULL;
     130          38 :     gist_stats->empty_leaf_set = NULL;
     131             : 
     132             :     /*
     133             :      * It's quite possible for us to be fooled by concurrent page splits into
     134             :      * double-counting some index tuples, so disbelieve any total that exceeds
     135             :      * the underlying heap's count ... if we know that accurately.  Otherwise
     136             :      * this might just make matters worse.
     137             :      */
     138          38 :     if (!info->estimated_count)
     139             :     {
     140          38 :         if (gist_stats->stats.num_index_tuples > info->num_heap_tuples)
     141           0 :             gist_stats->stats.num_index_tuples = info->num_heap_tuples;
     142             :     }
     143             : 
     144          38 :     return (IndexBulkDeleteResult *) gist_stats;
     145             : }
     146             : 
     147             : /*
     148             :  * gistvacuumscan --- scan the index for VACUUMing purposes
     149             :  *
     150             :  * This scans the index for leaf tuples that are deletable according to the
     151             :  * vacuum callback, and updates the stats.  Both btbulkdelete and
     152             :  * btvacuumcleanup invoke this (the latter only if no btbulkdelete call
     153             :  * occurred).
     154             :  *
     155             :  * This also makes note of any empty leaf pages, as well as all internal
     156             :  * pages.  The second stage, gistvacuum_delete_empty_pages(), needs that
     157             :  * information.  Any deleted pages are added directly to the free space map.
     158             :  * (They should've been added there when they were originally deleted, already,
     159             :  * but it's possible that the FSM was lost at a crash, for example.)
     160             :  *
     161             :  * The caller is responsible for initially allocating/zeroing a stats struct.
     162             :  */
     163             : static void
     164          42 : gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
     165             :                IndexBulkDeleteCallback callback, void *callback_state)
     166             : {
     167          42 :     Relation    rel = info->index;
     168             :     GistVacState vstate;
     169             :     BlockNumber num_pages;
     170             :     bool        needLock;
     171             :     BlockNumber blkno;
     172             :     MemoryContext oldctx;
     173             : 
     174             :     /*
     175             :      * Reset counts that will be incremented during the scan; needed in case
     176             :      * of multiple scans during a single VACUUM command.
     177             :      */
     178          42 :     stats->stats.estimated_count = false;
     179          42 :     stats->stats.num_index_tuples = 0;
     180          42 :     stats->stats.pages_deleted = 0;
     181          42 :     stats->stats.pages_free = 0;
     182          42 :     MemoryContextReset(stats->page_set_context);
     183             : 
     184             :     /*
     185             :      * Create the integer sets to remember all the internal and the empty leaf
     186             :      * pages in page_set_context.  Internally, the integer set will remember
     187             :      * this context so that the subsequent allocations for these integer sets
     188             :      * will be done from the same context.
     189             :      */
     190          42 :     oldctx = MemoryContextSwitchTo(stats->page_set_context);
     191          42 :     stats->internal_page_set = intset_create();
     192          42 :     stats->empty_leaf_set = intset_create();
     193          42 :     MemoryContextSwitchTo(oldctx);
     194             : 
     195             :     /* Set up info to pass down to gistvacuumpage */
     196          42 :     vstate.info = info;
     197          42 :     vstate.stats = stats;
     198          42 :     vstate.callback = callback;
     199          42 :     vstate.callback_state = callback_state;
     200          42 :     if (RelationNeedsWAL(rel))
     201          42 :         vstate.startNSN = GetInsertRecPtr();
     202             :     else
     203           0 :         vstate.startNSN = gistGetFakeLSN(rel);
     204             : 
     205             :     /*
     206             :      * The outer loop iterates over all index pages, in physical order (we
     207             :      * hope the kernel will cooperate in providing read-ahead for speed).  It
     208             :      * is critical that we visit all leaf pages, including ones added after we
     209             :      * start the scan, else we might fail to delete some deletable tuples.
     210             :      * Hence, we must repeatedly check the relation length.  We must acquire
     211             :      * the relation-extension lock while doing so to avoid a race condition:
     212             :      * if someone else is extending the relation, there is a window where
     213             :      * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     214             :      * write-locked by gistNewBuffer().  If we manage to scan such a page
     215             :      * here, we'll improperly assume it can be recycled.  Taking the lock
     216             :      * synchronizes things enough to prevent a problem: either num_pages won't
     217             :      * include the new page, or gistNewBuffer already has write lock on the
     218             :      * buffer and it will be fully initialized before we can examine it.  (See
     219             :      * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     220             :      * if a page is added immediately after we look; the page splitting code
     221             :      * already has write-lock on the left page before it adds a right page, so
     222             :      * we must already have processed any tuples due to be moved into such a
     223             :      * page.
     224             :      *
     225             :      * We can skip locking for new or temp relations, however, since no one
     226             :      * else could be accessing them.
     227             :      */
     228          42 :     needLock = !RELATION_IS_LOCAL(rel);
     229             : 
     230          42 :     blkno = GIST_ROOT_BLKNO;
     231             :     for (;;)
     232             :     {
     233             :         /* Get the current relation length */
     234         126 :         if (needLock)
     235          84 :             LockRelationForExtension(rel, ExclusiveLock);
     236          84 :         num_pages = RelationGetNumberOfBlocks(rel);
     237          84 :         if (needLock)
     238          84 :             UnlockRelationForExtension(rel, ExclusiveLock);
     239             : 
     240             :         /* Quit if we've scanned the whole relation */
     241          84 :         if (blkno >= num_pages)
     242          42 :             break;
     243             :         /* Iterate over pages, then loop back to recheck length */
     244        3428 :         for (; blkno < num_pages; blkno++)
     245        3386 :             gistvacuumpage(&vstate, blkno, blkno);
     246             :     }
     247             : 
     248             :     /*
     249             :      * If we found any recyclable pages (and recorded them in the FSM), then
     250             :      * forcibly update the upper-level FSM pages to ensure that searchers can
     251             :      * find them.  It's possible that the pages were also found during
     252             :      * previous scans and so this is a waste of time, but it's cheap enough
     253             :      * relative to scanning the index that it shouldn't matter much, and
     254             :      * making sure that free pages are available sooner not later seems
     255             :      * worthwhile.
     256             :      *
     257             :      * Note that if no recyclable pages exist, we don't bother vacuuming the
     258             :      * FSM at all.
     259             :      */
     260          42 :     if (stats->stats.pages_free > 0)
     261           0 :         IndexFreeSpaceMapVacuum(rel);
     262             : 
     263             :     /* update statistics */
     264          42 :     stats->stats.num_pages = num_pages;
     265          42 : }
     266             : 
     267             : /*
     268             :  * gistvacuumpage --- VACUUM one page
     269             :  *
     270             :  * This processes a single page for gistbulkdelete().  In some cases we
     271             :  * must go back and re-examine previously-scanned pages; this routine
     272             :  * recurses when necessary to handle that case.
     273             :  *
     274             :  * blkno is the page to process.  orig_blkno is the highest block number
     275             :  * reached by the outer gistvacuumscan loop (the same as blkno, unless we
     276             :  * are recursing to re-examine a previous page).
     277             :  */
     278             : static void
     279        3386 : gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
     280             : {
     281        3386 :     GistBulkDeleteResult *stats = vstate->stats;
     282        3386 :     IndexVacuumInfo *info = vstate->info;
     283        3386 :     IndexBulkDeleteCallback callback = vstate->callback;
     284        3386 :     void       *callback_state = vstate->callback_state;
     285        3386 :     Relation    rel = info->index;
     286             :     Buffer      buffer;
     287             :     Page        page;
     288             :     BlockNumber recurse_to;
     289             : 
     290             : restart:
     291        3386 :     recurse_to = InvalidBlockNumber;
     292             : 
     293             :     /* call vacuum_delay_point while not holding any buffer lock */
     294        3386 :     vacuum_delay_point();
     295             : 
     296        3386 :     buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     297             :                                 info->strategy);
     298             : 
     299             :     /*
     300             :      * We are not going to stay here for a long time, aggressively grab an
     301             :      * exclusive lock.
     302             :      */
     303        3386 :     LockBuffer(buffer, GIST_EXCLUSIVE);
     304        3386 :     page = (Page) BufferGetPage(buffer);
     305             : 
     306        3386 :     if (gistPageRecyclable(page))
     307             :     {
     308             :         /* Okay to recycle this page */
     309           0 :         RecordFreeIndexPage(rel, blkno);
     310           0 :         stats->stats.pages_free++;
     311           0 :         stats->stats.pages_deleted++;
     312             :     }
     313        3386 :     else if (GistPageIsDeleted(page))
     314             :     {
     315             :         /* Already deleted, but can't recycle yet */
     316           0 :         stats->stats.pages_deleted++;
     317             :     }
     318        3386 :     else if (GistPageIsLeaf(page))
     319             :     {
     320             :         OffsetNumber todelete[MaxOffsetNumber];
     321        3304 :         int         ntodelete = 0;
     322             :         int         nremain;
     323        3304 :         GISTPageOpaque opaque = GistPageGetOpaque(page);
     324        3304 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     325             : 
     326             :         /*
     327             :          * Check whether we need to recurse back to earlier pages.  What we
     328             :          * are concerned about is a page split that happened since we started
     329             :          * the vacuum scan.  If the split moved some tuples to a lower page
     330             :          * then we might have missed 'em.  If so, set up for tail recursion.
     331             :          *
     332             :          * This is similar to the checks we do during searches, when following
     333             :          * a downlink, but we don't need to jump to higher-numbered pages,
     334             :          * because we will process them later, anyway.
     335             :          */
     336        6608 :         if ((GistFollowRight(page) ||
     337        3304 :              vstate->startNSN < GistPageGetNSN(page)) &&
     338           0 :             (opaque->rightlink != InvalidBlockNumber) &&
     339           0 :             (opaque->rightlink < orig_blkno))
     340             :         {
     341           0 :             recurse_to = opaque->rightlink;
     342             :         }
     343             : 
     344             :         /*
     345             :          * Scan over all items to see which ones need to be deleted according
     346             :          * to the callback function.
     347             :          */
     348        3304 :         if (callback)
     349             :         {
     350             :             OffsetNumber off;
     351             : 
     352       64420 :             for (off = FirstOffsetNumber;
     353             :                  off <= maxoff;
     354       60076 :                  off = OffsetNumberNext(off))
     355             :             {
     356       60076 :                 ItemId      iid = PageGetItemId(page, off);
     357       60076 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     358             : 
     359       60076 :                 if (callback(&(idxtuple->t_tid), callback_state))
     360       35012 :                     todelete[ntodelete++] = off;
     361             :             }
     362             :         }
     363             : 
     364             :         /*
     365             :          * Apply any needed deletes.  We issue just one WAL record per page,
     366             :          * so as to minimize WAL traffic.
     367             :          */
     368        3304 :         if (ntodelete > 0)
     369             :         {
     370        2168 :             START_CRIT_SECTION();
     371             : 
     372        2168 :             MarkBufferDirty(buffer);
     373             : 
     374        2168 :             PageIndexMultiDelete(page, todelete, ntodelete);
     375        2168 :             GistMarkTuplesDeleted(page);
     376             : 
     377        2168 :             if (RelationNeedsWAL(rel))
     378             :             {
     379             :                 XLogRecPtr  recptr;
     380             : 
     381        2168 :                 recptr = gistXLogUpdate(buffer,
     382             :                                         todelete, ntodelete,
     383             :                                         NULL, 0, InvalidBuffer);
     384        2168 :                 PageSetLSN(page, recptr);
     385             :             }
     386             :             else
     387           0 :                 PageSetLSN(page, gistGetFakeLSN(rel));
     388             : 
     389        2168 :             END_CRIT_SECTION();
     390             : 
     391        2168 :             stats->stats.tuples_removed += ntodelete;
     392             :             /* must recompute maxoff */
     393        2168 :             maxoff = PageGetMaxOffsetNumber(page);
     394             :         }
     395             : 
     396        3304 :         nremain = maxoff - FirstOffsetNumber + 1;
     397        3304 :         if (nremain == 0)
     398             :         {
     399             :             /*
     400             :              * The page is now completely empty.  Remember its block number,
     401             :              * so that we will try to delete the page in the second stage.
     402             :              *
     403             :              * Skip this when recursing, because IntegerSet requires that the
     404             :              * values are added in ascending order.  The next VACUUM will pick
     405             :              * it up.
     406             :              */
     407         538 :             if (blkno == orig_blkno)
     408         538 :                 intset_add_member(stats->empty_leaf_set, blkno);
     409             :         }
     410             :         else
     411        2766 :             stats->stats.num_index_tuples += nremain;
     412             :     }
     413             :     else
     414             :     {
     415             :         /*
     416             :          * On an internal page, check for "invalid tuples", left behind by an
     417             :          * incomplete page split on PostgreSQL 9.0 or below.  These are not
     418             :          * created by newer PostgreSQL versions, but unfortunately, there is
     419             :          * no version number anywhere in a GiST index, so we don't know
     420             :          * whether this index might still contain invalid tuples or not.
     421             :          */
     422          82 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     423             :         OffsetNumber off;
     424             : 
     425        3508 :         for (off = FirstOffsetNumber;
     426             :              off <= maxoff;
     427        3344 :              off = OffsetNumberNext(off))
     428             :         {
     429        3344 :             ItemId      iid = PageGetItemId(page, off);
     430        3344 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     431             : 
     432        3344 :             if (GistTupleIsInvalid(idxtuple))
     433           0 :                 ereport(LOG,
     434             :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     435             :                                 RelationGetRelationName(rel)),
     436             :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     437             :                          errhint("Please REINDEX it.")));
     438             :         }
     439             : 
     440             :         /*
     441             :          * Remember the block number of this page, so that we can revisit it
     442             :          * later in gistvacuum_delete_empty_pages(), when we search for
     443             :          * parents of empty leaf pages.
     444             :          */
     445          82 :         if (blkno == orig_blkno)
     446          82 :             intset_add_member(stats->internal_page_set, blkno);
     447             :     }
     448             : 
     449        3386 :     UnlockReleaseBuffer(buffer);
     450             : 
     451             :     /*
     452             :      * This is really tail recursion, but if the compiler is too stupid to
     453             :      * optimize it as such, we'd eat an uncomfortably large amount of stack
     454             :      * space per recursion level (due to the deletable[] array).  A failure is
     455             :      * improbable since the number of levels isn't likely to be large ... but
     456             :      * just in case, let's hand-optimize into a loop.
     457             :      */
     458        3386 :     if (recurse_to != InvalidBlockNumber)
     459             :     {
     460           0 :         blkno = recurse_to;
     461           0 :         goto restart;
     462             :     }
     463        3386 : }
     464             : 
     465             : /*
     466             :  * Scan all internal pages, and try to delete their empty child pages.
     467             :  */
     468             : static void
     469          38 : gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats)
     470             : {
     471          38 :     Relation    rel = info->index;
     472             :     BlockNumber empty_pages_remaining;
     473             :     uint64      blkno;
     474             : 
     475             :     /*
     476             :      * Rescan all inner pages to find those that have empty child pages.
     477             :      */
     478          38 :     empty_pages_remaining = intset_num_entries(stats->empty_leaf_set);
     479          38 :     intset_begin_iterate(stats->internal_page_set);
     480         138 :     while (empty_pages_remaining > 0 &&
     481          32 :            intset_iterate_next(stats->internal_page_set, &blkno))
     482             :     {
     483             :         Buffer      buffer;
     484             :         Page        page;
     485             :         OffsetNumber off,
     486             :                     maxoff;
     487             :         OffsetNumber todelete[MaxOffsetNumber];
     488             :         BlockNumber leafs_to_delete[MaxOffsetNumber];
     489             :         int         ntodelete;
     490             :         int         deleted;
     491             : 
     492          30 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
     493             :                                     RBM_NORMAL, info->strategy);
     494             : 
     495          30 :         LockBuffer(buffer, GIST_SHARE);
     496          30 :         page = (Page) BufferGetPage(buffer);
     497             : 
     498          30 :         if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
     499             :         {
     500             :             /*
     501             :              * This page was an internal page earlier, but now it's something
     502             :              * else. Shouldn't happen...
     503             :              */
     504             :             Assert(false);
     505           0 :             UnlockReleaseBuffer(buffer);
     506           0 :             continue;
     507             :         }
     508             : 
     509             :         /*
     510             :          * Scan all the downlinks, and see if any of them point to empty leaf
     511             :          * pages.
     512             :          */
     513          30 :         maxoff = PageGetMaxOffsetNumber(page);
     514          30 :         ntodelete = 0;
     515        1156 :         for (off = FirstOffsetNumber;
     516        1108 :              off <= maxoff && ntodelete < maxoff - 1;
     517        1096 :              off = OffsetNumberNext(off))
     518             :         {
     519        1096 :             ItemId      iid = PageGetItemId(page, off);
     520        1096 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     521             :             BlockNumber leafblk;
     522             : 
     523        1096 :             leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     524        1096 :             if (intset_is_member(stats->empty_leaf_set, leafblk))
     525             :             {
     526         526 :                 leafs_to_delete[ntodelete] = leafblk;
     527         526 :                 todelete[ntodelete++] = off;
     528             :             }
     529             :         }
     530             : 
     531             :         /*
     532             :          * In order to avoid deadlock, child page must be locked before
     533             :          * parent, so we must release the lock on the parent, lock the child,
     534             :          * and then re-acquire the lock the parent.  (And we wouldn't want to
     535             :          * do I/O, while holding a lock, anyway.)
     536             :          *
     537             :          * At the instant that we're not holding a lock on the parent, the
     538             :          * downlink might get moved by a concurrent insert, so we must
     539             :          * re-check that it still points to the same child page after we have
     540             :          * acquired both locks.  Also, another backend might have inserted a
     541             :          * tuple to the page, so that it is no longer empty.  gistdeletepage()
     542             :          * re-checks all these conditions.
     543             :          */
     544          30 :         LockBuffer(buffer, GIST_UNLOCK);
     545             : 
     546          30 :         deleted = 0;
     547         556 :         for (int i = 0; i < ntodelete; i++)
     548             :         {
     549             :             Buffer      leafbuf;
     550             : 
     551             :             /*
     552             :              * Don't remove the last downlink from the parent.  That would
     553             :              * confuse the insertion code.
     554             :              */
     555         526 :             if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
     556           0 :                 break;
     557             : 
     558         526 :             leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
     559             :                                          RBM_NORMAL, info->strategy);
     560         526 :             LockBuffer(leafbuf, GIST_EXCLUSIVE);
     561         526 :             gistcheckpage(rel, leafbuf);
     562             : 
     563         526 :             LockBuffer(buffer, GIST_EXCLUSIVE);
     564         526 :             if (gistdeletepage(info, stats,
     565         526 :                                buffer, todelete[i] - deleted,
     566             :                                leafbuf))
     567         526 :                 deleted++;
     568         526 :             LockBuffer(buffer, GIST_UNLOCK);
     569             : 
     570         526 :             UnlockReleaseBuffer(leafbuf);
     571             :         }
     572             : 
     573          30 :         ReleaseBuffer(buffer);
     574             : 
     575             :         /* update stats */
     576          30 :         stats->stats.pages_removed += deleted;
     577             : 
     578             :         /*
     579             :          * We can stop the scan as soon as we have seen the downlinks, even if
     580             :          * we were not able to remove them all.
     581             :          */
     582          30 :         empty_pages_remaining -= ntodelete;
     583             :     }
     584          38 : }
     585             : 
     586             : /*
     587             :  * gistdeletepage takes a leaf page, and its parent, and tries to delete the
     588             :  * leaf.  Both pages must be locked.
     589             :  *
     590             :  * Even if the page was empty when we first saw it, a concurrent inserter might
     591             :  * have added a tuple to it since.  Similarly, the downlink might have moved.
     592             :  * We re-check all the conditions, to make sure the page is still deletable,
     593             :  * before modifying anything.
     594             :  *
     595             :  * Returns true, if the page was deleted, and false if a concurrent update
     596             :  * prevented it.
     597             :  */
     598             : static bool
     599         526 : gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
     600             :                Buffer parentBuffer, OffsetNumber downlink,
     601             :                Buffer leafBuffer)
     602             : {
     603         526 :     Page        parentPage = BufferGetPage(parentBuffer);
     604         526 :     Page        leafPage = BufferGetPage(leafBuffer);
     605             :     ItemId      iid;
     606             :     IndexTuple  idxtuple;
     607             :     XLogRecPtr  recptr;
     608             :     FullTransactionId txid;
     609             : 
     610             :     /*
     611             :      * Check that the leaf is still empty and deletable.
     612             :      */
     613         526 :     if (!GistPageIsLeaf(leafPage))
     614             :     {
     615             :         /* a leaf page should never become a non-leaf page */
     616             :         Assert(false);
     617           0 :         return false;
     618             :     }
     619             : 
     620         526 :     if (GistFollowRight(leafPage))
     621           0 :         return false;           /* don't mess with a concurrent page split */
     622             : 
     623         526 :     if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
     624           0 :         return false;           /* not empty anymore */
     625             : 
     626             :     /*
     627             :      * Ok, the leaf is deletable.  Is the downlink in the parent page still
     628             :      * valid?  It might have been moved by a concurrent insert.  We could try
     629             :      * to re-find it by scanning the page again, possibly moving right if the
     630             :      * was split.  But for now, let's keep it simple and just give up.  The
     631             :      * next VACUUM will pick it up.
     632             :      */
     633        1052 :     if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
     634         526 :         GistPageIsLeaf(parentPage))
     635             :     {
     636             :         /* shouldn't happen, internal pages are never deleted */
     637             :         Assert(false);
     638           0 :         return false;
     639             :     }
     640             : 
     641         526 :     if (PageGetMaxOffsetNumber(parentPage) < downlink
     642         526 :         || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
     643           0 :         return false;
     644             : 
     645         526 :     iid = PageGetItemId(parentPage, downlink);
     646         526 :     idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
     647        1052 :     if (BufferGetBlockNumber(leafBuffer) !=
     648         526 :         ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
     649           0 :         return false;
     650             : 
     651             :     /*
     652             :      * All good, proceed with the deletion.
     653             :      *
     654             :      * The page cannot be immediately recycled, because in-progress scans that
     655             :      * saw the downlink might still visit it.  Mark the page with the current
     656             :      * next-XID counter, so that we know when it can be recycled.  Once that
     657             :      * XID becomes older than GlobalXmin, we know that all scans that are
     658             :      * currently in progress must have ended.  (That's much more conservative
     659             :      * than needed, but let's keep it safe and simple.)
     660             :      */
     661         526 :     txid = ReadNextFullTransactionId();
     662             : 
     663         526 :     START_CRIT_SECTION();
     664             : 
     665             :     /* mark the page as deleted */
     666         526 :     MarkBufferDirty(leafBuffer);
     667         526 :     GistPageSetDeleted(leafPage, txid);
     668         526 :     stats->stats.pages_deleted++;
     669             : 
     670             :     /* remove the downlink from the parent */
     671         526 :     MarkBufferDirty(parentBuffer);
     672         526 :     PageIndexTupleDelete(parentPage, downlink);
     673             : 
     674         526 :     if (RelationNeedsWAL(info->index))
     675         526 :         recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
     676             :     else
     677           0 :         recptr = gistGetFakeLSN(info->index);
     678         526 :     PageSetLSN(parentPage, recptr);
     679         526 :     PageSetLSN(leafPage, recptr);
     680             : 
     681         526 :     END_CRIT_SECTION();
     682             : 
     683         526 :     return true;
     684             : }

Generated by: LCOV version 1.13