/*-------------------------------------------------------------------------
 *
 * gistvacuum.c
 *	  vacuuming routines for the postgres GiST index access method.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/gist/gistvacuum.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/gist_private.h"
#include "access/transam.h"
#include "commands/vacuum.h"
#include "lib/integerset.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"

/* Working state needed by gistbulkdelete */
typedef struct
{
    IndexVacuumInfo *info;
    IndexBulkDeleteResult *stats;
    IndexBulkDeleteCallback callback;
    void       *callback_state;
    GistNSN     startNSN;

    /*
     * These sets are used to remember all internal pages, and all empty
     * leaf pages, encountered during the scan. They are used in the second
     * stage, when the empty pages are deleted.
     */
    IntegerSet *internal_page_set;
    IntegerSet *empty_leaf_set;
    MemoryContext page_set_context;
} GistVacState;
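
/*
 * For reference (editorial note, not part of the original file): the vacuum
 * callback stored above has this signature, declared in access/genam.h. It
 * returns true if the heap tuple an index tuple points to is dead, i.e. if
 * the index tuple should be removed:
 *
 *     typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr,
 *                                              void *state);
 */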

static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                           IndexBulkDeleteCallback callback, void *callback_state);
static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
                           BlockNumber orig_blkno);
static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
                                          GistVacState *vstate);
static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                           Buffer parentBuffer, OffsetNumber downlink,
                           Buffer leafBuffer);

/*
 * VACUUM bulkdelete stage: remove index entries.
 */
IndexBulkDeleteResult *
gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    gistvacuumscan(info, stats, callback, callback_state);

    return stats;
}

/*
 * VACUUM cleanup stage: delete empty pages, and update index statistics.
 */
IndexBulkDeleteResult *
gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    /*
     * If gistbulkdelete was called, we need not do anything, just return the
     * stats from the latest gistbulkdelete call. If it wasn't called, we
     * still need to do a pass over the index, to obtain index statistics.
     */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        gistvacuumscan(info, stats, NULL, NULL);
    }

    /*
     * It's quite possible for us to be fooled by concurrent page splits into
     * double-counting some index tuples, so disbelieve any total that exceeds
     * the underlying heap's count ... if we know that accurately. Otherwise
     * this might just make matters worse.
     */
    if (!info->estimated_count)
    {
        if (stats->num_index_tuples > info->num_heap_tuples)
            stats->num_index_tuples = info->num_heap_tuples;
    }

    return stats;
}
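
/*
 * Editorial aside: these two entry points are not called directly; VACUUM
 * reaches them through the index access method interface. In gist.c,
 * gisthandler() wires them into the IndexAmRoutine, roughly like this
 * (a sketch paraphrasing gist.c):
 *
 *     amroutine->ambulkdelete = gistbulkdelete;
 *     amroutine->amvacuumcleanup = gistvacuumcleanup;
 */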

/*
 * gistvacuumscan --- scan the index for VACUUMing purposes
 *
 * This scans the index for leaf tuples that are deletable according to the
 * vacuum callback, and updates the stats. Both gistbulkdelete and
 * gistvacuumcleanup invoke this (the latter only if no gistbulkdelete call
 * occurred).
 *
 * This also makes note of any empty leaf pages, as well as all internal
 * pages, while looping over all index pages. After scanning all the pages,
 * we remove the empty pages so that they can be reused. Any deleted pages
 * are added directly to the free space map. (They should already have been
 * added there when they were originally deleted, but it's possible that the
 * FSM was lost in a crash, for example.)
 *
 * The caller is responsible for initially allocating/zeroing a stats struct.
 */
static void
gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    GistVacState vstate;
    BlockNumber num_pages;
    bool        needLock;
    BlockNumber blkno;
    MemoryContext oldctx;

    /*
     * Reset fields that track information about the entire index now. This
     * avoids double-counting in the case where a single VACUUM command
     * requires multiple scans of the index.
     *
     * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     * since they track information about the VACUUM command, and so must last
     * across each call to gistvacuumscan().
     *
     * (Note that pages_free is treated as state about the whole index, not
     * the current VACUUM. This is appropriate because RecordFreeIndexPage()
     * calls are idempotent, and get repeated for the same deleted pages in
     * some scenarios. The point for us is to track the number of recyclable
     * pages in the index at the end of the VACUUM command.)
     */
    stats->num_pages = 0;
    stats->estimated_count = false;
    stats->num_index_tuples = 0;
    stats->pages_deleted = 0;
    stats->pages_free = 0;

    /*
     * Create the integer sets to remember all the internal and empty leaf
     * pages, in page_set_context. Internally, each integer set remembers
     * this context, so that all subsequent allocations for the sets are made
     * from it as well.
     *
     * XXX the allocation sizes used below pre-date the generation context's
     * block-growing code. These values should likely be benchmarked and set
     * to more suitable values.
     */
    vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
                                                      "GiST VACUUM page set context",
                                                      16 * 1024,
                                                      16 * 1024,
                                                      16 * 1024);
    oldctx = MemoryContextSwitchTo(vstate.page_set_context);
    vstate.internal_page_set = intset_create();
    vstate.empty_leaf_set = intset_create();
    MemoryContextSwitchTo(oldctx);
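
    /*
     * Editorial aside: a sketch of the IntegerSet API used here, as declared
     * in lib/integerset.h (paraphrased, not part of this file). Members must
     * be added in ascending order, which is why gistvacuumpage() skips
     * intset_add_member() when it recurses back to a lower block number:
     *
     *     IntegerSet *intset_create(void);
     *     void        intset_add_member(IntegerSet *intset, uint64 x);
     *     bool        intset_is_member(IntegerSet *intset, uint64 x);
     *     uint64      intset_num_entries(IntegerSet *intset);
     *     void        intset_begin_iterate(IntegerSet *intset);
     *     bool        intset_iterate_next(IntegerSet *intset, uint64 *next);
     */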

    /* Set up info to pass down to gistvacuumpage */
    vstate.info = info;
    vstate.stats = stats;
    vstate.callback = callback;
    vstate.callback_state = callback_state;
    if (RelationNeedsWAL(rel))
        vstate.startNSN = GetInsertRecPtr();
    else
        vstate.startNSN = gistGetFakeLSN(rel);

    /*
     * The outer loop iterates over all index pages, in physical order (we
     * hope the kernel will cooperate in providing read-ahead for speed). It
     * is critical that we visit all leaf pages, including ones added after we
     * start the scan, else we might fail to delete some deletable tuples.
     * Hence, we must repeatedly check the relation length. We must acquire
     * the relation-extension lock while doing so to avoid a race condition:
     * if someone else is extending the relation, there is a window where
     * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     * write-locked by gistNewBuffer(). If we manage to scan such a page
     * here, we'll improperly assume it can be recycled. Taking the lock
     * synchronizes things enough to prevent a problem: either num_pages won't
     * include the new page, or gistNewBuffer already has write lock on the
     * buffer and it will be fully initialized before we can examine it. (See
     * also vacuumlazy.c, which has the same issue.) Also, we need not worry
     * if a page is added immediately after we look; the page splitting code
     * already has write-lock on the left page before it adds a right page, so
     * we must already have processed any tuples due to be moved into such a
     * page.
     *
     * We can skip locking for new or temp relations, however, since no one
     * else could be accessing them.
     */
    needLock = !RELATION_IS_LOCAL(rel);

    blkno = GIST_ROOT_BLKNO;
    for (;;)
    {
        /* Get the current relation length */
        if (needLock)
            LockRelationForExtension(rel, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(rel);
        if (needLock)
            UnlockRelationForExtension(rel, ExclusiveLock);

        /* Quit if we've scanned the whole relation */
        if (blkno >= num_pages)
            break;
        /* Iterate over pages, then loop back to recheck length */
        for (; blkno < num_pages; blkno++)
            gistvacuumpage(&vstate, blkno, blkno);
    }

    /*
     * If we found any recyclable pages (and recorded them in the FSM), then
     * forcibly update the upper-level FSM pages to ensure that searchers can
     * find them. It's possible that the pages were also found during
     * previous scans and so this is a waste of time, but it's cheap enough
     * relative to scanning the index that it shouldn't matter much, and
     * making sure that free pages are available sooner not later seems
     * worthwhile.
     *
     * Note that if no recyclable pages exist, we don't bother vacuuming the
     * FSM at all.
     */
    if (stats->pages_free > 0)
        IndexFreeSpaceMapVacuum(rel);

    /* update statistics */
    stats->num_pages = num_pages;

    /*
     * If we saw any empty pages, try to unlink them from the tree so that
     * they can be reused.
     */
    gistvacuum_delete_empty_pages(info, &vstate);

    /* we don't need the internal and empty page sets anymore */
    MemoryContextDelete(vstate.page_set_context);
    vstate.page_set_context = NULL;
    vstate.internal_page_set = NULL;
    vstate.empty_leaf_set = NULL;
}

/*
 * gistvacuumpage --- VACUUM one page
 *
 * This processes a single page for gistbulkdelete(). In some cases we
 * must go back and re-examine previously-scanned pages; this routine
 * recurses when necessary to handle that case.
 *
 * blkno is the page to process. orig_blkno is the highest block number
 * reached by the outer gistvacuumscan loop (the same as blkno, unless we
 * are recursing to re-examine a previous page).
 */
static void
gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
{
    IndexVacuumInfo *info = vstate->info;
    IndexBulkDeleteCallback callback = vstate->callback;
    void       *callback_state = vstate->callback_state;
    Relation    rel = info->index;
    Buffer      buffer;
    Page        page;
    BlockNumber recurse_to;

restart:
    recurse_to = InvalidBlockNumber;

    /* call vacuum_delay_point while not holding any buffer lock */
    vacuum_delay_point();

    buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                info->strategy);

    /*
     * We are not going to stay here for long, so aggressively grab an
     * exclusive lock.
     */
    LockBuffer(buffer, GIST_EXCLUSIVE);
    page = (Page) BufferGetPage(buffer);

    if (gistPageRecyclable(page))
    {
        /* Okay to recycle this page */
        RecordFreeIndexPage(rel, blkno);
        vstate->stats->pages_deleted++;
        vstate->stats->pages_free++;
    }
    else if (GistPageIsDeleted(page))
    {
        /* Already deleted, but can't recycle yet */
        vstate->stats->pages_deleted++;
    }
    else if (GistPageIsLeaf(page))
    {
        OffsetNumber todelete[MaxOffsetNumber];
        int         ntodelete = 0;
        int         nremain;
        GISTPageOpaque opaque = GistPageGetOpaque(page);
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

        /*
         * Check whether we need to recurse back to earlier pages. What we
         * are concerned about is a page split that happened since we started
         * the vacuum scan. If the split moved some tuples to a lower page
         * then we might have missed 'em. If so, set up for tail recursion.
         *
         * This is similar to the checks we do during searches, when following
         * a downlink, but we don't need to jump to higher-numbered pages,
         * because we will process them later, anyway.
         */
        if ((GistFollowRight(page) ||
             vstate->startNSN < GistPageGetNSN(page)) &&
            (opaque->rightlink != InvalidBlockNumber) &&
            (opaque->rightlink < orig_blkno))
        {
            recurse_to = opaque->rightlink;
        }

        /*
         * Scan over all items to see which ones need to be deleted according
         * to the callback function.
         */
        if (callback)
        {
            OffsetNumber off;

            for (off = FirstOffsetNumber;
                 off <= maxoff;
                 off = OffsetNumberNext(off))
            {
                ItemId      iid = PageGetItemId(page, off);
                IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

                if (callback(&(idxtuple->t_tid), callback_state))
                    todelete[ntodelete++] = off;
            }
        }

        /*
         * Apply any needed deletes. We issue just one WAL record per page,
         * so as to minimize WAL traffic.
         */
        if (ntodelete > 0)
        {
            START_CRIT_SECTION();

            MarkBufferDirty(buffer);

            PageIndexMultiDelete(page, todelete, ntodelete);
            GistMarkTuplesDeleted(page);

            if (RelationNeedsWAL(rel))
            {
                XLogRecPtr  recptr;

                recptr = gistXLogUpdate(buffer,
                                        todelete, ntodelete,
                                        NULL, 0, InvalidBuffer);
                PageSetLSN(page, recptr);
            }
            else
                PageSetLSN(page, gistGetFakeLSN(rel));

            END_CRIT_SECTION();

            vstate->stats->tuples_removed += ntodelete;
            /* must recompute maxoff */
            maxoff = PageGetMaxOffsetNumber(page);
        }

        nremain = maxoff - FirstOffsetNumber + 1;
        if (nremain == 0)
        {
            /*
             * The page is now completely empty. Remember its block number,
             * so that we will try to delete the page in the second stage.
             *
             * Skip this when recursing, because IntegerSet requires that the
             * values are added in ascending order. The next VACUUM will pick
             * it up.
             */
            if (blkno == orig_blkno)
                intset_add_member(vstate->empty_leaf_set, blkno);
        }
        else
            vstate->stats->num_index_tuples += nremain;
    }
    else
    {
        /*
         * On an internal page, check for "invalid tuples", left behind by an
         * incomplete page split on PostgreSQL 9.0 or below. These are not
         * created by newer PostgreSQL versions, but unfortunately, there is
         * no version number anywhere in a GiST index, so we don't know
         * whether this index might still contain invalid tuples or not.
         */
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
        OffsetNumber off;

        for (off = FirstOffsetNumber;
             off <= maxoff;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

            if (GistTupleIsInvalid(idxtuple))
                ereport(LOG,
                        (errmsg("index \"%s\" contains an inner tuple marked as invalid",
                                RelationGetRelationName(rel)),
                         errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
                         errhint("Please REINDEX it.")));
        }

        /*
         * Remember the block number of this page, so that we can revisit it
         * later in gistvacuum_delete_empty_pages(), when we search for
         * parents of empty leaf pages.
         */
        if (blkno == orig_blkno)
            intset_add_member(vstate->internal_page_set, blkno);
    }

    UnlockReleaseBuffer(buffer);

    /*
     * This is really tail recursion, but if the compiler is too stupid to
     * optimize it as such, we'd eat an uncomfortably large amount of stack
     * space per recursion level (due to the todelete[] array). A failure is
     * improbable since the number of levels isn't likely to be large ... but
     * just in case, let's hand-optimize into a loop.
     */
    if (recurse_to != InvalidBlockNumber)
    {
        blkno = recurse_to;
        goto restart;
    }
}

/*
 * Scan all internal pages, and try to delete their empty child pages.
 */
static void
gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
{
    Relation    rel = info->index;
    BlockNumber empty_pages_remaining;
    uint64      blkno;

    /*
     * Rescan all inner pages to find those that have empty child pages.
     */
    empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
    intset_begin_iterate(vstate->internal_page_set);
    while (empty_pages_remaining > 0 &&
           intset_iterate_next(vstate->internal_page_set, &blkno))
    {
        Buffer      buffer;
        Page        page;
        OffsetNumber off,
                    maxoff;
        OffsetNumber todelete[MaxOffsetNumber];
        BlockNumber leafs_to_delete[MaxOffsetNumber];
        int         ntodelete;
        int         deleted;

        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
                                    RBM_NORMAL, info->strategy);

        LockBuffer(buffer, GIST_SHARE);
        page = (Page) BufferGetPage(buffer);

        if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
        {
            /*
             * This page was an internal page earlier, but now it's something
             * else. Shouldn't happen...
             */
            Assert(false);
            UnlockReleaseBuffer(buffer);
            continue;
        }

        /*
         * Scan all the downlinks, and see if any of them point to empty leaf
         * pages.
         */
        maxoff = PageGetMaxOffsetNumber(page);
        ntodelete = 0;
        for (off = FirstOffsetNumber;
             off <= maxoff && ntodelete < maxoff - 1;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
            BlockNumber leafblk;

            leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
            if (intset_is_member(vstate->empty_leaf_set, leafblk))
            {
                leafs_to_delete[ntodelete] = leafblk;
                todelete[ntodelete++] = off;
            }
        }

        /*
         * In order to avoid deadlock, the child page must be locked before
         * the parent, so we must release the lock on the parent, lock the
         * child, and then re-acquire the lock on the parent. (And we
         * wouldn't want to do I/O while holding a lock, anyway.)
         *
         * At the instant that we're not holding a lock on the parent, the
         * downlink might get moved by a concurrent insert, so we must
         * re-check that it still points to the same child page after we have
         * acquired both locks. Also, another backend might have inserted a
         * tuple into the page, so that it is no longer empty.
         * gistdeletepage() re-checks all these conditions.
         */
        LockBuffer(buffer, GIST_UNLOCK);

        deleted = 0;
        for (int i = 0; i < ntodelete; i++)
        {
            Buffer      leafbuf;

            /*
             * Don't remove the last downlink from the parent. That would
             * confuse the insertion code.
             */
            if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
                break;

            leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
                                         RBM_NORMAL, info->strategy);
            LockBuffer(leafbuf, GIST_EXCLUSIVE);
            gistcheckpage(rel, leafbuf);

            LockBuffer(buffer, GIST_EXCLUSIVE);
            if (gistdeletepage(info, vstate->stats,
                               buffer, todelete[i] - deleted,
                               leafbuf))
                deleted++;
            LockBuffer(buffer, GIST_UNLOCK);

            UnlockReleaseBuffer(leafbuf);
        }

        ReleaseBuffer(buffer);

        /*
         * We can stop the scan as soon as we have seen the downlinks, even if
         * we were not able to remove them all.
         */
        empty_pages_remaining -= ntodelete;
    }
}
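
/*
 * Editorial aside: the lock dance in the loop above, reduced to its steps.
 * The invariant is that the child (leaf) page is locked before the parent is
 * re-locked, to avoid deadlock against concurrent inserters:
 *
 *     LockBuffer(buffer, GIST_UNLOCK);         (1) drop parent lock
 *     LockBuffer(leafbuf, GIST_EXCLUSIVE);     (2) lock child
 *     LockBuffer(buffer, GIST_EXCLUSIVE);      (3) re-lock parent
 *     gistdeletepage(...);                     (4) re-validate, then delete
 *     LockBuffer(buffer, GIST_UNLOCK);
 *     UnlockReleaseBuffer(leafbuf);
 */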

/*
 * gistdeletepage takes a leaf page, and its parent, and tries to delete the
 * leaf. Both pages must be locked.
 *
 * Even if the page was empty when we first saw it, a concurrent inserter might
 * have added a tuple to it since. Similarly, the downlink might have moved.
 * We re-check all the conditions, to make sure the page is still deletable,
 * before modifying anything.
 *
 * Returns true if the page was deleted, and false if a concurrent update
 * prevented it.
 */
static bool
gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               Buffer parentBuffer, OffsetNumber downlink,
               Buffer leafBuffer)
{
    Page        parentPage = BufferGetPage(parentBuffer);
    Page        leafPage = BufferGetPage(leafBuffer);
    ItemId      iid;
    IndexTuple  idxtuple;
    XLogRecPtr  recptr;
    FullTransactionId txid;

    /*
     * Check that the leaf is still empty and deletable.
     */
    if (!GistPageIsLeaf(leafPage))
    {
        /* a leaf page should never become a non-leaf page */
        Assert(false);
        return false;
    }

    if (GistFollowRight(leafPage))
        return false;           /* don't mess with a concurrent page split */

    if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
        return false;           /* not empty anymore */

    /*
     * Ok, the leaf is deletable. Is the downlink in the parent page still
     * valid? It might have been moved by a concurrent insert. We could try
     * to re-find it by scanning the page again, possibly moving right if the
     * page was split. But for now, let's keep it simple and just give up.
     * The next VACUUM will pick it up.
     */
    if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
        GistPageIsLeaf(parentPage))
    {
        /* shouldn't happen, internal pages are never deleted */
        Assert(false);
        return false;
    }

    if (PageGetMaxOffsetNumber(parentPage) < downlink
        || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
        return false;

    iid = PageGetItemId(parentPage, downlink);
    idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
    if (BufferGetBlockNumber(leafBuffer) !=
        ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
        return false;

    /*
     * All good, proceed with the deletion.
     *
     * The page cannot be immediately recycled, because in-progress scans that
     * saw the downlink might still visit it. Mark the page with the current
     * next-XID counter, so that we know when it can be recycled. Once that
     * XID becomes older than GlobalXmin, we know that all scans that are
     * currently in progress must have ended. (That's much more conservative
     * than needed, but let's keep it safe and simple.)
     */
    txid = ReadNextFullTransactionId();

    START_CRIT_SECTION();

    /* mark the page as deleted */
    MarkBufferDirty(leafBuffer);
    GistPageSetDeleted(leafPage, txid);
    stats->pages_newly_deleted++;
    stats->pages_deleted++;

    /* remove the downlink from the parent */
    MarkBufferDirty(parentBuffer);
    PageIndexTupleDelete(parentPage, downlink);

    if (RelationNeedsWAL(info->index))
        recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
    else
        recptr = gistGetFakeLSN(info->index);
    PageSetLSN(parentPage, recptr);
    PageSetLSN(leafPage, recptr);

    END_CRIT_SECTION();

    return true;
}
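
/*
 * Editorial aside: the "can this deleted page be recycled yet?" test applied
 * by the first stage (gistPageRecyclable, defined in gistutil.c) is
 * approximately the following sketch; the exact visibility check varies
 * across PostgreSQL versions:
 *
 *     bool
 *     gistPageRecyclable(Page page)
 *     {
 *         if (PageIsNew(page))
 *             return true;
 *         if (GistPageIsDeleted(page))
 *         {
 *             FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
 *
 *             return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
 *         }
 *         return false;
 *     }
 */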