/*-------------------------------------------------------------------------
 *
 * gistvacuum.c
 *    vacuuming routines for the postgres GiST index access method.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/access/gist/gistvacuum.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/gist_private.h"
#include "access/transam.h"
#include "commands/vacuum.h"
#include "lib/integerset.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"

/* Working state needed by gistbulkdelete */
typedef struct
{
    IndexVacuumInfo *info;
    IndexBulkDeleteResult *stats;
    IndexBulkDeleteCallback callback;
    void       *callback_state;
    GistNSN     startNSN;       /* insert LSN when the scan started; used to
                                 * detect concurrent page splits */

    /*
     * These sets memorize all internal pages, and all empty leaf pages, seen
     * while scanning the index.  They are used in the second stage, when the
     * empty pages are deleted.
     */
    IntegerSet *internal_page_set;
    IntegerSet *empty_leaf_set;
    MemoryContext page_set_context;
} GistVacState;
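
/*
 * A minimal sketch of the lib/integerset.h calls used for those sets (the
 * block numbers here are made up; note that members must be added in
 * ascending order, which gistvacuumpage() is careful to maintain):
 *
 *      IntegerSet *set = intset_create();
 *      uint64      val;
 *
 *      intset_add_member(set, 4);
 *      intset_add_member(set, 10);     -- ascending order is required
 *      if (intset_is_member(set, 4)) ...
 *      intset_begin_iterate(set);
 *      while (intset_iterate_next(set, &val)) ...
 */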

static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                           IndexBulkDeleteCallback callback, void *callback_state);
static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
                           BlockNumber orig_blkno);
static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
                                          GistVacState *vstate);
static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                           Buffer parentBuffer, OffsetNumber downlink,
                           Buffer leafBuffer);

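/*
 * Overview: a GiST VACUUM works in two stages.  gistvacuumscan() makes a
 * physical-order pass over the whole index, removing index entries for dead
 * tuples (when a callback is given) and noting all empty leaf pages and all
 * internal pages in the sets above; gistvacuum_delete_empty_pages() then
 * revisits the internal pages and tries to unlink each empty child with
 * gistdeletepage().
 */
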
/*
 * VACUUM bulkdelete stage: remove index entries.
 */
IndexBulkDeleteResult *
gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    gistvacuumscan(info, stats, callback, callback_state);

    return stats;
}

/*
 * VACUUM cleanup stage: delete empty pages, and update index statistics.
 */
IndexBulkDeleteResult *
gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    /*
     * If gistbulkdelete was called, we need not do anything, just return the
     * stats from the latest gistbulkdelete call.  If it wasn't called, we
     * still need to do a pass over the index, to obtain index statistics.
     */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        gistvacuumscan(info, stats, NULL, NULL);
    }

    /*
     * It's quite possible for us to be fooled by concurrent page splits into
     * double-counting some index tuples, so disbelieve any total that exceeds
     * the underlying heap's count ... if we know that accurately.  Otherwise
     * this might just make matters worse.
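     *
     * (For example, if a concurrent split caused us to count 1010 index
     * tuples while the heap is known to hold exactly 1000, we report 1000.)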
     */
    if (!info->estimated_count)
    {
        if (stats->num_index_tuples > info->num_heap_tuples)
            stats->num_index_tuples = info->num_heap_tuples;
    }

    return stats;
}

/*
 * gistvacuumscan --- scan the index for VACUUMing purposes
 *
 * This scans the index for leaf tuples that are deletable according to the
 * vacuum callback, and updates the stats.  Both gistbulkdelete and
 * gistvacuumcleanup invoke this (the latter only if no gistbulkdelete call
 * occurred).
 *
 * While looping over all index pages, this also makes note of any empty leaf
 * pages, and of all internal pages.  After scanning all the pages, we remove
 * the empty pages so that they can be reused.  Any deleted pages are added
 * directly to the free space map.  (They should already have been added there
 * when they were originally deleted, but it's possible that the FSM was lost
 * at a crash, for example.)
 *
 * The caller is responsible for initially allocating/zeroing a stats struct.
 */
static void
gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    GistVacState vstate;
    BlockNumber num_pages;
    bool        needLock;
    BlockNumber blkno;
    MemoryContext oldctx;

    /*
     * Reset fields that track information about the entire index now.  This
     * avoids double-counting in the case where a single VACUUM command
     * requires multiple scans of the index.
     *
     * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     * since they track information about the VACUUM command, and so must last
     * across each call to gistvacuumscan().
     *
     * (Note that pages_free is treated as state about the whole index, not
     * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
     * calls are idempotent, and get repeated for the same deleted pages in
     * some scenarios.  The point for us is to track the number of recyclable
     * pages in the index at the end of the VACUUM command.)
     */
    stats->num_pages = 0;
    stats->estimated_count = false;
    stats->num_index_tuples = 0;
    stats->pages_deleted = 0;
    stats->pages_free = 0;

    /*
     * Create the integer sets that remember the internal pages and the empty
     * leaf pages, in page_set_context.  Internally, the integer sets remember
     * this context, so their subsequent allocations are made from the same
     * context as well.
     *
     * XXX the allocation sizes used below pre-date generation context's block
     * growing code.  These values should likely be benchmarked and set to
     * more suitable values.
     */
    vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
                                                      "GiST VACUUM page set context",
                                                      16 * 1024,
                                                      16 * 1024,
                                                      16 * 1024);
    oldctx = MemoryContextSwitchTo(vstate.page_set_context);
    vstate.internal_page_set = intset_create();
    vstate.empty_leaf_set = intset_create();
    MemoryContextSwitchTo(oldctx);

    /* Set up info to pass down to gistvacuumpage */
    vstate.info = info;
    vstate.stats = stats;
    vstate.callback = callback;
    vstate.callback_state = callback_state;
    if (RelationNeedsWAL(rel))
        vstate.startNSN = GetInsertRecPtr();
    else
        vstate.startNSN = gistGetFakeLSN(rel);

    /*
     * The outer loop iterates over all index pages, in physical order (we
     * hope the kernel will cooperate in providing read-ahead for speed).  It
     * is critical that we visit all leaf pages, including ones added after we
     * start the scan, else we might fail to delete some deletable tuples.
     * Hence, we must repeatedly check the relation length.  We must acquire
     * the relation-extension lock while doing so to avoid a race condition:
     * if someone else is extending the relation, there is a window where
     * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     * write-locked by gistNewBuffer().  If we manage to scan such a page
     * here, we'll improperly assume it can be recycled.  Taking the lock
     * synchronizes things enough to prevent a problem: either num_pages won't
     * include the new page, or gistNewBuffer already has write lock on the
     * buffer and it will be fully initialized before we can examine it.  (See
     * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     * if a page is added immediately after we look; the page splitting code
     * already has write-lock on the left page before it adds a right page, so
     * we must already have processed any tuples due to be moved into such a
     * page.
     *
     * We can skip locking for new or temp relations, however, since no one
     * else could be accessing them.
     */
    needLock = !RELATION_IS_LOCAL(rel);

    blkno = GIST_ROOT_BLKNO;
    for (;;)
    {
        /* Get the current relation length */
        if (needLock)
            LockRelationForExtension(rel, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(rel);
        if (needLock)
            UnlockRelationForExtension(rel, ExclusiveLock);

        /* Quit if we've scanned the whole relation */
        if (blkno >= num_pages)
            break;
        /* Iterate over pages, then loop back to recheck length */
        for (; blkno < num_pages; blkno++)
            gistvacuumpage(&vstate, blkno, blkno);
    }

    /*
     * If we found any recyclable pages (and recorded them in the FSM), then
     * forcibly update the upper-level FSM pages to ensure that searchers can
     * find them.  It's possible that the pages were also found during
     * previous scans and so this is a waste of time, but it's cheap enough
     * relative to scanning the index that it shouldn't matter much, and
     * making sure that free pages are available sooner not later seems
     * worthwhile.
     *
     * Note that if no recyclable pages exist, we don't bother vacuuming the
     * FSM at all.
     */
    if (stats->pages_free > 0)
        IndexFreeSpaceMapVacuum(rel);

    /* update statistics */
    stats->num_pages = num_pages;

    /*
     * If we saw any empty pages, try to unlink them from the tree so that
     * they can be reused.
     */
    gistvacuum_delete_empty_pages(info, &vstate);

    /* we don't need the internal and empty page sets anymore */
    MemoryContextDelete(vstate.page_set_context);
    vstate.page_set_context = NULL;
    vstate.internal_page_set = NULL;
    vstate.empty_leaf_set = NULL;
}

/*
 * gistvacuumpage --- VACUUM one page
 *
 * This processes a single page for gistbulkdelete().  In some cases we
 * must go back and re-examine previously-scanned pages; this routine
 * recurses when necessary to handle that case.
 *
 * blkno is the page to process.  orig_blkno is the highest block number
 * reached by the outer gistvacuumscan loop (the same as blkno, unless we
 * are recursing to re-examine a previous page).
 */
static void
gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
{
    IndexVacuumInfo *info = vstate->info;
    IndexBulkDeleteCallback callback = vstate->callback;
    void       *callback_state = vstate->callback_state;
    Relation    rel = info->index;
    Buffer      buffer;
    Page        page;
    BlockNumber recurse_to;

restart:
    recurse_to = InvalidBlockNumber;

    /* call vacuum_delay_point while not holding any buffer lock */
    vacuum_delay_point();

    buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                info->strategy);

    /*
     * We are not going to stay here for long, so aggressively grab an
     * exclusive lock.
     */
    LockBuffer(buffer, GIST_EXCLUSIVE);
    page = (Page) BufferGetPage(buffer);

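    /*
     * A deleted page can be recycled only once no in-progress scan could
     * still visit it; gistPageRecyclable() checks that, using the deletion
     * XID that gistdeletepage() stamps on the page (see below).
     */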
    if (gistPageRecyclable(page))
    {
        /* Okay to recycle this page */
        RecordFreeIndexPage(rel, blkno);
        vstate->stats->pages_deleted++;
        vstate->stats->pages_free++;
    }
    else if (GistPageIsDeleted(page))
    {
        /* Already deleted, but can't recycle yet */
        vstate->stats->pages_deleted++;
    }
    else if (GistPageIsLeaf(page))
    {
        OffsetNumber todelete[MaxOffsetNumber];
        int         ntodelete = 0;
        int         nremain;
        GISTPageOpaque opaque = GistPageGetOpaque(page);
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

        /*
         * Check whether we need to recurse back to earlier pages.  What we
         * are concerned about is a page split that happened since we started
         * the vacuum scan.  If the split moved some tuples to a lower page
         * then we might have missed 'em.  If so, set up for tail recursion.
         *
         * This is similar to the checks we do during searches, when following
         * a downlink, but we don't need to jump to higher-numbered pages,
         * because we will process them later, anyway.
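         *
         * (Concrete example: suppose this is block 10 and it was split after
         * the scan started, with the new right sibling placed on recycled
         * block 4.  Block 10's NSN is then newer than startNSN, and its
         * rightlink is 4, which is below orig_blkno, so we go back and
         * process block 4 as well.)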
         */
        if ((GistFollowRight(page) ||
             vstate->startNSN < GistPageGetNSN(page)) &&
            (opaque->rightlink != InvalidBlockNumber) &&
            (opaque->rightlink < orig_blkno))
        {
            recurse_to = opaque->rightlink;
        }

        /*
         * Scan over all items to see which ones need to be deleted according
         * to the callback function.
         */
        if (callback)
        {
            OffsetNumber off;

            for (off = FirstOffsetNumber;
                 off <= maxoff;
                 off = OffsetNumberNext(off))
            {
                ItemId      iid = PageGetItemId(page, off);
                IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

                if (callback(&(idxtuple->t_tid), callback_state))
                    todelete[ntodelete++] = off;
            }
        }

        /*
         * Apply any needed deletes.  We issue just one WAL record per page,
         * so as to minimize WAL traffic.
         */
        if (ntodelete > 0)
        {
            START_CRIT_SECTION();

            MarkBufferDirty(buffer);

            PageIndexMultiDelete(page, todelete, ntodelete);
            GistMarkTuplesDeleted(page);

            if (RelationNeedsWAL(rel))
            {
                XLogRecPtr  recptr;

                recptr = gistXLogUpdate(buffer,
                                        todelete, ntodelete,
                                        NULL, 0, InvalidBuffer);
                PageSetLSN(page, recptr);
            }
            else
                PageSetLSN(page, gistGetFakeLSN(rel));

            END_CRIT_SECTION();

            vstate->stats->tuples_removed += ntodelete;
            /* must recompute maxoff */
            maxoff = PageGetMaxOffsetNumber(page);
        }

        nremain = maxoff - FirstOffsetNumber + 1;
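        /* an empty page has maxoff == InvalidOffsetNumber (0), so nremain == 0 */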
        if (nremain == 0)
        {
            /*
             * The page is now completely empty.  Remember its block number,
             * so that we will try to delete the page in the second stage.
             *
             * Skip this when recursing, because IntegerSet requires that the
             * values are added in ascending order.  The next VACUUM will pick
             * it up.
             */
            if (blkno == orig_blkno)
                intset_add_member(vstate->empty_leaf_set, blkno);
        }
        else
            vstate->stats->num_index_tuples += nremain;
    }
    else
    {
        /*
         * On an internal page, check for "invalid tuples", left behind by an
         * incomplete page split on PostgreSQL 9.0 or below.  These are not
         * created by newer PostgreSQL versions, but unfortunately, there is
         * no version number anywhere in a GiST index, so we don't know
         * whether this index might still contain invalid tuples or not.
         */
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
        OffsetNumber off;

        for (off = FirstOffsetNumber;
             off <= maxoff;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

            if (GistTupleIsInvalid(idxtuple))
                ereport(LOG,
                        (errmsg("index \"%s\" contains an inner tuple marked as invalid",
                                RelationGetRelationName(rel)),
                         errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
                         errhint("Please REINDEX it.")));
        }

        /*
         * Remember the block number of this page, so that we can revisit it
         * later in gistvacuum_delete_empty_pages(), when we search for
         * parents of empty leaf pages.
         */
        if (blkno == orig_blkno)
            intset_add_member(vstate->internal_page_set, blkno);
    }

    UnlockReleaseBuffer(buffer);

    /*
     * This is really tail recursion, but if the compiler is too stupid to
     * optimize it as such, we'd eat an uncomfortably large amount of stack
     * space per recursion level (due to the todelete[] array).  A failure is
     * improbable since the number of levels isn't likely to be large ... but
     * just in case, let's hand-optimize into a loop.
     */
    if (recurse_to != InvalidBlockNumber)
    {
        blkno = recurse_to;
        goto restart;
    }
}

/*
 * Scan all internal pages, and try to delete their empty child pages.
 */
static void
gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
{
    Relation    rel = info->index;
    BlockNumber empty_pages_remaining;
    uint64      blkno;

    /*
     * Rescan all inner pages to find those that have empty child pages.
     */
    empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
    intset_begin_iterate(vstate->internal_page_set);
    while (empty_pages_remaining > 0 &&
           intset_iterate_next(vstate->internal_page_set, &blkno))
    {
        Buffer      buffer;
        Page        page;
        OffsetNumber off,
                    maxoff;
        OffsetNumber todelete[MaxOffsetNumber];
        BlockNumber leafs_to_delete[MaxOffsetNumber];
        int         ntodelete;
        int         deleted;

        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
                                    RBM_NORMAL, info->strategy);

        LockBuffer(buffer, GIST_SHARE);
        page = (Page) BufferGetPage(buffer);

        if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
        {
            /*
             * This page was an internal page earlier, but now it's something
             * else.  Shouldn't happen...
             */
            Assert(false);
            UnlockReleaseBuffer(buffer);
            continue;
        }

        /*
         * Scan all the downlinks, and see if any of them point to empty leaf
         * pages.
         */
        maxoff = PageGetMaxOffsetNumber(page);
        ntodelete = 0;
        for (off = FirstOffsetNumber;
             off <= maxoff && ntodelete < maxoff - 1;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
            BlockNumber leafblk;

            leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
            if (intset_is_member(vstate->empty_leaf_set, leafblk))
            {
                leafs_to_delete[ntodelete] = leafblk;
                todelete[ntodelete++] = off;
            }
        }

        /*
         * In order to avoid deadlock, the child page must be locked before
         * the parent, so we must release the lock on the parent, lock the
         * child, and then re-acquire the lock on the parent.  (And we
         * wouldn't want to do I/O while holding a lock, anyway.)
         *
         * At the instant that we're not holding a lock on the parent, the
         * downlink might get moved by a concurrent insert, so we must
         * re-check that it still points to the same child page after we have
         * acquired both locks.  Also, another backend might have inserted a
         * tuple to the page, so that it is no longer empty.  gistdeletepage()
         * re-checks all these conditions.
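         *
         * (If two backends ever acquired such a lock pair in opposite
         * orders, each could end up waiting for the other; sticking to a
         * single child-before-parent order avoids that.)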
         */
        LockBuffer(buffer, GIST_UNLOCK);

        deleted = 0;
        for (int i = 0; i < ntodelete; i++)
        {
            Buffer      leafbuf;

            /*
             * Don't remove the last downlink from the parent.  That would
             * confuse the insertion code.
             */
            if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
                break;

            leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
                                         RBM_NORMAL, info->strategy);
            LockBuffer(leafbuf, GIST_EXCLUSIVE);
            gistcheckpage(rel, leafbuf);

            LockBuffer(buffer, GIST_EXCLUSIVE);
            if (gistdeletepage(info, vstate->stats,
                               buffer, todelete[i] - deleted,
                               leafbuf))
                deleted++;
            LockBuffer(buffer, GIST_UNLOCK);

            UnlockReleaseBuffer(leafbuf);
        }

        ReleaseBuffer(buffer);

        /*
         * We can stop the scan as soon as we have seen the downlinks, even if
         * we were not able to remove them all.
         */
        empty_pages_remaining -= ntodelete;
    }
}

/*
 * gistdeletepage takes a leaf page, and its parent, and tries to delete the
 * leaf.  Both pages must be locked.
 *
 * Even if the page was empty when we first saw it, a concurrent inserter might
 * have added a tuple to it since.  Similarly, the downlink might have moved.
 * We re-check all the conditions, to make sure the page is still deletable,
 * before modifying anything.
 *
 * Returns true if the page was deleted, or false if a concurrent update
 * prevented it.
 */
static bool
gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               Buffer parentBuffer, OffsetNumber downlink,
               Buffer leafBuffer)
{
    Page        parentPage = BufferGetPage(parentBuffer);
    Page        leafPage = BufferGetPage(leafBuffer);
    ItemId      iid;
    IndexTuple  idxtuple;
    XLogRecPtr  recptr;
    FullTransactionId txid;

    /*
     * Check that the leaf is still empty and deletable.
     */
    if (!GistPageIsLeaf(leafPage))
    {
        /* a leaf page should never become a non-leaf page */
        Assert(false);
        return false;
    }

    if (GistFollowRight(leafPage))
        return false;           /* don't mess with a concurrent page split */

    if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
        return false;           /* not empty anymore */

    /*
     * Ok, the leaf is deletable.  Is the downlink in the parent page still
     * valid?  It might have been moved by a concurrent insert.  We could try
     * to re-find it by scanning the page again, possibly moving right if the
     * page was split.  But for now, let's keep it simple and just give up.
     * The next VACUUM will pick it up.
     */
    if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
        GistPageIsLeaf(parentPage))
    {
        /* shouldn't happen, internal pages are never deleted */
        Assert(false);
        return false;
    }

    if (PageGetMaxOffsetNumber(parentPage) < downlink
        || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
        return false;

    iid = PageGetItemId(parentPage, downlink);
    idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
    if (BufferGetBlockNumber(leafBuffer) !=
        ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
        return false;

    /*
     * All good, proceed with the deletion.
     *
     * The page cannot be immediately recycled, because in-progress scans that
     * saw the downlink might still visit it.  Mark the page with the current
     * next-XID counter, so that we know when it can be recycled.  Once that
     * XID becomes older than GlobalXmin, we know that all scans that are
     * currently in progress must have ended.  (That's much more conservative
     * than needed, but let's keep it safe and simple.)
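     *
     * (That check is made by gistPageRecyclable(), when a later
     * gistvacuumscan() pass considers returning the page to the FSM.)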
     */
    txid = ReadNextFullTransactionId();

    START_CRIT_SECTION();

    /* mark the page as deleted */
    MarkBufferDirty(leafBuffer);
    GistPageSetDeleted(leafPage, txid);
    stats->pages_newly_deleted++;
    stats->pages_deleted++;

    /* remove the downlink from the parent */
    MarkBufferDirty(parentBuffer);
    PageIndexTupleDelete(parentPage, downlink);

    if (RelationNeedsWAL(info->index))
        recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
    else
        recptr = gistGetFakeLSN(info->index);
    PageSetLSN(parentPage, recptr);
    PageSetLSN(leafPage, recptr);

    END_CRIT_SECTION();

    return true;
}