Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginvacuum.c
4 : * delete & vacuum routines for the postgres GIN
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginvacuum.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "commands/vacuum.h"
21 : #include "miscadmin.h"
22 : #include "storage/indexfsm.h"
23 : #include "storage/lmgr.h"
24 : #include "storage/predicate.h"
25 : #include "storage/read_stream.h"
26 : #include "utils/memutils.h"
27 :
28 : struct GinVacuumState
29 : {
30 : Relation index;
31 : IndexBulkDeleteResult *result;
32 : IndexBulkDeleteCallback callback;
33 : void *callback_state;
34 : GinState ginstate;
35 : BufferAccessStrategy strategy;
36 : MemoryContext tmpCxt;
37 : };
38 :
39 : /*
40 : * Vacuums an uncompressed posting list. The size of the must can be specified
41 : * in number of items (nitems).
42 : *
43 : * If none of the items need to be removed, returns NULL. Otherwise returns
44 : * a new palloc'd array with the remaining items. The number of remaining
45 : * items is returned in *nremaining.
46 : */
47 : ItemPointer
48 167784 : ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items,
49 : int nitem, int *nremaining)
50 : {
51 : int i,
52 167784 : remaining = 0;
53 167784 : ItemPointer tmpitems = NULL;
54 :
55 : /*
56 : * Iterate over TIDs array
57 : */
58 733083 : for (i = 0; i < nitem; i++)
59 : {
60 565299 : if (gvs->callback(items + i, gvs->callback_state))
61 : {
62 497612 : gvs->result->tuples_removed += 1;
63 497612 : if (!tmpitems)
64 : {
65 : /*
66 : * First TID to be deleted: allocate memory to hold the
67 : * remaining items.
68 : */
69 160606 : tmpitems = palloc_array(ItemPointerData, nitem);
70 160606 : memcpy(tmpitems, items, sizeof(ItemPointerData) * i);
71 : }
72 : }
73 : else
74 : {
75 67687 : gvs->result->num_index_tuples += 1;
76 67687 : if (tmpitems)
77 4266 : tmpitems[remaining] = items[i];
78 67687 : remaining++;
79 : }
80 : }
81 :
82 167784 : *nremaining = remaining;
83 167784 : return tmpitems;
84 : }
85 :
86 : /*
87 : * Create a WAL record for vacuuming entry tree leaf page.
88 : */
89 : static void
90 1150 : xlogVacuumPage(Relation index, Buffer buffer)
91 : {
92 1150 : Page page = BufferGetPage(buffer);
93 : XLogRecPtr recptr;
94 :
95 : /* This is only used for entry tree leaf pages. */
96 : Assert(!GinPageIsData(page));
97 : Assert(GinPageIsLeaf(page));
98 :
99 1150 : if (!RelationNeedsWAL(index))
100 1148 : return;
101 :
102 : /*
103 : * Always create a full image, we don't track the changes on the page at
104 : * any more fine-grained level. This could obviously be improved...
105 : */
106 2 : XLogBeginInsert();
107 2 : XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
108 :
109 2 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
110 2 : PageSetLSN(page, recptr);
111 : }
112 :
113 :
114 : /*
115 : * Stack entry used during posting tree empty-page deletion scan.
116 : *
117 : * One DataPageDeleteStack entry is allocated per tree level. As
118 : * ginScanPostingTreeToDelete() recurses down the tree, each entry tracks
119 : * the buffer of the page currently being visited at that level ('buffer'),
120 : * and the buffer of its left sibling ('leftBuffer'). The left page is kept
121 : * pinned and exclusively locked because ginDeletePostingPage() needs it to
122 : * update the sibling chain; acquiring it later could deadlock with
123 : * ginStepRight(), which locks pages left-to-right.
124 : */
125 : typedef struct DataPageDeleteStack
126 : {
127 : struct DataPageDeleteStack *child;
128 : struct DataPageDeleteStack *parent;
129 :
130 : Buffer buffer; /* buffer for the page being visited at this
131 : * tree level */
132 : Buffer leftBuffer; /* pinned and locked rightmost non-deleted
133 : * sibling to the left of the current page */
134 : OffsetNumber myoff; /* offset of this page's downlink in the
135 : * parent */
136 : bool isRoot;
137 : } DataPageDeleteStack;
138 :
139 :
140 : /*
141 : * Delete a posting tree page.
142 : *
143 : * Removes the page identified by dBuffer from the posting tree by updating
144 : * the left sibling's rightlink (in lBuffer) to skip over the deleted page,
145 : * and removing the downlink from the parent page (in pBuffer). All three
146 : * buffers must already have been pinned and exclusively locked by the caller.
147 : *
148 : * The buffers are NOT released nor unlocked here; the caller is responsible
149 : * for this.
150 : */
151 : static void
152 8 : ginDeletePostingPage(GinVacuumState *gvs, Buffer dBuffer, Buffer lBuffer,
153 : Buffer pBuffer, OffsetNumber myoff, bool isParentRoot)
154 : {
155 : Page page,
156 : parentPage;
157 : BlockNumber rightlink;
158 8 : BlockNumber deleteBlkno = BufferGetBlockNumber(dBuffer);
159 :
160 : /*
161 : * This function MUST be called only if someone of parent pages hold
162 : * exclusive cleanup lock. This guarantees that no insertions currently
163 : * happen in this subtree. Caller also acquires Exclusive locks on
164 : * deletable, parent and left pages.
165 : */
166 :
167 8 : page = BufferGetPage(dBuffer);
168 8 : rightlink = GinPageGetOpaque(page)->rightlink;
169 :
170 : /*
171 : * Any insert which would have gone on the leaf block will now go to its
172 : * right sibling.
173 : */
174 8 : PredicateLockPageCombine(gvs->index, deleteBlkno, rightlink);
175 :
176 8 : START_CRIT_SECTION();
177 :
178 : /* Unlink the page by changing left sibling's rightlink */
179 8 : page = BufferGetPage(lBuffer);
180 8 : GinPageGetOpaque(page)->rightlink = rightlink;
181 :
182 : /* Delete downlink from parent */
183 8 : parentPage = BufferGetPage(pBuffer);
184 : #ifdef USE_ASSERT_CHECKING
185 : do
186 : {
187 : PostingItem *tod = GinDataPageGetPostingItem(parentPage, myoff);
188 :
189 : Assert(PostingItemGetBlockNumber(tod) == deleteBlkno);
190 : } while (0);
191 : #endif
192 8 : GinPageDeletePostingItem(parentPage, myoff);
193 :
194 8 : page = BufferGetPage(dBuffer);
195 :
196 : /*
197 : * we shouldn't change rightlink field to save workability of running
198 : * search scan
199 : */
200 :
201 : /*
202 : * Mark page as deleted, and remember last xid which could know its
203 : * address.
204 : */
205 8 : GinPageSetDeleted(page);
206 8 : GinPageSetDeleteXid(page, ReadNextTransactionId());
207 :
208 8 : MarkBufferDirty(pBuffer);
209 8 : MarkBufferDirty(lBuffer);
210 8 : MarkBufferDirty(dBuffer);
211 :
212 8 : if (RelationNeedsWAL(gvs->index))
213 : {
214 : XLogRecPtr recptr;
215 : ginxlogDeletePage data;
216 :
217 : /*
218 : * We can't pass REGBUF_STANDARD for the deleted page, because we
219 : * didn't set pd_lower on pre-9.4 versions. The page might've been
220 : * binary-upgraded from an older version, and hence not have pd_lower
221 : * set correctly. Ditto for the left page, but removing the item from
222 : * the parent updated its pd_lower, so we know that's OK at this
223 : * point.
224 : */
225 0 : XLogBeginInsert();
226 0 : XLogRegisterBuffer(0, dBuffer, 0);
227 0 : XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
228 0 : XLogRegisterBuffer(2, lBuffer, 0);
229 :
230 0 : data.parentOffset = myoff;
231 0 : data.rightLink = GinPageGetOpaque(page)->rightlink;
232 0 : data.deleteXid = GinPageGetDeleteXid(page);
233 :
234 0 : XLogRegisterData(&data, sizeof(ginxlogDeletePage));
235 :
236 0 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
237 0 : PageSetLSN(page, recptr);
238 0 : PageSetLSN(parentPage, recptr);
239 0 : PageSetLSN(BufferGetPage(lBuffer), recptr);
240 : }
241 :
242 8 : END_CRIT_SECTION();
243 :
244 8 : gvs->result->pages_newly_deleted++;
245 8 : gvs->result->pages_deleted++;
246 8 : }
247 :
248 :
249 : /*
250 : * Scans a posting tree and deletes empty pages.
251 : *
252 : * The caller must hold a cleanup lock on the root page to prevent concurrent
253 : * inserts. The entire path from the root down to the current page is kept
254 : * exclusively locked throughout the scan. The left sibling at each level is
255 : * also kept locked, because ginDeletePostingPage() needs it to update the
256 : * rightlink of the left sibling; re-acquiring the left sibling lock later
257 : * could deadlock with ginStepRight(), which acquires page locks
258 : * left-to-right.
259 : *
260 : * All per-level state is carried in 'myStackItem': the buffer to process
261 : * (must already be pinned and exclusively locked), the left sibling buffer,
262 : * and this page's offset in the parent's downlink array. The root entry is
263 : * set up by ginVacuumPostingTree(); child entries are populated here before
264 : * recursing.
265 : *
266 : * Returns true if the page was deleted, false otherwise.
267 : */
268 : static bool
269 32 : ginScanPostingTreeToDelete(GinVacuumState *gvs, DataPageDeleteStack *myStackItem)
270 : {
271 32 : Buffer buffer = myStackItem->buffer;
272 : Page page;
273 32 : bool pageWasDeleted = false;
274 : bool isempty;
275 :
276 32 : page = BufferGetPage(buffer);
277 :
278 : Assert(GinPageIsData(page));
279 :
280 32 : if (!GinPageIsLeaf(page))
281 : {
282 : OffsetNumber i;
283 :
284 32 : for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff;)
285 : {
286 24 : PostingItem *pitem = GinDataPageGetPostingItem(page, i);
287 : Buffer childBuffer;
288 :
289 24 : childBuffer = ReadBufferExtended(gvs->index,
290 : MAIN_FORKNUM,
291 24 : PostingItemGetBlockNumber(pitem),
292 : RBM_NORMAL, gvs->strategy);
293 24 : LockBuffer(childBuffer, GIN_EXCLUSIVE);
294 :
295 : /* Allocate a child stack entry on first use; reuse thereafter */
296 24 : if (!myStackItem->child)
297 : {
298 8 : myStackItem->child = palloc0_object(DataPageDeleteStack);
299 8 : myStackItem->child->parent = myStackItem;
300 8 : myStackItem->child->leftBuffer = InvalidBuffer;
301 : }
302 :
303 24 : myStackItem->child->buffer = childBuffer;
304 24 : myStackItem->child->isRoot = false;
305 24 : myStackItem->child->myoff = i;
306 :
307 : /*
308 : * Recurse into child. If the child page was deleted, its
309 : * downlink was removed from our page, so re-examine the same
310 : * offset; otherwise advance to the next downlink.
311 : */
312 24 : if (!ginScanPostingTreeToDelete(gvs, myStackItem->child))
313 16 : i++;
314 : }
315 8 : myStackItem->buffer = InvalidBuffer;
316 :
317 : /*
318 : * After processing all children at this level, release the child
319 : * level's leftBuffer if we're at the rightmost page. There is no
320 : * right sibling that could need it for deletion.
321 : */
322 8 : if (GinPageRightMost(page) && BufferIsValid(myStackItem->child->leftBuffer))
323 : {
324 8 : UnlockReleaseBuffer(myStackItem->child->leftBuffer);
325 8 : myStackItem->child->leftBuffer = InvalidBuffer;
326 : }
327 : }
328 :
329 32 : if (GinPageIsLeaf(page))
330 24 : isempty = GinDataLeafPageIsEmpty(page);
331 : else
332 8 : isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber;
333 :
334 32 : if (isempty)
335 : {
336 : /*
337 : * Proceed to the ginDeletePostingPage() if that's not the leftmost or
338 : * the rightmost page.
339 : */
340 20 : if (BufferIsValid(myStackItem->leftBuffer) && !GinPageRightMost(page))
341 : {
342 : Assert(!myStackItem->isRoot);
343 8 : ginDeletePostingPage(gvs, buffer, myStackItem->leftBuffer,
344 8 : myStackItem->parent->buffer,
345 8 : myStackItem->myoff,
346 8 : myStackItem->parent->isRoot);
347 8 : pageWasDeleted = true;
348 : }
349 : }
350 :
351 32 : if (!pageWasDeleted)
352 : {
353 : /*
354 : * Keep this page as the new leftBuffer for this level: the next
355 : * sibling to the right might need it for deletion. Release any
356 : * previously held left page first.
357 : */
358 24 : if (BufferIsValid(myStackItem->leftBuffer))
359 8 : UnlockReleaseBuffer(myStackItem->leftBuffer);
360 24 : myStackItem->leftBuffer = buffer;
361 : }
362 : else
363 : {
364 : /*
365 : * Page was deleted; release the buffer. leftBuffer remains the same.
366 : */
367 8 : UnlockReleaseBuffer(buffer);
368 : }
369 :
370 32 : return pageWasDeleted;
371 : }
372 :
373 :
374 : /*
375 : * Scan through posting tree leafs, delete empty tuples. Returns true if there
376 : * is at least one empty page.
377 : */
378 : static bool
379 17 : ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno)
380 : {
381 : Buffer buffer;
382 : Page page;
383 17 : bool hasVoidPage = false;
384 : MemoryContext oldCxt;
385 :
386 : /* Find leftmost leaf page of posting tree and lock it in exclusive mode */
387 : while (true)
388 8 : {
389 : PostingItem *pitem;
390 :
391 25 : buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
392 : RBM_NORMAL, gvs->strategy);
393 25 : LockBuffer(buffer, GIN_SHARE);
394 25 : page = BufferGetPage(buffer);
395 :
396 : Assert(GinPageIsData(page));
397 :
398 25 : if (GinPageIsLeaf(page))
399 : {
400 17 : LockBuffer(buffer, GIN_UNLOCK);
401 17 : LockBuffer(buffer, GIN_EXCLUSIVE);
402 17 : break;
403 : }
404 :
405 : Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
406 :
407 8 : pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
408 8 : blkno = PostingItemGetBlockNumber(pitem);
409 : Assert(blkno != InvalidBlockNumber);
410 :
411 8 : UnlockReleaseBuffer(buffer);
412 : }
413 :
414 : /* Iterate all posting tree leaves using rightlinks and vacuum them */
415 : while (true)
416 : {
417 33 : oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
418 33 : ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
419 33 : MemoryContextSwitchTo(oldCxt);
420 33 : MemoryContextReset(gvs->tmpCxt);
421 :
422 33 : if (GinDataLeafPageIsEmpty(page))
423 20 : hasVoidPage = true;
424 :
425 33 : blkno = GinPageGetOpaque(page)->rightlink;
426 :
427 33 : UnlockReleaseBuffer(buffer);
428 :
429 33 : if (blkno == InvalidBlockNumber)
430 17 : break;
431 :
432 16 : buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
433 : RBM_NORMAL, gvs->strategy);
434 16 : LockBuffer(buffer, GIN_EXCLUSIVE);
435 16 : page = BufferGetPage(buffer);
436 : }
437 :
438 17 : return hasVoidPage;
439 : }
440 :
441 : static void
442 17 : ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
443 : {
444 17 : if (ginVacuumPostingTreeLeaves(gvs, rootBlkno))
445 : {
446 : /*
447 : * There is at least one empty page. So we have to rescan the tree
448 : * deleting empty pages.
449 : */
450 : Buffer buffer;
451 : DataPageDeleteStack root,
452 : *ptr,
453 : *tmp;
454 : bool deleted PG_USED_FOR_ASSERTS_ONLY;
455 :
456 8 : buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, rootBlkno,
457 : RBM_NORMAL, gvs->strategy);
458 :
459 : /*
460 : * Lock posting tree root for cleanup to ensure there are no
461 : * concurrent inserts.
462 : */
463 8 : LockBufferForCleanup(buffer);
464 :
465 8 : memset(&root, 0, sizeof(DataPageDeleteStack));
466 8 : root.buffer = buffer;
467 8 : root.leftBuffer = InvalidBuffer;
468 8 : root.myoff = InvalidOffsetNumber;
469 8 : root.isRoot = true;
470 :
471 8 : deleted = ginScanPostingTreeToDelete(gvs, &root);
472 : Assert(!deleted);
473 :
474 8 : ptr = root.child;
475 :
476 16 : while (ptr)
477 : {
478 8 : tmp = ptr->child;
479 8 : pfree(ptr);
480 8 : ptr = tmp;
481 : }
482 :
483 8 : UnlockReleaseBuffer(buffer);
484 : }
485 17 : }
486 :
487 : /*
488 : * returns modified page or NULL if page isn't modified.
489 : * Function works with original page until first change is occurred,
490 : * then page is copied into temporary one.
491 : */
492 : static Page
493 1241 : ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot)
494 : {
495 1241 : Page origpage = BufferGetPage(buffer),
496 : tmppage;
497 : OffsetNumber i,
498 1241 : maxoff = PageGetMaxOffsetNumber(origpage);
499 :
500 1241 : tmppage = origpage;
501 :
502 1241 : *nroot = 0;
503 :
504 168397 : for (i = FirstOffsetNumber; i <= maxoff; i++)
505 : {
506 167156 : IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
507 :
508 167156 : if (GinIsPostingTree(itup))
509 : {
510 : /*
511 : * store posting tree's roots for further processing, we can't
512 : * vacuum it just now due to risk of deadlocks with scans/inserts
513 : */
514 17 : roots[*nroot] = GinGetDownlink(itup);
515 17 : (*nroot)++;
516 : }
517 167139 : else if (GinGetNPosting(itup) > 0)
518 : {
519 : int nitems;
520 : ItemPointer items_orig;
521 : bool free_items_orig;
522 : ItemPointer items;
523 :
524 : /* Get list of item pointers from the tuple. */
525 167139 : if (GinItupIsCompressed(itup))
526 : {
527 167139 : items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems);
528 167139 : free_items_orig = true;
529 : }
530 : else
531 : {
532 0 : items_orig = (ItemPointer) GinGetPosting(itup);
533 0 : nitems = GinGetNPosting(itup);
534 0 : free_items_orig = false;
535 : }
536 :
537 : /* Remove any items from the list that need to be vacuumed. */
538 167139 : items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems);
539 :
540 167139 : if (free_items_orig)
541 167139 : pfree(items_orig);
542 :
543 : /* If any item pointers were removed, recreate the tuple. */
544 167139 : if (items)
545 : {
546 : OffsetNumber attnum;
547 : Datum key;
548 : GinNullCategory category;
549 : GinPostingList *plist;
550 : int plistsize;
551 :
552 160006 : if (nitems > 0)
553 : {
554 18 : plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL);
555 18 : plistsize = SizeOfGinPostingList(plist);
556 : }
557 : else
558 : {
559 159988 : plist = NULL;
560 159988 : plistsize = 0;
561 : }
562 :
563 : /*
564 : * if we already created a temporary page, make changes in
565 : * place
566 : */
567 160006 : if (tmppage == origpage)
568 : {
569 : /*
570 : * On first difference, create a temporary copy of the
571 : * page and copy the tuple's posting list to it.
572 : */
573 1150 : tmppage = PageGetTempPageCopy(origpage);
574 :
575 : /* set itup pointer to new page */
576 1150 : itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
577 : }
578 :
579 160006 : attnum = gintuple_get_attrnum(&gvs->ginstate, itup);
580 160006 : key = gintuple_get_key(&gvs->ginstate, itup, &category);
581 160006 : itup = GinFormTuple(&gvs->ginstate, attnum, key, category,
582 : (char *) plist, plistsize,
583 : nitems, true);
584 160006 : if (plist)
585 18 : pfree(plist);
586 160006 : PageIndexTupleDelete(tmppage, i);
587 :
588 160006 : if (PageAddItem(tmppage, itup, IndexTupleSize(itup), i, false, false) != i)
589 0 : elog(ERROR, "failed to add item to index page in \"%s\"",
590 : RelationGetRelationName(gvs->index));
591 :
592 160006 : pfree(itup);
593 160006 : pfree(items);
594 : }
595 : }
596 : }
597 :
598 1241 : return (tmppage == origpage) ? NULL : tmppage;
599 : }
600 :
601 : IndexBulkDeleteResult *
602 49 : ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
603 : IndexBulkDeleteCallback callback, void *callback_state)
604 : {
605 49 : Relation index = info->index;
606 49 : BlockNumber blkno = GIN_ROOT_BLKNO;
607 : GinVacuumState gvs;
608 : Buffer buffer;
609 : BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
610 : uint32 nRoot;
611 :
612 49 : gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
613 : "Gin vacuum temporary context",
614 : ALLOCSET_DEFAULT_SIZES);
615 49 : gvs.index = index;
616 49 : gvs.callback = callback;
617 49 : gvs.callback_state = callback_state;
618 49 : gvs.strategy = info->strategy;
619 49 : initGinState(&gvs.ginstate, index);
620 :
621 : /* first time through? */
622 49 : if (stats == NULL)
623 : {
624 : /* Yes, so initialize stats to zeroes */
625 49 : stats = palloc0_object(IndexBulkDeleteResult);
626 :
627 : /*
628 : * and cleanup any pending inserts
629 : */
630 49 : ginInsertCleanup(&gvs.ginstate, !AmAutoVacuumWorkerProcess(),
631 : false, true, stats);
632 : }
633 :
634 : /* we'll re-count the tuples each time */
635 49 : stats->num_index_tuples = 0;
636 49 : gvs.result = stats;
637 :
638 49 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
639 : RBM_NORMAL, info->strategy);
640 :
641 : /* find leaf page */
642 : for (;;)
643 8 : {
644 57 : Page page = BufferGetPage(buffer);
645 : IndexTuple itup;
646 :
647 57 : LockBuffer(buffer, GIN_SHARE);
648 :
649 : Assert(!GinPageIsData(page));
650 :
651 57 : if (GinPageIsLeaf(page))
652 : {
653 49 : LockBuffer(buffer, GIN_UNLOCK);
654 49 : LockBuffer(buffer, GIN_EXCLUSIVE);
655 :
656 49 : if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
657 : {
658 0 : LockBuffer(buffer, GIN_UNLOCK);
659 0 : continue; /* check it one more */
660 : }
661 49 : break;
662 : }
663 :
664 : Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
665 :
666 8 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
667 8 : blkno = GinGetDownlink(itup);
668 : Assert(blkno != InvalidBlockNumber);
669 :
670 8 : UnlockReleaseBuffer(buffer);
671 8 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
672 : RBM_NORMAL, info->strategy);
673 : }
674 :
675 : /* right now we found leftmost page in entry's BTree */
676 :
677 : for (;;)
678 1192 : {
679 1241 : Page page = BufferGetPage(buffer);
680 : Page resPage;
681 : uint32 i;
682 :
683 : Assert(!GinPageIsData(page));
684 :
685 1241 : resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);
686 :
687 1241 : blkno = GinPageGetOpaque(page)->rightlink;
688 :
689 1241 : if (resPage)
690 : {
691 1150 : START_CRIT_SECTION();
692 1150 : PageRestoreTempPage(resPage, page);
693 1150 : MarkBufferDirty(buffer);
694 1150 : xlogVacuumPage(gvs.index, buffer);
695 1150 : END_CRIT_SECTION();
696 1150 : UnlockReleaseBuffer(buffer);
697 : }
698 : else
699 : {
700 91 : UnlockReleaseBuffer(buffer);
701 : }
702 :
703 1241 : vacuum_delay_point(false);
704 :
705 1258 : for (i = 0; i < nRoot; i++)
706 : {
707 17 : ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
708 17 : vacuum_delay_point(false);
709 : }
710 :
711 1241 : if (blkno == InvalidBlockNumber) /* rightmost page */
712 49 : break;
713 :
714 1192 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
715 : RBM_NORMAL, info->strategy);
716 1192 : LockBuffer(buffer, GIN_EXCLUSIVE);
717 : }
718 :
719 49 : MemoryContextDelete(gvs.tmpCxt);
720 :
721 49 : return gvs.result;
722 : }
723 :
724 : IndexBulkDeleteResult *
725 49 : ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
726 : {
727 49 : Relation index = info->index;
728 : bool needLock;
729 : BlockNumber npages,
730 : blkno;
731 : BlockNumber totFreePages;
732 : GinState ginstate;
733 : GinStatsData idxStat;
734 : BlockRangeReadStreamPrivate p;
735 : ReadStream *stream;
736 :
737 : /*
738 : * In an autovacuum analyze, we want to clean up pending insertions.
739 : * Otherwise, an ANALYZE-only call is a no-op.
740 : */
741 49 : if (info->analyze_only)
742 : {
743 12 : if (AmAutoVacuumWorkerProcess())
744 : {
745 8 : initGinState(&ginstate, index);
746 8 : ginInsertCleanup(&ginstate, false, true, true, stats);
747 : }
748 12 : return stats;
749 : }
750 :
751 : /*
752 : * Set up all-zero stats and cleanup pending inserts if ginbulkdelete
753 : * wasn't called
754 : */
755 37 : if (stats == NULL)
756 : {
757 28 : stats = palloc0_object(IndexBulkDeleteResult);
758 28 : initGinState(&ginstate, index);
759 28 : ginInsertCleanup(&ginstate, !AmAutoVacuumWorkerProcess(),
760 : false, true, stats);
761 : }
762 :
763 37 : memset(&idxStat, 0, sizeof(idxStat));
764 :
765 : /*
766 : * XXX we always report the heap tuple count as the number of index
767 : * entries. This is bogus if the index is partial, but it's real hard to
768 : * tell how many distinct heap entries are referenced by a GIN index.
769 : */
770 37 : stats->num_index_tuples = Max(info->num_heap_tuples, 0);
771 37 : stats->estimated_count = info->estimated_count;
772 :
773 : /*
774 : * Need lock unless it's local to this backend.
775 : */
776 37 : needLock = !RELATION_IS_LOCAL(index);
777 :
778 37 : if (needLock)
779 33 : LockRelationForExtension(index, ExclusiveLock);
780 37 : npages = RelationGetNumberOfBlocks(index);
781 37 : if (needLock)
782 33 : UnlockRelationForExtension(index, ExclusiveLock);
783 :
784 37 : totFreePages = 0;
785 :
786 : /* Scan all blocks starting from the root using streaming reads */
787 37 : p.current_blocknum = GIN_ROOT_BLKNO;
788 37 : p.last_exclusive = npages;
789 :
790 : /*
791 : * It is safe to use batchmode as block_range_read_stream_cb takes no
792 : * locks.
793 : */
794 37 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
795 : READ_STREAM_FULL |
796 : READ_STREAM_USE_BATCHING,
797 : info->strategy,
798 : index,
799 : MAIN_FORKNUM,
800 : block_range_read_stream_cb,
801 : &p,
802 : 0);
803 :
804 6138 : for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
805 : {
806 : Buffer buffer;
807 : Page page;
808 :
809 6101 : vacuum_delay_point(false);
810 :
811 6101 : buffer = read_stream_next_buffer(stream, NULL);
812 :
813 6101 : LockBuffer(buffer, GIN_SHARE);
814 6101 : page = BufferGetPage(buffer);
815 :
816 6101 : if (GinPageIsRecyclable(page))
817 : {
818 : Assert(blkno != GIN_ROOT_BLKNO);
819 3103 : RecordFreeIndexPage(index, blkno);
820 3103 : totFreePages++;
821 : }
822 2998 : else if (GinPageIsData(page))
823 : {
824 145 : idxStat.nDataPages++;
825 : }
826 2853 : else if (!GinPageIsList(page))
827 : {
828 2853 : idxStat.nEntryPages++;
829 :
830 2853 : if (GinPageIsLeaf(page))
831 2833 : idxStat.nEntries += PageGetMaxOffsetNumber(page);
832 : }
833 :
834 6101 : UnlockReleaseBuffer(buffer);
835 : }
836 :
837 : Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
838 37 : read_stream_end(stream);
839 :
840 : /* Update the metapage with accurate page and entry counts */
841 37 : idxStat.nTotalPages = npages;
842 37 : ginUpdateStats(info->index, &idxStat, false);
843 :
844 : /* Finally, vacuum the FSM */
845 37 : IndexFreeSpaceMapVacuum(info->index);
846 :
847 37 : stats->pages_free = totFreePages;
848 :
849 37 : if (needLock)
850 33 : LockRelationForExtension(index, ExclusiveLock);
851 37 : stats->num_pages = RelationGetNumberOfBlocks(index);
852 37 : if (needLock)
853 33 : UnlockRelationForExtension(index, ExclusiveLock);
854 :
855 37 : return stats;
856 : }
857 :
858 : /*
859 : * Return whether Page can safely be recycled.
860 : */
861 : bool
862 6172 : GinPageIsRecyclable(Page page)
863 : {
864 : TransactionId delete_xid;
865 :
866 6172 : if (PageIsNew(page))
867 0 : return true;
868 :
869 6172 : if (!GinPageIsDeleted(page))
870 2990 : return false;
871 :
872 3182 : delete_xid = GinPageGetDeleteXid(page);
873 :
874 3182 : if (!TransactionIdIsValid(delete_xid))
875 3174 : return true;
876 :
877 : /*
878 : * If no backend still could view delete_xid as in running, all scans
879 : * concurrent with ginDeletePostingPage() must have finished.
880 : */
881 8 : return GlobalVisCheckRemovableXid(NULL, delete_xid);
882 : }
|