Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgvacuum.c
4 : * vacuum for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgvacuum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/genam.h"
19 : #include "access/spgist_private.h"
20 : #include "access/spgxlog.h"
21 : #include "access/transam.h"
22 : #include "access/xloginsert.h"
23 : #include "commands/vacuum.h"
24 : #include "miscadmin.h"
25 : #include "storage/bufmgr.h"
26 : #include "storage/indexfsm.h"
27 : #include "storage/lmgr.h"
28 : #include "storage/read_stream.h"
29 : #include "utils/snapmgr.h"
30 :
31 :
/*
 * Entry in pending-list of TIDs we need to revisit.
 *
 * The list is singly-linked and append-only: spgAddPendingTID always adds
 * at the tail, so a scan of the list cannot miss entries added during the
 * scan (see spgprocesspending).
 */
typedef struct spgVacPendingItem
{
	ItemPointerData tid;		/* redirection target to visit */
	bool		done;			/* have we dealt with this? */
	struct spgVacPendingItem *next; /* list link */
} spgVacPendingItem;
39 :
/*
 * Local state for vacuum operations, shared by the bulkdelete/cleanup
 * entry points and all per-page worker routines.
 */
typedef struct spgBulkDeleteState
{
	/* Parameters passed in to spgvacuumscan */
	IndexVacuumInfo *info;		/* index + heap rels, strategy, etc. */
	IndexBulkDeleteResult *stats;	/* running stats, reported to caller */
	IndexBulkDeleteCallback callback;	/* says which heap TIDs are dead */
	void	   *callback_state;

	/* Additional working state */
	SpGistState spgstate;		/* for SPGiST operations that need one */
	spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
	TransactionId myXmin;		/* for detecting newly-added redirects */
	BlockNumber lastFilledBlock;	/* last non-deletable block */
} spgBulkDeleteState;
55 :
56 :
57 : /*
58 : * Add TID to pendingList, but only if not already present.
59 : *
60 : * Note that new items are always appended at the end of the list; this
61 : * ensures that scans of the list don't miss items added during the scan.
62 : */
63 : static void
64 103752 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
65 : {
66 : spgVacPendingItem *pitem;
67 : spgVacPendingItem **listLink;
68 :
69 : /* search the list for pre-existing entry */
70 103752 : listLink = &bds->pendingList;
71 9165804 : while (*listLink != NULL)
72 : {
73 9096112 : pitem = *listLink;
74 9096112 : if (ItemPointerEquals(tid, &pitem->tid))
75 34060 : return; /* already in list, do nothing */
76 9062052 : listLink = &pitem->next;
77 : }
78 : /* not there, so append new entry */
79 69692 : pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
80 69692 : pitem->tid = *tid;
81 69692 : pitem->done = false;
82 69692 : pitem->next = NULL;
83 69692 : *listLink = pitem;
84 : }
85 :
86 : /*
87 : * Clear pendingList
88 : */
89 : static void
90 524 : spgClearPendingList(spgBulkDeleteState *bds)
91 : {
92 : spgVacPendingItem *pitem;
93 : spgVacPendingItem *nitem;
94 :
95 70216 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
96 : {
97 69692 : nitem = pitem->next;
98 : /* All items in list should have been dealt with */
99 : Assert(pitem->done);
100 69692 : pfree(pitem);
101 : }
102 524 : bds->pendingList = NULL;
103 524 : }
104 :
/*
 * Vacuum a regular (non-root) leaf page
 *
 * We must delete tuples that are targeted for deletion by the VACUUM,
 * but not move any tuples that are referenced by outside links; we assume
 * those are the ones that are heads of chains.
 *
 * If we find a REDIRECT that was made by a concurrently-running transaction,
 * we must add its target TID to pendingList.  (We don't try to visit the
 * target immediately, first because we don't want VACUUM locking more than
 * one buffer at a time, and second because the duplicate-filtering logic
 * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
 * loop in the face of continuous concurrent insertions.)
 *
 * If forPending is true, we are examining the page as a consequence of
 * chasing a redirect link, not as part of the normal sequential scan.
 * We still vacuum the page normally, but we don't increment the stats
 * about live tuples; else we'd double-count those tuples, since the page
 * has been or will be visited in the sequential scan as well.
 *
 * Caller must hold exclusive lock on the buffer.
 */
static void
vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
			   bool forPending)
{
	Page		page = BufferGetPage(buffer);
	spgxlogVacuumLeaf xlrec;
	OffsetNumber toDead[MaxIndexTuplesPerPage];
	OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
	OffsetNumber moveSrc[MaxIndexTuplesPerPage];
	OffsetNumber moveDest[MaxIndexTuplesPerPage];
	OffsetNumber chainSrc[MaxIndexTuplesPerPage];
	OffsetNumber chainDest[MaxIndexTuplesPerPage];
	OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
	bool		deletable[MaxIndexTuplesPerPage + 1];
	int			nDeletable;
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page);

	/* predecessor[] and deletable[] are indexed by offset number (1-based) */
	memset(predecessor, 0, sizeof(predecessor));
	memset(deletable, 0, sizeof(deletable));
	nDeletable = 0;

	/* Scan page, identify tuples to delete, accumulate stats */
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, i));
		if (lt->tupstate == SPGIST_LIVE)
		{
			Assert(ItemPointerIsValid(&lt->heapPtr));

			if (bds->callback(&lt->heapPtr, bds->callback_state))
			{
				/* heap tuple is dead: mark index tuple for removal */
				bds->stats->tuples_removed += 1;
				deletable[i] = true;
				nDeletable++;
			}
			else
			{
				/* don't double-count tuples seen via pending-list chasing */
				if (!forPending)
					bds->stats->num_index_tuples += 1;
			}

			/* Form predecessor map, too */
			if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
			{
				/* paranoia about corrupted chain links */
				if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
					SGLT_GET_NEXTOFFSET(lt) > max ||
					predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
					elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
						 BufferGetBlockNumber(buffer),
						 RelationGetRelationName(index));
				predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
			}
		}
		else if (lt->tupstate == SPGIST_REDIRECT)
		{
			SpGistDeadTuple dt = (SpGistDeadTuple) lt;

			Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
			Assert(ItemPointerIsValid(&dt->pointer));

			/*
			 * Add target TID to pending list if the redirection could have
			 * happened since VACUUM started.  (If xid is invalid, assume it
			 * must have happened before VACUUM started, since REINDEX
			 * CONCURRENTLY locks out VACUUM.)
			 *
			 * Note: we could make a tighter test by seeing if the xid is
			 * "running" according to the active snapshot; but snapmgr.c
			 * doesn't currently export a suitable API, and it's not entirely
			 * clear that a tighter test is worth the cycles anyway.
			 */
			if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
				spgAddPendingTID(bds, &dt->pointer);
		}
		else
		{
			/* DEAD or PLACEHOLDER tuple: nothing to do for it here */
			Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
		}
	}

	if (nDeletable == 0)
		return;					/* nothing more to do */

	/*----------
	 * Figure out exactly what we have to do.  We do this separately from
	 * actually modifying the page, mainly so that we have a representation
	 * that can be dumped into WAL and then the replay code can do exactly
	 * the same thing.  The output of this step consists of six arrays
	 * describing four kinds of operations, to be performed in this order:
	 *
	 * toDead[]: tuple numbers to be replaced with DEAD tuples
	 * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
	 * moveSrc[]: tuple numbers that need to be relocated to another offset
	 * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
	 * moveDest[]: new locations for moveSrc tuples
	 * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
	 * chainDest[]: new values of nextOffset for chainSrc members
	 *
	 * It's easiest to figure out what we have to do by processing tuple
	 * chains, so we iterate over all the tuples (not just the deletable
	 * ones!) to identify chain heads, then chase down each chain and make
	 * work item entries for deletable tuples within the chain.
	 *----------
	 */
	xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;

	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple head;
		bool		interveningDeletable;
		OffsetNumber prevLive;
		OffsetNumber j;

		head = (SpGistLeafTuple) PageGetItem(page,
											 PageGetItemId(page, i));
		if (head->tupstate != SPGIST_LIVE)
			continue;			/* can't be a chain member */
		if (predecessor[i] != 0)
			continue;			/* not a chain head */

		/* initialize ... */
		interveningDeletable = false;
		prevLive = deletable[i] ? InvalidOffsetNumber : i;

		/* scan down the chain ... */
		j = SGLT_GET_NEXTOFFSET(head);
		while (j != InvalidOffsetNumber)
		{
			SpGistLeafTuple lt;

			lt = (SpGistLeafTuple) PageGetItem(page,
											   PageGetItemId(page, j));
			if (lt->tupstate != SPGIST_LIVE)
			{
				/* all tuples in chain should be live */
				elog(ERROR, "unexpected SPGiST tuple state: %d",
					 lt->tupstate);
			}

			if (deletable[j])
			{
				/* This tuple should be replaced by a placeholder */
				toPlaceholder[xlrec.nPlaceholder] = j;
				xlrec.nPlaceholder++;
				/* previous live tuple's chain link will need an update */
				interveningDeletable = true;
			}
			else if (prevLive == InvalidOffsetNumber)
			{
				/*
				 * This is the first live tuple in the chain.  It has to move
				 * to the head position.
				 */
				moveSrc[xlrec.nMove] = j;
				moveDest[xlrec.nMove] = i;
				xlrec.nMove++;
				/* Chain updates will be applied after the move */
				prevLive = i;
				interveningDeletable = false;
			}
			else
			{
				/*
				 * Second or later live tuple.  Arrange to re-chain it to the
				 * previous live one, if there was a gap.
				 */
				if (interveningDeletable)
				{
					chainSrc[xlrec.nChain] = prevLive;
					chainDest[xlrec.nChain] = j;
					xlrec.nChain++;
				}
				prevLive = j;
				interveningDeletable = false;
			}

			j = SGLT_GET_NEXTOFFSET(lt);
		}

		if (prevLive == InvalidOffsetNumber)
		{
			/* The chain is entirely removable, so we need a DEAD tuple */
			toDead[xlrec.nDead] = i;
			xlrec.nDead++;
		}
		else if (interveningDeletable)
		{
			/* One or more deletions at end of chain, so close it off */
			chainSrc[xlrec.nChain] = prevLive;
			chainDest[xlrec.nChain] = InvalidOffsetNumber;
			xlrec.nChain++;
		}
	}

	/* sanity check ... */
	if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
		elog(ERROR, "inconsistent counts of deletable tuples");

	/* Do the updates */
	START_CRIT_SECTION();

	spgPageIndexMultiDelete(&bds->spgstate, page,
							toDead, xlrec.nDead,
							SPGIST_DEAD, SPGIST_DEAD,
							InvalidBlockNumber, InvalidOffsetNumber);

	spgPageIndexMultiDelete(&bds->spgstate, page,
							toPlaceholder, xlrec.nPlaceholder,
							SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
							InvalidBlockNumber, InvalidOffsetNumber);

	/*
	 * We implement the move step by swapping the line pointers of the source
	 * and target tuples, then replacing the newly-source tuples with
	 * placeholders.  This is perhaps unduly friendly with the page data
	 * representation, but it's fast and doesn't risk page overflow when a
	 * tuple to be relocated is large.
	 */
	for (i = 0; i < xlrec.nMove; i++)
	{
		ItemId		idSrc = PageGetItemId(page, moveSrc[i]);
		ItemId		idDest = PageGetItemId(page, moveDest[i]);
		ItemIdData	tmp;

		tmp = *idSrc;
		*idSrc = *idDest;
		*idDest = tmp;
	}

	spgPageIndexMultiDelete(&bds->spgstate, page,
							moveSrc, xlrec.nMove,
							SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
							InvalidBlockNumber, InvalidOffsetNumber);

	/* Apply the chain-link updates computed above */
	for (i = 0; i < xlrec.nChain; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, chainSrc[i]));
		Assert(lt->tupstate == SPGIST_LIVE);
		SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
	}

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		STORE_STATE(&bds->spgstate, xlrec.stateSrc);

		XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumLeaf);
		/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
		XLogRegisterData(toDead, sizeof(OffsetNumber) * xlrec.nDead);
		XLogRegisterData(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
		XLogRegisterData(moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
		XLogRegisterData(moveDest, sizeof(OffsetNumber) * xlrec.nMove);
		XLogRegisterData(chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
		XLogRegisterData(chainDest, sizeof(OffsetNumber) * xlrec.nChain);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
402 :
/*
 * Vacuum a root page when it is also a leaf
 *
 * On the root, we just delete any dead leaf tuples; no fancy business.
 * (Root tuples are never chained, moved, or redirected, so none of the
 * machinery in vacuumLeafPage is needed here.)
 */
static void
vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	spgxlogVacuumRoot xlrec;
	OffsetNumber toDelete[MaxIndexTuplesPerPage];
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page);

	xlrec.nDelete = 0;

	/* Scan page, identify tuples to delete, accumulate stats */
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, i));
		if (lt->tupstate == SPGIST_LIVE)
		{
			Assert(ItemPointerIsValid(&lt->heapPtr));

			if (bds->callback(&lt->heapPtr, bds->callback_state))
			{
				/* heap tuple is dead: schedule removal of the index tuple */
				bds->stats->tuples_removed += 1;
				toDelete[xlrec.nDelete] = i;
				xlrec.nDelete++;
			}
			else
			{
				bds->stats->num_index_tuples += 1;
			}
		}
		else
		{
			/* all tuples on root should be live */
			elog(ERROR, "unexpected SPGiST tuple state: %d",
				 lt->tupstate);
		}
	}

	if (xlrec.nDelete == 0)
		return;					/* nothing more to do */

	/* Do the update */
	START_CRIT_SECTION();

	/* The tuple numbers are in order, so we can use PageIndexMultiDelete */
	PageIndexMultiDelete(page, toDelete, xlrec.nDelete);

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		/* Prepare WAL record */
		STORE_STATE(&bds->spgstate, xlrec.stateSrc);

		XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRoot);
		/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
		XLogRegisterData(toDelete,
						 sizeof(OffsetNumber) * xlrec.nDelete);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
483 :
/*
 * Clean up redirect and placeholder tuples on the given page
 *
 * Redirect tuples can be marked placeholder once they're old enough.
 * Placeholder tuples can be removed if it won't change the offsets of
 * non-placeholder ones.
 *
 * Unlike the routines above, this works on both leaf and inner pages.
 *
 * Caller must hold exclusive lock on the buffer.
 */
static void
vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page),
				firstPlaceholder = InvalidOffsetNumber;
	bool		hasNonPlaceholder = false;
	bool		hasUpdate = false;
	OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
	OffsetNumber itemnos[MaxIndexTuplesPerPage];
	spgxlogVacuumRedirect xlrec;
	GlobalVisState *vistest;

	xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
	xlrec.nToPlaceholder = 0;
	xlrec.snapshotConflictHorizon = InvalidTransactionId;

	vistest = GlobalVisTestFor(heaprel);

	START_CRIT_SECTION();

	/*
	 * Scan backwards to convert old redirection tuples to placeholder tuples,
	 * and identify location of last non-placeholder tuple while at it.
	 *
	 * We can stop early once we have seen a non-placeholder tuple and the
	 * page's redirection count has dropped to zero, since nothing further
	 * back could need changing.
	 */
	for (i = max;
		 i >= FirstOffsetNumber &&
		 (opaque->nRedirection > 0 || !hasNonPlaceholder);
		 i--)
	{
		SpGistDeadTuple dt;

		dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));

		/*
		 * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
		 * be any index scans "in flight" to it.  Such an index scan would
		 * have to be in a transaction whose snapshot sees the REDIRECT's XID
		 * as still running, so comparing the XID against global xmin is a
		 * conservatively safe test.  If the XID is invalid, it must have been
		 * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
		 */
		if (dt->tupstate == SPGIST_REDIRECT &&
			(!TransactionIdIsValid(dt->xid) ||
			 GlobalVisTestIsRemovableXid(vistest, dt->xid)))
		{
			dt->tupstate = SPGIST_PLACEHOLDER;
			Assert(opaque->nRedirection > 0);
			opaque->nRedirection--;
			opaque->nPlaceholder++;

			/* remember newest XID among the removed redirects */
			if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
				TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
				xlrec.snapshotConflictHorizon = dt->xid;

			ItemPointerSetInvalid(&dt->pointer);

			itemToPlaceholder[xlrec.nToPlaceholder] = i;
			xlrec.nToPlaceholder++;

			hasUpdate = true;
		}

		if (dt->tupstate == SPGIST_PLACEHOLDER)
		{
			/* track the lowest offset of the trailing placeholder run */
			if (!hasNonPlaceholder)
				firstPlaceholder = i;
		}
		else
		{
			hasNonPlaceholder = true;
		}
	}

	/*
	 * Any placeholder tuples at the end of page can safely be removed.  We
	 * can't remove ones before the last non-placeholder, though, because we
	 * can't alter the offset numbers of non-placeholder tuples.
	 */
	if (firstPlaceholder != InvalidOffsetNumber)
	{
		/*
		 * We do not store this array to rdata because it's easy to recreate.
		 */
		for (i = firstPlaceholder; i <= max; i++)
			itemnos[i - firstPlaceholder] = i;

		i = max - firstPlaceholder + 1;
		Assert(opaque->nPlaceholder >= i);
		opaque->nPlaceholder -= i;

		/* The array is surely sorted, so can use PageIndexMultiDelete */
		PageIndexMultiDelete(page, itemnos, i);

		hasUpdate = true;
	}

	xlrec.firstPlaceholder = firstPlaceholder;

	if (hasUpdate)
		MarkBufferDirty(buffer);

	if (hasUpdate && RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRedirect);
		XLogRegisterData(itemToPlaceholder,
						 sizeof(OffsetNumber) * xlrec.nToPlaceholder);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
617 :
618 : /*
619 : * Process one page during a bulkdelete scan
620 : */
621 : static void
622 4544 : spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
623 : {
624 4544 : Relation index = bds->info->index;
625 4544 : BlockNumber blkno = BufferGetBlockNumber(buffer);
626 : Page page;
627 :
628 4544 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
629 4544 : page = (Page) BufferGetPage(buffer);
630 :
631 4544 : if (PageIsNew(page))
632 : {
633 : /*
634 : * We found an all-zero page, which could happen if the database
635 : * crashed just after extending the file. Recycle it.
636 : */
637 : }
638 4536 : else if (PageIsEmpty(page))
639 : {
640 : /* nothing to do */
641 : }
642 4442 : else if (SpGistPageIsLeaf(page))
643 : {
644 4284 : if (SpGistBlockIsRoot(blkno))
645 : {
646 32 : vacuumLeafRoot(bds, index, buffer);
647 : /* no need for vacuumRedirectAndPlaceholder */
648 : }
649 : else
650 : {
651 4252 : vacuumLeafPage(bds, index, buffer, false);
652 4252 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
653 : }
654 : }
655 : else
656 : {
657 : /* inner page */
658 158 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
659 : }
660 :
661 : /*
662 : * The root pages must never be deleted, nor marked as available in FSM,
663 : * because we don't want them ever returned by a search for a place to put
664 : * a new tuple. Otherwise, check for empty page, and make sure the FSM
665 : * knows about it.
666 : */
667 4544 : if (!SpGistBlockIsRoot(blkno))
668 : {
669 4392 : if (PageIsNew(page) || PageIsEmpty(page))
670 : {
671 66 : RecordFreeIndexPage(index, blkno);
672 66 : bds->stats->pages_deleted++;
673 : }
674 : else
675 : {
676 4326 : SpGistSetLastUsedPage(index, buffer);
677 4326 : bds->lastFilledBlock = blkno;
678 : }
679 : }
680 :
681 4544 : UnlockReleaseBuffer(buffer);
682 4544 : }
683 :
/*
 * Process the pending-TID list between pages of the main scan
 *
 * Each pending TID refers either to a leaf page (vacuum it and mark every
 * pending entry pointing at the same page done) or to an inner tuple (add
 * all of its downlinks to the pending list, so we eventually reach the leaf
 * level).  spgAddPendingTID appends new entries at the tail, so this loop
 * will visit them too.  The list is emptied and freed before returning.
 */
static void
spgprocesspending(spgBulkDeleteState *bds)
{
	Relation	index = bds->info->index;
	spgVacPendingItem *pitem;
	spgVacPendingItem *nitem;
	BlockNumber blkno;
	Buffer		buffer;
	Page		page;

	for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
	{
		if (pitem->done)
			continue;			/* ignore already-done items */

		/* call vacuum_delay_point while not holding any buffer lock */
		vacuum_delay_point(false);

		/* examine the referenced page */
		blkno = ItemPointerGetBlockNumber(&pitem->tid);
		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, bds->info->strategy);
		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		page = (Page) BufferGetPage(buffer);

		if (PageIsNew(page) || SpGistPageIsDeleted(page))
		{
			/* Probably shouldn't happen, but ignore it */
		}
		else if (SpGistPageIsLeaf(page))
		{
			if (SpGistBlockIsRoot(blkno))
			{
				/* this should definitely not happen */
				elog(ERROR, "redirection leads to root page of index \"%s\"",
					 RelationGetRelationName(index));
			}

			/* deal with any deletable tuples */
			vacuumLeafPage(bds, index, buffer, true);
			/* might as well do this while we are here */
			vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);

			SpGistSetLastUsedPage(index, buffer);

			/*
			 * We can mark as done not only this item, but any later ones
			 * pointing at the same page, since we vacuumed the whole page.
			 */
			pitem->done = true;
			for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
			{
				if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
					nitem->done = true;
			}
		}
		else
		{
			/*
			 * On an inner page, visit the referenced inner tuple and add all
			 * its downlinks to the pending list.  We might have pending items
			 * for more than one inner tuple on the same page (in fact this is
			 * pretty likely given the way space allocation works), so get
			 * them all while we are here.
			 */
			for (nitem = pitem; nitem != NULL; nitem = nitem->next)
			{
				if (nitem->done)
					continue;
				if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
				{
					OffsetNumber offset;
					SpGistInnerTuple innerTuple;

					offset = ItemPointerGetOffsetNumber(&nitem->tid);
					innerTuple = (SpGistInnerTuple) PageGetItem(page,
																PageGetItemId(page, offset));
					if (innerTuple->tupstate == SPGIST_LIVE)
					{
						SpGistNodeTuple node;
						int			i;

						/* enqueue every valid downlink of this inner tuple */
						SGITITERATE(innerTuple, i, node)
						{
							if (ItemPointerIsValid(&node->t_tid))
								spgAddPendingTID(bds, &node->t_tid);
						}
					}
					else if (innerTuple->tupstate == SPGIST_REDIRECT)
					{
						/* transfer attention to redirect point */
						spgAddPendingTID(bds,
										 &((SpGistDeadTuple) innerTuple)->pointer);
					}
					else
						elog(ERROR, "unexpected SPGiST tuple state: %d",
							 innerTuple->tupstate);

					nitem->done = true;
				}
			}
		}

		UnlockReleaseBuffer(buffer);
	}

	spgClearPendingList(bds);
}
795 :
/*
 * Perform a bulkdelete scan
 *
 * Drives a full physical-order pass over the index via the read-stream API,
 * vacuuming each page and draining the pending-TID list between pages, then
 * updates the metapage and FSM.  Fills in bds->stats for the caller.
 */
static void
spgvacuumscan(spgBulkDeleteState *bds)
{
	Relation	index = bds->info->index;
	bool		needLock;
	BlockNumber num_pages;
	BlockRangeReadStreamPrivate p;
	ReadStream *stream = NULL;

	/* Finish setting up spgBulkDeleteState */
	initSpGistState(&bds->spgstate, index);
	bds->pendingList = NULL;
	bds->myXmin = GetActiveSnapshot()->xmin;
	bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;

	/*
	 * Reset counts that will be incremented during the scan; needed in case
	 * of multiple scans during a single VACUUM command
	 */
	bds->stats->estimated_count = false;
	bds->stats->num_index_tuples = 0;
	bds->stats->pages_deleted = 0;

	/* We can skip locking for new or temp relations */
	needLock = !RELATION_IS_LOCAL(index);
	p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;

	/*
	 * It is safe to use batchmode as block_range_read_stream_cb takes no
	 * locks.
	 */
	stream = read_stream_begin_relation(READ_STREAM_FULL |
										READ_STREAM_USE_BATCHING,
										bds->info->strategy,
										index,
										MAIN_FORKNUM,
										block_range_read_stream_cb,
										&p,
										0);

	/*
	 * The outer loop iterates over all index pages except the metapage, in
	 * physical order (we hope the kernel will cooperate in providing
	 * read-ahead for speed).  It is critical that we visit all leaf pages,
	 * including ones added after we start the scan, else we might fail to
	 * delete some deletable tuples.  See more extensive comments about this
	 * in btvacuumscan().
	 */
	for (;;)
	{
		/* Get the current relation length */
		if (needLock)
			LockRelationForExtension(index, ExclusiveLock);
		num_pages = RelationGetNumberOfBlocks(index);
		if (needLock)
			UnlockRelationForExtension(index, ExclusiveLock);

		/* Quit if we've scanned the whole relation */
		if (p.current_blocknum >= num_pages)
			break;

		p.last_exclusive = num_pages;

		/* Iterate over pages, then loop back to recheck length */
		while (true)
		{
			Buffer		buf;

			/* call vacuum_delay_point while not holding any buffer lock */
			vacuum_delay_point(false);

			buf = read_stream_next_buffer(stream, NULL);

			if (!BufferIsValid(buf))
				break;

			spgvacuumpage(bds, buf);

			/* empty the pending-list after each page */
			if (bds->pendingList != NULL)
				spgprocesspending(bds);
		}

		/*
		 * We have to reset the read stream to use it again.  After returning
		 * InvalidBuffer, the read stream API won't invoke our callback again
		 * until the stream has been reset.
		 */
		read_stream_reset(stream);
	}

	read_stream_end(stream);

	/* Propagate local lastUsedPages cache to metablock */
	SpGistUpdateMetaPage(index);

	/*
	 * If we found any empty pages (and recorded them in the FSM), then
	 * forcibly update the upper-level FSM pages to ensure that searchers can
	 * find them.  It's possible that the pages were also found during
	 * previous scans and so this is a waste of time, but it's cheap enough
	 * relative to scanning the index that it shouldn't matter much, and
	 * making sure that free pages are available sooner not later seems
	 * worthwhile.
	 *
	 * Note that if no empty pages exist, we don't bother vacuuming the FSM at
	 * all.
	 */
	if (bds->stats->pages_deleted > 0)
		IndexFreeSpaceMapVacuum(index);

	/*
	 * Truncate index if possible
	 *
	 * XXX disabled because it's unsafe due to possible concurrent inserts.
	 * We'd have to rescan the pages to make sure they're still empty, and it
	 * doesn't seem worth it.  Note that btree doesn't do this either.
	 *
	 * Another reason not to truncate is that it could invalidate the cached
	 * pages-with-freespace pointers in the metapage and other backends'
	 * relation caches, that is leave them pointing to nonexistent pages.
	 * Adding RelationGetNumberOfBlocks calls to protect the places that use
	 * those pointers would be unduly expensive.
	 */
#ifdef NOT_USED
	if (num_pages > bds->lastFilledBlock + 1)
	{
		BlockNumber lastBlock = num_pages - 1;

		num_pages = bds->lastFilledBlock + 1;
		RelationTruncate(index, num_pages);
		bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
		bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
	}
#endif

	/* Report final stats */
	bds->stats->num_pages = num_pages;
	bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
	bds->stats->pages_free = bds->stats->pages_deleted;
}
940 :
941 : /*
942 : * Bulk deletion of all index entries pointing to a set of heap tuples.
943 : * The set of target tuples is specified via a callback routine that tells
944 : * whether any given heap tuple (identified by ItemPointer) is being deleted.
945 : *
946 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
947 : */
948 : IndexBulkDeleteResult *
949 12 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
950 : IndexBulkDeleteCallback callback, void *callback_state)
951 : {
952 : spgBulkDeleteState bds;
953 :
954 : /* allocate stats if first time through, else re-use existing struct */
955 12 : if (stats == NULL)
956 12 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
957 12 : bds.info = info;
958 12 : bds.stats = stats;
959 12 : bds.callback = callback;
960 12 : bds.callback_state = callback_state;
961 :
962 12 : spgvacuumscan(&bds);
963 :
964 12 : return stats;
965 : }
966 :
967 : /* Dummy callback to delete no tuples during spgvacuumcleanup */
968 : static bool
969 5751530 : dummy_callback(ItemPointer itemptr, void *state)
970 : {
971 5751530 : return false;
972 : }
973 :
974 : /*
975 : * Post-VACUUM cleanup.
976 : *
977 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
978 : */
979 : IndexBulkDeleteResult *
980 88 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
981 : {
982 : spgBulkDeleteState bds;
983 :
984 : /* No-op in ANALYZE ONLY mode */
985 88 : if (info->analyze_only)
986 12 : return stats;
987 :
988 : /*
989 : * We don't need to scan the index if there was a preceding bulkdelete
990 : * pass. Otherwise, make a pass that won't delete any live tuples, but
991 : * might still accomplish useful stuff with redirect/placeholder cleanup
992 : * and/or FSM housekeeping, and in any case will provide stats.
993 : */
994 76 : if (stats == NULL)
995 : {
996 64 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
997 64 : bds.info = info;
998 64 : bds.stats = stats;
999 64 : bds.callback = dummy_callback;
1000 64 : bds.callback_state = NULL;
1001 :
1002 64 : spgvacuumscan(&bds);
1003 : }
1004 :
1005 : /*
1006 : * It's quite possible for us to be fooled by concurrent tuple moves into
1007 : * double-counting some index tuples, so disbelieve any total that exceeds
1008 : * the underlying heap's count ... if we know that accurately. Otherwise
1009 : * this might just make matters worse.
1010 : */
1011 76 : if (!info->estimated_count)
1012 : {
1013 76 : if (stats->num_index_tuples > info->num_heap_tuples)
1014 0 : stats->num_index_tuples = info->num_heap_tuples;
1015 : }
1016 :
1017 76 : return stats;
1018 : }
|