Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgvacuum.c
4 : * vacuum for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgvacuum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/genam.h"
19 : #include "access/spgist_private.h"
20 : #include "access/spgxlog.h"
21 : #include "access/transam.h"
22 : #include "access/xloginsert.h"
23 : #include "commands/vacuum.h"
24 : #include "miscadmin.h"
25 : #include "storage/bufmgr.h"
26 : #include "storage/indexfsm.h"
27 : #include "storage/lmgr.h"
28 : #include "storage/read_stream.h"
29 : #include "utils/snapmgr.h"
30 :
31 :
32 : /* Entry in pending-list of TIDs we need to revisit */
33 : typedef struct spgVacPendingItem
34 : {
35 : ItemPointerData tid; /* redirection target to visit */
36 : bool done; /* have we dealt with this? */
37 : struct spgVacPendingItem *next; /* list link */
38 : } spgVacPendingItem;
39 :
40 : /* Local state for vacuum operations */
41 : typedef struct spgBulkDeleteState
42 : {
43 : /* Parameters passed in to spgvacuumscan */
44 : IndexVacuumInfo *info;
45 : IndexBulkDeleteResult *stats;
46 : IndexBulkDeleteCallback callback;
47 : void *callback_state;
48 :
49 : /* Additional working state */
50 : SpGistState spgstate; /* for SPGiST operations that need one */
51 : spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
52 : TransactionId myXmin; /* for detecting newly-added redirects */
53 : BlockNumber lastFilledBlock; /* last non-deletable block */
54 : } spgBulkDeleteState;
55 :
56 :
57 : /*
58 : * Add TID to pendingList, but only if not already present.
59 : *
60 : * Note that new items are always appended at the end of the list; this
61 : * ensures that scans of the list don't miss items added during the scan.
62 : */
63 : static void
64 18 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
65 : {
66 : spgVacPendingItem *pitem;
67 : spgVacPendingItem **listLink;
68 :
69 : /* search the list for pre-existing entry */
70 18 : listLink = &bds->pendingList;
71 36 : while (*listLink != NULL)
72 : {
73 18 : pitem = *listLink;
74 18 : if (ItemPointerEquals(tid, &pitem->tid))
75 0 : return; /* already in list, do nothing */
76 18 : listLink = &pitem->next;
77 : }
78 : /* not there, so append new entry */
79 18 : pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
80 18 : pitem->tid = *tid;
81 18 : pitem->done = false;
82 18 : pitem->next = NULL;
83 18 : *listLink = pitem;
84 : }
85 :
86 : /*
87 : * Clear pendingList
88 : */
89 : static void
90 6 : spgClearPendingList(spgBulkDeleteState *bds)
91 : {
92 : spgVacPendingItem *pitem;
93 : spgVacPendingItem *nitem;
94 :
95 24 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
96 : {
97 18 : nitem = pitem->next;
98 : /* All items in list should have been dealt with */
99 : Assert(pitem->done);
100 18 : pfree(pitem);
101 : }
102 6 : bds->pendingList = NULL;
103 6 : }
104 :
105 : /*
106 : * Vacuum a regular (non-root) leaf page
107 : *
108 : * We must delete tuples that are targeted for deletion by the VACUUM,
109 : * but not move any tuples that are referenced by outside links; we assume
110 : * those are the ones that are heads of chains.
111 : *
112 : * If we find a REDIRECT that was made by a concurrently-running transaction,
113 : * we must add its target TID to pendingList. (We don't try to visit the
114 : * target immediately, first because we don't want VACUUM locking more than
115 : * one buffer at a time, and second because the duplicate-filtering logic
116 : * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
117 : * loop in the face of continuous concurrent insertions.)
118 : *
119 : * If forPending is true, we are examining the page as a consequence of
120 : * chasing a redirect link, not as part of the normal sequential scan.
121 : * We still vacuum the page normally, but we don't increment the stats
122 : * about live tuples; else we'd double-count those tuples, since the page
123 : * has been or will be visited in the sequential scan as well.
124 : */
125 : static void
126 4286 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
127 : bool forPending)
128 : {
129 4286 : Page page = BufferGetPage(buffer);
130 : spgxlogVacuumLeaf xlrec;
131 : OffsetNumber toDead[MaxIndexTuplesPerPage];
132 : OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
133 : OffsetNumber moveSrc[MaxIndexTuplesPerPage];
134 : OffsetNumber moveDest[MaxIndexTuplesPerPage];
135 : OffsetNumber chainSrc[MaxIndexTuplesPerPage];
136 : OffsetNumber chainDest[MaxIndexTuplesPerPage];
137 : OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
138 : bool deletable[MaxIndexTuplesPerPage + 1];
139 : int nDeletable;
140 : OffsetNumber i,
141 4286 : max = PageGetMaxOffsetNumber(page);
142 :
143 4286 : memset(predecessor, 0, sizeof(predecessor));
144 4286 : memset(deletable, 0, sizeof(deletable));
145 4286 : nDeletable = 0;
146 :
147 : /* Scan page, identify tuples to delete, accumulate stats */
148 794768 : for (i = FirstOffsetNumber; i <= max; i++)
149 : {
150 : SpGistLeafTuple lt;
151 :
152 790482 : lt = (SpGistLeafTuple) PageGetItem(page,
153 790482 : PageGetItemId(page, i));
154 790482 : if (lt->tupstate == SPGIST_LIVE)
155 : {
156 : Assert(ItemPointerIsValid(<->heapPtr));
157 :
158 572532 : if (bds->callback(<->heapPtr, bds->callback_state))
159 : {
160 37948 : bds->stats->tuples_removed += 1;
161 37948 : deletable[i] = true;
162 37948 : nDeletable++;
163 : }
164 : else
165 : {
166 534584 : if (!forPending)
167 533672 : bds->stats->num_index_tuples += 1;
168 : }
169 :
170 : /* Form predecessor map, too */
171 572532 : if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
172 : {
173 : /* paranoia about corrupted chain links */
174 559636 : if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
175 559636 : SGLT_GET_NEXTOFFSET(lt) > max ||
176 559636 : predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
177 0 : elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
178 : BufferGetBlockNumber(buffer),
179 : RelationGetRelationName(index));
180 559636 : predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
181 : }
182 : }
183 217950 : else if (lt->tupstate == SPGIST_REDIRECT)
184 : {
185 1202 : SpGistDeadTuple dt = (SpGistDeadTuple) lt;
186 :
187 : Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
188 : Assert(ItemPointerIsValid(&dt->pointer));
189 :
190 : /*
191 : * Add target TID to pending list if the redirection could have
192 : * happened since VACUUM started. (If xid is invalid, assume it
193 : * must have happened before VACUUM started, since REINDEX
194 : * CONCURRENTLY locks out VACUUM.)
195 : *
196 : * Note: we could make a tighter test by seeing if the xid is
197 : * "running" according to the active snapshot; but snapmgr.c
198 : * doesn't currently export a suitable API, and it's not entirely
199 : * clear that a tighter test is worth the cycles anyway.
200 : */
201 1202 : if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
202 6 : spgAddPendingTID(bds, &dt->pointer);
203 : }
204 : else
205 : {
206 : Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
207 : }
208 : }
209 :
210 4286 : if (nDeletable == 0)
211 3902 : return; /* nothing more to do */
212 :
213 : /*----------
214 : * Figure out exactly what we have to do. We do this separately from
215 : * actually modifying the page, mainly so that we have a representation
216 : * that can be dumped into WAL and then the replay code can do exactly
217 : * the same thing. The output of this step consists of six arrays
218 : * describing four kinds of operations, to be performed in this order:
219 : *
220 : * toDead[]: tuple numbers to be replaced with DEAD tuples
221 : * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
222 : * moveSrc[]: tuple numbers that need to be relocated to another offset
223 : * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
224 : * moveDest[]: new locations for moveSrc tuples
225 : * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
226 : * chainDest[]: new values of nextOffset for chainSrc members
227 : *
228 : * It's easiest to figure out what we have to do by processing tuple
229 : * chains, so we iterate over all the tuples (not just the deletable
230 : * ones!) to identify chain heads, then chase down each chain and make
231 : * work item entries for deletable tuples within the chain.
232 : *----------
233 : */
234 384 : xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
235 :
236 82436 : for (i = FirstOffsetNumber; i <= max; i++)
237 : {
238 : SpGistLeafTuple head;
239 : bool interveningDeletable;
240 : OffsetNumber prevLive;
241 : OffsetNumber j;
242 :
243 82052 : head = (SpGistLeafTuple) PageGetItem(page,
244 82052 : PageGetItemId(page, i));
245 82052 : if (head->tupstate != SPGIST_LIVE)
246 24214 : continue; /* can't be a chain member */
247 57838 : if (predecessor[i] != 0)
248 57410 : continue; /* not a chain head */
249 :
250 : /* initialize ... */
251 428 : interveningDeletable = false;
252 428 : prevLive = deletable[i] ? InvalidOffsetNumber : i;
253 :
254 : /* scan down the chain ... */
255 428 : j = SGLT_GET_NEXTOFFSET(head);
256 57838 : while (j != InvalidOffsetNumber)
257 : {
258 : SpGistLeafTuple lt;
259 :
260 57410 : lt = (SpGistLeafTuple) PageGetItem(page,
261 57410 : PageGetItemId(page, j));
262 57410 : if (lt->tupstate != SPGIST_LIVE)
263 : {
264 : /* all tuples in chain should be live */
265 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
266 : lt->tupstate);
267 : }
268 :
269 57410 : if (deletable[j])
270 : {
271 : /* This tuple should be replaced by a placeholder */
272 37596 : toPlaceholder[xlrec.nPlaceholder] = j;
273 37596 : xlrec.nPlaceholder++;
274 : /* previous live tuple's chain link will need an update */
275 37596 : interveningDeletable = true;
276 : }
277 19814 : else if (prevLive == InvalidOffsetNumber)
278 : {
279 : /*
280 : * This is the first live tuple in the chain. It has to move
281 : * to the head position.
282 : */
283 352 : moveSrc[xlrec.nMove] = j;
284 352 : moveDest[xlrec.nMove] = i;
285 352 : xlrec.nMove++;
286 : /* Chain updates will be applied after the move */
287 352 : prevLive = i;
288 352 : interveningDeletable = false;
289 : }
290 : else
291 : {
292 : /*
293 : * Second or later live tuple. Arrange to re-chain it to the
294 : * previous live one, if there was a gap.
295 : */
296 19462 : if (interveningDeletable)
297 : {
298 9772 : chainSrc[xlrec.nChain] = prevLive;
299 9772 : chainDest[xlrec.nChain] = j;
300 9772 : xlrec.nChain++;
301 : }
302 19462 : prevLive = j;
303 19462 : interveningDeletable = false;
304 : }
305 :
306 57410 : j = SGLT_GET_NEXTOFFSET(lt);
307 : }
308 :
309 428 : if (prevLive == InvalidOffsetNumber)
310 : {
311 : /* The chain is entirely removable, so we need a DEAD tuple */
312 0 : toDead[xlrec.nDead] = i;
313 0 : xlrec.nDead++;
314 : }
315 428 : else if (interveningDeletable)
316 : {
317 : /* One or more deletions at end of chain, so close it off */
318 398 : chainSrc[xlrec.nChain] = prevLive;
319 398 : chainDest[xlrec.nChain] = InvalidOffsetNumber;
320 398 : xlrec.nChain++;
321 : }
322 : }
323 :
324 : /* sanity check ... */
325 384 : if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
326 0 : elog(ERROR, "inconsistent counts of deletable tuples");
327 :
328 : /* Do the updates */
329 384 : START_CRIT_SECTION();
330 :
331 384 : spgPageIndexMultiDelete(&bds->spgstate, page,
332 384 : toDead, xlrec.nDead,
333 : SPGIST_DEAD, SPGIST_DEAD,
334 : InvalidBlockNumber, InvalidOffsetNumber);
335 :
336 384 : spgPageIndexMultiDelete(&bds->spgstate, page,
337 384 : toPlaceholder, xlrec.nPlaceholder,
338 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
339 : InvalidBlockNumber, InvalidOffsetNumber);
340 :
341 : /*
342 : * We implement the move step by swapping the line pointers of the source
343 : * and target tuples, then replacing the newly-source tuples with
344 : * placeholders. This is perhaps unduly friendly with the page data
345 : * representation, but it's fast and doesn't risk page overflow when a
346 : * tuple to be relocated is large.
347 : */
348 736 : for (i = 0; i < xlrec.nMove; i++)
349 : {
350 352 : ItemId idSrc = PageGetItemId(page, moveSrc[i]);
351 352 : ItemId idDest = PageGetItemId(page, moveDest[i]);
352 : ItemIdData tmp;
353 :
354 352 : tmp = *idSrc;
355 352 : *idSrc = *idDest;
356 352 : *idDest = tmp;
357 : }
358 :
359 384 : spgPageIndexMultiDelete(&bds->spgstate, page,
360 384 : moveSrc, xlrec.nMove,
361 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
362 : InvalidBlockNumber, InvalidOffsetNumber);
363 :
364 10554 : for (i = 0; i < xlrec.nChain; i++)
365 : {
366 : SpGistLeafTuple lt;
367 :
368 10170 : lt = (SpGistLeafTuple) PageGetItem(page,
369 10170 : PageGetItemId(page, chainSrc[i]));
370 : Assert(lt->tupstate == SPGIST_LIVE);
371 10170 : SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
372 : }
373 :
374 384 : MarkBufferDirty(buffer);
375 :
376 384 : if (RelationNeedsWAL(index))
377 : {
378 : XLogRecPtr recptr;
379 :
380 384 : XLogBeginInsert();
381 :
382 384 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
383 :
384 384 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumLeaf);
385 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
386 384 : XLogRegisterData(toDead, sizeof(OffsetNumber) * xlrec.nDead);
387 384 : XLogRegisterData(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
388 384 : XLogRegisterData(moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
389 384 : XLogRegisterData(moveDest, sizeof(OffsetNumber) * xlrec.nMove);
390 384 : XLogRegisterData(chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
391 384 : XLogRegisterData(chainDest, sizeof(OffsetNumber) * xlrec.nChain);
392 :
393 384 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
394 :
395 384 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
396 :
397 384 : PageSetLSN(page, recptr);
398 : }
399 :
400 384 : END_CRIT_SECTION();
401 : }
402 :
403 : /*
404 : * Vacuum a root page when it is also a leaf
405 : *
406 : * On the root, we just delete any dead leaf tuples; no fancy business
407 : */
408 : static void
409 32 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
410 : {
411 32 : Page page = BufferGetPage(buffer);
412 : spgxlogVacuumRoot xlrec;
413 : OffsetNumber toDelete[MaxIndexTuplesPerPage];
414 : OffsetNumber i,
415 32 : max = PageGetMaxOffsetNumber(page);
416 :
417 32 : xlrec.nDelete = 0;
418 :
419 : /* Scan page, identify tuples to delete, accumulate stats */
420 164 : for (i = FirstOffsetNumber; i <= max; i++)
421 : {
422 : SpGistLeafTuple lt;
423 :
424 132 : lt = (SpGistLeafTuple) PageGetItem(page,
425 132 : PageGetItemId(page, i));
426 132 : if (lt->tupstate == SPGIST_LIVE)
427 : {
428 : Assert(ItemPointerIsValid(<->heapPtr));
429 :
430 132 : if (bds->callback(<->heapPtr, bds->callback_state))
431 : {
432 0 : bds->stats->tuples_removed += 1;
433 0 : toDelete[xlrec.nDelete] = i;
434 0 : xlrec.nDelete++;
435 : }
436 : else
437 : {
438 132 : bds->stats->num_index_tuples += 1;
439 : }
440 : }
441 : else
442 : {
443 : /* all tuples on root should be live */
444 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
445 : lt->tupstate);
446 : }
447 : }
448 :
449 32 : if (xlrec.nDelete == 0)
450 32 : return; /* nothing more to do */
451 :
452 : /* Do the update */
453 0 : START_CRIT_SECTION();
454 :
455 : /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
456 0 : PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
457 :
458 0 : MarkBufferDirty(buffer);
459 :
460 0 : if (RelationNeedsWAL(index))
461 : {
462 : XLogRecPtr recptr;
463 :
464 0 : XLogBeginInsert();
465 :
466 : /* Prepare WAL record */
467 0 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
468 :
469 0 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRoot);
470 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
471 0 : XLogRegisterData(toDelete,
472 0 : sizeof(OffsetNumber) * xlrec.nDelete);
473 :
474 0 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
475 :
476 0 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
477 :
478 0 : PageSetLSN(page, recptr);
479 : }
480 :
481 0 : END_CRIT_SECTION();
482 : }
483 :
484 : /*
485 : * Clean up redirect and placeholder tuples on the given page
486 : *
487 : * Redirect tuples can be marked placeholder once they're old enough.
488 : * Placeholder tuples can be removed if it won't change the offsets of
489 : * non-placeholder ones.
490 : *
491 : * Unlike the routines above, this works on both leaf and inner pages.
492 : */
493 : static void
494 4444 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
495 : {
496 4444 : Page page = BufferGetPage(buffer);
497 4444 : SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
498 : OffsetNumber i,
499 4444 : max = PageGetMaxOffsetNumber(page),
500 4444 : firstPlaceholder = InvalidOffsetNumber;
501 4444 : bool hasNonPlaceholder = false;
502 4444 : bool hasUpdate = false;
503 : OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
504 : OffsetNumber itemnos[MaxIndexTuplesPerPage];
505 : spgxlogVacuumRedirect xlrec;
506 : GlobalVisState *vistest;
507 :
508 4444 : xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
509 4444 : xlrec.nToPlaceholder = 0;
510 4444 : xlrec.snapshotConflictHorizon = InvalidTransactionId;
511 :
512 4444 : vistest = GlobalVisTestFor(heaprel);
513 :
514 4444 : START_CRIT_SECTION();
515 :
516 : /*
517 : * Scan backwards to convert old redirection tuples to placeholder tuples,
518 : * and identify location of last non-placeholder tuple while at it.
519 : */
520 230226 : for (i = max;
521 229812 : i >= FirstOffsetNumber &&
522 229812 : (opaque->nRedirection > 0 || !hasNonPlaceholder);
523 225782 : i--)
524 : {
525 : SpGistDeadTuple dt;
526 :
527 225782 : dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
528 :
529 : /*
530 : * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
531 : * be any index scans "in flight" to it. Such an index scan would
532 : * have to be in a transaction whose snapshot sees the REDIRECT's XID
533 : * as still running, so comparing the XID against global xmin is a
534 : * conservatively safe test. If the XID is invalid, it must have been
535 : * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
536 : */
537 225782 : if (dt->tupstate == SPGIST_REDIRECT &&
538 2408 : (!TransactionIdIsValid(dt->xid) ||
539 1204 : GlobalVisTestIsRemovableXid(vistest, dt->xid)))
540 : {
541 872 : dt->tupstate = SPGIST_PLACEHOLDER;
542 : Assert(opaque->nRedirection > 0);
543 872 : opaque->nRedirection--;
544 872 : opaque->nPlaceholder++;
545 :
546 : /* remember newest XID among the removed redirects */
547 1126 : if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
548 254 : TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
549 618 : xlrec.snapshotConflictHorizon = dt->xid;
550 :
551 872 : ItemPointerSetInvalid(&dt->pointer);
552 :
553 872 : itemToPlaceholder[xlrec.nToPlaceholder] = i;
554 872 : xlrec.nToPlaceholder++;
555 :
556 872 : hasUpdate = true;
557 : }
558 :
559 225782 : if (dt->tupstate == SPGIST_PLACEHOLDER)
560 : {
561 170620 : if (!hasNonPlaceholder)
562 160800 : firstPlaceholder = i;
563 : }
564 : else
565 : {
566 55162 : hasNonPlaceholder = true;
567 : }
568 : }
569 :
570 : /*
571 : * Any placeholder tuples at the end of page can safely be removed. We
572 : * can't remove ones before the last non-placeholder, though, because we
573 : * can't alter the offset numbers of non-placeholder tuples.
574 : */
575 4444 : if (firstPlaceholder != InvalidOffsetNumber)
576 : {
577 : /*
578 : * We do not store this array to rdata because it's easy to recreate.
579 : */
580 163330 : for (i = firstPlaceholder; i <= max; i++)
581 160800 : itemnos[i - firstPlaceholder] = i;
582 :
583 2530 : i = max - firstPlaceholder + 1;
584 : Assert(opaque->nPlaceholder >= i);
585 2530 : opaque->nPlaceholder -= i;
586 :
587 : /* The array is surely sorted, so can use PageIndexMultiDelete */
588 2530 : PageIndexMultiDelete(page, itemnos, i);
589 :
590 2530 : hasUpdate = true;
591 : }
592 :
593 4444 : xlrec.firstPlaceholder = firstPlaceholder;
594 :
595 4444 : if (hasUpdate)
596 2538 : MarkBufferDirty(buffer);
597 :
598 4444 : if (hasUpdate && RelationNeedsWAL(index))
599 : {
600 : XLogRecPtr recptr;
601 :
602 2538 : XLogBeginInsert();
603 :
604 2538 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRedirect);
605 2538 : XLogRegisterData(itemToPlaceholder,
606 2538 : sizeof(OffsetNumber) * xlrec.nToPlaceholder);
607 :
608 2538 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
609 :
610 2538 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
611 :
612 2538 : PageSetLSN(page, recptr);
613 : }
614 :
615 4444 : END_CRIT_SECTION();
616 4444 : }
617 :
618 : /*
619 : * Process one page during a bulkdelete scan
620 : */
621 : static void
622 4570 : spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
623 : {
624 4570 : Relation index = bds->info->index;
625 4570 : BlockNumber blkno = BufferGetBlockNumber(buffer);
626 : Page page;
627 :
628 4570 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
629 4570 : page = (Page) BufferGetPage(buffer);
630 :
631 4570 : if (PageIsNew(page))
632 : {
633 : /*
634 : * We found an all-zero page, which could happen if the database
635 : * crashed just after extending the file. Recycle it.
636 : */
637 : }
638 4558 : else if (PageIsEmpty(page))
639 : {
640 : /* nothing to do */
641 : }
642 4470 : else if (SpGistPageIsLeaf(page))
643 : {
644 4312 : if (SpGistBlockIsRoot(blkno))
645 : {
646 32 : vacuumLeafRoot(bds, index, buffer);
647 : /* no need for vacuumRedirectAndPlaceholder */
648 : }
649 : else
650 : {
651 4280 : vacuumLeafPage(bds, index, buffer, false);
652 4280 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
653 : }
654 : }
655 : else
656 : {
657 : /* inner page */
658 158 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
659 : }
660 :
661 : /*
662 : * The root pages must never be deleted, nor marked as available in FSM,
663 : * because we don't want them ever returned by a search for a place to put
664 : * a new tuple. Otherwise, check for empty page, and make sure the FSM
665 : * knows about it.
666 : */
667 4570 : if (!SpGistBlockIsRoot(blkno))
668 : {
669 4418 : if (PageIsNew(page) || PageIsEmpty(page))
670 : {
671 64 : RecordFreeIndexPage(index, blkno);
672 64 : bds->stats->pages_deleted++;
673 : }
674 : else
675 : {
676 4354 : SpGistSetLastUsedPage(index, buffer);
677 4354 : bds->lastFilledBlock = blkno;
678 : }
679 : }
680 :
681 4570 : UnlockReleaseBuffer(buffer);
682 4570 : }
683 :
684 : /*
685 : * Process the pending-TID list between pages of the main scan
686 : */
687 : static void
688 6 : spgprocesspending(spgBulkDeleteState *bds)
689 : {
690 6 : Relation index = bds->info->index;
691 : spgVacPendingItem *pitem;
692 : spgVacPendingItem *nitem;
693 : BlockNumber blkno;
694 : Buffer buffer;
695 : Page page;
696 :
697 24 : for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
698 : {
699 18 : if (pitem->done)
700 6 : continue; /* ignore already-done items */
701 :
702 : /* call vacuum_delay_point while not holding any buffer lock */
703 12 : vacuum_delay_point(false);
704 :
705 : /* examine the referenced page */
706 12 : blkno = ItemPointerGetBlockNumber(&pitem->tid);
707 12 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
708 12 : RBM_NORMAL, bds->info->strategy);
709 12 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
710 12 : page = (Page) BufferGetPage(buffer);
711 :
712 12 : if (PageIsNew(page) || SpGistPageIsDeleted(page))
713 : {
714 : /* Probably shouldn't happen, but ignore it */
715 : }
716 12 : else if (SpGistPageIsLeaf(page))
717 : {
718 6 : if (SpGistBlockIsRoot(blkno))
719 : {
720 : /* this should definitely not happen */
721 0 : elog(ERROR, "redirection leads to root page of index \"%s\"",
722 : RelationGetRelationName(index));
723 : }
724 :
725 : /* deal with any deletable tuples */
726 6 : vacuumLeafPage(bds, index, buffer, true);
727 : /* might as well do this while we are here */
728 6 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
729 :
730 6 : SpGistSetLastUsedPage(index, buffer);
731 :
732 : /*
733 : * We can mark as done not only this item, but any later ones
734 : * pointing at the same page, since we vacuumed the whole page.
735 : */
736 6 : pitem->done = true;
737 12 : for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
738 : {
739 6 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
740 6 : nitem->done = true;
741 : }
742 : }
743 : else
744 : {
745 : /*
746 : * On an inner page, visit the referenced inner tuple and add all
747 : * its downlinks to the pending list. We might have pending items
748 : * for more than one inner tuple on the same page (in fact this is
749 : * pretty likely given the way space allocation works), so get
750 : * them all while we are here.
751 : */
752 24 : for (nitem = pitem; nitem != NULL; nitem = nitem->next)
753 : {
754 18 : if (nitem->done)
755 0 : continue;
756 18 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
757 : {
758 : OffsetNumber offset;
759 : SpGistInnerTuple innerTuple;
760 :
761 6 : offset = ItemPointerGetOffsetNumber(&nitem->tid);
762 6 : innerTuple = (SpGistInnerTuple) PageGetItem(page,
763 6 : PageGetItemId(page, offset));
764 6 : if (innerTuple->tupstate == SPGIST_LIVE)
765 : {
766 : SpGistNodeTuple node;
767 : int i;
768 :
769 30 : SGITITERATE(innerTuple, i, node)
770 : {
771 24 : if (ItemPointerIsValid(&node->t_tid))
772 12 : spgAddPendingTID(bds, &node->t_tid);
773 : }
774 : }
775 0 : else if (innerTuple->tupstate == SPGIST_REDIRECT)
776 : {
777 : /* transfer attention to redirect point */
778 0 : spgAddPendingTID(bds,
779 : &((SpGistDeadTuple) innerTuple)->pointer);
780 : }
781 : else
782 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
783 : innerTuple->tupstate);
784 :
785 6 : nitem->done = true;
786 : }
787 : }
788 : }
789 :
790 12 : UnlockReleaseBuffer(buffer);
791 : }
792 :
793 6 : spgClearPendingList(bds);
794 6 : }
795 :
796 : /*
797 : * Perform a bulkdelete scan
798 : */
799 : static void
800 76 : spgvacuumscan(spgBulkDeleteState *bds)
801 : {
802 76 : Relation index = bds->info->index;
803 : bool needLock;
804 : BlockNumber num_pages;
805 : BlockRangeReadStreamPrivate p;
806 76 : ReadStream *stream = NULL;
807 :
808 : /* Finish setting up spgBulkDeleteState */
809 76 : initSpGistState(&bds->spgstate, index);
810 76 : bds->pendingList = NULL;
811 76 : bds->myXmin = GetActiveSnapshot()->xmin;
812 76 : bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
813 :
814 : /*
815 : * Reset counts that will be incremented during the scan; needed in case
816 : * of multiple scans during a single VACUUM command
817 : */
818 76 : bds->stats->estimated_count = false;
819 76 : bds->stats->num_index_tuples = 0;
820 76 : bds->stats->pages_deleted = 0;
821 :
822 : /* We can skip locking for new or temp relations */
823 76 : needLock = !RELATION_IS_LOCAL(index);
824 76 : p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;
825 :
826 : /*
827 : * It is safe to use batchmode as block_range_read_stream_cb takes no
828 : * locks.
829 : */
830 76 : stream = read_stream_begin_relation(READ_STREAM_FULL |
831 : READ_STREAM_USE_BATCHING,
832 76 : bds->info->strategy,
833 : index,
834 : MAIN_FORKNUM,
835 : block_range_read_stream_cb,
836 : &p,
837 : 0);
838 :
839 : /*
840 : * The outer loop iterates over all index pages except the metapage, in
841 : * physical order (we hope the kernel will cooperate in providing
842 : * read-ahead for speed). It is critical that we visit all leaf pages,
843 : * including ones added after we start the scan, else we might fail to
844 : * delete some deletable tuples. See more extensive comments about this
845 : * in btvacuumscan().
846 : */
847 : for (;;)
848 : {
849 : /* Get the current relation length */
850 152 : if (needLock)
851 152 : LockRelationForExtension(index, ExclusiveLock);
852 152 : num_pages = RelationGetNumberOfBlocks(index);
853 152 : if (needLock)
854 152 : UnlockRelationForExtension(index, ExclusiveLock);
855 :
856 : /* Quit if we've scanned the whole relation */
857 152 : if (p.current_blocknum >= num_pages)
858 76 : break;
859 :
860 76 : p.last_exclusive = num_pages;
861 :
862 : /* Iterate over pages, then loop back to recheck length */
863 : while (true)
864 4570 : {
865 : Buffer buf;
866 :
867 : /* call vacuum_delay_point while not holding any buffer lock */
868 4646 : vacuum_delay_point(false);
869 :
870 4646 : buf = read_stream_next_buffer(stream, NULL);
871 :
872 4646 : if (!BufferIsValid(buf))
873 76 : break;
874 :
875 4570 : spgvacuumpage(bds, buf);
876 :
877 : /* empty the pending-list after each page */
878 4570 : if (bds->pendingList != NULL)
879 6 : spgprocesspending(bds);
880 : }
881 :
882 : Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
883 :
884 : /*
885 : * We have to reset the read stream to use it again. After returning
886 : * InvalidBuffer, the read stream API won't invoke our callback again
887 : * until the stream has been reset.
888 : */
889 76 : read_stream_reset(stream);
890 : }
891 :
892 76 : read_stream_end(stream);
893 :
894 : /* Propagate local lastUsedPages cache to metablock */
895 76 : SpGistUpdateMetaPage(index);
896 :
897 : /*
898 : * If we found any empty pages (and recorded them in the FSM), then
899 : * forcibly update the upper-level FSM pages to ensure that searchers can
900 : * find them. It's possible that the pages were also found during
901 : * previous scans and so this is a waste of time, but it's cheap enough
902 : * relative to scanning the index that it shouldn't matter much, and
903 : * making sure that free pages are available sooner not later seems
904 : * worthwhile.
905 : *
906 : * Note that if no empty pages exist, we don't bother vacuuming the FSM at
907 : * all.
908 : */
909 76 : if (bds->stats->pages_deleted > 0)
910 44 : IndexFreeSpaceMapVacuum(index);
911 :
912 : /*
913 : * Truncate index if possible
914 : *
915 : * XXX disabled because it's unsafe due to possible concurrent inserts.
916 : * We'd have to rescan the pages to make sure they're still empty, and it
917 : * doesn't seem worth it. Note that btree doesn't do this either.
918 : *
919 : * Another reason not to truncate is that it could invalidate the cached
920 : * pages-with-freespace pointers in the metapage and other backends'
921 : * relation caches, that is leave them pointing to nonexistent pages.
922 : * Adding RelationGetNumberOfBlocks calls to protect the places that use
923 : * those pointers would be unduly expensive.
924 : */
925 : #ifdef NOT_USED
926 : if (num_pages > bds->lastFilledBlock + 1)
927 : {
928 : BlockNumber lastBlock = num_pages - 1;
929 :
930 : num_pages = bds->lastFilledBlock + 1;
931 : RelationTruncate(index, num_pages);
932 : bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
933 : bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
934 : }
935 : #endif
936 :
937 : /* Report final stats */
938 76 : bds->stats->num_pages = num_pages;
939 76 : bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
940 76 : bds->stats->pages_free = bds->stats->pages_deleted;
941 76 : }
942 :
943 : /*
944 : * Bulk deletion of all index entries pointing to a set of heap tuples.
945 : * The set of target tuples is specified via a callback routine that tells
946 : * whether any given heap tuple (identified by ItemPointer) is being deleted.
947 : *
948 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
949 : */
950 : IndexBulkDeleteResult *
951 8 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
952 : IndexBulkDeleteCallback callback, void *callback_state)
953 : {
954 : spgBulkDeleteState bds;
955 :
956 : /* allocate stats if first time through, else re-use existing struct */
957 8 : if (stats == NULL)
958 8 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
959 8 : bds.info = info;
960 8 : bds.stats = stats;
961 8 : bds.callback = callback;
962 8 : bds.callback_state = callback_state;
963 :
964 8 : spgvacuumscan(&bds);
965 :
966 8 : return stats;
967 : }
968 :
969 : /* Dummy callback to delete no tuples during spgvacuumcleanup */
970 : static bool
971 514826 : dummy_callback(ItemPointer itemptr, void *state)
972 : {
973 514826 : return false;
974 : }
975 :
976 : /*
977 : * Post-VACUUM cleanup.
978 : *
979 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
980 : */
981 : IndexBulkDeleteResult *
982 88 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
983 : {
984 : spgBulkDeleteState bds;
985 :
986 : /* No-op in ANALYZE ONLY mode */
987 88 : if (info->analyze_only)
988 12 : return stats;
989 :
990 : /*
991 : * We don't need to scan the index if there was a preceding bulkdelete
992 : * pass. Otherwise, make a pass that won't delete any live tuples, but
993 : * might still accomplish useful stuff with redirect/placeholder cleanup
994 : * and/or FSM housekeeping, and in any case will provide stats.
995 : */
996 76 : if (stats == NULL)
997 : {
998 68 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
999 68 : bds.info = info;
1000 68 : bds.stats = stats;
1001 68 : bds.callback = dummy_callback;
1002 68 : bds.callback_state = NULL;
1003 :
1004 68 : spgvacuumscan(&bds);
1005 : }
1006 :
1007 : /*
1008 : * It's quite possible for us to be fooled by concurrent tuple moves into
1009 : * double-counting some index tuples, so disbelieve any total that exceeds
1010 : * the underlying heap's count ... if we know that accurately. Otherwise
1011 : * this might just make matters worse.
1012 : */
1013 76 : if (!info->estimated_count)
1014 : {
1015 76 : if (stats->num_index_tuples > info->num_heap_tuples)
1016 4 : stats->num_index_tuples = info->num_heap_tuples;
1017 : }
1018 :
1019 76 : return stats;
1020 : }
|