Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgvacuum.c
4 : * vacuum for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgvacuum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/genam.h"
19 : #include "access/spgist_private.h"
20 : #include "access/spgxlog.h"
21 : #include "access/transam.h"
22 : #include "access/xloginsert.h"
23 : #include "commands/vacuum.h"
24 : #include "miscadmin.h"
25 : #include "storage/bufmgr.h"
26 : #include "storage/indexfsm.h"
27 : #include "storage/lmgr.h"
28 : #include "storage/read_stream.h"
29 : #include "utils/snapmgr.h"
30 :
31 :
32 : /* Entry in pending-list of TIDs we need to revisit */
33 : typedef struct spgVacPendingItem
34 : {
35 : ItemPointerData tid; /* redirection target to visit */
36 : bool done; /* have we dealt with this? */
37 : struct spgVacPendingItem *next; /* list link */
38 : } spgVacPendingItem;
39 :
40 : /* Local state for vacuum operations */
41 : typedef struct spgBulkDeleteState
42 : {
43 : /* Parameters passed in to spgvacuumscan */
44 : IndexVacuumInfo *info;
45 : IndexBulkDeleteResult *stats;
46 : IndexBulkDeleteCallback callback;
47 : void *callback_state;
48 :
49 : /* Additional working state */
50 : SpGistState spgstate; /* for SPGiST operations that need one */
51 : spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
52 : TransactionId myXmin; /* for detecting newly-added redirects */
53 : BlockNumber lastFilledBlock; /* last non-deletable block */
54 : } spgBulkDeleteState;
55 :
56 :
57 : /*
58 : * Add TID to pendingList, but only if not already present.
59 : *
60 : * Note that new items are always appended at the end of the list; this
61 : * ensures that scans of the list don't miss items added during the scan.
62 : */
63 : static void
64 51882 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
65 : {
66 : spgVacPendingItem *pitem;
67 : spgVacPendingItem **listLink;
68 :
69 : /* search the list for pre-existing entry */
70 51882 : listLink = &bds->pendingList;
71 4582914 : while (*listLink != NULL)
72 : {
73 4548062 : pitem = *listLink;
74 4548062 : if (ItemPointerEquals(tid, &pitem->tid))
75 17030 : return; /* already in list, do nothing */
76 4531032 : listLink = &pitem->next;
77 : }
78 : /* not there, so append new entry */
79 34852 : pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
80 34852 : pitem->tid = *tid;
81 34852 : pitem->done = false;
82 34852 : pitem->next = NULL;
83 34852 : *listLink = pitem;
84 : }
85 :
86 : /*
87 : * Clear pendingList
88 : */
89 : static void
90 264 : spgClearPendingList(spgBulkDeleteState *bds)
91 : {
92 : spgVacPendingItem *pitem;
93 : spgVacPendingItem *nitem;
94 :
95 35116 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
96 : {
97 34852 : nitem = pitem->next;
98 : /* All items in list should have been dealt with */
99 : Assert(pitem->done);
100 34852 : pfree(pitem);
101 : }
102 264 : bds->pendingList = NULL;
103 264 : }
104 :
105 : /*
106 : * Vacuum a regular (non-root) leaf page
107 : *
108 : * We must delete tuples that are targeted for deletion by the VACUUM,
109 : * but not move any tuples that are referenced by outside links; we assume
110 : * those are the ones that are heads of chains.
111 : *
112 : * If we find a REDIRECT that was made by a concurrently-running transaction,
113 : * we must add its target TID to pendingList. (We don't try to visit the
114 : * target immediately, first because we don't want VACUUM locking more than
115 : * one buffer at a time, and second because the duplicate-filtering logic
116 : * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
117 : * loop in the face of continuous concurrent insertions.)
118 : *
119 : * If forPending is true, we are examining the page as a consequence of
120 : * chasing a redirect link, not as part of the normal sequential scan.
121 : * We still vacuum the page normally, but we don't increment the stats
122 : * about live tuples; else we'd double-count those tuples, since the page
123 : * has been or will be visited in the sequential scan as well.
124 : */
125 : static void
126 21998 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
127 : bool forPending)
128 : {
129 21998 : Page page = BufferGetPage(buffer);
130 : spgxlogVacuumLeaf xlrec;
131 : OffsetNumber toDead[MaxIndexTuplesPerPage];
132 : OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
133 : OffsetNumber moveSrc[MaxIndexTuplesPerPage];
134 : OffsetNumber moveDest[MaxIndexTuplesPerPage];
135 : OffsetNumber chainSrc[MaxIndexTuplesPerPage];
136 : OffsetNumber chainDest[MaxIndexTuplesPerPage];
137 : OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
138 : bool deletable[MaxIndexTuplesPerPage + 1];
139 : int nDeletable;
140 : OffsetNumber i,
141 21998 : max = PageGetMaxOffsetNumber(page);
142 :
143 21998 : memset(predecessor, 0, sizeof(predecessor));
144 21998 : memset(deletable, 0, sizeof(deletable));
145 21998 : nDeletable = 0;
146 :
147 : /* Scan page, identify tuples to delete, accumulate stats */
148 3541740 : for (i = FirstOffsetNumber; i <= max; i++)
149 : {
150 : SpGistLeafTuple lt;
151 :
152 3519742 : lt = (SpGistLeafTuple) PageGetItem(page,
153 3519742 : PageGetItemId(page, i));
154 3519742 : if (lt->tupstate == SPGIST_LIVE)
155 : {
156 : Assert(ItemPointerIsValid(<->heapPtr));
157 :
158 3262982 : if (bds->callback(<->heapPtr, bds->callback_state))
159 : {
160 69910 : bds->stats->tuples_removed += 1;
161 69910 : deletable[i] = true;
162 69910 : nDeletable++;
163 : }
164 : else
165 : {
166 3193072 : if (!forPending)
167 571964 : bds->stats->num_index_tuples += 1;
168 : }
169 :
170 : /* Form predecessor map, too */
171 3262982 : if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
172 : {
173 : /* paranoia about corrupted chain links */
174 3231090 : if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
175 3231090 : SGLT_GET_NEXTOFFSET(lt) > max ||
176 3231090 : predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
177 0 : elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
178 : BufferGetBlockNumber(buffer),
179 : RelationGetRelationName(index));
180 3231090 : predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
181 : }
182 : }
183 256760 : else if (lt->tupstate == SPGIST_REDIRECT)
184 : {
185 18432 : SpGistDeadTuple dt = (SpGistDeadTuple) lt;
186 :
187 : Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
188 : Assert(ItemPointerIsValid(&dt->pointer));
189 :
190 : /*
191 : * Add target TID to pending list if the redirection could have
192 : * happened since VACUUM started. (If xid is invalid, assume it
193 : * must have happened before VACUUM started, since REINDEX
194 : * CONCURRENTLY locks out VACUUM.)
195 : *
196 : * Note: we could make a tighter test by seeing if the xid is
197 : * "running" according to the active snapshot; but snapmgr.c
198 : * doesn't currently export a suitable API, and it's not entirely
199 : * clear that a tighter test is worth the cycles anyway.
200 : */
201 18432 : if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
202 17294 : spgAddPendingTID(bds, &dt->pointer);
203 : }
204 : else
205 : {
206 : Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
207 : }
208 : }
209 :
210 21998 : if (nDeletable == 0)
211 21350 : return; /* nothing more to do */
212 :
213 : /*----------
214 : * Figure out exactly what we have to do. We do this separately from
215 : * actually modifying the page, mainly so that we have a representation
216 : * that can be dumped into WAL and then the replay code can do exactly
217 : * the same thing. The output of this step consists of six arrays
218 : * describing four kinds of operations, to be performed in this order:
219 : *
220 : * toDead[]: tuple numbers to be replaced with DEAD tuples
221 : * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
222 : * moveSrc[]: tuple numbers that need to be relocated to another offset
223 : * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
224 : * moveDest[]: new locations for moveSrc tuples
225 : * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
226 : * chainDest[]: new values of nextOffset for chainSrc members
227 : *
228 : * It's easiest to figure out what we have to do by processing tuple
229 : * chains, so we iterate over all the tuples (not just the deletable
230 : * ones!) to identify chain heads, then chase down each chain and make
231 : * work item entries for deletable tuples within the chain.
232 : *----------
233 : */
234 648 : xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
235 :
236 142216 : for (i = FirstOffsetNumber; i <= max; i++)
237 : {
238 : SpGistLeafTuple head;
239 : bool interveningDeletable;
240 : OffsetNumber prevLive;
241 : OffsetNumber j;
242 :
243 141568 : head = (SpGistLeafTuple) PageGetItem(page,
244 141568 : PageGetItemId(page, i));
245 141568 : if (head->tupstate != SPGIST_LIVE)
246 43710 : continue; /* can't be a chain member */
247 97858 : if (predecessor[i] != 0)
248 97164 : continue; /* not a chain head */
249 :
250 : /* initialize ... */
251 694 : interveningDeletable = false;
252 694 : prevLive = deletable[i] ? InvalidOffsetNumber : i;
253 :
254 : /* scan down the chain ... */
255 694 : j = SGLT_GET_NEXTOFFSET(head);
256 97858 : while (j != InvalidOffsetNumber)
257 : {
258 : SpGistLeafTuple lt;
259 :
260 97164 : lt = (SpGistLeafTuple) PageGetItem(page,
261 97164 : PageGetItemId(page, j));
262 97164 : if (lt->tupstate != SPGIST_LIVE)
263 : {
264 : /* all tuples in chain should be live */
265 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
266 : lt->tupstate);
267 : }
268 :
269 97164 : if (deletable[j])
270 : {
271 : /* This tuple should be replaced by a placeholder */
272 69284 : toPlaceholder[xlrec.nPlaceholder] = j;
273 69284 : xlrec.nPlaceholder++;
274 : /* previous live tuple's chain link will need an update */
275 69284 : interveningDeletable = true;
276 : }
277 27880 : else if (prevLive == InvalidOffsetNumber)
278 : {
279 : /*
280 : * This is the first live tuple in the chain. It has to move
281 : * to the head position.
282 : */
283 626 : moveSrc[xlrec.nMove] = j;
284 626 : moveDest[xlrec.nMove] = i;
285 626 : xlrec.nMove++;
286 : /* Chain updates will be applied after the move */
287 626 : prevLive = i;
288 626 : interveningDeletable = false;
289 : }
290 : else
291 : {
292 : /*
293 : * Second or later live tuple. Arrange to re-chain it to the
294 : * previous live one, if there was a gap.
295 : */
296 27254 : if (interveningDeletable)
297 : {
298 19502 : chainSrc[xlrec.nChain] = prevLive;
299 19502 : chainDest[xlrec.nChain] = j;
300 19502 : xlrec.nChain++;
301 : }
302 27254 : prevLive = j;
303 27254 : interveningDeletable = false;
304 : }
305 :
306 97164 : j = SGLT_GET_NEXTOFFSET(lt);
307 : }
308 :
309 694 : if (prevLive == InvalidOffsetNumber)
310 : {
311 : /* The chain is entirely removable, so we need a DEAD tuple */
312 0 : toDead[xlrec.nDead] = i;
313 0 : xlrec.nDead++;
314 : }
315 694 : else if (interveningDeletable)
316 : {
317 : /* One or more deletions at end of chain, so close it off */
318 664 : chainSrc[xlrec.nChain] = prevLive;
319 664 : chainDest[xlrec.nChain] = InvalidOffsetNumber;
320 664 : xlrec.nChain++;
321 : }
322 : }
323 :
324 : /* sanity check ... */
325 648 : if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
326 0 : elog(ERROR, "inconsistent counts of deletable tuples");
327 :
328 : /* Do the updates */
329 648 : START_CRIT_SECTION();
330 :
331 648 : spgPageIndexMultiDelete(&bds->spgstate, page,
332 648 : toDead, xlrec.nDead,
333 : SPGIST_DEAD, SPGIST_DEAD,
334 : InvalidBlockNumber, InvalidOffsetNumber);
335 :
336 648 : spgPageIndexMultiDelete(&bds->spgstate, page,
337 648 : toPlaceholder, xlrec.nPlaceholder,
338 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
339 : InvalidBlockNumber, InvalidOffsetNumber);
340 :
341 : /*
342 : * We implement the move step by swapping the line pointers of the source
343 : * and target tuples, then replacing the newly-source tuples with
344 : * placeholders. This is perhaps unduly friendly with the page data
345 : * representation, but it's fast and doesn't risk page overflow when a
346 : * tuple to be relocated is large.
347 : */
348 1274 : for (i = 0; i < xlrec.nMove; i++)
349 : {
350 626 : ItemId idSrc = PageGetItemId(page, moveSrc[i]);
351 626 : ItemId idDest = PageGetItemId(page, moveDest[i]);
352 : ItemIdData tmp;
353 :
354 626 : tmp = *idSrc;
355 626 : *idSrc = *idDest;
356 626 : *idDest = tmp;
357 : }
358 :
359 648 : spgPageIndexMultiDelete(&bds->spgstate, page,
360 648 : moveSrc, xlrec.nMove,
361 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
362 : InvalidBlockNumber, InvalidOffsetNumber);
363 :
364 20814 : for (i = 0; i < xlrec.nChain; i++)
365 : {
366 : SpGistLeafTuple lt;
367 :
368 20166 : lt = (SpGistLeafTuple) PageGetItem(page,
369 20166 : PageGetItemId(page, chainSrc[i]));
370 : Assert(lt->tupstate == SPGIST_LIVE);
371 20166 : SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
372 : }
373 :
374 648 : MarkBufferDirty(buffer);
375 :
376 648 : if (RelationNeedsWAL(index))
377 : {
378 : XLogRecPtr recptr;
379 :
380 648 : XLogBeginInsert();
381 :
382 648 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
383 :
384 648 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumLeaf);
385 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
386 648 : XLogRegisterData(toDead, sizeof(OffsetNumber) * xlrec.nDead);
387 648 : XLogRegisterData(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
388 648 : XLogRegisterData(moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
389 648 : XLogRegisterData(moveDest, sizeof(OffsetNumber) * xlrec.nMove);
390 648 : XLogRegisterData(chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
391 648 : XLogRegisterData(chainDest, sizeof(OffsetNumber) * xlrec.nChain);
392 :
393 648 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
394 :
395 648 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
396 :
397 648 : PageSetLSN(page, recptr);
398 : }
399 :
400 648 : END_CRIT_SECTION();
401 : }
402 :
403 : /*
404 : * Vacuum a root page when it is also a leaf
405 : *
406 : * On the root, we just delete any dead leaf tuples; no fancy business
407 : */
408 : static void
409 32 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
410 : {
411 32 : Page page = BufferGetPage(buffer);
412 : spgxlogVacuumRoot xlrec;
413 : OffsetNumber toDelete[MaxIndexTuplesPerPage];
414 : OffsetNumber i,
415 32 : max = PageGetMaxOffsetNumber(page);
416 :
417 32 : xlrec.nDelete = 0;
418 :
419 : /* Scan page, identify tuples to delete, accumulate stats */
420 164 : for (i = FirstOffsetNumber; i <= max; i++)
421 : {
422 : SpGistLeafTuple lt;
423 :
424 132 : lt = (SpGistLeafTuple) PageGetItem(page,
425 132 : PageGetItemId(page, i));
426 132 : if (lt->tupstate == SPGIST_LIVE)
427 : {
428 : Assert(ItemPointerIsValid(<->heapPtr));
429 :
430 132 : if (bds->callback(<->heapPtr, bds->callback_state))
431 : {
432 0 : bds->stats->tuples_removed += 1;
433 0 : toDelete[xlrec.nDelete] = i;
434 0 : xlrec.nDelete++;
435 : }
436 : else
437 : {
438 132 : bds->stats->num_index_tuples += 1;
439 : }
440 : }
441 : else
442 : {
443 : /* all tuples on root should be live */
444 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
445 : lt->tupstate);
446 : }
447 : }
448 :
449 32 : if (xlrec.nDelete == 0)
450 32 : return; /* nothing more to do */
451 :
452 : /* Do the update */
453 0 : START_CRIT_SECTION();
454 :
455 : /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
456 0 : PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
457 :
458 0 : MarkBufferDirty(buffer);
459 :
460 0 : if (RelationNeedsWAL(index))
461 : {
462 : XLogRecPtr recptr;
463 :
464 0 : XLogBeginInsert();
465 :
466 : /* Prepare WAL record */
467 0 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
468 :
469 0 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRoot);
470 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
471 0 : XLogRegisterData(toDelete,
472 0 : sizeof(OffsetNumber) * xlrec.nDelete);
473 :
474 0 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
475 :
476 0 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
477 :
478 0 : PageSetLSN(page, recptr);
479 : }
480 :
481 0 : END_CRIT_SECTION();
482 : }
483 :
484 : /*
485 : * Clean up redirect and placeholder tuples on the given page
486 : *
487 : * Redirect tuples can be marked placeholder once they're old enough.
488 : * Placeholder tuples can be removed if it won't change the offsets of
489 : * non-placeholder ones.
490 : *
491 : * Unlike the routines above, this works on both leaf and inner pages.
492 : */
493 : static void
494 22172 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
495 : {
496 22172 : Page page = BufferGetPage(buffer);
497 22172 : SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
498 : OffsetNumber i,
499 22172 : max = PageGetMaxOffsetNumber(page),
500 22172 : firstPlaceholder = InvalidOffsetNumber;
501 22172 : bool hasNonPlaceholder = false;
502 22172 : bool hasUpdate = false;
503 : OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
504 : OffsetNumber itemnos[MaxIndexTuplesPerPage];
505 : spgxlogVacuumRedirect xlrec;
506 : GlobalVisState *vistest;
507 :
508 22172 : xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
509 22172 : xlrec.nToPlaceholder = 0;
510 22172 : xlrec.snapshotConflictHorizon = InvalidTransactionId;
511 :
512 22172 : vistest = GlobalVisTestFor(heaprel);
513 :
514 22172 : START_CRIT_SECTION();
515 :
516 : /*
517 : * Scan backwards to convert old redirection tuples to placeholder tuples,
518 : * and identify location of last non-placeholder tuple while at it.
519 : */
520 2909316 : for (i = max;
521 2891620 : i >= FirstOffsetNumber &&
522 2891620 : (opaque->nRedirection > 0 || !hasNonPlaceholder);
523 2887144 : i--)
524 : {
525 : SpGistDeadTuple dt;
526 :
527 2887144 : dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
528 :
529 : /*
530 : * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
531 : * be any index scans "in flight" to it. Such an index scan would
532 : * have to be in a transaction whose snapshot sees the REDIRECT's XID
533 : * as still running, so comparing the XID against global xmin is a
534 : * conservatively safe test. If the XID is invalid, it must have been
535 : * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
536 : */
537 2887144 : if (dt->tupstate == SPGIST_REDIRECT &&
538 36872 : (!TransactionIdIsValid(dt->xid) ||
539 18436 : GlobalVisTestIsRemovableXid(vistest, dt->xid)))
540 : {
541 838 : dt->tupstate = SPGIST_PLACEHOLDER;
542 : Assert(opaque->nRedirection > 0);
543 838 : opaque->nRedirection--;
544 838 : opaque->nPlaceholder++;
545 :
546 : /* remember newest XID among the removed redirects */
547 1264 : if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
548 426 : TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
549 412 : xlrec.snapshotConflictHorizon = dt->xid;
550 :
551 838 : ItemPointerSetInvalid(&dt->pointer);
552 :
553 838 : itemToPlaceholder[xlrec.nToPlaceholder] = i;
554 838 : xlrec.nToPlaceholder++;
555 :
556 838 : hasUpdate = true;
557 : }
558 :
559 2887144 : if (dt->tupstate == SPGIST_PLACEHOLDER)
560 : {
561 189886 : if (!hasNonPlaceholder)
562 182800 : firstPlaceholder = i;
563 : }
564 : else
565 : {
566 2697258 : hasNonPlaceholder = true;
567 : }
568 : }
569 :
570 : /*
571 : * Any placeholder tuples at the end of page can safely be removed. We
572 : * can't remove ones before the last non-placeholder, though, because we
573 : * can't alter the offset numbers of non-placeholder tuples.
574 : */
575 22172 : if (firstPlaceholder != InvalidOffsetNumber)
576 : {
577 : /*
578 : * We do not store this array to rdata because it's easy to recreate.
579 : */
580 185668 : for (i = firstPlaceholder; i <= max; i++)
581 182800 : itemnos[i - firstPlaceholder] = i;
582 :
583 2868 : i = max - firstPlaceholder + 1;
584 : Assert(opaque->nPlaceholder >= i);
585 2868 : opaque->nPlaceholder -= i;
586 :
587 : /* The array is surely sorted, so can use PageIndexMultiDelete */
588 2868 : PageIndexMultiDelete(page, itemnos, i);
589 :
590 2868 : hasUpdate = true;
591 : }
592 :
593 22172 : xlrec.firstPlaceholder = firstPlaceholder;
594 :
595 22172 : if (hasUpdate)
596 2878 : MarkBufferDirty(buffer);
597 :
598 22172 : if (hasUpdate && RelationNeedsWAL(index))
599 : {
600 : XLogRecPtr recptr;
601 :
602 2878 : XLogBeginInsert();
603 :
604 2878 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRedirect);
605 2878 : XLogRegisterData(itemToPlaceholder,
606 2878 : sizeof(OffsetNumber) * xlrec.nToPlaceholder);
607 :
608 2878 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
609 :
610 2878 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
611 :
612 2878 : PageSetLSN(page, recptr);
613 : }
614 :
615 22172 : END_CRIT_SECTION();
616 22172 : }
617 :
618 : /*
619 : * Process one page during a bulkdelete scan
620 : */
621 : static void
622 5024 : spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
623 : {
624 5024 : Relation index = bds->info->index;
625 5024 : BlockNumber blkno = BufferGetBlockNumber(buffer);
626 : Page page;
627 :
628 5024 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
629 5024 : page = (Page) BufferGetPage(buffer);
630 :
631 5024 : if (PageIsNew(page))
632 : {
633 : /*
634 : * We found an all-zero page, which could happen if the database
635 : * crashed just after extending the file. Recycle it.
636 : */
637 : }
638 5010 : else if (PageIsEmpty(page))
639 : {
640 : /* nothing to do */
641 : }
642 4910 : else if (SpGistPageIsLeaf(page))
643 : {
644 4736 : if (SpGistBlockIsRoot(blkno))
645 : {
646 32 : vacuumLeafRoot(bds, index, buffer);
647 : /* no need for vacuumRedirectAndPlaceholder */
648 : }
649 : else
650 : {
651 4704 : vacuumLeafPage(bds, index, buffer, false);
652 4704 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
653 : }
654 : }
655 : else
656 : {
657 : /* inner page */
658 174 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
659 : }
660 :
661 : /*
662 : * The root pages must never be deleted, nor marked as available in FSM,
663 : * because we don't want them ever returned by a search for a place to put
664 : * a new tuple. Otherwise, check for empty page, and make sure the FSM
665 : * knows about it.
666 : */
667 5024 : if (!SpGistBlockIsRoot(blkno))
668 : {
669 4860 : if (PageIsNew(page) || PageIsEmpty(page))
670 : {
671 82 : RecordFreeIndexPage(index, blkno);
672 82 : bds->stats->pages_deleted++;
673 : }
674 : else
675 : {
676 4778 : SpGistSetLastUsedPage(index, buffer);
677 4778 : bds->lastFilledBlock = blkno;
678 : }
679 : }
680 :
681 5024 : UnlockReleaseBuffer(buffer);
682 5024 : }
683 :
684 : /*
685 : * Process the pending-TID list between pages of the main scan
686 : */
687 : static void
688 264 : spgprocesspending(spgBulkDeleteState *bds)
689 : {
690 264 : Relation index = bds->info->index;
691 : spgVacPendingItem *pitem;
692 : spgVacPendingItem *nitem;
693 : BlockNumber blkno;
694 : Buffer buffer;
695 : Page page;
696 :
697 35116 : for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
698 : {
699 34852 : if (pitem->done)
700 17294 : continue; /* ignore already-done items */
701 :
702 : /* call vacuum_delay_point while not holding any buffer lock */
703 17558 : vacuum_delay_point(false);
704 :
705 : /* examine the referenced page */
706 17558 : blkno = ItemPointerGetBlockNumber(&pitem->tid);
707 17558 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
708 17558 : RBM_NORMAL, bds->info->strategy);
709 17558 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
710 17558 : page = (Page) BufferGetPage(buffer);
711 :
712 17558 : if (PageIsNew(page) || SpGistPageIsDeleted(page))
713 : {
714 : /* Probably shouldn't happen, but ignore it */
715 : }
716 17558 : else if (SpGistPageIsLeaf(page))
717 : {
718 17294 : if (SpGistBlockIsRoot(blkno))
719 : {
720 : /* this should definitely not happen */
721 0 : elog(ERROR, "redirection leads to root page of index \"%s\"",
722 : RelationGetRelationName(index));
723 : }
724 :
725 : /* deal with any deletable tuples */
726 17294 : vacuumLeafPage(bds, index, buffer, true);
727 : /* might as well do this while we are here */
728 17294 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
729 :
730 17294 : SpGistSetLastUsedPage(index, buffer);
731 :
732 : /*
733 : * We can mark as done not only this item, but any later ones
734 : * pointing at the same page, since we vacuumed the whole page.
735 : */
736 17294 : pitem->done = true;
737 1516458 : for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
738 : {
739 1499164 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
740 264 : nitem->done = true;
741 : }
742 : }
743 : else
744 : {
745 : /*
746 : * On an inner page, visit the referenced inner tuple and add all
747 : * its downlinks to the pending list. We might have pending items
748 : * for more than one inner tuple on the same page (in fact this is
749 : * pretty likely given the way space allocation works), so get
750 : * them all while we are here.
751 : */
752 35116 : for (nitem = pitem; nitem != NULL; nitem = nitem->next)
753 : {
754 34852 : if (nitem->done)
755 0 : continue;
756 34852 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
757 : {
758 : OffsetNumber offset;
759 : SpGistInnerTuple innerTuple;
760 :
761 17294 : offset = ItemPointerGetOffsetNumber(&nitem->tid);
762 17294 : innerTuple = (SpGistInnerTuple) PageGetItem(page,
763 17294 : PageGetItemId(page, offset));
764 17294 : if (innerTuple->tupstate == SPGIST_LIVE)
765 : {
766 : SpGistNodeTuple node;
767 : int i;
768 :
769 86470 : SGITITERATE(innerTuple, i, node)
770 : {
771 69176 : if (ItemPointerIsValid(&node->t_tid))
772 34588 : spgAddPendingTID(bds, &node->t_tid);
773 : }
774 : }
775 0 : else if (innerTuple->tupstate == SPGIST_REDIRECT)
776 : {
777 : /* transfer attention to redirect point */
778 0 : spgAddPendingTID(bds,
779 : &((SpGistDeadTuple) innerTuple)->pointer);
780 : }
781 : else
782 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
783 : innerTuple->tupstate);
784 :
785 17294 : nitem->done = true;
786 : }
787 : }
788 : }
789 :
790 17558 : UnlockReleaseBuffer(buffer);
791 : }
792 :
793 264 : spgClearPendingList(bds);
794 264 : }
795 :
796 : /*
797 : * Perform a bulkdelete scan
798 : */
799 : static void
800 82 : spgvacuumscan(spgBulkDeleteState *bds)
801 : {
802 82 : Relation index = bds->info->index;
803 : bool needLock;
804 : BlockNumber num_pages;
805 : BlockRangeReadStreamPrivate p;
806 82 : ReadStream *stream = NULL;
807 :
808 : /* Finish setting up spgBulkDeleteState */
809 82 : initSpGistState(&bds->spgstate, index);
810 82 : bds->pendingList = NULL;
811 82 : bds->myXmin = GetActiveSnapshot()->xmin;
812 82 : bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
813 :
814 : /*
815 : * Reset counts that will be incremented during the scan; needed in case
816 : * of multiple scans during a single VACUUM command
817 : */
818 82 : bds->stats->estimated_count = false;
819 82 : bds->stats->num_index_tuples = 0;
820 82 : bds->stats->pages_deleted = 0;
821 :
822 : /* We can skip locking for new or temp relations */
823 82 : needLock = !RELATION_IS_LOCAL(index);
824 82 : p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;
825 :
826 : /*
827 : * It is safe to use batchmode as block_range_read_stream_cb takes no
828 : * locks.
829 : */
830 82 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
831 : READ_STREAM_FULL |
832 : READ_STREAM_USE_BATCHING,
833 82 : bds->info->strategy,
834 : index,
835 : MAIN_FORKNUM,
836 : block_range_read_stream_cb,
837 : &p,
838 : 0);
839 :
840 : /*
841 : * The outer loop iterates over all index pages except the metapage, in
842 : * physical order (we hope the kernel will cooperate in providing
843 : * read-ahead for speed). It is critical that we visit all leaf pages,
844 : * including ones added after we start the scan, else we might fail to
845 : * delete some deletable tuples. See more extensive comments about this
846 : * in btvacuumscan().
847 : */
848 : for (;;)
849 : {
850 : /* Get the current relation length */
851 164 : if (needLock)
852 164 : LockRelationForExtension(index, ExclusiveLock);
853 164 : num_pages = RelationGetNumberOfBlocks(index);
854 164 : if (needLock)
855 164 : UnlockRelationForExtension(index, ExclusiveLock);
856 :
857 : /* Quit if we've scanned the whole relation */
858 164 : if (p.current_blocknum >= num_pages)
859 82 : break;
860 :
861 82 : p.last_exclusive = num_pages;
862 :
863 : /* Iterate over pages, then loop back to recheck length */
864 : while (true)
865 5024 : {
866 : Buffer buf;
867 :
868 : /* call vacuum_delay_point while not holding any buffer lock */
869 5106 : vacuum_delay_point(false);
870 :
871 5106 : buf = read_stream_next_buffer(stream, NULL);
872 :
873 5106 : if (!BufferIsValid(buf))
874 82 : break;
875 :
876 5024 : spgvacuumpage(bds, buf);
877 :
878 : /* empty the pending-list after each page */
879 5024 : if (bds->pendingList != NULL)
880 264 : spgprocesspending(bds);
881 : }
882 :
883 : /*
884 : * We have to reset the read stream to use it again. After returning
885 : * InvalidBuffer, the read stream API won't invoke our callback again
886 : * until the stream has been reset.
887 : */
888 82 : read_stream_reset(stream);
889 : }
890 :
891 82 : read_stream_end(stream);
892 :
893 : /* Propagate local lastUsedPages cache to metablock */
894 82 : SpGistUpdateMetaPage(index);
895 :
896 : /*
897 : * If we found any empty pages (and recorded them in the FSM), then
898 : * forcibly update the upper-level FSM pages to ensure that searchers can
899 : * find them. It's possible that the pages were also found during
900 : * previous scans and so this is a waste of time, but it's cheap enough
901 : * relative to scanning the index that it shouldn't matter much, and
902 : * making sure that free pages are available sooner not later seems
903 : * worthwhile.
904 : *
905 : * Note that if no empty pages exist, we don't bother vacuuming the FSM at
906 : * all.
907 : */
908 82 : if (bds->stats->pages_deleted > 0)
909 50 : IndexFreeSpaceMapVacuum(index);
910 :
911 : /*
912 : * Truncate index if possible
913 : *
914 : * XXX disabled because it's unsafe due to possible concurrent inserts.
915 : * We'd have to rescan the pages to make sure they're still empty, and it
916 : * doesn't seem worth it. Note that btree doesn't do this either.
917 : *
918 : * Another reason not to truncate is that it could invalidate the cached
919 : * pages-with-freespace pointers in the metapage and other backends'
920 : * relation caches, that is leave them pointing to nonexistent pages.
921 : * Adding RelationGetNumberOfBlocks calls to protect the places that use
922 : * those pointers would be unduly expensive.
923 : */
924 : #ifdef NOT_USED
925 : if (num_pages > bds->lastFilledBlock + 1)
926 : {
927 : BlockNumber lastBlock = num_pages - 1;
928 :
929 : num_pages = bds->lastFilledBlock + 1;
930 : RelationTruncate(index, num_pages);
931 : bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
932 : bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
933 : }
934 : #endif
935 :
936 : /* Report final stats */
937 82 : bds->stats->num_pages = num_pages;
938 82 : bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
939 82 : bds->stats->pages_free = bds->stats->pages_deleted;
940 82 : }
941 :
942 : /*
943 : * Bulk deletion of all index entries pointing to a set of heap tuples.
944 : * The set of target tuples is specified via a callback routine that tells
945 : * whether any given heap tuple (identified by ItemPointer) is being deleted.
946 : *
947 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
948 : */
949 : IndexBulkDeleteResult *
950 10 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
951 : IndexBulkDeleteCallback callback, void *callback_state)
952 : {
953 : spgBulkDeleteState bds;
954 :
955 : /* allocate stats if first time through, else re-use existing struct */
956 10 : if (stats == NULL)
957 10 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
958 10 : bds.info = info;
959 10 : bds.stats = stats;
960 10 : bds.callback = callback;
961 10 : bds.callback_state = callback_state;
962 :
963 10 : spgvacuumscan(&bds);
964 :
965 10 : return stats;
966 : }
967 :
968 : /* Dummy callback to delete no tuples during spgvacuumcleanup */
969 : static bool
970 3165256 : dummy_callback(ItemPointer itemptr, void *state)
971 : {
972 3165256 : return false;
973 : }
974 :
975 : /*
976 : * Post-VACUUM cleanup.
977 : *
978 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
979 : */
980 : IndexBulkDeleteResult *
981 106 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
982 : {
983 : spgBulkDeleteState bds;
984 :
985 : /* No-op in ANALYZE ONLY mode */
986 106 : if (info->analyze_only)
987 24 : return stats;
988 :
989 : /*
990 : * We don't need to scan the index if there was a preceding bulkdelete
991 : * pass. Otherwise, make a pass that won't delete any live tuples, but
992 : * might still accomplish useful stuff with redirect/placeholder cleanup
993 : * and/or FSM housekeeping, and in any case will provide stats.
994 : */
995 82 : if (stats == NULL)
996 : {
997 72 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
998 72 : bds.info = info;
999 72 : bds.stats = stats;
1000 72 : bds.callback = dummy_callback;
1001 72 : bds.callback_state = NULL;
1002 :
1003 72 : spgvacuumscan(&bds);
1004 : }
1005 :
1006 : /*
1007 : * It's quite possible for us to be fooled by concurrent tuple moves into
1008 : * double-counting some index tuples, so disbelieve any total that exceeds
1009 : * the underlying heap's count ... if we know that accurately. Otherwise
1010 : * this might just make matters worse.
1011 : */
1012 82 : if (!info->estimated_count)
1013 : {
1014 82 : if (stats->num_index_tuples > info->num_heap_tuples)
1015 4 : stats->num_index_tuples = info->num_heap_tuples;
1016 : }
1017 :
1018 82 : return stats;
1019 : }
|