Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgvacuum.c
4 : * vacuum for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgvacuum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/genam.h"
19 : #include "access/spgist_private.h"
20 : #include "access/spgxlog.h"
21 : #include "access/transam.h"
22 : #include "access/xloginsert.h"
23 : #include "commands/vacuum.h"
24 : #include "miscadmin.h"
25 : #include "storage/bufmgr.h"
26 : #include "storage/indexfsm.h"
27 : #include "storage/lmgr.h"
28 : #include "utils/snapmgr.h"
29 :
30 :
/*
 * Entry in pending-list of TIDs we need to revisit.
 *
 * The pending list is a singly linked list of redirection targets found
 * during the main scan; entries are appended, never reordered, so that a
 * scan of the list cannot miss entries added while it is in progress.
 */
typedef struct spgVacPendingItem
{
	ItemPointerData tid;		/* redirection target to visit */
	bool		done;			/* have we dealt with this? */
	struct spgVacPendingItem *next; /* list link (NULL at list tail) */
} spgVacPendingItem;
38 :
/*
 * Local state for vacuum operations.
 *
 * One instance is stack-allocated per bulkdelete/cleanup call; it bundles
 * the caller-supplied parameters with working state shared by the scan
 * helper routines.
 */
typedef struct spgBulkDeleteState
{
	/* Parameters passed in to spgvacuumscan */
	IndexVacuumInfo *info;
	IndexBulkDeleteResult *stats;
	IndexBulkDeleteCallback callback;	/* tells us which heap TIDs to remove */
	void	   *callback_state;

	/* Additional working state */
	SpGistState spgstate;		/* for SPGiST operations that need one */
	spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
	TransactionId myXmin;		/* for detecting newly-added redirects */
	BlockNumber lastFilledBlock;	/* last non-deletable block */
} spgBulkDeleteState;
54 :
55 :
56 : /*
57 : * Add TID to pendingList, but only if not already present.
58 : *
59 : * Note that new items are always appended at the end of the list; this
60 : * ensures that scans of the list don't miss items added during the scan.
61 : */
62 : static void
63 51888 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
64 : {
65 : spgVacPendingItem *pitem;
66 : spgVacPendingItem **listLink;
67 :
68 : /* search the list for pre-existing entry */
69 51888 : listLink = &bds->pendingList;
70 4582926 : while (*listLink != NULL)
71 : {
72 4548068 : pitem = *listLink;
73 4548068 : if (ItemPointerEquals(tid, &pitem->tid))
74 17030 : return; /* already in list, do nothing */
75 4531038 : listLink = &pitem->next;
76 : }
77 : /* not there, so append new entry */
78 34858 : pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
79 34858 : pitem->tid = *tid;
80 34858 : pitem->done = false;
81 34858 : pitem->next = NULL;
82 34858 : *listLink = pitem;
83 : }
84 :
85 : /*
86 : * Clear pendingList
87 : */
88 : static void
89 266 : spgClearPendingList(spgBulkDeleteState *bds)
90 : {
91 : spgVacPendingItem *pitem;
92 : spgVacPendingItem *nitem;
93 :
94 35124 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
95 : {
96 34858 : nitem = pitem->next;
97 : /* All items in list should have been dealt with */
98 : Assert(pitem->done);
99 34858 : pfree(pitem);
100 : }
101 266 : bds->pendingList = NULL;
102 266 : }
103 :
/*
 * Vacuum a regular (non-root) leaf page
 *
 * We must delete tuples that are targeted for deletion by the VACUUM,
 * but not move any tuples that are referenced by outside links; we assume
 * those are the ones that are heads of chains.
 *
 * If we find a REDIRECT that was made by a concurrently-running transaction,
 * we must add its target TID to pendingList. (We don't try to visit the
 * target immediately, first because we don't want VACUUM locking more than
 * one buffer at a time, and second because the duplicate-filtering logic
 * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
 * loop in the face of continuous concurrent insertions.)
 *
 * If forPending is true, we are examining the page as a consequence of
 * chasing a redirect link, not as part of the normal sequential scan.
 * We still vacuum the page normally, but we don't increment the stats
 * about live tuples; else we'd double-count those tuples, since the page
 * has been or will be visited in the sequential scan as well.
 *
 * Caller must hold exclusive lock on buffer; the page is modified in a
 * critical section and the change is WAL-logged as XLOG_SPGIST_VACUUM_LEAF.
 */
static void
vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
			   bool forPending)
{
	Page		page = BufferGetPage(buffer);
	spgxlogVacuumLeaf xlrec;
	OffsetNumber toDead[MaxIndexTuplesPerPage];
	OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
	OffsetNumber moveSrc[MaxIndexTuplesPerPage];
	OffsetNumber moveDest[MaxIndexTuplesPerPage];
	OffsetNumber chainSrc[MaxIndexTuplesPerPage];
	OffsetNumber chainDest[MaxIndexTuplesPerPage];
	OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
	bool		deletable[MaxIndexTuplesPerPage + 1];
	int			nDeletable;
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page);

	/* predecessor[] and deletable[] are indexed by offset number, hence +1 */
	memset(predecessor, 0, sizeof(predecessor));
	memset(deletable, 0, sizeof(deletable));
	nDeletable = 0;

	/* Scan page, identify tuples to delete, accumulate stats */
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, i));
		if (lt->tupstate == SPGIST_LIVE)
		{
			Assert(ItemPointerIsValid(&lt->heapPtr));

			if (bds->callback(&lt->heapPtr, bds->callback_state))
			{
				/* Callback says this heap TID is dead: mark for deletion */
				bds->stats->tuples_removed += 1;
				deletable[i] = true;
				nDeletable++;
			}
			else
			{
				/* Only count live tuples during the main sequential scan */
				if (!forPending)
					bds->stats->num_index_tuples += 1;
			}

			/* Form predecessor map, too */
			if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
			{
				/* paranoia about corrupted chain links */
				if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
					SGLT_GET_NEXTOFFSET(lt) > max ||
					predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
					elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
						 BufferGetBlockNumber(buffer),
						 RelationGetRelationName(index));
				predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
			}
		}
		else if (lt->tupstate == SPGIST_REDIRECT)
		{
			SpGistDeadTuple dt = (SpGistDeadTuple) lt;

			Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
			Assert(ItemPointerIsValid(&dt->pointer));

			/*
			 * Add target TID to pending list if the redirection could have
			 * happened since VACUUM started.  (If xid is invalid, assume it
			 * must have happened before VACUUM started, since REINDEX
			 * CONCURRENTLY locks out VACUUM.)
			 *
			 * Note: we could make a tighter test by seeing if the xid is
			 * "running" according to the active snapshot; but snapmgr.c
			 * doesn't currently export a suitable API, and it's not entirely
			 * clear that a tighter test is worth the cycles anyway.
			 */
			if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
				spgAddPendingTID(bds, &dt->pointer);
		}
		else
		{
			/* DEAD or PLACEHOLDER tuples never have chain links */
			Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
		}
	}

	if (nDeletable == 0)
		return;					/* nothing more to do */

	/*----------
	 * Figure out exactly what we have to do.  We do this separately from
	 * actually modifying the page, mainly so that we have a representation
	 * that can be dumped into WAL and then the replay code can do exactly
	 * the same thing.  The output of this step consists of six arrays
	 * describing four kinds of operations, to be performed in this order:
	 *
	 * toDead[]: tuple numbers to be replaced with DEAD tuples
	 * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
	 * moveSrc[]: tuple numbers that need to be relocated to another offset
	 * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
	 * moveDest[]: new locations for moveSrc tuples
	 * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
	 * chainDest[]: new values of nextOffset for chainSrc members
	 *
	 * It's easiest to figure out what we have to do by processing tuple
	 * chains, so we iterate over all the tuples (not just the deletable
	 * ones!) to identify chain heads, then chase down each chain and make
	 * work item entries for deletable tuples within the chain.
	 *----------
	 */
	xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;

	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple head;
		bool		interveningDeletable;
		OffsetNumber prevLive;
		OffsetNumber j;

		head = (SpGistLeafTuple) PageGetItem(page,
											 PageGetItemId(page, i));
		if (head->tupstate != SPGIST_LIVE)
			continue;			/* can't be a chain member */
		if (predecessor[i] != 0)
			continue;			/* not a chain head */

		/* initialize ... */
		interveningDeletable = false;
		prevLive = deletable[i] ? InvalidOffsetNumber : i;

		/* scan down the chain ... */
		j = SGLT_GET_NEXTOFFSET(head);
		while (j != InvalidOffsetNumber)
		{
			SpGistLeafTuple lt;

			lt = (SpGistLeafTuple) PageGetItem(page,
											   PageGetItemId(page, j));
			if (lt->tupstate != SPGIST_LIVE)
			{
				/* all tuples in chain should be live */
				elog(ERROR, "unexpected SPGiST tuple state: %d",
					 lt->tupstate);
			}

			if (deletable[j])
			{
				/* This tuple should be replaced by a placeholder */
				toPlaceholder[xlrec.nPlaceholder] = j;
				xlrec.nPlaceholder++;
				/* previous live tuple's chain link will need an update */
				interveningDeletable = true;
			}
			else if (prevLive == InvalidOffsetNumber)
			{
				/*
				 * This is the first live tuple in the chain.  It has to move
				 * to the head position.
				 */
				moveSrc[xlrec.nMove] = j;
				moveDest[xlrec.nMove] = i;
				xlrec.nMove++;
				/* Chain updates will be applied after the move */
				prevLive = i;
				interveningDeletable = false;
			}
			else
			{
				/*
				 * Second or later live tuple.  Arrange to re-chain it to the
				 * previous live one, if there was a gap.
				 */
				if (interveningDeletable)
				{
					chainSrc[xlrec.nChain] = prevLive;
					chainDest[xlrec.nChain] = j;
					xlrec.nChain++;
				}
				prevLive = j;
				interveningDeletable = false;
			}

			j = SGLT_GET_NEXTOFFSET(lt);
		}

		if (prevLive == InvalidOffsetNumber)
		{
			/* The chain is entirely removable, so we need a DEAD tuple */
			toDead[xlrec.nDead] = i;
			xlrec.nDead++;
		}
		else if (interveningDeletable)
		{
			/* One or more deletions at end of chain, so close it off */
			chainSrc[xlrec.nChain] = prevLive;
			chainDest[xlrec.nChain] = InvalidOffsetNumber;
			xlrec.nChain++;
		}
	}

	/* sanity check ... */
	if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
		elog(ERROR, "inconsistent counts of deletable tuples");

	/* Do the updates */
	START_CRIT_SECTION();

	spgPageIndexMultiDelete(&bds->spgstate, page,
							toDead, xlrec.nDead,
							SPGIST_DEAD, SPGIST_DEAD,
							InvalidBlockNumber, InvalidOffsetNumber);

	spgPageIndexMultiDelete(&bds->spgstate, page,
							toPlaceholder, xlrec.nPlaceholder,
							SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
							InvalidBlockNumber, InvalidOffsetNumber);

	/*
	 * We implement the move step by swapping the line pointers of the source
	 * and target tuples, then replacing the newly-source tuples with
	 * placeholders.  This is perhaps unduly friendly with the page data
	 * representation, but it's fast and doesn't risk page overflow when a
	 * tuple to be relocated is large.
	 */
	for (i = 0; i < xlrec.nMove; i++)
	{
		ItemId		idSrc = PageGetItemId(page, moveSrc[i]);
		ItemId		idDest = PageGetItemId(page, moveDest[i]);
		ItemIdData	tmp;

		tmp = *idSrc;
		*idSrc = *idDest;
		*idDest = tmp;
	}

	spgPageIndexMultiDelete(&bds->spgstate, page,
							moveSrc, xlrec.nMove,
							SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
							InvalidBlockNumber, InvalidOffsetNumber);

	/* Finally, apply the computed chain-link repairs */
	for (i = 0; i < xlrec.nChain; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, chainSrc[i]));
		Assert(lt->tupstate == SPGIST_LIVE);
		SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
	}

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		STORE_STATE(&bds->spgstate, xlrec.stateSrc);

		XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
		/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
		XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
		XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
		XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
		XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
		XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
		XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
401 :
/*
 * Vacuum a root page when it is also a leaf
 *
 * On the root, we just delete any dead leaf tuples; no fancy business.
 * There are no chains or redirects on a leaf root page, so every tuple
 * must be LIVE; anything else is reported as corruption.
 *
 * Caller must hold exclusive lock on buffer; modification happens inside a
 * critical section and is WAL-logged as XLOG_SPGIST_VACUUM_ROOT.
 */
static void
vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	spgxlogVacuumRoot xlrec;
	OffsetNumber toDelete[MaxIndexTuplesPerPage];
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page);

	xlrec.nDelete = 0;

	/* Scan page, identify tuples to delete, accumulate stats */
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, i));
		if (lt->tupstate == SPGIST_LIVE)
		{
			Assert(ItemPointerIsValid(&lt->heapPtr));

			if (bds->callback(&lt->heapPtr, bds->callback_state))
			{
				/* Dead per callback: schedule physical removal */
				bds->stats->tuples_removed += 1;
				toDelete[xlrec.nDelete] = i;
				xlrec.nDelete++;
			}
			else
			{
				bds->stats->num_index_tuples += 1;
			}
		}
		else
		{
			/* all tuples on root should be live */
			elog(ERROR, "unexpected SPGiST tuple state: %d",
				 lt->tupstate);
		}
	}

	if (xlrec.nDelete == 0)
		return;					/* nothing more to do */

	/* Do the update */
	START_CRIT_SECTION();

	/* The tuple numbers are in order, so we can use PageIndexMultiDelete */
	PageIndexMultiDelete(page, toDelete, xlrec.nDelete);

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		/* Prepare WAL record */
		STORE_STATE(&bds->spgstate, xlrec.stateSrc);

		XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
		/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
		XLogRegisterData((char *) toDelete,
						 sizeof(OffsetNumber) * xlrec.nDelete);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
482 :
/*
 * Clean up redirect and placeholder tuples on the given page
 *
 * Redirect tuples can be marked placeholder once they're old enough.
 * Placeholder tuples can be removed if it won't change the offsets of
 * non-placeholder ones.
 *
 * Unlike the routines above, this works on both leaf and inner pages.
 *
 * Caller must hold exclusive lock on buffer.  The removed redirects'
 * newest XID is recorded as the snapshot conflict horizon in the WAL
 * record (XLOG_SPGIST_VACUUM_REDIRECT) so standbys can cancel queries
 * that might still follow those redirects.
 */
static void
vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page),
				firstPlaceholder = InvalidOffsetNumber;
	bool		hasNonPlaceholder = false;
	bool		hasUpdate = false;
	OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
	OffsetNumber itemnos[MaxIndexTuplesPerPage];
	spgxlogVacuumRedirect xlrec;
	GlobalVisState *vistest;

	xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
	xlrec.nToPlaceholder = 0;
	xlrec.snapshotConflictHorizon = InvalidTransactionId;

	vistest = GlobalVisTestFor(heaprel);

	START_CRIT_SECTION();

	/*
	 * Scan backwards to convert old redirection tuples to placeholder tuples,
	 * and identify location of last non-placeholder tuple while at it.
	 * We can stop early once all redirects are handled and a non-placeholder
	 * tuple has been seen, since nothing further back can be trimmed.
	 */
	for (i = max;
		 i >= FirstOffsetNumber &&
		 (opaque->nRedirection > 0 || !hasNonPlaceholder);
		 i--)
	{
		SpGistDeadTuple dt;

		dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));

		/*
		 * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
		 * be any index scans "in flight" to it.  Such an index scan would
		 * have to be in a transaction whose snapshot sees the REDIRECT's XID
		 * as still running, so comparing the XID against global xmin is a
		 * conservatively safe test.  If the XID is invalid, it must have been
		 * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
		 */
		if (dt->tupstate == SPGIST_REDIRECT &&
			(!TransactionIdIsValid(dt->xid) ||
			 GlobalVisTestIsRemovableXid(vistest, dt->xid)))
		{
			dt->tupstate = SPGIST_PLACEHOLDER;
			Assert(opaque->nRedirection > 0);
			opaque->nRedirection--;
			opaque->nPlaceholder++;

			/* remember newest XID among the removed redirects */
			if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
				TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
				xlrec.snapshotConflictHorizon = dt->xid;

			ItemPointerSetInvalid(&dt->pointer);

			itemToPlaceholder[xlrec.nToPlaceholder] = i;
			xlrec.nToPlaceholder++;

			hasUpdate = true;
		}

		if (dt->tupstate == SPGIST_PLACEHOLDER)
		{
			if (!hasNonPlaceholder)
				firstPlaceholder = i;
		}
		else
		{
			hasNonPlaceholder = true;
		}
	}

	/*
	 * Any placeholder tuples at the end of page can safely be removed.  We
	 * can't remove ones before the last non-placeholder, though, because we
	 * can't alter the offset numbers of non-placeholder tuples.
	 */
	if (firstPlaceholder != InvalidOffsetNumber)
	{
		/*
		 * We do not store this array to rdata because it's easy to recreate.
		 */
		for (i = firstPlaceholder; i <= max; i++)
			itemnos[i - firstPlaceholder] = i;

		i = max - firstPlaceholder + 1;
		Assert(opaque->nPlaceholder >= i);
		opaque->nPlaceholder -= i;

		/* The array is surely sorted, so can use PageIndexMultiDelete */
		PageIndexMultiDelete(page, itemnos, i);

		hasUpdate = true;
	}

	xlrec.firstPlaceholder = firstPlaceholder;

	if (hasUpdate)
		MarkBufferDirty(buffer);

	if (hasUpdate && RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
		XLogRegisterData((char *) itemToPlaceholder,
						 sizeof(OffsetNumber) * xlrec.nToPlaceholder);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
616 :
/*
 * Process one page during a bulkdelete scan
 *
 * Reads and exclusively locks the page, dispatches to the appropriate
 * vacuum helper depending on whether it is new, empty, a leaf (root or
 * non-root), or an inner page, and then performs FSM bookkeeping for
 * reusable pages.
 */
static void
spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
{
	Relation	index = bds->info->index;
	Buffer		buffer;
	Page		page;

	/* call vacuum_delay_point while not holding any buffer lock */
	vacuum_delay_point();

	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, bds->info->strategy);
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	page = (Page) BufferGetPage(buffer);

	if (PageIsNew(page))
	{
		/*
		 * We found an all-zero page, which could happen if the database
		 * crashed just after extending the file.  Recycle it.
		 */
	}
	else if (PageIsEmpty(page))
	{
		/* nothing to do */
	}
	else if (SpGistPageIsLeaf(page))
	{
		if (SpGistBlockIsRoot(blkno))
		{
			vacuumLeafRoot(bds, index, buffer);
			/* no need for vacuumRedirectAndPlaceholder */
		}
		else
		{
			vacuumLeafPage(bds, index, buffer, false);
			vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
		}
	}
	else
	{
		/* inner page */
		vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
	}

	/*
	 * The root pages must never be deleted, nor marked as available in FSM,
	 * because we don't want them ever returned by a search for a place to put
	 * a new tuple.  Otherwise, check for empty page, and make sure the FSM
	 * knows about it.
	 */
	if (!SpGistBlockIsRoot(blkno))
	{
		if (PageIsNew(page) || PageIsEmpty(page))
		{
			RecordFreeIndexPage(index, blkno);
			bds->stats->pages_deleted++;
		}
		else
		{
			/* Remember the last page that still holds data */
			SpGistSetLastUsedPage(index, buffer);
			bds->lastFilledBlock = blkno;
		}
	}

	UnlockReleaseBuffer(buffer);
}
687 :
/*
 * Process the pending-TID list between pages of the main scan
 *
 * Each pending TID is a redirection target discovered by vacuumLeafPage.
 * For a target on a leaf page we vacuum the whole page (and can then mark
 * every pending entry for that page done); for a target on an inner page
 * we enqueue all downlinks of the referenced inner tuple(s) instead, to be
 * visited on a later pass through this list.  The list is fully drained
 * and freed before returning.
 */
static void
spgprocesspending(spgBulkDeleteState *bds)
{
	Relation	index = bds->info->index;
	spgVacPendingItem *pitem;
	spgVacPendingItem *nitem;
	BlockNumber blkno;
	Buffer		buffer;
	Page		page;

	/* NB: spgAddPendingTID may append items while this loop runs */
	for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
	{
		if (pitem->done)
			continue;			/* ignore already-done items */

		/* call vacuum_delay_point while not holding any buffer lock */
		vacuum_delay_point();

		/* examine the referenced page */
		blkno = ItemPointerGetBlockNumber(&pitem->tid);
		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, bds->info->strategy);
		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		page = (Page) BufferGetPage(buffer);

		if (PageIsNew(page) || SpGistPageIsDeleted(page))
		{
			/* Probably shouldn't happen, but ignore it */
		}
		else if (SpGistPageIsLeaf(page))
		{
			if (SpGistBlockIsRoot(blkno))
			{
				/* this should definitely not happen */
				elog(ERROR, "redirection leads to root page of index \"%s\"",
					 RelationGetRelationName(index));
			}

			/* deal with any deletable tuples */
			vacuumLeafPage(bds, index, buffer, true);
			/* might as well do this while we are here */
			vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);

			SpGistSetLastUsedPage(index, buffer);

			/*
			 * We can mark as done not only this item, but any later ones
			 * pointing at the same page, since we vacuumed the whole page.
			 */
			pitem->done = true;
			for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
			{
				if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
					nitem->done = true;
			}
		}
		else
		{
			/*
			 * On an inner page, visit the referenced inner tuple and add all
			 * its downlinks to the pending list.  We might have pending items
			 * for more than one inner tuple on the same page (in fact this is
			 * pretty likely given the way space allocation works), so get
			 * them all while we are here.
			 */
			for (nitem = pitem; nitem != NULL; nitem = nitem->next)
			{
				if (nitem->done)
					continue;
				if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
				{
					OffsetNumber offset;
					SpGistInnerTuple innerTuple;

					offset = ItemPointerGetOffsetNumber(&nitem->tid);
					innerTuple = (SpGistInnerTuple) PageGetItem(page,
																PageGetItemId(page, offset));
					if (innerTuple->tupstate == SPGIST_LIVE)
					{
						SpGistNodeTuple node;
						int			i;

						/* queue every valid downlink for a later visit */
						SGITITERATE(innerTuple, i, node)
						{
							if (ItemPointerIsValid(&node->t_tid))
								spgAddPendingTID(bds, &node->t_tid);
						}
					}
					else if (innerTuple->tupstate == SPGIST_REDIRECT)
					{
						/* transfer attention to redirect point */
						spgAddPendingTID(bds,
										 &((SpGistDeadTuple) innerTuple)->pointer);
					}
					else
						elog(ERROR, "unexpected SPGiST tuple state: %d",
							 innerTuple->tupstate);

					nitem->done = true;
				}
			}
		}

		UnlockReleaseBuffer(buffer);
	}

	spgClearPendingList(bds);
}
799 :
/*
 * Perform a bulkdelete scan
 *
 * Drives the physical-order scan over all index pages (except the
 * metapage), drains the pending-TID list after each page, and finishes
 * with metapage and FSM maintenance.  Shared by spgbulkdelete and
 * spgvacuumcleanup; bds->callback decides which tuples are removed.
 */
static void
spgvacuumscan(spgBulkDeleteState *bds)
{
	Relation	index = bds->info->index;
	bool		needLock;
	BlockNumber num_pages,
				blkno;

	/* Finish setting up spgBulkDeleteState */
	initSpGistState(&bds->spgstate, index);
	bds->pendingList = NULL;
	bds->myXmin = GetActiveSnapshot()->xmin;
	bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;

	/*
	 * Reset counts that will be incremented during the scan; needed in case
	 * of multiple scans during a single VACUUM command
	 */
	bds->stats->estimated_count = false;
	bds->stats->num_index_tuples = 0;
	bds->stats->pages_deleted = 0;

	/* We can skip locking for new or temp relations */
	needLock = !RELATION_IS_LOCAL(index);

	/*
	 * The outer loop iterates over all index pages except the metapage, in
	 * physical order (we hope the kernel will cooperate in providing
	 * read-ahead for speed).  It is critical that we visit all leaf pages,
	 * including ones added after we start the scan, else we might fail to
	 * delete some deletable tuples.  See more extensive comments about this
	 * in btvacuumscan().
	 */
	blkno = SPGIST_METAPAGE_BLKNO + 1;
	for (;;)
	{
		/* Get the current relation length */
		if (needLock)
			LockRelationForExtension(index, ExclusiveLock);
		num_pages = RelationGetNumberOfBlocks(index);
		if (needLock)
			UnlockRelationForExtension(index, ExclusiveLock);

		/* Quit if we've scanned the whole relation */
		if (blkno >= num_pages)
			break;
		/* Iterate over pages, then loop back to recheck length */
		for (; blkno < num_pages; blkno++)
		{
			spgvacuumpage(bds, blkno);
			/* empty the pending-list after each page */
			if (bds->pendingList != NULL)
				spgprocesspending(bds);
		}
	}

	/* Propagate local lastUsedPages cache to metablock */
	SpGistUpdateMetaPage(index);

	/*
	 * If we found any empty pages (and recorded them in the FSM), then
	 * forcibly update the upper-level FSM pages to ensure that searchers can
	 * find them.  It's possible that the pages were also found during
	 * previous scans and so this is a waste of time, but it's cheap enough
	 * relative to scanning the index that it shouldn't matter much, and
	 * making sure that free pages are available sooner not later seems
	 * worthwhile.
	 *
	 * Note that if no empty pages exist, we don't bother vacuuming the FSM at
	 * all.
	 */
	if (bds->stats->pages_deleted > 0)
		IndexFreeSpaceMapVacuum(index);

	/*
	 * Truncate index if possible
	 *
	 * XXX disabled because it's unsafe due to possible concurrent inserts.
	 * We'd have to rescan the pages to make sure they're still empty, and it
	 * doesn't seem worth it.  Note that btree doesn't do this either.
	 *
	 * Another reason not to truncate is that it could invalidate the cached
	 * pages-with-freespace pointers in the metapage and other backends'
	 * relation caches, that is leave them pointing to nonexistent pages.
	 * Adding RelationGetNumberOfBlocks calls to protect the places that use
	 * those pointers would be unduly expensive.
	 */
#ifdef NOT_USED
	if (num_pages > bds->lastFilledBlock + 1)
	{
		BlockNumber lastBlock = num_pages - 1;

		num_pages = bds->lastFilledBlock + 1;
		RelationTruncate(index, num_pages);
		bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
		bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
	}
#endif

	/* Report final stats */
	bds->stats->num_pages = num_pages;
	bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
	bds->stats->pages_free = bds->stats->pages_deleted;
}
907 :
908 : /*
909 : * Bulk deletion of all index entries pointing to a set of heap tuples.
910 : * The set of target tuples is specified via a callback routine that tells
911 : * whether any given heap tuple (identified by ItemPointer) is being deleted.
912 : *
913 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
914 : */
915 : IndexBulkDeleteResult *
916 8 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
917 : IndexBulkDeleteCallback callback, void *callback_state)
918 : {
919 : spgBulkDeleteState bds;
920 :
921 : /* allocate stats if first time through, else re-use existing struct */
922 8 : if (stats == NULL)
923 8 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
924 8 : bds.info = info;
925 8 : bds.stats = stats;
926 8 : bds.callback = callback;
927 8 : bds.callback_state = callback_state;
928 :
929 8 : spgvacuumscan(&bds);
930 :
931 8 : return stats;
932 : }
933 :
934 : /* Dummy callback to delete no tuples during spgvacuumcleanup */
935 : static bool
936 3100946 : dummy_callback(ItemPointer itemptr, void *state)
937 : {
938 3100946 : return false;
939 : }
940 :
941 : /*
942 : * Post-VACUUM cleanup.
943 : *
944 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
945 : */
946 : IndexBulkDeleteResult *
947 70 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
948 : {
949 : spgBulkDeleteState bds;
950 :
951 : /* No-op in ANALYZE ONLY mode */
952 70 : if (info->analyze_only)
953 0 : return stats;
954 :
955 : /*
956 : * We don't need to scan the index if there was a preceding bulkdelete
957 : * pass. Otherwise, make a pass that won't delete any live tuples, but
958 : * might still accomplish useful stuff with redirect/placeholder cleanup
959 : * and/or FSM housekeeping, and in any case will provide stats.
960 : */
961 70 : if (stats == NULL)
962 : {
963 62 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
964 62 : bds.info = info;
965 62 : bds.stats = stats;
966 62 : bds.callback = dummy_callback;
967 62 : bds.callback_state = NULL;
968 :
969 62 : spgvacuumscan(&bds);
970 : }
971 :
972 : /*
973 : * It's quite possible for us to be fooled by concurrent tuple moves into
974 : * double-counting some index tuples, so disbelieve any total that exceeds
975 : * the underlying heap's count ... if we know that accurately. Otherwise
976 : * this might just make matters worse.
977 : */
978 70 : if (!info->estimated_count)
979 : {
980 70 : if (stats->num_index_tuples > info->num_heap_tuples)
981 4 : stats->num_index_tuples = info->num_heap_tuples;
982 : }
983 :
984 70 : return stats;
985 : }
|