Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgvacuum.c
4 : * vacuum for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgvacuum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/genam.h"
19 : #include "access/spgist_private.h"
20 : #include "access/spgxlog.h"
21 : #include "access/transam.h"
22 : #include "access/xloginsert.h"
23 : #include "commands/vacuum.h"
24 : #include "miscadmin.h"
25 : #include "storage/bufmgr.h"
26 : #include "storage/indexfsm.h"
27 : #include "storage/lmgr.h"
28 : #include "utils/snapmgr.h"
29 :
30 :
/*
 * Entry in pending-list of TIDs we need to revisit.
 *
 * The list is a simple singly-linked chain; spgAddPendingTID appends at the
 * tail and filters duplicates, spgprocesspending consumes it, and
 * spgClearPendingList frees it.
 */
typedef struct spgVacPendingItem
{
	ItemPointerData tid;		/* redirection target to visit */
	bool		done;			/* have we dealt with this? */
	struct spgVacPendingItem *next; /* list link */
} spgVacPendingItem;
38 :
/*
 * Local state for vacuum operations.
 *
 * One instance lives on the stack for the duration of a bulkdelete or
 * cleanup scan; spgvacuumscan finishes initializing the "working state"
 * fields before the scan proper begins.
 */
typedef struct spgBulkDeleteState
{
	/* Parameters passed in to spgvacuumscan */
	IndexVacuumInfo *info;
	IndexBulkDeleteResult *stats;
	IndexBulkDeleteCallback callback;
	void	   *callback_state;

	/* Additional working state */
	SpGistState spgstate;		/* for SPGiST operations that need one */
	spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
	TransactionId myXmin;		/* for detecting newly-added redirects */
	BlockNumber lastFilledBlock;	/* last non-deletable block */
} spgBulkDeleteState;
54 :
55 :
56 : /*
57 : * Add TID to pendingList, but only if not already present.
58 : *
59 : * Note that new items are always appended at the end of the list; this
60 : * ensures that scans of the list don't miss items added during the scan.
61 : */
62 : static void
63 12 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
64 : {
65 : spgVacPendingItem *pitem;
66 : spgVacPendingItem **listLink;
67 :
68 : /* search the list for pre-existing entry */
69 12 : listLink = &bds->pendingList;
70 24 : while (*listLink != NULL)
71 : {
72 12 : pitem = *listLink;
73 12 : if (ItemPointerEquals(tid, &pitem->tid))
74 0 : return; /* already in list, do nothing */
75 12 : listLink = &pitem->next;
76 : }
77 : /* not there, so append new entry */
78 12 : pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
79 12 : pitem->tid = *tid;
80 12 : pitem->done = false;
81 12 : pitem->next = NULL;
82 12 : *listLink = pitem;
83 : }
84 :
85 : /*
86 : * Clear pendingList
87 : */
88 : static void
89 4 : spgClearPendingList(spgBulkDeleteState *bds)
90 : {
91 : spgVacPendingItem *pitem;
92 : spgVacPendingItem *nitem;
93 :
94 16 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
95 : {
96 12 : nitem = pitem->next;
97 : /* All items in list should have been dealt with */
98 : Assert(pitem->done);
99 12 : pfree(pitem);
100 : }
101 4 : bds->pendingList = NULL;
102 4 : }
103 :
104 : /*
105 : * Vacuum a regular (non-root) leaf page
106 : *
107 : * We must delete tuples that are targeted for deletion by the VACUUM,
108 : * but not move any tuples that are referenced by outside links; we assume
109 : * those are the ones that are heads of chains.
110 : *
111 : * If we find a REDIRECT that was made by a concurrently-running transaction,
112 : * we must add its target TID to pendingList. (We don't try to visit the
113 : * target immediately, first because we don't want VACUUM locking more than
114 : * one buffer at a time, and second because the duplicate-filtering logic
115 : * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
116 : * loop in the face of continuous concurrent insertions.)
117 : *
118 : * If forPending is true, we are examining the page as a consequence of
119 : * chasing a redirect link, not as part of the normal sequential scan.
120 : * We still vacuum the page normally, but we don't increment the stats
121 : * about live tuples; else we'd double-count those tuples, since the page
122 : * has been or will be visited in the sequential scan as well.
123 : */
124 : static void
125 3810 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
126 : bool forPending)
127 : {
128 3810 : Page page = BufferGetPage(buffer);
129 : spgxlogVacuumLeaf xlrec;
130 : OffsetNumber toDead[MaxIndexTuplesPerPage];
131 : OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
132 : OffsetNumber moveSrc[MaxIndexTuplesPerPage];
133 : OffsetNumber moveDest[MaxIndexTuplesPerPage];
134 : OffsetNumber chainSrc[MaxIndexTuplesPerPage];
135 : OffsetNumber chainDest[MaxIndexTuplesPerPage];
136 : OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
137 : bool deletable[MaxIndexTuplesPerPage + 1];
138 : int nDeletable;
139 : OffsetNumber i,
140 3810 : max = PageGetMaxOffsetNumber(page);
141 :
142 3810 : memset(predecessor, 0, sizeof(predecessor));
143 3810 : memset(deletable, 0, sizeof(deletable));
144 3810 : nDeletable = 0;
145 :
146 : /* Scan page, identify tuples to delete, accumulate stats */
147 690356 : for (i = FirstOffsetNumber; i <= max; i++)
148 : {
149 : SpGistLeafTuple lt;
150 :
151 686546 : lt = (SpGistLeafTuple) PageGetItem(page,
152 : PageGetItemId(page, i));
153 686546 : if (lt->tupstate == SPGIST_LIVE)
154 : {
155 : Assert(ItemPointerIsValid(<->heapPtr));
156 :
157 497772 : if (bds->callback(<->heapPtr, bds->callback_state))
158 : {
159 7724 : bds->stats->tuples_removed += 1;
160 7724 : deletable[i] = true;
161 7724 : nDeletable++;
162 : }
163 : else
164 : {
165 490048 : if (!forPending)
166 489440 : bds->stats->num_index_tuples += 1;
167 : }
168 :
169 : /* Form predecessor map, too */
170 497772 : if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
171 : {
172 : /* paranoia about corrupted chain links */
173 486416 : if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
174 486416 : SGLT_GET_NEXTOFFSET(lt) > max ||
175 486416 : predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
176 0 : elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
177 : BufferGetBlockNumber(buffer),
178 : RelationGetRelationName(index));
179 486416 : predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
180 : }
181 : }
182 188774 : else if (lt->tupstate == SPGIST_REDIRECT)
183 : {
184 946 : SpGistDeadTuple dt = (SpGistDeadTuple) lt;
185 :
186 : Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
187 : Assert(ItemPointerIsValid(&dt->pointer));
188 :
189 : /*
190 : * Add target TID to pending list if the redirection could have
191 : * happened since VACUUM started.
192 : *
193 : * Note: we could make a tighter test by seeing if the xid is
194 : * "running" according to the active snapshot; but snapmgr.c
195 : * doesn't currently export a suitable API, and it's not entirely
196 : * clear that a tighter test is worth the cycles anyway.
197 : */
198 946 : if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
199 4 : spgAddPendingTID(bds, &dt->pointer);
200 : }
201 : else
202 : {
203 : Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
204 : }
205 : }
206 :
207 3810 : if (nDeletable == 0)
208 3706 : return; /* nothing more to do */
209 :
210 : /*----------
211 : * Figure out exactly what we have to do. We do this separately from
212 : * actually modifying the page, mainly so that we have a representation
213 : * that can be dumped into WAL and then the replay code can do exactly
214 : * the same thing. The output of this step consists of six arrays
215 : * describing four kinds of operations, to be performed in this order:
216 : *
217 : * toDead[]: tuple numbers to be replaced with DEAD tuples
218 : * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
219 : * moveSrc[]: tuple numbers that need to be relocated to another offset
220 : * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
221 : * moveDest[]: new locations for moveSrc tuples
222 : * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
223 : * chainDest[]: new values of nextOffset for chainSrc members
224 : *
225 : * It's easiest to figure out what we have to do by processing tuple
226 : * chains, so we iterate over all the tuples (not just the deletable
227 : * ones!) to identify chain heads, then chase down each chain and make
228 : * work item entries for deletable tuples within the chain.
229 : *----------
230 : */
231 104 : xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
232 :
233 16868 : for (i = FirstOffsetNumber; i <= max; i++)
234 : {
235 : SpGistLeafTuple head;
236 : bool interveningDeletable;
237 : OffsetNumber prevLive;
238 : OffsetNumber j;
239 :
240 16764 : head = (SpGistLeafTuple) PageGetItem(page,
241 : PageGetItemId(page, i));
242 16764 : if (head->tupstate != SPGIST_LIVE)
243 1040 : continue; /* can't be a chain member */
244 15724 : if (predecessor[i] != 0)
245 15604 : continue; /* not a chain head */
246 :
247 : /* initialize ... */
248 120 : interveningDeletable = false;
249 120 : prevLive = deletable[i] ? InvalidOffsetNumber : i;
250 :
251 : /* scan down the chain ... */
252 120 : j = SGLT_GET_NEXTOFFSET(head);
253 15724 : while (j != InvalidOffsetNumber)
254 : {
255 : SpGistLeafTuple lt;
256 :
257 15604 : lt = (SpGistLeafTuple) PageGetItem(page,
258 : PageGetItemId(page, j));
259 15604 : if (lt->tupstate != SPGIST_LIVE)
260 : {
261 : /* all tuples in chain should be live */
262 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
263 : lt->tupstate);
264 : }
265 :
266 15604 : if (deletable[j])
267 : {
268 : /* This tuple should be replaced by a placeholder */
269 7620 : toPlaceholder[xlrec.nPlaceholder] = j;
270 7620 : xlrec.nPlaceholder++;
271 : /* previous live tuple's chain link will need an update */
272 7620 : interveningDeletable = true;
273 : }
274 7984 : else if (prevLive == InvalidOffsetNumber)
275 : {
276 : /*
277 : * This is the first live tuple in the chain. It has to move
278 : * to the head position.
279 : */
280 104 : moveSrc[xlrec.nMove] = j;
281 104 : moveDest[xlrec.nMove] = i;
282 104 : xlrec.nMove++;
283 : /* Chain updates will be applied after the move */
284 104 : prevLive = i;
285 104 : interveningDeletable = false;
286 : }
287 : else
288 : {
289 : /*
290 : * Second or later live tuple. Arrange to re-chain it to the
291 : * previous live one, if there was a gap.
292 : */
293 7880 : if (interveningDeletable)
294 : {
295 16 : chainSrc[xlrec.nChain] = prevLive;
296 16 : chainDest[xlrec.nChain] = j;
297 16 : xlrec.nChain++;
298 : }
299 7880 : prevLive = j;
300 7880 : interveningDeletable = false;
301 : }
302 :
303 15604 : j = SGLT_GET_NEXTOFFSET(lt);
304 : }
305 :
306 120 : if (prevLive == InvalidOffsetNumber)
307 : {
308 : /* The chain is entirely removable, so we need a DEAD tuple */
309 0 : toDead[xlrec.nDead] = i;
310 0 : xlrec.nDead++;
311 : }
312 120 : else if (interveningDeletable)
313 : {
314 : /* One or more deletions at end of chain, so close it off */
315 108 : chainSrc[xlrec.nChain] = prevLive;
316 108 : chainDest[xlrec.nChain] = InvalidOffsetNumber;
317 108 : xlrec.nChain++;
318 : }
319 : }
320 :
321 : /* sanity check ... */
322 104 : if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
323 0 : elog(ERROR, "inconsistent counts of deletable tuples");
324 :
325 : /* Do the updates */
326 104 : START_CRIT_SECTION();
327 :
328 104 : spgPageIndexMultiDelete(&bds->spgstate, page,
329 104 : toDead, xlrec.nDead,
330 : SPGIST_DEAD, SPGIST_DEAD,
331 : InvalidBlockNumber, InvalidOffsetNumber);
332 :
333 104 : spgPageIndexMultiDelete(&bds->spgstate, page,
334 104 : toPlaceholder, xlrec.nPlaceholder,
335 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
336 : InvalidBlockNumber, InvalidOffsetNumber);
337 :
338 : /*
339 : * We implement the move step by swapping the line pointers of the source
340 : * and target tuples, then replacing the newly-source tuples with
341 : * placeholders. This is perhaps unduly friendly with the page data
342 : * representation, but it's fast and doesn't risk page overflow when a
343 : * tuple to be relocated is large.
344 : */
345 208 : for (i = 0; i < xlrec.nMove; i++)
346 : {
347 104 : ItemId idSrc = PageGetItemId(page, moveSrc[i]);
348 104 : ItemId idDest = PageGetItemId(page, moveDest[i]);
349 : ItemIdData tmp;
350 :
351 104 : tmp = *idSrc;
352 104 : *idSrc = *idDest;
353 104 : *idDest = tmp;
354 : }
355 :
356 104 : spgPageIndexMultiDelete(&bds->spgstate, page,
357 104 : moveSrc, xlrec.nMove,
358 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
359 : InvalidBlockNumber, InvalidOffsetNumber);
360 :
361 228 : for (i = 0; i < xlrec.nChain; i++)
362 : {
363 : SpGistLeafTuple lt;
364 :
365 124 : lt = (SpGistLeafTuple) PageGetItem(page,
366 124 : PageGetItemId(page, chainSrc[i]));
367 : Assert(lt->tupstate == SPGIST_LIVE);
368 124 : SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
369 : }
370 :
371 104 : MarkBufferDirty(buffer);
372 :
373 104 : if (RelationNeedsWAL(index))
374 : {
375 : XLogRecPtr recptr;
376 :
377 104 : XLogBeginInsert();
378 :
379 104 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
380 :
381 104 : XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
382 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
383 104 : XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
384 104 : XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
385 104 : XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
386 104 : XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
387 104 : XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
388 104 : XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
389 :
390 104 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
391 :
392 104 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
393 :
394 104 : PageSetLSN(page, recptr);
395 : }
396 :
397 104 : END_CRIT_SECTION();
398 : }
399 :
400 : /*
401 : * Vacuum a root page when it is also a leaf
402 : *
403 : * On the root, we just delete any dead leaf tuples; no fancy business
404 : */
405 : static void
406 32 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
407 : {
408 32 : Page page = BufferGetPage(buffer);
409 : spgxlogVacuumRoot xlrec;
410 : OffsetNumber toDelete[MaxIndexTuplesPerPage];
411 : OffsetNumber i,
412 32 : max = PageGetMaxOffsetNumber(page);
413 :
414 32 : xlrec.nDelete = 0;
415 :
416 : /* Scan page, identify tuples to delete, accumulate stats */
417 164 : for (i = FirstOffsetNumber; i <= max; i++)
418 : {
419 : SpGistLeafTuple lt;
420 :
421 132 : lt = (SpGistLeafTuple) PageGetItem(page,
422 : PageGetItemId(page, i));
423 132 : if (lt->tupstate == SPGIST_LIVE)
424 : {
425 : Assert(ItemPointerIsValid(<->heapPtr));
426 :
427 132 : if (bds->callback(<->heapPtr, bds->callback_state))
428 : {
429 8 : bds->stats->tuples_removed += 1;
430 8 : toDelete[xlrec.nDelete] = i;
431 8 : xlrec.nDelete++;
432 : }
433 : else
434 : {
435 124 : bds->stats->num_index_tuples += 1;
436 : }
437 : }
438 : else
439 : {
440 : /* all tuples on root should be live */
441 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
442 : lt->tupstate);
443 : }
444 : }
445 :
446 32 : if (xlrec.nDelete == 0)
447 30 : return; /* nothing more to do */
448 :
449 : /* Do the update */
450 2 : START_CRIT_SECTION();
451 :
452 : /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
453 2 : PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
454 :
455 2 : MarkBufferDirty(buffer);
456 :
457 2 : if (RelationNeedsWAL(index))
458 : {
459 : XLogRecPtr recptr;
460 :
461 2 : XLogBeginInsert();
462 :
463 : /* Prepare WAL record */
464 2 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
465 :
466 2 : XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
467 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
468 2 : XLogRegisterData((char *) toDelete,
469 2 : sizeof(OffsetNumber) * xlrec.nDelete);
470 :
471 2 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
472 :
473 2 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
474 :
475 2 : PageSetLSN(page, recptr);
476 : }
477 :
478 2 : END_CRIT_SECTION();
479 : }
480 :
481 : /*
482 : * Clean up redirect and placeholder tuples on the given page
483 : *
484 : * Redirect tuples can be marked placeholder once they're old enough.
485 : * Placeholder tuples can be removed if it won't change the offsets of
486 : * non-placeholder ones.
487 : *
488 : * Unlike the routines above, this works on both leaf and inner pages.
489 : */
490 : static void
491 3952 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
492 : {
493 3952 : Page page = BufferGetPage(buffer);
494 3952 : SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
495 : OffsetNumber i,
496 3952 : max = PageGetMaxOffsetNumber(page),
497 3952 : firstPlaceholder = InvalidOffsetNumber;
498 3952 : bool hasNonPlaceholder = false;
499 3952 : bool hasUpdate = false;
500 : OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
501 : OffsetNumber itemnos[MaxIndexTuplesPerPage];
502 : spgxlogVacuumRedirect xlrec;
503 : GlobalVisState *vistest;
504 :
505 3952 : xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
506 3952 : xlrec.nToPlaceholder = 0;
507 3952 : xlrec.snapshotConflictHorizon = InvalidTransactionId;
508 :
509 3952 : vistest = GlobalVisTestFor(heaprel);
510 :
511 3952 : START_CRIT_SECTION();
512 :
513 : /*
514 : * Scan backwards to convert old redirection tuples to placeholder tuples,
515 : * and identify location of last non-placeholder tuple while at it.
516 : */
517 194722 : for (i = max;
518 194354 : i >= FirstOffsetNumber &&
519 194354 : (opaque->nRedirection > 0 || !hasNonPlaceholder);
520 190770 : i--)
521 : {
522 : SpGistDeadTuple dt;
523 :
524 190770 : dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
525 :
526 191716 : if (dt->tupstate == SPGIST_REDIRECT &&
527 946 : GlobalVisTestIsRemovableXid(vistest, dt->xid))
528 : {
529 644 : dt->tupstate = SPGIST_PLACEHOLDER;
530 : Assert(opaque->nRedirection > 0);
531 644 : opaque->nRedirection--;
532 644 : opaque->nPlaceholder++;
533 :
534 : /* remember newest XID among the removed redirects */
535 726 : if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
536 82 : TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
537 562 : xlrec.snapshotConflictHorizon = dt->xid;
538 :
539 644 : ItemPointerSetInvalid(&dt->pointer);
540 :
541 644 : itemToPlaceholder[xlrec.nToPlaceholder] = i;
542 644 : xlrec.nToPlaceholder++;
543 :
544 644 : hasUpdate = true;
545 : }
546 :
547 190770 : if (dt->tupstate == SPGIST_PLACEHOLDER)
548 : {
549 142206 : if (!hasNonPlaceholder)
550 137724 : firstPlaceholder = i;
551 : }
552 : else
553 : {
554 48564 : hasNonPlaceholder = true;
555 : }
556 : }
557 :
558 : /*
559 : * Any placeholder tuples at the end of page can safely be removed. We
560 : * can't remove ones before the last non-placeholder, though, because we
561 : * can't alter the offset numbers of non-placeholder tuples.
562 : */
563 3952 : if (firstPlaceholder != InvalidOffsetNumber)
564 : {
565 : /*
566 : * We do not store this array to rdata because it's easy to recreate.
567 : */
568 139928 : for (i = firstPlaceholder; i <= max; i++)
569 137724 : itemnos[i - firstPlaceholder] = i;
570 :
571 2204 : i = max - firstPlaceholder + 1;
572 : Assert(opaque->nPlaceholder >= i);
573 2204 : opaque->nPlaceholder -= i;
574 :
575 : /* The array is surely sorted, so can use PageIndexMultiDelete */
576 2204 : PageIndexMultiDelete(page, itemnos, i);
577 :
578 2204 : hasUpdate = true;
579 : }
580 :
581 3952 : xlrec.firstPlaceholder = firstPlaceholder;
582 :
583 3952 : if (hasUpdate)
584 2208 : MarkBufferDirty(buffer);
585 :
586 3952 : if (hasUpdate && RelationNeedsWAL(index))
587 : {
588 : XLogRecPtr recptr;
589 :
590 2208 : XLogBeginInsert();
591 :
592 2208 : XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
593 2208 : XLogRegisterData((char *) itemToPlaceholder,
594 2208 : sizeof(OffsetNumber) * xlrec.nToPlaceholder);
595 :
596 2208 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
597 :
598 2208 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
599 :
600 2208 : PageSetLSN(page, recptr);
601 : }
602 :
603 3952 : END_CRIT_SECTION();
604 3952 : }
605 :
606 : /*
607 : * Process one page during a bulkdelete scan
608 : */
609 : static void
610 4070 : spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
611 : {
612 4070 : Relation index = bds->info->index;
613 : Buffer buffer;
614 : Page page;
615 :
616 : /* call vacuum_delay_point while not holding any buffer lock */
617 4070 : vacuum_delay_point();
618 :
619 4070 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
620 4070 : RBM_NORMAL, bds->info->strategy);
621 4070 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
622 4070 : page = (Page) BufferGetPage(buffer);
623 :
624 4070 : if (PageIsNew(page))
625 : {
626 : /*
627 : * We found an all-zero page, which could happen if the database
628 : * crashed just after extending the file. Recycle it.
629 : */
630 : }
631 4058 : else if (PageIsEmpty(page))
632 : {
633 : /* nothing to do */
634 : }
635 3980 : else if (SpGistPageIsLeaf(page))
636 : {
637 3838 : if (SpGistBlockIsRoot(blkno))
638 : {
639 32 : vacuumLeafRoot(bds, index, buffer);
640 : /* no need for vacuumRedirectAndPlaceholder */
641 : }
642 : else
643 : {
644 3806 : vacuumLeafPage(bds, index, buffer, false);
645 3806 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
646 : }
647 : }
648 : else
649 : {
650 : /* inner page */
651 142 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
652 : }
653 :
654 : /*
655 : * The root pages must never be deleted, nor marked as available in FSM,
656 : * because we don't want them ever returned by a search for a place to put
657 : * a new tuple. Otherwise, check for empty page, and make sure the FSM
658 : * knows about it.
659 : */
660 4070 : if (!SpGistBlockIsRoot(blkno))
661 : {
662 3930 : if (PageIsNew(page) || PageIsEmpty(page))
663 : {
664 48 : RecordFreeIndexPage(index, blkno);
665 48 : bds->stats->pages_deleted++;
666 : }
667 : else
668 : {
669 3882 : SpGistSetLastUsedPage(index, buffer);
670 3882 : bds->lastFilledBlock = blkno;
671 : }
672 : }
673 :
674 4070 : UnlockReleaseBuffer(buffer);
675 4070 : }
676 :
677 : /*
678 : * Process the pending-TID list between pages of the main scan
679 : */
680 : static void
681 4 : spgprocesspending(spgBulkDeleteState *bds)
682 : {
683 4 : Relation index = bds->info->index;
684 : spgVacPendingItem *pitem;
685 : spgVacPendingItem *nitem;
686 : BlockNumber blkno;
687 : Buffer buffer;
688 : Page page;
689 :
690 16 : for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
691 : {
692 12 : if (pitem->done)
693 4 : continue; /* ignore already-done items */
694 :
695 : /* call vacuum_delay_point while not holding any buffer lock */
696 8 : vacuum_delay_point();
697 :
698 : /* examine the referenced page */
699 8 : blkno = ItemPointerGetBlockNumber(&pitem->tid);
700 8 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
701 8 : RBM_NORMAL, bds->info->strategy);
702 8 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
703 8 : page = (Page) BufferGetPage(buffer);
704 :
705 8 : if (PageIsNew(page) || SpGistPageIsDeleted(page))
706 : {
707 : /* Probably shouldn't happen, but ignore it */
708 : }
709 8 : else if (SpGistPageIsLeaf(page))
710 : {
711 4 : if (SpGistBlockIsRoot(blkno))
712 : {
713 : /* this should definitely not happen */
714 0 : elog(ERROR, "redirection leads to root page of index \"%s\"",
715 : RelationGetRelationName(index));
716 : }
717 :
718 : /* deal with any deletable tuples */
719 4 : vacuumLeafPage(bds, index, buffer, true);
720 : /* might as well do this while we are here */
721 4 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
722 :
723 4 : SpGistSetLastUsedPage(index, buffer);
724 :
725 : /*
726 : * We can mark as done not only this item, but any later ones
727 : * pointing at the same page, since we vacuumed the whole page.
728 : */
729 4 : pitem->done = true;
730 8 : for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
731 : {
732 4 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
733 4 : nitem->done = true;
734 : }
735 : }
736 : else
737 : {
738 : /*
739 : * On an inner page, visit the referenced inner tuple and add all
740 : * its downlinks to the pending list. We might have pending items
741 : * for more than one inner tuple on the same page (in fact this is
742 : * pretty likely given the way space allocation works), so get
743 : * them all while we are here.
744 : */
745 16 : for (nitem = pitem; nitem != NULL; nitem = nitem->next)
746 : {
747 12 : if (nitem->done)
748 0 : continue;
749 12 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
750 : {
751 : OffsetNumber offset;
752 : SpGistInnerTuple innerTuple;
753 :
754 4 : offset = ItemPointerGetOffsetNumber(&nitem->tid);
755 4 : innerTuple = (SpGistInnerTuple) PageGetItem(page,
756 : PageGetItemId(page, offset));
757 4 : if (innerTuple->tupstate == SPGIST_LIVE)
758 : {
759 : SpGistNodeTuple node;
760 : int i;
761 :
762 20 : SGITITERATE(innerTuple, i, node)
763 : {
764 16 : if (ItemPointerIsValid(&node->t_tid))
765 8 : spgAddPendingTID(bds, &node->t_tid);
766 : }
767 : }
768 0 : else if (innerTuple->tupstate == SPGIST_REDIRECT)
769 : {
770 : /* transfer attention to redirect point */
771 0 : spgAddPendingTID(bds,
772 : &((SpGistDeadTuple) innerTuple)->pointer);
773 : }
774 : else
775 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
776 : innerTuple->tupstate);
777 :
778 4 : nitem->done = true;
779 : }
780 : }
781 : }
782 :
783 8 : UnlockReleaseBuffer(buffer);
784 : }
785 :
786 4 : spgClearPendingList(bds);
787 4 : }
788 :
789 : /*
790 : * Perform a bulkdelete scan
791 : */
792 : static void
793 70 : spgvacuumscan(spgBulkDeleteState *bds)
794 : {
795 70 : Relation index = bds->info->index;
796 : bool needLock;
797 : BlockNumber num_pages,
798 : blkno;
799 :
800 : /* Finish setting up spgBulkDeleteState */
801 70 : initSpGistState(&bds->spgstate, index);
802 70 : bds->pendingList = NULL;
803 70 : bds->myXmin = GetActiveSnapshot()->xmin;
804 70 : bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
805 :
806 : /*
807 : * Reset counts that will be incremented during the scan; needed in case
808 : * of multiple scans during a single VACUUM command
809 : */
810 70 : bds->stats->estimated_count = false;
811 70 : bds->stats->num_index_tuples = 0;
812 70 : bds->stats->pages_deleted = 0;
813 :
814 : /* We can skip locking for new or temp relations */
815 70 : needLock = !RELATION_IS_LOCAL(index);
816 :
817 : /*
818 : * The outer loop iterates over all index pages except the metapage, in
819 : * physical order (we hope the kernel will cooperate in providing
820 : * read-ahead for speed). It is critical that we visit all leaf pages,
821 : * including ones added after we start the scan, else we might fail to
822 : * delete some deletable tuples. See more extensive comments about this
823 : * in btvacuumscan().
824 : */
825 70 : blkno = SPGIST_METAPAGE_BLKNO + 1;
826 : for (;;)
827 : {
828 : /* Get the current relation length */
829 140 : if (needLock)
830 140 : LockRelationForExtension(index, ExclusiveLock);
831 140 : num_pages = RelationGetNumberOfBlocks(index);
832 140 : if (needLock)
833 140 : UnlockRelationForExtension(index, ExclusiveLock);
834 :
835 : /* Quit if we've scanned the whole relation */
836 140 : if (blkno >= num_pages)
837 70 : break;
838 : /* Iterate over pages, then loop back to recheck length */
839 4140 : for (; blkno < num_pages; blkno++)
840 : {
841 4070 : spgvacuumpage(bds, blkno);
842 : /* empty the pending-list after each page */
843 4070 : if (bds->pendingList != NULL)
844 4 : spgprocesspending(bds);
845 : }
846 : }
847 :
848 : /* Propagate local lastUsedPages cache to metablock */
849 70 : SpGistUpdateMetaPage(index);
850 :
851 : /*
852 : * If we found any empty pages (and recorded them in the FSM), then
853 : * forcibly update the upper-level FSM pages to ensure that searchers can
854 : * find them. It's possible that the pages were also found during
855 : * previous scans and so this is a waste of time, but it's cheap enough
856 : * relative to scanning the index that it shouldn't matter much, and
857 : * making sure that free pages are available sooner not later seems
858 : * worthwhile.
859 : *
860 : * Note that if no empty pages exist, we don't bother vacuuming the FSM at
861 : * all.
862 : */
863 70 : if (bds->stats->pages_deleted > 0)
864 40 : IndexFreeSpaceMapVacuum(index);
865 :
866 : /*
867 : * Truncate index if possible
868 : *
869 : * XXX disabled because it's unsafe due to possible concurrent inserts.
870 : * We'd have to rescan the pages to make sure they're still empty, and it
871 : * doesn't seem worth it. Note that btree doesn't do this either.
872 : *
873 : * Another reason not to truncate is that it could invalidate the cached
874 : * pages-with-freespace pointers in the metapage and other backends'
875 : * relation caches, that is leave them pointing to nonexistent pages.
876 : * Adding RelationGetNumberOfBlocks calls to protect the places that use
877 : * those pointers would be unduly expensive.
878 : */
879 : #ifdef NOT_USED
880 : if (num_pages > bds->lastFilledBlock + 1)
881 : {
882 : BlockNumber lastBlock = num_pages - 1;
883 :
884 : num_pages = bds->lastFilledBlock + 1;
885 : RelationTruncate(index, num_pages);
886 : bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
887 : bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
888 : }
889 : #endif
890 :
891 : /* Report final stats */
892 70 : bds->stats->num_pages = num_pages;
893 70 : bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
894 70 : bds->stats->pages_free = bds->stats->pages_deleted;
895 70 : }
896 :
897 : /*
898 : * Bulk deletion of all index entries pointing to a set of heap tuples.
899 : * The set of target tuples is specified via a callback routine that tells
900 : * whether any given heap tuple (identified by ItemPointer) is being deleted.
901 : *
902 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
903 : */
904 : IndexBulkDeleteResult *
905 10 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
906 : IndexBulkDeleteCallback callback, void *callback_state)
907 : {
908 : spgBulkDeleteState bds;
909 :
910 : /* allocate stats if first time through, else re-use existing struct */
911 10 : if (stats == NULL)
912 10 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
913 10 : bds.info = info;
914 10 : bds.stats = stats;
915 10 : bds.callback = callback;
916 10 : bds.callback_state = callback_state;
917 :
918 10 : spgvacuumscan(&bds);
919 :
920 10 : return stats;
921 : }
922 :
923 : /* Dummy callback to delete no tuples during spgvacuumcleanup */
924 : static bool
925 482160 : dummy_callback(ItemPointer itemptr, void *state)
926 : {
927 482160 : return false;
928 : }
929 :
930 : /*
931 : * Post-VACUUM cleanup.
932 : *
933 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
934 : */
935 : IndexBulkDeleteResult *
936 70 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
937 : {
938 : spgBulkDeleteState bds;
939 :
940 : /* No-op in ANALYZE ONLY mode */
941 70 : if (info->analyze_only)
942 0 : return stats;
943 :
944 : /*
945 : * We don't need to scan the index if there was a preceding bulkdelete
946 : * pass. Otherwise, make a pass that won't delete any live tuples, but
947 : * might still accomplish useful stuff with redirect/placeholder cleanup
948 : * and/or FSM housekeeping, and in any case will provide stats.
949 : */
950 70 : if (stats == NULL)
951 : {
952 60 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
953 60 : bds.info = info;
954 60 : bds.stats = stats;
955 60 : bds.callback = dummy_callback;
956 60 : bds.callback_state = NULL;
957 :
958 60 : spgvacuumscan(&bds);
959 : }
960 :
961 : /*
962 : * It's quite possible for us to be fooled by concurrent tuple moves into
963 : * double-counting some index tuples, so disbelieve any total that exceeds
964 : * the underlying heap's count ... if we know that accurately. Otherwise
965 : * this might just make matters worse.
966 : */
967 70 : if (!info->estimated_count)
968 : {
969 70 : if (stats->num_index_tuples > info->num_heap_tuples)
970 2 : stats->num_index_tuples = info->num_heap_tuples;
971 : }
972 :
973 70 : return stats;
974 : }
|