Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pruneheap.c
4 : * heap page pruning and HOT-chain management code
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/pruneheap.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/heapam.h"
18 : #include "access/heapam_xlog.h"
19 : #include "access/htup_details.h"
20 : #include "access/multixact.h"
21 : #include "access/transam.h"
22 : #include "access/visibilitymapdefs.h"
23 : #include "access/xlog.h"
24 : #include "access/xloginsert.h"
25 : #include "commands/vacuum.h"
26 : #include "executor/instrument.h"
27 : #include "miscadmin.h"
28 : #include "pgstat.h"
29 : #include "storage/bufmgr.h"
30 : #include "utils/rel.h"
31 : #include "utils/snapmgr.h"
32 :
33 : /* Working data for heap_page_prune_and_freeze() and subroutines */
34 : typedef struct
35 : {
36 : /*-------------------------------------------------------
37 : * Arguments passed to heap_page_prune_and_freeze()
38 : *-------------------------------------------------------
39 : */
40 :
41 : /* tuple visibility test, initialized for the relation */
42 : GlobalVisState *vistest;
43 : /* whether or not dead items can be set LP_UNUSED during pruning */
44 : bool mark_unused_now;
45 : /* whether to attempt freezing tuples */
46 : bool attempt_freeze;
47 : struct VacuumCutoffs *cutoffs; /* freeze cutoffs; NULL unless attempt_freeze */
48 : Relation relation; /* relation containing the page being pruned */
49 :
50 : /*
51 : * Keep the buffer, block, and page handy so that helpers needing to
52 : * access them don't need to make repeated calls to BufferGetBlockNumber()
53 : * and BufferGetPage().
54 : */
55 : BlockNumber block;
56 : Buffer buffer;
57 : Page page;
58 :
59 : /*-------------------------------------------------------
60 : * Fields describing what to do to the page
61 : *-------------------------------------------------------
62 : */
63 : TransactionId new_prune_xid; /* new prune hint value for pd_prune_xid */
64 : TransactionId latest_xid_removed; /* newest xid removed; recovery conflict horizon */
65 : int nredirected; /* numbers of entries in arrays below */
66 : int ndead;
67 : int nunused;
68 : int nfrozen;
69 : /* arrays that accumulate indexes of items to be changed */
70 : OffsetNumber redirected[MaxHeapTuplesPerPage * 2]; /* two slots per redirect: from, to */
71 : OffsetNumber nowdead[MaxHeapTuplesPerPage];
72 : OffsetNumber nowunused[MaxHeapTuplesPerPage];
73 : HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
74 :
75 : /*-------------------------------------------------------
76 : * Working state for HOT chain processing
77 : *-------------------------------------------------------
78 : */
79 :
80 : /*
81 : * 'root_items' contains offsets of all LP_REDIRECT line pointers and
82 : * normal non-HOT tuples. They can be stand-alone items or the first item
83 : * in a HOT chain. 'heaponly_items' contains heap-only tuples which can
84 : * only be removed as part of a HOT chain.
85 : */
86 : int nroot_items;
87 : OffsetNumber root_items[MaxHeapTuplesPerPage];
88 : int nheaponly_items;
89 : OffsetNumber heaponly_items[MaxHeapTuplesPerPage];
90 :
91 : /*
92 : * processed[offnum] is true if item at offnum has been processed.
93 : *
94 : * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
95 : * 1. Otherwise every access would need to subtract 1.
96 : */
97 : bool processed[MaxHeapTuplesPerPage + 1];
98 :
99 : /*
100 : * Tuple visibility is only computed once for each tuple, for correctness
101 : * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
102 : * details. This is of type int8[], instead of HTSV_Result[], so we can
103 : * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
104 : * items.
105 : *
106 : * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
107 : * 1. Otherwise every access would need to subtract 1.
108 : */
109 : int8 htsv[MaxHeapTuplesPerPage + 1];
110 :
111 : /*-------------------------------------------------------
112 : * Working state for freezing
113 : *-------------------------------------------------------
114 : */
115 : HeapPageFreeze pagefrz;
116 :
117 : /*-------------------------------------------------------
118 : * Information about what was done
119 : *
120 : * These fields are not used by pruning itself for the most part, but are
121 : * used to collect information about what was pruned and what state the
122 : * page is in after pruning, for the benefit of the caller. They are
123 : * copied to the caller's PruneFreezeResult at the end.
124 : *-------------------------------------------------------
125 : */
126 :
127 : int ndeleted; /* Number of tuples deleted from the page */
128 :
129 : /* Number of live and recently dead tuples, after pruning */
130 : int live_tuples;
131 : int recently_dead_tuples;
132 :
133 : /* Whether or not the page makes rel truncation unsafe */
134 : bool hastup;
135 :
136 : /*
137 : * LP_DEAD items on the page after pruning. Includes existing LP_DEAD
138 : * items
139 : */
140 : int lpdead_items; /* number of items in the array */
141 : OffsetNumber *deadoffsets; /* points directly to presult->deadoffsets */
142 :
143 : /*
144 : * set_all_visible and set_all_frozen indicate if the all-visible and
145 : * all-frozen bits in the visibility map can be set for this page after
146 : * pruning.
147 : *
148 : * visibility_cutoff_xid is the newest xmin of live tuples on the page.
149 : * The caller can use it as the conflict horizon, when setting the VM
150 : * bits. It is only valid if we froze some tuples, and set_all_frozen is
151 : * true.
152 : *
153 : * NOTE: set_all_visible and set_all_frozen initially don't include
154 : * LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
155 : * use them to decide whether to freeze the page or not. The
156 : * set_all_visible and set_all_frozen values returned to the caller are
157 : * adjusted to include LP_DEAD items after we determine whether to
158 : * opportunistically freeze.
159 : */
160 : bool set_all_visible;
161 : bool set_all_frozen;
162 : TransactionId visibility_cutoff_xid;
163 : } PruneState;
164 :
165 : /* Local functions */
166 : static void prune_freeze_setup(PruneFreezeParams *params,
167 : TransactionId *new_relfrozen_xid,
168 : MultiXactId *new_relmin_mxid,
169 : PruneFreezeResult *presult,
170 : PruneState *prstate);
171 : static void prune_freeze_plan(PruneState *prstate,
172 : OffsetNumber *off_loc);
173 : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
174 : HeapTuple tup);
175 : static inline HTSV_Result htsv_get_valid_status(int status);
176 : static void heap_prune_chain(OffsetNumber maxoff,
177 : OffsetNumber rootoffnum, PruneState *prstate);
178 : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
179 : static void heap_prune_record_redirect(PruneState *prstate,
180 : OffsetNumber offnum, OffsetNumber rdoffnum,
181 : bool was_normal);
182 : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
183 : bool was_normal);
184 : static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
185 : bool was_normal);
186 : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
187 :
188 : static void heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum);
189 : static void heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum);
190 : static void heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum);
191 : static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
192 :
193 : static void page_verify_redirects(Page page);
194 :
195 : static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
196 : PruneState *prstate);
197 :
198 :
199 : /*
200 : * Optionally prune and repair fragmentation in the specified page.
201 : *
202 : * This is an opportunistic function. It will perform housekeeping
203 : * only if the page heuristically looks like a candidate for pruning and we
204 : * can acquire buffer cleanup lock without blocking.
205 : *
206 : * Note: this is called quite often. It's important that it fall out quickly
207 : * if there's not any use in pruning.
208 : *
209 : * Caller must have pin on the buffer, and must *not* have a lock on it.
210 : *
211 : * This function may pin *vmbuffer. It's passed by reference so the caller can
212 : * reuse the pin across calls, avoiding repeated pin/unpin cycles. Caller is
213 : * responsible for unpinning it.
214 : */
215 : void
216 21340497 : heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
217 : {
218 21340497 : Page page = BufferGetPage(buffer);
219 : TransactionId prune_xid;
220 : GlobalVisState *vistest;
221 : Size minfree; /* free-space threshold below which we prune */
222 :
223 : /*
224 : * We can't write WAL in recovery mode, so there's no point trying to
225 : * clean the page. The primary will likely issue a cleaning WAL record
226 : * soon anyway, so this is no particular loss.
227 : */
228 21340497 : if (RecoveryInProgress())
229 241407 : return;
230 :
231 : /*
232 : * First check whether there's any chance there's something to prune,
233 : * determining the appropriate horizon is a waste if there's no prune_xid
234 : * (i.e. no updates/deletes left potentially dead tuples around).
235 : */
236 21099090 : prune_xid = PageGetPruneXid(page);
237 21099090 : if (!TransactionIdIsValid(prune_xid))
238 10829226 : return;
239 :
240 : /*
241 : * Check whether prune_xid indicates that there may be dead rows that can
242 : * be cleaned up.
243 : */
244 10269864 : vistest = GlobalVisTestFor(relation);
245 :
246 10269864 : if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
247 8531113 : return;
248 :
249 : /*
250 : * We prune when a previous UPDATE failed to find enough space on the page
251 : * for a new tuple version, or when free space falls below the relation's
252 : * fill-factor target (but not less than 10%).
253 : *
254 : * Checking free space here is questionable since we aren't holding any
255 : * lock on the buffer; in the worst case we could get a bogus answer. It's
256 : * unlikely to be *seriously* wrong, though, since reading either pd_lower
257 : * or pd_upper is probably atomic. Avoiding taking a lock seems more
258 : * important than sometimes getting a wrong answer in what is after all
259 : * just a heuristic estimate.
260 : */
261 1738751 : minfree = RelationGetTargetPageFreeSpace(relation,
262 : HEAP_DEFAULT_FILLFACTOR);
263 1738751 : minfree = Max(minfree, BLCKSZ / 10);
264 :
265 1738751 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
266 : {
267 : /* OK, try to get exclusive buffer lock */
268 45072 : if (!ConditionalLockBufferForCleanup(buffer))
269 529 : return;
270 :
271 : /*
272 : * Now that we have buffer lock, get accurate information about the
273 : * page's free space, and recheck the heuristic about whether to
274 : * prune.
275 : */
276 44543 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
277 : {
278 : OffsetNumber dummy_off_loc;
279 : PruneFreezeResult presult;
280 :
281 : /*
282 : * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
283 : * regardless of whether or not the relation has indexes, since we
284 : * cannot safely determine that during on-access pruning with the
285 : * current implementation.
286 : */
287 44543 : PruneFreezeParams params = {
288 : .relation = relation,
289 : .buffer = buffer,
290 : .reason = PRUNE_ON_ACCESS,
291 : .options = 0,
292 : .vistest = vistest,
293 : .cutoffs = NULL,
294 : };
295 :
296 44543 : heap_page_prune_and_freeze(¶ms, &presult, &dummy_off_loc,
297 : NULL, NULL);
298 :
299 : /*
300 : * Report the number of tuples reclaimed to pgstats. This is
301 : * presult.ndeleted minus the number of newly-LP_DEAD-set items.
302 : *
303 : * We derive the number of dead tuples like this to avoid totally
304 : * forgetting about items that were set to LP_DEAD, since they
305 : * still need to be cleaned up by VACUUM. We only want to count
306 : * heap-only tuples that just became LP_UNUSED in our report,
307 : * which don't.
308 : *
309 : * VACUUM doesn't have to compensate in the same way when it
310 : * tracks ndeleted, since it will set the same LP_DEAD items to
311 : * LP_UNUSED separately.
312 : */
313 44543 : if (presult.ndeleted > presult.nnewlpdead)
314 20113 : pgstat_update_heap_dead_tuples(relation,
315 20113 : presult.ndeleted - presult.nnewlpdead);
316 : }
317 :
318 : /* And release buffer lock */
319 44543 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
320 :
321 : /*
322 : * We avoid reuse of any free space created on the page by unrelated
323 : * UPDATEs/INSERTs by opting to not update the FSM at this point. The
324 : * free space should be reused by UPDATEs to *this* page.
325 : */
326 : }
327 : }
328 :
329 : /*
330 : * Helper for heap_page_prune_and_freeze() to initialize the PruneState using
331 : * the provided parameters.
332 : *
333 : * params, new_relfrozen_xid, new_relmin_mxid, and presult are input
334 : * parameters and are not modified by this function. Only prstate is modified.
335 : */
336 : static void
337 447706 : prune_freeze_setup(PruneFreezeParams *params,
338 : TransactionId *new_relfrozen_xid,
339 : MultiXactId *new_relmin_mxid,
340 : PruneFreezeResult *presult,
341 : PruneState *prstate)
342 : {
343 : /* Copy parameters to prstate */
344 447706 : prstate->vistest = params->vistest;
345 447706 : prstate->mark_unused_now =
346 447706 : (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
347 :
348 : /* cutoffs must be provided if we will attempt freezing */
349 : Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
350 447706 : prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
351 447706 : prstate->cutoffs = params->cutoffs;
352 447706 : prstate->relation = params->relation;
353 447706 : prstate->block = BufferGetBlockNumber(params->buffer);
354 447706 : prstate->buffer = params->buffer;
355 447706 : prstate->page = BufferGetPage(params->buffer);
356 :
357 : /*
358 : * Our strategy is to scan the page and make lists of items to change,
359 : * then apply the changes within a critical section. This keeps as much
360 : * logic as possible out of the critical section, and also ensures that
361 : * WAL replay will work the same as the normal case.
362 : *
363 : * First, initialize the new pd_prune_xid value to zero (indicating no
364 : * prunable tuples). If we find any tuples which may soon become
365 : * prunable, we will save the lowest relevant XID in new_prune_xid. Also
366 : * initialize the rest of our working state.
367 : */
368 447706 : prstate->new_prune_xid = InvalidTransactionId;
369 447706 : prstate->latest_xid_removed = InvalidTransactionId; /* nothing removed yet */
370 447706 : prstate->nredirected = prstate->ndead = prstate->nunused = 0;
371 447706 : prstate->nfrozen = 0;
372 447706 : prstate->nroot_items = 0;
373 447706 : prstate->nheaponly_items = 0;
374 :
375 : /* initialize page freezing working state */
376 447706 : prstate->pagefrz.freeze_required = false;
377 447706 : prstate->pagefrz.FreezePageConflictXid = InvalidTransactionId;
378 447706 : if (prstate->attempt_freeze)
379 : {
380 : Assert(new_relfrozen_xid && new_relmin_mxid);
381 403163 : prstate->pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
382 403163 : prstate->pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
383 403163 : prstate->pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
384 403163 : prstate->pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
385 : }
386 : else
387 : {
388 : Assert(!new_relfrozen_xid && !new_relmin_mxid);
389 44543 : prstate->pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
390 44543 : prstate->pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
391 44543 : prstate->pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
392 44543 : prstate->pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
393 : }
394 :
395 447706 : prstate->ndeleted = 0;
396 447706 : prstate->live_tuples = 0;
397 447706 : prstate->recently_dead_tuples = 0;
398 447706 : prstate->hastup = false;
399 447706 : prstate->lpdead_items = 0;
400 :
401 : /*
402 : * deadoffsets are filled in during pruning but are only used to populate
403 : * PruneFreezeResult->deadoffsets. To avoid needing two copies of the
404 : * array, just save a pointer to the result offsets array in the
405 : * PruneState.
406 : */
407 447706 : prstate->deadoffsets = presult->deadoffsets;
408 :
409 : /*
410 : * Vacuum may update the VM after we're done. We can keep track of
411 : * whether the page will be all-visible and all-frozen after pruning and
412 : * freezing to help the caller to do that.
413 : *
414 : * Currently, only VACUUM sets the VM bits. To save the effort, only do
415 : * the bookkeeping if the caller needs it. Currently, that's tied to
416 : * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
417 : * to update the VM bits without also freezing or freeze without also
418 : * setting the VM bits.
419 : *
420 : * In addition to telling the caller whether it can set the VM bit, we
421 : * also use 'set_all_visible' and 'set_all_frozen' for our own
422 : * decision-making. If the whole page would become frozen, we consider
423 : * opportunistically freezing tuples. We will not be able to freeze the
424 : * whole page if there are tuples present that are not visible to everyone
425 : * or if there are dead tuples which are not yet removable. However, dead
426 : * tuples which will be removed by the end of vacuuming should not
427 : * preclude us from opportunistically freezing. Because of that, we do
428 : * not immediately clear set_all_visible and set_all_frozen when we see
429 : * LP_DEAD items. We fix that after scanning the line pointers. We must
430 : * correct set_all_visible and set_all_frozen before we return them to the
431 : * caller, so that the caller doesn't set the VM bits incorrectly.
432 : */
433 447706 : if (prstate->attempt_freeze)
434 : {
435 403163 : prstate->set_all_visible = true;
436 403163 : prstate->set_all_frozen = true;
437 : }
438 : else
439 : {
440 : /*
441 : * Initializing to false allows skipping the work to update them in
442 : * heap_prune_record_unchanged_lp_normal().
443 : */
444 44543 : prstate->set_all_visible = false;
445 44543 : prstate->set_all_frozen = false;
446 : }
447 :
448 : /*
449 : * The visibility cutoff xid is the newest xmin of live tuples on the
450 : * page. In the common case, this will be set as the conflict horizon the
451 : * caller can use for updating the VM. If, at the end of freezing and
452 : * pruning, the page is all-frozen, there is no possibility that any
453 : * running transaction on the standby does not see tuples on the page as
454 : * all-visible, so the conflict horizon remains InvalidTransactionId.
455 : */
456 447706 : prstate->visibility_cutoff_xid = InvalidTransactionId;
457 447706 : }
458 :
459 : /*
460 : * Helper for heap_page_prune_and_freeze(). Iterates over every tuple on the
461 : * page, examines its visibility information, and determines the appropriate
462 : * action for each tuple. All tuples are processed and classified during this
463 : * phase, but no modifications are made to the page until the later execution
464 : * stage.
465 : *
466 : * *off_loc is used for error callback and cleared before returning.
467 : */
468 : static void
469 447706 : prune_freeze_plan(PruneState *prstate, OffsetNumber *off_loc)
470 : {
471 447706 : Page page = prstate->page;
472 447706 : BlockNumber blockno = prstate->block;
473 447706 : OffsetNumber maxoff = PageGetMaxOffsetNumber(prstate->page);
474 : OffsetNumber offnum;
475 : HeapTupleData tup; /* scratch tuple reused for each visibility check */
476 :
477 447706 : tup.t_tableOid = RelationGetRelid(prstate->relation);
478 :
479 : /*
480 : * Determine HTSV for all tuples, and queue them up for processing as HOT
481 : * chain roots or as heap-only items.
482 : *
483 : * Determining HTSV only once for each tuple is required for correctness,
484 : * to deal with cases where running HTSV twice could result in different
485 : * results. For example, RECENTLY_DEAD can turn to DEAD if another
486 : * checked item causes GlobalVisTestIsRemovableFullXid() to update the
487 : * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
488 : * transaction aborts.
489 : *
490 : * It's also good for performance. Most commonly tuples within a page are
491 : * stored at decreasing offsets (while the items are stored at increasing
492 : * offsets). When processing all tuples on a page this leads to reading
493 : * memory at decreasing offsets within a page, with a variable stride.
494 : * That's hard for CPU prefetchers to deal with. Processing the items in
495 : * reverse order (and thus the tuples in increasing order) increases
496 : * prefetching efficiency significantly / decreases the number of cache
497 : * misses.
498 : */
499 447706 : for (offnum = maxoff;
500 25050406 : offnum >= FirstOffsetNumber;
501 24602700 : offnum = OffsetNumberPrev(offnum))
502 : {
503 24602700 : ItemId itemid = PageGetItemId(page, offnum);
504 : HeapTupleHeader htup;
505 :
506 : /*
507 : * Set the offset number so that we can display it along with any
508 : * error that occurred while processing this tuple.
509 : */
510 24602700 : *off_loc = offnum;
511 :
512 24602700 : prstate->processed[offnum] = false;
513 24602700 : prstate->htsv[offnum] = -1;
514 :
515 : /* Nothing to do if slot doesn't contain a tuple */
516 24602700 : if (!ItemIdIsUsed(itemid))
517 : {
518 257045 : heap_prune_record_unchanged_lp_unused(prstate, offnum);
519 257045 : continue;
520 : }
521 :
522 24345655 : if (ItemIdIsDead(itemid))
523 : {
524 : /*
525 : * If the caller set mark_unused_now true, we can set dead line
526 : * pointers LP_UNUSED now.
527 : */
528 1282340 : if (unlikely(prstate->mark_unused_now))
529 2167 : heap_prune_record_unused(prstate, offnum, false);
530 : else
531 1280173 : heap_prune_record_unchanged_lp_dead(prstate, offnum);
532 1282340 : continue;
533 : }
534 :
535 23063315 : if (ItemIdIsRedirected(itemid))
536 : {
537 : /* This is the start of a HOT chain */
538 338052 : prstate->root_items[prstate->nroot_items++] = offnum;
539 338052 : continue;
540 : }
541 :
542 : Assert(ItemIdIsNormal(itemid));
543 :
544 : /*
545 : * Get the tuple's visibility status and queue it up for processing.
546 : */
547 22725263 : htup = (HeapTupleHeader) PageGetItem(page, itemid);
548 22725263 : tup.t_data = htup;
549 22725263 : tup.t_len = ItemIdGetLength(itemid);
550 22725263 : ItemPointerSet(&tup.t_self, blockno, offnum);
551 :
552 22725263 : prstate->htsv[offnum] = heap_prune_satisfies_vacuum(prstate, &tup);
553 :
554 22725263 : if (!HeapTupleHeaderIsHeapOnly(htup))
555 22250141 : prstate->root_items[prstate->nroot_items++] = offnum;
556 : else
557 475122 : prstate->heaponly_items[prstate->nheaponly_items++] = offnum;
558 : }
559 :
560 : /*
561 : * Process HOT chains.
562 : *
563 : * We added the items to the array starting from 'maxoff', so by
564 : * processing the array in reverse order, we process the items in
565 : * ascending offset number order. The order doesn't matter for
566 : * correctness, but some quick micro-benchmarking suggests that this is
567 : * faster. (Earlier PostgreSQL versions, which scanned all the items on
568 : * the page instead of using the root_items array, also did it in
569 : * ascending offset number order.)
570 : */
571 23035899 : for (int i = prstate->nroot_items - 1; i >= 0; i--)
572 : {
573 22588193 : offnum = prstate->root_items[i];
574 :
575 : /* Ignore items already processed as part of an earlier chain */
576 22588193 : if (prstate->processed[offnum])
577 0 : continue;
578 :
579 : /* see preceding loop */
580 22588193 : *off_loc = offnum;
581 :
582 : /* Process this item or chain of items */
583 22588193 : heap_prune_chain(maxoff, offnum, prstate);
584 : }
585 :
586 : /*
587 : * Process any heap-only tuples that were not already processed as part of
588 : * a HOT chain.
589 : */
590 922828 : for (int i = prstate->nheaponly_items - 1; i >= 0; i--)
591 : {
592 475122 : offnum = prstate->heaponly_items[i];
593 :
594 475122 : if (prstate->processed[offnum])
595 459471 : continue;
596 :
597 : /* see preceding loop */
598 15651 : *off_loc = offnum;
599 :
600 : /*
601 : * If the tuple is DEAD and doesn't chain to anything else, mark it
602 : * unused. (If it does chain, we can only remove it as part of
603 : * pruning its chain.)
604 : *
605 : * We need this primarily to handle aborted HOT updates, that is,
606 : * XMIN_INVALID heap-only tuples. Those might not be linked to by any
607 : * chain, since the parent tuple might be re-updated before any
608 : * pruning occurs. So we have to be able to reap them separately from
609 : * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
610 : * return true for an XMIN_INVALID tuple, so this code will work even
611 : * when there were sequential updates within the aborted transaction.)
612 : */
613 15651 : if (prstate->htsv[offnum] == HEAPTUPLE_DEAD)
614 : {
615 3093 : ItemId itemid = PageGetItemId(page, offnum);
616 3093 : HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
617 :
618 3093 : if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
619 : {
620 3093 : HeapTupleHeaderAdvanceConflictHorizon(htup,
621 : &prstate->latest_xid_removed);
622 3093 : heap_prune_record_unused(prstate, offnum, true);
623 : }
624 : else
625 : {
626 : /*
627 : * This tuple should've been processed and removed as part of
628 : * a HOT chain, so something's wrong. To preserve evidence,
629 : * we don't dare to remove it. We cannot leave behind a DEAD
630 : * tuple either, because that will cause VACUUM to error out.
631 : * Throwing an error with a distinct error message seems like
632 : * the least bad option.
633 : */
634 0 : elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
635 : blockno, offnum);
636 : }
637 : }
638 : else
639 12558 : heap_prune_record_unchanged_lp_normal(prstate, offnum);
640 : }
641 :
642 : /* We should now have processed every tuple exactly once */
643 : #ifdef USE_ASSERT_CHECKING
644 : for (offnum = FirstOffsetNumber;
645 : offnum <= maxoff;
646 : offnum = OffsetNumberNext(offnum))
647 : {
648 : *off_loc = offnum;
649 :
650 : Assert(prstate->processed[offnum]);
651 : }
652 : #endif
653 :
654 : /* Clear the offset information once we have processed the given page. */
655 447706 : *off_loc = InvalidOffsetNumber;
656 447706 : }
657 :
658 : /*
659 : * Decide whether to proceed with freezing according to the freeze plans
660 : * prepared for the current heap buffer. If freezing is chosen, this function
661 : * performs several pre-freeze checks.
662 : *
663 : * The values of do_prune, do_hint_prune, and did_tuple_hint_fpi must be
664 : * determined before calling this function.
665 : *
666 : * prstate is both an input and output parameter.
667 : *
668 : * Returns true if we should apply the freeze plans and freeze tuples on the
669 : * page, and false otherwise.
670 : */
671 : static bool
672 447706 : heap_page_will_freeze(bool did_tuple_hint_fpi,
673 : bool do_prune,
674 : bool do_hint_prune,
675 : PruneState *prstate)
676 : {
677 447706 : bool do_freeze = false; /* pessimistic default: no freezing */
678 :
679 : /*
680 : * If the caller specified we should not attempt to freeze any tuples,
681 : * validate that everything is in the right state and return.
682 : */
683 447706 : if (!prstate->attempt_freeze)
684 : {
685 : Assert(!prstate->set_all_frozen && prstate->nfrozen == 0);
686 : Assert(prstate->lpdead_items == 0 || !prstate->set_all_visible);
687 44543 : return false;
688 : }
689 :
690 403163 : if (prstate->pagefrz.freeze_required)
691 : {
692 : /*
693 : * heap_prepare_freeze_tuple indicated that at least one XID/MXID from
694 : * before FreezeLimit/MultiXactCutoff is present. Must freeze to
695 : * advance relfrozenxid/relminmxid.
696 : */
697 22057 : do_freeze = true;
698 : }
699 : else
700 : {
701 : /*
702 : * Opportunistically freeze the page if we are generating an FPI
703 : * anyway and if doing so means that we can set the page all-frozen
704 : * afterwards (might not happen until VACUUM's final heap pass).
705 : *
706 : * XXX: Previously, we knew if pruning emitted an FPI by checking
707 : * pgWalUsage.wal_fpi before and after pruning. Once the freeze and
708 : * prune records were combined, this heuristic couldn't be used
709 : * anymore. The opportunistic freeze heuristic must be improved;
710 : * however, for now, try to approximate the old logic.
711 : */
712 381106 : if (prstate->set_all_frozen && prstate->nfrozen > 0)
713 : {
714 : Assert(prstate->set_all_visible);
715 :
716 : /*
717 : * Freezing would make the page all-frozen. Have already emitted
718 : * an FPI or will do so anyway?
719 : */
720 20838 : if (RelationNeedsWAL(prstate->relation))
721 : {
722 19181 : if (did_tuple_hint_fpi)
723 1286 : do_freeze = true;
724 17895 : else if (do_prune)
725 : {
726 1698 : if (XLogCheckBufferNeedsBackup(prstate->buffer))
727 563 : do_freeze = true;
728 : }
729 16197 : else if (do_hint_prune)
730 : {
731 18 : if (XLogHintBitIsNeeded() &&
732 9 : XLogCheckBufferNeedsBackup(prstate->buffer))
733 4 : do_freeze = true;
734 : }
735 : }
736 : }
737 : }
738 :
739 403163 : if (do_freeze)
740 : {
741 : /*
742 : * Validate the tuples we will be freezing before entering the
743 : * critical section.
744 : */
745 23910 : heap_pre_freeze_checks(prstate->buffer, prstate->frozen, prstate->nfrozen);
746 : Assert(TransactionIdPrecedes(prstate->pagefrz.FreezePageConflictXid,
747 : prstate->cutoffs->OldestXmin));
748 : }
749 379253 : else if (prstate->nfrozen > 0)
750 : {
751 : /*
752 : * The page contained some tuples that were not already frozen, and we
753 : * chose not to freeze them now. The page won't be all-frozen then.
754 : */
755 : Assert(!prstate->pagefrz.freeze_required);
756 :
757 19328 : prstate->set_all_frozen = false;
758 19328 : prstate->nfrozen = 0; /* avoid miscounts in instrumentation */
759 : }
760 : else
761 : {
762 : /*
763 : * We have no freeze plans to execute. The page might already be
764 : * all-frozen (perhaps only following pruning), though. Such pages
765 : * can be marked all-frozen in the VM by our caller, even though none
766 : * of its tuples were newly frozen here.
767 : */
768 : }
769 :
770 403163 : return do_freeze;
771 : }
772 :
773 :
774 : /*
775 : * Prune and repair fragmentation and potentially freeze tuples on the
776 : * specified page.
777 : *
778 : * Caller must have pin and buffer cleanup lock on the page. Note that we
779 : * don't update the FSM information for page on caller's behalf. Caller might
780 : * also need to account for a reduction in the length of the line pointer
781 : * array following array truncation by us.
782 : *
783 : * params contains the input parameters used to control freezing and pruning
784 : * behavior. See the definition of PruneFreezeParams for more on what each
785 : * parameter does.
786 : *
787 : * If the HEAP_PAGE_PRUNE_FREEZE option is set in params, we will freeze
788 : * tuples if it's required in order to advance relfrozenxid / relminmxid, or
789 : * if it's considered advantageous for overall system performance to do so
790 : * now. The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
791 : * 'new_relmin_mxid' arguments are required when freezing. When
792 : * HEAP_PAGE_PRUNE_FREEZE option is passed, we also set
793 : * presult->set_all_visible and presult->set_all_frozen after determining
794 : * whether or not to opportunistically freeze, to indicate if the VM bits can
795 : * be set. They are always set to false when the HEAP_PAGE_PRUNE_FREEZE
796 : * option is not passed, because at the moment only callers that also freeze
797 : * need that information.
798 : *
799 : * presult contains output parameters needed by callers, such as the number of
800 : * tuples removed and the offsets of dead items on the page after pruning.
801 : * heap_page_prune_and_freeze() is responsible for initializing it. Required
802 : * by all callers.
803 : *
804 : * off_loc is the offset location required by the caller to use in error
805 : * callback.
806 : *
807 : * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
808 : * HEAP_PAGE_PRUNE_FREEZE option is set in params. On entry, they contain the
809 : * oldest XID and multi-XID seen on the relation so far. They will be updated
810 : * with the oldest values present on the page after pruning. After processing
811 : * the whole relation, VACUUM can use these values as the new
812 : * relfrozenxid/relminmxid for the relation.
813 : */
814 : void
815 447706 : heap_page_prune_and_freeze(PruneFreezeParams *params,
816 : PruneFreezeResult *presult,
817 : OffsetNumber *off_loc,
818 : TransactionId *new_relfrozen_xid,
819 : MultiXactId *new_relmin_mxid)
820 : {
821 : PruneState prstate;
822 : bool do_freeze;
823 : bool do_prune;
824 : bool do_hint_prune;
825 : bool did_tuple_hint_fpi;
826 447706 : int64 fpi_before = pgWalUsage.wal_fpi;
827 :
828 : /* Initialize prstate */
829 447706 : prune_freeze_setup(params,
830 : new_relfrozen_xid, new_relmin_mxid,
831 : presult, &prstate);
832 :
833 : /*
834 : * Examine all line pointers and tuple visibility information to determine
835 : * which line pointers should change state and which tuples may be frozen.
836 : * Prepare queue of state changes to later be executed in a critical
837 : * section.
838 : */
839 447706 : prune_freeze_plan(&prstate, off_loc);
840 :
841 : /*
842 : * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
843 : * checking tuple visibility information in prune_freeze_plan() may have
844 : * caused an FPI to be emitted.
845 : */
846 447706 : did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;
847 :
 : /* Pruning is needed iff any line pointer state changes were queued. */
848 1326152 : do_prune = prstate.nredirected > 0 ||
849 838172 : prstate.ndead > 0 ||
850 390466 : prstate.nunused > 0;
851 :
852 : /*
853 : * Even if we don't prune anything, if we found a new value for the
854 : * pd_prune_xid field or the page was marked full, we will update the hint
855 : * bit.
856 : */
857 837854 : do_hint_prune = PageGetPruneXid(prstate.page) != prstate.new_prune_xid ||
858 390148 : PageIsFull(prstate.page);
859 :
860 : /*
861 : * Decide if we want to go ahead with freezing according to the freeze
862 : * plans we prepared, or not.
863 : */
864 447706 : do_freeze = heap_page_will_freeze(did_tuple_hint_fpi,
865 : do_prune,
866 : do_hint_prune,
867 : &prstate);
868 :
869 : /*
870 : * While scanning the line pointers, we did not clear
871 : * set_all_visible/set_all_frozen when encountering LP_DEAD items because
872 : * we wanted the decision whether or not to freeze the page to be
873 : * unaffected by the short-term presence of LP_DEAD items. These LP_DEAD
874 : * items are effectively assumed to be LP_UNUSED items in the making. It
875 : * doesn't matter which vacuum heap pass (initial pass or final pass) ends
876 : * up setting the page all-frozen, as long as the ongoing VACUUM does it.
877 : *
878 : * Now that we finished determining whether or not to freeze the page,
879 : * update set_all_visible and set_all_frozen so that they reflect the true
880 : * state of the page for setting PD_ALL_VISIBLE and VM bits.
881 : */
882 447706 : if (prstate.lpdead_items > 0)
883 56249 : prstate.set_all_visible = prstate.set_all_frozen = false;
884 :
885 : Assert(!prstate.set_all_frozen || prstate.set_all_visible);
886 :
887 : /* Any error while applying the changes is critical */
888 447706 : START_CRIT_SECTION();
889 :
890 447706 : if (do_hint_prune)
891 : {
892 : /*
893 : * Update the page's pd_prune_xid field to either zero, or the lowest
894 : * XID of any soon-prunable tuple.
895 : */
896 57621 : ((PageHeader) prstate.page)->pd_prune_xid = prstate.new_prune_xid;
897 :
898 : /*
899 : * Also clear the "page is full" flag, since there's no point in
900 : * repeating the prune/defrag process until something else happens to
901 : * the page.
902 : */
903 57621 : PageClearFull(prstate.page);
904 :
905 : /*
906 : * If that's all we had to do to the page, this is a non-WAL-logged
907 : * hint. If we are going to freeze or prune the page, we will mark
908 : * the buffer dirty below.
909 : */
910 57621 : if (!do_freeze && !do_prune)
911 196 : MarkBufferDirtyHint(prstate.buffer, true);
912 : }
913 :
914 447706 : if (do_prune || do_freeze)
915 : {
916 : /* Apply the planned item changes and repair page fragmentation. */
917 79820 : if (do_prune)
918 : {
919 57663 : heap_page_prune_execute(prstate.buffer, false,
920 : prstate.redirected, prstate.nredirected,
921 : prstate.nowdead, prstate.ndead,
922 : prstate.nowunused, prstate.nunused);
923 : }
924 :
925 79820 : if (do_freeze)
926 23910 : heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);
927 :
928 79820 : MarkBufferDirty(prstate.buffer);
929 :
930 : /*
931 : * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
932 : */
933 79820 : if (RelationNeedsWAL(prstate.relation))
934 : {
935 : /*
936 : * The snapshotConflictHorizon for the whole record should be the
937 : * most conservative of all the horizons calculated for any of the
938 : * possible modifications. If this record will prune tuples, any
939 : * queries on the standby older than the newest xid of the most
940 : * recently removed tuple this record will prune will conflict. If
941 : * this record will freeze tuples, any queries on the standby with
942 : * xids older than the newest tuple this record will freeze will
943 : * conflict.
944 : */
945 : TransactionId conflict_xid;
946 :
947 78692 : if (TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid,
948 : prstate.latest_xid_removed))
949 22591 : conflict_xid = prstate.pagefrz.FreezePageConflictXid;
950 : else
951 56101 : conflict_xid = prstate.latest_xid_removed;
952 :
953 78692 : log_heap_prune_and_freeze(prstate.relation, prstate.buffer,
954 : InvalidBuffer, /* vmbuffer */
955 : 0, /* vmflags */
956 : conflict_xid,
957 : true, params->reason,
958 : prstate.frozen, prstate.nfrozen,
959 : prstate.redirected, prstate.nredirected,
960 : prstate.nowdead, prstate.ndead,
961 : prstate.nowunused, prstate.nunused);
962 : }
963 : }
964 :
965 447706 : END_CRIT_SECTION();
966 :
967 : /* Copy information back for caller */
968 447706 : presult->ndeleted = prstate.ndeleted;
969 447706 : presult->nnewlpdead = prstate.ndead;
970 447706 : presult->nfrozen = prstate.nfrozen;
971 447706 : presult->live_tuples = prstate.live_tuples;
972 447706 : presult->recently_dead_tuples = prstate.recently_dead_tuples;
973 447706 : presult->set_all_visible = prstate.set_all_visible;
974 447706 : presult->set_all_frozen = prstate.set_all_frozen;
975 447706 : presult->hastup = prstate.hastup;
976 :
977 : /*
978 : * For callers planning to update the visibility map, the conflict horizon
979 : * for that record must be the newest xmin on the page. However, if the
980 : * page is completely frozen, there can be no conflict and the
981 : * vm_conflict_horizon should remain InvalidTransactionId. This includes
982 : * the case that we just froze all the tuples; the prune-freeze record
983 : * included the conflict XID already so the caller doesn't need it.
984 : */
985 447706 : if (presult->set_all_frozen)
986 200844 : presult->vm_conflict_horizon = InvalidTransactionId;
987 : else
988 246862 : presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
989 :
990 447706 : presult->lpdead_items = prstate.lpdead_items;
991 : /* the presult->deadoffsets array was already filled in */
992 :
993 447706 : if (prstate.attempt_freeze)
994 : {
995 403163 : if (presult->nfrozen > 0)
996 : {
997 23910 : *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
998 23910 : *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
999 : }
1000 : else
1001 : {
1002 379253 : *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1003 379253 : *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1004 : }
1005 : }
1006 : }
1007 :
1008 :
1009 : /*
1010 : * Perform visibility checks for heap pruning.
 : *
 : * Wrapper around HeapTupleSatisfiesVacuumHorizon() that resolves a
 : * HEAPTUPLE_RECENTLY_DEAD result to HEAPTUPLE_DEAD when the tuple's xmax
 : * ('dead_after') is already removable, either per VACUUM's OldestXmin
 : * cutoff (when prstate->cutoffs is provided) or per the GlobalVisState
 : * horizon in prstate->vistest. All other results pass through unchanged.
1011 : */
1012 : static HTSV_Result
1013 22725263 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup)
1014 : {
1015 : HTSV_Result res;
1016 : TransactionId dead_after;
1017 :
1018 22725263 : res = HeapTupleSatisfiesVacuumHorizon(tup, prstate->buffer, &dead_after);
1019 :
1020 22725263 : if (res != HEAPTUPLE_RECENTLY_DEAD)
1021 20661339 : return res;
1022 :
1023 : /*
1024 : * For VACUUM, we must be sure to prune tuples with xmax older than
1025 : * OldestXmin -- a visibility cutoff determined at the beginning of
1026 : * vacuuming the relation. OldestXmin is used for freezing determination
1027 : * and we cannot freeze dead tuples' xmaxes.
1028 : */
1029 2063924 : if (prstate->cutoffs &&
1030 1120159 : TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
1031 1120159 : NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
1032 794224 : return HEAPTUPLE_DEAD;
1033 :
1034 : /*
1035 : * Determine whether or not the tuple is considered dead when compared
1036 : * with the provided GlobalVisState. On-access pruning does not provide
1037 : * VacuumCutoffs. And for vacuum, even if the tuple's xmax is not older
1038 : * than OldestXmin, GlobalVisTestIsRemovableXid() could find the row dead
1039 : * if the GlobalVisState has been updated since the beginning of vacuuming
1040 : * the relation.
1041 : */
1042 1269700 : if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
1043 904382 : return HEAPTUPLE_DEAD;
1044 :
1045 365318 : return res;
1046 : }
1047 :
1048 :
1049 : /*
1050 : * Pruning calculates tuple visibility once and saves the results in an array
1051 : * of int8. See PruneState.htsv for details. This helper function is meant
1052 : * to guard against examining visibility status array members which have not
1053 : * yet been computed.
 : *
 : * (A value outside the valid HTSV_Result range would indicate an entry that
 : * was never filled in; the assertion below catches that in debug builds.)
1054 : */
1055 : static inline HTSV_Result
1056 22709612 : htsv_get_valid_status(int status)
1057 : {
1058 : Assert(status >= HEAPTUPLE_DEAD &&
1059 : status <= HEAPTUPLE_DELETE_IN_PROGRESS);
1060 22709612 : return (HTSV_Result) status;
1061 : }
1062 :
1063 : /*
1064 : * Prune specified line pointer or a HOT chain originating at line pointer.
1065 : *
1066 : * Tuple visibility information is provided in prstate->htsv.
1067 : *
1068 : * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
1069 : * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
1070 : * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
1071 : * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
1072 : * DEAD, our visibility test is just too coarse to detect it.
1073 : *
1074 : * Pruning must never leave behind a DEAD tuple that still has tuple storage.
1075 : * VACUUM isn't prepared to deal with that case.
1076 : *
1077 : * The root line pointer is redirected to the tuple immediately after the
1078 : * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
1079 : * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
1080 : * tuple, which we treat as a chain of length 1.)
1081 : *
1082 : * We don't actually change the page here. We just add entries to the arrays in
1083 : * prstate showing the changes to be made. Items to be redirected are added
1084 : * to the redirected[] array (two entries per redirection); items to be set to
1085 : * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
1086 : * state are added to nowunused[]. We perform bookkeeping of live tuples,
1087 : * visibility etc. based on what the page will look like after the changes
1088 : * applied. All that bookkeeping is performed in the heap_prune_record_*()
1089 : * subroutines. The division of labor is that heap_prune_chain() decides the
1090 : * fate of each tuple, ie. whether it's going to be removed, redirected or
1091 : * left unchanged, and the heap_prune_record_*() subroutines update PruneState
1092 : * based on that outcome.
1093 : */
1094 : static void
1095 22588193 : heap_prune_chain(OffsetNumber maxoff, OffsetNumber rootoffnum,
1096 : PruneState *prstate)
1097 : {
1098 22588193 : TransactionId priorXmax = InvalidTransactionId;
1099 : ItemId rootlp;
1100 : OffsetNumber offnum;
1101 : OffsetNumber chainitems[MaxHeapTuplesPerPage];
1102 22588193 : Page page = prstate->page;
1103 :
1104 : /*
1105 : * After traversing the HOT chain, ndeadchain is the index in chainitems
1106 : * of the first live successor after the last dead item.
1107 : */
1108 22588193 : int ndeadchain = 0,
1109 22588193 : nchain = 0;
1110 :
1111 22588193 : rootlp = PageGetItemId(page, rootoffnum);
1112 :
1113 : /* Start from the root tuple */
1114 22588193 : offnum = rootoffnum;
1115 :
1116 : /* while not end of the chain */
1117 : for (;;)
1118 459471 : {
1119 : HeapTupleHeader htup;
1120 : ItemId lp;
1121 :
1122 : /* Sanity check (pure paranoia) */
1123 23047664 : if (offnum < FirstOffsetNumber)
1124 0 : break;
1125 :
1126 : /*
1127 : * An offset past the end of page's line pointer array is possible
1128 : * when the array was truncated (original item must have been unused)
1129 : */
1130 23047664 : if (offnum > maxoff)
1131 0 : break;
1132 :
1133 : /* If item is already processed, stop --- it must not be same chain */
1134 23047664 : if (prstate->processed[offnum])
1135 0 : break;
1136 :
1137 23047664 : lp = PageGetItemId(page, offnum);
1138 :
1139 : /*
1140 : * Unused item obviously isn't part of the chain. Likewise, a dead
1141 : * line pointer can't be part of the chain. Both of those cases were
1142 : * already marked as processed.
1143 : */
1144 : Assert(ItemIdIsUsed(lp));
1145 : Assert(!ItemIdIsDead(lp));
1146 :
1147 : /*
1148 : * If we are looking at the redirected root line pointer, jump to the
1149 : * first normal tuple in the chain. If we find a redirect somewhere
1150 : * else, stop --- it must not be same chain.
1151 : */
1152 23047664 : if (ItemIdIsRedirected(lp))
1153 : {
1154 338052 : if (nchain > 0)
1155 0 : break; /* not at start of chain */
1156 338052 : chainitems[nchain++] = offnum;
1157 338052 : offnum = ItemIdGetRedirect(rootlp);
1158 338052 : continue;
1159 : }
1160 :
1161 : Assert(ItemIdIsNormal(lp));
1162 :
1163 22709612 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1164 :
1165 : /*
1166 : * Check the tuple XMIN against prior XMAX, if any
1167 : */
1168 22831031 : if (TransactionIdIsValid(priorXmax) &&
1169 121419 : !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
1170 0 : break;
1171 :
1172 : /*
1173 : * OK, this tuple is indeed a member of the chain.
1174 : */
1175 22709612 : chainitems[nchain++] = offnum;
1176 :
1177 22709612 : switch (htsv_get_valid_status(prstate->htsv[offnum]))
1178 : {
1179 1757167 : case HEAPTUPLE_DEAD:
1180 :
1181 : /* Remember the last DEAD tuple seen */
1182 1757167 : ndeadchain = nchain;
1183 1757167 : HeapTupleHeaderAdvanceConflictHorizon(htup,
1184 : &prstate->latest_xid_removed);
1185 : /* Advance to next chain member */
1186 1757167 : break;
1187 :
1188 365318 : case HEAPTUPLE_RECENTLY_DEAD:
1189 :
1190 : /*
1191 : * We don't need to advance the conflict horizon for
1192 : * RECENTLY_DEAD tuples, even if we are removing them. This
1193 : * is because we only remove RECENTLY_DEAD tuples if they
1194 : * precede a DEAD tuple, and the DEAD tuple must have been
1195 : * inserted by a newer transaction than the RECENTLY_DEAD
1196 : * tuple by virtue of being later in the chain. We will have
1197 : * advanced the conflict horizon for the DEAD tuple.
1198 : */
1199 :
1200 : /*
1201 : * Advance past RECENTLY_DEAD tuples just in case there's a
1202 : * DEAD one after them. We have to make sure that we don't
1203 : * miss any DEAD tuples, since DEAD tuples that still have
1204 : * tuple storage after pruning will confuse VACUUM.
1205 : */
1206 365318 : break;
1207 :
1208 20587127 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1209 : case HEAPTUPLE_LIVE:
1210 : case HEAPTUPLE_INSERT_IN_PROGRESS:
 : /* Found a live chain member; the dead prefix (if any) ends here. */
1211 20587127 : goto process_chain;
1212 :
1213 0 : default:
1214 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1215 : goto process_chain;
1216 : }
1217 :
1218 : /*
1219 : * If the tuple is not HOT-updated, then we are at the end of this
1220 : * HOT-update chain.
1221 : */
1222 2122485 : if (!HeapTupleHeaderIsHotUpdated(htup))
1223 2001066 : goto process_chain;
1224 :
1225 : /* HOT implies it can't have moved to different partition */
1226 : Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1227 :
1228 : /*
1229 : * Advance to next chain member.
1230 : */
1231 : Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == prstate->block);
1232 121419 : offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1233 121419 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1234 : }
1235 :
1236 0 : if (ItemIdIsRedirected(rootlp) && nchain < 2)
1237 : {
1238 : /*
1239 : * We found a redirect item that doesn't point to a valid follow-on
1240 : * item. This can happen if the loop in heap_page_prune_and_freeze()
1241 : * caused us to visit the dead successor of a redirect item before
1242 : * visiting the redirect item. We can clean up by setting the
1243 : * redirect item to LP_DEAD state or LP_UNUSED if the caller
1244 : * indicated.
1245 : */
1246 0 : heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
1247 0 : return;
1248 : }
1249 :
1250 0 : process_chain:
1251 :
1252 22588193 : if (ndeadchain == 0)
1253 : {
1254 : /*
1255 : * No DEAD tuple was found, so the chain is entirely composed of
1256 : * normal, unchanged tuples. Leave it alone.
1257 : */
1258 20876813 : int i = 0;
1259 :
1260 20876813 : if (ItemIdIsRedirected(rootlp))
1261 : {
1262 318232 : heap_prune_record_unchanged_lp_redirect(prstate, rootoffnum);
1263 318232 : i++;
1264 : }
1265 41758168 : for (; i < nchain; i++)
1266 20881355 : heap_prune_record_unchanged_lp_normal(prstate, chainitems[i]);
1267 : }
1268 1711380 : else if (ndeadchain == nchain)
1269 : {
1270 : /*
1271 : * The entire chain is dead. Mark the root line pointer LP_DEAD, and
1272 : * fully remove the other tuples in the chain.
1273 : */
1274 1642047 : heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
1275 1686795 : for (int i = 1; i < nchain; i++)
1276 44748 : heap_prune_record_unused(prstate, chainitems[i], true);
1277 : }
1278 : else
1279 : {
1280 : /*
1281 : * We found a DEAD tuple in the chain. Redirect the root line pointer
1282 : * to the first non-DEAD tuple, and mark as unused each intermediate
1283 : * item that we are able to remove from the chain.
1284 : */
1285 69333 : heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
1286 69333 : ItemIdIsNormal(rootlp));
1287 90192 : for (int i = 1; i < ndeadchain; i++)
1288 20859 : heap_prune_record_unused(prstate, chainitems[i], true);
1289 :
1290 : /* the rest of tuples in the chain are normal, unchanged tuples */
1291 140423 : for (int i = ndeadchain; i < nchain; i++)
1292 71090 : heap_prune_record_unchanged_lp_normal(prstate, chainitems[i]);
1293 : }
1294 : }
1295 :
1296 : /* Record lowest soon-prunable XID */
 : /* 'xid' must be a normal XID; it becomes the new pd_prune_xid candidate */
1297 : static void
1298 4450150 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
1299 : {
1300 : /*
1301 : * This should exactly match the PageSetPrunable macro. We can't store
1302 : * directly into the page header yet, so we update working state.
1303 : */
1304 : Assert(TransactionIdIsNormal(xid));
1305 8727530 : if (!TransactionIdIsValid(prstate->new_prune_xid) ||
1306 4277380 : TransactionIdPrecedes(xid, prstate->new_prune_xid))
1307 174113 : prstate->new_prune_xid = xid;
1308 4450150 : }
1309 :
1310 : /* Record line pointer to be redirected */
 : /* 'offnum' will be made to redirect to 'rdoffnum' when changes are applied */
1311 : static void
1312 69333 : heap_prune_record_redirect(PruneState *prstate,
1313 : OffsetNumber offnum, OffsetNumber rdoffnum,
1314 : bool was_normal)
1315 : {
1316 : Assert(!prstate->processed[offnum]);
1317 69333 : prstate->processed[offnum] = true;
1318 :
1319 : /*
1320 : * Do not mark the redirect target here. It needs to be counted
1321 : * separately as an unchanged tuple.
1322 : */
1323 :
1324 : Assert(prstate->nredirected < MaxHeapTuplesPerPage);
1325 69333 : prstate->redirected[prstate->nredirected * 2] = offnum;
1326 69333 : prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
1327 :
1328 69333 : prstate->nredirected++;
1329 :
1330 : /*
1331 : * If the root entry had been a normal tuple, we are deleting it, so count
1332 : * it in the result. But changing a redirect (even to DEAD state) doesn't
1333 : * count.
1334 : */
1335 69333 : if (was_normal)
1336 61315 : prstate->ndeleted++;
1337 :
 : /* A redirect still points at a surviving tuple, so the page is nonempty */
1338 69333 : prstate->hastup = true;
1339 : }
1340 :
1341 : /* Record line pointer to be marked dead */
 : /* 'was_normal' is true if the item was LP_NORMAL (then it counts in ndeleted) */
1342 : static void
1343 1608335 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
1344 : bool was_normal)
1345 : {
1346 : Assert(!prstate->processed[offnum]);
1347 1608335 : prstate->processed[offnum] = true;
1348 :
1349 : Assert(prstate->ndead < MaxHeapTuplesPerPage);
1350 1608335 : prstate->nowdead[prstate->ndead] = offnum;
1351 1608335 : prstate->ndead++;
1352 :
1353 : /*
1354 : * Deliberately delay unsetting set_all_visible and set_all_frozen until
1355 : * later during pruning. Removable dead tuples shouldn't preclude freezing
1356 : * the page.
1357 : */
1358 :
1359 : /* Record the dead offset for vacuum */
1360 1608335 : prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1361 :
1362 : /*
1363 : * If the root entry had been a normal tuple, we are deleting it, so count
1364 : * it in the result. But changing a redirect (even to DEAD state) doesn't
1365 : * count.
1366 : */
1367 1608335 : if (was_normal)
1368 1596533 : prstate->ndeleted++;
1369 1608335 : }
1370 :
1371 : /*
1372 : * Depending on whether or not the caller set mark_unused_now to true, record that a
1373 : * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
1374 : * which we will mark line pointers LP_UNUSED, but we will not mark line
1375 : * pointers LP_DEAD if mark_unused_now is true.
 : *
 : * 'was_normal' is passed through to the chosen record subroutine and
 : * controls whether the item is counted in prstate->ndeleted.
1376 : */
1377 : static void
1378 1642047 : heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
1379 : bool was_normal)
1380 : {
1381 : /*
1382 : * If the caller set mark_unused_now to true, we can remove dead tuples
1383 : * during pruning instead of marking their line pointers dead. Set this
1384 : * tuple's line pointer LP_UNUSED. We hint that this option is less
1385 : * likely.
1386 : */
1387 1642047 : if (unlikely(prstate->mark_unused_now))
1388 33712 : heap_prune_record_unused(prstate, offnum, was_normal);
1389 : else
1390 1608335 : heap_prune_record_dead(prstate, offnum, was_normal);
1391 1642047 : }
1392 :
1393 : /* Record line pointer to be marked unused */
 : /* The queued offsets are applied later via heap_page_prune_execute() */
1394 : static void
1395 104579 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
1396 : {
1397 : Assert(!prstate->processed[offnum]);
1398 104579 : prstate->processed[offnum] = true;
1399 :
1400 : Assert(prstate->nunused < MaxHeapTuplesPerPage);
1401 104579 : prstate->nowunused[prstate->nunused] = offnum;
1402 104579 : prstate->nunused++;
1403 :
1404 : /*
1405 : * If the root entry had been a normal tuple, we are deleting it, so count
1406 : * it in the result. But changing a redirect (even to DEAD state) doesn't
1407 : * count.
1408 : */
1409 104579 : if (was_normal)
1410 102412 : prstate->ndeleted++;
1411 104579 : }
1412 :
1413 : /*
1414 : * Record an unused line pointer that is left unchanged.
 : *
 : * Nothing to do beyond marking it processed: an LP_UNUSED slot holds no
 : * tuple, so it affects no counters or visibility state.
1415 : */
1416 : static void
1417 257045 : heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum)
1418 : {
1419 : Assert(!prstate->processed[offnum]);
1420 257045 : prstate->processed[offnum] = true;
1421 257045 : }
1422 :
1423 : /*
1424 : * Record line pointer that is left unchanged. We consider freezing it, and
1425 : * update bookkeeping of tuple counts and page visibility.
1426 : */
1427 : static void
1428 20965003 : heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum)
1429 : {
1430 : HeapTupleHeader htup;
1431 20965003 : Page page = prstate->page;
1432 :
1433 : Assert(!prstate->processed[offnum]);
1434 20965003 : prstate->processed[offnum] = true;
1435 :
1436 20965003 : prstate->hastup = true; /* the page is not empty */
1437 :
1438 : /*
1439 : * The criteria for counting a tuple as live in this block need to match
1440 : * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
1441 : * ANALYZE may produce wildly different reltuples values, e.g. when there
1442 : * are many recently-dead tuples.
1443 : *
1444 : * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
1445 : * can't run inside a transaction block, which makes some cases impossible
1446 : * (e.g. in-progress insert from the same transaction).
1447 : *
1448 : * HEAPTUPLE_DEAD are handled by the other heap_prune_record_*()
1449 : * subroutines. They don't count dead items like acquire_sample_rows()
1450 : * does, because we assume that all dead items will become LP_UNUSED
1451 : * before VACUUM finishes. This difference is only superficial. VACUUM
1452 : * effectively agrees with ANALYZE about DEAD items, in the end. VACUUM
1453 : * won't remember LP_DEAD items, but only because they're not supposed to
1454 : * be left behind when it is done. (Cases where we bypass index vacuuming
1455 : * will violate this optimistic assumption, but the overall impact of that
1456 : * should be negligible.)
1457 : */
1458 20965003 : htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
1459 :
 : /* Classify per the visibility status precomputed in prstate->htsv */
1460 20965003 : switch (prstate->htsv[offnum])
1461 : {
1462 16438476 : case HEAPTUPLE_LIVE:
1463 :
1464 : /*
1465 : * Count it as live. Not only is this natural, but it's also what
1466 : * acquire_sample_rows() does.
1467 : */
1468 16438476 : prstate->live_tuples++;
1469 :
1470 : /*
1471 : * Is the tuple definitely visible to all transactions?
1472 : *
1473 : * NB: Like with per-tuple hint bits, we can't set the
1474 : * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
1475 : * See SetHintBits for more info. Check that the tuple is hinted
1476 : * xmin-committed because of that.
1477 : */
1478 16438476 : if (prstate->set_all_visible)
1479 : {
1480 : TransactionId xmin;
1481 :
1482 13026597 : if (!HeapTupleHeaderXminCommitted(htup))
1483 : {
1484 300 : prstate->set_all_visible = false;
1485 300 : prstate->set_all_frozen = false;
1486 300 : break;
1487 : }
1488 :
1489 : /*
1490 : * The inserter definitely committed. But is it old enough
1491 : * that everyone sees it as committed? A FrozenTransactionId
1492 : * is seen as committed to everyone. Otherwise, we check if
1493 : * there is a snapshot that considers this xid to still be
1494 : * running, and if so, we don't consider the page all-visible.
1495 : */
1496 13026297 : xmin = HeapTupleHeaderGetXmin(htup);
1497 :
1498 : /*
1499 : * For now always use prstate->cutoffs for this test, because
1500 : * we only update 'set_all_visible' and 'set_all_frozen' when
1501 : * freezing is requested. We could use
1502 : * GlobalVisTestIsRemovableXid instead, if a non-freezing
1503 : * caller wanted to set the VM bit.
1504 : */
1505 : Assert(prstate->cutoffs);
1506 13026297 : if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin))
1507 : {
1508 3299 : prstate->set_all_visible = false;
1509 3299 : prstate->set_all_frozen = false;
1510 3299 : break;
1511 : }
1512 :
1513 : /* Track newest xmin on page. */
1514 13022998 : if (TransactionIdFollows(xmin, prstate->visibility_cutoff_xid) &&
1515 : TransactionIdIsNormal(xmin))
1516 124218 : prstate->visibility_cutoff_xid = xmin;
1517 : }
1518 16434877 : break;
1519 :
1520 365318 : case HEAPTUPLE_RECENTLY_DEAD:
1521 365318 : prstate->recently_dead_tuples++;
1522 365318 : prstate->set_all_visible = false;
1523 365318 : prstate->set_all_frozen = false;
1524 :
1525 : /*
1526 : * This tuple will soon become DEAD. Update the hint field so
1527 : * that the page is reconsidered for pruning in future.
1528 : */
1529 365318 : heap_prune_record_prunable(prstate,
1530 : HeapTupleHeaderGetUpdateXid(htup));
1531 365318 : break;
1532 :
1533 76377 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1534 :
1535 : /*
1536 : * We do not count these rows as live, because we expect the
1537 : * inserting transaction to update the counters at commit, and we
1538 : * assume that will happen only after we report our results. This
1539 : * assumption is a bit shaky, but it is what acquire_sample_rows()
1540 : * does, so be consistent.
1541 : */
1542 76377 : prstate->set_all_visible = false;
1543 76377 : prstate->set_all_frozen = false;
1544 :
1545 : /*
1546 : * If we wanted to optimize for aborts, we might consider marking
1547 : * the page prunable when we see INSERT_IN_PROGRESS. But we
1548 : * don't. See related decisions about when to mark the page
1549 : * prunable in heapam.c.
1550 : */
1551 76377 : break;
1552 :
1553 4084832 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1554 :
1555 : /*
1556 : * This an expected case during concurrent vacuum. Count such
1557 : * rows as live. As above, we assume the deleting transaction
1558 : * will commit and update the counters after we report.
1559 : */
1560 4084832 : prstate->live_tuples++;
1561 4084832 : prstate->set_all_visible = false;
1562 4084832 : prstate->set_all_frozen = false;
1563 :
1564 : /*
1565 : * This tuple may soon become DEAD. Update the hint field so that
1566 : * the page is reconsidered for pruning in future.
1567 : */
1568 4084832 : heap_prune_record_prunable(prstate,
1569 : HeapTupleHeaderGetUpdateXid(htup));
1570 4084832 : break;
1571 :
1572 0 : default:
1573 :
1574 : /*
1575 : * DEAD tuples should've been passed to heap_prune_record_dead()
1576 : * or heap_prune_record_unused() instead.
1577 : */
1578 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
1579 : prstate->htsv[offnum]);
1580 : break;
1581 : }
1582 :
1583 : /* Consider freezing any normal tuples which will not be removed */
1584 20965003 : if (prstate->attempt_freeze)
1585 : {
1586 : bool totally_frozen;
1587 :
1588 19433718 : if ((heap_prepare_freeze_tuple(htup,
1589 19433718 : prstate->cutoffs,
1590 : &prstate->pagefrz,
1591 19433718 : &prstate->frozen[prstate->nfrozen],
1592 : &totally_frozen)))
1593 : {
1594 : /* Save prepared freeze plan for later */
1595 2941286 : prstate->frozen[prstate->nfrozen++].offset = offnum;
1596 : }
1597 :
1598 : /*
1599 : * If any tuple isn't either totally frozen already or eligible to
1600 : * become totally frozen (according to its freeze plan), then the page
1601 : * definitely cannot be set all-frozen in the visibility map later on.
1602 : */
1603 19433718 : if (!totally_frozen)
1604 4833646 : prstate->set_all_frozen = false;
1605 : }
1606 20965003 : }
1607 :
1608 :
1609 : /*
1610 : * Record line pointer that was already LP_DEAD and is left unchanged.
 : * (No page modification is queued for this item; it is only remembered
 : * in deadoffsets so VACUUM can deal with it later.)
1611 : */
1612 : static void
1613 1280173 : heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum)
1614 : {
1615 : Assert(!prstate->processed[offnum]);
1616 1280173 : prstate->processed[offnum] = true;
1617 :
1618 : /*
1619 : * Deliberately don't set hastup for LP_DEAD items. We make the soft
1620 : * assumption that any LP_DEAD items encountered here will become
1621 : * LP_UNUSED later on, before count_nondeletable_pages is reached. If we
1622 : * don't make this assumption then rel truncation will only happen every
1623 : * other VACUUM, at most. Besides, VACUUM must treat
1624 : * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
1625 : * handled (handled here, or handled later on).
1626 : *
1627 : * Similarly, don't unset set_all_visible and set_all_frozen until later,
1628 : * at the end of heap_page_prune_and_freeze(). This will allow us to
1629 : * attempt to freeze the page after pruning. As long as we unset it
1630 : * before updating the visibility map, this will be correct.
1631 : */
1632 :
1633 : /* Record the dead offset for vacuum */
1634 1280173 : prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1635 1280173 : }
1636 :
1637 : /*
1638 : * Record LP_REDIRECT that is left unchanged.
1639 : */
1640 : static void
1641 318232 : heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
1642 : {
1643 : /*
1644 : * A redirect line pointer doesn't count as a live tuple.
1645 : *
1646 : * If we leave a redirect line pointer in place, there will be another
1647 : * tuple on the page that it points to. We will do the bookkeeping for
1648 : * that separately. So we have nothing to do here, except remember that
1649 : * we processed this item.
1650 : */
1651 : Assert(!prstate->processed[offnum]);
1652 318232 : prstate->processed[offnum] = true;
1653 318232 : }
1654 :
1655 : /*
1656 : * Perform the actual page changes needed by heap_page_prune_and_freeze().
1657 : *
1658 : * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
1659 : * as unused, not redirecting or removing anything else. The
1660 : * PageRepairFragmentation() call is skipped in that case.
1661 : *
1662 : * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
1663 : * the buffer. If it is set, an ordinary exclusive lock suffices.
1664 : */
1665 : void
1666 68036 : heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
1667 : OffsetNumber *redirected, int nredirected,
1668 : OffsetNumber *nowdead, int ndead,
1669 : OffsetNumber *nowunused, int nunused)
1670 : {
1671 68036 : Page page = BufferGetPage(buffer);
1672 : OffsetNumber *offnum;
1673 : HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1674 :
1675 : /* Shouldn't be called unless there's something to do */
1676 : Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1677 :
1678 : /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1679 : Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1680 :
1681 : /* Update all redirected line pointers */
1682 68036 : offnum = redirected;
1683 156848 : for (int i = 0; i < nredirected; i++)
1684 : {
1685 88812 : OffsetNumber fromoff = *offnum++;
1686 88812 : OffsetNumber tooff = *offnum++;
1687 88812 : ItemId fromlp = PageGetItemId(page, fromoff);
1688 : ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1689 :
1690 : #ifdef USE_ASSERT_CHECKING
1691 :
1692 : /*
1693 : * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1694 : * must be the first item from a HOT chain. If the item has tuple
1695 : * storage then it can't be a heap-only tuple. Otherwise we are just
1696 : * maintaining an existing LP_REDIRECT from an existing HOT chain that
1697 : * has been pruned at least once before now.
1698 : */
1699 : if (!ItemIdIsRedirected(fromlp))
1700 : {
1701 : Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1702 :
1703 : htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1704 : Assert(!HeapTupleHeaderIsHeapOnly(htup));
1705 : }
1706 : else
1707 : {
1708 : /* We shouldn't need to redundantly set the redirect */
1709 : Assert(ItemIdGetRedirect(fromlp) != tooff);
1710 : }
1711 :
1712 : /*
1713 : * The item that we're about to set as an LP_REDIRECT (the 'from'
1714 : * item) will point to an existing item (the 'to' item) that is
1715 : * already a heap-only tuple. There can be at most one LP_REDIRECT
1716 : * item per HOT chain.
1717 : *
1718 : * We need to keep around an LP_REDIRECT item (after original
1719 : * non-heap-only root tuple gets pruned away) so that it's always
1720 : * possible for VACUUM to easily figure out what TID to delete from
1721 : * indexes when an entire HOT chain becomes dead. A heap-only tuple
1722 : * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1723 : * tuple can.
1724 : *
1725 : * This check may miss problems, e.g. the target of a redirect could
1726 : * be marked as unused subsequently. The page_verify_redirects() check
1727 : * below will catch such problems.
1728 : */
1729 : tolp = PageGetItemId(page, tooff);
1730 : Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1731 : htup = (HeapTupleHeader) PageGetItem(page, tolp);
1732 : Assert(HeapTupleHeaderIsHeapOnly(htup));
1733 : #endif
1734 :
1735 88812 : ItemIdSetRedirect(fromlp, tooff);
1736 : }
1737 :
1738 : /* Update all now-dead line pointers */
1739 68036 : offnum = nowdead;
1740 1961289 : for (int i = 0; i < ndead; i++)
1741 : {
1742 1893253 : OffsetNumber off = *offnum++;
1743 1893253 : ItemId lp = PageGetItemId(page, off);
1744 :
1745 : #ifdef USE_ASSERT_CHECKING
1746 :
1747 : /*
1748 : * An LP_DEAD line pointer must be left behind when the original item
1749 : * (which is dead to everybody) could still be referenced by a TID in
1750 : * an index. This should never be necessary with any individual
1751 : * heap-only tuple item, though. (It's not clear how much of a problem
1752 : * that would be, but there is no reason to allow it.)
1753 : */
1754 : if (ItemIdHasStorage(lp))
1755 : {
1756 : Assert(ItemIdIsNormal(lp));
1757 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1758 : Assert(!HeapTupleHeaderIsHeapOnly(htup));
1759 : }
1760 : else
1761 : {
1762 : /* Whole HOT chain becomes dead */
1763 : Assert(ItemIdIsRedirected(lp));
1764 : }
1765 : #endif
1766 :
1767 1893253 : ItemIdSetDead(lp);
1768 : }
1769 :
1770 : /* Update all now-unused line pointers */
1771 68036 : offnum = nowunused;
1772 387500 : for (int i = 0; i < nunused; i++)
1773 : {
1774 319464 : OffsetNumber off = *offnum++;
1775 319464 : ItemId lp = PageGetItemId(page, off);
1776 :
1777 : #ifdef USE_ASSERT_CHECKING
1778 :
1779 : if (lp_truncate_only)
1780 : {
1781 : /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1782 : Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1783 : }
1784 : else
1785 : {
1786 : /*
1787 : * When heap_page_prune_and_freeze() was called, mark_unused_now
1788 : * may have been passed as true, which allows would-be LP_DEAD
1789 : * items to be made LP_UNUSED instead. This is only possible if
1790 : * the relation has no indexes. If there are any dead items, then
1791 : * mark_unused_now was not true and every item being marked
1792 : * LP_UNUSED must refer to a heap-only tuple.
1793 : */
1794 : if (ndead > 0)
1795 : {
1796 : Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
1797 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1798 : Assert(HeapTupleHeaderIsHeapOnly(htup));
1799 : }
1800 : else
1801 : Assert(ItemIdIsUsed(lp));
1802 : }
1803 :
1804 : #endif
1805 :
1806 319464 : ItemIdSetUnused(lp);
1807 : }
1808 :
1809 68036 : if (lp_truncate_only)
1810 2163 : PageTruncateLinePointerArray(page);
1811 : else
1812 : {
1813 : /*
1814 : * Finally, repair any fragmentation, and update the page's hint bit
1815 : * about whether it has free pointers.
1816 : */
1817 65873 : PageRepairFragmentation(page);
1818 :
1819 : /*
1820 : * Now that the page has been modified, assert that redirect items
1821 : * still point to valid targets.
1822 : */
1823 65873 : page_verify_redirects(page);
1824 : }
1825 68036 : }
1826 :
1827 :
1828 : /*
1829 : * If built with assertions, verify that all LP_REDIRECT items point to a
1830 : * valid item.
1831 : *
1832 : * One way that bugs related to HOT pruning show is redirect items pointing to
1833 : * removed tuples. It's not trivial to reliably check that marking an item
1834 : * unused will not orphan a redirect item during heap_prune_chain() /
1835 : * heap_page_prune_execute(), so we additionally check the whole page after
1836 : * pruning. Without this check such bugs would typically only cause asserts
1837 : * later, potentially well after the corruption has been introduced.
1838 : *
1839 : * Also check comments in heap_page_prune_execute()'s redirection loop.
1840 : */
1841 : static void
1842 65873 : page_verify_redirects(Page page)
1843 : {
1844 : #ifdef USE_ASSERT_CHECKING
1845 : OffsetNumber offnum;
1846 : OffsetNumber maxoff;
1847 :
1848 : maxoff = PageGetMaxOffsetNumber(page);
1849 : for (offnum = FirstOffsetNumber;
1850 : offnum <= maxoff;
1851 : offnum = OffsetNumberNext(offnum))
1852 : {
1853 : ItemId itemid = PageGetItemId(page, offnum);
1854 : OffsetNumber targoff;
1855 : ItemId targitem;
1856 : HeapTupleHeader htup;
1857 :
1858 : if (!ItemIdIsRedirected(itemid))
1859 : continue;
1860 :
1861 : targoff = ItemIdGetRedirect(itemid);
1862 : targitem = PageGetItemId(page, targoff);
1863 :
1864 : Assert(ItemIdIsUsed(targitem));
1865 : Assert(ItemIdIsNormal(targitem));
1866 : Assert(ItemIdHasStorage(targitem));
1867 : htup = (HeapTupleHeader) PageGetItem(page, targitem);
1868 : Assert(HeapTupleHeaderIsHeapOnly(htup));
1869 : }
1870 : #endif
1871 65873 : }
1872 :
1873 :
1874 : /*
1875 : * For all items in this page, find their respective root line pointers.
1876 : * If item k is part of a HOT-chain with root at item j, then we set
1877 : * root_offsets[k - 1] = j.
1878 : *
1879 : * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
1880 : * Unused entries are filled with InvalidOffsetNumber (zero).
1881 : *
1882 : * The function must be called with at least share lock on the buffer, to
1883 : * prevent concurrent prune operations.
1884 : *
1885 : * Note: The information collected here is valid only as long as the caller
1886 : * holds a pin on the buffer. Once pin is released, a tuple might be pruned
1887 : * and reused by a completely unrelated tuple.
1888 : */
1889 : void
1890 132778 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
1891 : {
1892 : OffsetNumber offnum,
1893 : maxoff;
1894 :
1895 132778 : MemSet(root_offsets, InvalidOffsetNumber,
1896 : MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1897 :
1898 132778 : maxoff = PageGetMaxOffsetNumber(page);
1899 10762770 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1900 : {
1901 10629992 : ItemId lp = PageGetItemId(page, offnum);
1902 : HeapTupleHeader htup;
1903 : OffsetNumber nextoffnum;
1904 : TransactionId priorXmax;
1905 :
1906 : /* skip unused and dead items */
1907 10629992 : if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1908 9235 : continue;
1909 :
1910 10620757 : if (ItemIdIsNormal(lp))
1911 : {
1912 10617005 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1913 :
1914 : /*
1915 : * Check if this tuple is part of a HOT-chain rooted at some other
1916 : * tuple. If so, skip it for now; we'll process it when we find
1917 : * its root.
1918 : */
1919 10617005 : if (HeapTupleHeaderIsHeapOnly(htup))
1920 4224 : continue;
1921 :
1922 : /*
1923 : * This is either a plain tuple or the root of a HOT-chain.
1924 : * Remember it in the mapping.
1925 : */
1926 10612781 : root_offsets[offnum - 1] = offnum;
1927 :
1928 : /* If it's not the start of a HOT-chain, we're done with it */
1929 10612781 : if (!HeapTupleHeaderIsHotUpdated(htup))
1930 10612457 : continue;
1931 :
1932 : /* Set up to scan the HOT-chain */
1933 324 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1934 324 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1935 : }
1936 : else
1937 : {
1938 : /* Must be a redirect item. We do not set its root_offsets entry */
1939 : Assert(ItemIdIsRedirected(lp));
1940 : /* Set up to scan the HOT-chain */
1941 3752 : nextoffnum = ItemIdGetRedirect(lp);
1942 3752 : priorXmax = InvalidTransactionId;
1943 : }
1944 :
1945 : /*
1946 : * Now follow the HOT-chain and collect other tuples in the chain.
1947 : *
1948 : * Note: Even though this is a nested loop, the complexity of the
1949 : * function is O(N) because a tuple in the page should be visited not
1950 : * more than twice, once in the outer loop and once in HOT-chain
1951 : * chases.
1952 : */
1953 : for (;;)
1954 : {
1955 : /* Sanity check (pure paranoia) */
1956 4220 : if (offnum < FirstOffsetNumber)
1957 0 : break;
1958 :
1959 : /*
1960 : * An offset past the end of page's line pointer array is possible
1961 : * when the array was truncated
1962 : */
1963 4220 : if (offnum > maxoff)
1964 0 : break;
1965 :
1966 4220 : lp = PageGetItemId(page, nextoffnum);
1967 :
1968 : /* Check for broken chains */
1969 4220 : if (!ItemIdIsNormal(lp))
1970 0 : break;
1971 :
1972 4220 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1973 :
1974 4688 : if (TransactionIdIsValid(priorXmax) &&
1975 468 : !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1976 0 : break;
1977 :
1978 : /* Remember the root line pointer for this item */
1979 4220 : root_offsets[nextoffnum - 1] = offnum;
1980 :
1981 : /* Advance to next chain member, if any */
1982 4220 : if (!HeapTupleHeaderIsHotUpdated(htup))
1983 4076 : break;
1984 :
1985 : /* HOT implies it can't have moved to different partition */
1986 : Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1987 :
1988 144 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1989 144 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1990 : }
1991 : }
1992 132778 : }
1993 :
1994 :
1995 : /*
1996 : * Compare fields that describe actions required to freeze tuple with caller's
1997 : * open plan. If everything matches then the frz tuple plan is equivalent to
1998 : * caller's plan.
1999 : */
2000 : static inline bool
2001 1042388 : heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
2002 : {
2003 1042388 : if (plan->xmax == frz->xmax &&
2004 1041092 : plan->t_infomask2 == frz->t_infomask2 &&
2005 1040387 : plan->t_infomask == frz->t_infomask &&
2006 1037445 : plan->frzflags == frz->frzflags)
2007 1037445 : return true;
2008 :
2009 : /* Caller must call heap_log_freeze_new_plan again for frz */
2010 4943 : return false;
2011 : }
2012 :
2013 : /*
2014 : * Comparator used to deduplicate the freeze plans used in WAL records.
2015 : */
2016 : static int
2017 1421713 : heap_log_freeze_cmp(const void *arg1, const void *arg2)
2018 : {
2019 1421713 : const HeapTupleFreeze *frz1 = arg1;
2020 1421713 : const HeapTupleFreeze *frz2 = arg2;
2021 :
2022 1421713 : if (frz1->xmax < frz2->xmax)
2023 13219 : return -1;
2024 1408494 : else if (frz1->xmax > frz2->xmax)
2025 13933 : return 1;
2026 :
2027 1394561 : if (frz1->t_infomask2 < frz2->t_infomask2)
2028 4844 : return -1;
2029 1389717 : else if (frz1->t_infomask2 > frz2->t_infomask2)
2030 4647 : return 1;
2031 :
2032 1385070 : if (frz1->t_infomask < frz2->t_infomask)
2033 11781 : return -1;
2034 1373289 : else if (frz1->t_infomask > frz2->t_infomask)
2035 15471 : return 1;
2036 :
2037 1357818 : if (frz1->frzflags < frz2->frzflags)
2038 0 : return -1;
2039 1357818 : else if (frz1->frzflags > frz2->frzflags)
2040 0 : return 1;
2041 :
2042 : /*
2043 : * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
2044 : * (So the tuples will share a single canonical freeze plan.)
2045 : *
2046 : * We tiebreak on page offset number to keep each freeze plan's page
2047 : * offset number array individually sorted. (Unnecessary, but be tidy.)
2048 : */
2049 1357818 : if (frz1->offset < frz2->offset)
2050 1152381 : return -1;
2051 205437 : else if (frz1->offset > frz2->offset)
2052 205437 : return 1;
2053 :
2054 : Assert(false);
2055 0 : return 0;
2056 : }
2057 :
2058 : /*
2059 : * Start new plan initialized using tuple-level actions. At least one tuple
2060 : * will have steps required to freeze described by caller's plan during REDO.
2061 : */
2062 : static inline void
2063 28850 : heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
2064 : {
2065 28850 : plan->xmax = frz->xmax;
2066 28850 : plan->t_infomask2 = frz->t_infomask2;
2067 28850 : plan->t_infomask = frz->t_infomask;
2068 28850 : plan->frzflags = frz->frzflags;
2069 28850 : plan->ntuples = 1; /* for now */
2070 28850 : }
2071 :
2072 : /*
2073 : * Deduplicate tuple-based freeze plans so that each distinct set of
2074 : * processing steps is only stored once in the WAL record.
2075 : * Called during original execution of freezing (for logged relations).
2076 : *
2077 : * Return value is number of plans set in *plans_out for caller. Also writes
2078 : * an array of offset numbers into *offsets_out output argument for caller
2079 : * (actually there is one array per freeze plan, but that's not of immediate
2080 : * concern to our caller).
2081 : */
2082 : static int
2083 23907 : heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
2084 : xlhp_freeze_plan *plans_out,
2085 : OffsetNumber *offsets_out)
2086 : {
2087 23907 : int nplans = 0;
2088 :
2089 : /* Sort tuple-based freeze plans in the order required to deduplicate */
2090 23907 : qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
2091 :
2092 1090202 : for (int i = 0; i < ntuples; i++)
2093 : {
2094 1066295 : HeapTupleFreeze *frz = tuples + i;
2095 :
2096 1066295 : if (i == 0)
2097 : {
2098 : /* New canonical freeze plan starting with first tup */
2099 23907 : heap_log_freeze_new_plan(plans_out, frz);
2100 23907 : nplans++;
2101 : }
2102 1042388 : else if (heap_log_freeze_eq(plans_out, frz))
2103 : {
2104 : /* tup matches open canonical plan -- include tup in it */
2105 : Assert(offsets_out[i - 1] < frz->offset);
2106 1037445 : plans_out->ntuples++;
2107 : }
2108 : else
2109 : {
2110 : /* Tup doesn't match current plan -- done with it now */
2111 4943 : plans_out++;
2112 :
2113 : /* New canonical freeze plan starting with this tup */
2114 4943 : heap_log_freeze_new_plan(plans_out, frz);
2115 4943 : nplans++;
2116 : }
2117 :
2118 : /*
2119 : * Save page offset number in dedicated buffer in passing.
2120 : *
2121 : * REDO routine relies on the record's offset numbers array grouping
2122 : * offset numbers by freeze plan. The sort order within each grouping
2123 : * is ascending offset number order, just to keep things tidy.
2124 : */
2125 1066295 : offsets_out[i] = frz->offset;
2126 : }
2127 :
2128 : Assert(nplans > 0 && nplans <= ntuples);
2129 :
2130 23907 : return nplans;
2131 : }
2132 :
2133 : /*
2134 : * Write an XLOG_HEAP2_PRUNE* WAL record
2135 : *
2136 : * This is used for several different page maintenance operations:
2137 : *
2138 : * - Page pruning, in VACUUM's 1st pass or on access: Some items are
2139 : * redirected, some marked dead, and some removed altogether.
2140 : *
2141 : * - Freezing: Items are marked as 'frozen'.
2142 : *
2143 : * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
2144 : *
2145 : * They have enough commonalities that we use a single WAL record for them
2146 : * all.
2147 : *
2148 : * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
2149 : * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
2150 : * replaying 'unused' items depends on whether they were all previously marked
2151 : * as dead.
2152 : *
2153 : * If the VM is being updated, vmflags will contain the bits to set. In this
2154 : * case, vmbuffer should already have been updated and marked dirty and should
2155 : * still be pinned and locked.
2156 : *
2157 : * Note: This function scribbles on the 'frozen' array.
2158 : *
2159 : * Note: This is called in a critical section, so careful what you do here.
2160 : */
2161 : void
2162 92887 : log_heap_prune_and_freeze(Relation relation, Buffer buffer,
2163 : Buffer vmbuffer, uint8 vmflags,
2164 : TransactionId conflict_xid,
2165 : bool cleanup_lock,
2166 : PruneReason reason,
2167 : HeapTupleFreeze *frozen, int nfrozen,
2168 : OffsetNumber *redirected, int nredirected,
2169 : OffsetNumber *dead, int ndead,
2170 : OffsetNumber *unused, int nunused)
2171 : {
2172 : xl_heap_prune xlrec;
2173 : XLogRecPtr recptr;
2174 : uint8 info;
2175 : uint8 regbuf_flags_heap;
2176 :
2177 : /* The following local variables hold data registered in the WAL record: */
2178 : xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2179 : xlhp_freeze_plans freeze_plans;
2180 : xlhp_prune_items redirect_items;
2181 : xlhp_prune_items dead_items;
2182 : xlhp_prune_items unused_items;
2183 : OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2184 92887 : bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2185 92887 : bool do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
2186 :
2187 : Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
2188 :
2189 92887 : xlrec.flags = 0;
2190 92887 : regbuf_flags_heap = REGBUF_STANDARD;
2191 :
2192 : /*
2193 : * We can avoid an FPI of the heap page if the only modification we are
2194 : * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2195 : * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2196 : * heap page with this record's LSN. Recovery skips records <= the stamped
2197 : * LSN, so this could lead to skipping an earlier FPI needed to repair a
2198 : * torn page.
2199 : */
2200 92887 : if (!do_prune &&
2201 0 : nfrozen == 0 &&
2202 0 : (!do_set_vm || !XLogHintBitIsNeeded()))
2203 0 : regbuf_flags_heap |= REGBUF_NO_IMAGE;
2204 :
2205 : /*
2206 : * Prepare data for the buffer. The arrays are not actually in the
2207 : * buffer, but we pretend that they are. When XLogInsert stores a full
2208 : * page image, the arrays can be omitted.
2209 : */
2210 92887 : XLogBeginInsert();
2211 92887 : XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
2212 :
2213 92887 : if (do_set_vm)
2214 14131 : XLogRegisterBuffer(1, vmbuffer, 0);
2215 :
2216 92887 : if (nfrozen > 0)
2217 : {
2218 : int nplans;
2219 :
2220 23907 : xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2221 :
2222 : /*
2223 : * Prepare deduplicated representation for use in the WAL record. This
2224 : * destructively sorts frozen tuples array in-place.
2225 : */
2226 23907 : nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2227 :
2228 23907 : freeze_plans.nplans = nplans;
2229 23907 : XLogRegisterBufData(0, &freeze_plans,
2230 : offsetof(xlhp_freeze_plans, plans));
2231 23907 : XLogRegisterBufData(0, plans,
2232 : sizeof(xlhp_freeze_plan) * nplans);
2233 : }
2234 92887 : if (nredirected > 0)
2235 : {
2236 16961 : xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2237 :
2238 16961 : redirect_items.ntargets = nredirected;
2239 16961 : XLogRegisterBufData(0, &redirect_items,
2240 : offsetof(xlhp_prune_items, data));
2241 16961 : XLogRegisterBufData(0, redirected,
2242 : sizeof(OffsetNumber[2]) * nredirected);
2243 : }
2244 92887 : if (ndead > 0)
2245 : {
2246 44297 : xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2247 :
2248 44297 : dead_items.ntargets = ndead;
2249 44297 : XLogRegisterBufData(0, &dead_items,
2250 : offsetof(xlhp_prune_items, data));
2251 44297 : XLogRegisterBufData(0, dead,
2252 : sizeof(OffsetNumber) * ndead);
2253 : }
2254 92887 : if (nunused > 0)
2255 : {
2256 27850 : xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2257 :
2258 27850 : unused_items.ntargets = nunused;
2259 27850 : XLogRegisterBufData(0, &unused_items,
2260 : offsetof(xlhp_prune_items, data));
2261 27850 : XLogRegisterBufData(0, unused,
2262 : sizeof(OffsetNumber) * nunused);
2263 : }
2264 92887 : if (nfrozen > 0)
2265 23907 : XLogRegisterBufData(0, frz_offsets,
2266 : sizeof(OffsetNumber) * nfrozen);
2267 :
2268 : /*
2269 : * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2270 : * flag above.
2271 : */
2272 92887 : if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
2273 : {
2274 14131 : xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2275 14131 : if (vmflags & VISIBILITYMAP_ALL_FROZEN)
2276 10673 : xlrec.flags |= XLHP_VM_ALL_FROZEN;
2277 : }
2278 92887 : if (RelationIsAccessibleInLogicalDecoding(relation))
2279 576 : xlrec.flags |= XLHP_IS_CATALOG_REL;
2280 92887 : if (TransactionIdIsValid(conflict_xid))
2281 76926 : xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2282 92887 : if (cleanup_lock)
2283 78692 : xlrec.flags |= XLHP_CLEANUP_LOCK;
2284 : else
2285 : {
2286 : Assert(nredirected == 0 && ndead == 0);
2287 : /* also, any items in 'unused' must've been LP_DEAD previously */
2288 : }
2289 92887 : XLogRegisterData(&xlrec, SizeOfHeapPrune);
2290 92887 : if (TransactionIdIsValid(conflict_xid))
2291 76926 : XLogRegisterData(&conflict_xid, sizeof(TransactionId));
2292 :
2293 92887 : switch (reason)
2294 : {
2295 44299 : case PRUNE_ON_ACCESS:
2296 44299 : info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2297 44299 : break;
2298 34393 : case PRUNE_VACUUM_SCAN:
2299 34393 : info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2300 34393 : break;
2301 14195 : case PRUNE_VACUUM_CLEANUP:
2302 14195 : info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2303 14195 : break;
2304 0 : default:
2305 0 : elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2306 : break;
2307 : }
2308 92887 : recptr = XLogInsert(RM_HEAP2_ID, info);
2309 :
2310 92887 : if (do_set_vm)
2311 : {
2312 : Assert(BufferIsDirty(vmbuffer));
2313 14131 : PageSetLSN(BufferGetPage(vmbuffer), recptr);
2314 : }
2315 :
2316 : /*
2317 : * See comment at the top of the function about regbuf_flags_heap for
2318 : * details on when we can advance the page LSN.
2319 : */
2320 92887 : if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2321 : {
2322 : Assert(BufferIsDirty(buffer));
2323 92887 : PageSetLSN(BufferGetPage(buffer), recptr);
2324 : }
2325 92887 : }
|