Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * store dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
121 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
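/*
 * [Editorial illustration -- not part of vacuumlazy.c or its coverage data.]
 *
 * A minimal, self-contained toy model of the phase I/II/III cycle described
 * in the header comment above: phase I accumulates dead TIDs until the
 * memory budget for the TID store is exhausted, then phases II (index
 * vacuuming) and III (heap vacuuming) run and the store is emptied so that
 * phase I can resume.  All names here (TOY_*) are invented for this sketch;
 * the real code uses TidStore, lazy_vacuum(), and friends.
 */
#include <stdio.h>

#define TOY_REL_PAGES       100     /* hypothetical relation size in blocks */
#define TOY_DEAD_PER_PAGE   8       /* pretend every page yields 8 dead TIDs */
#define TOY_MEM_LIMIT_TIDS  250     /* stand-in for maintenance_work_mem */

int
main(void)
{
    int     stored = 0;         /* dead TIDs currently in the toy "TID store" */
    int     vacuum_rounds = 0;  /* completed phase II + III rounds */

    for (int blk = 0; blk < TOY_REL_PAGES; blk++)
    {
        /* Phase I: prune/freeze the page and remember its dead TIDs */
        stored += TOY_DEAD_PER_PAGE;

        if (stored >= TOY_MEM_LIMIT_TIDS)
        {
            /* Phase II: delete those TIDs from every index */
            /* Phase III: set the corresponding heap line pointers unused */
            vacuum_rounds++;
            stored = 0;         /* store emptied; phase I resumes */
        }
    }

    /* A final phase II/III round handles whatever is left over */
    if (stored > 0)
        vacuum_rounds++;

    printf("toy vacuum performed %d index/heap vacuuming rounds\n", vacuum_rounds);
    return 0;
}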
130 : #include "postgres.h"
131 :
132 : #include "access/genam.h"
133 : #include "access/heapam.h"
134 : #include "access/htup_details.h"
135 : #include "access/multixact.h"
136 : #include "access/tidstore.h"
137 : #include "access/transam.h"
138 : #include "access/visibilitymap.h"
139 : #include "access/xloginsert.h"
140 : #include "catalog/storage.h"
141 : #include "commands/progress.h"
142 : #include "commands/vacuum.h"
143 : #include "common/int.h"
144 : #include "common/pg_prng.h"
145 : #include "executor/instrument.h"
146 : #include "miscadmin.h"
147 : #include "pgstat.h"
148 : #include "portability/instr_time.h"
149 : #include "postmaster/autovacuum.h"
150 : #include "storage/bufmgr.h"
151 : #include "storage/freespace.h"
152 : #include "storage/lmgr.h"
153 : #include "storage/read_stream.h"
154 : #include "utils/lsyscache.h"
155 : #include "utils/pg_rusage.h"
156 : #include "utils/timestamp.h"
157 :
158 :
159 : /*
160 : * Space/time tradeoff parameters: do these need to be user-tunable?
161 : *
162 : * To consider truncating the relation, we want there to be at least
163 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
164 : * is less) potentially-freeable pages.
165 : */
166 : #define REL_TRUNCATE_MINIMUM 1000
167 : #define REL_TRUNCATE_FRACTION 16
168 :
169 : /*
170 : * Timing parameters for truncate locking heuristics.
171 : *
172 : * These were not exposed as user tunable GUC values because it didn't seem
173 : * that the potential for improvement was great enough to merit the cost of
174 : * supporting them.
175 : */
176 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
177 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
178 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
179 :
180 : /*
181 : * Threshold that controls whether we bypass index vacuuming and heap
182 : * vacuuming as an optimization
183 : */
184 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
185 :
186 : /*
187 : * Perform a failsafe check each time we scan another 4GB of pages.
188 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
189 : */
190 : #define FAILSAFE_EVERY_PAGES \
191 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
192 :
193 : /*
194 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
195 : * (it won't be exact because we only vacuum FSM after processing a heap page
196 : * that has some removable tuples). When there are indexes, this is ignored,
197 : * and we vacuum FSM after each index/heap cleaning pass.
198 : */
199 : #define VACUUM_FSM_EVERY_PAGES \
200 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
201 :
202 : /*
203 : * Before we consider skipping a page that's marked as clean in
204 : * visibility map, we must've seen at least this many clean pages.
205 : */
206 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
207 :
208 : /*
209 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
210 : * Needs to be a power of 2.
211 : */
212 : #define PREFETCH_SIZE ((BlockNumber) 32)
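/*
 * [Editorial illustration -- not part of vacuumlazy.c or its coverage data.]
 *
 * Worked arithmetic for some of the constants above, assuming the default
 * 8 KB BLCKSZ build setting; the table size used for the bypass threshold
 * is hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_BLCKSZ 8192         /* assumed default block size */

int
main(void)
{
    uint64_t failsafe_every = ((uint64_t) 4 * 1024 * 1024 * 1024) / TOY_BLCKSZ;
    uint64_t fsm_every = ((uint64_t) 8 * 1024 * 1024 * 1024) / TOY_BLCKSZ;
    uint64_t rel_pages = 500000;    /* hypothetical ~3.8 GB table */

    /* FAILSAFE_EVERY_PAGES: 4 GB of 8 KB blocks = 524288 = 2^19 */
    printf("failsafe check every %llu blocks\n",
           (unsigned long long) failsafe_every);

    /* VACUUM_FSM_EVERY_PAGES: 8 GB of 8 KB blocks = 1048576 = 2^20 */
    printf("FSM vacuumed every %llu blocks when the table has no indexes\n",
           (unsigned long long) fsm_every);

    /* BYPASS_THRESHOLD_PAGES: 2% of rel_pages = 10000 pages here */
    printf("index vacuuming bypass considered below %.0f LP_DEAD pages\n",
           0.02 * rel_pages);
    return 0;
}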
213 :
214 : /*
215 : * Macro to check if we are in a parallel vacuum. If true, we are in the
216 : * parallel mode and the DSM segment is initialized.
217 : */
218 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
219 :
220 : /* Phases of vacuum during which we report error context. */
221 : typedef enum
222 : {
223 : VACUUM_ERRCB_PHASE_UNKNOWN,
224 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
225 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
226 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
227 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
228 : VACUUM_ERRCB_PHASE_TRUNCATE,
229 : } VacErrPhase;
230 :
231 : /*
232 : * An eager scan of a page that is set all-frozen in the VM is considered
233 : * "successful". To spread out freezing overhead across multiple normal
234 : * vacuums, we limit the number of successful eager page freezes. The maximum
235 : * number of eager page freezes is calculated as a ratio of the all-visible
236 : * but not all-frozen pages at the beginning of the vacuum.
237 : */
238 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
239 :
240 : /*
241 : * On the assumption that different regions of the table tend to have
242 : * similarly aged data, once vacuum fails to freeze
243 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
244 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
245 : * to another region of the table with potentially older data.
246 : */
247 : #define EAGER_SCAN_REGION_SIZE 4096
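/*
 * [Editorial illustration -- not part of vacuumlazy.c or its coverage data.]
 *
 * A sketch of how the two eager scan caps described above combine.  The
 * visibility map counts are hypothetical, and the failure rate mirrors the
 * vacuum_max_eager_freeze_failure_rate setting (0.03 is assumed here purely
 * for the example).
 */
#include <stdio.h>

int
main(void)
{
    unsigned    allvisible = 200000;    /* hypothetical VM counts at start */
    unsigned    allfrozen = 120000;
    double      fail_rate = 0.03;       /* assumed failure-rate setting */

    /* Global success cap: 20% of all-visible-but-not-all-frozen pages */
    unsigned    remaining_successes =
        (unsigned) (0.2 * (allvisible - allfrozen));    /* 16000 */

    /* Per-region failure cap, reset every EAGER_SCAN_REGION_SIZE blocks */
    unsigned    max_fails_per_region = (unsigned) (fail_rate * 4096);   /* 122 */

    printf("eager freeze successes allowed for this vacuum: %u\n",
           remaining_successes);
    printf("eager freeze failures tolerated per 4096-block region: %u\n",
           max_fails_per_region);
    return 0;
}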
248 :
249 : typedef struct LVRelState
250 : {
251 : /* Target heap relation and its indexes */
252 : Relation rel;
253 : Relation *indrels;
254 : int nindexes;
255 :
256 : /* Buffer access strategy and parallel vacuum state */
257 : BufferAccessStrategy bstrategy;
258 : ParallelVacuumState *pvs;
259 :
260 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
261 : bool aggressive;
262 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
263 : bool skipwithvm;
264 : /* Consider index vacuuming bypass optimization? */
265 : bool consider_bypass_optimization;
266 :
267 : /* Doing index vacuuming, index cleanup, rel truncation? */
268 : bool do_index_vacuuming;
269 : bool do_index_cleanup;
270 : bool do_rel_truncate;
271 :
272 : /* VACUUM operation's cutoffs for freezing and pruning */
273 : struct VacuumCutoffs cutoffs;
274 : GlobalVisState *vistest;
275 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
276 : TransactionId NewRelfrozenXid;
277 : MultiXactId NewRelminMxid;
278 : bool skippedallvis;
279 :
280 : /* Error reporting state */
281 : char *dbname;
282 : char *relnamespace;
283 : char *relname;
284 : char *indname; /* Current index name */
285 : BlockNumber blkno; /* used only for heap operations */
286 : OffsetNumber offnum; /* used only for heap operations */
287 : VacErrPhase phase;
288 : bool verbose; /* VACUUM VERBOSE? */
289 :
290 : /*
291 : * dead_items stores TIDs whose index tuples are deleted by index
292 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
293 : * that has been processed by lazy_scan_prune. Also needed by
294 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
295 : * LP_UNUSED during second heap pass.
296 : *
297 : * Both dead_items and dead_items_info are allocated in shared memory in
298 : * parallel vacuum cases.
299 : */
300 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
301 : VacDeadItemsInfo *dead_items_info;
302 :
303 : BlockNumber rel_pages; /* total number of pages */
304 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
305 :
306 : /*
307 : * Count of all-visible blocks eagerly scanned (for logging only). This
308 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
309 : */
310 : BlockNumber eager_scanned_pages;
311 :
312 : BlockNumber removed_pages; /* # pages removed by relation truncation */
313 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
314 :
315 : /* # pages newly set all-visible in the VM */
316 : BlockNumber vm_new_visible_pages;
317 :
318 : /*
319 : * # pages newly set all-visible and all-frozen in the VM. This is a
320 : * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
321 : * all pages set all-visible, but vm_new_visible_frozen_pages includes
322 : * only those which were also set all-frozen.
323 : */
324 : BlockNumber vm_new_visible_frozen_pages;
325 :
326 : /* # all-visible pages newly set all-frozen in the VM */
327 : BlockNumber vm_new_frozen_pages;
328 :
329 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
330 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
331 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
332 :
333 : /* Statistics output by us, for table */
334 : double new_rel_tuples; /* new estimated total # of tuples */
335 : double new_live_tuples; /* new estimated total # of live tuples */
336 : /* Statistics output by index AMs */
337 : IndexBulkDeleteResult **indstats;
338 :
339 : /* Instrumentation counters */
340 : int num_index_scans;
341 : int num_dead_items_resets;
342 : Size total_dead_items_bytes;
343 : /* Counters that follow are only for scanned_pages */
344 : int64 tuples_deleted; /* # deleted from table */
345 : int64 tuples_frozen; /* # newly frozen */
346 : int64 lpdead_items; /* # deleted from indexes */
347 : int64 live_tuples; /* # live tuples remaining */
348 : int64 recently_dead_tuples; /* # dead, but not yet removable */
349 : int64 missed_dead_tuples; /* # removable, but not removed */
350 :
351 : /* State maintained by heap_vac_scan_next_block() */
352 : BlockNumber current_block; /* last block returned */
353 : BlockNumber next_unskippable_block; /* next unskippable block */
354 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
355 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
356 :
357 : /* State related to managing eager scanning of all-visible pages */
358 :
359 : /*
360 : * A normal vacuum that has failed to freeze too many eagerly scanned
361 : * blocks in a region suspends eager scanning.
362 : * next_eager_scan_region_start is the block number of the first block
363 : * eligible for resumed eager scanning.
364 : *
365 : * When eager scanning is permanently disabled, either initially
366 : * (including for aggressive vacuum) or due to hitting the success cap,
367 : * this is set to InvalidBlockNumber.
368 : */
369 : BlockNumber next_eager_scan_region_start;
370 :
371 : /*
372 : * The remaining number of successful eager page freezes a normal vacuum
373 : * will allow for this relation. When eager scanning is enabled, this is
374 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
375 : * all-visible but not all-frozen pages. For each eager freeze success,
376 : * this is decremented. Once it hits 0, eager scanning is permanently
377 : * disabled. It is initialized to 0 if eager scanning starts out disabled
378 : * (including for aggressive vacuum).
379 : */
380 : BlockNumber eager_scan_remaining_successes;
381 :
382 : /*
383 : * The maximum number of blocks which may be eagerly scanned and not
384 : * frozen before eager scanning is temporarily suspended. This is
385 : * configurable both globally, via the
386 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
387 : * storage parameter of the same name. It is calculated as
388 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
389 : * It is 0 when eager scanning is disabled.
390 : */
391 : BlockNumber eager_scan_max_fails_per_region;
392 :
393 : /*
394 : * The number of eagerly scanned blocks vacuum may still fail to freeze
395 : * (due to age) in the current eager scan region. Vacuum resets it to
396 : * eager_scan_max_fails_per_region each time it enters a new region of the
397 : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
398 : * suspended until the next region. It is also 0 if eager scanning has
399 : * been permanently disabled.
400 : */
401 : BlockNumber eager_scan_remaining_fails;
402 : } LVRelState;
403 :
404 :
405 : /* Struct for saving and restoring vacuum error information. */
406 : typedef struct LVSavedErrInfo
407 : {
408 : BlockNumber blkno;
409 : OffsetNumber offnum;
410 : VacErrPhase phase;
411 : } LVSavedErrInfo;
412 :
413 :
414 : /* non-export function prototypes */
415 : static void lazy_scan_heap(LVRelState *vacrel);
416 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
417 : const VacuumParams params);
418 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
419 : void *callback_private_data,
420 : void *per_buffer_data);
421 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
422 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
423 : BlockNumber blkno, Page page,
424 : bool sharelock, Buffer vmbuffer);
425 : static void identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
426 : BlockNumber heap_blk, Page heap_page,
427 : int nlpdead_items,
428 : Buffer vmbuffer,
429 : uint8 *vmbits);
430 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
431 : BlockNumber blkno, Page page,
432 : Buffer vmbuffer,
433 : bool *has_lpdead_items, bool *vm_page_frozen);
434 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
435 : BlockNumber blkno, Page page,
436 : bool *has_lpdead_items);
437 : static void lazy_vacuum(LVRelState *vacrel);
438 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
439 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
440 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
441 : Buffer buffer, OffsetNumber *deadoffsets,
442 : int num_offsets, Buffer vmbuffer);
443 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
444 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
445 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
446 : IndexBulkDeleteResult *istat,
447 : double reltuples,
448 : LVRelState *vacrel);
449 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
450 : IndexBulkDeleteResult *istat,
451 : double reltuples,
452 : bool estimated_count,
453 : LVRelState *vacrel);
454 : static bool should_attempt_truncation(LVRelState *vacrel);
455 : static void lazy_truncate_heap(LVRelState *vacrel);
456 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
457 : bool *lock_waiter_detected);
458 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
459 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
460 : int num_offsets);
461 : static void dead_items_reset(LVRelState *vacrel);
462 : static void dead_items_cleanup(LVRelState *vacrel);
463 :
464 : #ifdef USE_ASSERT_CHECKING
465 : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
466 : TransactionId OldestXmin,
467 : bool *all_frozen,
468 : TransactionId *visibility_cutoff_xid,
469 : OffsetNumber *logging_offnum);
470 : #endif
471 : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
472 : TransactionId OldestXmin,
473 : OffsetNumber *deadoffsets,
474 : int ndeadoffsets,
475 : bool *all_frozen,
476 : TransactionId *visibility_cutoff_xid,
477 : OffsetNumber *logging_offnum);
478 : static void update_relstats_all_indexes(LVRelState *vacrel);
479 : static void vacuum_error_callback(void *arg);
480 : static void update_vacuum_error_info(LVRelState *vacrel,
481 : LVSavedErrInfo *saved_vacrel,
482 : int phase, BlockNumber blkno,
483 : OffsetNumber offnum);
484 : static void restore_vacuum_error_info(LVRelState *vacrel,
485 : const LVSavedErrInfo *saved_vacrel);
486 :
487 :
488 :
489 : /*
490 : * Helper to set up the eager scanning state for vacuuming a single relation.
491 : * Initializes the eager scan management related members of the LVRelState.
492 : *
493 : * The caller indicates whether or not an aggressive vacuum is required due to
494 : * vacuum options or for relfrozenxid/relminmxid advancement.
495 : */
496 : static void
497 177078 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
498 : {
499 : uint32 randseed;
500 : BlockNumber allvisible;
501 : BlockNumber allfrozen;
502 : float first_region_ratio;
503 177078 : bool oldest_unfrozen_before_cutoff = false;
504 :
505 : /*
506 : * Initialize eager scan management fields to their disabled values.
507 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
508 : * of tables without sufficiently old tuples disable eager scanning.
509 : */
510 177078 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
511 177078 : vacrel->eager_scan_max_fails_per_region = 0;
512 177078 : vacrel->eager_scan_remaining_fails = 0;
513 177078 : vacrel->eager_scan_remaining_successes = 0;
514 :
515 : /* If eager scanning is explicitly disabled, just return. */
516 177078 : if (params.max_eager_freeze_failure_rate == 0)
517 177078 : return;
518 :
519 : /*
520 : * The caller will have determined whether or not an aggressive vacuum is
521 : * required by either the vacuum parameters or the relative age of the
522 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
523 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
524 : * so scans of all-visible pages are not considered eager.
525 : */
526 177078 : if (vacrel->aggressive)
527 165798 : return;
528 :
529 : /*
530 : * Aggressively vacuuming a small relation shouldn't take long, so it
531 : * isn't worth amortizing. We use two times the region size as the size
532 : * cutoff because the eager scan start block is a random spot somewhere in
533 : * the first region, making the second region the first to be eager
534 : * scanned normally.
535 : */
536 11280 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
537 11280 : return;
538 :
539 : /*
540 : * We only want to enable eager scanning if we are likely to be able to
541 : * freeze some of the pages in the relation.
542 : *
543 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
544 : * are technically freezable, but we won't freeze them unless the criteria
545 : * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
546 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
547 : *
548 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
549 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
550 : * enable eager scanning.
551 : */
552 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
553 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
554 : vacrel->cutoffs.FreezeLimit))
555 0 : oldest_unfrozen_before_cutoff = true;
556 :
557 0 : if (!oldest_unfrozen_before_cutoff &&
558 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
559 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
560 : vacrel->cutoffs.MultiXactCutoff))
561 0 : oldest_unfrozen_before_cutoff = true;
562 :
563 0 : if (!oldest_unfrozen_before_cutoff)
564 0 : return;
565 :
566 : /* We have met the criteria to eagerly scan some pages. */
567 :
568 : /*
569 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
570 : * all-visible but not all-frozen blocks in the relation.
571 : */
572 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
573 :
574 0 : vacrel->eager_scan_remaining_successes =
575 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
576 0 : (allvisible - allfrozen));
577 :
578 : /* If every all-visible page is frozen, eager scanning is disabled. */
579 0 : if (vacrel->eager_scan_remaining_successes == 0)
580 0 : return;
581 :
582 : /*
583 : * Now calculate the bounds of the first eager scan region. Its end block
584 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
585 : * blocks. This affects the bounds of all subsequent regions and avoids
586 : * eager scanning and failing to freeze the same blocks each vacuum of the
587 : * relation.
588 : */
589 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
590 :
591 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
592 :
593 : Assert(params.max_eager_freeze_failure_rate > 0 &&
594 : params.max_eager_freeze_failure_rate <= 1);
595 :
596 0 : vacrel->eager_scan_max_fails_per_region =
597 0 : params.max_eager_freeze_failure_rate *
598 : EAGER_SCAN_REGION_SIZE;
599 :
600 : /*
601 : * The first region will be smaller than subsequent regions. As such,
602 : * adjust the eager freeze failures tolerated for this region.
603 : */
604 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
605 : EAGER_SCAN_REGION_SIZE;
606 :
607 0 : vacrel->eager_scan_remaining_fails =
608 0 : vacrel->eager_scan_max_fails_per_region *
609 : first_region_ratio;
610 : }
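/*
 * [Editorial illustration -- not part of vacuumlazy.c or its coverage data.]
 *
 * Standalone arithmetic mirroring the first-region setup at the end of
 * heap_vacuum_eager_scan_setup() above.  The random seed and failure rate
 * are hypothetical values chosen only to make the numbers concrete.
 */
#include <stdio.h>

#define REGION 4096                 /* EAGER_SCAN_REGION_SIZE */

int
main(void)
{
    unsigned    randseed = 987654321u;  /* hypothetical pg_prng output */
    double      fail_rate = 0.03;       /* assumed failure-rate setting */

    unsigned    region_start = randseed % REGION;   /* 2225 */
    unsigned    max_fails = (unsigned) (fail_rate * REGION);    /* 122 */

    /*
     * The first region is smaller than REGION blocks, so its failure budget
     * is scaled by 1 - region_start/REGION, as in the function above.
     */
    double      first_region_ratio = 1.0 - (double) region_start / REGION;
    unsigned    first_region_fails =
        (unsigned) (max_fails * first_region_ratio);    /* 55 */

    printf("next_eager_scan_region_start = %u\n", region_start);
    printf("failures tolerated: %u in the first region, %u per region later\n",
           first_region_fails, max_fails);
    return 0;
}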
611 :
612 : /*
613 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
614 : *
615 : * This routine sets things up for and then calls lazy_scan_heap, where
616 : * almost all work actually takes place. Finalizes everything after the call
617 : * returns by managing relation truncation and updating rel's pg_class
618 : * entry. (Also updates pg_class entries for any indexes that need it.)
619 : *
620 : * At entry, we have already established a transaction and opened
621 : * and locked the relation.
622 : */
623 : void
624 177078 : heap_vacuum_rel(Relation rel, const VacuumParams params,
625 : BufferAccessStrategy bstrategy)
626 : {
627 : LVRelState *vacrel;
628 : bool verbose,
629 : instrument,
630 : skipwithvm,
631 : frozenxid_updated,
632 : minmulti_updated;
633 : BlockNumber orig_rel_pages,
634 : new_rel_pages,
635 : new_rel_allvisible,
636 : new_rel_allfrozen;
637 : PGRUsage ru0;
638 177078 : TimestampTz starttime = 0;
639 177078 : PgStat_Counter startreadtime = 0,
640 177078 : startwritetime = 0;
641 177078 : WalUsage startwalusage = pgWalUsage;
642 177078 : BufferUsage startbufferusage = pgBufferUsage;
643 : ErrorContextCallback errcallback;
644 177078 : char **indnames = NULL;
645 177078 : Size dead_items_max_bytes = 0;
646 :
647 177078 : verbose = (params.options & VACOPT_VERBOSE) != 0;
648 326642 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
649 149564 : params.log_vacuum_min_duration >= 0));
650 177078 : if (instrument)
651 : {
652 149588 : pg_rusage_init(&ru0);
653 149588 : if (track_io_timing)
654 : {
655 0 : startreadtime = pgStatBlockReadTime;
656 0 : startwritetime = pgStatBlockWriteTime;
657 : }
658 : }
659 :
660 : /* Used for instrumentation and stats report */
661 177078 : starttime = GetCurrentTimestamp();
662 :
663 177078 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
664 : RelationGetRelid(rel));
665 177078 : if (AmAutoVacuumWorkerProcess())
666 149564 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
667 149564 : params.is_wraparound
668 : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
669 : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
670 : else
671 27514 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
672 : PROGRESS_VACUUM_STARTED_BY_MANUAL);
673 :
674 : /*
675 : * Setup error traceback support for ereport() first. The idea is to set
676 : * up an error context callback to display additional information on any
677 : * error during a vacuum. During different phases of vacuum, we update
678 : * the state so that the error context callback always displays current
679 : * information.
680 : *
681 : * Copy the names of heap rel into local memory for error reporting
682 : * purposes, too. It isn't always safe to assume that we can get the name
683 : * of each rel. It's convenient for code in lazy_scan_heap to always use
684 : * these temp copies.
685 : */
686 177078 : vacrel = palloc0_object(LVRelState);
687 177078 : vacrel->dbname = get_database_name(MyDatabaseId);
688 177078 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
689 177078 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
690 177078 : vacrel->indname = NULL;
691 177078 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
692 177078 : vacrel->verbose = verbose;
693 177078 : errcallback.callback = vacuum_error_callback;
694 177078 : errcallback.arg = vacrel;
695 177078 : errcallback.previous = error_context_stack;
696 177078 : error_context_stack = &errcallback;
697 :
698 : /* Set up high level stuff about rel and its indexes */
699 177078 : vacrel->rel = rel;
700 177078 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
701 : &vacrel->indrels);
702 177078 : vacrel->bstrategy = bstrategy;
703 177078 : if (instrument && vacrel->nindexes > 0)
704 : {
705 : /* Copy index names used by instrumentation (not error reporting) */
706 143114 : indnames = palloc_array(char *, vacrel->nindexes);
707 368640 : for (int i = 0; i < vacrel->nindexes; i++)
708 225526 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
709 : }
710 :
711 : /*
712 : * The index_cleanup param either disables index vacuuming and cleanup or
713 : * forces it to go ahead when we would otherwise apply the index bypass
714 : * optimization. The default is 'auto', which leaves the final decision
715 : * up to lazy_vacuum().
716 : *
717 : * The truncate param allows user to avoid attempting relation truncation,
718 : * though it can't force truncation to happen.
719 : */
720 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
721 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
722 : params.truncate != VACOPTVALUE_AUTO);
723 :
724 : /*
725 : * While VacuumFailSafeActive is reset to false before calling this, we
726 : * still need to reset it here due to recursive calls.
727 : */
728 177078 : VacuumFailsafeActive = false;
729 177078 : vacrel->consider_bypass_optimization = true;
730 177078 : vacrel->do_index_vacuuming = true;
731 177078 : vacrel->do_index_cleanup = true;
732 177078 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
733 177078 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
734 : {
735 : /* Force disable index vacuuming up-front */
736 260 : vacrel->do_index_vacuuming = false;
737 260 : vacrel->do_index_cleanup = false;
738 : }
739 176818 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
740 : {
741 : /* Force index vacuuming. Note that failsafe can still bypass. */
742 30 : vacrel->consider_bypass_optimization = false;
743 : }
744 : else
745 : {
746 : /* Default/auto, make all decisions dynamically */
747 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
748 : }
749 :
750 : /* Initialize page counters explicitly (be tidy) */
751 177078 : vacrel->scanned_pages = 0;
752 177078 : vacrel->eager_scanned_pages = 0;
753 177078 : vacrel->removed_pages = 0;
754 177078 : vacrel->new_frozen_tuple_pages = 0;
755 177078 : vacrel->lpdead_item_pages = 0;
756 177078 : vacrel->missed_dead_pages = 0;
757 177078 : vacrel->nonempty_pages = 0;
758 : /* dead_items_alloc allocates vacrel->dead_items later on */
759 :
760 : /* Allocate/initialize output statistics state */
761 177078 : vacrel->new_rel_tuples = 0;
762 177078 : vacrel->new_live_tuples = 0;
763 177078 : vacrel->indstats = (IndexBulkDeleteResult **)
764 177078 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
765 :
766 : /* Initialize remaining counters (be tidy) */
767 177078 : vacrel->num_index_scans = 0;
768 177078 : vacrel->num_dead_items_resets = 0;
769 177078 : vacrel->total_dead_items_bytes = 0;
770 177078 : vacrel->tuples_deleted = 0;
771 177078 : vacrel->tuples_frozen = 0;
772 177078 : vacrel->lpdead_items = 0;
773 177078 : vacrel->live_tuples = 0;
774 177078 : vacrel->recently_dead_tuples = 0;
775 177078 : vacrel->missed_dead_tuples = 0;
776 :
777 177078 : vacrel->vm_new_visible_pages = 0;
778 177078 : vacrel->vm_new_visible_frozen_pages = 0;
779 177078 : vacrel->vm_new_frozen_pages = 0;
780 :
781 : /*
782 : * Get cutoffs that determine which deleted tuples are considered DEAD,
783 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
784 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
785 : * happen in this order to ensure that the OldestXmin cutoff field works
786 : * as an upper bound on the XIDs stored in the pages we'll actually scan
787 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
788 : *
789 : * Next acquire vistest, a related cutoff that's used in pruning. We use
790 : * vistest in combination with OldestXmin to ensure that
791 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
792 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
793 : * whether a tuple should be frozen or removed. (In the future we might
794 : * want to teach lazy_scan_prune to recompute vistest from time to time,
795 : * to increase the number of dead tuples it can prune away.)
796 : */
797 177078 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
798 177078 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
799 177078 : vacrel->vistest = GlobalVisTestFor(rel);
800 :
801 : /* Initialize state used to track oldest extant XID/MXID */
802 177078 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
803 177078 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
804 :
805 : /*
806 : * Initialize state related to tracking all-visible page skipping. This is
807 : * very important to determine whether or not it is safe to advance the
808 : * relfrozenxid/relminmxid.
809 : */
810 177078 : vacrel->skippedallvis = false;
811 177078 : skipwithvm = true;
812 177078 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
813 : {
814 : /*
815 : * Force aggressive mode, and disable skipping blocks using the
816 : * visibility map (even those set all-frozen)
817 : */
818 344 : vacrel->aggressive = true;
819 344 : skipwithvm = false;
820 : }
821 :
822 177078 : vacrel->skipwithvm = skipwithvm;
823 :
824 : /*
825 : * Set up eager scan tracking state. This must happen after determining
826 : * whether or not the vacuum must be aggressive, because only normal
827 : * vacuums use the eager scan algorithm.
828 : */
829 177078 : heap_vacuum_eager_scan_setup(vacrel, params);
830 :
831 : /* Report the vacuum mode: 'normal' or 'aggressive' */
832 177078 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
833 177078 : vacrel->aggressive
834 : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
835 : : PROGRESS_VACUUM_MODE_NORMAL);
836 :
837 177078 : if (verbose)
838 : {
839 24 : if (vacrel->aggressive)
840 2 : ereport(INFO,
841 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
842 : vacrel->dbname, vacrel->relnamespace,
843 : vacrel->relname)));
844 : else
845 22 : ereport(INFO,
846 : (errmsg("vacuuming \"%s.%s.%s\"",
847 : vacrel->dbname, vacrel->relnamespace,
848 : vacrel->relname)));
849 : }
850 :
851 : /*
852 : * Allocate dead_items memory using dead_items_alloc. This handles
853 : * parallel VACUUM initialization as part of allocating shared memory
854 : * space used for dead_items. (But do a failsafe precheck first, to
855 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
856 : * is already dangerously old.)
857 : */
858 177078 : lazy_check_wraparound_failsafe(vacrel);
859 177078 : dead_items_alloc(vacrel, params.nworkers);
860 :
861 : /*
862 : * Call lazy_scan_heap to perform all required heap pruning, index
863 : * vacuuming, and heap vacuuming (plus related processing)
864 : */
865 177078 : lazy_scan_heap(vacrel);
866 :
867 : /*
868 : * Save dead items max_bytes and update the memory usage statistics before
869 : * cleanup, since they are freed in parallel vacuum cases during
870 : * dead_items_cleanup().
871 : */
872 177078 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
873 177078 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
874 :
875 : /*
876 : * Free resources managed by dead_items_alloc. This ends parallel mode in
877 : * passing when necessary.
878 : */
879 177078 : dead_items_cleanup(vacrel);
880 : Assert(!IsInParallelMode());
881 :
882 : /*
883 : * Update pg_class entries for each of rel's indexes where appropriate.
884 : *
885 : * Unlike the later update to rel's pg_class entry, this is not critical.
886 : * It only maintains relpages/reltuples statistics used by the planner.
887 : */
888 177078 : if (vacrel->do_index_cleanup)
889 128188 : update_relstats_all_indexes(vacrel);
890 :
891 : /* Done with rel's indexes */
892 177078 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
893 :
894 : /* Optionally truncate rel */
895 177078 : if (should_attempt_truncation(vacrel))
896 306 : lazy_truncate_heap(vacrel);
897 :
898 : /* Pop the error context stack */
899 177078 : error_context_stack = errcallback.previous;
900 :
901 : /* Report that we are now doing final cleanup */
902 177078 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
903 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
904 :
905 : /*
906 : * Prepare to update rel's pg_class entry.
907 : *
908 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
909 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
910 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
911 : */
912 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
913 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
914 : vacrel->cutoffs.relfrozenxid,
915 : vacrel->NewRelfrozenXid));
916 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
917 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
918 : vacrel->cutoffs.relminmxid,
919 : vacrel->NewRelminMxid));
920 177078 : if (vacrel->skippedallvis)
921 : {
922 : /*
923 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
924 : * chose to skip an all-visible page range. The state that tracks new
925 : * values will have missed unfrozen XIDs from the pages we skipped.
926 : */
927 : Assert(!vacrel->aggressive);
928 66 : vacrel->NewRelfrozenXid = InvalidTransactionId;
929 66 : vacrel->NewRelminMxid = InvalidMultiXactId;
930 : }
931 :
932 : /*
933 : * For safety, clamp relallvisible to be not more than what we're setting
934 : * pg_class.relpages to
935 : */
936 177078 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
937 177078 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
938 177078 : if (new_rel_allvisible > new_rel_pages)
939 0 : new_rel_allvisible = new_rel_pages;
940 :
941 : /*
942 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
943 : * all-frozen blocks to the count of all-visible blocks. This matches the
944 : * clamping of relallvisible above.
945 : */
946 177078 : if (new_rel_allfrozen > new_rel_allvisible)
947 0 : new_rel_allfrozen = new_rel_allvisible;
948 :
949 : /*
950 : * Now actually update rel's pg_class entry.
951 : *
952 : * In principle new_live_tuples could be -1 indicating that we (still)
953 : * don't know the tuple count. In practice that can't happen, since we
954 : * scan every page that isn't skipped using the visibility map.
955 : */
956 177078 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
957 : new_rel_allvisible, new_rel_allfrozen,
958 177078 : vacrel->nindexes > 0,
959 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
960 : &frozenxid_updated, &minmulti_updated, false);
961 :
962 : /*
963 : * Report results to the cumulative stats system, too.
964 : *
965 : * Deliberately avoid telling the stats system about LP_DEAD items that
966 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
967 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
968 : * It seems like a good idea to err on the side of not vacuuming again too
969 : * soon in cases where the failsafe prevented significant amounts of heap
970 : * vacuuming.
971 : */
972 177078 : pgstat_report_vacuum(rel,
973 70896 : Max(vacrel->new_live_tuples, 0),
974 177078 : vacrel->recently_dead_tuples +
975 177078 : vacrel->missed_dead_tuples,
976 : starttime);
977 177078 : pgstat_progress_end_command();
978 :
979 177078 : if (instrument)
980 : {
981 149588 : TimestampTz endtime = GetCurrentTimestamp();
982 :
983 149806 : if (verbose || params.log_vacuum_min_duration == 0 ||
984 218 : TimestampDifferenceExceeds(starttime, endtime,
985 218 : params.log_vacuum_min_duration))
986 : {
987 : long secs_dur;
988 : int usecs_dur;
989 : WalUsage walusage;
990 : BufferUsage bufferusage;
991 : StringInfoData buf;
992 : char *msgfmt;
993 : int32 diff;
994 149370 : double read_rate = 0,
995 149370 : write_rate = 0;
996 : int64 total_blks_hit;
997 : int64 total_blks_read;
998 : int64 total_blks_dirtied;
999 :
1000 149370 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1001 149370 : memset(&walusage, 0, sizeof(WalUsage));
1002 149370 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1003 149370 : memset(&bufferusage, 0, sizeof(BufferUsage));
1004 149370 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1005 :
1006 149370 : total_blks_hit = bufferusage.shared_blks_hit +
1007 149370 : bufferusage.local_blks_hit;
1008 149370 : total_blks_read = bufferusage.shared_blks_read +
1009 149370 : bufferusage.local_blks_read;
1010 149370 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1011 149370 : bufferusage.local_blks_dirtied;
1012 :
1013 149370 : initStringInfo(&buf);
1014 149370 : if (verbose)
1015 : {
1016 : /*
1017 : * Aggressiveness already reported earlier, in dedicated
1018 : * VACUUM VERBOSE ereport
1019 : */
1020 : Assert(!params.is_wraparound);
1021 24 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1022 : }
1023 149346 : else if (params.is_wraparound)
1024 : {
1025 : /*
1026 : * While it's possible for a VACUUM to be both is_wraparound
1027 : * and !aggressive, that's just a corner-case -- is_wraparound
1028 : * implies aggressive. Produce distinct output for the corner
1029 : * case all the same, just in case.
1030 : */
1031 149212 : if (vacrel->aggressive)
1032 149186 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1033 : else
1034 26 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 : }
1036 : else
1037 : {
1038 134 : if (vacrel->aggressive)
1039 34 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1040 : else
1041 100 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 : }
1043 149370 : appendStringInfo(&buf, msgfmt,
1044 : vacrel->dbname,
1045 : vacrel->relnamespace,
1046 : vacrel->relname,
1047 : vacrel->num_index_scans);
1048 208766 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1049 : vacrel->removed_pages,
1050 : new_rel_pages,
1051 : vacrel->scanned_pages,
1052 : orig_rel_pages == 0 ? 100.0 :
1053 59396 : 100.0 * vacrel->scanned_pages /
1054 : orig_rel_pages,
1055 : vacrel->eager_scanned_pages);
1056 149370 : appendStringInfo(&buf,
1057 149370 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1058 : vacrel->tuples_deleted,
1059 149370 : (int64) vacrel->new_rel_tuples,
1060 : vacrel->recently_dead_tuples);
1061 149370 : if (vacrel->missed_dead_tuples > 0)
1062 0 : appendStringInfo(&buf,
1063 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1064 : vacrel->missed_dead_tuples,
1065 : vacrel->missed_dead_pages);
1066 149370 : diff = (int32) (ReadNextTransactionId() -
1067 149370 : vacrel->cutoffs.OldestXmin);
1068 149370 : appendStringInfo(&buf,
1069 149370 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1070 : vacrel->cutoffs.OldestXmin, diff);
1071 149370 : if (frozenxid_updated)
1072 : {
1073 36050 : diff = (int32) (vacrel->NewRelfrozenXid -
1074 36050 : vacrel->cutoffs.relfrozenxid);
1075 36050 : appendStringInfo(&buf,
1076 36050 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1077 : vacrel->NewRelfrozenXid, diff);
1078 : }
1079 149370 : if (minmulti_updated)
1080 : {
1081 82 : diff = (int32) (vacrel->NewRelminMxid -
1082 82 : vacrel->cutoffs.relminmxid);
1083 82 : appendStringInfo(&buf,
1084 82 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1085 : vacrel->NewRelminMxid, diff);
1086 : }
1087 208766 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1088 : vacrel->new_frozen_tuple_pages,
1089 : orig_rel_pages == 0 ? 100.0 :
1090 59396 : 100.0 * vacrel->new_frozen_tuple_pages /
1091 : orig_rel_pages,
1092 : vacrel->tuples_frozen);
1093 :
1094 149370 : appendStringInfo(&buf,
1095 149370 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1096 : vacrel->vm_new_visible_pages,
1097 149370 : vacrel->vm_new_visible_frozen_pages +
1098 149370 : vacrel->vm_new_frozen_pages,
1099 : vacrel->vm_new_frozen_pages);
1100 149370 : if (vacrel->do_index_vacuuming)
1101 : {
1102 100956 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1103 100864 : appendStringInfoString(&buf, _("index scan not needed: "));
1104 : else
1105 92 : appendStringInfoString(&buf, _("index scan needed: "));
1106 :
1107 100956 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1108 : }
1109 : else
1110 : {
1111 48414 : if (!VacuumFailsafeActive)
1112 2 : appendStringInfoString(&buf, _("index scan bypassed: "));
1113 : else
1114 48412 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1115 :
1116 48414 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1117 : }
1118 208766 : appendStringInfo(&buf, msgfmt,
1119 : vacrel->lpdead_item_pages,
1120 : orig_rel_pages == 0 ? 100.0 :
1121 59396 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1122 : vacrel->lpdead_items);
1123 374476 : for (int i = 0; i < vacrel->nindexes; i++)
1124 : {
1125 225106 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1126 :
1127 225106 : if (!istat)
1128 224914 : continue;
1129 :
1130 192 : appendStringInfo(&buf,
1131 192 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1132 192 : indnames[i],
1133 : istat->num_pages,
1134 : istat->pages_newly_deleted,
1135 : istat->pages_deleted,
1136 : istat->pages_free);
1137 : }
1138 149370 : if (track_cost_delay_timing)
1139 : {
1140 : /*
1141 : * We bypass the changecount mechanism because this value is
1142 : * only updated by the calling process. We also rely on the
1143 : * above call to pgstat_progress_end_command() to not clear
1144 : * the st_progress_param array.
1145 : */
1146 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1147 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1148 : }
1149 149370 : if (track_io_timing)
1150 : {
1151 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1152 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1153 :
1154 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1155 : read_ms, write_ms);
1156 : }
1157 149370 : if (secs_dur > 0 || usecs_dur > 0)
1158 : {
1159 149370 : read_rate = (double) BLCKSZ * total_blks_read /
1160 149370 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1161 149370 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1162 149370 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1163 : }
1164 149370 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1165 : read_rate, write_rate);
1166 149370 : appendStringInfo(&buf,
1167 149370 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1168 : total_blks_hit,
1169 : total_blks_read,
1170 : total_blks_dirtied);
1171 149370 : appendStringInfo(&buf,
1172 149370 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1173 : walusage.wal_records,
1174 : walusage.wal_fpi,
1175 : walusage.wal_bytes,
1176 : walusage.wal_fpi_bytes,
1177 : walusage.wal_buffers_full);
1178 :
1179 : /*
1180 : * Report the dead items memory usage.
1181 : *
1182 : * The num_dead_items_resets counter increases when we reset the
1183 : * collected dead items, so the counter is non-zero if at least
1184 : * one dead item has been collected, even if index vacuuming is
1185 : * disabled.
1186 : */
1187 149370 : appendStringInfo(&buf,
1188 149370 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1189 : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1190 149370 : vacrel->num_dead_items_resets),
1191 149370 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1192 : vacrel->num_dead_items_resets,
1193 149370 : (double) dead_items_max_bytes / (1024 * 1024));
1194 149370 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1195 :
1196 149370 : ereport(verbose ? INFO : LOG,
1197 : (errmsg_internal("%s", buf.data)));
1198 149370 : pfree(buf.data);
1199 : }
1200 : }
1201 :
1202 : /* Cleanup index statistics and index names */
1203 442102 : for (int i = 0; i < vacrel->nindexes; i++)
1204 : {
1205 265024 : if (vacrel->indstats[i])
1206 2916 : pfree(vacrel->indstats[i]);
1207 :
1208 265024 : if (instrument)
1209 225526 : pfree(indnames[i]);
1210 : }
1211 177078 : }
1212 :
1213 : /*
1214 : * lazy_scan_heap() -- workhorse function for VACUUM
1215 : *
1216 : * This routine prunes each page in the heap, and considers the need to
1217 : * freeze remaining tuples with storage (not including pages that can be
1218 : * skipped using the visibility map). Also performs related maintenance
1219 : * of the FSM and visibility map. These steps all take place during an
1220 : * initial pass over the target heap relation.
1221 : *
1222 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1223 : * consists of deleting index tuples that point to LP_DEAD items left in
1224 : * heap pages following pruning. The earlier initial pass over the heap will
1225 : * have collected the TIDs whose index tuples need to be removed.
1226 : *
1227 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1228 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1229 : * as LP_UNUSED. This has to happen in a second, final pass over the
1230 : * heap, to preserve a basic invariant that all index AMs rely on: no
1231 : * extant index tuple can ever be allowed to contain a TID that points to
1232 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1233 : * recycling of line pointers to avoid index scans that get confused
1234 : * about which TID points to which tuple immediately after recycling.
1235 : * (Actually, this isn't a concern when target heap relation happens to
1236 : * have no indexes, which allows us to safely apply the one-pass strategy
1237 : * as an optimization).
1238 : *
1239 : * In practice we often have enough space to fit all TIDs, and so won't
1240 : * need to call lazy_vacuum more than once, after our initial pass over
1241 : * the heap has totally finished. Otherwise things are slightly more
1242 : * complicated: our "initial pass" over the heap applies only to those
1243 : * pages that were pruned before we needed to call lazy_vacuum, and our
1244 : * "final pass" over the heap only vacuums these same heap pages.
1245 : * However, we process indexes in full every time lazy_vacuum is called,
1246 : * which makes index processing very inefficient when memory is in short
1247 : * supply.
1248 : */
1249 : static void
1250 177078 : lazy_scan_heap(LVRelState *vacrel)
1251 : {
1252 : ReadStream *stream;
1253 177078 : BlockNumber rel_pages = vacrel->rel_pages,
1254 177078 : blkno = 0,
1255 177078 : next_fsm_block_to_vacuum = 0;
1256 177078 : BlockNumber orig_eager_scan_success_limit =
1257 : vacrel->eager_scan_remaining_successes; /* for logging */
1258 177078 : Buffer vmbuffer = InvalidBuffer;
1259 177078 : const int initprog_index[] = {
1260 : PROGRESS_VACUUM_PHASE,
1261 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1262 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1263 : };
1264 : int64 initprog_val[3];
1265 :
1266 : /* Report that we're scanning the heap, advertising total # of blocks */
1267 177078 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1268 177078 : initprog_val[1] = rel_pages;
1269 177078 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1270 177078 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1271 :
1272 : /* Initialize for the first heap_vac_scan_next_block() call */
1273 177078 : vacrel->current_block = InvalidBlockNumber;
1274 177078 : vacrel->next_unskippable_block = InvalidBlockNumber;
1275 177078 : vacrel->next_unskippable_eager_scanned = false;
1276 177078 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1277 :
1278 : /*
1279 : * Set up the read stream for vacuum's first pass through the heap.
1280 : *
1281 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1282 : * explicit work in heap_vac_scan_next_block.
1283 : */
1284 177078 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1285 : vacrel->bstrategy,
1286 : vacrel->rel,
1287 : MAIN_FORKNUM,
1288 : heap_vac_scan_next_block,
1289 : vacrel,
1290 : sizeof(bool));
1291 :
1292 : while (true)
1293 707154 : {
1294 : Buffer buf;
1295 : Page page;
1296 884232 : bool was_eager_scanned = false;
1297 884232 : int ndeleted = 0;
1298 : bool has_lpdead_items;
1299 884232 : void *per_buffer_data = NULL;
1300 884232 : bool vm_page_frozen = false;
1301 884232 : bool got_cleanup_lock = false;
1302 :
1303 884232 : vacuum_delay_point(false);
1304 :
1305 : /*
1306 : * Regularly check if wraparound failsafe should trigger.
1307 : *
1308 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1309 : * relfrozenxid might start to look dangerously old before we reach
1310 : * that point. This check also provides failsafe coverage for the
1311 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1312 : * param set to 'off'.
1313 : */
1314 884232 : if (vacrel->scanned_pages > 0 &&
1315 707154 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1316 0 : lazy_check_wraparound_failsafe(vacrel);
1317 :
1318 : /*
1319 : * Consider if we definitely have enough space to process TIDs on page
1320 : * already. If we are close to overrunning the available space for
1321 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1322 : * this page. However, let's force at least one page-worth of tuples
1323 : * to be stored, so as to ensure we do at least some work when the
1324 : * configured memory is so low that we run out before storing anything.
1325 : */
1326 884232 : if (vacrel->dead_items_info->num_items > 0 &&
1327 57922 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1328 : {
1329 : /*
1330 : * Before beginning index vacuuming, we release any pin we may
1331 : * hold on the visibility map page. This isn't necessary for
1332 : * correctness, but we do it anyway to avoid holding the pin
1333 : * across a lengthy, unrelated operation.
1334 : */
1335 4 : if (BufferIsValid(vmbuffer))
1336 : {
1337 4 : ReleaseBuffer(vmbuffer);
1338 4 : vmbuffer = InvalidBuffer;
1339 : }
1340 :
1341 : /* Perform a round of index and heap vacuuming */
1342 4 : vacrel->consider_bypass_optimization = false;
1343 4 : lazy_vacuum(vacrel);
1344 :
1345 : /*
1346 : * Vacuum the Free Space Map to make newly-freed space visible on
1347 : * upper-level FSM pages. Note that blkno is the previously
1348 : * processed block.
1349 : */
1350 4 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1351 : blkno + 1);
1352 4 : next_fsm_block_to_vacuum = blkno;
1353 :
1354 : /* Report that we are once again scanning the heap */
1355 4 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1356 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1357 : }
1358 :
1359 884232 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1360 :
1361 : /* The relation is exhausted. */
1362 884232 : if (!BufferIsValid(buf))
1363 177078 : break;
1364 :
1365 707154 : was_eager_scanned = *((bool *) per_buffer_data);
1366 707154 : CheckBufferIsPinnedOnce(buf);
1367 707154 : page = BufferGetPage(buf);
1368 707154 : blkno = BufferGetBlockNumber(buf);
1369 :
1370 707154 : vacrel->scanned_pages++;
1371 707154 : if (was_eager_scanned)
1372 0 : vacrel->eager_scanned_pages++;
1373 :
1374 : /* Report as block scanned, update error traceback information */
1375 707154 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1376 707154 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1377 : blkno, InvalidOffsetNumber);
1378 :
1379 : /*
1380 : * Pin the visibility map page in case we need to mark the page
1381 : * all-visible. In most cases this will be very cheap, because we'll
1382 : * already have the correct page pinned anyway.
1383 : */
1384 707154 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1385 :
1386 : /*
1387 : * We need a buffer cleanup lock to prune HOT chains and defragment
1388 : * the page in lazy_scan_prune. But when it's not possible to acquire
1389 : * a cleanup lock right away, we may be able to settle for reduced
1390 : * processing using lazy_scan_noprune.
1391 : */
1392 707154 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1393 :
1394 707154 : if (!got_cleanup_lock)
1395 214 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1396 :
1397 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1398 707154 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1399 707154 : vmbuffer))
1400 : {
1401 : /* Processed as new/empty page (lock and pin released) */
1402 1960 : continue;
1403 : }
1404 :
1405 : /*
1406 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1407 : * items in the dead_items area for later vacuuming, count live and
1408 : * recently dead tuples for vacuum logging, and determine if this
1409 : * block could later be truncated. If we encounter any xid/mxids that
1410 : * require advancing the relfrozenxid/relminxid, we'll have to wait
1411 : * for a cleanup lock and call lazy_scan_prune().
1412 : */
1413 705194 : if (!got_cleanup_lock &&
1414 214 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1415 : {
1416 : /*
1417 : * lazy_scan_noprune could not do all required processing. Wait
1418 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1419 : */
1420 : Assert(vacrel->aggressive);
1421 130 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1422 130 : LockBufferForCleanup(buf);
1423 130 : got_cleanup_lock = true;
1424 : }
1425 :
1426 : /*
1427 : * If we have a cleanup lock, we must now prune, freeze, and count
1428 : * tuples. We may have acquired the cleanup lock originally, or we may
1429 : * have gone back and acquired it after lazy_scan_noprune() returned
1430 : * false. Either way, the page hasn't been processed yet.
1431 : *
1432 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1433 : * recently_dead_tuples and live tuples for vacuum logging, determine
1434 : * if the block can later be truncated, and accumulate the details of
1435 : * remaining LP_DEAD line pointers on the page into dead_items. These
1436 : * dead items include those pruned by lazy_scan_prune() as well as
1437 : * line pointers previously marked LP_DEAD.
1438 : */
1439 705194 : if (got_cleanup_lock)
1440 705110 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1441 : vmbuffer,
1442 : &has_lpdead_items, &vm_page_frozen);
1443 :
1444 : /*
1445 : * Count an eagerly scanned page as a failure or a success.
1446 : *
1447 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1448 : * cleanup lock, we won't have frozen the page. However, we only count
1449 : * pages that were too new to require freezing as eager freeze
1450 : * failures.
1451 : *
1452 : * We could gather more information from lazy_scan_noprune() about
1453 : * whether or not there were tuples with XIDs or MXIDs older than the
1454 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we exclude
1455 : * pages skipped due to cleanup lock contention from the eager
1456 : * freeze algorithm caps.
1457 : */
1458 705194 : if (got_cleanup_lock && was_eager_scanned)
1459 : {
1460 : /* Aggressive vacuums do not eager scan. */
1461 : Assert(!vacrel->aggressive);
1462 :
1463 0 : if (vm_page_frozen)
1464 : {
1465 0 : if (vacrel->eager_scan_remaining_successes > 0)
1466 0 : vacrel->eager_scan_remaining_successes--;
1467 :
1468 0 : if (vacrel->eager_scan_remaining_successes == 0)
1469 : {
1470 : /*
1471 : * Report only once that we disabled eager scanning. We
1472 : * may eagerly read ahead blocks in excess of the success
1473 : * or failure caps before attempting to freeze them, so we
1474 : * could reach here even after disabling additional eager
1475 : * scanning.
1476 : */
1477 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1478 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1479 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1480 : orig_eager_scan_success_limit,
1481 : vacrel->dbname, vacrel->relnamespace,
1482 : vacrel->relname)));
1483 :
1484 : /*
1485 : * If we hit our success cap, permanently disable eager
1486 : * scanning by setting the other eager scan management
1487 : * fields to their disabled values.
1488 : */
1489 0 : vacrel->eager_scan_remaining_fails = 0;
1490 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1491 0 : vacrel->eager_scan_max_fails_per_region = 0;
1492 : }
1493 : }
1494 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1495 0 : vacrel->eager_scan_remaining_fails--;
1496 : }
1497 :
1498 : /*
1499 : * Now drop the buffer lock and, potentially, update the FSM.
1500 : *
1501 : * Our goal is to update the freespace map the last time we touch the
1502 : * page. If we'll process a block in the second pass, we may free up
1503 : * additional space on the page, so it is better to update the FSM
1504 : * after the second pass. If the relation has no indexes, or if index
1505 : * vacuuming is disabled, there will be no second heap pass; if this
1506 : * particular page has no dead items, the second heap pass will not
1507 : * touch this page. So, in those cases, update the FSM now.
1508 : *
1509 : * Note: In corner cases, it's possible to miss updating the FSM
1510 : * entirely. If index vacuuming is currently enabled, we'll skip the
1511 : * FSM update now. But if failsafe mode is later activated, or there
1512 : * are so few dead tuples that index vacuuming is bypassed, there will
1513 : * also be no opportunity to update the FSM later, because we'll never
1514 : * revisit this page. Since updating the FSM is desirable but not
1515 : * absolutely required, that's OK.
1516 : */
1517 705194 : if (vacrel->nindexes == 0
1518 677346 : || !vacrel->do_index_vacuuming
1519 509550 : || !has_lpdead_items)
1520 671604 : {
1521 671604 : Size freespace = PageGetHeapFreeSpace(page);
1522 :
1523 671604 : UnlockReleaseBuffer(buf);
1524 671604 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1525 :
1526 : /*
1527 : * Periodically perform FSM vacuuming to make newly-freed space
1528 : * visible on upper FSM pages. This is done after vacuuming if the
1529 : * table has indexes. There will only be newly-freed space if we
1530 : * held the cleanup lock and lazy_scan_prune() was called.
1531 : */
1532 671604 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1533 902 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1534 : {
1535 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1536 : blkno);
1537 0 : next_fsm_block_to_vacuum = blkno;
1538 : }
1539 : }
1540 : else
1541 33590 : UnlockReleaseBuffer(buf);
1542 : }
1543 :
1544 177078 : vacrel->blkno = InvalidBlockNumber;
1545 177078 : if (BufferIsValid(vmbuffer))
1546 71068 : ReleaseBuffer(vmbuffer);
1547 :
1548 : /*
1549 : * Report that everything is now scanned. We never skip scanning the last
1550 : * block in the relation, so we can pass rel_pages here.
1551 : */
1552 177078 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1553 : rel_pages);
1554 :
1555 : /* now we can compute the new value for pg_class.reltuples */
1556 354156 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1557 : vacrel->scanned_pages,
1558 177078 : vacrel->live_tuples);
1559 :
1560 : /*
1561 : * Also compute the total number of surviving heap entries. In the
1562 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1563 : */
1564 177078 : vacrel->new_rel_tuples =
1565 177078 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1566 177078 : vacrel->missed_dead_tuples;
1567 :
1568 177078 : read_stream_end(stream);
1569 :
1570 : /*
1571 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1572 : * related heap vacuuming
1573 : */
1574 177078 : if (vacrel->dead_items_info->num_items > 0)
1575 1392 : lazy_vacuum(vacrel);
1576 :
1577 : /*
1578 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1579 : * not there were indexes, and whether or not we bypassed index vacuuming.
1580 : * We can pass rel_pages here because we never skip scanning the last
1581 : * block of the relation.
1582 : */
1583 177078 : if (rel_pages > next_fsm_block_to_vacuum)
1584 71068 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1585 :
1586 : /* report all blocks vacuumed */
1587 177078 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1588 :
1589 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1590 177078 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1591 121704 : lazy_cleanup_all_indexes(vacrel);
1592 177078 : }
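 : /*
 :  * A condensed sketch of lazy_scan_heap()'s control flow above, for
 :  * orientation only (progress reporting, error context, and eager-scan
 :  * accounting omitted):
 :  *
 :  *     while ((buf = read_stream_next_buffer(stream, ...)) is valid)
 :  *     {
 :  *         if (dead_items exceeds its memory budget)
 :  *             lazy_vacuum(vacrel);              -- phases II + III, mid-scan
 :  *         if (lazy_scan_new_or_empty(...))
 :  *             continue;                         -- new/empty page handled
 :  *         if (cleanup lock obtained)
 :  *             lazy_scan_prune(...);             -- prune, freeze, set VM
 :  *         else
 :  *             lazy_scan_noprune(...);           -- reduced processing
 :  *         update the FSM if this is the last visit to the page;
 :  *     }
 :  *     if (dead_items remain)
 :  *         lazy_vacuum(vacrel);                  -- final phases II + III
 :  *     FreeSpaceMapVacuumRange(...);             -- remainder of the FSM
 :  *     lazy_cleanup_all_indexes(vacrel);         -- amvacuumcleanup
 :  */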
1593 :
1594 : /*
1595 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1596 : * for vacuum to process
1597 : *
1598 : * Every time lazy_scan_heap() needs a new block to process during its first
1599 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1600 : * heap_vac_scan_next_block() to get the next block.
1601 : *
1602 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1603 : * various thresholds to skip blocks which do not need to be processed and
1604 : * returns the next block to process or InvalidBlockNumber if there are no
1605 : * remaining blocks.
1606 : *
1607 : * Whether or not the next block to process was eagerly scanned is recorded
1608 : * in the per_buffer_data.
1609 : *
1610 : * callback_private_data contains a reference to the LVRelState, passed to the
1611 : * read stream API during stream setup. The LVRelState is an in/out parameter
1612 : * here (locally named `vacrel`). Vacuum options and information about the
1613 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1614 : * that's all-visible but not all-frozen (to ensure that we don't update
1615 : * relfrozenxid in that case). vacrel also holds information about the next
1616 : * unskippable block -- as bookkeeping for this function.
1617 : */
1618 : static BlockNumber
1619 884232 : heap_vac_scan_next_block(ReadStream *stream,
1620 : void *callback_private_data,
1621 : void *per_buffer_data)
1622 : {
1623 : BlockNumber next_block;
1624 884232 : LVRelState *vacrel = callback_private_data;
1625 :
1626 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1627 884232 : next_block = vacrel->current_block + 1;
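 : /*
 :  * A minimal illustration of the overflow relied on above, assuming
 :  * InvalidBlockNumber is the all-ones 32-bit value 0xFFFFFFFF (as defined
 :  * in block.h; an assumption here, not shown in this listing):
 :  *
 :  *     BlockNumber b = InvalidBlockNumber;    (0xFFFFFFFF)
 :  *     b = b + 1;                             (wraps to 0, the first block)
 :  *
 :  * so the first call starts at block 0 without needing a special case.
 :  */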
1628 :
1629 : /* Have we reached the end of the relation? */
1630 884232 : if (next_block >= vacrel->rel_pages)
1631 : {
1632 177078 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1633 : {
1634 68094 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1635 68094 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1636 : }
1637 177078 : return InvalidBlockNumber;
1638 : }
1639 :
1640 : /*
1641 : * We must be in one of the three following states:
1642 : */
1643 707154 : if (next_block > vacrel->next_unskippable_block ||
1644 282232 : vacrel->next_unskippable_block == InvalidBlockNumber)
1645 : {
1646 : /*
1647 : * 1. We have just processed an unskippable block (or we're at the
1648 : * beginning of the scan). Find the next unskippable block using the
1649 : * visibility map.
1650 : */
1651 : bool skipsallvis;
1652 :
1653 495990 : find_next_unskippable_block(vacrel, &skipsallvis);
1654 :
1655 : /*
1656 : * We now know the next block that we must process. It can be the
1657 : * next block after the one we just processed, or something further
1658 : * ahead. If it's further ahead, we can jump to it, but we choose to
1659 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1660 : * pages. Since we're reading sequentially, the OS should be doing
1661 : * readahead for us, so there's no gain in skipping a page now and
1662 : * then. Skipping such a range might even discourage sequential
1663 : * detection.
1664 : *
1665 : * This test also enables more frequent relfrozenxid advancement
1666 : * during non-aggressive VACUUMs. If the range has any all-visible
1667 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1668 : * real downside.
1669 : */
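 : /*
 :  * A worked example, assuming SKIP_PAGES_THRESHOLD is 32 (its actual value
 :  * is defined earlier in this file and should be checked there): a run of
 :  * 10 consecutive skippable pages is read anyway, preserving the sequential
 :  * access pattern for OS readahead, whereas a run of 500 skippable pages is
 :  * jumped over in one step; if any of those 500 pages were all-visible but
 :  * not all-frozen, skipsallvis is set and this VACUUM will not advance
 :  * relfrozenxid.
 :  */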
1670 495990 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1671 : {
1672 6192 : next_block = vacrel->next_unskippable_block;
1673 6192 : if (skipsallvis)
1674 66 : vacrel->skippedallvis = true;
1675 : }
1676 : }
1677 :
1678 : /* Now we must be in one of the two remaining states: */
1679 707154 : if (next_block < vacrel->next_unskippable_block)
1680 : {
1681 : /*
1682 : * 2. We are processing a range of blocks that we could have skipped
1683 : * but chose not to. We know that they are all-visible in the VM,
1684 : * otherwise they would've been unskippable.
1685 : */
1686 211164 : vacrel->current_block = next_block;
1687 : /* Block was not eager scanned */
1688 211164 : *((bool *) per_buffer_data) = false;
1689 211164 : return vacrel->current_block;
1690 : }
1691 : else
1692 : {
1693 : /*
1694 : * 3. We reached the next unskippable block. Process it. On next
1695 : * iteration, we will be back in state 1.
1696 : */
1697 : Assert(next_block == vacrel->next_unskippable_block);
1698 :
1699 495990 : vacrel->current_block = next_block;
1700 495990 : *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1701 495990 : return vacrel->current_block;
1702 : }
1703 : }
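 : /*
 :  * In outline, the three states above reduce to (a sketch, not source):
 :  *
 :  *     next_block = current_block + 1;
 :  *     if (next_block >= rel_pages)
 :  *         return InvalidBlockNumber;            -- scan is finished
 :  *     if (past the remembered unskippable block, or none found yet)
 :  *         find_next_unskippable_block();        -- state 1
 :  *         jump ahead if >= SKIP_PAGES_THRESHOLD pages can be skipped;
 :  *     if (next_block < next_unskippable_block)
 :  *         return next_block;                    -- state 2: chose not to skip
 :  *     else
 :  *         return next_unskippable_block;        -- state 3: must process
 :  */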
1704 :
1705 : /*
1706 : * Find the next unskippable block in a vacuum scan using the visibility map.
1707 : * The next unskippable block and its visibility information are updated in
1708 : * vacrel.
1709 : *
1710 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1711 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1712 : * was concurrently cleared, though. All that matters is that caller scan all
1713 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1714 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1715 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1716 : * to skip such a range is actually made, making everything safe.)
1717 : */
1718 : static void
1719 495990 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1720 : {
1721 495990 : BlockNumber rel_pages = vacrel->rel_pages;
1722 495990 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1723 495990 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1724 495990 : bool next_unskippable_eager_scanned = false;
1725 :
1726 495990 : *skipsallvis = false;
1727 :
1728 656158 : for (;; next_unskippable_block++)
1729 656158 : {
1730 1152148 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1731 : next_unskippable_block,
1732 : &next_unskippable_vmbuffer);
1733 :
1734 :
1735 : /*
1736 : * At the start of each eager scan region, normal vacuums with eager
1737 : * scanning enabled reset the failure counter, allowing vacuum to
1738 : * resume eager scanning if it had been suspended in the previous
1739 : * region.
1740 : */
1741 1152148 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1742 : {
1743 0 : vacrel->eager_scan_remaining_fails =
1744 0 : vacrel->eager_scan_max_fails_per_region;
1745 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1746 : }
1747 :
1748 : /*
1749 : * A block is unskippable if it is not all visible according to the
1750 : * visibility map.
1751 : */
1752 1152148 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1753 : {
1754 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1755 432532 : break;
1756 : }
1757 :
1758 : /*
1759 : * Caller must scan the last page to determine whether it has tuples
1760 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1761 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1762 : * lock on rel to attempt a truncation that fails anyway, just because
1763 : * there are tuples on the last page (it is likely that there will be
1764 : * tuples on other nearby pages as well, but those can be skipped).
1765 : *
1766 : * Implement this by always treating the last block as unsafe to skip.
1767 : */
1768 719616 : if (next_unskippable_block == rel_pages - 1)
1769 62642 : break;
1770 :
1771 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1772 656974 : if (!vacrel->skipwithvm)
1773 810 : break;
1774 :
1775 : /*
1776 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1777 : * already frozen by now), so this page can be skipped.
1778 : */
1779 656164 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1780 649944 : continue;
1781 :
1782 : /*
1783 : * Aggressive vacuums cannot skip any all-visible pages that are not
1784 : * also all-frozen.
1785 : */
1786 6220 : if (vacrel->aggressive)
1787 6 : break;
1788 :
1789 : /*
1790 : * Normal vacuums with eager scanning enabled only skip all-visible
1791 : * but not all-frozen pages if they have hit the failure limit for the
1792 : * current eager scan region.
1793 : */
1794 6214 : if (vacrel->eager_scan_remaining_fails > 0)
1795 : {
1796 0 : next_unskippable_eager_scanned = true;
1797 0 : break;
1798 : }
1799 :
1800 : /*
1801 : * All-visible blocks are safe to skip in a normal vacuum. But
1802 : * remember that the final range contains such a block for later.
1803 : */
1804 6214 : *skipsallvis = true;
1805 : }
1806 :
1807 : /* write the local variables back to vacrel */
1808 495990 : vacrel->next_unskippable_block = next_unskippable_block;
1809 495990 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1810 495990 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1811 495990 : }
1812 :
1813 : /*
1814 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1815 : *
1816 : * Must call here to handle both new and empty pages before calling
1817 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1818 : * with new or empty pages.
1819 : *
1820 : * It's necessary to consider new pages as a special case, since the rules for
1821 : * maintaining the visibility map and FSM with empty pages are a little
1822 : * different (though new pages can be truncated away during rel truncation).
1823 : *
1824 : * Empty pages are not really a special case -- they're just heap pages that
1825 : * have no allocated tuples (including even LP_UNUSED items). You might
1826 : * wonder why we need to handle them here all the same. It's only necessary
1827 : * because of a corner-case involving a hard crash during heap relation
1828 : * extension. If we ever make relation-extension crash safe, then it should
1829 : * no longer be necessary to deal with empty pages here (or new pages, for
1830 : * that matter).
1831 : *
1832 : * Caller must hold at least a shared lock. We might need to escalate the
1833 : * lock in that case, so the type of lock the caller holds needs to be
1834 : * specified using the 'sharelock' argument.
1835 : *
1836 : * Returns false in common case where caller should go on to call
1837 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1838 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1839 : * behalf.
1840 : *
1841 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1842 : * is passed here because neither empty nor new pages can be eagerly frozen.
1843 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1844 : * the same time that they are set all-visible, and we don't eagerly scan
1845 : * frozen pages.
1846 : */
1847 : static bool
1848 707154 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1849 : Page page, bool sharelock, Buffer vmbuffer)
1850 : {
1851 : Size freespace;
1852 :
1853 707154 : if (PageIsNew(page))
1854 : {
1855 : /*
1856 : * All-zeroes pages can be left over if either a backend extends the
1857 : * relation by a single page, but crashes before the newly initialized
1858 : * page has been written out, or when bulk-extending the relation
1859 : * (which creates a number of empty pages at the tail end of the
1860 : * relation), and then enters them into the FSM.
1861 : *
1862 : * Note we do not enter the page into the visibilitymap. That has the
1863 : * downside that we repeatedly visit this page in subsequent vacuums,
1864 : * but otherwise we'll never discover the space on a promoted standby.
1865 : * The harm of repeated checking ought to normally not be too bad. The
1866 : * space usually should be used at some point, otherwise there
1867 : * wouldn't be any regular vacuums.
1868 : *
1869 : * Make sure these pages are in the FSM, to ensure they can be reused.
1870 : * Do that by testing if there's any space recorded for the page. If
1871 : * not, enter it. We do so after releasing the lock on the heap page;
1872 : * the FSM is approximate, after all.
1873 : */
1874 1906 : UnlockReleaseBuffer(buf);
1875 :
1876 1906 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1877 : {
1878 950 : freespace = BLCKSZ - SizeOfPageHeaderData;
1879 :
1880 950 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1881 : }
1882 :
1883 1906 : return true;
1884 : }
1885 :
1886 705248 : if (PageIsEmpty(page))
1887 : {
1888 : /*
1889 : * It seems likely that caller will always be able to get a cleanup
1890 : * lock on an empty page. But don't take any chances -- escalate to
1891 : * an exclusive lock (still don't need a cleanup lock, though).
1892 : */
1893 54 : if (sharelock)
1894 : {
1895 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1896 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1897 :
1898 0 : if (!PageIsEmpty(page))
1899 : {
1900 : /* page isn't new or empty -- keep lock and pin for now */
1901 0 : return false;
1902 : }
1903 : }
1904 : else
1905 : {
1906 : /* Already have a full cleanup lock (which is more than enough) */
1907 : }
1908 :
1909 : /*
1910 : * Unlike new pages, empty pages are always set all-visible and
1911 : * all-frozen.
1912 : */
1913 54 : if (!PageIsAllVisible(page))
1914 : {
1915 0 : START_CRIT_SECTION();
1916 :
1917 : /* mark buffer dirty before writing a WAL record */
1918 0 : MarkBufferDirty(buf);
1919 :
1920 : /*
1921 : * It's possible that another backend has extended the heap,
1922 : * initialized the page, and then failed to WAL-log the page due
1923 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1924 : * might try to replay our record setting the page all-visible and
1925 : * find that the page isn't initialized, which will cause a PANIC.
1926 : * To prevent that, check whether the page has been previously
1927 : * WAL-logged, and if not, do that now.
1928 : */
1929 0 : if (RelationNeedsWAL(vacrel->rel) &&
1930 0 : !XLogRecPtrIsValid(PageGetLSN(page)))
1931 0 : log_newpage_buffer(buf, true);
1932 :
1933 0 : PageSetAllVisible(page);
1934 0 : visibilitymap_set(vacrel->rel, blkno, buf,
1935 : InvalidXLogRecPtr,
1936 : vmbuffer, InvalidTransactionId,
1937 : VISIBILITYMAP_ALL_VISIBLE |
1938 : VISIBILITYMAP_ALL_FROZEN);
1939 0 : END_CRIT_SECTION();
1940 :
1941 : /* Count the newly all-frozen pages for logging */
1942 0 : vacrel->vm_new_visible_pages++;
1943 0 : vacrel->vm_new_visible_frozen_pages++;
1944 : }
1945 :
1946 54 : freespace = PageGetHeapFreeSpace(page);
1947 54 : UnlockReleaseBuffer(buf);
1948 54 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1949 54 : return true;
1950 : }
1951 :
1952 : /* page isn't new or empty -- keep lock and pin */
1953 705194 : return false;
1954 : }
1955 :
1956 : /* qsort comparator for sorting OffsetNumbers */
1957 : static int
1958 8174076 : cmpOffsetNumbers(const void *a, const void *b)
1959 : {
1960 8174076 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1961 : }
1962 :
1963 : /*
1964 : * Helper to correct any corruption detected on a heap page and its
1965 : * corresponding visibility map page after pruning but before setting the
1966 : * visibility map. It examines the heap page, the associated VM page, and the
1967 : * number of dead items previously identified.
1968 : *
1969 : * This function must be called while holding an exclusive lock on the heap
1970 : * buffer, and the dead items must have been discovered under that same lock.
1971 : *
1972 : * The provided vmbits must reflect the current state of the VM block
1973 : * referenced by vmbuffer. Although we do not hold a lock on the VM buffer, it
1974 : * is pinned, and the heap buffer is exclusively locked, ensuring that no
1975 : * other backend can update the VM bits corresponding to this heap page.
1976 : *
1977 : * If this function clears corruption, it zeroes out *vmbits.
1978 : */
1979 : static void
1980 705110 : identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
1981 : BlockNumber heap_blk, Page heap_page,
1982 : int nlpdead_items,
1983 : Buffer vmbuffer,
1984 : uint8 *vmbits)
1985 : {
1986 : Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);
1987 :
1988 : Assert(BufferIsLockedByMeInMode(heap_buffer, BUFFER_LOCK_EXCLUSIVE));
1989 :
1990 : /*
1991 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1992 : * page-level bit is clear. However, it's possible that the bit got
1993 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
1994 : * with buffer lock before concluding that the VM is corrupt.
1995 : */
1996 705110 : if (!PageIsAllVisible(heap_page) &&
1997 430608 : ((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
1998 : {
1999 0 : ereport(WARNING,
2000 : (errcode(ERRCODE_DATA_CORRUPTED),
2001 : errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2002 : RelationGetRelationName(rel), heap_blk)));
2003 :
2004 0 : visibilitymap_clear(rel, heap_blk, vmbuffer,
2005 : VISIBILITYMAP_VALID_BITS);
2006 0 : *vmbits = 0;
2007 : }
2008 :
2009 : /*
2010 : * It's possible for the value returned by
2011 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2012 : * wrong for us to see tuples that appear to not be visible to everyone
2013 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2014 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
2015 : * conservative and sometimes returns a value that's unnecessarily small,
2016 : * so if we see that contradiction it just means that the tuples that we
2017 : * think are not visible to everyone yet actually are, and the
2018 : * PD_ALL_VISIBLE flag is correct.
2019 : *
2020 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2021 : * however.
2022 : */
2023 705110 : else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
2024 : {
2025 0 : ereport(WARNING,
2026 : (errcode(ERRCODE_DATA_CORRUPTED),
2027 : errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2028 : RelationGetRelationName(rel), heap_blk)));
2029 :
2030 0 : PageClearAllVisible(heap_page);
2031 0 : MarkBufferDirty(heap_buffer);
2032 0 : visibilitymap_clear(rel, heap_blk, vmbuffer,
2033 : VISIBILITYMAP_VALID_BITS);
2034 0 : *vmbits = 0;
2035 : }
2036 705110 : }
2037 :
2038 : /*
2039 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
2040 : *
2041 : * Caller must hold pin and buffer cleanup lock on the buffer.
2042 : *
2043 : * vmbuffer is the buffer containing the VM block with visibility information
2044 : * for the heap block, blkno.
2045 : *
2046 : * *has_lpdead_items is set to true or false depending on whether, upon return
2047 : * from this function, any LP_DEAD items are still present on the page.
2048 : *
2049 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2050 : * VM. The caller currently only uses this for determining whether an eagerly
2051 : * scanned page was successfully set all-frozen.
2052 : *
2053 : * Returns the number of tuples deleted from the page during HOT pruning.
2054 : */
2055 : static int
2056 705110 : lazy_scan_prune(LVRelState *vacrel,
2057 : Buffer buf,
2058 : BlockNumber blkno,
2059 : Page page,
2060 : Buffer vmbuffer,
2061 : bool *has_lpdead_items,
2062 : bool *vm_page_frozen)
2063 : {
2064 705110 : Relation rel = vacrel->rel;
2065 : PruneFreezeResult presult;
2066 705110 : PruneFreezeParams params = {
2067 : .relation = rel,
2068 : .buffer = buf,
2069 : .reason = PRUNE_VACUUM_SCAN,
2070 : .options = HEAP_PAGE_PRUNE_FREEZE,
2071 705110 : .vistest = vacrel->vistest,
2072 705110 : .cutoffs = &vacrel->cutoffs,
2073 : };
2074 705110 : uint8 old_vmbits = 0;
2075 705110 : uint8 new_vmbits = 0;
2076 :
2077 : Assert(BufferGetBlockNumber(buf) == blkno);
2078 :
2079 : /*
2080 : * Prune all HOT-update chains and potentially freeze tuples on this page.
2081 : *
2082 : * If the relation has no indexes, we can immediately mark would-be dead
2083 : * items LP_UNUSED.
2084 : *
2085 : * The number of tuples removed from the page is returned in
2086 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
2087 : * presult.lpdead_items's final value can be thought of as the number of
2088 : * tuples that were deleted from indexes.
2089 : *
2090 : * We will update the VM after collecting LP_DEAD items and freezing
2091 : * tuples. Pruning will have determined whether or not the page is
2092 : * all-visible.
2093 : */
2094 705110 : if (vacrel->nindexes == 0)
2095 27848 : params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2096 :
2097 705110 : heap_page_prune_and_freeze(¶ms,
2098 : &presult,
2099 : &vacrel->offnum,
2100 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2101 :
2102 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2103 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2104 :
2105 705110 : if (presult.nfrozen > 0)
2106 : {
2107 : /*
2108 : * We don't increment the new_frozen_tuple_pages instrumentation
2109 : * counter when nfrozen == 0, since it only counts pages with newly
2110 : * frozen tuples (don't confuse that with pages newly set all-frozen
2111 : * in VM).
2112 : */
2113 45882 : vacrel->new_frozen_tuple_pages++;
2114 : }
2115 :
2116 : /*
2117 : * VACUUM will call heap_page_is_all_visible() during the second pass over
2118 : * the heap to determine all_visible and all_frozen for the page -- this
2119 : * is a specialized version of the logic from this function. Now that
2120 : * we've finished pruning and freezing, make sure that we're in total
2121 : * agreement with heap_page_is_all_visible() using an assertion.
2122 : */
2123 : #ifdef USE_ASSERT_CHECKING
2124 : if (presult.all_visible)
2125 : {
2126 : TransactionId debug_cutoff;
2127 : bool debug_all_frozen;
2128 :
2129 : Assert(presult.lpdead_items == 0);
2130 :
2131 : Assert(heap_page_is_all_visible(vacrel->rel, buf,
2132 : vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2133 : &debug_cutoff, &vacrel->offnum));
2134 :
2135 : Assert(presult.all_frozen == debug_all_frozen);
2136 :
2137 : Assert(!TransactionIdIsValid(debug_cutoff) ||
2138 : debug_cutoff == presult.vm_conflict_horizon);
2139 : }
2140 : #endif
2141 :
2142 : /*
2143 : * Now save details of the LP_DEAD items from the page in vacrel
2144 : */
2145 705110 : if (presult.lpdead_items > 0)
2146 : {
2147 38226 : vacrel->lpdead_item_pages++;
2148 :
2149 : /*
2150 : * deadoffsets are collected incrementally in
2151 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2152 : * with an indeterminate order, but dead_items_add requires them to be
2153 : * sorted.
2154 : */
2155 38226 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2156 : cmpOffsetNumbers);
2157 :
2158 38226 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2159 : }
2160 :
2161 : /* Finally, add page-local counts to whole-VACUUM counts */
2162 705110 : vacrel->tuples_deleted += presult.ndeleted;
2163 705110 : vacrel->tuples_frozen += presult.nfrozen;
2164 705110 : vacrel->lpdead_items += presult.lpdead_items;
2165 705110 : vacrel->live_tuples += presult.live_tuples;
2166 705110 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2167 :
2168 : /* Can't truncate this page */
2169 705110 : if (presult.hastup)
2170 686028 : vacrel->nonempty_pages = blkno + 1;
2171 :
2172 : /* Did we find LP_DEAD items? */
2173 705110 : *has_lpdead_items = (presult.lpdead_items > 0);
2174 :
2175 : Assert(!presult.all_visible || !(*has_lpdead_items));
2176 : Assert(!presult.all_frozen || presult.all_visible);
2177 :
2178 705110 : old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
2179 :
2180 705110 : identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
2181 : presult.lpdead_items, vmbuffer,
2182 : &old_vmbits);
2183 :
2184 705110 : if (!presult.all_visible)
2185 356022 : return presult.ndeleted;
2186 :
2187 : /* Set the visibility map and page visibility hint */
2188 349088 : new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
2189 :
2190 349088 : if (presult.all_frozen)
2191 326522 : new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
2192 :
2193 : /* Nothing to do */
2194 349088 : if (old_vmbits == new_vmbits)
2195 274446 : return presult.ndeleted;
2196 :
2197 : /*
2198 : * It should never be the case that the visibility map page is set while
2199 : * the page-level bit is clear (and if so, we cleared it above), but the
2200 : * reverse is allowed (if checksums are not enabled). Regardless, set both
2201 : * bits so that we get back in sync.
2202 : *
2203 : * The heap buffer must be marked dirty before adding it to the WAL chain
2204 : * when setting the VM. We don't worry about unnecessarily dirtying the
2205 : * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
2206 : * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
2207 : * the VM bits clear, so there is no point in optimizing it.
2208 : */
2209 74642 : PageSetAllVisible(page);
2210 74642 : MarkBufferDirty(buf);
2211 :
2212 : /*
2213 : * If the page is being set all-frozen, we pass InvalidTransactionId as
2214 : * the cutoff_xid, since a snapshot conflict horizon sufficient to make
2215 : * everything safe for REDO was logged when the page's tuples were frozen.
2216 : */
2217 : Assert(!presult.all_frozen ||
2218 : !TransactionIdIsValid(presult.vm_conflict_horizon));
2219 :
2220 74642 : visibilitymap_set(vacrel->rel, blkno, buf,
2221 : InvalidXLogRecPtr,
2222 : vmbuffer, presult.vm_conflict_horizon,
2223 : new_vmbits);
2224 :
2225 : /*
2226 : * If the page wasn't already set all-visible and/or all-frozen in the VM,
2227 : * count it as newly set for logging.
2228 : */
2229 74642 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2230 : {
2231 74588 : vacrel->vm_new_visible_pages++;
2232 74588 : if (presult.all_frozen)
2233 : {
2234 53350 : vacrel->vm_new_visible_frozen_pages++;
2235 53350 : *vm_page_frozen = true;
2236 : }
2237 : }
2238 54 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2239 54 : presult.all_frozen)
2240 : {
2241 54 : vacrel->vm_new_frozen_pages++;
2242 54 : *vm_page_frozen = true;
2243 : }
2244 :
2245 74642 : return presult.ndeleted;
2246 : }
2247 :
2248 : /*
2249 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2250 : *
2251 : * Caller need only hold a pin and share lock on the buffer, unlike
2252 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2253 : * performed here, it's quite possible that an earlier opportunistic pruning
2254 : * operation left LP_DEAD items behind. We'll at least collect any such items
2255 : * in dead_items for removal from indexes.
2256 : *
2257 : * For aggressive VACUUM callers, we may return false to indicate that a full
2258 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2259 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2260 : * one or more tuples on the page. We always return true for non-aggressive
2261 : * callers.
2262 : *
2263 : * If this function returns true, *has_lpdead_items gets set to true or false
2264 : * depending on whether, upon return from this function, any LP_DEAD items are
2265 : * present on the page. If this function returns false, *has_lpdead_items
2266 : * is not updated.
2267 : */
2268 : static bool
2269 214 : lazy_scan_noprune(LVRelState *vacrel,
2270 : Buffer buf,
2271 : BlockNumber blkno,
2272 : Page page,
2273 : bool *has_lpdead_items)
2274 : {
2275 : OffsetNumber offnum,
2276 : maxoff;
2277 : int lpdead_items,
2278 : live_tuples,
2279 : recently_dead_tuples,
2280 : missed_dead_tuples;
2281 : bool hastup;
2282 : HeapTupleHeader tupleheader;
2283 214 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2284 214 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2285 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2286 :
2287 : Assert(BufferGetBlockNumber(buf) == blkno);
2288 :
2289 214 : hastup = false; /* for now */
2290 :
2291 214 : lpdead_items = 0;
2292 214 : live_tuples = 0;
2293 214 : recently_dead_tuples = 0;
2294 214 : missed_dead_tuples = 0;
2295 :
2296 214 : maxoff = PageGetMaxOffsetNumber(page);
2297 214 : for (offnum = FirstOffsetNumber;
2298 4320 : offnum <= maxoff;
2299 4106 : offnum = OffsetNumberNext(offnum))
2300 : {
2301 : ItemId itemid;
2302 : HeapTupleData tuple;
2303 :
2304 4236 : vacrel->offnum = offnum;
2305 4236 : itemid = PageGetItemId(page, offnum);
2306 :
2307 4236 : if (!ItemIdIsUsed(itemid))
2308 724 : continue;
2309 :
2310 3836 : if (ItemIdIsRedirected(itemid))
2311 : {
2312 324 : hastup = true;
2313 324 : continue;
2314 : }
2315 :
2316 3512 : if (ItemIdIsDead(itemid))
2317 : {
2318 : /*
2319 : * Deliberately don't set hastup=true here. See same point in
2320 : * lazy_scan_prune for an explanation.
2321 : */
2322 0 : deadoffsets[lpdead_items++] = offnum;
2323 0 : continue;
2324 : }
2325 :
2326 3512 : hastup = true; /* page prevents rel truncation */
2327 3512 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2328 3512 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2329 : &NoFreezePageRelfrozenXid,
2330 : &NoFreezePageRelminMxid))
2331 : {
2332 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2333 258 : if (vacrel->aggressive)
2334 : {
2335 : /*
2336 : * Aggressive VACUUMs must always be able to advance rel's
2337 : * relfrozenxid to a value >= FreezeLimit (and be able to
2338 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2339 : * The ongoing aggressive VACUUM won't be able to do that
2340 : * unless it can freeze an XID (or MXID) from this tuple now.
2341 : *
2342 : * The only safe option is to have caller perform processing
2343 : * of this page using lazy_scan_prune. Caller might have to
2344 : * wait a while for a cleanup lock, but it can't be helped.
2345 : */
2346 130 : vacrel->offnum = InvalidOffsetNumber;
2347 130 : return false;
2348 : }
2349 :
2350 : /*
2351 : * Non-aggressive VACUUMs are under no obligation to advance
2352 : * relfrozenxid (even by one XID). We can be much laxer here.
2353 : *
2354 : * Currently we always just accept an older final relfrozenxid
2355 : * and/or relminmxid value. We never make caller wait or work a
2356 : * little harder, even when it likely makes sense to do so.
2357 : */
2358 : }
2359 :
2360 3382 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2361 3382 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2362 3382 : tuple.t_len = ItemIdGetLength(itemid);
2363 3382 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2364 :
2365 3382 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2366 : buf))
2367 : {
2368 3338 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2369 : case HEAPTUPLE_LIVE:
2370 :
2371 : /*
2372 : * Count both cases as live, just like lazy_scan_prune
2373 : */
2374 3338 : live_tuples++;
2375 :
2376 3338 : break;
2377 40 : case HEAPTUPLE_DEAD:
2378 :
2379 : /*
2380 : * There is some useful work for pruning to do, that won't be
2381 : * done due to failure to get a cleanup lock.
2382 : */
2383 40 : missed_dead_tuples++;
2384 40 : break;
2385 4 : case HEAPTUPLE_RECENTLY_DEAD:
2386 :
2387 : /*
2388 : * Count in recently_dead_tuples, just like lazy_scan_prune
2389 : */
2390 4 : recently_dead_tuples++;
2391 4 : break;
2392 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2393 :
2394 : /*
2395 : * Do not count these rows as live, just like lazy_scan_prune
2396 : */
2397 0 : break;
2398 0 : default:
2399 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2400 : break;
2401 : }
2402 : }
2403 :
2404 84 : vacrel->offnum = InvalidOffsetNumber;
2405 :
2406 : /*
2407 : * By here we know for sure that caller can put off freezing and pruning
2408 : * this particular page until the next VACUUM. Remember its details now.
2409 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2410 : */
2411 84 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2412 84 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2413 :
2414 : /* Save any LP_DEAD items found on the page in dead_items */
2415 84 : if (vacrel->nindexes == 0)
2416 : {
2417 : /* Using one-pass strategy (since table has no indexes) */
2418 0 : if (lpdead_items > 0)
2419 : {
2420 : /*
2421 : * Perfunctory handling for the corner case where a single pass
2422 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2423 : * that there are one or more LP_DEAD items: just count the LP_DEAD
2424 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2425 : * but it beats having to maintain specialized heap vacuuming code
2426 : * forever, for vanishingly little benefit.)
2427 : */
2428 0 : hastup = true;
2429 0 : missed_dead_tuples += lpdead_items;
2430 : }
2431 : }
2432 84 : else if (lpdead_items > 0)
2433 : {
2434 : /*
2435 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2436 : * indexes will be deleted during index vacuuming (and then marked
2437 : * LP_UNUSED in the heap)
2438 : */
2439 0 : vacrel->lpdead_item_pages++;
2440 :
2441 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2442 :
2443 0 : vacrel->lpdead_items += lpdead_items;
2444 : }
2445 :
2446 : /*
2447 : * Finally, add relevant page-local counts to whole-VACUUM counts
2448 : */
2449 84 : vacrel->live_tuples += live_tuples;
2450 84 : vacrel->recently_dead_tuples += recently_dead_tuples;
2451 84 : vacrel->missed_dead_tuples += missed_dead_tuples;
2452 84 : if (missed_dead_tuples > 0)
2453 6 : vacrel->missed_dead_pages++;
2454 :
2455 : /* Can't truncate this page */
2456 84 : if (hastup)
2457 84 : vacrel->nonempty_pages = blkno + 1;
2458 :
2459 : /* Did we find LP_DEAD items? */
2460 84 : *has_lpdead_items = (lpdead_items > 0);
2461 :
2462 : /* Caller won't need to call lazy_scan_prune with same page */
2463 84 : return true;
2464 : }
2465 :
2466 : /*
2467 : * Main entry point for index vacuuming and heap vacuuming.
2468 : *
2469 : * Removes items collected in dead_items from table's indexes, then marks the
2470 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2471 : * for full details.
2472 : *
2473 : * Also empties dead_items, freeing up space for later TIDs.
2474 : *
2475 : * We may choose to bypass index vacuuming at this point, though only when the
2476 : * ongoing VACUUM operation will definitely only have one index scan/round of
2477 : * index vacuuming.
2478 : */
2479 : static void
2480 1396 : lazy_vacuum(LVRelState *vacrel)
2481 : {
2482 : bool bypass;
2483 :
2484 : /* Should not end up here with no indexes */
2485 : Assert(vacrel->nindexes > 0);
2486 : Assert(vacrel->lpdead_item_pages > 0);
2487 :
2488 1396 : if (!vacrel->do_index_vacuuming)
2489 : {
2490 : Assert(!vacrel->do_index_cleanup);
2491 22 : dead_items_reset(vacrel);
2492 22 : return;
2493 : }
2494 :
2495 : /*
2496 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2497 : *
2498 : * We currently only do this in cases where the number of LP_DEAD items
2499 : * for the entire VACUUM operation is close to zero. This avoids sharp
2500 : * discontinuities in the duration and overhead of successive VACUUM
2501 : * operations that run against the same table with a fixed workload.
2502 : * Ideally, successive VACUUM operations will behave as if there are
2503 : * exactly zero LP_DEAD items in cases where there are close to zero.
2504 : *
2505 : * This is likely to be helpful with a table that is continually affected
2506 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2507 : * have small aberrations that lead to just a few heap pages retaining
2508 : * only one or two LP_DEAD items. This is pretty common; even when the
2509 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2510 : * impossible to predict whether HOT will be applied in 100% of cases.
2511 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2512 : * HOT through careful tuning.
2513 : */
2514 1374 : bypass = false;
2515 1374 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2516 : {
2517 : BlockNumber threshold;
2518 :
2519 : Assert(vacrel->num_index_scans == 0);
2520 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2521 : Assert(vacrel->do_index_vacuuming);
2522 : Assert(vacrel->do_index_cleanup);
2523 :
2524 : /*
2525 : * This crossover point at which we'll start to do index vacuuming is
2526 : * expressed as a percentage of the total number of heap pages in the
2527 : * table that are known to have at least one LP_DEAD item. This is
2528 : * much more important than the total number of LP_DEAD items, since
2529 : * it's a proxy for the number of heap pages whose visibility map bits
2530 : * cannot be set on account of bypassing index and heap vacuuming.
2531 : *
2532 : * We apply one further precautionary test: the space currently used
2533 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2534 : * not exceed 32MB. This limits the risk that we will bypass index
2535 : * vacuuming again and again until eventually there is a VACUUM whose
2536 : * dead_items space is not CPU cache resident.
2537 : *
2538 : * We don't take any special steps to remember the LP_DEAD items (such
2539 : * as counting them in our final update to the stats system) when the
2540 : * optimization is applied. Though the accounting used in analyze.c's
2541 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2542 : * rows in its own stats report, that's okay. The discrepancy should
2543 : * be negligible. If this optimization is ever expanded to cover more
2544 : * cases then this may need to be reconsidered.
2545 : */
2546 1356 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2547 1362 : bypass = (vacrel->lpdead_item_pages < threshold &&
2548 6 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
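 : /*
 :  * A worked example, assuming BYPASS_THRESHOLD_PAGES is 0.02 (its actual
 :  * value is defined earlier in this file and should be checked there): for
 :  * a 100,000-page table the threshold is 2,000 pages, so index vacuuming is
 :  * bypassed only when fewer than 2,000 heap pages carry LP_DEAD items and
 :  * the TID store occupies less than 32MB.
 :  */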
2549 : }
2550 :
2551 1374 : if (bypass)
2552 : {
2553 : /*
2554 : * There are almost zero TIDs. Behave as if there were precisely
2555 : * zero: bypass index vacuuming, but do index cleanup.
2556 : *
2557 : * We expect that the ongoing VACUUM operation will finish very
2558 : * quickly, so there is no point in considering speeding up as a
2559 : * failsafe against wraparound failure. (Index cleanup is expected to
2560 : * finish very quickly in cases where there were no ambulkdelete()
2561 : * calls.)
2562 : */
2563 6 : vacrel->do_index_vacuuming = false;
2564 : }
2565 1368 : else if (lazy_vacuum_all_indexes(vacrel))
2566 : {
2567 : /*
2568 : * We successfully completed a round of index vacuuming. Do related
2569 : * heap vacuuming now.
2570 : */
2571 1368 : lazy_vacuum_heap_rel(vacrel);
2572 : }
2573 : else
2574 : {
2575 : /*
2576 : * Failsafe case.
2577 : *
2578 : * We attempted index vacuuming, but didn't finish a full round/full
2579 : * index scan. This happens when relfrozenxid or relminmxid is too
2580 : * far in the past.
2581 : *
2582 : * From this point on the VACUUM operation will do no further index
2583 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2584 : * back here again.
2585 : */
2586 : Assert(VacuumFailsafeActive);
2587 : }
2588 :
2589 : /*
2590 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2591 : * vacuum)
2592 : */
2593 1374 : dead_items_reset(vacrel);
2594 : }
2595 :
2596 : /*
2597 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2598 : *
2599 : * Returns true in the common case when all indexes were successfully
2600 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2601 : * VACUUM operation is at risk of taking too long to finish, leading to
2602 : * wraparound failure.
2603 : */
2604 : static bool
2605 1368 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2606 : {
2607 1368 : bool allindexes = true;
2608 1368 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2609 1368 : const int progress_start_index[] = {
2610 : PROGRESS_VACUUM_PHASE,
2611 : PROGRESS_VACUUM_INDEXES_TOTAL
2612 : };
2613 1368 : const int progress_end_index[] = {
2614 : PROGRESS_VACUUM_INDEXES_TOTAL,
2615 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2616 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2617 : };
2618 : int64 progress_start_val[2];
2619 : int64 progress_end_val[3];
2620 :
2621 : Assert(vacrel->nindexes > 0);
2622 : Assert(vacrel->do_index_vacuuming);
2623 : Assert(vacrel->do_index_cleanup);
2624 :
2625 : /* Precheck for XID wraparound emergencies */
2626 1368 : if (lazy_check_wraparound_failsafe(vacrel))
2627 : {
2628 : /* Wraparound emergency -- don't even start an index scan */
2629 0 : return false;
2630 : }
2631 :
2632 : /*
2633 : * Report that we are now vacuuming indexes and the number of indexes to
2634 : * vacuum.
2635 : */
2636 1368 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2637 1368 : progress_start_val[1] = vacrel->nindexes;
2638 1368 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2639 :
2640 1368 : if (!ParallelVacuumIsActive(vacrel))
2641 : {
2642 3982 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2643 : {
2644 2624 : Relation indrel = vacrel->indrels[idx];
2645 2624 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2646 :
2647 2624 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2648 : old_live_tuples,
2649 : vacrel);
2650 :
2651 : /* Report the number of indexes vacuumed */
2652 2624 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2653 2624 : idx + 1);
2654 :
2655 2624 : if (lazy_check_wraparound_failsafe(vacrel))
2656 : {
2657 : /* Wraparound emergency -- end current index scan */
2658 0 : allindexes = false;
2659 0 : break;
2660 : }
2661 : }
2662 : }
2663 : else
2664 : {
2665 : /* Outsource everything to parallel variant */
2666 10 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2667 : vacrel->num_index_scans);
2668 :
2669 : /*
2670 : * Do a postcheck to consider applying wraparound failsafe now. Note
2671 : * that parallel VACUUM only gets the precheck and this postcheck.
2672 : */
2673 10 : if (lazy_check_wraparound_failsafe(vacrel))
2674 0 : allindexes = false;
2675 : }
2676 :
2677 : /*
2678 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2679 : * each call here (except calls where we choose to do the failsafe). This
2680 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2681 : * of the failsafe triggering, which prevents the next call from taking
2682 : * place).
2683 : */
2684 : Assert(vacrel->num_index_scans > 0 ||
2685 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2686 : Assert(allindexes || VacuumFailsafeActive);
2687 :
2688 : /*
2689 : * Increase and report the number of index scans. Also, we reset
2690 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2691 : *
2692 : * We deliberately include the case where we started a round of bulk
2693 : * deletes that we weren't able to finish due to the failsafe triggering.
2694 : */
2695 1368 : vacrel->num_index_scans++;
2696 1368 : progress_end_val[0] = 0;
2697 1368 : progress_end_val[1] = 0;
2698 1368 : progress_end_val[2] = vacrel->num_index_scans;
2699 1368 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2700 :
2701 1368 : return allindexes;
2702 : }
2703 :
2704 : /*
2705 : * Read stream callback for vacuum's third phase (second pass over the heap).
2706 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2707 : * if there are no further blocks to vacuum.
2708 : *
2709 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2710 : */
2711 : static BlockNumber
2712 34952 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2713 : void *callback_private_data,
2714 : void *per_buffer_data)
2715 : {
2716 34952 : TidStoreIter *iter = callback_private_data;
2717 : TidStoreIterResult *iter_result;
2718 :
2719 34952 : iter_result = TidStoreIterateNext(iter);
2720 34952 : if (iter_result == NULL)
2721 1368 : return InvalidBlockNumber;
2722 :
2723 : /*
2724 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2725 : * It is safe to copy the result, according to TidStoreIterateNext().
2726 : */
2727 33584 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2728 :
2729 33584 : return iter_result->blkno;
2730 : }
2731 :
2732 : /*
2733 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2734 : *
2735 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2736 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2737 : *
2738 : * We may also be able to truncate the line pointer array of the heap pages we
2739 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2740 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2741 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2742 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2743 : * page's line pointer array).
2744 : *
2745 : * Note: the reason for doing this as a second pass is that we cannot remove
2746 : * the tuples until we've removed their index entries, and we want to process
2747 : * index entry removal in batches as large as possible.
2748 : */
2749 : static void
2750 1368 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2751 : {
2752 : ReadStream *stream;
2753 1368 : BlockNumber vacuumed_pages = 0;
2754 1368 : Buffer vmbuffer = InvalidBuffer;
2755 : LVSavedErrInfo saved_err_info;
2756 : TidStoreIter *iter;
2757 :
2758 : Assert(vacrel->do_index_vacuuming);
2759 : Assert(vacrel->do_index_cleanup);
2760 : Assert(vacrel->num_index_scans > 0);
2761 :
2762 : /* Report that we are now vacuuming the heap */
2763 1368 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2764 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2765 :
2766 : /* Update error traceback information */
2767 1368 : update_vacuum_error_info(vacrel, &saved_err_info,
2768 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2769 : InvalidBlockNumber, InvalidOffsetNumber);
2770 :
2771 1368 : iter = TidStoreBeginIterate(vacrel->dead_items);
2772 :
2773 : /*
2774 : * Set up the read stream for vacuum's second pass through the heap.
2775 : *
2776 : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2777 : * not need to wait for IO and does not perform locking. Once we support
2778 : * parallelism it should still be fine, as presumably the holder of locks
2779 : * would never be blocked by IO while holding the lock.
2780 : */
2781 1368 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2782 : READ_STREAM_USE_BATCHING,
2783 : vacrel->bstrategy,
2784 : vacrel->rel,
2785 : MAIN_FORKNUM,
2786 : vacuum_reap_lp_read_stream_next,
2787 : iter,
2788 : sizeof(TidStoreIterResult));
2789 :
2790 : while (true)
2791 33584 : {
2792 : BlockNumber blkno;
2793 : Buffer buf;
2794 : Page page;
2795 : TidStoreIterResult *iter_result;
2796 : Size freespace;
2797 : OffsetNumber offsets[MaxOffsetNumber];
2798 : int num_offsets;
2799 :
2800 34952 : vacuum_delay_point(false);
2801 :
2802 34952 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2803 :
 2804 :  /* The stream is exhausted: no further pages with dead items remain */
2805 34952 : if (!BufferIsValid(buf))
2806 1368 : break;
2807 :
2808 33584 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2809 :
2810 : Assert(iter_result);
2811 33584 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2812 : Assert(num_offsets <= lengthof(offsets));
2813 :
2814 : /*
2815 : * Pin the visibility map page in case we need to mark the page
2816 : * all-visible. In most cases this will be very cheap, because we'll
2817 : * already have the correct page pinned anyway.
2818 : */
2819 33584 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2820 :
2821 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2822 33584 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2823 33584 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2824 : num_offsets, vmbuffer);
2825 :
2826 : /* Now that we've vacuumed the page, record its available space */
2827 33584 : page = BufferGetPage(buf);
2828 33584 : freespace = PageGetHeapFreeSpace(page);
2829 :
2830 33584 : UnlockReleaseBuffer(buf);
2831 33584 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2832 33584 : vacuumed_pages++;
2833 : }
2834 :
2835 1368 : read_stream_end(stream);
2836 1368 : TidStoreEndIterate(iter);
2837 :
2838 1368 : vacrel->blkno = InvalidBlockNumber;
2839 1368 : if (BufferIsValid(vmbuffer))
2840 1368 : ReleaseBuffer(vmbuffer);
2841 :
2842 : /*
2843 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2844 : * the second heap pass. No more, no less.
2845 : */
2846 : Assert(vacrel->num_index_scans > 1 ||
2847 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2848 : vacuumed_pages == vacrel->lpdead_item_pages));
2849 :
2850 1368 : ereport(DEBUG2,
2851 : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2852 : vacrel->relname, vacrel->dead_items_info->num_items,
2853 : vacuumed_pages)));
2854 :
2855 : /* Revert to the previous phase information for error traceback */
2856 1368 : restore_vacuum_error_info(vacrel, &saved_err_info);
2857 1368 : }
2858 :
2859 : /*
2860 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2861 : * vacrel->dead_items store.
2862 : *
2863 : * Caller must have an exclusive buffer lock on the buffer (though a full
2864 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2865 : * a pin on blkno's visibility map page.
2866 : */
2867 : static void
2868 33584 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2869 : OffsetNumber *deadoffsets, int num_offsets,
2870 : Buffer vmbuffer)
2871 : {
2872 33584 : Page page = BufferGetPage(buffer);
2873 : OffsetNumber unused[MaxHeapTuplesPerPage];
2874 33584 : int nunused = 0;
2875 : TransactionId visibility_cutoff_xid;
2876 33584 : TransactionId conflict_xid = InvalidTransactionId;
2877 : bool all_frozen;
2878 : LVSavedErrInfo saved_err_info;
2879 33584 : uint8 vmflags = 0;
2880 :
2881 : Assert(vacrel->do_index_vacuuming);
2882 :
2883 33584 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2884 :
2885 : /* Update error traceback information */
2886 33584 : update_vacuum_error_info(vacrel, &saved_err_info,
2887 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2888 : InvalidOffsetNumber);
2889 :
2890 : /*
2891 : * Before marking dead items unused, check whether the page will become
2892 : * all-visible once that change is applied. This lets us reap the tuples
2893 : * and mark the page all-visible within the same critical section,
2894 : * enabling both changes to be emitted in a single WAL record. Since the
2895 : * visibility checks may perform I/O and allocate memory, they must be
2896 : * done outside the critical section.
2897 : */
2898 33584 : if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2899 : vacrel->cutoffs.OldestXmin,
2900 : deadoffsets, num_offsets,
2901 : &all_frozen, &visibility_cutoff_xid,
2902 : &vacrel->offnum))
2903 : {
2904 33468 : vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2905 33468 : if (all_frozen)
2906 : {
2907 25616 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
2908 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2909 : }
2910 :
2911 : /*
2912 : * Take the lock on the vmbuffer before entering a critical section.
2913 : * The heap page lock must also be held while updating the VM to
2914 : * ensure consistency.
2915 : */
2916 33468 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2917 : }
2918 :
2919 33584 : START_CRIT_SECTION();
2920 :
2921 1980610 : for (int i = 0; i < num_offsets; i++)
2922 : {
2923 : ItemId itemid;
2924 1947026 : OffsetNumber toff = deadoffsets[i];
2925 :
2926 1947026 : itemid = PageGetItemId(page, toff);
2927 :
2928 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2929 1947026 : ItemIdSetUnused(itemid);
2930 1947026 : unused[nunused++] = toff;
2931 : }
2932 :
2933 : Assert(nunused > 0);
2934 :
2935 : /* Attempt to truncate line pointer array now */
2936 33584 : PageTruncateLinePointerArray(page);
2937 :
2938 33584 : if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2939 : {
2940 : /*
2941 : * The page is guaranteed to have had dead line pointers, so we always
2942 : * set PD_ALL_VISIBLE.
2943 : */
2944 33468 : PageSetAllVisible(page);
2945 33468 : visibilitymap_set_vmbits(blkno,
2946 : vmbuffer, vmflags,
2947 33468 : vacrel->rel->rd_locator);
2948 33468 : conflict_xid = visibility_cutoff_xid;
2949 : }
2950 :
2951 : /*
2952 : * Mark buffer dirty before we write WAL.
2953 : */
2954 33584 : MarkBufferDirty(buffer);
2955 :
2956 : /* XLOG stuff */
2957 33584 : if (RelationNeedsWAL(vacrel->rel))
2958 : {
2959 31888 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2960 : vmflags != 0 ? vmbuffer : InvalidBuffer,
2961 : vmflags,
2962 : conflict_xid,
2963 : false, /* no cleanup lock required */
2964 : PRUNE_VACUUM_CLEANUP,
2965 : NULL, 0, /* frozen */
2966 : NULL, 0, /* redirected */
2967 : NULL, 0, /* dead */
2968 : unused, nunused);
2969 : }
2970 :
2971 33584 : END_CRIT_SECTION();
2972 :
2973 33584 : if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
2974 : {
2975 : /* Count the newly set VM page for logging */
2976 33468 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2977 33468 : vacrel->vm_new_visible_pages++;
2978 33468 : if (all_frozen)
2979 25616 : vacrel->vm_new_visible_frozen_pages++;
2980 : }
2981 :
2982 : /* Revert to the previous phase information for error traceback */
2983 33584 : restore_vacuum_error_info(vacrel, &saved_err_info);
2984 33584 : }
2985 :
2986 : /*
2987 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2988 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2989 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2990 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2991 : *
2992 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2993 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2994 : * that it started out with.
2995 : *
2996 : * Returns true when failsafe has been triggered.
2997 : */
2998 : static bool
2999 181080 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
3000 : {
3001 : /* Don't warn more than once per VACUUM */
3002 181080 : if (VacuumFailsafeActive)
3003 0 : return true;
3004 :
3005 181080 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
3006 : {
3007 48630 : const int progress_index[] = {
3008 : PROGRESS_VACUUM_INDEXES_TOTAL,
3009 : PROGRESS_VACUUM_INDEXES_PROCESSED,
3010 : PROGRESS_VACUUM_MODE
3011 : };
3012 48630 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
3013 :
3014 48630 : VacuumFailsafeActive = true;
3015 :
3016 : /*
3017 : * Abandon use of a buffer access strategy to allow use of all of
3018 : * shared buffers. We assume the caller who allocated the memory for
3019 : * the BufferAccessStrategy will free it.
3020 : */
3021 48630 : vacrel->bstrategy = NULL;
3022 :
3023 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
3024 48630 : vacrel->do_index_vacuuming = false;
3025 48630 : vacrel->do_index_cleanup = false;
3026 48630 : vacrel->do_rel_truncate = false;
3027 :
3028 : /* Reset the progress counters and set the failsafe mode */
3029 48630 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
3030 :
3031 48630 : ereport(WARNING,
3032 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3033 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3034 : vacrel->num_index_scans),
3035 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3036 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3037 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3038 :
3039 : /* Stop applying cost limits from this point on */
3040 48630 : VacuumCostActive = false;
3041 48630 : VacuumCostBalance = 0;
3042 :
3043 48630 : return true;
3044 : }
3045 :
3046 132450 : return false;
3047 : }
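
The failsafe branch above reports its state through the cumulative progress machinery using parallel arrays of column indexes and values. The sketch below restates that idiom in isolation; it is illustrative only (example_report_failsafe_progress() is an assumed helper name, not part of vacuumlazy.c) and reuses only the PROGRESS_VACUUM_* constants and the pgstat_progress_update_multi_param() call already visible above.

static void
example_report_failsafe_progress(void)
{
	/* One array of progress-view columns, one array of values, applied in one call */
	const int	progress_index[] = {
		PROGRESS_VACUUM_INDEXES_TOTAL,
		PROGRESS_VACUUM_INDEXES_PROCESSED,
		PROGRESS_VACUUM_MODE
	};
	int64		progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};

	/* Mirrors the failsafe branch: zero the index counters, switch mode */
	pgstat_progress_update_multi_param(3, progress_index, progress_val);
}
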
3048 :
3049 : /*
3050 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3051 : */
3052 : static void
3053 121704 : lazy_cleanup_all_indexes(LVRelState *vacrel)
3054 : {
3055 121704 : double reltuples = vacrel->new_rel_tuples;
3056 121704 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3057 121704 : const int progress_start_index[] = {
3058 : PROGRESS_VACUUM_PHASE,
3059 : PROGRESS_VACUUM_INDEXES_TOTAL
3060 : };
3061 121704 : const int progress_end_index[] = {
3062 : PROGRESS_VACUUM_INDEXES_TOTAL,
3063 : PROGRESS_VACUUM_INDEXES_PROCESSED
3064 : };
3065 : int64 progress_start_val[2];
3066 121704 : int64 progress_end_val[2] = {0, 0};
3067 :
3068 : Assert(vacrel->do_index_cleanup);
3069 : Assert(vacrel->nindexes > 0);
3070 :
3071 : /*
3072 : * Report that we are now cleaning up indexes and the number of indexes to
3073 : * cleanup.
3074 : */
3075 121704 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3076 121704 : progress_start_val[1] = vacrel->nindexes;
3077 121704 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3078 :
3079 121704 : if (!ParallelVacuumIsActive(vacrel))
3080 : {
3081 313104 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3082 : {
3083 191434 : Relation indrel = vacrel->indrels[idx];
3084 191434 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3085 :
3086 382868 : vacrel->indstats[idx] =
3087 191434 : lazy_cleanup_one_index(indrel, istat, reltuples,
3088 : estimated_count, vacrel);
3089 :
3090 : /* Report the number of indexes cleaned up */
3091 191434 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3092 191434 : idx + 1);
3093 : }
3094 : }
3095 : else
3096 : {
3097 : /* Outsource everything to parallel variant */
3098 34 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3099 : vacrel->num_index_scans,
3100 : estimated_count);
3101 : }
3102 :
3103 : /* Reset the progress counters */
3104 121704 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3105 121704 : }
3106 :
3107 : /*
3108 : * lazy_vacuum_one_index() -- vacuum index relation.
3109 : *
3110 : * Delete all the index tuples containing a TID collected in
3111 : * vacrel->dead_items. Also update running statistics. Exact
3112 : * details depend on index AM's ambulkdelete routine.
3113 : *
3114 : * reltuples is the number of heap tuples to be passed to the
3115 : * bulkdelete callback. It's always assumed to be estimated.
3116 : * See indexam.sgml for more info.
3117 : *
3118 : * Returns bulk delete stats derived from input stats
3119 : */
3120 : static IndexBulkDeleteResult *
3121 2624 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3122 : double reltuples, LVRelState *vacrel)
3123 : {
3124 : IndexVacuumInfo ivinfo;
3125 : LVSavedErrInfo saved_err_info;
3126 :
3127 2624 : ivinfo.index = indrel;
3128 2624 : ivinfo.heaprel = vacrel->rel;
3129 2624 : ivinfo.analyze_only = false;
3130 2624 : ivinfo.report_progress = false;
3131 2624 : ivinfo.estimated_count = true;
3132 2624 : ivinfo.message_level = DEBUG2;
3133 2624 : ivinfo.num_heap_tuples = reltuples;
3134 2624 : ivinfo.strategy = vacrel->bstrategy;
3135 :
3136 : /*
3137 : * Update error traceback information.
3138 : *
3139 : * The index name is saved during this phase and restored immediately
3140 : * after this phase. See vacuum_error_callback.
3141 : */
3142 : Assert(vacrel->indname == NULL);
3143 2624 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3144 2624 : update_vacuum_error_info(vacrel, &saved_err_info,
3145 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3146 : InvalidBlockNumber, InvalidOffsetNumber);
3147 :
3148 : /* Do bulk deletion */
3149 2624 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3150 : vacrel->dead_items_info);
3151 :
3152 : /* Revert to the previous phase information for error traceback */
3153 2624 : restore_vacuum_error_info(vacrel, &saved_err_info);
3154 2624 : pfree(vacrel->indname);
3155 2624 : vacrel->indname = NULL;
3156 :
3157 2624 : return istat;
3158 : }
3159 :
3160 : /*
3161 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3162 : *
3163 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3164 : * of heap tuples and estimated_count is true if reltuples is an
3165 : * estimated value. See indexam.sgml for more info.
3166 : *
3167 : * Returns bulk delete stats derived from input stats
3168 : */
3169 : static IndexBulkDeleteResult *
3170 191434 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3171 : double reltuples, bool estimated_count,
3172 : LVRelState *vacrel)
3173 : {
3174 : IndexVacuumInfo ivinfo;
3175 : LVSavedErrInfo saved_err_info;
3176 :
3177 191434 : ivinfo.index = indrel;
3178 191434 : ivinfo.heaprel = vacrel->rel;
3179 191434 : ivinfo.analyze_only = false;
3180 191434 : ivinfo.report_progress = false;
3181 191434 : ivinfo.estimated_count = estimated_count;
3182 191434 : ivinfo.message_level = DEBUG2;
3183 :
3184 191434 : ivinfo.num_heap_tuples = reltuples;
3185 191434 : ivinfo.strategy = vacrel->bstrategy;
3186 :
3187 : /*
3188 : * Update error traceback information.
3189 : *
3190 : * The index name is saved during this phase and restored immediately
3191 : * after this phase. See vacuum_error_callback.
3192 : */
3193 : Assert(vacrel->indname == NULL);
3194 191434 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3195 191434 : update_vacuum_error_info(vacrel, &saved_err_info,
3196 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3197 : InvalidBlockNumber, InvalidOffsetNumber);
3198 :
3199 191434 : istat = vac_cleanup_one_index(&ivinfo, istat);
3200 :
3201 : /* Revert to the previous phase information for error traceback */
3202 191434 : restore_vacuum_error_info(vacrel, &saved_err_info);
3203 191434 : pfree(vacrel->indname);
3204 191434 : vacrel->indname = NULL;
3205 :
3206 191434 : return istat;
3207 : }
3208 :
3209 : /*
3210 : * should_attempt_truncation - should we attempt to truncate the heap?
3211 : *
3212 : * Don't even think about it unless we have a shot at releasing a goodly
3213 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3214 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3215 : * be particularly disruptive.
3216 : *
3217 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3218 : * system might be refusing to allocate new XIDs at this point. The system
3219 : * definitely won't return to normal unless and until VACUUM actually advances
3220 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3221 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3222 : * truncate the table under these circumstances, an XID exhaustion error might
3223 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3224 : * There is very little chance of truncation working out when the failsafe is
3225 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3226 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3227 : * we're called.
3228 : */
3229 : static bool
3230 177078 : should_attempt_truncation(LVRelState *vacrel)
3231 : {
3232 : BlockNumber possibly_freeable;
3233 :
3234 177078 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3235 48920 : return false;
3236 :
3237 128158 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3238 128158 : if (possibly_freeable > 0 &&
3239 336 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3240 336 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3241 306 : return true;
3242 :
3243 127852 : return false;
3244 : }
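
To make the threshold above concrete, here is a standalone toy program (not part of vacuumlazy.c) restating the same decision rule. The constants 1000 and 16 are assumptions matching the conventional definitions of REL_TRUNCATE_MINIMUM and REL_TRUNCATE_FRACTION earlier in this file; adjust them if your tree differs.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_REL_TRUNCATE_MINIMUM	1000	/* assumed value of REL_TRUNCATE_MINIMUM */
#define EXAMPLE_REL_TRUNCATE_FRACTION	16		/* assumed value of REL_TRUNCATE_FRACTION */

/* Same rule as should_attempt_truncation(), using plain integers */
static bool
example_should_attempt_truncation(uint32_t rel_pages, uint32_t nonempty_pages)
{
	uint32_t	possibly_freeable = rel_pages - nonempty_pages;

	return possibly_freeable > 0 &&
		(possibly_freeable >= EXAMPLE_REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= rel_pages / EXAMPLE_REL_TRUNCATE_FRACTION);
}

int
main(void)
{
	/* 500 trailing empty pages out of 10000: 500 < 1000 and 500 < 625, so skip */
	printf("%d\n", example_should_attempt_truncation(10000, 9500));	/* prints 0 */
	/* 1000 trailing empty pages: meets the absolute minimum, so attempt */
	printf("%d\n", example_should_attempt_truncation(10000, 9000));	/* prints 1 */
	/* Small table: 20 >= 160/16 = 10, so attempt */
	printf("%d\n", example_should_attempt_truncation(160, 140));		/* prints 1 */
	return 0;
}

In other words, a 10000-page table needs at least 625 trailing empty pages before truncation is considered, while a 160-page table needs only 10.
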
3245 :
3246 : /*
3247 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3248 : */
3249 : static void
3250 306 : lazy_truncate_heap(LVRelState *vacrel)
3251 : {
3252 306 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3253 : BlockNumber new_rel_pages;
3254 : bool lock_waiter_detected;
3255 : int lock_retry;
3256 :
3257 : /* Report that we are now truncating */
3258 306 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3259 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3260 :
3261 : /* Update error traceback information one last time */
3262 306 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3263 : vacrel->nonempty_pages, InvalidOffsetNumber);
3264 :
3265 : /*
3266 : * Loop until no more truncating can be done.
3267 : */
3268 : do
3269 : {
3270 : /*
3271 : * We need full exclusive lock on the relation in order to do
3272 : * truncation. If we can't get it, give up rather than waiting --- we
3273 : * don't want to block other backends, and we don't want to deadlock
3274 : * (which is quite possible considering we already hold a lower-grade
3275 : * lock).
3276 : */
3277 306 : lock_waiter_detected = false;
3278 306 : lock_retry = 0;
3279 : while (true)
3280 : {
3281 706 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3282 302 : break;
3283 :
3284 : /*
3285 : * Check for interrupts while trying to (re-)acquire the exclusive
3286 : * lock.
3287 : */
3288 404 : CHECK_FOR_INTERRUPTS();
3289 :
3290 404 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3291 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3292 : {
3293 : /*
3294 : * We failed to establish the lock in the specified number of
3295 : * retries. This means we give up truncating.
3296 : */
3297 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
3298 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3299 : vacrel->relname)));
3300 6 : return;
3301 : }
3302 :
3303 400 : (void) WaitLatch(MyLatch,
3304 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3305 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3306 : WAIT_EVENT_VACUUM_TRUNCATE);
3307 400 : ResetLatch(MyLatch);
3308 : }
3309 :
3310 : /*
3311 : * Now that we have exclusive lock, look to see if the rel has grown
3312 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3313 : * the newly added pages presumably contain non-deletable tuples.
3314 : */
3315 302 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3316 302 : if (new_rel_pages != orig_rel_pages)
3317 : {
3318 : /*
3319 : * Note: we intentionally don't update vacrel->rel_pages with the
3320 : * new rel size here. If we did, it would amount to assuming that
3321 : * the new pages are empty, which is unlikely. Leaving the numbers
3322 : * alone amounts to assuming that the new pages have the same
3323 : * tuple density as existing ones, which is less unlikely.
3324 : */
3325 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3326 0 : return;
3327 : }
3328 :
3329 : /*
3330 : * Scan backwards from the end to verify that the end pages actually
3331 : * contain no tuples. This is *necessary*, not optional, because
3332 : * other backends could have added tuples to these pages whilst we
3333 : * were vacuuming.
3334 : */
3335 302 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3336 302 : vacrel->blkno = new_rel_pages;
3337 :
3338 302 : if (new_rel_pages >= orig_rel_pages)
3339 : {
3340 : /* can't do anything after all */
3341 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3342 2 : return;
3343 : }
3344 :
3345 : /*
3346 : * Okay to truncate.
3347 : */
3348 300 : RelationTruncate(vacrel->rel, new_rel_pages);
3349 :
3350 : /*
3351 : * We can release the exclusive lock as soon as we have truncated.
3352 : * Other backends can't safely access the relation until they have
3353 : * processed the smgr invalidation that smgrtruncate sent out ... but
3354 : * that should happen as part of standard invalidation processing once
3355 : * they acquire lock on the relation.
3356 : */
3357 300 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3358 :
3359 : /*
3360 : * Update statistics. Here, it *is* correct to adjust rel_pages
3361 : * without also touching reltuples, since the tuple count wasn't
3362 : * changed by the truncation.
3363 : */
3364 300 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3365 300 : vacrel->rel_pages = new_rel_pages;
3366 :
3367 300 : ereport(vacrel->verbose ? INFO : DEBUG2,
3368 : (errmsg("table \"%s\": truncated %u to %u pages",
3369 : vacrel->relname,
3370 : orig_rel_pages, new_rel_pages)));
3371 300 : orig_rel_pages = new_rel_pages;
3372 300 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3373 : }
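
The lock-retry loop above polls ConditionalLockRelation() every VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL milliseconds and gives up once VACUUM_TRUNCATE_LOCK_TIMEOUT worth of polls has elapsed (conventionally 50 ms and 5000 ms, i.e. roughly 100 attempts over about 5 seconds; treat those values as assumptions). The standalone toy below mirrors only the retry-budget arithmetic; example_try_exclusive_lock() and the constants are hypothetical stand-ins, and the real code sleeps via WaitLatch() rather than spinning.

#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_WAIT_INTERVAL_MS	50		/* assumed VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL */
#define EXAMPLE_LOCK_TIMEOUT_MS		5000	/* assumed VACUUM_TRUNCATE_LOCK_TIMEOUT */

/* Hypothetical stand-in: pretend the lock frees up on the 4th attempt */
static int	attempts = 0;

static bool
example_try_exclusive_lock(void)
{
	return ++attempts >= 4;
}

static bool
example_acquire_with_timeout(void)
{
	int			retries = 0;

	while (!example_try_exclusive_lock())
	{
		/* Give up once ~EXAMPLE_LOCK_TIMEOUT_MS worth of polls have been made */
		if (++retries > EXAMPLE_LOCK_TIMEOUT_MS / EXAMPLE_WAIT_INTERVAL_MS)
			return false;
		/* The real loop waits EXAMPLE_WAIT_INTERVAL_MS here via WaitLatch() */
	}
	return true;
}

int
main(void)
{
	printf("acquired: %d\n", example_acquire_with_timeout());	/* prints "acquired: 1" */
	return 0;
}
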
3374 :
3375 : /*
3376 : * Rescan end pages to verify that they are (still) empty of tuples.
3377 : *
3378 : * Returns number of nondeletable pages (last nonempty page + 1).
3379 : */
3380 : static BlockNumber
3381 302 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3382 : {
3383 : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3384 : "prefetch size must be power of 2");
3385 :
3386 : BlockNumber blkno;
3387 : BlockNumber prefetchedUntil;
3388 : instr_time starttime;
3389 :
3390 : /* Initialize the starttime if we check for conflicting lock requests */
3391 302 : INSTR_TIME_SET_CURRENT(starttime);
3392 :
3393 : /*
3394 : * Start checking blocks at what we believe relation end to be and move
3395 : * backwards. (Strange coding of loop control is needed because blkno is
3396 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3397 : * in forward direction, so that OS-level readahead can kick in.
3398 : */
3399 302 : blkno = vacrel->rel_pages;
3400 302 : prefetchedUntil = InvalidBlockNumber;
3401 5336 : while (blkno > vacrel->nonempty_pages)
3402 : {
3403 : Buffer buf;
3404 : Page page;
3405 : OffsetNumber offnum,
3406 : maxoff;
3407 : bool hastup;
3408 :
3409 : /*
3410 : * Check if another process requests a lock on our relation. We are
3411 : * holding an AccessExclusiveLock here, so they will be waiting. We
3412 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3413 : * only check if that interval has elapsed once every 32 blocks to
3414 : * keep the number of system calls and actual shared lock table
3415 : * lookups to a minimum.
3416 : */
3417 5038 : if ((blkno % 32) == 0)
3418 : {
3419 : instr_time currenttime;
3420 : instr_time elapsed;
3421 :
3422 164 : INSTR_TIME_SET_CURRENT(currenttime);
3423 164 : elapsed = currenttime;
3424 164 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3425 164 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3426 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3427 : {
3428 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3429 : {
3430 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3431 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3432 : vacrel->relname)));
3433 :
3434 0 : *lock_waiter_detected = true;
3435 0 : return blkno;
3436 : }
3437 0 : starttime = currenttime;
3438 : }
3439 : }
3440 :
3441 : /*
3442 : * We don't insert a vacuum delay point here, because we have an
3443 : * exclusive lock on the table which we want to hold for as short a
3444 : * time as possible. We still need to check for interrupts however.
3445 : */
3446 5038 : CHECK_FOR_INTERRUPTS();
3447 :
3448 5038 : blkno--;
3449 :
3450 : /* If we haven't prefetched this lot yet, do so now. */
3451 5038 : if (prefetchedUntil > blkno)
3452 : {
3453 : BlockNumber prefetchStart;
3454 : BlockNumber pblkno;
3455 :
3456 418 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3457 7288 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3458 : {
3459 6870 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3460 6870 : CHECK_FOR_INTERRUPTS();
3461 : }
3462 418 : prefetchedUntil = prefetchStart;
3463 : }
3464 :
3465 5038 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3466 : vacrel->bstrategy);
3467 :
3468 : /* In this phase we only need shared access to the buffer */
3469 5038 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3470 :
3471 5038 : page = BufferGetPage(buf);
3472 :
3473 5038 : if (PageIsNew(page) || PageIsEmpty(page))
3474 : {
3475 2382 : UnlockReleaseBuffer(buf);
3476 2382 : continue;
3477 : }
3478 :
3479 2656 : hastup = false;
3480 2656 : maxoff = PageGetMaxOffsetNumber(page);
3481 2656 : for (offnum = FirstOffsetNumber;
3482 5308 : offnum <= maxoff;
3483 2652 : offnum = OffsetNumberNext(offnum))
3484 : {
3485 : ItemId itemid;
3486 :
3487 2656 : itemid = PageGetItemId(page, offnum);
3488 :
3489 : /*
3490 : * Note: any non-unused item should be taken as a reason to keep
3491 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3492 : * we must not have cleaned out its index entries.
3493 : */
3494 2656 : if (ItemIdIsUsed(itemid))
3495 : {
3496 4 : hastup = true;
3497 4 : break; /* can stop scanning */
3498 : }
3499 : } /* scan along page */
3500 :
3501 2656 : UnlockReleaseBuffer(buf);
3502 :
3503 : /* Done scanning if we found a tuple here */
3504 2656 : if (hastup)
3505 4 : return blkno + 1;
3506 : }
3507 :
3508 : /*
3509 : * If we fall out of the loop, all the previously-thought-to-be-empty
3510 : * pages still are; we need not bother to look at the last known-nonempty
3511 : * page.
3512 : */
3513 298 : return vacrel->nonempty_pages;
3514 : }
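
The prefetch-window arithmetic above relies on PREFETCH_SIZE being a power of two (enforced by the StaticAssertDecl), so masking with ~(PREFETCH_SIZE - 1) rounds a block number down to the start of its window and blocks [prefetchStart, blkno] can be prefetched in forward order for the benefit of OS readahead. A standalone toy illustration, assuming the conventional PREFETCH_SIZE of 32:

#include <stdio.h>

#define EXAMPLE_PREFETCH_SIZE	32u		/* assumed value of PREFETCH_SIZE; must be a power of 2 */

int
main(void)
{
	unsigned	blkno = 75;
	unsigned	prefetch_start = blkno & ~(EXAMPLE_PREFETCH_SIZE - 1);

	/* 75 rounds down to 64, so blocks 64..75 get prefetched in forward order */
	printf("prefetch blocks %u..%u\n", prefetch_start, blkno);	/* prefetch blocks 64..75 */
	return 0;
}
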
3515 :
3516 : /*
3517 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3518 : * shared memory). Sets both in vacrel for caller.
3519 : *
3520 : * Also handles parallel initialization as part of allocating dead_items in
3521 : * DSM when required.
3522 : */
3523 : static void
3524 177078 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3525 : {
3526 : VacDeadItemsInfo *dead_items_info;
3527 503720 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3528 149564 : autovacuum_work_mem != -1 ?
3529 326642 : autovacuum_work_mem : maintenance_work_mem;
3530 :
3531 : /*
3532 : * Initialize state for a parallel vacuum. As of now, only one worker can
3533 : * be used for an index, so we invoke parallelism only if there are at
3534 : * least two indexes on a table.
3535 : */
3536 177078 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3537 : {
3538 : /*
3539 : * Since parallel workers cannot access data in temporary tables, we
3540 : * can't perform parallel vacuum on them.
3541 : */
3542 10970 : if (RelationUsesLocalBuffers(vacrel->rel))
3543 : {
3544 : /*
3545 : * Give warning only if the user explicitly tries to perform a
3546 : * parallel vacuum on the temporary table.
3547 : */
3548 6 : if (nworkers > 0)
3549 6 : ereport(WARNING,
3550 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3551 : vacrel->relname)));
3552 : }
3553 : else
3554 10964 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3555 : vacrel->nindexes, nworkers,
3556 : vac_work_mem,
3557 10964 : vacrel->verbose ? INFO : DEBUG2,
3558 : vacrel->bstrategy);
3559 :
3560 : /*
3561 : * If parallel mode started, dead_items and dead_items_info spaces are
3562 : * allocated in DSM.
3563 : */
3564 10970 : if (ParallelVacuumIsActive(vacrel))
3565 : {
3566 34 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3567 : &vacrel->dead_items_info);
3568 34 : return;
3569 : }
3570 : }
3571 :
3572 : /*
3573 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3574 : * locally.
3575 : */
3576 :
3577 177044 : dead_items_info = palloc_object(VacDeadItemsInfo);
3578 177044 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3579 177044 : dead_items_info->num_items = 0;
3580 177044 : vacrel->dead_items_info = dead_items_info;
3581 :
3582 177044 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3583 : }
3584 :
3585 : /*
3586 : * Add the given block number and offset numbers to dead_items.
3587 : */
3588 : static void
3589 38226 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3590 : int num_offsets)
3591 : {
3592 38226 : const int prog_index[2] = {
3593 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3594 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3595 : };
3596 : int64 prog_val[2];
3597 :
3598 38226 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3599 38226 : vacrel->dead_items_info->num_items += num_offsets;
3600 :
3601 : /* update the progress information */
3602 38226 : prog_val[0] = vacrel->dead_items_info->num_items;
3603 38226 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3604 38226 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3605 38226 : }
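
The TID store that dead_items_add() fills here is the same structure that lazy_vacuum_heap_rel() drains through its read-stream callback. The sketch below is not part of vacuumlazy.c and is only meaningful inside a backend memory context; it strings together the TidStore calls already used in this file to show the full round trip. The block number, offsets, and 64 MB limit are made-up illustration values.

static void
example_tidstore_round_trip(void)
{
	TidStore   *dead_items = TidStoreCreateLocal(64 * 1024 * 1024, true);
	OffsetNumber offsets[] = {1, 4, 9};
	TidStoreIter *iter;
	TidStoreIterResult *res;

	/* Phase I side: remember three dead items on block 42 */
	TidStoreSetBlockOffsets(dead_items, 42, offsets, lengthof(offsets));

	/* Phase III side: walk the store in block order and pull the offsets back out */
	iter = TidStoreBeginIterate(dead_items);
	while ((res = TidStoreIterateNext(iter)) != NULL)
	{
		OffsetNumber out[MaxOffsetNumber];
		int			num = TidStoreGetBlockOffsets(res, out, lengthof(out));

		elog(DEBUG2, "block %u has %d dead items", res->blkno, num);
	}
	TidStoreEndIterate(iter);
	TidStoreDestroy(dead_items);
}

Storing offsets per block, rather than individual ItemPointers, is what lets phase III visit each heap page exactly once.
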
3606 :
3607 : /*
3608 : * Forget all collected dead items.
3609 : */
3610 : static void
3611 1396 : dead_items_reset(LVRelState *vacrel)
3612 : {
3613 : /* Update statistics for dead items */
3614 1396 : vacrel->num_dead_items_resets++;
3615 1396 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3616 :
3617 1396 : if (ParallelVacuumIsActive(vacrel))
3618 : {
3619 10 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3620 10 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3621 : &vacrel->dead_items_info);
3622 10 : return;
3623 : }
3624 :
3625 : /* Recreate the tidstore with the same max_bytes limitation */
3626 1386 : TidStoreDestroy(vacrel->dead_items);
3627 1386 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3628 :
3629 : /* Reset the counter */
3630 1386 : vacrel->dead_items_info->num_items = 0;
3631 : }
3632 :
3633 : /*
3634 : * Perform cleanup for resources allocated in dead_items_alloc
3635 : */
3636 : static void
3637 177078 : dead_items_cleanup(LVRelState *vacrel)
3638 : {
3639 177078 : if (!ParallelVacuumIsActive(vacrel))
3640 : {
3641 : /* Don't bother with pfree here */
3642 177044 : return;
3643 : }
3644 :
3645 : /* End parallel mode */
3646 34 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3647 34 : vacrel->pvs = NULL;
3648 : }
3649 :
3650 : #ifdef USE_ASSERT_CHECKING
3651 :
3652 : /*
 3653 :  * Wrapper around heap_page_would_be_all_visible() for callers that expect no
 3654 :  * LP_DEAD items on the page. Currently assert-only, but there is no reason
 3655 :  * not to use it outside of asserts.
3656 : */
3657 : static bool
3658 : heap_page_is_all_visible(Relation rel, Buffer buf,
3659 : TransactionId OldestXmin,
3660 : bool *all_frozen,
3661 : TransactionId *visibility_cutoff_xid,
3662 : OffsetNumber *logging_offnum)
3663 : {
3664 :
3665 : return heap_page_would_be_all_visible(rel, buf,
3666 : OldestXmin,
3667 : NULL, 0,
3668 : all_frozen,
3669 : visibility_cutoff_xid,
3670 : logging_offnum);
3671 : }
3672 : #endif
3673 :
3674 : /*
3675 : * Check whether the heap page in buf is all-visible except for the dead
3676 : * tuples referenced in the deadoffsets array.
3677 : *
3678 : * Vacuum uses this to check if a page would become all-visible after reaping
3679 : * known dead tuples. This function does not remove the dead items.
3680 : *
3681 : * This cannot be called in a critical section, as the visibility checks may
3682 : * perform IO and allocate memory.
3683 : *
3684 : * Returns true if the page is all-visible other than the provided
3685 : * deadoffsets and false otherwise.
3686 : *
3687 : * OldestXmin is used to determine visibility.
3688 : *
3689 : * Output parameters:
3690 : *
3691 : * - *all_frozen: true if every tuple on the page is frozen
3692 : * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3693 : * - *logging_offnum: OffsetNumber of current tuple being processed;
3694 : * used by vacuum's error callback system.
3695 : *
3696 : * Callers looking to verify that the page is already all-visible can call
3697 : * heap_page_is_all_visible().
3698 : *
3699 : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3700 : * If you modify this function, ensure consistency with that code. An
3701 : * assertion cross-checks that both remain in agreement. Do not introduce new
3702 : * side-effects.
3703 : */
3704 : static bool
3705 33584 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3706 : TransactionId OldestXmin,
3707 : OffsetNumber *deadoffsets,
3708 : int ndeadoffsets,
3709 : bool *all_frozen,
3710 : TransactionId *visibility_cutoff_xid,
3711 : OffsetNumber *logging_offnum)
3712 : {
3713 33584 : Page page = BufferGetPage(buf);
3714 33584 : BlockNumber blockno = BufferGetBlockNumber(buf);
3715 : OffsetNumber offnum,
3716 : maxoff;
3717 33584 : bool all_visible = true;
3718 33584 : int matched_dead_count = 0;
3719 :
3720 33584 : *visibility_cutoff_xid = InvalidTransactionId;
3721 33584 : *all_frozen = true;
3722 :
3723 : Assert(ndeadoffsets == 0 || deadoffsets);
3724 :
3725 : #ifdef USE_ASSERT_CHECKING
3726 : /* Confirm input deadoffsets[] is strictly sorted */
3727 : if (ndeadoffsets > 1)
3728 : {
3729 : for (int i = 1; i < ndeadoffsets; i++)
3730 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3731 : }
3732 : #endif
3733 :
3734 33584 : maxoff = PageGetMaxOffsetNumber(page);
3735 33584 : for (offnum = FirstOffsetNumber;
3736 3073372 : offnum <= maxoff && all_visible;
3737 3039788 : offnum = OffsetNumberNext(offnum))
3738 : {
3739 : ItemId itemid;
3740 : HeapTupleData tuple;
3741 :
3742 : /*
3743 : * Set the offset number so that we can display it along with any
3744 : * error that occurred while processing this tuple.
3745 : */
3746 3039792 : *logging_offnum = offnum;
3747 3039792 : itemid = PageGetItemId(page, offnum);
3748 :
3749 : /* Unused or redirect line pointers are of no interest */
3750 3039792 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3751 2045022 : continue;
3752 :
3753 2938258 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3754 :
3755 : /*
 3756 :          * Dead line pointers can have index entries pointing at them, so they
 3757 :          * can't be treated as visible.
3758 : */
3759 2938258 : if (ItemIdIsDead(itemid))
3760 : {
3761 1943492 : if (!deadoffsets ||
3762 1943490 : matched_dead_count >= ndeadoffsets ||
3763 1943490 : deadoffsets[matched_dead_count] != offnum)
3764 : {
3765 4 : *all_frozen = all_visible = false;
3766 4 : break;
3767 : }
3768 1943488 : matched_dead_count++;
3769 1943488 : continue;
3770 : }
3771 :
3772 : Assert(ItemIdIsNormal(itemid));
3773 :
3774 994766 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3775 994766 : tuple.t_len = ItemIdGetLength(itemid);
3776 994766 : tuple.t_tableOid = RelationGetRelid(rel);
3777 :
3778 : /* Visibility checks may do IO or allocate memory */
3779 : Assert(CritSectionCount == 0);
3780 994766 : switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3781 : {
3782 994708 : case HEAPTUPLE_LIVE:
3783 : {
3784 : TransactionId xmin;
3785 :
3786 : /* Check comments in lazy_scan_prune. */
3787 994708 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3788 : {
3789 0 : all_visible = false;
3790 0 : *all_frozen = false;
3791 0 : break;
3792 : }
3793 :
3794 : /*
3795 : * The inserter definitely committed. But is it old enough
3796 : * that everyone sees it as committed?
3797 : */
3798 994708 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3799 994708 : if (!TransactionIdPrecedes(xmin, OldestXmin))
3800 : {
3801 54 : all_visible = false;
3802 54 : *all_frozen = false;
3803 54 : break;
3804 : }
3805 :
3806 : /* Track newest xmin on page. */
3807 994654 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3808 : TransactionIdIsNormal(xmin))
3809 33352 : *visibility_cutoff_xid = xmin;
3810 :
3811 : /* Check whether this tuple is already frozen or not */
3812 1288360 : if (all_visible && *all_frozen &&
3813 293706 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3814 7882 : *all_frozen = false;
3815 : }
3816 994654 : break;
3817 :
3818 58 : case HEAPTUPLE_DEAD:
3819 : case HEAPTUPLE_RECENTLY_DEAD:
3820 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3821 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3822 : {
3823 58 : all_visible = false;
3824 58 : *all_frozen = false;
3825 58 : break;
3826 : }
3827 0 : default:
3828 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3829 : break;
3830 : }
3831 : } /* scan along page */
3832 :
3833 : /* Clear the offset information once we have processed the given page. */
3834 33584 : *logging_offnum = InvalidOffsetNumber;
3835 :
3836 33584 : return all_visible;
3837 : }
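
Because the loop above visits line pointers in increasing offset order and deadoffsets[] is asserted to be strictly sorted, a single cursor (matched_dead_count) suffices to confirm that every LP_DEAD item on the page was accounted for by the caller; any LP_DEAD item that is not the next expected entry makes the page ineligible. The standalone toy below isolates just that bookkeeping; the names and sample offsets are invented for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Returns true if every dead offset found on the page matches, in order, the caller's sorted list */
static bool
example_all_dead_items_accounted_for(const uint16_t *page_dead_offsets, int npage_dead,
									 const uint16_t *deadoffsets, int ndeadoffsets)
{
	int			matched = 0;

	for (int i = 0; i < npage_dead; i++)
	{
		if (matched >= ndeadoffsets || deadoffsets[matched] != page_dead_offsets[i])
			return false;		/* an LP_DEAD item the caller did not expect */
		matched++;
	}
	return true;
}

int
main(void)
{
	uint16_t	on_page[] = {3, 7, 12};
	uint16_t	expected[] = {3, 7, 12};
	uint16_t	missing_one[] = {3, 12};

	printf("%d\n", example_all_dead_items_accounted_for(on_page, 3, expected, 3));		/* prints 1 */
	printf("%d\n", example_all_dead_items_accounted_for(on_page, 3, missing_one, 2));	/* prints 0 */
	return 0;
}
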
3838 :
3839 : /*
3840 : * Update index statistics in pg_class if the statistics are accurate.
3841 : */
3842 : static void
3843 128188 : update_relstats_all_indexes(LVRelState *vacrel)
3844 : {
3845 128188 : Relation *indrels = vacrel->indrels;
3846 128188 : int nindexes = vacrel->nindexes;
3847 128188 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3848 :
3849 : Assert(vacrel->do_index_cleanup);
3850 :
3851 319744 : for (int idx = 0; idx < nindexes; idx++)
3852 : {
3853 191556 : Relation indrel = indrels[idx];
3854 191556 : IndexBulkDeleteResult *istat = indstats[idx];
3855 :
3856 191556 : if (istat == NULL || istat->estimated_count)
3857 188654 : continue;
3858 :
3859 : /* Update index statistics */
3860 2902 : vac_update_relstats(indrel,
3861 : istat->num_pages,
3862 : istat->num_index_tuples,
3863 : 0, 0,
3864 : false,
3865 : InvalidTransactionId,
3866 : InvalidMultiXactId,
3867 : NULL, NULL, false);
3868 : }
3869 128188 : }
3870 :
3871 : /*
3872 : * Error context callback for errors occurring during vacuum. The error
3873 : * context messages for index phases should match the messages set in parallel
3874 : * vacuum. If you change this function for those phases, change
3875 : * parallel_vacuum_error_callback() as well.
3876 : */
3877 : static void
3878 162538 : vacuum_error_callback(void *arg)
3879 : {
3880 162538 : LVRelState *errinfo = arg;
3881 :
3882 162538 : switch (errinfo->phase)
3883 : {
3884 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3885 0 : if (BlockNumberIsValid(errinfo->blkno))
3886 : {
3887 0 : if (OffsetNumberIsValid(errinfo->offnum))
3888 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3889 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3890 : else
3891 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3892 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3893 : }
3894 : else
3895 0 : errcontext("while scanning relation \"%s.%s\"",
3896 : errinfo->relnamespace, errinfo->relname);
3897 0 : break;
3898 :
3899 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3900 0 : if (BlockNumberIsValid(errinfo->blkno))
3901 : {
3902 0 : if (OffsetNumberIsValid(errinfo->offnum))
3903 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3904 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3905 : else
3906 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3907 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3908 : }
3909 : else
3910 0 : errcontext("while vacuuming relation \"%s.%s\"",
3911 : errinfo->relnamespace, errinfo->relname);
3912 0 : break;
3913 :
3914 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3915 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3916 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3917 0 : break;
3918 :
3919 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3920 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3921 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3922 0 : break;
3923 :
3924 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3925 6 : if (BlockNumberIsValid(errinfo->blkno))
3926 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3927 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3928 6 : break;
3929 :
3930 162532 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3931 : default:
3932 162532 : return; /* do nothing; the errinfo may not be
3933 : * initialized */
3934 : }
3935 : }
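
For context, an error context callback like the one above only fires after it has been pushed onto error_context_stack. In this file that registration happens in heap_vacuum_rel(), outside this excerpt; the sketch below shows the standard backend idiom from elog.h. example_register_vacuum_error_callback() is a hypothetical wrapper for illustration, not code from this file.

static void
example_register_vacuum_error_callback(LVRelState *vacrel)
{
	ErrorContextCallback errcallback;

	/* Push our callback onto the error context stack */
	errcallback.callback = vacuum_error_callback;
	errcallback.arg = vacrel;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* ... any ereport()/elog() raised here gets the extra context line ... */

	/* Pop the callback so it does not outlive vacrel */
	error_context_stack = errcallback.previous;
}
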
3936 :
3937 : /*
 3938 :  * Updates the information required for the vacuum error callback. It also saves
 3939 :  * the current information, which can later be restored via restore_vacuum_error_info().
3940 : */
3941 : static void
3942 936470 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3943 : int phase, BlockNumber blkno, OffsetNumber offnum)
3944 : {
3945 936470 : if (saved_vacrel)
3946 : {
3947 229010 : saved_vacrel->offnum = vacrel->offnum;
3948 229010 : saved_vacrel->blkno = vacrel->blkno;
3949 229010 : saved_vacrel->phase = vacrel->phase;
3950 : }
3951 :
3952 936470 : vacrel->blkno = blkno;
3953 936470 : vacrel->offnum = offnum;
3954 936470 : vacrel->phase = phase;
3955 936470 : }
3956 :
3957 : /*
3958 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3959 : */
3960 : static void
3961 229010 : restore_vacuum_error_info(LVRelState *vacrel,
3962 : const LVSavedErrInfo *saved_vacrel)
3963 : {
3964 229010 : vacrel->blkno = saved_vacrel->blkno;
3965 229010 : vacrel->offnum = saved_vacrel->offnum;
3966 229010 : vacrel->phase = saved_vacrel->phase;
3967 229010 : }