Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * store dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
121 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include "access/genam.h"
133 : #include "access/heapam.h"
134 : #include "access/htup_details.h"
135 : #include "access/multixact.h"
136 : #include "access/tidstore.h"
137 : #include "access/transam.h"
138 : #include "access/visibilitymap.h"
139 : #include "access/xloginsert.h"
140 : #include "catalog/storage.h"
141 : #include "commands/progress.h"
142 : #include "commands/vacuum.h"
143 : #include "common/int.h"
144 : #include "common/pg_prng.h"
145 : #include "executor/instrument.h"
146 : #include "miscadmin.h"
147 : #include "pgstat.h"
148 : #include "portability/instr_time.h"
149 : #include "postmaster/autovacuum.h"
150 : #include "storage/bufmgr.h"
151 : #include "storage/freespace.h"
152 : #include "storage/latch.h"
153 : #include "storage/lmgr.h"
154 : #include "storage/read_stream.h"
155 : #include "utils/lsyscache.h"
156 : #include "utils/pg_rusage.h"
157 : #include "utils/timestamp.h"
158 :
159 :
160 : /*
161 : * Space/time tradeoff parameters: do these need to be user-tunable?
162 : *
163 : * To consider truncating the relation, we want there to be at least
164 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
165 : * is less) potentially-freeable pages.
166 : */
167 : #define REL_TRUNCATE_MINIMUM 1000 /* pages */
168 : #define REL_TRUNCATE_FRACTION 16 /* divisor applied to relsize */
169 :
170 : /*
171 : * Timing parameters for truncate locking heuristics.
172 : *
173 : * These were not exposed as user tunable GUC values because it didn't seem
174 : * that the potential for improvement was great enough to merit the cost of
175 : * supporting them.
176 : */
177 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
178 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
179 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
180 :
181 : /*
182 : * Threshold that controls whether we bypass index vacuuming and heap
183 : * vacuuming as an optimization
184 : */
185 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
186 :
187 : /*
188 : * Perform a failsafe check each time we scan another 4GB of pages.
189 : * (Note that this is deliberately kept to a power-of-two, usually 2^19,
189 : * which is the value obtained when BLCKSZ is 8192.)
190 : */
191 : #define FAILSAFE_EVERY_PAGES \
192 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
193 :
194 : /*
195 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
196 : * (it won't be exact because we only vacuum FSM after processing a heap page
197 : * that has some removable tuples). When there are indexes, this is ignored,
198 : * and we vacuum FSM after each index/heap cleaning pass.
198 : *
198 : * The byte count is computed in uint64 before dividing by BLCKSZ, and the
198 : * result (a number of blocks) is then cast to BlockNumber.
199 : */
200 : #define VACUUM_FSM_EVERY_PAGES \
201 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
202 :
203 : /*
204 : * Before we consider skipping a page that's marked as clean in
205 : * visibility map, we must've seen at least this many clean pages.
206 : */
207 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32) /* pages */
208 :
209 : /*
210 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
211 : * Needs to be a power of 2.
212 : */
213 : #define PREFETCH_SIZE ((BlockNumber) 32) /* blocks; 32 == 2^5 */
214 :
215 : /*
216 : * Macro to check if we are in a parallel vacuum. If true, we are in the
217 : * parallel mode and the DSM segment is initialized.
217 : *
217 : * The test itself is simply whether (vacrel)->pvs is non-NULL.
218 : */
219 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
220 :
221 : /* Phases of vacuum during which we report error context. */
222 : typedef enum
223 : {
224 : VACUUM_ERRCB_PHASE_UNKNOWN, /* initial value assigned in heap_vacuum_rel() */
225 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
226 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
227 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
228 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
229 : VACUUM_ERRCB_PHASE_TRUNCATE,
230 : } VacErrPhase; /* type of LVRelState.phase and LVSavedErrInfo.phase */
231 :
232 : /*
233 : * An eager scan of a page that is set all-frozen in the VM is considered
234 : * "successful". To spread out freezing overhead across multiple normal
235 : * vacuums, we limit the number of successful eager page freezes. The maximum
236 : * number of eager page freezes is calculated as a ratio of the all-visible
237 : * but not all-frozen pages at the beginning of the vacuum.
238 : */
239 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2 /* i.e. 20% of those pages */
240 :
241 : /*
242 : * On the assumption that different regions of the table tend to have
243 : * similarly aged data, once vacuum fails to freeze
244 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
245 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
246 : * to another region of the table with potentially older data.
247 : */
248 : #define EAGER_SCAN_REGION_SIZE 4096 /* blocks per region */
249 :
250 : typedef struct LVRelState /* per-relation vacuum working state; allocated zeroed in heap_vacuum_rel() */
251 : {
252 : /* Target heap relation and its indexes */
253 : Relation rel;
254 : Relation *indrels; /* array of nindexes opened index relations */
255 : int nindexes; /* number of entries in indrels */
256 :
257 : /* Buffer access strategy and parallel vacuum state */
258 : BufferAccessStrategy bstrategy;
259 : ParallelVacuumState *pvs; /* non-NULL means parallel vacuum is active */
260 :
261 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
262 : bool aggressive;
263 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
264 : bool skipwithvm;
265 : /* Consider index vacuuming bypass optimization? */
266 : bool consider_bypass_optimization;
267 :
268 : /* Doing index vacuuming, index cleanup, rel truncation? */
269 : bool do_index_vacuuming;
270 : bool do_index_cleanup;
271 : bool do_rel_truncate;
272 :
273 : /* VACUUM operation's cutoffs for freezing and pruning */
274 : struct VacuumCutoffs cutoffs;
275 : GlobalVisState *vistest;
276 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
277 : TransactionId NewRelfrozenXid;
278 : MultiXactId NewRelminMxid;
279 : bool skippedallvis; /* NOTE(review): presumably set once an all-visible page has been skipped -- confirm at use sites */
280 :
281 : /* Error reporting state */
282 : char *dbname;
283 : char *relnamespace;
284 : char *relname;
285 : char *indname; /* Current index name */
286 : BlockNumber blkno; /* used only for heap operations */
287 : OffsetNumber offnum; /* used only for heap operations */
288 : VacErrPhase phase;
289 : bool verbose; /* VACUUM VERBOSE? */
290 :
291 : /*
292 : * dead_items stores TIDs whose index tuples are deleted by index
293 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
294 : * that has been processed by lazy_scan_prune. Also needed by
295 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
296 : * LP_UNUSED during second heap pass.
297 : *
298 : * Both dead_items and dead_items_info are allocated in shared memory in
299 : * parallel vacuum cases.
300 : */
301 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
302 : VacDeadItemsInfo *dead_items_info;
303 :
304 : BlockNumber rel_pages; /* total number of pages */
305 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
306 :
307 : /*
308 : * Count of all-visible blocks eagerly scanned (for logging only). This
309 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
310 : */
311 : BlockNumber eager_scanned_pages;
312 :
313 : BlockNumber removed_pages; /* # pages removed by relation truncation */
314 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
315 :
316 : /* # pages newly set all-visible in the VM */
317 : BlockNumber new_all_visible_pages;
318 :
319 : /*
320 : * # pages newly set all-visible and all-frozen in the VM. This is a
321 : * subset of new_all_visible_pages. That is, new_all_visible_pages
322 : * includes all pages set all-visible, but
323 : * new_all_visible_all_frozen_pages includes only those which were also
324 : * set all-frozen.
325 : */
326 : BlockNumber new_all_visible_all_frozen_pages;
327 :
328 : /* # all-visible pages newly set all-frozen in the VM */
329 : BlockNumber new_all_frozen_pages;
330 :
331 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
332 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
333 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
334 :
335 : /* Statistics output by us, for table */
336 : double new_rel_tuples; /* new estimated total # of tuples */
337 : double new_live_tuples; /* new estimated total # of live tuples */
338 : /* Statistics output by index AMs */
339 : IndexBulkDeleteResult **indstats; /* nindexes zero-initialized slots, one per index */
340 :
341 : /* Instrumentation counters */
342 : int num_index_scans;
343 : int num_dead_items_resets;
344 : Size total_dead_items_bytes;
345 : /* Counters that follow are only for scanned_pages */
346 : int64 tuples_deleted; /* # deleted from table */
347 : int64 tuples_frozen; /* # newly frozen */
348 : int64 lpdead_items; /* # deleted from indexes */
349 : int64 live_tuples; /* # live tuples remaining */
350 : int64 recently_dead_tuples; /* # dead, but not yet removable */
351 : int64 missed_dead_tuples; /* # removable, but not removed */
352 :
353 : /* State maintained by heap_vac_scan_next_block() */
354 : BlockNumber current_block; /* last block returned */
355 : BlockNumber next_unskippable_block; /* next unskippable block */
356 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
357 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
358 :
359 : /* State related to managing eager scanning of all-visible pages */
360 :
361 : /*
362 : * A normal vacuum that has failed to freeze too many eagerly scanned
363 : * blocks in a region suspends eager scanning.
364 : * next_eager_scan_region_start is the block number of the first block
365 : * eligible for resumed eager scanning.
366 : *
367 : * When eager scanning is permanently disabled, either initially
368 : * (including for aggressive vacuum) or due to hitting the success cap,
369 : * this is set to InvalidBlockNumber.
370 : */
371 : BlockNumber next_eager_scan_region_start;
372 :
373 : /*
374 : * The remaining number of blocks a normal vacuum will consider eager
375 : * scanning when it is successful. When eager scanning is enabled, this is
376 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
377 : * all-visible but not all-frozen pages. For each eager freeze success,
378 : * this is decremented. Once it hits 0, eager scanning is permanently
379 : * disabled. It is initialized to 0 if eager scanning starts out disabled
380 : * (including for aggressive vacuum).
381 : */
382 : BlockNumber eager_scan_remaining_successes;
383 :
384 : /*
385 : * The maximum number of blocks which may be eagerly scanned and not
386 : * frozen before eager scanning is temporarily suspended. This is
387 : * configurable both globally, via the
388 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
389 : * storage parameter of the same name. It is calculated as
390 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
391 : * It is 0 when eager scanning is disabled.
392 : */
393 : BlockNumber eager_scan_max_fails_per_region;
394 :
395 : /*
396 : * The number of eagerly scanned blocks vacuum may still fail to freeze
397 : * (due to age) in the current eager scan region; i.e. a remaining
397 : * allowance, not a count of failures so far. Vacuum resets it to
398 : * eager_scan_max_fails_per_region each time it enters a new region of the
399 : * relation (heap_vacuum_eager_scan_setup scales the initial value down
399 : * because the first region is shorter). If eager_scan_remaining_fails
399 : * hits 0, eager scanning is
400 : * suspended until the next region. It is also 0 if eager scanning has
401 : * been permanently disabled.
402 : */
403 : BlockNumber eager_scan_remaining_fails;
404 : } LVRelState;
405 :
406 :
407 : /* Struct for saving and restoring vacuum error information. */
408 : typedef struct LVSavedErrInfo /* fields mirror the same-named LVRelState members */
409 : {
410 : BlockNumber blkno; /* saved copy of LVRelState.blkno */
411 : OffsetNumber offnum; /* saved copy of LVRelState.offnum */
412 : VacErrPhase phase; /* saved copy of LVRelState.phase */
413 : } LVSavedErrInfo; /* passed to update_/restore_vacuum_error_info() */
414 :
415 :
416 : /* non-export function prototypes (all file-local, hence static) */
417 : static void lazy_scan_heap(LVRelState *vacrel);
418 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
419 : const VacuumParams params);
420 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
421 : void *callback_private_data,
422 : void *per_buffer_data);
423 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
424 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
425 : BlockNumber blkno, Page page,
426 : bool sharelock, Buffer vmbuffer);
427 : static void identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
428 : BlockNumber heap_blk, Page heap_page,
429 : int nlpdead_items,
430 : Buffer vmbuffer,
431 : uint8 *vmbits);
432 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
433 : BlockNumber blkno, Page page,
434 : Buffer vmbuffer,
435 : bool *has_lpdead_items, bool *vm_page_frozen);
436 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
437 : BlockNumber blkno, Page page,
438 : bool *has_lpdead_items);
439 : static void lazy_vacuum(LVRelState *vacrel);
440 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
441 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
442 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
443 : Buffer buffer, OffsetNumber *deadoffsets,
444 : int num_offsets, Buffer vmbuffer);
445 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
446 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
447 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
448 : IndexBulkDeleteResult *istat,
449 : double reltuples,
450 : LVRelState *vacrel);
451 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
452 : IndexBulkDeleteResult *istat,
453 : double reltuples,
454 : bool estimated_count,
455 : LVRelState *vacrel);
456 : static bool should_attempt_truncation(LVRelState *vacrel);
457 : static void lazy_truncate_heap(LVRelState *vacrel);
458 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
459 : bool *lock_waiter_detected);
460 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
461 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
462 : int num_offsets);
463 : static void dead_items_reset(LVRelState *vacrel);
464 : static void dead_items_cleanup(LVRelState *vacrel);
465 :
466 : #ifdef USE_ASSERT_CHECKING /* declared only in assert-enabled builds */
467 : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
468 : TransactionId OldestXmin,
469 : bool *all_frozen,
470 : TransactionId *visibility_cutoff_xid,
471 : OffsetNumber *logging_offnum);
472 : #endif /* USE_ASSERT_CHECKING */
473 : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
474 : TransactionId OldestXmin,
475 : OffsetNumber *deadoffsets,
476 : int ndeadoffsets,
477 : bool *all_frozen,
478 : TransactionId *visibility_cutoff_xid,
479 : OffsetNumber *logging_offnum);
480 : static void update_relstats_all_indexes(LVRelState *vacrel);
481 : static void vacuum_error_callback(void *arg); /* installed as the ErrorContextCallback in heap_vacuum_rel() */
482 : static void update_vacuum_error_info(LVRelState *vacrel,
483 : LVSavedErrInfo *saved_vacrel,
484 : int phase, BlockNumber blkno, /* NOTE(review): phase is declared int here, not VacErrPhase */
485 : OffsetNumber offnum);
486 : static void restore_vacuum_error_info(LVRelState *vacrel,
487 : const LVSavedErrInfo *saved_vacrel);
488 :
489 :
490 :
491 : /*
492 : * Helper to set up the eager scanning state for vacuuming a single relation.
493 : * Initializes the eager scan management related members of the LVRelState.
494 : *
495 : * Caller provides whether or not an aggressive vacuum is required due to
496 : * vacuum options or for relfrozenxid/relminmxid advancement.
497 : */
498 : static void
499 120792 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
500 : {
501 : uint32 randseed;
502 : BlockNumber allvisible;
503 : BlockNumber allfrozen;
504 : float first_region_ratio;
505 120792 : bool oldest_unfrozen_before_cutoff = false;
506 :
507 : /*
508 : * Initialize eager scan management fields to their disabled values.
509 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
510 : * of tables without sufficiently old tuples disable eager scanning.
511 : */
512 120792 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
513 120792 : vacrel->eager_scan_max_fails_per_region = 0;
514 120792 : vacrel->eager_scan_remaining_fails = 0;
515 120792 : vacrel->eager_scan_remaining_successes = 0;
516 :
517 : /* If eager scanning is explicitly disabled, just return. */
518 120792 : if (params.max_eager_freeze_failure_rate == 0)
519 120792 : return;
520 :
521 : /*
522 : * The caller will have determined whether or not an aggressive vacuum is
523 : * required by either the vacuum parameters or the relative age of the
524 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
525 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
526 : * so scans of all-visible pages are not considered eager.
527 : */
528 120792 : if (vacrel->aggressive)
529 115179 : return;
530 :
531 : /*
532 : * Aggressively vacuuming a small relation shouldn't take long, so it
533 : * isn't worth amortizing. We use two times the region size as the size
534 : * cutoff because the eager scan start block is a random spot somewhere in
535 : * the first region, making the second region the first to be eager
536 : * scanned normally.
537 : */
538 5613 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
539 5613 : return;
540 :
541 : /*
542 : * We only want to enable eager scanning if we are likely to be able to
543 : * freeze some of the pages in the relation.
544 : *
545 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
546 : * are technically freezable, but we won't freeze them unless the criteria
547 : * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
548 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
549 : *
550 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
551 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
552 : * enable eager scanning.
553 : */
554 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
555 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
556 : vacrel->cutoffs.FreezeLimit))
557 0 : oldest_unfrozen_before_cutoff = true;
558 :
559 0 : if (!oldest_unfrozen_before_cutoff &&
560 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
561 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
562 : vacrel->cutoffs.MultiXactCutoff))
563 0 : oldest_unfrozen_before_cutoff = true;
564 :
565 0 : if (!oldest_unfrozen_before_cutoff)
566 0 : return;
567 :
568 : /* We have met the criteria to eagerly scan some pages. */
569 :
570 : /*
571 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
572 : * all-visible but not all-frozen blocks in the relation.
573 : */
574 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
575 :
576 0 : vacrel->eager_scan_remaining_successes =
577 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
578 0 : (allvisible - allfrozen));
579 :
580 : /* If every all-visible page is frozen, eager scanning is disabled. */
581 0 : if (vacrel->eager_scan_remaining_successes == 0)
582 0 : return;
583 :
584 : /*
585 : * Now calculate the bounds of the first eager scan region. Its end block
586 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
587 : * blocks. This affects the bounds of all subsequent regions and avoids
588 : * eager scanning and failing to freeze the same blocks each vacuum of the
589 : * relation.
590 : */
591 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
592 :
593 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
594 :
595 : Assert(params.max_eager_freeze_failure_rate > 0 &&
596 : params.max_eager_freeze_failure_rate <= 1);
597 :
598 0 : vacrel->eager_scan_max_fails_per_region =
599 0 : params.max_eager_freeze_failure_rate *
600 : EAGER_SCAN_REGION_SIZE;
601 :
602 : /*
603 : * The first region will be smaller than subsequent regions. As such,
604 : * adjust the eager freeze failures tolerated for this region.
605 : */
606 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
607 : EAGER_SCAN_REGION_SIZE;
608 :
609 0 : vacrel->eager_scan_remaining_fails =
610 0 : vacrel->eager_scan_max_fails_per_region *
611 : first_region_ratio;
612 : }
613 :
614 : /*
615 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
616 : *
617 : * This routine sets things up for and then calls lazy_scan_heap, where
618 : * almost all work actually takes place. Finalizes everything after call
619 : * returns by managing relation truncation and updating rel's pg_class
620 : * entry. (Also updates pg_class entries for any indexes that need it.)
621 : *
622 : * At entry, we have already established a transaction and opened
623 : * and locked the relation.
624 : */
625 : void
626 120792 : heap_vacuum_rel(Relation rel, const VacuumParams params,
627 : BufferAccessStrategy bstrategy)
628 : {
629 : LVRelState *vacrel;
630 : bool verbose,
631 : instrument,
632 : skipwithvm,
633 : frozenxid_updated,
634 : minmulti_updated;
635 : BlockNumber orig_rel_pages,
636 : new_rel_pages,
637 : new_rel_allvisible,
638 : new_rel_allfrozen;
639 : PGRUsage ru0;
640 120792 : TimestampTz starttime = 0;
641 120792 : PgStat_Counter startreadtime = 0,
642 120792 : startwritetime = 0;
643 120792 : WalUsage startwalusage = pgWalUsage;
644 120792 : BufferUsage startbufferusage = pgBufferUsage;
645 : ErrorContextCallback errcallback;
646 120792 : char **indnames = NULL;
647 120792 : Size dead_items_max_bytes = 0;
648 :
649 120792 : verbose = (params.options & VACOPT_VERBOSE) != 0;
650 227724 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
651 106932 : params.log_vacuum_min_duration >= 0));
652 120792 : if (instrument)
653 : {
654 106944 : pg_rusage_init(&ru0);
655 106944 : if (track_io_timing)
656 : {
657 0 : startreadtime = pgStatBlockReadTime;
658 0 : startwritetime = pgStatBlockWriteTime;
659 : }
660 : }
661 :
662 : /* Used for instrumentation and stats report */
663 120792 : starttime = GetCurrentTimestamp();
664 :
665 120792 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
666 : RelationGetRelid(rel));
667 120792 : if (AmAutoVacuumWorkerProcess())
668 106932 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
669 106932 : params.is_wraparound
670 : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
671 : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
672 : else
673 13860 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
674 : PROGRESS_VACUUM_STARTED_BY_MANUAL);
675 :
676 : /*
677 : * Setup error traceback support for ereport() first. The idea is to set
678 : * up an error context callback to display additional information on any
679 : * error during a vacuum. During different phases of vacuum, we update
680 : * the state so that the error context callback always display current
681 : * information.
682 : *
683 : * Copy the names of heap rel into local memory for error reporting
684 : * purposes, too. It isn't always safe to assume that we can get the name
685 : * of each rel. It's convenient for code in lazy_scan_heap to always use
686 : * these temp copies.
687 : */
688 120792 : vacrel = palloc0_object(LVRelState);
689 120792 : vacrel->dbname = get_database_name(MyDatabaseId);
690 120792 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
691 120792 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
692 120792 : vacrel->indname = NULL;
693 120792 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
694 120792 : vacrel->verbose = verbose;
695 120792 : errcallback.callback = vacuum_error_callback;
696 120792 : errcallback.arg = vacrel;
697 120792 : errcallback.previous = error_context_stack;
698 120792 : error_context_stack = &errcallback;
699 :
700 : /* Set up high level stuff about rel and its indexes */
701 120792 : vacrel->rel = rel;
702 120792 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
703 : &vacrel->indrels);
704 120792 : vacrel->bstrategy = bstrategy;
705 120792 : if (instrument && vacrel->nindexes > 0)
706 : {
707 : /* Copy index names used by instrumentation (not error reporting) */
708 102283 : indnames = palloc_array(char *, vacrel->nindexes);
709 263371 : for (int i = 0; i < vacrel->nindexes; i++)
710 161088 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
711 : }
712 :
713 : /*
714 : * The index_cleanup param either disables index vacuuming and cleanup or
715 : * forces it to go ahead when we would otherwise apply the index bypass
716 : * optimization. The default is 'auto', which leaves the final decision
717 : * up to lazy_vacuum().
718 : *
719 : * The truncate param allows user to avoid attempting relation truncation,
720 : * though it can't force truncation to happen.
721 : */
722 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
723 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
724 : params.truncate != VACOPTVALUE_AUTO);
725 :
726 : /*
727 : * While VacuumFailsafeActive is reset to false before calling this, we
728 : * still need to reset it here due to recursive calls.
729 : */
730 120792 : VacuumFailsafeActive = false;
731 120792 : vacrel->consider_bypass_optimization = true;
732 120792 : vacrel->do_index_vacuuming = true;
733 120792 : vacrel->do_index_cleanup = true;
734 120792 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
735 120792 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
736 : {
737 : /* Force disable index vacuuming up-front */
738 130 : vacrel->do_index_vacuuming = false;
739 130 : vacrel->do_index_cleanup = false;
740 : }
741 120662 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
742 : {
743 : /* Force index vacuuming. Note that failsafe can still bypass. */
744 15 : vacrel->consider_bypass_optimization = false;
745 : }
746 : else
747 : {
748 : /* Default/auto, make all decisions dynamically */
749 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
750 : }
751 :
752 : /* Initialize page counters explicitly (be tidy) */
753 120792 : vacrel->scanned_pages = 0;
754 120792 : vacrel->eager_scanned_pages = 0;
755 120792 : vacrel->removed_pages = 0;
756 120792 : vacrel->new_frozen_tuple_pages = 0;
757 120792 : vacrel->lpdead_item_pages = 0;
758 120792 : vacrel->missed_dead_pages = 0;
759 120792 : vacrel->nonempty_pages = 0;
760 : /* dead_items_alloc allocates vacrel->dead_items later on */
761 :
762 : /* Allocate/initialize output statistics state */
763 120792 : vacrel->new_rel_tuples = 0;
764 120792 : vacrel->new_live_tuples = 0;
765 120792 : vacrel->indstats = (IndexBulkDeleteResult **)
766 120792 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
767 :
768 : /* Initialize remaining counters (be tidy) */
769 120792 : vacrel->num_index_scans = 0;
770 120792 : vacrel->num_dead_items_resets = 0;
771 120792 : vacrel->total_dead_items_bytes = 0;
772 120792 : vacrel->tuples_deleted = 0;
773 120792 : vacrel->tuples_frozen = 0;
774 120792 : vacrel->lpdead_items = 0;
775 120792 : vacrel->live_tuples = 0;
776 120792 : vacrel->recently_dead_tuples = 0;
777 120792 : vacrel->missed_dead_tuples = 0;
778 :
779 120792 : vacrel->new_all_visible_pages = 0;
780 120792 : vacrel->new_all_visible_all_frozen_pages = 0;
781 120792 : vacrel->new_all_frozen_pages = 0;
782 :
783 : /*
784 : * Get cutoffs that determine which deleted tuples are considered DEAD,
785 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
786 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
787 : * happen in this order to ensure that the OldestXmin cutoff field works
788 : * as an upper bound on the XIDs stored in the pages we'll actually scan
789 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
790 : *
791 : * Next acquire vistest, a related cutoff that's used in pruning. We use
792 : * vistest in combination with OldestXmin to ensure that
793 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
794 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
795 : * whether a tuple should be frozen or removed. (In the future we might
796 : * want to teach lazy_scan_prune to recompute vistest from time to time,
797 : * to increase the number of dead tuples it can prune away.)
798 : */
799 120792 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
800 120792 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
801 120792 : vacrel->vistest = GlobalVisTestFor(rel);
802 :
803 : /* Initialize state used to track oldest extant XID/MXID */
804 120792 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
805 120792 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
806 :
807 : /*
808 : * Initialize state related to tracking all-visible page skipping. This is
809 : * very important to determine whether or not it is safe to advance the
810 : * relfrozenxid/relminmxid.
811 : */
812 120792 : vacrel->skippedallvis = false;
813 120792 : skipwithvm = true;
814 120792 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
815 : {
816 : /*
817 : * Force aggressive mode, and disable skipping blocks using the
818 : * visibility map (even those set all-frozen)
819 : */
820 172 : vacrel->aggressive = true;
821 172 : skipwithvm = false;
822 : }
823 :
824 120792 : vacrel->skipwithvm = skipwithvm;
825 :
826 : /*
827 : * Set up eager scan tracking state. This must happen after determining
828 : * whether or not the vacuum must be aggressive, because only normal
829 : * vacuums use the eager scan algorithm.
830 : */
831 120792 : heap_vacuum_eager_scan_setup(vacrel, params);
832 :
833 : /* Report the vacuum mode: 'normal' or 'aggressive' */
834 120792 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
835 120792 : vacrel->aggressive
836 : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
837 : : PROGRESS_VACUUM_MODE_NORMAL);
838 :
839 120792 : if (verbose)
840 : {
841 12 : if (vacrel->aggressive)
842 1 : ereport(INFO,
843 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
844 : vacrel->dbname, vacrel->relnamespace,
845 : vacrel->relname)));
846 : else
847 11 : ereport(INFO,
848 : (errmsg("vacuuming \"%s.%s.%s\"",
849 : vacrel->dbname, vacrel->relnamespace,
850 : vacrel->relname)));
851 : }
852 :
853 : /*
854 : * Allocate dead_items memory using dead_items_alloc. This handles
855 : * parallel VACUUM initialization as part of allocating shared memory
856 : * space used for dead_items. (But do a failsafe precheck first, to
857 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
858 : * is already dangerously old.)
859 : */
860 120792 : lazy_check_wraparound_failsafe(vacrel);
861 120792 : dead_items_alloc(vacrel, params.nworkers);
862 :
863 : /*
864 : * Call lazy_scan_heap to perform all required heap pruning, index
865 : * vacuuming, and heap vacuuming (plus related processing)
866 : */
867 120792 : lazy_scan_heap(vacrel);
868 :
869 : /*
870 : * Save dead items max_bytes and update the memory usage statistics before
871 : * cleanup, because they are freed in parallel vacuum cases during
872 : * dead_items_cleanup().
873 : */
874 120792 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
875 120792 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
876 :
877 : /*
878 : * Free resources managed by dead_items_alloc. This ends parallel mode in
879 : * passing when necessary.
880 : */
881 120792 : dead_items_cleanup(vacrel);
882 : Assert(!IsInParallelMode());
883 :
884 : /*
885 : * Update pg_class entries for each of rel's indexes where appropriate.
886 : *
887 : * Unlike the later update to rel's pg_class entry, this is not critical.
888 : * Maintains relpages/reltuples statistics used by the planner only.
889 : */
890 120792 : if (vacrel->do_index_cleanup)
891 99146 : update_relstats_all_indexes(vacrel);
892 :
893 : /* Done with rel's indexes */
894 120792 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
895 :
896 : /* Optionally truncate rel */
897 120792 : if (should_attempt_truncation(vacrel))
898 155 : lazy_truncate_heap(vacrel);
899 :
900 : /* Pop the error context stack */
901 120792 : error_context_stack = errcallback.previous;
902 :
903 : /* Report that we are now doing final cleanup */
904 120792 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
905 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
906 :
907 : /*
908 : * Prepare to update rel's pg_class entry.
909 : *
910 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
911 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
912 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
913 : */
914 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
915 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
916 : vacrel->cutoffs.relfrozenxid,
917 : vacrel->NewRelfrozenXid));
918 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
919 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
920 : vacrel->cutoffs.relminmxid,
921 : vacrel->NewRelminMxid));
922 120792 : if (vacrel->skippedallvis)
923 : {
924 : /*
925 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
926 : * chose to skip an all-visible page range. The state that tracks new
927 : * values will have missed unfrozen XIDs from the pages we skipped.
928 : */
929 : Assert(!vacrel->aggressive);
930 32 : vacrel->NewRelfrozenXid = InvalidTransactionId;
931 32 : vacrel->NewRelminMxid = InvalidMultiXactId;
932 : }
933 :
934 : /*
935 : * For safety, clamp relallvisible to be not more than what we're setting
936 : * pg_class.relpages to
937 : */
938 120792 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
939 120792 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
940 120792 : if (new_rel_allvisible > new_rel_pages)
941 0 : new_rel_allvisible = new_rel_pages;
942 :
943 : /*
944 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
945 : * all-frozen blocks to the count of all-visible blocks. This matches the
946 : * clamping of relallvisible above.
947 : */
948 120792 : if (new_rel_allfrozen > new_rel_allvisible)
949 0 : new_rel_allfrozen = new_rel_allvisible;
950 :
951 : /*
952 : * Now actually update rel's pg_class entry.
953 : *
954 : * In principle new_live_tuples could be -1 indicating that we (still)
955 : * don't know the tuple count. In practice that can't happen, since we
956 : * scan every page that isn't skipped using the visibility map.
957 : */
958 120792 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
959 : new_rel_allvisible, new_rel_allfrozen,
960 120792 : vacrel->nindexes > 0,
961 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
962 : &frozenxid_updated, &minmulti_updated, false);
963 :
964 : /*
965 : * Report results to the cumulative stats system, too.
966 : *
967 : * Deliberately avoid telling the stats system about LP_DEAD items that
968 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
969 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
970 : * It seems like a good idea to err on the side of not vacuuming again too
971 : * soon in cases where the failsafe prevented significant amounts of heap
972 : * vacuuming.
973 : */
974 120792 : pgstat_report_vacuum(rel,
975 48334 : Max(vacrel->new_live_tuples, 0),
976 120792 : vacrel->recently_dead_tuples +
977 120792 : vacrel->missed_dead_tuples,
978 : starttime);
979 120792 : pgstat_progress_end_command();
980 :
981 120792 : if (instrument)
982 : {
983 106944 : TimestampTz endtime = GetCurrentTimestamp();
984 :
985 107046 : if (verbose || params.log_vacuum_min_duration == 0 ||
986 102 : TimestampDifferenceExceeds(starttime, endtime,
987 102 : params.log_vacuum_min_duration))
988 : {
989 : long secs_dur;
990 : int usecs_dur;
991 : WalUsage walusage;
992 : BufferUsage bufferusage;
993 : StringInfoData buf;
994 : char *msgfmt;
995 : int32 diff;
996 106842 : double read_rate = 0,
997 106842 : write_rate = 0;
998 : int64 total_blks_hit;
999 : int64 total_blks_read;
1000 : int64 total_blks_dirtied;
1001 :
1002 106842 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1003 106842 : memset(&walusage, 0, sizeof(WalUsage));
1004 106842 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1005 106842 : memset(&bufferusage, 0, sizeof(BufferUsage));
1006 106842 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1007 :
1008 106842 : total_blks_hit = bufferusage.shared_blks_hit +
1009 106842 : bufferusage.local_blks_hit;
1010 106842 : total_blks_read = bufferusage.shared_blks_read +
1011 106842 : bufferusage.local_blks_read;
1012 106842 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1013 106842 : bufferusage.local_blks_dirtied;
1014 :
1015 106842 : initStringInfo(&buf);
1016 106842 : if (verbose)
1017 : {
1018 : /*
1019 : * Aggressiveness already reported earlier, in dedicated
1020 : * VACUUM VERBOSE ereport
1021 : */
1022 : Assert(!params.is_wraparound);
1023 12 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1024 : }
1025 106830 : else if (params.is_wraparound)
1026 : {
1027 : /*
1028 : * While it's possible for a VACUUM to be both is_wraparound
1029 : * and !aggressive, that's just a corner-case -- is_wraparound
1030 : * implies aggressive. Produce distinct output for the corner
1031 : * case all the same, just in case.
1032 : */
1033 106775 : if (vacrel->aggressive)
1034 106765 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 : else
1036 10 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1037 : }
1038 : else
1039 : {
1040 55 : if (vacrel->aggressive)
1041 16 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 : else
1043 39 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1044 : }
1045 106842 : appendStringInfo(&buf, msgfmt,
1046 : vacrel->dbname,
1047 : vacrel->relnamespace,
1048 : vacrel->relname,
1049 : vacrel->num_index_scans);
1050 149389 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1051 : vacrel->removed_pages,
1052 : new_rel_pages,
1053 : vacrel->scanned_pages,
1054 : orig_rel_pages == 0 ? 100.0 :
1055 42547 : 100.0 * vacrel->scanned_pages /
1056 : orig_rel_pages,
1057 : vacrel->eager_scanned_pages);
1058 106842 : appendStringInfo(&buf,
1059 106842 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1060 : vacrel->tuples_deleted,
1061 106842 : (int64) vacrel->new_rel_tuples,
1062 : vacrel->recently_dead_tuples);
1063 106842 : if (vacrel->missed_dead_tuples > 0)
1064 0 : appendStringInfo(&buf,
1065 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1066 : vacrel->missed_dead_tuples,
1067 : vacrel->missed_dead_pages);
1068 106842 : diff = (int32) (ReadNextTransactionId() -
1069 106842 : vacrel->cutoffs.OldestXmin);
1070 106842 : appendStringInfo(&buf,
1071 106842 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1072 : vacrel->cutoffs.OldestXmin, diff);
1073 106842 : if (frozenxid_updated)
1074 : {
1075 17612 : diff = (int32) (vacrel->NewRelfrozenXid -
1076 17612 : vacrel->cutoffs.relfrozenxid);
1077 17612 : appendStringInfo(&buf,
1078 17612 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1079 : vacrel->NewRelfrozenXid, diff);
1080 : }
1081 106842 : if (minmulti_updated)
1082 : {
1083 36 : diff = (int32) (vacrel->NewRelminMxid -
1084 36 : vacrel->cutoffs.relminmxid);
1085 36 : appendStringInfo(&buf,
1086 36 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1087 : vacrel->NewRelminMxid, diff);
1088 : }
1089 149389 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1090 : vacrel->new_frozen_tuple_pages,
1091 : orig_rel_pages == 0 ? 100.0 :
1092 42547 : 100.0 * vacrel->new_frozen_tuple_pages /
1093 : orig_rel_pages,
1094 : vacrel->tuples_frozen);
1095 :
1096 106842 : appendStringInfo(&buf,
1097 106842 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1098 : vacrel->new_all_visible_pages,
1099 106842 : vacrel->new_all_visible_all_frozen_pages +
1100 106842 : vacrel->new_all_frozen_pages,
1101 : vacrel->new_all_frozen_pages);
1102 106842 : if (vacrel->do_index_vacuuming)
1103 : {
1104 85434 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1105 85395 : appendStringInfoString(&buf, _("index scan not needed: "));
1106 : else
1107 39 : appendStringInfoString(&buf, _("index scan needed: "));
1108 :
1109 85434 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1110 : }
1111 : else
1112 : {
1113 21408 : if (!VacuumFailsafeActive)
1114 1 : appendStringInfoString(&buf, _("index scan bypassed: "));
1115 : else
1116 21407 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1117 :
1118 21408 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1119 : }
1120 149389 : appendStringInfo(&buf, msgfmt,
1121 : vacrel->lpdead_item_pages,
1122 : orig_rel_pages == 0 ? 100.0 :
1123 42547 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1124 : vacrel->lpdead_items);
1125 267745 : for (int i = 0; i < vacrel->nindexes; i++)
1126 : {
1127 160903 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1128 :
1129 160903 : if (!istat)
1130 160824 : continue;
1131 :
1132 79 : appendStringInfo(&buf,
1133 79 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1134 79 : indnames[i],
1135 : istat->num_pages,
1136 : istat->pages_newly_deleted,
1137 : istat->pages_deleted,
1138 : istat->pages_free);
1139 : }
1140 106842 : if (track_cost_delay_timing)
1141 : {
1142 : /*
1143 : * We bypass the changecount mechanism because this value is
1144 : * only updated by the calling process. We also rely on the
1145 : * above call to pgstat_progress_end_command() to not clear
1146 : * the st_progress_param array.
1147 : */
1148 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1149 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1150 : }
1151 106842 : if (track_io_timing)
1152 : {
1153 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1154 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1155 :
1156 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1157 : read_ms, write_ms);
1158 : }
1159 106842 : if (secs_dur > 0 || usecs_dur > 0)
1160 : {
1161 106842 : read_rate = (double) BLCKSZ * total_blks_read /
1162 106842 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1163 106842 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1164 106842 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1165 : }
1166 106842 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1167 : read_rate, write_rate);
1168 106842 : appendStringInfo(&buf,
1169 106842 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1170 : total_blks_hit,
1171 : total_blks_read,
1172 : total_blks_dirtied);
1173 106842 : appendStringInfo(&buf,
1174 106842 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1175 : walusage.wal_records,
1176 : walusage.wal_fpi,
1177 : walusage.wal_bytes,
1178 : walusage.wal_fpi_bytes,
1179 : walusage.wal_buffers_full);
1180 :
1181 : /*
1182 : * Report the dead items memory usage.
1183 : *
1184 : * The num_dead_items_resets counter increases when we reset the
1185 : * collected dead items, so the counter is non-zero if at least
1186 : * one dead item is collected, even if index vacuuming is
1187 : * disabled.
1188 : */
1189 106842 : appendStringInfo(&buf,
1190 106842 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1191 : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1192 106842 : vacrel->num_dead_items_resets),
1193 106842 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1194 : vacrel->num_dead_items_resets,
1195 106842 : (double) dead_items_max_bytes / (1024 * 1024));
1196 106842 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1197 :
1198 106842 : ereport(verbose ? INFO : LOG,
1199 : (errmsg_internal("%s", buf.data)));
1200 106842 : pfree(buf.data);
1201 : }
1202 : }
1203 :
1204 : /* Cleanup index statistics and index names */
1205 301792 : for (int i = 0; i < vacrel->nindexes; i++)
1206 : {
1207 181000 : if (vacrel->indstats[i])
1208 1427 : pfree(vacrel->indstats[i]);
1209 :
1210 181000 : if (instrument)
1211 161088 : pfree(indnames[i]);
1212 : }
1213 120792 : }
1214 :
1215 : /*
1216 : * lazy_scan_heap() -- workhorse function for VACUUM
1217 : *
1218 : * This routine prunes each page in the heap, and considers the need to
1219 : * freeze remaining tuples with storage (not including pages that can be
1220 : * skipped using the visibility map). Also performs related maintenance
1221 : * of the FSM and visibility map. These steps all take place during an
1222 : * initial pass over the target heap relation.
1223 : *
1224 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1225 : * consists of deleting index tuples that point to LP_DEAD items left in
1226 : * heap pages following pruning. The earlier initial pass over the heap will
1227 : * have collected the TIDs whose index tuples need to be removed.
1228 : *
1229 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1230 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1231 : * as LP_UNUSED. This has to happen in a second, final pass over the
1232 : * heap, to preserve a basic invariant that all index AMs rely on: no
1233 : * extant index tuple can ever be allowed to contain a TID that points to
1234 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1235 : * recycling of line pointers to avoid index scans that get confused
1236 : * about which TID points to which tuple immediately after recycling.
1237 : * (Actually, this isn't a concern when target heap relation happens to
1238 : * have no indexes, which allows us to safely apply the one-pass strategy
1239 : * as an optimization).
1240 : *
1241 : * In practice we often have enough space to fit all TIDs, and so won't
1242 : * need to call lazy_vacuum more than once, after our initial pass over
1243 : * the heap has totally finished. Otherwise things are slightly more
1244 : * complicated: our "initial pass" over the heap applies only to those
1245 : * pages that were pruned before we needed to call lazy_vacuum, and our
1246 : * "final pass" over the heap only vacuums these same heap pages.
1247 : * However, we process indexes in full every time lazy_vacuum is called,
1248 : * which makes index processing very inefficient when memory is in short
1249 : * supply.
1250 : */
1251 : static void
1252 120792 : lazy_scan_heap(LVRelState *vacrel)
1253 : {
1254 : ReadStream *stream;
1255 120792 : BlockNumber rel_pages = vacrel->rel_pages,
1256 120792 : blkno = 0,
1257 120792 : next_fsm_block_to_vacuum = 0;
1258 120792 : BlockNumber orig_eager_scan_success_limit =
1259 : vacrel->eager_scan_remaining_successes; /* for logging */
1260 120792 : Buffer vmbuffer = InvalidBuffer;
1261 120792 : const int initprog_index[] = {
1262 : PROGRESS_VACUUM_PHASE,
1263 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1264 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1265 : };
1266 : int64 initprog_val[3];
1267 :
1268 : /* Report that we're scanning the heap, advertising total # of blocks */
1269 120792 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1270 120792 : initprog_val[1] = rel_pages;
1271 120792 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1272 120792 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1273 :
1274 : /* Initialize for the first heap_vac_scan_next_block() call */
1275 120792 : vacrel->current_block = InvalidBlockNumber;
1276 120792 : vacrel->next_unskippable_block = InvalidBlockNumber;
1277 120792 : vacrel->next_unskippable_eager_scanned = false;
1278 120792 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1279 :
1280 : /*
1281 : * Set up the read stream for vacuum's first pass through the heap.
1282 : *
1283 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1284 : * explicit work in heap_vac_scan_next_block.
1285 : */
1286 120792 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1287 : vacrel->bstrategy,
1288 : vacrel->rel,
1289 : MAIN_FORKNUM,
1290 : heap_vac_scan_next_block,
1291 : vacrel,
1292 : sizeof(bool));
1293 :
1294 : while (true)
1295 508717 : {
1296 : Buffer buf;
1297 : Page page;
1298 629509 : bool was_eager_scanned = false;
1299 629509 : int ndeleted = 0;
1300 : bool has_lpdead_items;
1301 629509 : void *per_buffer_data = NULL;
1302 629509 : bool vm_page_frozen = false;
1303 629509 : bool got_cleanup_lock = false;
1304 :
1305 629509 : vacuum_delay_point(false);
1306 :
1307 : /*
1308 : * Regularly check if wraparound failsafe should trigger.
1309 : *
1310 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1311 : * relfrozenxid might start to look dangerously old before we reach
1312 : * that point. This check also provides failsafe coverage for the
1313 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1314 : * param set to 'off'.
1315 : */
1316 629509 : if (vacrel->scanned_pages > 0 &&
1317 508717 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1318 0 : lazy_check_wraparound_failsafe(vacrel);
1319 :
1320 : /*
1321 : * Consider if we definitely have enough space to process TIDs on page
1322 : * already. If we are close to overrunning the available space for
1323 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1324 : * this page. However, let's force at least one page-worth of tuples
1325 : * to be stored so as to ensure we do at least some work when the memory
1326 : * configured is so low that we run out before storing anything.
1327 : */
1328 629509 : if (vacrel->dead_items_info->num_items > 0 &&
1329 27344 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1330 : {
1331 : /*
1332 : * Before beginning index vacuuming, we release any pin we may
1333 : * hold on the visibility map page. This isn't necessary for
1334 : * correctness, but we do it anyway to avoid holding the pin
1335 : * across a lengthy, unrelated operation.
1336 : */
1337 7 : if (BufferIsValid(vmbuffer))
1338 : {
1339 7 : ReleaseBuffer(vmbuffer);
1340 7 : vmbuffer = InvalidBuffer;
1341 : }
1342 :
1343 : /* Perform a round of index and heap vacuuming */
1344 7 : vacrel->consider_bypass_optimization = false;
1345 7 : lazy_vacuum(vacrel);
1346 :
1347 : /*
1348 : * Vacuum the Free Space Map to make newly-freed space visible on
1349 : * upper-level FSM pages. Note that blkno is the previously
1350 : * processed block.
1351 : */
1352 7 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1353 : blkno + 1);
1354 7 : next_fsm_block_to_vacuum = blkno;
1355 :
1356 : /* Report that we are once again scanning the heap */
1357 7 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1358 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1359 : }
1360 :
1361 629509 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1362 :
1363 : /* The relation is exhausted. */
1364 629509 : if (!BufferIsValid(buf))
1365 120792 : break;
1366 :
1367 508717 : was_eager_scanned = *((bool *) per_buffer_data);
1368 508717 : CheckBufferIsPinnedOnce(buf);
1369 508717 : page = BufferGetPage(buf);
1370 508717 : blkno = BufferGetBlockNumber(buf);
1371 :
1372 508717 : vacrel->scanned_pages++;
1373 508717 : if (was_eager_scanned)
1374 0 : vacrel->eager_scanned_pages++;
1375 :
1376 : /* Report as block scanned, update error traceback information */
1377 508717 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1378 508717 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1379 : blkno, InvalidOffsetNumber);
1380 :
1381 : /*
1382 : * Pin the visibility map page in case we need to mark the page
1383 : * all-visible. In most cases this will be very cheap, because we'll
1384 : * already have the correct page pinned anyway.
1385 : */
1386 508717 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1387 :
1388 : /*
1389 : * We need a buffer cleanup lock to prune HOT chains and defragment
1390 : * the page in lazy_scan_prune. But when it's not possible to acquire
1391 : * a cleanup lock right away, we may be able to settle for reduced
1392 : * processing using lazy_scan_noprune.
1393 : */
1394 508717 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1395 :
1396 508717 : if (!got_cleanup_lock)
1397 142 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1398 :
1399 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1400 508717 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1401 508717 : vmbuffer))
1402 : {
1403 : /* Processed as new/empty page (lock and pin released) */
1404 980 : continue;
1405 : }
1406 :
1407 : /*
1408 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1409 : * items in the dead_items area for later vacuuming, count live and
1410 : * recently dead tuples for vacuum logging, and determine if this
1411 : * block could later be truncated. If we encounter any xid/mxids that
1412 : * require advancing the relfrozenxid/relminmxid, we'll have to wait
1413 : * for a cleanup lock and call lazy_scan_prune().
1414 : */
1415 507737 : if (!got_cleanup_lock &&
1416 142 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1417 : {
1418 : /*
1419 : * lazy_scan_noprune could not do all required processing. Wait
1420 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1421 : */
1422 : Assert(vacrel->aggressive);
1423 62 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1424 62 : LockBufferForCleanup(buf);
1425 62 : got_cleanup_lock = true;
1426 : }
1427 :
1428 : /*
1429 : * If we have a cleanup lock, we must now prune, freeze, and count
1430 : * tuples. We may have acquired the cleanup lock originally, or we may
1431 : * have gone back and acquired it after lazy_scan_noprune() returned
1432 : * false. Either way, the page hasn't been processed yet.
1433 : *
1434 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1435 : * recently_dead_tuples and live tuples for vacuum logging, determine
1436 : * if the block can later be truncated, and accumulate the details of
1437 : * remaining LP_DEAD line pointers on the page into dead_items. These
1438 : * dead items include those pruned by lazy_scan_prune() as well as
1439 : * line pointers previously marked LP_DEAD.
1440 : */
1441 507737 : if (got_cleanup_lock)
1442 507657 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1443 : vmbuffer,
1444 : &has_lpdead_items, &vm_page_frozen);
1445 :
1446 : /*
1447 : * Count an eagerly scanned page as a failure or a success.
1448 : *
1449 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1450 : * cleanup lock, we won't have frozen the page. However, we only count
1451 : * pages that were too new to require freezing as eager freeze
1452 : * failures.
1453 : *
1454 : * We could gather more information from lazy_scan_noprune() about
1455 : * whether or not there were tuples with XIDs or MXIDs older than the
1456 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1457 : * exclude pages skipped due to cleanup lock contention from eager
1458 : * freeze algorithm caps.
1459 : */
1460 507737 : if (got_cleanup_lock && was_eager_scanned)
1461 : {
1462 : /* Aggressive vacuums do not eager scan. */
1463 : Assert(!vacrel->aggressive);
1464 :
1465 0 : if (vm_page_frozen)
1466 : {
1467 0 : if (vacrel->eager_scan_remaining_successes > 0)
1468 0 : vacrel->eager_scan_remaining_successes--;
1469 :
1470 0 : if (vacrel->eager_scan_remaining_successes == 0)
1471 : {
1472 : /*
1473 : * Report only once that we disabled eager scanning. We
1474 : * may eagerly read ahead blocks in excess of the success
1475 : * or failure caps before attempting to freeze them, so we
1476 : * could reach here even after disabling additional eager
1477 : * scanning.
1478 : */
1479 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1480 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1481 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1482 : orig_eager_scan_success_limit,
1483 : vacrel->dbname, vacrel->relnamespace,
1484 : vacrel->relname)));
1485 :
1486 : /*
1487 : * If we hit our success cap, permanently disable eager
1488 : * scanning by setting the other eager scan management
1489 : * fields to their disabled values.
1490 : */
1491 0 : vacrel->eager_scan_remaining_fails = 0;
1492 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1493 0 : vacrel->eager_scan_max_fails_per_region = 0;
1494 : }
1495 : }
1496 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1497 0 : vacrel->eager_scan_remaining_fails--;
1498 : }
1499 :
1500 : /*
1501 : * Now drop the buffer lock and, potentially, update the FSM.
1502 : *
1503 : * Our goal is to update the freespace map the last time we touch the
1504 : * page. If we'll process a block in the second pass, we may free up
1505 : * additional space on the page, so it is better to update the FSM
1506 : * after the second pass. If the relation has no indexes, or if index
1507 : * vacuuming is disabled, there will be no second heap pass; if this
1508 : * particular page has no dead items, the second heap pass will not
1509 : * touch this page. So, in those cases, update the FSM now.
1510 : *
1511 : * Note: In corner cases, it's possible to miss updating the FSM
1512 : * entirely. If index vacuuming is currently enabled, we'll skip the
1513 : * FSM update now. But if failsafe mode is later activated, or there
1514 : * are so few dead tuples that index vacuuming is bypassed, there will
1515 : * also be no opportunity to update the FSM later, because we'll never
1516 : * revisit this page. Since updating the FSM is desirable but not
1517 : * absolutely required, that's OK.
1518 : */
1519 507737 : if (vacrel->nindexes == 0
1520 490154 : || !vacrel->do_index_vacuuming
1521 417484 : || !has_lpdead_items)
1522 493062 : {
1523 493062 : Size freespace = PageGetHeapFreeSpace(page);
1524 :
1525 493062 : UnlockReleaseBuffer(buf);
1526 493062 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1527 :
1528 : /*
1529 : * Periodically perform FSM vacuuming to make newly-freed space
1530 : * visible on upper FSM pages. This is done after vacuuming if the
1531 : * table has indexes. There will only be newly-freed space if we
1532 : * held the cleanup lock and lazy_scan_prune() was called.
1533 : */
1534 493062 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1535 455 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1536 : {
1537 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1538 : blkno);
1539 0 : next_fsm_block_to_vacuum = blkno;
1540 : }
1541 : }
1542 : else
1543 14675 : UnlockReleaseBuffer(buf);
1544 : }
1545 :
1546 120792 : vacrel->blkno = InvalidBlockNumber;
1547 120792 : if (BufferIsValid(vmbuffer))
1548 48417 : ReleaseBuffer(vmbuffer);
1549 :
1550 : /*
1551 : * Report that everything is now scanned. We never skip scanning the last
1552 : * block in the relation, so we can pass rel_pages here.
1553 : */
1554 120792 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1555 : rel_pages);
1556 :
1557 : /* now we can compute the new value for pg_class.reltuples */
1558 241584 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1559 : vacrel->scanned_pages,
1560 120792 : vacrel->live_tuples);
1561 :
1562 : /*
1563 : * Also compute the total number of surviving heap entries. In the
1564 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1565 : */
1566 120792 : vacrel->new_rel_tuples =
1567 120792 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1568 120792 : vacrel->missed_dead_tuples;
1569 :
1570 120792 : read_stream_end(stream);
1571 :
1572 : /*
1573 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1574 : * related heap vacuuming
1575 : */
1576 120792 : if (vacrel->dead_items_info->num_items > 0)
1577 685 : lazy_vacuum(vacrel);
1578 :
1579 : /*
1580 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1581 : * not there were indexes, and whether or not we bypassed index vacuuming.
1582 : * We can pass rel_pages here because we never skip scanning the last
1583 : * block of the relation.
1584 : */
1585 120792 : if (rel_pages > next_fsm_block_to_vacuum)
1586 48418 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1587 :
1588 : /* report all blocks vacuumed */
1589 120792 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1590 :
1591 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1592 120792 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1593 94364 : lazy_cleanup_all_indexes(vacrel);
1594 120792 : }
1595 :
1596 : /*
1597 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1598 : * for vacuum to process
1599 : *
1600 : * Every time lazy_scan_heap() needs a new block to process during its first
1601 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1602 : * heap_vac_scan_next_block() to get the next block.
1603 : *
1604 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1605 : * various thresholds to skip blocks which do not need to be processed and
1606 : * returns the next block to process or InvalidBlockNumber if there are no
1607 : * remaining blocks.
1608 : *
1609 : * The visibility status of the next block to process and whether or not it
1610 : * was eager scanned is set in the per_buffer_data.
1611 : *
1612 : * callback_private_data contains a reference to the LVRelState, passed to the
1613 : * read stream API during stream setup. The LVRelState is an in/out parameter
1614 : * here (locally named `vacrel`). Vacuum options and information about the
1615 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1616 : * that's all-visible but not all-frozen (to ensure that we don't update
1617 : * relfrozenxid in that case). vacrel also holds information about the next
1618 : * unskippable block -- as bookkeeping for this function.
1619 : */
1620 : static BlockNumber
1621 629509 : heap_vac_scan_next_block(ReadStream *stream,
1622 : void *callback_private_data,
1623 : void *per_buffer_data)
1624 : {
1625 : BlockNumber next_block;
1626 629509 : LVRelState *vacrel = callback_private_data;
1627 :
1628 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1629 629509 : next_block = vacrel->current_block + 1;
1630 :
1631 : /* Have we reached the end of the relation? */
1632 629509 : if (next_block >= vacrel->rel_pages)
1633 : {
1634 120792 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1635 : {
1636 46937 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1637 46937 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1638 : }
1639 120792 : return InvalidBlockNumber;
1640 : }
1641 :
1642 : /*
1643 : * We must be in one of the three following states:
1644 : */
1645 508717 : if (next_block > vacrel->next_unskippable_block ||
1646 198464 : vacrel->next_unskippable_block == InvalidBlockNumber)
1647 : {
1648 : /*
1649 : * 1. We have just processed an unskippable block (or we're at the
1650 : * beginning of the scan). Find the next unskippable block using the
1651 : * visibility map.
1652 : */
1653 : bool skipsallvis;
1654 :
1655 358671 : find_next_unskippable_block(vacrel, &skipsallvis);
1656 :
1657 : /*
1658 : * We now know the next block that we must process. It can be the
1659 : * next block after the one we just processed, or something further
1660 : * ahead. If it's further ahead, we can jump to it, but we choose to
1661 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1662 : * pages. Since we're reading sequentially, the OS should be doing
1663 : * readahead for us, so there's no gain in skipping a page now and
1664 : * then. Skipping such a range might even discourage sequential
1665 : * detection.
1666 : *
1667 : * This test also enables more frequent relfrozenxid advancement
1668 : * during non-aggressive VACUUMs. If the range has any all-visible
1669 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1670 : * real downside.
1671 : */
1672 358671 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1673 : {
1674 4321 : next_block = vacrel->next_unskippable_block;
1675 4321 : if (skipsallvis)
1676 33 : vacrel->skippedallvis = true;
1677 : }
1678 : }
1679 :
1680 : /* Now we must be in one of the two remaining states: */
1681 508717 : if (next_block < vacrel->next_unskippable_block)
1682 : {
1683 : /*
1684 : * 2. We are processing a range of blocks that we could have skipped
1685 : * but chose not to. We know that they are all-visible in the VM,
1686 : * otherwise they would've been unskippable.
1687 : */
1688 150046 : vacrel->current_block = next_block;
1689 : /* Block was not eager scanned */
1690 150046 : *((bool *) per_buffer_data) = false;
1691 150046 : return vacrel->current_block;
1692 : }
1693 : else
1694 : {
1695 : /*
1696 : * 3. We reached the next unskippable block. Process it. On next
1697 : * iteration, we will be back in state 1.
1698 : */
1699 : Assert(next_block == vacrel->next_unskippable_block);
1700 :
1701 358671 : vacrel->current_block = next_block;
1702 358671 : *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1703 358671 : return vacrel->current_block;
1704 : }
1705 : }
1706 :
1707 : /*
1708 : * Find the next unskippable block in a vacuum scan using the visibility map.
1709 : * The next unskippable block and its visibility information is updated in
1710 : * vacrel.
1711 : *
1712 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1713 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1714 : * was concurrently cleared, though. All that matters is that caller scan all
1715 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1716 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1717 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1718 : * to skip such a range is actually made, making everything safe.)
1719 : */
1720 : static void
1721 358671 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1722 : {
1723 358671 : BlockNumber rel_pages = vacrel->rel_pages;
1724 358671 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1725 358671 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1726 358671 : bool next_unskippable_eager_scanned = false;
1727 :
1728 358671 : *skipsallvis = false;
1729 :
1730 461380 : for (;; next_unskippable_block++)
1731 461380 : {
1732 820051 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1733 : next_unskippable_block,
1734 : &next_unskippable_vmbuffer);
1735 :
1736 :
1737 : /*
1738 : * At the start of each eager scan region, normal vacuums with eager
1739 : * scanning enabled reset the failure counter, allowing vacuum to
1740 : * resume eager scanning if it had been suspended in the previous
1741 : * region.
1742 : */
1743 820051 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1744 : {
1745 0 : vacrel->eager_scan_remaining_fails =
1746 0 : vacrel->eager_scan_max_fails_per_region;
1747 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1748 : }
1749 :
1750 : /*
1751 : * A block is unskippable if it is not all visible according to the
1752 : * visibility map.
1753 : */
1754 820051 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1755 : {
1756 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1757 314538 : break;
1758 : }
1759 :
1760 : /*
1761 : * Caller must scan the last page to determine whether it has tuples
1762 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1763 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1764 : * lock on rel to attempt a truncation that fails anyway, just because
1765 : * there are tuples on the last page (it is likely that there will be
1766 : * tuples on other nearby pages as well, but those can be skipped).
1767 : *
1768 : * Implement this by always treating the last block as unsafe to skip.
1769 : */
1770 505513 : if (next_unskippable_block == rel_pages - 1)
1771 43722 : break;
1772 :
1773 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1774 461791 : if (!vacrel->skipwithvm)
1775 408 : break;
1776 :
1777 : /*
1778 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1779 : * already frozen by now), so this page can be skipped.
1780 : */
1781 461383 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1782 458280 : continue;
1783 :
1784 : /*
1785 : * Aggressive vacuums cannot skip any all-visible pages that are not
1786 : * also all-frozen.
1787 : */
1788 3103 : if (vacrel->aggressive)
1789 3 : break;
1790 :
1791 : /*
1792 : * Normal vacuums with eager scanning enabled only skip all-visible
1793 : * but not all-frozen pages if they have hit the failure limit for the
1794 : * current eager scan region.
1795 : */
1796 3100 : if (vacrel->eager_scan_remaining_fails > 0)
1797 : {
1798 0 : next_unskippable_eager_scanned = true;
1799 0 : break;
1800 : }
1801 :
1802 : /*
1803 : * All-visible blocks are safe to skip in a normal vacuum. But
1804 : * remember that the final range contains such a block for later.
1805 : */
1806 3100 : *skipsallvis = true;
1807 : }
1808 :
1809 : /* write the local variables back to vacrel */
1810 358671 : vacrel->next_unskippable_block = next_unskippable_block;
1811 358671 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1812 358671 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1813 358671 : }
1814 :
1815 : /*
1816 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1817 : *
1818 : * Must call here to handle both new and empty pages before calling
1819 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1820 : * with new or empty pages.
1821 : *
1822 : * It's necessary to consider new pages as a special case, since the rules for
1823 : * maintaining the visibility map and FSM with empty pages are a little
1824 : * different (though new pages can be truncated away during rel truncation).
1825 : *
1826 : * Empty pages are not really a special case -- they're just heap pages that
1827 : * have no allocated tuples (including even LP_UNUSED items). You might
1828 : * wonder why we need to handle them here all the same. It's only necessary
1829 : * because of a corner-case involving a hard crash during heap relation
1830 : * extension. If we ever make relation-extension crash safe, then it should
1831 : * no longer be necessary to deal with empty pages here (or new pages, for
1832 : * that matter).
1833 : *
1834 : * Caller must hold at least a shared lock. We might need to escalate the
1835 : * lock in that case, so the type of lock caller holds needs to be specified
1836 : * using 'sharelock' argument.
1837 : *
1838 : * Returns false in common case where caller should go on to call
1839 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1840 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1841 : * behalf.
1842 : *
1843 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1844 : * is passed here because neither empty nor new pages can be eagerly frozen.
1845 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1846 : * the same time that they are set all-visible, and we don't eagerly scan
1847 : * frozen pages.
1848 : */
1849 : static bool
1850 508717 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1851 : Page page, bool sharelock, Buffer vmbuffer)
1852 : {
1853 : Size freespace;
1854 :
1855 508717 : if (PageIsNew(page))
1856 : {
1857 : /*
1858 : * All-zeroes pages can be left over if either a backend extends the
1859 : * relation by a single page, but crashes before the newly initialized
1860 : * page has been written out, or when bulk-extending the relation
1861 : * (which creates a number of empty pages at the tail end of the
1862 : * relation), and then enters them into the FSM.
1863 : *
1864 : * Note we do not enter the page into the visibilitymap. That has the
1865 : * downside that we repeatedly visit this page in subsequent vacuums,
1866 : * but otherwise we'll never discover the space on a promoted standby.
1867 : * The harm of repeated checking ought to normally not be too bad. The
1868 : * space usually should be used at some point, otherwise there
1869 : * wouldn't be any regular vacuums.
1870 : *
1871 : * Make sure these pages are in the FSM, to ensure they can be reused.
1872 : * Do that by testing if there's any space recorded for the page. If
1873 : * not, enter it. We do so after releasing the lock on the heap page,
1874 : * the FSM is approximate, after all.
1875 : */
1876 953 : UnlockReleaseBuffer(buf);
1877 :
1878 953 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1879 : {
1880 475 : freespace = BLCKSZ - SizeOfPageHeaderData;
1881 :
1882 475 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1883 : }
1884 :
1885 953 : return true;
1886 : }
1887 :
1888 507764 : if (PageIsEmpty(page))
1889 : {
1890 : /*
1891 : * It seems likely that caller will always be able to get a cleanup
1892 : * lock on an empty page. But don't take any chances -- escalate to
1893 : * an exclusive lock (still don't need a cleanup lock, though).
1894 : */
1895 27 : if (sharelock)
1896 : {
1897 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1898 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1899 :
1900 0 : if (!PageIsEmpty(page))
1901 : {
1902 : /* page isn't new or empty -- keep lock and pin for now */
1903 0 : return false;
1904 : }
1905 : }
1906 : else
1907 : {
1908 : /* Already have a full cleanup lock (which is more than enough) */
1909 : }
1910 :
1911 : /*
1912 : * Unlike new pages, empty pages are always set all-visible and
1913 : * all-frozen.
1914 : */
1915 27 : if (!PageIsAllVisible(page))
1916 : {
1917 0 : START_CRIT_SECTION();
1918 :
1919 : /* mark buffer dirty before writing a WAL record */
1920 0 : MarkBufferDirty(buf);
1921 :
1922 : /*
1923 : * It's possible that another backend has extended the heap,
1924 : * initialized the page, and then failed to WAL-log the page due
1925 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1926 : * might try to replay our record setting the page all-visible and
1927 : * find that the page isn't initialized, which will cause a PANIC.
1928 : * To prevent that, check whether the page has been previously
1929 : * WAL-logged, and if not, do that now.
1930 : */
1931 0 : if (RelationNeedsWAL(vacrel->rel) &&
1932 0 : !XLogRecPtrIsValid(PageGetLSN(page)))
1933 0 : log_newpage_buffer(buf, true);
1934 :
1935 0 : PageSetAllVisible(page);
1936 0 : visibilitymap_set(vacrel->rel, blkno, buf,
1937 : InvalidXLogRecPtr,
1938 : vmbuffer, InvalidTransactionId,
1939 : VISIBILITYMAP_ALL_VISIBLE |
1940 : VISIBILITYMAP_ALL_FROZEN);
1941 0 : END_CRIT_SECTION();
1942 :
1943 : /* Count the newly all-frozen pages for logging */
1944 0 : vacrel->new_all_visible_pages++;
1945 0 : vacrel->new_all_visible_all_frozen_pages++;
1946 : }
1947 :
1948 27 : freespace = PageGetHeapFreeSpace(page);
1949 27 : UnlockReleaseBuffer(buf);
1950 27 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1951 27 : return true;
1952 : }
1953 :
1954 : /* page isn't new or empty -- keep lock and pin */
1955 507737 : return false;
1956 : }
1957 :
1958 : /* qsort comparator for sorting OffsetNumbers */
1959 : static int
1960 3519308 : cmpOffsetNumbers(const void *a, const void *b)
1961 : {
1962 3519308 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1963 : }
1964 :
1965 : /*
1966 : * Helper to correct any corruption detected on a heap page and its
1967 : * corresponding visibility map page after pruning but before setting the
1968 : * visibility map. It examines the heap page, the associated VM page, and the
1969 : * number of dead items previously identified.
1970 : *
1971 : * This function must be called while holding an exclusive lock on the heap
1972 : * buffer, and the dead items must have been discovered under that same lock.
1973 :
1974 : * The provided vmbits must reflect the current state of the VM block
1975 : * referenced by vmbuffer. Although we do not hold a lock on the VM buffer, it
1976 : * is pinned, and the heap buffer is exclusively locked, ensuring that no
1977 : * other backend can update the VM bits corresponding to this heap page.
1978 : *
1979 : * If it clears corruption, it will zero out vmbits.
1980 : */
1981 : static void
1982 507657 : identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
1983 : BlockNumber heap_blk, Page heap_page,
1984 : int nlpdead_items,
1985 : Buffer vmbuffer,
1986 : uint8 *vmbits)
1987 : {
1988 : Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);
1989 :
1990 : Assert(BufferIsLockedByMeInMode(heap_buffer, BUFFER_LOCK_EXCLUSIVE));
1991 :
1992 : /*
1993 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1994 : * page-level bit is clear. However, it's possible that the bit got
1995 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
1996 : * with buffer lock before concluding that the VM is corrupt.
1997 : */
1998 507657 : if (!PageIsAllVisible(heap_page) &&
1999 313577 : ((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
2000 : {
2001 0 : ereport(WARNING,
2002 : (errcode(ERRCODE_DATA_CORRUPTED),
2003 : errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2004 : RelationGetRelationName(rel), heap_blk)));
2005 :
2006 0 : visibilitymap_clear(rel, heap_blk, vmbuffer,
2007 : VISIBILITYMAP_VALID_BITS);
2008 0 : *vmbits = 0;
2009 : }
2010 :
2011 : /*
2012 : * It's possible for the value returned by
2013 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2014 : * wrong for us to see tuples that appear to not be visible to everyone
2015 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2016 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
2017 : * conservative and sometimes returns a value that's unnecessarily small,
2018 : * so if we see that contradiction it just means that the tuples that we
2019 : * think are not visible to everyone yet actually are, and the
2020 : * PD_ALL_VISIBLE flag is correct.
2021 : *
2022 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2023 : * however.
2024 : */
2025 507657 : else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
2026 : {
2027 0 : ereport(WARNING,
2028 : (errcode(ERRCODE_DATA_CORRUPTED),
2029 : errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2030 : RelationGetRelationName(rel), heap_blk)));
2031 :
2032 0 : PageClearAllVisible(heap_page);
2033 0 : MarkBufferDirty(heap_buffer);
2034 0 : visibilitymap_clear(rel, heap_blk, vmbuffer,
2035 : VISIBILITYMAP_VALID_BITS);
2036 0 : *vmbits = 0;
2037 : }
2038 507657 : }
2039 :
2040 : /*
2041 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
2042 : *
2043 : * Caller must hold pin and buffer cleanup lock on the buffer.
2044 : *
2045 : * vmbuffer is the buffer containing the VM block with visibility information
2046 : * for the heap block, blkno.
2047 : *
2048 : * *has_lpdead_items is set to true or false depending on whether, upon return
2049 : * from this function, any LP_DEAD items are still present on the page.
2050 : *
2051 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2052 : * VM. The caller currently only uses this for determining whether an eagerly
2053 : * scanned page was successfully set all-frozen.
2054 : *
2055 : * Returns the number of tuples deleted from the page during HOT pruning.
2056 : */
2057 : static int
2058 507657 : lazy_scan_prune(LVRelState *vacrel,
2059 : Buffer buf,
2060 : BlockNumber blkno,
2061 : Page page,
2062 : Buffer vmbuffer,
2063 : bool *has_lpdead_items,
2064 : bool *vm_page_frozen)
2065 : {
2066 507657 : Relation rel = vacrel->rel;
2067 : PruneFreezeResult presult;
2068 507657 : PruneFreezeParams params = {
2069 : .relation = rel,
2070 : .buffer = buf,
2071 : .reason = PRUNE_VACUUM_SCAN,
2072 : .options = HEAP_PAGE_PRUNE_FREEZE,
2073 507657 : .vistest = vacrel->vistest,
2074 507657 : .cutoffs = &vacrel->cutoffs,
2075 : };
2076 507657 : uint8 old_vmbits = 0;
2077 507657 : uint8 new_vmbits = 0;
2078 :
2079 : Assert(BufferGetBlockNumber(buf) == blkno);
2080 :
2081 : /*
2082 : * Prune all HOT-update chains and potentially freeze tuples on this page.
2083 : *
2084 : * If the relation has no indexes, we can immediately mark would-be dead
2085 : * items LP_UNUSED.
2086 : *
2087 : * The number of tuples removed from the page is returned in
2088 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
2089 : * presult.lpdead_items's final value can be thought of as the number of
2090 : * tuples that were deleted from indexes.
2091 : *
2092 : * We will update the VM after collecting LP_DEAD items and freezing
2093 : * tuples. Pruning will have determined whether or not the page is
2094 : * all-visible.
2095 : */
2096 507657 : if (vacrel->nindexes == 0)
2097 17583 : params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2098 :
2099 507657 : heap_page_prune_and_freeze(¶ms,
2100 : &presult,
2101 : &vacrel->offnum,
2102 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2103 :
2104 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2105 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2106 :
2107 507657 : if (presult.nfrozen > 0)
2108 : {
2109 : /*
2110 : * We don't increment the new_frozen_tuple_pages instrumentation
2111 : * counter when nfrozen == 0, since it only counts pages with newly
2112 : * frozen tuples (don't confuse that with pages newly set all-frozen
2113 : * in VM).
2114 : */
2115 22833 : vacrel->new_frozen_tuple_pages++;
2116 : }
2117 :
2118 : /*
2119 : * VACUUM will call heap_page_is_all_visible() during the second pass over
2120 : * the heap to determine all_visible and all_frozen for the page -- this
2121 : * is a specialized version of the logic from this function. Now that
2122 : * we've finished pruning and freezing, make sure that we're in total
2123 : * agreement with heap_page_is_all_visible() using an assertion.
2124 : */
2125 : #ifdef USE_ASSERT_CHECKING
2126 : if (presult.all_visible)
2127 : {
2128 : TransactionId debug_cutoff;
2129 : bool debug_all_frozen;
2130 :
2131 : Assert(presult.lpdead_items == 0);
2132 :
2133 : Assert(heap_page_is_all_visible(vacrel->rel, buf,
2134 : vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2135 : &debug_cutoff, &vacrel->offnum));
2136 :
2137 : Assert(presult.all_frozen == debug_all_frozen);
2138 :
2139 : Assert(!TransactionIdIsValid(debug_cutoff) ||
2140 : debug_cutoff == presult.vm_conflict_horizon);
2141 : }
2142 : #endif
2143 :
2144 : /*
2145 : * Now save details of the LP_DEAD items from the page in vacrel
2146 : */
2147 507657 : if (presult.lpdead_items > 0)
2148 : {
2149 17001 : vacrel->lpdead_item_pages++;
2150 :
2151 : /*
2152 : * deadoffsets are collected incrementally in
2153 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2154 : * with an indeterminate order, but dead_items_add requires them to be
2155 : * sorted.
2156 : */
2157 17001 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2158 : cmpOffsetNumbers);
2159 :
2160 17001 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2161 : }
2162 :
2163 : /* Finally, add page-local counts to whole-VACUUM counts */
2164 507657 : vacrel->tuples_deleted += presult.ndeleted;
2165 507657 : vacrel->tuples_frozen += presult.nfrozen;
2166 507657 : vacrel->lpdead_items += presult.lpdead_items;
2167 507657 : vacrel->live_tuples += presult.live_tuples;
2168 507657 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2169 :
2170 : /* Can't truncate this page */
2171 507657 : if (presult.hastup)
2172 499192 : vacrel->nonempty_pages = blkno + 1;
2173 :
2174 : /* Did we find LP_DEAD items? */
2175 507657 : *has_lpdead_items = (presult.lpdead_items > 0);
2176 :
2177 : Assert(!presult.all_visible || !(*has_lpdead_items));
2178 : Assert(!presult.all_frozen || presult.all_visible);
2179 :
2180 507657 : old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
2181 :
2182 507657 : identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
2183 : presult.lpdead_items, vmbuffer,
2184 : &old_vmbits);
2185 :
2186 507657 : if (!presult.all_visible)
2187 276469 : return presult.ndeleted;
2188 :
2189 : /* Set the visibility map and page visibility hint */
2190 231188 : new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
2191 :
2192 231188 : if (presult.all_frozen)
2193 220538 : new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
2194 :
2195 : /* Nothing to do */
2196 231188 : if (old_vmbits == new_vmbits)
2197 194052 : return presult.ndeleted;
2198 :
2199 : /*
2200 : * It should never be the case that the visibility map page is set while
2201 : * the page-level bit is clear (and if so, we cleared it above), but the
2202 : * reverse is allowed (if checksums are not enabled). Regardless, set both
2203 : * bits so that we get back in sync.
2204 : *
2205 : * The heap buffer must be marked dirty before adding it to the WAL chain
2206 : * when setting the VM. We don't worry about unnecessarily dirtying the
2207 : * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
2208 : * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
2209 : * the VM bits clear, so there is no point in optimizing it.
2210 : */
2211 37136 : PageSetAllVisible(page);
2212 37136 : MarkBufferDirty(buf);
2213 :
2214 : /*
2215 : * If the page is being set all-frozen, we pass InvalidTransactionId as
2216 : * the cutoff_xid, since a snapshot conflict horizon sufficient to make
2217 : * everything safe for REDO was logged when the page's tuples were frozen.
2218 : */
2219 : Assert(!presult.all_frozen ||
2220 : !TransactionIdIsValid(presult.vm_conflict_horizon));
2221 :
2222 37136 : visibilitymap_set(vacrel->rel, blkno, buf,
2223 : InvalidXLogRecPtr,
2224 : vmbuffer, presult.vm_conflict_horizon,
2225 : new_vmbits);
2226 :
2227 : /*
2228 : * If the page wasn't already set all-visible and/or all-frozen in the VM,
2229 : * count it as newly set for logging.
2230 : */
2231 37136 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2232 : {
2233 37109 : vacrel->new_all_visible_pages++;
2234 37109 : if (presult.all_frozen)
2235 : {
2236 27132 : vacrel->new_all_visible_all_frozen_pages++;
2237 27132 : *vm_page_frozen = true;
2238 : }
2239 : }
2240 27 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2241 27 : presult.all_frozen)
2242 : {
2243 27 : vacrel->new_all_frozen_pages++;
2244 27 : *vm_page_frozen = true;
2245 : }
2246 :
2247 37136 : return presult.ndeleted;
2248 : }
2249 :
2250 : /*
2251 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2252 : *
2253 : * Caller need only hold a pin and share lock on the buffer, unlike
2254 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2255 : * performed here, it's quite possible that an earlier opportunistic pruning
2256 : * operation left LP_DEAD items behind. We'll at least collect any such items
2257 : * in dead_items for removal from indexes.
2258 : *
2259 : * For aggressive VACUUM callers, we may return false to indicate that a full
2260 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2261 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2262 : * one or more tuples on the page. We always return true for non-aggressive
2263 : * callers.
2264 : *
2265 : * If this function returns true, *has_lpdead_items gets set to true or false
2266 : * depending on whether, upon return from this function, any LP_DEAD items are
2267 : * present on the page. If this function returns false, *has_lpdead_items
2268 : * is not updated.
2269 : */
2270 : static bool
2271 142 : lazy_scan_noprune(LVRelState *vacrel,
2272 : Buffer buf,
2273 : BlockNumber blkno,
2274 : Page page,
2275 : bool *has_lpdead_items)
2276 : {
2277 : OffsetNumber offnum,
2278 : maxoff;
2279 : int lpdead_items,
2280 : live_tuples,
2281 : recently_dead_tuples,
2282 : missed_dead_tuples;
2283 : bool hastup;
2284 : HeapTupleHeader tupleheader;
/*
 * Work on local copies of the relfrozenxid/relminmxid trackers.  They are
 * written back to vacrel only after the whole page has been examined; the
 * early "return false" path below leaves vacrel's values untouched.
 */
2285 142 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2286 142 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
/* Offsets of LP_DEAD items seen on this page; lpdead_items is the fill count */
2287 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2288 :
2289 : Assert(BufferGetBlockNumber(buf) == blkno);
2290 :
2291 142 : hastup = false; /* for now */
2292 :
2293 142 : lpdead_items = 0;
2294 142 : live_tuples = 0;
2295 142 : recently_dead_tuples = 0;
2296 142 : missed_dead_tuples = 0;
2297 :
/*
 * Walk every line pointer on the page.  vacrel->offnum is kept current
 * inside the loop and reset to InvalidOffsetNumber on both exit paths.
 */
2298 142 : maxoff = PageGetMaxOffsetNumber(page);
2299 142 : for (offnum = FirstOffsetNumber;
2300 4010 : offnum <= maxoff;
2301 3868 : offnum = OffsetNumberNext(offnum))
2302 : {
2303 : ItemId itemid;
2304 : HeapTupleData tuple;
2305 :
2306 3930 : vacrel->offnum = offnum;
2307 3930 : itemid = PageGetItemId(page, offnum);
2308 :
/* Unused line pointers contribute to none of the counters */
2309 3930 : if (!ItemIdIsUsed(itemid))
2310 870 : continue;
2311 :
/* Redirects are not counted as tuples, but do mark the page as non-empty */
2312 3287 : if (ItemIdIsRedirected(itemid))
2313 : {
2314 227 : hastup = true;
2315 227 : continue;
2316 : }
2317 :
2318 3060 : if (ItemIdIsDead(itemid))
2319 : {
2320 : /*
2321 : * Deliberately don't set hastup=true here. See same point in
2322 : * lazy_scan_prune for an explanation.
2323 : */
2324 0 : deadoffsets[lpdead_items++] = offnum;
2325 0 : continue;
2326 : }
2327 :
2328 3060 : hastup = true; /* page prevents rel truncation */
2329 3060 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2330 3060 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2331 : &NoFreezePageRelfrozenXid,
2332 : &NoFreezePageRelminMxid))
2333 : {
2334 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2335 126 : if (vacrel->aggressive)
2336 : {
2337 : /*
2338 : * Aggressive VACUUMs must always be able to advance rel's
2339 : * relfrozenxid to a value >= FreezeLimit (and be able to
2340 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2341 : * The ongoing aggressive VACUUM won't be able to do that
2342 : * unless it can freeze an XID (or MXID) from this tuple now.
2343 : *
2344 : * The only safe option is to have caller perform processing
2345 : * of this page using lazy_scan_prune. Caller might have to
2346 : * wait a while for a cleanup lock, but it can't be helped.
2347 : */
2348 62 : vacrel->offnum = InvalidOffsetNumber;
2349 62 : return false;
2350 : }
2351 :
2352 : /*
2353 : * Non-aggressive VACUUMs are under no obligation to advance
2354 : * relfrozenxid (even by one XID). We can be much laxer here.
2355 : *
2356 : * Currently we always just accept an older final relfrozenxid
2357 : * and/or relminmxid value. We never make caller wait or work a
2358 : * little harder, even when it likely makes sense to do so.
2359 : */
2360 : }
2361 :
/*
 * Build a stack-local HeapTupleData describing this item so that it can
 * be handed to the visibility check below.
 */
2362 2998 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2363 2998 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2364 2998 : tuple.t_len = ItemIdGetLength(itemid);
2365 2998 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2366 :
2367 2998 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2368 : buf))
2369 : {
2370 2994 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2371 : case HEAPTUPLE_LIVE:
2372 :
2373 : /*
2374 : * Count both cases as live, just like lazy_scan_prune
2375 : */
2376 2994 : live_tuples++;
2377 :
2378 2994 : break;
2379 2 : case HEAPTUPLE_DEAD:
2380 :
2381 : /*
2382 : * There is some useful work for pruning to do, that won't be
2383 : * done due to failure to get a cleanup lock.
2384 : */
2385 2 : missed_dead_tuples++;
2386 2 : break;
2387 2 : case HEAPTUPLE_RECENTLY_DEAD:
2388 :
2389 : /*
2390 : * Count in recently_dead_tuples, just like lazy_scan_prune
2391 : */
2392 2 : recently_dead_tuples++;
2393 2 : break;
2394 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2395 :
2396 : /*
2397 : * Do not count these rows as live, just like lazy_scan_prune
2398 : */
2399 0 : break;
2400 0 : default:
2401 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2402 : break;
2403 : }
2404 : }
2405 :
2406 80 : vacrel->offnum = InvalidOffsetNumber;
2407 :
2408 : /*
2409 : * By here we know for sure that caller can put off freezing and pruning
2410 : * this particular page until the next VACUUM. Remember its details now.
2411 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2412 : */
2413 80 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2414 80 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2415 :
2416 : /* Save any LP_DEAD items found on the page in dead_items */
2417 80 : if (vacrel->nindexes == 0)
2418 : {
2419 : /* Using one-pass strategy (since table has no indexes) */
2420 0 : if (lpdead_items > 0)
2421 : {
2422 : /*
2423 : * Perfunctory handling for the corner case where a single pass
2424 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2425 : * that there is one or more LP_DEAD items: just count the LP_DEAD
2426 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2427 : * but it beats having to maintain specialized heap vacuuming code
2428 : * forever, for vanishingly little benefit.)
2429 : */
/*
 * Nothing is added to dead_items on this path.  Note that
 * *has_lpdead_items, set at the end of the function, is still derived
 * from lpdead_items and so reports true in this case.
 */
2430 0 : hastup = true;
2431 0 : missed_dead_tuples += lpdead_items;
2432 : }
2433 : }
2434 80 : else if (lpdead_items > 0)
2435 : {
2436 : /*
2437 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2438 : * indexes will be deleted during index vacuuming (and then marked
2439 : * LP_UNUSED in the heap)
2440 : */
2441 0 : vacrel->lpdead_item_pages++;
2442 :
2443 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2444 :
2445 0 : vacrel->lpdead_items += lpdead_items;
2446 : }
2447 :
2448 : /*
2449 : * Finally, add relevant page-local counts to whole-VACUUM counts
2450 : */
2451 80 : vacrel->live_tuples += live_tuples;
2452 80 : vacrel->recently_dead_tuples += recently_dead_tuples;
2453 80 : vacrel->missed_dead_tuples += missed_dead_tuples;
2454 80 : if (missed_dead_tuples > 0)
2455 2 : vacrel->missed_dead_pages++;
2456 :
2457 : /* Can't truncate this page */
2458 80 : if (hastup)
2459 80 : vacrel->nonempty_pages = blkno + 1;
2460 :
2461 : /* Did we find LP_DEAD items? */
2462 80 : *has_lpdead_items = (lpdead_items > 0);
2463 :
2464 : /* Caller won't need to call lazy_scan_prune with same page */
2465 80 : return true;
2466 : }
2467 :
2468 : /*
2469 : * Main entry point for index vacuuming and heap vacuuming.
2470 : *
2471 : * Removes items collected in dead_items from table's indexes, then marks the
2472 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2473 : * for full details.
2474 : *
2475 : * Also empties dead_items, freeing up space for later TIDs.
2476 : *
2477 : * We may choose to bypass index vacuuming at this point, though only when the
2478 : * ongoing VACUUM operation will definitely only have one index scan/round of
2479 : * index vacuuming.
2480 : */
2481 : static void
2482 692 : lazy_vacuum(LVRelState *vacrel)
2483 : {
2484 : bool bypass;
2485 :
2486 : /* Should not end up here with no indexes */
2487 : Assert(vacrel->nindexes > 0);
2488 : Assert(vacrel->lpdead_item_pages > 0);
2489 :
/*
 * Index vacuuming is switched off: discard the collected TIDs and return
 * without touching the indexes or the heap.
 */
2490 692 : if (!vacrel->do_index_vacuuming)
2491 : {
2492 : Assert(!vacrel->do_index_cleanup);
2493 14 : dead_items_reset(vacrel);
2494 14 : return;
2495 : }
2496 :
2497 : /*
2498 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2499 : *
2500 : * We currently only do this in cases where the number of LP_DEAD items
2501 : * for the entire VACUUM operation is close to zero. This avoids sharp
2502 : * discontinuities in the duration and overhead of successive VACUUM
2503 : * operations that run against the same table with a fixed workload.
2504 : * Ideally, successive VACUUM operations will behave as if there are
2505 : * exactly zero LP_DEAD items in cases where there are close to zero.
2506 : *
2507 : * This is likely to be helpful with a table that is continually affected
2508 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2509 : * have small aberrations that lead to just a few heap pages retaining
2510 : * only one or two LP_DEAD items. This is pretty common; even when the
2511 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2512 : * impossible to predict whether HOT will be applied in 100% of cases.
2513 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2514 : * HOT through careful tuning.
2515 : */
2516 678 : bypass = false;
2517 678 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2518 : {
2519 : BlockNumber threshold;
2520 :
2521 : Assert(vacrel->num_index_scans == 0);
2522 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2523 : Assert(vacrel->do_index_vacuuming);
2524 : Assert(vacrel->do_index_cleanup);
2525 :
2526 : /*
2527 : * This crossover point at which we'll start to do index vacuuming is
2528 : * expressed as a percentage of the total number of heap pages in the
2529 : * table that are known to have at least one LP_DEAD item. This is
2530 : * much more important than the total number of LP_DEAD items, since
2531 : * it's a proxy for the number of heap pages whose visibility map bits
2532 : * cannot be set on account of bypassing index and heap vacuuming.
2533 : *
2534 : * We apply one further precautionary test: the space currently used
2535 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2536 : * not exceed 32MB. This limits the risk that we will bypass index
2537 : * vacuuming again and again until eventually there is a VACUUM whose
2538 : * dead_items space is not CPU cache resident.
2539 : *
2540 : * We don't take any special steps to remember the LP_DEAD items (such
2541 : * as counting them in our final update to the stats system) when the
2542 : * optimization is applied. Though the accounting used in analyze.c's
2543 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2544 : * rows in its own stats report, that's okay. The discrepancy should
2545 : * be negligible. If this optimization is ever expanded to cover more
2546 : * cases then this may need to be reconsidered.
2547 : */
/*
 * The product is computed in double and converted to BlockNumber by the
 * assignment.  Bypass requires both tests to pass: strictly fewer
 * LP_DEAD-bearing pages than the threshold, and TID store memory usage
 * strictly below 32MB.
 */
2548 659 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2549 663 : bypass = (vacrel->lpdead_item_pages < threshold &&
2550 4 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2551 : }
2552 :
2553 678 : if (bypass)
2554 : {
2555 : /*
2556 : * There are almost zero TIDs. Behave as if there were precisely
2557 : * zero: bypass index vacuuming, but do index cleanup.
2558 : *
2559 : * We expect that the ongoing VACUUM operation will finish very
2560 : * quickly, so there is no point in considering speeding up as a
2561 : * failsafe against wraparound failure. (Index cleanup is expected to
2562 : * finish very quickly in cases where there were no ambulkdelete()
2563 : * calls.)
2564 : */
2565 4 : vacrel->do_index_vacuuming = false;
2566 : }
2567 674 : else if (lazy_vacuum_all_indexes(vacrel))
2568 : {
2569 : /*
2570 : * We successfully completed a round of index vacuuming. Do related
2571 : * heap vacuuming now.
2572 : */
2573 674 : lazy_vacuum_heap_rel(vacrel);
2574 : }
2575 : else
2576 : {
2577 : /*
2578 : * Failsafe case.
2579 : *
2580 : * We attempted index vacuuming, but didn't finish a full round/full
2581 : * index scan. This happens when relfrozenxid or relminmxid is too
2582 : * far in the past.
2583 : *
2584 : * From this point on the VACUUM operation will do no further index
2585 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2586 : * back here again.
2587 : */
2588 : Assert(VacuumFailsafeActive);
2589 : }
2590 :
2591 : /*
2592 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2593 : * vacuum)
2594 : */
2595 678 : dead_items_reset(vacrel);
2596 : }
2597 :
2598 : /*
2599 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2600 : *
2601 : * Returns true in the common case when all indexes were successfully
2602 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2603 : * VACUUM operation is at risk of taking too long to finish, leading to
2604 : * wraparound failure.
2605 : */
2606 : static bool
2607 674 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2608 : {
/* Becomes false only if the failsafe triggers after index vacuuming began */
2609 674 : bool allindexes = true;
/* Tuple count handed to every index's bulk delete, read from rd_rel->reltuples */
2610 674 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
/*
 * Parameter-index arrays for the two multi-param progress updates made
 * below.  The matching value arrays (progress_start_val, progress_end_val)
 * are filled in just before each update call.
 */
2611 674 : const int progress_start_index[] = {
2612 : PROGRESS_VACUUM_PHASE,
2613 : PROGRESS_VACUUM_INDEXES_TOTAL
2614 : };
2615 674 : const int progress_end_index[] = {
2616 : PROGRESS_VACUUM_INDEXES_TOTAL,
2617 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2618 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2619 : };
2620 : int64 progress_start_val[2];
2621 : int64 progress_end_val[3];
2622 :
2623 : Assert(vacrel->nindexes > 0);
2624 : Assert(vacrel->do_index_vacuuming);
2625 : Assert(vacrel->do_index_cleanup);
2626 :
2627 : /* Precheck for XID wraparound emergencies */
/*
 * Returning here skips the progress reporting and the num_index_scans
 * increment further down.
 */
2628 674 : if (lazy_check_wraparound_failsafe(vacrel))
2629 : {
2630 : /* Wraparound emergency -- don't even start an index scan */
2631 0 : return false;
2632 : }
2633 :
2634 : /*
2635 : * Report that we are now vacuuming indexes and the number of indexes to
2636 : * vacuum.
2637 : */
2638 674 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2639 674 : progress_start_val[1] = vacrel->nindexes;
2640 674 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2641 :
2642 674 : if (!ParallelVacuumIsActive(vacrel))
2643 : {
/*
 * Serial path: vacuum the indexes one at a time.  The failsafe is
 * rechecked after every index, so the loop can stop partway through
 * the index list.
 */
2644 1936 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2645 : {
2646 1274 : Relation indrel = vacrel->indrels[idx];
2647 1274 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2648 :
2649 1274 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2650 : old_live_tuples,
2651 : vacrel);
2652 :
2653 : /* Report the number of indexes vacuumed */
2654 1274 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2655 1274 : idx + 1);
2656 :
2657 1274 : if (lazy_check_wraparound_failsafe(vacrel))
2658 : {
2659 : /* Wraparound emergency -- end current index scan */
2660 0 : allindexes = false;
2661 0 : break;
2662 : }
2663 : }
2664 : }
2665 : else
2666 : {
2667 : /* Outsource everything to parallel variant */
2668 12 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2669 : vacrel->num_index_scans);
2670 :
2671 : /*
2672 : * Do a postcheck to consider applying wraparound failsafe now. Note
2673 : * that parallel VACUUM only gets the precheck and this postcheck.
2674 : */
2675 12 : if (lazy_check_wraparound_failsafe(vacrel))
2676 0 : allindexes = false;
2677 : }
2678 :
2679 : /*
2680 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2681 : * each call here (except calls where we choose to do the failsafe). This
2682 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2683 : * of the failsafe triggering, which prevents the next call from taking
2684 : * place).
2685 : */
2686 : Assert(vacrel->num_index_scans > 0 ||
2687 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2688 : Assert(allindexes || VacuumFailsafeActive);
2689 :
2690 : /*
2691 : * Increase and report the number of index scans. Also, we reset
2692 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2693 : *
2694 : * We deliberately include the case where we started a round of bulk
2695 : * deletes that we weren't able to finish due to the failsafe triggering.
2696 : */
2697 674 : vacrel->num_index_scans++;
2698 674 : progress_end_val[0] = 0;
2699 674 : progress_end_val[1] = 0;
2700 674 : progress_end_val[2] = vacrel->num_index_scans;
2701 674 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2702 :
2703 674 : return allindexes;
2704 : }
2705 :
2706 : /*
2707 : * Read stream callback for vacuum's third phase (second pass over the heap).
2708 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2709 : * if there are no further blocks to vacuum.
2710 : *
2711 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2712 : */
/*
 * Parameters, as used by this callback:
 *   stream                 - not referenced here.
 *   callback_private_data  - the TidStoreIter that lazy_vacuum_heap_rel
 *                            passes to read_stream_begin_relation.
 *   per_buffer_data        - destination for a copy of the current
 *                            TidStoreIterResult; lazy_vacuum_heap_rel
 *                            requests sizeof(TidStoreIterResult) bytes of
 *                            per-buffer data when it creates the stream.
 */
2713 : static BlockNumber
2714 15345 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2715 : void *callback_private_data,
2716 : void *per_buffer_data)
2717 : {
2718 15345 : TidStoreIter *iter = callback_private_data;
2719 : TidStoreIterResult *iter_result;
2720 :
/* A NULL result means the iterator is exhausted; nothing is copied then */
2721 15345 : iter_result = TidStoreIterateNext(iter);
2722 15345 : if (iter_result == NULL)
2723 674 : return InvalidBlockNumber;
2724 :
2725 : /*
2726 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2727 : * It is safe to copy the result, according to TidStoreIterateNext().
2728 : */
2729 14671 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2730 :
2731 14671 : return iter_result->blkno;
2732 : }
2733 :
2734 : /*
2735 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2736 : *
2737 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2738 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2739 : *
2740 : * We may also be able to truncate the line pointer array of the heap pages we
2741 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2742 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2743 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2744 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2745 : * page's line pointer array).
2746 : *
2747 : * Note: the reason for doing this as a second pass is we cannot remove the
2748 : * tuples until we've removed their index entries, and we want to process
2749 : * index entry removal in batches as large as possible.
2750 : */
2751 : static void
2752 674 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2753 : {
2754 : ReadStream *stream;
/* Number of heap pages processed by the loop below */
2755 674 : BlockNumber vacuumed_pages = 0;
/* Passed by address to visibilitymap_pin on every iteration; released once after the loop */
2756 674 : Buffer vmbuffer = InvalidBuffer;
2757 : LVSavedErrInfo saved_err_info;
2758 : TidStoreIter *iter;
2759 :
2760 : Assert(vacrel->do_index_vacuuming);
2761 : Assert(vacrel->do_index_cleanup);
2762 : Assert(vacrel->num_index_scans > 0);
2763 :
2764 : /* Report that we are now vacuuming the heap */
2765 674 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2766 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2767 :
2768 : /* Update error traceback information */
2769 674 : update_vacuum_error_info(vacrel, &saved_err_info,
2770 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2771 : InvalidBlockNumber, InvalidOffsetNumber);
2772 :
/* This iterator is what the read stream callback advances to pick blocks */
2773 674 : iter = TidStoreBeginIterate(vacrel->dead_items);
2774 :
2775 : /*
2776 : * Set up the read stream for vacuum's second pass through the heap.
2777 : *
2778 : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2779 : * not need to wait for IO and does not perform locking. Once we support
2780 : * parallelism it should still be fine, as presumably the holder of locks
2781 : * would never be blocked by IO while holding the lock.
2782 : */
2783 674 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2784 : READ_STREAM_USE_BATCHING,
2785 : vacrel->bstrategy,
2786 : vacrel->rel,
2787 : MAIN_FORKNUM,
2788 : vacuum_reap_lp_read_stream_next,
2789 : iter,
2790 : sizeof(TidStoreIterResult));
2791 :
2792 : while (true)
2793 14671 : {
2794 : BlockNumber blkno;
2795 : Buffer buf;
2796 : Page page;
2797 : TidStoreIterResult *iter_result;
2798 : Size freespace;
2799 : OffsetNumber offsets[MaxOffsetNumber];
2800 : int num_offsets;
2801 :
2802 15345 : vacuum_delay_point(false);
2803 :
/*
 * Fetch the next buffer from the stream.  iter_result is pointed at the
 * per-buffer data that the stream callback filled in for this block.
 */
2804 15345 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2805 :
2806 : /* The relation is exhausted */
2807 15345 : if (!BufferIsValid(buf))
2808 674 : break;
2809 :
2810 14671 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2811 :
2812 : Assert(iter_result);
/* Expand the saved iterator result into this block's array of dead offsets */
2813 14671 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2814 : Assert(num_offsets <= lengthof(offsets));
2815 :
2816 : /*
2817 : * Pin the visibility map page in case we need to mark the page
2818 : * all-visible. In most cases this will be very cheap, because we'll
2819 : * already have the correct page pinned anyway.
2820 : */
2821 14671 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2822 :
2823 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2824 14671 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2825 14671 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2826 : num_offsets, vmbuffer);
2827 :
2828 : /* Now that we've vacuumed the page, record its available space */
/*
 * The free space is measured while the buffer is still locked, but it
 * is handed to the free space map only after the buffer is released.
 */
2829 14671 : page = BufferGetPage(buf);
2830 14671 : freespace = PageGetHeapFreeSpace(page);
2831 :
2832 14671 : UnlockReleaseBuffer(buf);
2833 14671 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2834 14671 : vacuumed_pages++;
2835 : }
2836 :
2837 674 : read_stream_end(stream);
2838 674 : TidStoreEndIterate(iter);
2839 :
2840 674 : vacrel->blkno = InvalidBlockNumber;
2841 674 : if (BufferIsValid(vmbuffer))
2842 674 : ReleaseBuffer(vmbuffer);
2843 :
2844 : /*
2845 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2846 : * the second heap pass. No more, no less.
2847 : */
2848 : Assert(vacrel->num_index_scans > 1 ||
2849 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2850 : vacuumed_pages == vacrel->lpdead_item_pages));
2851 :
2852 674 : ereport(DEBUG2,
2853 : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2854 : vacrel->relname, vacrel->dead_items_info->num_items,
2855 : vacuumed_pages)));
2856 :
2857 : /* Revert to the previous phase information for error traceback */
2858 674 : restore_vacuum_error_info(vacrel, &saved_err_info);
2859 674 : }
2860 :
2861 : /*
2862 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2863 : * vacrel->dead_items store.
2864 : *
2865 : * Caller must have an exclusive buffer lock on the buffer (though a full
2866 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2867 : * a pin on blkno's visibility map page.
2868 : */
2869 : static void
2870 14671 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2871 : OffsetNumber *deadoffsets, int num_offsets,
2872 : Buffer vmbuffer)
2873 : {
2874 14671 : Page page = BufferGetPage(buffer);
/* Offsets set to LP_UNUSED below, passed on to the WAL record */
2875 : OffsetNumber unused[MaxHeapTuplesPerPage];
2876 14671 : int nunused = 0;
2877 : TransactionId visibility_cutoff_xid;
/* Stays invalid unless VM bits end up being set for this page */
2878 14671 : TransactionId conflict_xid = InvalidTransactionId;
2879 : bool all_frozen;
2880 : LVSavedErrInfo saved_err_info;
/* VISIBILITYMAP_* bits to set for this page; zero means leave the VM alone */
2881 14671 : uint8 vmflags = 0;
2882 :
2883 : Assert(vacrel->do_index_vacuuming);
2884 :
2885 14671 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2886 :
2887 : /* Update error traceback information */
2888 14671 : update_vacuum_error_info(vacrel, &saved_err_info,
2889 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2890 : InvalidOffsetNumber);
2891 :
2892 : /*
2893 : * Before marking dead items unused, check whether the page will become
2894 : * all-visible once that change is applied. This lets us reap the tuples
2895 : * and mark the page all-visible within the same critical section,
2896 : * enabling both changes to be emitted in a single WAL record. Since the
2897 : * visibility checks may perform I/O and allocate memory, they must be
2898 : * done outside the critical section.
2899 : */
2900 14671 : if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2901 : vacrel->cutoffs.OldestXmin,
2902 : deadoffsets, num_offsets,
2903 : &all_frozen, &visibility_cutoff_xid,
2904 : &vacrel->offnum))
2905 : {
2906 14511 : vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2907 14511 : if (all_frozen)
2908 : {
2909 11270 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
2910 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2911 : }
2912 :
2913 : /*
2914 : * Take the lock on the vmbuffer before entering a critical section.
2915 : * The heap page lock must also be held while updating the VM to
2916 : * ensure consistency.
2917 : */
2918 14511 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2919 : }
2920 :
2921 14671 : START_CRIT_SECTION();
2922 :
/*
 * Flip each listed item from LP_DEAD to LP_UNUSED, collecting the offsets
 * in unused[] for the WAL record written further down.
 */
2923 918791 : for (int i = 0; i < num_offsets; i++)
2924 : {
2925 : ItemId itemid;
2926 904120 : OffsetNumber toff = deadoffsets[i];
2927 :
2928 904120 : itemid = PageGetItemId(page, toff);
2929 :
2930 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2931 904120 : ItemIdSetUnused(itemid);
2932 904120 : unused[nunused++] = toff;
2933 : }
2934 :
2935 : Assert(nunused > 0);
2936 :
2937 : /* Attempt to truncate line pointer array now */
2938 14671 : PageTruncateLinePointerArray(page);
2939 :
2940 14671 : if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2941 : {
2942 : /*
2943 : * The page is guaranteed to have had dead line pointers, so we always
2944 : * set PD_ALL_VISIBLE.
2945 : */
2946 14511 : PageSetAllVisible(page);
2947 14511 : visibilitymap_set_vmbits(blkno,
2948 : vmbuffer, vmflags,
2949 14511 : vacrel->rel->rd_locator);
/* The WAL record carries the cutoff as its conflict XID only in this case */
2950 14511 : conflict_xid = visibility_cutoff_xid;
2951 : }
2952 :
2953 : /*
2954 : * Mark buffer dirty before we write WAL.
2955 : */
2956 14671 : MarkBufferDirty(buffer);
2957 :
2958 : /* XLOG stuff */
/*
 * A single record covers both the LP_UNUSED changes and, when vmflags is
 * nonzero, the VM update; vmbuffer is passed only in that case.
 */
2959 14671 : if (RelationNeedsWAL(vacrel->rel))
2960 : {
2961 13819 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2962 : vmflags != 0 ? vmbuffer : InvalidBuffer,
2963 : vmflags,
2964 : conflict_xid,
2965 : false, /* no cleanup lock required */
2966 : PRUNE_VACUUM_CLEANUP,
2967 : NULL, 0, /* frozen */
2968 : NULL, 0, /* redirected */
2969 : NULL, 0, /* dead */
2970 : unused, nunused);
2971 : }
2972 :
2973 14671 : END_CRIT_SECTION();
2974 :
/*
 * This branch also releases the vmbuffer lock that was taken before the
 * critical section; that lock was acquired exactly when the all-visible
 * bit was added to vmflags.
 */
2975 14671 : if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
2976 : {
2977 : /* Count the newly set VM page for logging */
2978 14511 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2979 14511 : vacrel->new_all_visible_pages++;
2980 14511 : if (all_frozen)
2981 11270 : vacrel->new_all_visible_all_frozen_pages++;
2982 : }
2983 :
2984 : /* Revert to the previous phase information for error traceback */
2985 14671 : restore_vacuum_error_info(vacrel, &saved_err_info);
2986 14671 : }
2987 :
2988 : /*
2989 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2990 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2991 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2992 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2993 : *
2994 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2995 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2996 : * that it started out with.
2997 : *
2998 : * Returns true when failsafe has been triggered.
2999 : */
3000 : static bool
3001 122752 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
3002 : {
3003 : /* Don't warn more than once per VACUUM */
/*
 * Once the flag is set, every later call returns true from here without
 * rechecking the cutoffs or repeating the side effects below.
 */
3004 122752 : if (VacuumFailsafeActive)
3005 0 : return true;
3006 :
3007 122752 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
3008 : {
/*
 * progress_val[i] is the value reported for progress_index[i]: both
 * index counters are zeroed and the mode is set to failsafe.
 */
3009 21516 : const int progress_index[] = {
3010 : PROGRESS_VACUUM_INDEXES_TOTAL,
3011 : PROGRESS_VACUUM_INDEXES_PROCESSED,
3012 : PROGRESS_VACUUM_MODE
3013 : };
3014 21516 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
3015 :
3016 21516 : VacuumFailsafeActive = true;
3017 :
3018 : /*
3019 : * Abandon use of a buffer access strategy to allow use of all of
3020 : * shared buffers. We assume the caller who allocated the memory for
3021 : * the BufferAccessStrategy will free it.
3022 : */
3023 21516 : vacrel->bstrategy = NULL;
3024 :
3025 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
3026 21516 : vacrel->do_index_vacuuming = false;
3027 21516 : vacrel->do_index_cleanup = false;
3028 21516 : vacrel->do_rel_truncate = false;
3029 :
3030 : /* Reset the progress counters and set the failsafe mode */
3031 21516 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
3032 :
3033 21516 : ereport(WARNING,
3034 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3035 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3036 : vacrel->num_index_scans),
3037 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3038 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3039 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3040 :
3041 : /* Stop applying cost limits from this point on */
3042 21516 : VacuumCostActive = false;
3043 21516 : VacuumCostBalance = 0;
3044 :
3045 21516 : return true;
3046 : }
3047 :
/* Check did not fire: no state was changed */
3048 101236 : return false;
3049 : }
3050 :
3051 : /*
3052 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3053 : */
3054 : static void
3055 94364 : lazy_cleanup_all_indexes(LVRelState *vacrel)
3056 : {
/* Heap tuple count handed to every index's cleanup call */
3057 94364 : double reltuples = vacrel->new_rel_tuples;
/* reltuples is flagged as an estimate when fewer pages were scanned than the relation has */
3058 94364 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
/*
 * Parameter-index arrays for the progress updates at the start and end of
 * this function; progress_end_val resets both index counters to zero.
 */
3059 94364 : const int progress_start_index[] = {
3060 : PROGRESS_VACUUM_PHASE,
3061 : PROGRESS_VACUUM_INDEXES_TOTAL
3062 : };
3063 94364 : const int progress_end_index[] = {
3064 : PROGRESS_VACUUM_INDEXES_TOTAL,
3065 : PROGRESS_VACUUM_INDEXES_PROCESSED
3066 : };
3067 : int64 progress_start_val[2];
3068 94364 : int64 progress_end_val[2] = {0, 0};
3069 :
3070 : Assert(vacrel->do_index_cleanup);
3071 : Assert(vacrel->nindexes > 0);
3072 :
3073 : /*
3074 : * Report that we are now cleaning up indexes and the number of indexes to
3075 : * cleanup.
3076 : */
3077 94364 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3078 94364 : progress_start_val[1] = vacrel->nindexes;
3079 94364 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3080 :
3081 94364 : if (!ParallelVacuumIsActive(vacrel))
3082 : {
/*
 * Serial path: clean up each index in turn, storing the returned stats
 * back into vacrel->indstats.  No failsafe check is made in this loop.
 */
3083 242788 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3084 : {
3085 148441 : Relation indrel = vacrel->indrels[idx];
3086 148441 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3087 :
3088 296882 : vacrel->indstats[idx] =
3089 148441 : lazy_cleanup_one_index(indrel, istat, reltuples,
3090 : estimated_count, vacrel);
3091 :
3092 : /* Report the number of indexes cleaned up */
3093 148441 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3094 148441 : idx + 1);
3095 : }
3096 : }
3097 : else
3098 : {
3099 : /* Outsource everything to parallel variant */
3100 17 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3101 : vacrel->num_index_scans,
3102 : estimated_count);
3103 : }
3104 :
3105 : /* Reset the progress counters */
3106 94364 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3107 94364 : }
3108 :
3109 : /*
3110 : * lazy_vacuum_one_index() -- vacuum index relation.
3111 : *
3112 : * Delete all the index tuples containing a TID collected in
3113 : * vacrel->dead_items. Also update running statistics. Exact
3114 : * details depend on index AM's ambulkdelete routine.
3115 : *
3116 : * reltuples is the number of heap tuples to be passed to the
3117 : * bulkdelete callback. It's always assumed to be estimated.
3118 : * See indexam.sgml for more info.
3119 : *
3120 : * Returns bulk delete stats derived from input stats
3121 : */
3122 : static IndexBulkDeleteResult *
3123 1274 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3124 : double reltuples, LVRelState *vacrel)
3125 : {
3126 : IndexVacuumInfo ivinfo;
3127 : LVSavedErrInfo saved_err_info;
3128 :
/*
 * Fill in the IndexVacuumInfo handed to the bulk delete call below.
 * estimated_count is hard-wired to true here, so reltuples is always
 * presented to the index as an estimate.
 */
3129 1274 : ivinfo.index = indrel;
3130 1274 : ivinfo.heaprel = vacrel->rel;
3131 1274 : ivinfo.analyze_only = false;
3132 1274 : ivinfo.report_progress = false;
3133 1274 : ivinfo.estimated_count = true;
3134 1274 : ivinfo.message_level = DEBUG2;
3135 1274 : ivinfo.num_heap_tuples = reltuples;
3136 1274 : ivinfo.strategy = vacrel->bstrategy;
3137 :
3138 : /*
3139 : * Update error traceback information.
3140 : *
3141 : * The index name is saved during this phase and restored immediately
3142 : * after this phase. See vacuum_error_callback.
3143 : */
3144 : Assert(vacrel->indname == NULL);
3145 1274 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3146 1274 : update_vacuum_error_info(vacrel, &saved_err_info,
3147 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3148 : InvalidBlockNumber, InvalidOffsetNumber);
3149 :
3150 : /* Do bulk deletion */
3151 1274 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3152 : vacrel->dead_items_info);
3153 :
3154 : /* Revert to the previous phase information for error traceback */
/*
 * The copied index name is freed and the pointer cleared, which restores
 * the indname == NULL state asserted on entry.
 */
3155 1274 : restore_vacuum_error_info(vacrel, &saved_err_info);
3156 1274 : pfree(vacrel->indname);
3157 1274 : vacrel->indname = NULL;
3158 :
3159 1274 : return istat;
3160 : }
3161 :
3162 : /*
3163 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3164 : *
3165 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3166 : * of heap tuples and estimated_count is true if reltuples is an
3167 : * estimated value. See indexam.sgml for more info.
3168 : *
3169 : * Returns bulk delete stats derived from input stats
3170 : */
3171 : static IndexBulkDeleteResult *
3172 148441 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3173 : double reltuples, bool estimated_count,
3174 : LVRelState *vacrel)
3175 : {
3176 : IndexVacuumInfo ivinfo;
3177 : LVSavedErrInfo saved_err_info;
3178 :
3179 148441 : ivinfo.index = indrel;
3180 148441 : ivinfo.heaprel = vacrel->rel;
3181 148441 : ivinfo.analyze_only = false;
3182 148441 : ivinfo.report_progress = false;
3183 148441 : ivinfo.estimated_count = estimated_count;
3184 148441 : ivinfo.message_level = DEBUG2;
3185 :
3186 148441 : ivinfo.num_heap_tuples = reltuples;
3187 148441 : ivinfo.strategy = vacrel->bstrategy;
3188 :
3189 : /*
3190 : * Update error traceback information.
3191 : *
3192 : * The index name is saved during this phase and restored immediately
3193 : * after this phase. See vacuum_error_callback.
3194 : */
3195 : Assert(vacrel->indname == NULL);
3196 148441 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3197 148441 : update_vacuum_error_info(vacrel, &saved_err_info,
3198 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3199 : InvalidBlockNumber, InvalidOffsetNumber);
3200 :
3201 148441 : istat = vac_cleanup_one_index(&ivinfo, istat);
3202 :
3203 : /* Revert to the previous phase information for error traceback */
3204 148441 : restore_vacuum_error_info(vacrel, &saved_err_info);
3205 148441 : pfree(vacrel->indname);
3206 148441 : vacrel->indname = NULL;
3207 :
3208 148441 : return istat;
3209 : }
3210 :
3211 : /*
3212 : * should_attempt_truncation - should we attempt to truncate the heap?
3213 : *
3214 : * Don't even think about it unless we have a shot at releasing a goodly
3215 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3216 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3217 : * be particularly disruptive.
3218 : *
3219 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3220 : * system might be refusing to allocate new XIDs at this point. The system
3221 : * definitely won't return to normal unless and until VACUUM actually advances
3222 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3223 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3224 : * truncate the table under these circumstances, an XID exhaustion error might
3225 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3226 : * There is very little chance of truncation working out when the failsafe is
3227 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3228 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3229 : * we're called.
3230 : */
3231 : static bool
3232 120792 : should_attempt_truncation(LVRelState *vacrel)
3233 : {
3234 : BlockNumber possibly_freeable;
3235 :
3236 120792 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3237 21661 : return false;
3238 :
3239 99131 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3240 99131 : if (possibly_freeable > 0 &&
3241 174 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3242 174 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3243 155 : return true;
3244 :
3245 98976 : return false;
3246 : }
3247 :
3248 : /*
3249 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3250 : */
3251 : static void
3252 155 : lazy_truncate_heap(LVRelState *vacrel)
3253 : {
3254 155 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3255 : BlockNumber new_rel_pages;
3256 : bool lock_waiter_detected;
3257 : int lock_retry;
3258 :
3259 : /* Report that we are now truncating */
3260 155 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3261 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3262 :
3263 : /* Update error traceback information one last time */
3264 155 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3265 : vacrel->nonempty_pages, InvalidOffsetNumber);
3266 :
3267 : /*
3268 : * Loop until no more truncating can be done.
3269 : */
3270 : do
3271 : {
3272 : /*
3273 : * We need full exclusive lock on the relation in order to do
3274 : * truncation. If we can't get it, give up rather than waiting --- we
3275 : * don't want to block other backends, and we don't want to deadlock
3276 : * (which is quite possible considering we already hold a lower-grade
3277 : * lock).
3278 : */
3279 155 : lock_waiter_detected = false;
3280 155 : lock_retry = 0;
3281 : while (true)
3282 : {
3283 357 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3284 153 : break;
3285 :
3286 : /*
3287 : * Check for interrupts while trying to (re-)acquire the exclusive
3288 : * lock.
3289 : */
3290 204 : CHECK_FOR_INTERRUPTS();
3291 :
3292 204 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3293 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3294 : {
3295 : /*
3296 : * We failed to establish the lock in the specified number of
3297 : * retries. This means we give up truncating.
3298 : */
3299 2 : ereport(vacrel->verbose ? INFO : DEBUG2,
3300 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3301 : vacrel->relname)));
3302 3 : return;
3303 : }
3304 :
3305 202 : (void) WaitLatch(MyLatch,
3306 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3307 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3308 : WAIT_EVENT_VACUUM_TRUNCATE);
3309 202 : ResetLatch(MyLatch);
3310 : }
3311 :
3312 : /*
3313 : * Now that we have exclusive lock, look to see if the rel has grown
3314 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3315 : * the newly added pages presumably contain non-deletable tuples.
3316 : */
3317 153 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3318 153 : if (new_rel_pages != orig_rel_pages)
3319 : {
3320 : /*
3321 : * Note: we intentionally don't update vacrel->rel_pages with the
3322 : * new rel size here. If we did, it would amount to assuming that
3323 : * the new pages are empty, which is unlikely. Leaving the numbers
3324 : * alone amounts to assuming that the new pages have the same
3325 : * tuple density as existing ones, which is less unlikely.
3326 : */
3327 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3328 0 : return;
3329 : }
3330 :
3331 : /*
3332 : * Scan backwards from the end to verify that the end pages actually
3333 : * contain no tuples. This is *necessary*, not optional, because
3334 : * other backends could have added tuples to these pages whilst we
3335 : * were vacuuming.
3336 : */
3337 153 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3338 153 : vacrel->blkno = new_rel_pages;
3339 :
3340 153 : if (new_rel_pages >= orig_rel_pages)
3341 : {
3342 : /* can't do anything after all */
3343 1 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3344 1 : return;
3345 : }
3346 :
3347 : /*
3348 : * Okay to truncate.
3349 : */
3350 152 : RelationTruncate(vacrel->rel, new_rel_pages);
3351 :
3352 : /*
3353 : * We can release the exclusive lock as soon as we have truncated.
3354 : * Other backends can't safely access the relation until they have
3355 : * processed the smgr invalidation that smgrtruncate sent out ... but
3356 : * that should happen as part of standard invalidation processing once
3357 : * they acquire lock on the relation.
3358 : */
3359 152 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3360 :
3361 : /*
3362 : * Update statistics. Here, it *is* correct to adjust rel_pages
3363 : * without also touching reltuples, since the tuple count wasn't
3364 : * changed by the truncation.
3365 : */
3366 152 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3367 152 : vacrel->rel_pages = new_rel_pages;
3368 :
3369 152 : ereport(vacrel->verbose ? INFO : DEBUG2,
3370 : (errmsg("table \"%s\": truncated %u to %u pages",
3371 : vacrel->relname,
3372 : orig_rel_pages, new_rel_pages)));
3373 152 : orig_rel_pages = new_rel_pages;
3374 152 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3375 : }
3376 :
3377 : /*
3378 : * Rescan end pages to verify that they are (still) empty of tuples.
3379 : *
3380 : * Returns number of nondeletable pages (last nonempty page + 1).
3381 : */
3382 : static BlockNumber
3383 153 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3384 : {
3385 : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3386 : "prefetch size must be power of 2");
3387 :
3388 : BlockNumber blkno;
3389 : BlockNumber prefetchedUntil;
3390 : instr_time starttime;
3391 :
3392 : /* Initialize the starttime if we check for conflicting lock requests */
3393 153 : INSTR_TIME_SET_CURRENT(starttime);
3394 :
3395 : /*
3396 : * Start checking blocks at what we believe relation end to be and move
3397 : * backwards. (Strange coding of loop control is needed because blkno is
3398 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3399 : * in forward direction, so that OS-level readahead can kick in.
3400 : */
3401 153 : blkno = vacrel->rel_pages;
3402 153 : prefetchedUntil = InvalidBlockNumber;
3403 2679 : while (blkno > vacrel->nonempty_pages)
3404 : {
3405 : Buffer buf;
3406 : Page page;
3407 : OffsetNumber offnum,
3408 : maxoff;
3409 : bool hastup;
3410 :
3411 : /*
3412 : * Check if another process requests a lock on our relation. We are
3413 : * holding an AccessExclusiveLock here, so they will be waiting. We
3414 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3415 : * only check if that interval has elapsed once every 32 blocks to
3416 : * keep the number of system calls and actual shared lock table
3417 : * lookups to a minimum.
3418 : */
3419 2530 : if ((blkno % 32) == 0)
3420 : {
3421 : instr_time currenttime;
3422 : instr_time elapsed;
3423 :
3424 85 : INSTR_TIME_SET_CURRENT(currenttime);
3425 85 : elapsed = currenttime;
3426 85 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3427 85 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3428 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3429 : {
3430 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3431 : {
3432 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3433 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3434 : vacrel->relname)));
3435 :
3436 0 : *lock_waiter_detected = true;
3437 0 : return blkno;
3438 : }
3439 0 : starttime = currenttime;
3440 : }
3441 : }
3442 :
3443 : /*
3444 : * We don't insert a vacuum delay point here, because we have an
3445 : * exclusive lock on the table which we want to hold for as short a
3446 : * time as possible. We still need to check for interrupts however.
3447 : */
3448 2530 : CHECK_FOR_INTERRUPTS();
3449 :
3450 2530 : blkno--;
3451 :
3452 : /* If we haven't prefetched this lot yet, do so now. */
3453 2530 : if (prefetchedUntil > blkno)
3454 : {
3455 : BlockNumber prefetchStart;
3456 : BlockNumber pblkno;
3457 :
3458 214 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3459 3721 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3460 : {
3461 3507 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3462 3507 : CHECK_FOR_INTERRUPTS();
3463 : }
3464 214 : prefetchedUntil = prefetchStart;
3465 : }
3466 :
3467 2530 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3468 : vacrel->bstrategy);
3469 :
3470 : /* In this phase we only need shared access to the buffer */
3471 2530 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3472 :
3473 2530 : page = BufferGetPage(buf);
3474 :
3475 2530 : if (PageIsNew(page) || PageIsEmpty(page))
3476 : {
3477 1191 : UnlockReleaseBuffer(buf);
3478 1191 : continue;
3479 : }
3480 :
3481 1339 : hastup = false;
3482 1339 : maxoff = PageGetMaxOffsetNumber(page);
3483 1339 : for (offnum = FirstOffsetNumber;
3484 2702 : offnum <= maxoff;
3485 1363 : offnum = OffsetNumberNext(offnum))
3486 : {
3487 : ItemId itemid;
3488 :
3489 1367 : itemid = PageGetItemId(page, offnum);
3490 :
3491 : /*
3492 : * Note: any non-unused item should be taken as a reason to keep
3493 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3494 : * we must not have cleaned out its index entries.
3495 : */
3496 1367 : if (ItemIdIsUsed(itemid))
3497 : {
3498 4 : hastup = true;
3499 4 : break; /* can stop scanning */
3500 : }
3501 : } /* scan along page */
3502 :
3503 1339 : UnlockReleaseBuffer(buf);
3504 :
3505 : /* Done scanning if we found a tuple here */
3506 1339 : if (hastup)
3507 4 : return blkno + 1;
3508 : }
3509 :
3510 : /*
3511 : * If we fall out of the loop, all the previously-thought-to-be-empty
3512 : * pages still are; we need not bother to look at the last known-nonempty
3513 : * page.
3514 : */
3515 149 : return vacrel->nonempty_pages;
3516 : }
3517 :
3518 : /*
3519 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3520 : * shared memory). Sets both in vacrel for caller.
3521 : *
3522 : * Also handles parallel initialization as part of allocating dead_items in
3523 : * DSM when required.
3524 : */
3525 : static void
3526 120792 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3527 : {
3528 : VacDeadItemsInfo *dead_items_info;
3529 348516 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3530 106932 : autovacuum_work_mem != -1 ?
3531 227724 : autovacuum_work_mem : maintenance_work_mem;
3532 :
3533 : /*
3534 : * Initialize state for a parallel vacuum. As of now, only one worker can
3535 : * be used for an index, so we invoke parallelism only if there are at
3536 : * least two indexes on a table.
3537 : */
3538 120792 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3539 : {
3540 : /*
3541 : * Since parallel workers cannot access data in temporary tables, we
3542 : * can't perform parallel vacuum on them.
3543 : */
3544 5533 : if (RelationUsesLocalBuffers(vacrel->rel))
3545 : {
3546 : /*
3547 : * Give warning only if the user explicitly tries to perform a
3548 : * parallel vacuum on the temporary table.
3549 : */
3550 3 : if (nworkers > 0)
3551 3 : ereport(WARNING,
3552 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3553 : vacrel->relname)));
3554 : }
3555 : else
3556 5530 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3557 : vacrel->nindexes, nworkers,
3558 : vac_work_mem,
3559 5530 : vacrel->verbose ? INFO : DEBUG2,
3560 : vacrel->bstrategy);
3561 :
3562 : /*
3563 : * If parallel mode started, dead_items and dead_items_info spaces are
3564 : * allocated in DSM.
3565 : */
3566 5533 : if (ParallelVacuumIsActive(vacrel))
3567 : {
3568 17 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3569 : &vacrel->dead_items_info);
3570 17 : return;
3571 : }
3572 : }
3573 :
3574 : /*
3575 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3576 : * locally.
3577 : */
3578 :
3579 120775 : dead_items_info = palloc_object(VacDeadItemsInfo);
3580 120775 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3581 120775 : dead_items_info->num_items = 0;
3582 120775 : vacrel->dead_items_info = dead_items_info;
3583 :
3584 120775 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3585 : }
3586 :
3587 : /*
3588 : * Add the given block number and offset numbers to dead_items.
3589 : */
3590 : static void
3591 17001 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3592 : int num_offsets)
3593 : {
3594 17001 : const int prog_index[2] = {
3595 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3596 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3597 : };
3598 : int64 prog_val[2];
3599 :
3600 17001 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3601 17001 : vacrel->dead_items_info->num_items += num_offsets;
3602 :
3603 : /* update the progress information */
3604 17001 : prog_val[0] = vacrel->dead_items_info->num_items;
3605 17001 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3606 17001 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3607 17001 : }
3608 :
3609 : /*
3610 : * Forget all collected dead items.
3611 : */
3612 : static void
3613 692 : dead_items_reset(LVRelState *vacrel)
3614 : {
3615 : /* Update statistics for dead items */
3616 692 : vacrel->num_dead_items_resets++;
3617 692 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3618 :
3619 692 : if (ParallelVacuumIsActive(vacrel))
3620 : {
3621 12 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3622 12 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3623 : &vacrel->dead_items_info);
3624 12 : return;
3625 : }
3626 :
3627 : /* Recreate the tidstore with the same max_bytes limitation */
3628 680 : TidStoreDestroy(vacrel->dead_items);
3629 680 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3630 :
3631 : /* Reset the counter */
3632 680 : vacrel->dead_items_info->num_items = 0;
3633 : }
3634 :
3635 : /*
3636 : * Perform cleanup for resources allocated in dead_items_alloc
3637 : */
3638 : static void
3639 120792 : dead_items_cleanup(LVRelState *vacrel)
3640 : {
3641 120792 : if (!ParallelVacuumIsActive(vacrel))
3642 : {
3643 : /* Don't bother with pfree here */
3644 120775 : return;
3645 : }
3646 :
3647 : /* End parallel mode */
3648 17 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3649 17 : vacrel->pvs = NULL;
3650 : }
3651 :
#ifdef USE_ASSERT_CHECKING

/*
 * Front end to heap_page_would_be_all_visible() for callers that expect the
 * page to carry no LP_DEAD items: an empty dead-offsets set (NULL, 0) is
 * passed and every other argument is forwarded unchanged.  At present this is
 * only built with assertions enabled, but nothing prevents using it outside
 * of asserts.
 */
static bool
heap_page_is_all_visible(Relation rel, Buffer buf,
						 TransactionId OldestXmin,
						 bool *all_frozen,
						 TransactionId *visibility_cutoff_xid,
						 OffsetNumber *logging_offnum)
{
	bool		page_all_visible;

	page_all_visible = heap_page_would_be_all_visible(rel, buf,
													  OldestXmin,
													  NULL, 0,
													  all_frozen,
													  visibility_cutoff_xid,
													  logging_offnum);

	return page_all_visible;
}
#endif
3675 :
3676 : /*
3677 : * Check whether the heap page in buf is all-visible except for the dead
3678 : * tuples referenced in the deadoffsets array.
3679 : *
3680 : * Vacuum uses this to check if a page would become all-visible after reaping
3681 : * known dead tuples. This function does not remove the dead items.
3682 : *
3683 : * This cannot be called in a critical section, as the visibility checks may
3684 : * perform IO and allocate memory.
3685 : *
3686 : * Returns true if the page is all-visible other than the provided
3687 : * deadoffsets and false otherwise.
3688 : *
3689 : * OldestXmin is used to determine visibility.
 *
 * Input parameters:
 *
 * - deadoffsets / ndeadoffsets: offsets of the LP_DEAD items the caller
 *   intends to reap. The array must be strictly ascending (asserted in
 *   assert-enabled builds): line pointers are scanned in offset order and
 *   each LP_DEAD item is compared against the next unconsumed array entry.
 *   An LP_DEAD item that does not match that entry (or any LP_DEAD item at
 *   all when deadoffsets is NULL) makes the result false.
3690 : *
3691 : * Output parameters:
3692 : *
3693 : * - *all_frozen: true if every tuple on the page is frozen
 *   (forced to false whenever the function returns false)
3694 : * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3695 : * - *logging_offnum: OffsetNumber of current tuple being processed;
3696 : * used by vacuum's error callback system.
 *   Reset to InvalidOffsetNumber before returning.
3697 : *
3698 : * Callers looking to verify that the page is already all-visible can call
3699 : * heap_page_is_all_visible().
3700 : *
3701 : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3702 : * If you modify this function, ensure consistency with that code. An
3703 : * assertion cross-checks that both remain in agreement. Do not introduce new
3704 : * side-effects.
3705 : */
3706 : static bool
3707 14671 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3708 : TransactionId OldestXmin,
3709 : OffsetNumber *deadoffsets,
3710 : int ndeadoffsets,
3711 : bool *all_frozen,
3712 : TransactionId *visibility_cutoff_xid,
3713 : OffsetNumber *logging_offnum)
3714 : {
3715 14671 : Page page = BufferGetPage(buf);
3716 14671 : BlockNumber blockno = BufferGetBlockNumber(buf);
3717 : OffsetNumber offnum,
3718 : maxoff;
3719 14671 : bool all_visible = true;
/* Index of the next deadoffsets[] entry an LP_DEAD item is expected to match */
3720 14671 : int matched_dead_count = 0;
3721 :
/* Start from the optimistic answer; the scan below can only downgrade it */
3722 14671 : *visibility_cutoff_xid = InvalidTransactionId;
3723 14671 : *all_frozen = true;
3724 :
3725 : Assert(ndeadoffsets == 0 || deadoffsets);
3726 :
3727 : #ifdef USE_ASSERT_CHECKING
3728 : /* Confirm input deadoffsets[] is strictly sorted */
3729 : if (ndeadoffsets > 1)
3730 : {
3731 : for (int i = 1; i < ndeadoffsets; i++)
3732 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3733 : }
3734 : #endif
3735 :
/*
 * The loop condition re-tests all_visible, so a "break" that merely leaves
 * the switch below still ends the scan at the next condition check, without
 * advancing to another item.
 */
3736 14671 : maxoff = PageGetMaxOffsetNumber(page);
3737 14671 : for (offnum = FirstOffsetNumber;
3738 1428854 : offnum <= maxoff && all_visible;
3739 1414183 : offnum = OffsetNumberNext(offnum))
3740 : {
3741 : ItemId itemid;
3742 : HeapTupleData tuple;
/* Passed to HeapTupleSatisfiesVacuumHorizon(); not read in this function */
3743 : TransactionId dead_after;
3744 :
3745 : /*
3746 : * Set the offset number so that we can display it along with any
3747 : * error that occurred while processing this tuple.
3748 : */
3749 1414184 : *logging_offnum = offnum;
3750 1414184 : itemid = PageGetItemId(page, offnum);
3751 :
3752 : /* Unused or redirect line pointers are of no interest */
3753 1414184 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3754 945083 : continue;
3755 :
3756 1370723 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3757 :
3758 : /*
3759 : * Dead line pointers can have index pointers pointing to them. So
3760 : * they can't be treated as visible
 * unless the caller listed them in deadoffsets[] as about to be reaped.
 * Because both the page scan and the array are in ascending offset
 * order, only the next unconsumed array entry needs to be compared.
3761 : */
3762 1370723 : if (ItemIdIsDead(itemid))
3763 : {
3764 901623 : if (!deadoffsets ||
3765 901622 : matched_dead_count >= ndeadoffsets ||
3766 901622 : deadoffsets[matched_dead_count] != offnum)
3767 : {
/* Unexpected LP_DEAD item: this break exits the for loop directly */
3768 1 : *all_frozen = all_visible = false;
3769 1 : break;
3770 : }
3771 901622 : matched_dead_count++;
3772 901622 : continue;
3773 : }
3774 :
3775 : Assert(ItemIdIsNormal(itemid));
3776 :
3777 469100 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3778 469100 : tuple.t_len = ItemIdGetLength(itemid);
3779 469100 : tuple.t_tableOid = RelationGetRelid(rel);
3780 :
3781 : /* Visibility checks may do IO or allocate memory */
3782 : Assert(CritSectionCount == 0);
3783 469100 : switch (HeapTupleSatisfiesVacuumHorizon(&tuple, buf, &dead_after))
3784 : {
3785 468990 : case HEAPTUPLE_LIVE:
3786 : {
3787 : TransactionId xmin;
3788 :
3789 : /* Check comments in lazy_scan_prune. */
3790 468990 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3791 : {
3792 0 : all_visible = false;
3793 0 : *all_frozen = false;
3794 0 : break;
3795 : }
3796 :
3797 : /*
3798 : * The inserter definitely committed. But is it old enough
3799 : * that everyone sees it as committed?
3800 : */
3801 468990 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3802 468990 : if (!TransactionIdPrecedes(xmin, OldestXmin))
3803 : {
3804 49 : all_visible = false;
3805 49 : *all_frozen = false;
3806 49 : break;
3807 : }
3808 :
3809 : /* Track newest xmin on page. */
3810 468941 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3811 : TransactionIdIsNormal(xmin))
3812 12512 : *visibility_cutoff_xid = xmin;
3813 :
3814 : /* Check whether this tuple is already frozen or not */
/*
 * all_visible is necessarily still true at this point (both failure
 * paths above break out of the switch), so in effect this only skips
 * the freeze check once *all_frozen has already been cleared.
 */
3815 611328 : if (all_visible && *all_frozen &&
3816 142387 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3817 3269 : *all_frozen = false;
3818 : }
3819 468941 : break;
3820 :
/* Any other tuple state means the page is not all-visible */
3821 110 : case HEAPTUPLE_DEAD:
3822 : case HEAPTUPLE_RECENTLY_DEAD:
3823 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3824 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3825 : {
3826 110 : all_visible = false;
3827 110 : *all_frozen = false;
3828 110 : break;
3829 : }
3830 0 : default:
3831 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3832 : break;
3833 : }
3834 : } /* scan along page */
3835 :
3836 : /* Clear the offset information once we have processed the given page. */
3837 14671 : *logging_offnum = InvalidOffsetNumber;
3838 :
3839 14671 : return all_visible;
3840 : }
3841 :
3842 : /*
3843 : * Update index statistics in pg_class if the statistics are accurate.
3844 : */
3845 : static void
3846 99146 : update_relstats_all_indexes(LVRelState *vacrel)
3847 : {
3848 99146 : Relation *indrels = vacrel->indrels;
3849 99146 : int nindexes = vacrel->nindexes;
3850 99146 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3851 :
3852 : Assert(vacrel->do_index_cleanup);
3853 :
3854 247648 : for (int idx = 0; idx < nindexes; idx++)
3855 : {
3856 148502 : Relation indrel = indrels[idx];
3857 148502 : IndexBulkDeleteResult *istat = indstats[idx];
3858 :
3859 148502 : if (istat == NULL || istat->estimated_count)
3860 147082 : continue;
3861 :
3862 : /* Update index statistics */
3863 1420 : vac_update_relstats(indrel,
3864 : istat->num_pages,
3865 : istat->num_index_tuples,
3866 : 0, 0,
3867 : false,
3868 : InvalidTransactionId,
3869 : InvalidMultiXactId,
3870 : NULL, NULL, false);
3871 : }
3872 99146 : }
3873 :
3874 : /*
3875 : * Error context callback for errors occurring during vacuum. The error
3876 : * context messages for index phases should match the messages set in parallel
3877 : * vacuum. If you change this function for those phases, change
3878 : * parallel_vacuum_error_callback() as well.
3879 : */
3880 : static void
3881 111054 : vacuum_error_callback(void *arg)
3882 : {
3883 111054 : LVRelState *errinfo = arg;
3884 :
3885 111054 : switch (errinfo->phase)
3886 : {
3887 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3888 0 : if (BlockNumberIsValid(errinfo->blkno))
3889 : {
3890 0 : if (OffsetNumberIsValid(errinfo->offnum))
3891 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3892 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3893 : else
3894 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3895 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3896 : }
3897 : else
3898 0 : errcontext("while scanning relation \"%s.%s\"",
3899 : errinfo->relnamespace, errinfo->relname);
3900 0 : break;
3901 :
3902 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3903 0 : if (BlockNumberIsValid(errinfo->blkno))
3904 : {
3905 0 : if (OffsetNumberIsValid(errinfo->offnum))
3906 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3907 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3908 : else
3909 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3910 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3911 : }
3912 : else
3913 0 : errcontext("while vacuuming relation \"%s.%s\"",
3914 : errinfo->relnamespace, errinfo->relname);
3915 0 : break;
3916 :
3917 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3918 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3919 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3920 0 : break;
3921 :
3922 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3923 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3924 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3925 0 : break;
3926 :
3927 3 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3928 3 : if (BlockNumberIsValid(errinfo->blkno))
3929 3 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3930 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3931 3 : break;
3932 :
3933 111051 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3934 : default:
3935 111051 : return; /* do nothing; the errinfo may not be
3936 : * initialized */
3937 : }
3938 : }
3939 :
3940 : /*
3941 : * Updates the information required for vacuum error callback. This also saves
3942 : * the current information which can be later restored via restore_vacuum_error_info.
3943 : */
3944 : static void
3945 673932 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3946 : int phase, BlockNumber blkno, OffsetNumber offnum)
3947 : {
3948 673932 : if (saved_vacrel)
3949 : {
3950 165060 : saved_vacrel->offnum = vacrel->offnum;
3951 165060 : saved_vacrel->blkno = vacrel->blkno;
3952 165060 : saved_vacrel->phase = vacrel->phase;
3953 : }
3954 :
3955 673932 : vacrel->blkno = blkno;
3956 673932 : vacrel->offnum = offnum;
3957 673932 : vacrel->phase = phase;
3958 673932 : }
3959 :
3960 : /*
3961 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3962 : */
3963 : static void
3964 165060 : restore_vacuum_error_info(LVRelState *vacrel,
3965 : const LVSavedErrInfo *saved_vacrel)
3966 : {
3967 165060 : vacrel->blkno = saved_vacrel->blkno;
3968 165060 : vacrel->offnum = saved_vacrel->offnum;
3969 165060 : vacrel->phase = saved_vacrel->phase;
3970 165060 : }
|