Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * store dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
121 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include "access/genam.h"
133 : #include "access/heapam.h"
134 : #include "access/htup_details.h"
135 : #include "access/multixact.h"
136 : #include "access/tidstore.h"
137 : #include "access/transam.h"
138 : #include "access/visibilitymap.h"
139 : #include "access/xloginsert.h"
140 : #include "catalog/storage.h"
141 : #include "commands/progress.h"
142 : #include "commands/vacuum.h"
143 : #include "common/int.h"
144 : #include "common/pg_prng.h"
145 : #include "executor/instrument.h"
146 : #include "miscadmin.h"
147 : #include "pgstat.h"
148 : #include "portability/instr_time.h"
149 : #include "postmaster/autovacuum.h"
150 : #include "storage/bufmgr.h"
151 : #include "storage/freespace.h"
152 : #include "storage/latch.h"
153 : #include "storage/lmgr.h"
154 : #include "storage/read_stream.h"
155 : #include "utils/injection_point.h"
156 : #include "utils/lsyscache.h"
157 : #include "utils/pg_rusage.h"
158 : #include "utils/timestamp.h"
159 : #include "utils/wait_event.h"
160 :
161 :
162 : /*
163 : * Space/time tradeoff parameters: do these need to be user-tunable?
164 : *
165 : * To consider truncating the relation, we want there to be at least
166 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
167 : * is less) potentially-freeable pages.
168 : */
169 : #define REL_TRUNCATE_MINIMUM 1000 /* pages */
170 : #define REL_TRUNCATE_FRACTION 16 /* divisor applied to relsize */
171 :
172 : /*
173 : * Timing parameters for truncate locking heuristics.
174 : *
175 : * These were not exposed as user tunable GUC values because it didn't seem
176 : * that the potential for improvement was great enough to merit the cost of
177 : * supporting them.
178 : */
179 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
180 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
181 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
182 :
183 : /*
184 : * Threshold that controls whether we bypass index vacuuming and heap
185 : * vacuuming as an optimization.
186 : */
187 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
188 :
189 : /*
190 : * Perform a failsafe check each time we scan another 4GB of pages.
191 : * (Note that this is deliberately kept to a power-of-two, usually 2^19, i.e. the value when BLCKSZ is 8192.)
192 : */
193 : #define FAILSAFE_EVERY_PAGES \
194 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
195 :
196 : /*
197 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
198 : * (it won't be exact because we only vacuum FSM after processing a heap page
199 : * that has some removable tuples). When there are indexes, this is ignored,
200 : * and we vacuum FSM after each index/heap cleaning pass.
201 : */
202 : #define VACUUM_FSM_EVERY_PAGES \
203 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
204 :
205 : /*
206 : * A range of pages that the visibility map would let us skip is only skipped
207 : * if it is at least this many pages long; shorter ranges may be scanned anyway (see the header comment of this file).
208 : */
209 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
210 :
211 : /*
212 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
213 : * Needs to be a power of 2.
214 : */
215 : #define PREFETCH_SIZE ((BlockNumber) 32)
216 :
217 : /*
218 : * Macro to check if we are in a parallel vacuum. If true, we are in the
219 : * parallel mode and the DSM segment is initialized.
220 : */
221 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
222 :
223 : /* Phases of vacuum during which we report error context; the current phase is kept in LVRelState.phase. */
224 : typedef enum
225 : {
226 : VACUUM_ERRCB_PHASE_UNKNOWN, /* initial value assigned by heap_vacuum_rel() */
227 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
228 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
229 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
230 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
231 : VACUUM_ERRCB_PHASE_TRUNCATE,
232 : } VacErrPhase;
233 :
234 : /*
235 : * An eager scan of a page that is set all-frozen in the VM is considered
236 : * "successful". To spread out freezing overhead across multiple normal
237 : * vacuums, we limit the number of successful eager page freezes. The maximum
238 : * number of eager page freezes is calculated as a ratio of the all-visible
239 : * but not all-frozen pages at the beginning of the vacuum.
240 : */
241 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2 /* i.e. 20% of those pages */
242 :
243 : /*
244 : * On the assumption that different regions of the table tend to have
245 : * similarly aged data, once vacuum fails to freeze
246 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
247 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
248 : * to another region of the table with potentially older data.
249 : */
250 : #define EAGER_SCAN_REGION_SIZE 4096 /* blocks */
251 :
252 : typedef struct LVRelState /* working state for one heap_vacuum_rel() call */
253 : {
254 : /* Target heap relation and its indexes */
255 : Relation rel;
256 : Relation *indrels;
257 : int nindexes;
258 :
259 : /* Buffer access strategy and parallel vacuum state */
260 : BufferAccessStrategy bstrategy;
261 : ParallelVacuumState *pvs;
262 :
263 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
264 : bool aggressive;
265 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
266 : bool skipwithvm;
267 : /* Consider index vacuuming bypass optimization? */
268 : bool consider_bypass_optimization;
269 :
270 : /* Doing index vacuuming, index cleanup, rel truncation? */
271 : bool do_index_vacuuming;
272 : bool do_index_cleanup;
273 : bool do_rel_truncate;
274 :
275 : /* VACUUM operation's cutoffs for freezing and pruning */
276 : struct VacuumCutoffs cutoffs;
277 : GlobalVisState *vistest;
278 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
279 : TransactionId NewRelfrozenXid;
280 : MultiXactId NewRelminMxid;
281 : bool skippedallvis;
282 :
283 : /* Error reporting state */
284 : char *dbname;
285 : char *relnamespace;
286 : char *relname;
287 : char *indname; /* Current index name */
288 : BlockNumber blkno; /* used only for heap operations */
289 : OffsetNumber offnum; /* used only for heap operations */
290 : VacErrPhase phase;
291 : bool verbose; /* VACUUM VERBOSE? */
292 :
293 : /*
294 : * dead_items stores TIDs whose index tuples are deleted by index
295 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
296 : * that has been processed by lazy_scan_prune. Also needed by
297 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
298 : * LP_UNUSED during second heap pass.
299 : *
300 : * Both dead_items and dead_items_info are allocated in shared memory in
301 : * parallel vacuum cases.
302 : */
303 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
304 : VacDeadItemsInfo *dead_items_info;
305 :
306 : BlockNumber rel_pages; /* total number of pages */
307 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
308 :
309 : /*
310 : * Count of all-visible blocks eagerly scanned (for logging only). This
311 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
312 : */
313 : BlockNumber eager_scanned_pages;
314 :
315 : BlockNumber removed_pages; /* # pages removed by relation truncation */
316 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
317 :
318 : /* # pages newly set all-visible in the VM */
319 : BlockNumber new_all_visible_pages;
320 :
321 : /*
322 : * # pages newly set all-visible and all-frozen in the VM. This is a
323 : * subset of new_all_visible_pages. That is, new_all_visible_pages
324 : * includes all pages set all-visible, but
325 : * new_all_visible_all_frozen_pages includes only those which were also
326 : * set all-frozen.
327 : */
328 : BlockNumber new_all_visible_all_frozen_pages;
329 :
330 : /* # all-visible pages newly set all-frozen in the VM */
331 : BlockNumber new_all_frozen_pages;
332 :
333 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
334 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
335 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
336 :
337 : /* Statistics output by us, for table */
338 : double new_rel_tuples; /* new estimated total # of tuples */
339 : double new_live_tuples; /* new estimated total # of live tuples */
340 : /* Statistics output by index AMs */
341 : IndexBulkDeleteResult **indstats;
342 :
343 : /* Instrumentation counters */
344 : int num_index_scans;
345 : int num_dead_items_resets;
346 : Size total_dead_items_bytes;
347 :
348 : /*
349 : * Total number of planned and actually launched parallel workers for
350 : * index vacuuming and index cleanup.
351 : */
352 : PVWorkerUsage worker_usage;
353 :
354 : /* Counters that follow are only for scanned_pages */
355 : int64 tuples_deleted; /* # deleted from table */
356 : int64 tuples_frozen; /* # newly frozen */
357 : int64 lpdead_items; /* # deleted from indexes */
358 : int64 live_tuples; /* # live tuples remaining */
359 : int64 recently_dead_tuples; /* # dead, but not yet removable */
360 : int64 missed_dead_tuples; /* # removable, but not removed */
361 :
362 : /* State maintained by heap_vac_scan_next_block() */
363 : BlockNumber current_block; /* last block returned */
364 : BlockNumber next_unskippable_block; /* next unskippable block */
365 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
366 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
367 :
368 : /* State related to managing eager scanning of all-visible pages */
369 :
370 : /*
371 : * A normal vacuum that has failed to freeze too many eagerly scanned
372 : * blocks in a region suspends eager scanning.
373 : * next_eager_scan_region_start is the block number of the first block
374 : * eligible for resumed eager scanning.
375 : *
376 : * When eager scanning is permanently disabled, either initially
377 : * (including for aggressive vacuum) or due to hitting the success cap,
378 : * this is set to InvalidBlockNumber.
379 : */
380 : BlockNumber next_eager_scan_region_start;
381 :
382 : /*
383 : * The remaining number of successful eager freezes a normal vacuum is
384 : * still allowed to perform. When eager scanning is enabled, this is
385 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
386 : * all-visible but not all-frozen pages. For each eager freeze success,
387 : * this is decremented. Once it hits 0, eager scanning is permanently
388 : * disabled. It is initialized to 0 if eager scanning starts out disabled
389 : * (including for aggressive vacuum).
390 : */
391 : BlockNumber eager_scan_remaining_successes;
392 :
393 : /*
394 : * The maximum number of blocks which may be eagerly scanned and not
395 : * frozen before eager scanning is temporarily suspended. This is
396 : * configurable both globally, via the
397 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
398 : * storage parameter of the same name. It is calculated as
399 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
400 : * It is 0 when eager scanning is disabled.
401 : */
402 : BlockNumber eager_scan_max_fails_per_region;
403 :
404 : /*
405 : * The number of eagerly scanned blocks vacuum may still fail to freeze
406 : * (due to age) in the current eager scan region. Vacuum resets it to
407 : * eager_scan_max_fails_per_region each time it enters a new region of the
408 : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
409 : * suspended until the next region. It is also 0 if eager scanning has
410 : * been permanently disabled.
411 : */
412 : BlockNumber eager_scan_remaining_fails;
413 : } LVRelState;
414 :
415 :
416 : /* Struct for saving and restoring vacuum error information (the blkno, offnum and phase also kept in LVRelState); see update_vacuum_error_info() and restore_vacuum_error_info(). */
417 : typedef struct LVSavedErrInfo
418 : {
419 : BlockNumber blkno;
420 : OffsetNumber offnum;
421 : VacErrPhase phase;
422 : } LVSavedErrInfo;
423 :
424 :
425 : /* Prototypes for file-local (static) functions; none of these are exported */
426 : static void lazy_scan_heap(LVRelState *vacrel);
427 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
428 : const VacuumParams *params);
429 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
430 : void *callback_private_data,
431 : void *per_buffer_data);
432 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
433 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
434 : BlockNumber blkno, Page page,
435 : bool sharelock, Buffer vmbuffer);
436 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
437 : BlockNumber blkno, Page page,
438 : Buffer vmbuffer,
439 : bool *has_lpdead_items, bool *vm_page_frozen);
440 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
441 : BlockNumber blkno, Page page,
442 : bool *has_lpdead_items);
443 : static void lazy_vacuum(LVRelState *vacrel);
444 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
445 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
446 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
447 : Buffer buffer, OffsetNumber *deadoffsets,
448 : int num_offsets, Buffer vmbuffer);
449 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
450 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
451 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
452 : IndexBulkDeleteResult *istat,
453 : double reltuples,
454 : LVRelState *vacrel);
455 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
456 : IndexBulkDeleteResult *istat,
457 : double reltuples,
458 : bool estimated_count,
459 : LVRelState *vacrel);
460 : static bool should_attempt_truncation(LVRelState *vacrel);
461 : static void lazy_truncate_heap(LVRelState *vacrel);
462 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
463 : bool *lock_waiter_detected);
464 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
465 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
466 : int num_offsets);
467 : static void dead_items_reset(LVRelState *vacrel);
468 : static void dead_items_cleanup(LVRelState *vacrel);
469 :
470 : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
471 : GlobalVisState *vistest,
472 : bool allow_update_vistest,
473 : OffsetNumber *deadoffsets,
474 : int ndeadoffsets,
475 : bool *all_frozen,
476 : TransactionId *newest_live_xid,
477 : OffsetNumber *logging_offnum);
478 : static void update_relstats_all_indexes(LVRelState *vacrel);
479 : static void vacuum_error_callback(void *arg);
480 : static void update_vacuum_error_info(LVRelState *vacrel,
481 : LVSavedErrInfo *saved_vacrel,
482 : int phase, BlockNumber blkno,
483 : OffsetNumber offnum);
484 : static void restore_vacuum_error_info(LVRelState *vacrel,
485 : const LVSavedErrInfo *saved_vacrel);
486 :
487 :
488 :
489 : /*
490 : * Helper to set up the eager scanning state for vacuuming a single relation.
491 : * Initializes the eager scan management related members of the LVRelState;
492 : * they keep their disabled values unless every check below passes.
493 : * Reads vacrel->aggressive, rel_pages, cutoffs and rel, which the caller must
494 : * have set already, plus params->max_eager_freeze_failure_rate.
495 : */
496 : static void
497 115835 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams *params)
498 : {
499 : uint32 randseed;
500 : BlockNumber allvisible;
501 : BlockNumber allfrozen;
502 : float first_region_ratio;
503 115835 : bool oldest_unfrozen_before_cutoff = false;
504 :
505 : /*
506 : * Initialize eager scan management fields to their disabled values.
507 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
508 : * of tables without sufficiently old tuples disable eager scanning.
509 : */
510 115835 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
511 115835 : vacrel->eager_scan_max_fails_per_region = 0;
512 115835 : vacrel->eager_scan_remaining_fails = 0;
513 115835 : vacrel->eager_scan_remaining_successes = 0;
514 :
515 : /* If eager scanning is explicitly disabled, just return. */
516 115835 : if (params->max_eager_freeze_failure_rate == 0)
517 115835 : return;
518 :
519 : /*
520 : * The caller will have determined whether or not an aggressive vacuum is
521 : * required by either the vacuum parameters or the relative age of the
522 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
523 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
524 : * so scans of all-visible pages are not considered eager.
525 : */
526 115835 : if (vacrel->aggressive)
527 108931 : return;
528 :
529 : /*
530 : * Aggressively vacuuming a small relation shouldn't take long, so it
531 : * isn't worth amortizing. We use two times the region size as the size
532 : * cutoff because the eager scan start block is a random spot somewhere in
533 : * the first region, making the second region the first to be eager
534 : * scanned normally.
535 : */
536 6904 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
537 6904 : return;
538 :
539 : /*
540 : * We only want to enable eager scanning if we are likely to be able to
541 : * freeze some of the pages in the relation.
542 : *
543 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
544 : * are technically freezable, but we won't freeze them unless the criteria
545 : * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
546 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
547 : *
548 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
549 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
550 : * enable eager scanning.
551 : */
552 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
553 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
554 : vacrel->cutoffs.FreezeLimit))
555 0 : oldest_unfrozen_before_cutoff = true;
556 :
557 0 : if (!oldest_unfrozen_before_cutoff &&
558 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
559 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
560 : vacrel->cutoffs.MultiXactCutoff))
561 0 : oldest_unfrozen_before_cutoff = true;
562 :
563 0 : if (!oldest_unfrozen_before_cutoff)
564 0 : return;
565 :
566 : /* We have met the criteria to eagerly scan some pages. */
567 :
568 : /*
569 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
570 : * all-visible but not all-frozen blocks in the relation.
571 : */
572 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
573 :
574 0 : vacrel->eager_scan_remaining_successes =
575 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
576 0 : (allvisible - allfrozen));
577 :
578 : /* If the cap truncated to 0 (e.g. every all-visible page is frozen), eager scanning is disabled. */
579 0 : if (vacrel->eager_scan_remaining_successes == 0)
580 0 : return;
581 :
582 : /*
583 : * Now calculate the bounds of the first eager scan region. Its end block
584 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
585 : * blocks. This affects the bounds of all subsequent regions and avoids
586 : * eager scanning and failing to freeze the same blocks each vacuum of the
587 : * relation.
588 : */
589 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
590 :
591 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE; /* always < EAGER_SCAN_REGION_SIZE */
592 :
593 : Assert(params->max_eager_freeze_failure_rate > 0 &&
594 : params->max_eager_freeze_failure_rate <= 1);
595 :
596 0 : vacrel->eager_scan_max_fails_per_region =
597 0 : params->max_eager_freeze_failure_rate *
598 : EAGER_SCAN_REGION_SIZE;
599 :
600 : /*
601 : * The first region will be smaller than subsequent regions. As such,
602 : * adjust the eager freeze failures tolerated for this region.
603 : */
604 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
605 : EAGER_SCAN_REGION_SIZE;
606 :
607 0 : vacrel->eager_scan_remaining_fails =
608 0 : vacrel->eager_scan_max_fails_per_region *
609 : first_region_ratio;
610 : }
611 :
612 : /*
613 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
614 : *
615 : * This routine sets things up for and then calls lazy_scan_heap, where
616 : * almost all work actually takes place. Finalizes everything after call
617 : * returns by managing relation truncation and updating rel's pg_class
618 : * entry. (Also updates pg_class entries for any indexes that need it.)
619 : *
620 : * At entry, we have already established a transaction and opened
621 : * and locked the relation.
622 : */
623 : void
624 115835 : heap_vacuum_rel(Relation rel, const VacuumParams *params,
625 : BufferAccessStrategy bstrategy)
626 : {
627 : LVRelState *vacrel;
628 : bool verbose,
629 : instrument,
630 : skipwithvm,
631 : frozenxid_updated,
632 : minmulti_updated;
633 : BlockNumber orig_rel_pages,
634 : new_rel_pages,
635 : new_rel_allvisible,
636 : new_rel_allfrozen;
637 : PGRUsage ru0;
638 115835 : TimestampTz starttime = 0;
639 115835 : PgStat_Counter startreadtime = 0,
640 115835 : startwritetime = 0;
641 115835 : WalUsage startwalusage = pgWalUsage;
642 115835 : BufferUsage startbufferusage = pgBufferUsage;
643 : ErrorContextCallback errcallback;
644 115835 : char **indnames = NULL;
645 115835 : Size dead_items_max_bytes = 0;
646 :
647 115835 : verbose = (params->options & VACOPT_VERBOSE) != 0;
648 215040 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
649 99205 : params->log_vacuum_min_duration >= 0));
650 115835 : if (instrument)
651 : {
652 99216 : pg_rusage_init(&ru0);
653 99216 : if (track_io_timing)
654 : {
655 0 : startreadtime = pgStatBlockReadTime;
656 0 : startwritetime = pgStatBlockWriteTime;
657 : }
658 : }
659 :
660 : /* Used for instrumentation and stats report */
661 115835 : starttime = GetCurrentTimestamp();
662 :
663 115835 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
664 : RelationGetRelid(rel));
665 115835 : if (AmAutoVacuumWorkerProcess())
666 99205 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
667 99205 : params->is_wraparound
668 : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
669 : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
670 : else
671 16630 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
672 : PROGRESS_VACUUM_STARTED_BY_MANUAL);
673 :
674 : /*
675 : * Setup error traceback support for ereport() first. The idea is to set
676 : * up an error context callback to display additional information on any
677 : * error during a vacuum. During different phases of vacuum, we update
678 : * the state so that the error context callback always displays current
679 : * information.
680 : *
681 : * Copy the names of heap rel into local memory for error reporting
682 : * purposes, too. It isn't always safe to assume that we can get the name
683 : * of each rel. It's convenient for code in lazy_scan_heap to always use
684 : * these temp copies.
685 : */
686 115835 : vacrel = palloc0_object(LVRelState);
687 115835 : vacrel->dbname = get_database_name(MyDatabaseId);
688 115835 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
689 115835 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
690 115835 : vacrel->indname = NULL;
691 115835 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
692 115835 : vacrel->verbose = verbose;
693 115835 : errcallback.callback = vacuum_error_callback;
694 115835 : errcallback.arg = vacrel;
695 115835 : errcallback.previous = error_context_stack;
696 115835 : error_context_stack = &errcallback;
697 :
698 : /* Set up high level stuff about rel and its indexes */
699 115835 : vacrel->rel = rel;
700 115835 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
701 : &vacrel->indrels);
702 115835 : vacrel->bstrategy = bstrategy;
703 115835 : if (instrument && vacrel->nindexes > 0)
704 : {
705 : /* Copy index names used by instrumentation (not error reporting) */
706 95118 : indnames = palloc_array(char *, vacrel->nindexes);
707 246871 : for (int i = 0; i < vacrel->nindexes; i++)
708 151753 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
709 : }
710 :
711 : /*
712 : * The index_cleanup param either disables index vacuuming and cleanup or
713 : * forces it to go ahead when we would otherwise apply the index bypass
714 : * optimization. The default is 'auto', which leaves the final decision
715 : * up to lazy_vacuum().
716 : *
717 : * The truncate param allows user to avoid attempting relation truncation,
718 : * though it can't force truncation to happen.
719 : */
720 : Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
721 : Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
722 : params->truncate != VACOPTVALUE_AUTO);
723 :
724 : /*
725 : * While VacuumFailsafeActive is reset to false before calling this, we
726 : * still need to reset it here due to recursive calls.
727 : */
728 115835 : VacuumFailsafeActive = false;
729 115835 : vacrel->consider_bypass_optimization = true;
730 115835 : vacrel->do_index_vacuuming = true;
731 115835 : vacrel->do_index_cleanup = true;
732 115835 : vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
733 115835 : if (params->index_cleanup == VACOPTVALUE_DISABLED)
734 : {
735 : /* Force disable index vacuuming up-front */
736 144 : vacrel->do_index_vacuuming = false;
737 144 : vacrel->do_index_cleanup = false;
738 : }
739 115691 : else if (params->index_cleanup == VACOPTVALUE_ENABLED)
740 : {
741 : /* Force index vacuuming. Note that failsafe can still bypass. */
742 19 : vacrel->consider_bypass_optimization = false;
743 : }
744 : else
745 : {
746 : /* Default/auto, make all decisions dynamically */
747 : Assert(params->index_cleanup == VACOPTVALUE_AUTO);
748 : }
749 :
750 : /* Initialize page counters explicitly (be tidy) */
751 115835 : vacrel->scanned_pages = 0;
752 115835 : vacrel->eager_scanned_pages = 0;
753 115835 : vacrel->removed_pages = 0;
754 115835 : vacrel->new_frozen_tuple_pages = 0;
755 115835 : vacrel->lpdead_item_pages = 0;
756 115835 : vacrel->missed_dead_pages = 0;
757 115835 : vacrel->nonempty_pages = 0;
758 : /* dead_items_alloc allocates vacrel->dead_items later on */
759 :
760 : /* Allocate/initialize output statistics state */
761 115835 : vacrel->new_rel_tuples = 0;
762 115835 : vacrel->new_live_tuples = 0;
763 115835 : vacrel->indstats = (IndexBulkDeleteResult **)
764 115835 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
765 :
766 : /* Initialize remaining counters (be tidy) */
767 115835 : vacrel->num_index_scans = 0;
768 115835 : vacrel->num_dead_items_resets = 0;
769 115835 : vacrel->total_dead_items_bytes = 0;
770 115835 : vacrel->tuples_deleted = 0;
771 115835 : vacrel->tuples_frozen = 0;
772 115835 : vacrel->lpdead_items = 0;
773 115835 : vacrel->live_tuples = 0;
774 115835 : vacrel->recently_dead_tuples = 0;
775 115835 : vacrel->missed_dead_tuples = 0;
776 :
777 115835 : vacrel->new_all_visible_pages = 0;
778 115835 : vacrel->new_all_visible_all_frozen_pages = 0;
779 115835 : vacrel->new_all_frozen_pages = 0;
780 :
781 115835 : vacrel->worker_usage.vacuum.nlaunched = 0;
782 115835 : vacrel->worker_usage.vacuum.nplanned = 0;
783 115835 : vacrel->worker_usage.cleanup.nlaunched = 0;
784 115835 : vacrel->worker_usage.cleanup.nplanned = 0;
785 :
786 : /*
787 : * Get cutoffs that determine which deleted tuples are considered DEAD,
788 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
789 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
790 : * happen in this order to ensure that the OldestXmin cutoff field works
791 : * as an upper bound on the XIDs stored in the pages we'll actually scan
792 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
793 : *
794 : * Next acquire vistest, a related cutoff that's used in pruning. We use
795 : * vistest in combination with OldestXmin to ensure that
796 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
797 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
798 : * whether a tuple should be frozen or removed. (In the future we might
799 : * want to teach lazy_scan_prune to recompute vistest from time to time,
800 : * to increase the number of dead tuples it can prune away.)
801 : */
802 115835 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
803 115835 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
804 115835 : vacrel->vistest = GlobalVisTestFor(rel);
805 :
806 : /* Initialize state used to track oldest extant XID/MXID */
807 115835 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
808 115835 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
809 :
810 : /*
811 : * Initialize state related to tracking all-visible page skipping. This is
812 : * very important to determine whether or not it is safe to advance the
813 : * relfrozenxid/relminmxid.
814 : */
815 115835 : vacrel->skippedallvis = false;
816 115835 : skipwithvm = true;
817 115835 : if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
818 : {
819 : /*
820 : * Force aggressive mode, and disable skipping blocks using the
821 : * visibility map (even those set all-frozen)
822 : */
823 194 : vacrel->aggressive = true;
824 194 : skipwithvm = false;
825 : }
826 :
827 115835 : vacrel->skipwithvm = skipwithvm;
828 :
829 : /*
830 : * Set up eager scan tracking state. This must happen after determining
831 : * whether or not the vacuum must be aggressive, because only normal
832 : * vacuums use the eager scan algorithm.
833 : */
834 115835 : heap_vacuum_eager_scan_setup(vacrel, params);
835 :
836 : /* Report the vacuum mode: 'normal' or 'aggressive' */
837 115835 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
838 115835 : vacrel->aggressive
839 : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
840 : : PROGRESS_VACUUM_MODE_NORMAL);
841 :
842 115835 : if (verbose)
843 : {
844 13 : if (vacrel->aggressive)
845 1 : ereport(INFO,
846 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
847 : vacrel->dbname, vacrel->relnamespace,
848 : vacrel->relname)));
849 : else
850 12 : ereport(INFO,
851 : (errmsg("vacuuming \"%s.%s.%s\"",
852 : vacrel->dbname, vacrel->relnamespace,
853 : vacrel->relname)));
854 : }
855 :
856 : /*
857 : * Allocate dead_items memory using dead_items_alloc. This handles
858 : * parallel VACUUM initialization as part of allocating shared memory
859 : * space used for dead_items. (But do a failsafe precheck first, to
860 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
861 : * is already dangerously old.)
862 : */
863 115835 : lazy_check_wraparound_failsafe(vacrel);
864 115835 : dead_items_alloc(vacrel, params->nworkers);
865 :
866 : #ifdef USE_INJECTION_POINTS
867 :
868 : /*
869 : * Used by tests to pause before parallel vacuum is launched, allowing
870 : * test code to modify configuration that the leader then propagates to
871 : * workers.
872 : */
873 115835 : if (AmAutoVacuumWorkerProcess() && ParallelVacuumIsActive(vacrel))
874 3 : INJECTION_POINT("autovacuum-start-parallel-vacuum", NULL);
875 : #endif
876 :
877 : /*
878 : * Call lazy_scan_heap to perform all required heap pruning, index
879 : * vacuuming, and heap vacuuming (plus related processing)
880 : */
881 115835 : lazy_scan_heap(vacrel);
882 :
883 : /*
884 : * Save dead items max_bytes and update the memory usage statistics before
885 : * cleanup, since they are freed in parallel vacuum cases during
886 : * dead_items_cleanup().
887 : */
888 115835 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
889 115835 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
890 :
891 : /*
892 : * Free resources managed by dead_items_alloc. This ends parallel mode in
893 : * passing when necessary.
894 : */
895 115835 : dead_items_cleanup(vacrel);
896 : Assert(!IsInParallelMode());
897 :
898 : /*
899 : * Update pg_class entries for each of rel's indexes where appropriate.
900 : *
901 : * Unlike the later update to rel's pg_class entry, this is not critical.
902 : * Maintains relpages/reltuples statistics used by the planner only.
903 : */
904 115835 : if (vacrel->do_index_cleanup)
905 91420 : update_relstats_all_indexes(vacrel);
906 :
907 : /* Done with rel's indexes */
908 115835 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
909 :
910 : /* Optionally truncate rel */
911 115835 : if (should_attempt_truncation(vacrel))
912 192 : lazy_truncate_heap(vacrel);
913 :
914 : /* Pop the error context stack */
915 115835 : error_context_stack = errcallback.previous;
916 :
917 : /* Report that we are now doing final cleanup */
918 115835 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
919 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
920 :
921 : /*
922 : * Prepare to update rel's pg_class entry.
923 : *
924 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
925 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
926 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
927 : */
928 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
929 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
930 : vacrel->cutoffs.relfrozenxid,
931 : vacrel->NewRelfrozenXid));
932 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
933 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
934 : vacrel->cutoffs.relminmxid,
935 : vacrel->NewRelminMxid));
936 115835 : if (vacrel->skippedallvis)
937 : {
938 : /*
939 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
940 : * chose to skip an all-visible page range. The state that tracks new
941 : * values will have missed unfrozen XIDs from the pages we skipped.
942 : */
943 : Assert(!vacrel->aggressive);
944 65 : vacrel->NewRelfrozenXid = InvalidTransactionId;
945 65 : vacrel->NewRelminMxid = InvalidMultiXactId;
946 : }
947 :
948 : /*
949 : * For safety, clamp relallvisible to be not more than what we're setting
950 : * pg_class.relpages to
951 : */
952 115835 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
953 115835 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
954 115835 : if (new_rel_allvisible > new_rel_pages)
955 0 : new_rel_allvisible = new_rel_pages;
956 :
957 : /*
958 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
959 : * all-frozen blocks to the count of all-visible blocks. This matches the
960 : * clamping of relallvisible above.
961 : */
962 115835 : if (new_rel_allfrozen > new_rel_allvisible)
963 0 : new_rel_allfrozen = new_rel_allvisible;
964 :
965 : /*
966 : * Now actually update rel's pg_class entry.
967 : *
968 : * In principle new_live_tuples could be -1 indicating that we (still)
969 : * don't know the tuple count. In practice that can't happen, since we
970 : * scan every page that isn't skipped using the visibility map.
971 : */
972 115835 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
973 : new_rel_allvisible, new_rel_allfrozen,
974 115835 : vacrel->nindexes > 0,
975 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
976 : &frozenxid_updated, &minmulti_updated, false);
977 :
978 : /*
979 : * Report results to the cumulative stats system, too.
980 : *
981 : * Deliberately avoid telling the stats system about LP_DEAD items that
982 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
983 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
984 : * It seems like a good idea to err on the side of not vacuuming again too
985 : * soon in cases where the failsafe prevented significant amounts of heap
986 : * vacuuming.
987 : */
988 115835 : pgstat_report_vacuum(rel,
989 43664 : Max(vacrel->new_live_tuples, 0),
990 115835 : vacrel->recently_dead_tuples +
991 115835 : vacrel->missed_dead_tuples,
992 : starttime);
993 115835 : pgstat_progress_end_command();
994 :
995 115835 : if (instrument)
996 : {
997 99216 : TimestampTz endtime = GetCurrentTimestamp();
998 :
999 99362 : if (verbose || params->log_vacuum_min_duration == 0 ||
1000 146 : TimestampDifferenceExceeds(starttime, endtime,
1001 146 : params->log_vacuum_min_duration))
1002 : {
1003 : long secs_dur;
1004 : int usecs_dur;
1005 : WalUsage walusage;
1006 : BufferUsage bufferusage;
1007 : StringInfoData buf;
1008 : char *msgfmt;
1009 : int32 diff;
1010 99070 : double read_rate = 0,
1011 99070 : write_rate = 0;
1012 : int64 total_blks_hit;
1013 : int64 total_blks_read;
1014 : int64 total_blks_dirtied;
1015 :
1016 99070 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1017 99070 : memset(&walusage, 0, sizeof(WalUsage));
1018 99070 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1019 99070 : memset(&bufferusage, 0, sizeof(BufferUsage));
1020 99070 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1021 :
1022 99070 : total_blks_hit = bufferusage.shared_blks_hit +
1023 99070 : bufferusage.local_blks_hit;
1024 99070 : total_blks_read = bufferusage.shared_blks_read +
1025 99070 : bufferusage.local_blks_read;
1026 99070 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1027 99070 : bufferusage.local_blks_dirtied;
1028 :
1029 99070 : initStringInfo(&buf);
1030 99070 : if (verbose)
1031 : {
1032 : /*
1033 : * Aggressiveness already reported earlier, in dedicated
1034 : * VACUUM VERBOSE ereport
1035 : */
1036 : Assert(!params->is_wraparound);
1037 13 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1038 : }
1039 99057 : else if (params->is_wraparound)
1040 : {
1041 : /*
1042 : * While it's possible for a VACUUM to be both is_wraparound
1043 : * and !aggressive, that's just a corner-case -- is_wraparound
1044 : * implies aggressive. Produce distinct output for the corner
1045 : * case all the same, just in case.
1046 : */
1047 99035 : if (vacrel->aggressive)
1048 99035 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1049 : else
1050 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1051 : }
1052 : else
1053 : {
1054 22 : if (vacrel->aggressive)
1055 17 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1056 : else
1057 5 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1058 : }
1059 99070 : appendStringInfo(&buf, msgfmt,
1060 : vacrel->dbname,
1061 : vacrel->relnamespace,
1062 : vacrel->relname,
1063 : vacrel->num_index_scans);
1064 136118 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1065 : vacrel->removed_pages,
1066 : new_rel_pages,
1067 : vacrel->scanned_pages,
1068 : orig_rel_pages == 0 ? 100.0 :
1069 37048 : 100.0 * vacrel->scanned_pages /
1070 : orig_rel_pages,
1071 : vacrel->eager_scanned_pages);
1072 99070 : appendStringInfo(&buf,
1073 99070 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1074 : vacrel->tuples_deleted,
1075 99070 : (int64) vacrel->new_rel_tuples,
1076 : vacrel->recently_dead_tuples);
1077 99070 : if (vacrel->missed_dead_tuples > 0)
1078 0 : appendStringInfo(&buf,
1079 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1080 : vacrel->missed_dead_tuples,
1081 : vacrel->missed_dead_pages);
1082 99070 : diff = (int32) (ReadNextTransactionId() -
1083 99070 : vacrel->cutoffs.OldestXmin);
1084 99070 : appendStringInfo(&buf,
1085 99070 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1086 : vacrel->cutoffs.OldestXmin, diff);
1087 99070 : if (frozenxid_updated)
1088 : {
1089 19007 : diff = (int32) (vacrel->NewRelfrozenXid -
1090 19007 : vacrel->cutoffs.relfrozenxid);
1091 19007 : appendStringInfo(&buf,
1092 19007 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1093 : vacrel->NewRelfrozenXid, diff);
1094 : }
1095 99070 : if (minmulti_updated)
1096 : {
1097 11 : diff = (int32) (vacrel->NewRelminMxid -
1098 11 : vacrel->cutoffs.relminmxid);
1099 11 : appendStringInfo(&buf,
1100 11 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1101 : vacrel->NewRelminMxid, diff);
1102 : }
1103 136118 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1104 : vacrel->new_frozen_tuple_pages,
1105 : orig_rel_pages == 0 ? 100.0 :
1106 37048 : 100.0 * vacrel->new_frozen_tuple_pages /
1107 : orig_rel_pages,
1108 : vacrel->tuples_frozen);
1109 :
1110 99070 : appendStringInfo(&buf,
1111 99070 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1112 : vacrel->new_all_visible_pages,
1113 99070 : vacrel->new_all_visible_all_frozen_pages +
1114 99070 : vacrel->new_all_frozen_pages,
1115 : vacrel->new_all_frozen_pages);
1116 99070 : if (vacrel->do_index_vacuuming)
1117 : {
1118 74915 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1119 74899 : appendStringInfoString(&buf, _("index scan not needed: "));
1120 : else
1121 16 : appendStringInfoString(&buf, _("index scan needed: "));
1122 :
1123 74915 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1124 : }
1125 : else
1126 : {
1127 24155 : if (!VacuumFailsafeActive)
1128 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1129 : else
1130 24155 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1131 :
1132 24155 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1133 : }
1134 136118 : appendStringInfo(&buf, msgfmt,
1135 : vacrel->lpdead_item_pages,
1136 : orig_rel_pages == 0 ? 100.0 :
1137 37048 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1138 : vacrel->lpdead_items);
1139 :
1140 99070 : if (vacrel->worker_usage.vacuum.nplanned > 0)
1141 1 : appendStringInfo(&buf,
1142 1 : _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1143 : vacrel->worker_usage.vacuum.nplanned,
1144 : vacrel->worker_usage.vacuum.nlaunched);
1145 :
1146 99070 : if (vacrel->worker_usage.cleanup.nplanned > 0)
1147 0 : appendStringInfo(&buf,
1148 0 : _("parallel workers: index cleanup: %d planned, %d launched\n"),
1149 : vacrel->worker_usage.cleanup.nplanned,
1150 : vacrel->worker_usage.cleanup.nlaunched);
1151 :
1152 250567 : for (int i = 0; i < vacrel->nindexes; i++)
1153 : {
1154 151497 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1155 :
1156 151497 : if (!istat)
1157 151468 : continue;
1158 :
1159 29 : appendStringInfo(&buf,
1160 29 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1161 29 : indnames[i],
1162 : istat->num_pages,
1163 : istat->pages_newly_deleted,
1164 : istat->pages_deleted,
1165 : istat->pages_free);
1166 : }
1167 99070 : if (track_cost_delay_timing)
1168 : {
1169 : /*
1170 : * We bypass the changecount mechanism because this value is
1171 : * only updated by the calling process. We also rely on the
1172 : * above call to pgstat_progress_end_command() to not clear
1173 : * the st_progress_param array.
1174 : */
1175 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1176 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1177 : }
1178 99070 : if (track_io_timing)
1179 : {
1180 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1181 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1182 :
1183 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1184 : read_ms, write_ms);
1185 : }
1186 99070 : if (secs_dur > 0 || usecs_dur > 0)
1187 : {
1188 99070 : read_rate = (double) BLCKSZ * total_blks_read /
1189 99070 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1190 99070 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1191 99070 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1192 : }
1193 99070 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1194 : read_rate, write_rate);
1195 99070 : appendStringInfo(&buf,
1196 99070 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1197 : total_blks_hit,
1198 : total_blks_read,
1199 : total_blks_dirtied);
1200 99070 : appendStringInfo(&buf,
1201 99070 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1202 : walusage.wal_records,
1203 : walusage.wal_fpi,
1204 : walusage.wal_bytes,
1205 : walusage.wal_fpi_bytes,
1206 : walusage.wal_buffers_full);
1207 :
1208 : /*
1209 : * Report the dead items memory usage.
1210 : *
1211 : * The num_dead_items_resets counter increases when we reset the
1212 : * collected dead items, so the counter is non-zero if at least
1213 : * one dead item is collected, even if index vacuuming is
1214 : * disabled.
1215 : */
1216 99070 : appendStringInfo(&buf,
1217 99070 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1218 : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1219 99070 : vacrel->num_dead_items_resets),
1220 99070 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1221 : vacrel->num_dead_items_resets,
1222 99070 : (double) dead_items_max_bytes / (1024 * 1024));
1223 99070 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1224 :
1225 99070 : ereport(verbose ? INFO : LOG,
1226 : (errmsg_internal("%s", buf.data)));
1227 99070 : pfree(buf.data);
1228 : }
1229 : }
1230 :
1231 : /* Cleanup index statistics and index names */
1232 291537 : for (int i = 0; i < vacrel->nindexes; i++)
1233 : {
1234 175702 : if (vacrel->indstats[i])
1235 1734 : pfree(vacrel->indstats[i]);
1236 :
1237 175702 : if (instrument)
1238 151753 : pfree(indnames[i]);
1239 : }
1240 115835 : }
1241 :
1242 : /*
1243 : * lazy_scan_heap() -- workhorse function for VACUUM
1244 : *
1245 : * This routine prunes each page in the heap, and considers the need to
1246 : * freeze remaining tuples with storage (not including pages that can be
1247 : * skipped using the visibility map). Also performs related maintenance
1248 : * of the FSM and visibility map. These steps all take place during an
1249 : * initial pass over the target heap relation.
1250 : *
1251 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1252 : * consists of deleting index tuples that point to LP_DEAD items left in
1253 : * heap pages following pruning. The earlier initial pass over the heap
1254 : * will have collected the TIDs whose index tuples need to be removed.
1255 : *
1256 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1257 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1258 : * as LP_UNUSED. This has to happen in a second, final pass over the
1259 : * heap, to preserve a basic invariant that all index AMs rely on: no
1260 : * extant index tuple can ever be allowed to contain a TID that points to
1261 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1262 : * recycling of line pointers to avoid index scans that get confused
1263 : * about which TID points to which tuple immediately after recycling.
1264 : * (Actually, this isn't a concern when the target heap relation happens
1265 : * to have no indexes, which allows us to safely apply the one-pass
1266 : * strategy as an optimization).
1267 : *
1268 : * In practice we often have enough space to fit all TIDs, and so won't
1269 : * need to call lazy_vacuum more than once, after our initial pass over
1270 : * the heap has totally finished. Otherwise things are slightly more
1271 : * complicated: our "initial pass" over the heap applies only to those
1272 : * pages that were pruned before we needed to call lazy_vacuum, and our
1273 : * "final pass" over the heap only vacuums these same heap pages.
1274 : * However, we process indexes in full every time lazy_vacuum is called,
1275 : * which makes index processing very inefficient when memory is in short
1276 : * supply.
1277 : */
1278 : static void
1279 115835 : lazy_scan_heap(LVRelState *vacrel)
1280 : {
1281 : ReadStream *stream;
1282 115835 : BlockNumber rel_pages = vacrel->rel_pages,
1283 115835 : blkno = 0,
1284 115835 : next_fsm_block_to_vacuum = 0;
1285 115835 : BlockNumber orig_eager_scan_success_limit =
1286 : vacrel->eager_scan_remaining_successes; /* for logging */
1287 115835 : Buffer vmbuffer = InvalidBuffer;
1288 115835 : const int initprog_index[] = {
1289 : PROGRESS_VACUUM_PHASE,
1290 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1291 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1292 : };
1293 : int64 initprog_val[3];
1294 :
1295 : /* Report that we're scanning the heap, advertising total # of blocks */
1296 115835 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1297 115835 : initprog_val[1] = rel_pages;
1298 115835 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1299 115835 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1300 :
1301 : /* Initialize for the first heap_vac_scan_next_block() call */
1302 115835 : vacrel->current_block = InvalidBlockNumber;
1303 115835 : vacrel->next_unskippable_block = InvalidBlockNumber;
1304 115835 : vacrel->next_unskippable_eager_scanned = false;
1305 115835 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1306 :
1307 : /*
1308 : * Set up the read stream for vacuum's first pass through the heap.
1309 : *
1310 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1311 : * explicit work in heap_vac_scan_next_block.
1312 : */
1313 115835 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1314 : vacrel->bstrategy,
1315 : vacrel->rel,
1316 : MAIN_FORKNUM,
1317 : heap_vac_scan_next_block,
1318 : vacrel,
1319 : sizeof(bool));
1320 :
1321 : while (true)
1322 407850 : {
1323 : Buffer buf;
1324 : Page page;
1325 523685 : bool was_eager_scanned = false;
1326 523685 : int ndeleted = 0;
1327 : bool has_lpdead_items;
1328 523685 : void *per_buffer_data = NULL;
1329 523685 : bool vm_page_frozen = false;
1330 523685 : bool got_cleanup_lock = false;
1331 :
1332 523685 : vacuum_delay_point(false);
1333 :
1334 : /*
1335 : * Regularly check if wraparound failsafe should trigger.
1336 : *
1337 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1338 : * relfrozenxid might start to look dangerously old before we reach
1339 : * that point. This check also provides failsafe coverage for the
1340 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1341 : * param set to 'off'.
1342 : */
1343 523685 : if (vacrel->scanned_pages > 0 &&
1344 407850 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1345 0 : lazy_check_wraparound_failsafe(vacrel);
1346 :
1347 : /*
1348 : * Consider if we definitely have enough space to process TIDs on page
1349 : * already. If we are close to overrunning the available space for
1350 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1351 : * this page. However, let's force at least one page-worth of tuples
1352 : * to be stored so as to ensure we do at least some work when the memory
1353 : * configured is so low that we run out before storing anything.
1354 : */
1355 523685 : if (vacrel->dead_items_info->num_items > 0 &&
1356 31318 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1357 : {
1358 : /*
1359 : * Before beginning index vacuuming, we release any pin we may
1360 : * hold on the visibility map page. This isn't necessary for
1361 : * correctness, but we do it anyway to avoid holding the pin
1362 : * across a lengthy, unrelated operation.
1363 : */
1364 17 : if (BufferIsValid(vmbuffer))
1365 : {
1366 17 : ReleaseBuffer(vmbuffer);
1367 17 : vmbuffer = InvalidBuffer;
1368 : }
1369 :
1370 : /* Perform a round of index and heap vacuuming */
1371 17 : vacrel->consider_bypass_optimization = false;
1372 17 : lazy_vacuum(vacrel);
1373 :
1374 : /*
1375 : * Vacuum the Free Space Map to make newly-freed space visible on
1376 : * upper-level FSM pages. Note that blkno is the previously
1377 : * processed block.
1378 : */
1379 17 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1380 : blkno + 1);
1381 17 : next_fsm_block_to_vacuum = blkno;
1382 :
1383 : /* Report that we are once again scanning the heap */
1384 17 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1385 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1386 : }
1387 :
1388 523685 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1389 :
1390 : /* The relation is exhausted. */
1391 523685 : if (!BufferIsValid(buf))
1392 115835 : break;
1393 :
1394 407850 : was_eager_scanned = *((bool *) per_buffer_data);
1395 407850 : CheckBufferIsPinnedOnce(buf);
1396 407850 : page = BufferGetPage(buf);
1397 407850 : blkno = BufferGetBlockNumber(buf);
1398 :
1399 407850 : vacrel->scanned_pages++;
1400 407850 : if (was_eager_scanned)
1401 0 : vacrel->eager_scanned_pages++;
1402 :
1403 : /* Report as block scanned, update error traceback information */
1404 407850 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1405 407850 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1406 : blkno, InvalidOffsetNumber);
1407 :
1408 : /*
1409 : * Pin the visibility map page in case we need to mark the page
1410 : * all-visible. In most cases this will be very cheap, because we'll
1411 : * already have the correct page pinned anyway.
1412 : */
1413 407850 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1414 :
1415 : /*
1416 : * We need a buffer cleanup lock to prune HOT chains and defragment
1417 : * the page in lazy_scan_prune. But when it's not possible to acquire
1418 : * a cleanup lock right away, we may be able to settle for reduced
1419 : * processing using lazy_scan_noprune.
1420 : */
1421 407850 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1422 :
1423 407850 : if (!got_cleanup_lock)
1424 109 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1425 :
1426 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1427 407850 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1428 407850 : vmbuffer))
1429 : {
1430 : /* Processed as new/empty page (lock and pin released) */
1431 1252 : continue;
1432 : }
1433 :
1434 : /*
1435 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1436 : * items in the dead_items area for later vacuuming, count live and
1437 : * recently dead tuples for vacuum logging, and determine if this
1438 : * block could later be truncated. If we encounter any xid/mxids that
1439 : * require advancing the relfrozenxid/relminxid, we'll have to wait
1440 : * for a cleanup lock and call lazy_scan_prune().
1441 : */
1442 406598 : if (!got_cleanup_lock &&
1443 109 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1444 : {
1445 : /*
1446 : * lazy_scan_noprune could not do all required processing. Wait
1447 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1448 : */
1449 : Assert(vacrel->aggressive);
1450 54 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1451 54 : LockBufferForCleanup(buf);
1452 54 : got_cleanup_lock = true;
1453 : }
1454 :
1455 : /*
1456 : * If we have a cleanup lock, we must now prune, freeze, and count
1457 : * tuples. We may have acquired the cleanup lock originally, or we may
1458 : * have gone back and acquired it after lazy_scan_noprune() returned
1459 : * false. Either way, the page hasn't been processed yet.
1460 : *
1461 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1462 : * recently_dead_tuples and live tuples for vacuum logging, determine
1463 : * if the block can later be truncated, and accumulate the details of
1464 : * remaining LP_DEAD line pointers on the page into dead_items. These
1465 : * dead items include those pruned by lazy_scan_prune() as well as
1466 : * line pointers previously marked LP_DEAD.
1467 : */
1468 406598 : if (got_cleanup_lock)
1469 406543 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1470 : vmbuffer,
1471 : &has_lpdead_items, &vm_page_frozen);
1472 :
1473 : /*
1474 : * Count an eagerly scanned page as a failure or a success.
1475 : *
1476 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1477 : * cleanup lock, we won't have frozen the page. However, we only count
1478 : * pages that were too new to require freezing as eager freeze
1479 : * failures.
1480 : *
1481 : * We could gather more information from lazy_scan_noprune() about
1482 : * whether or not there were tuples with XIDs or MXIDs older than the
1483 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1484 : * exclude pages skipped due to cleanup lock contention from eager
1485 : * freeze algorithm caps.
1486 : */
1487 406598 : if (got_cleanup_lock && was_eager_scanned)
1488 : {
1489 : /* Aggressive vacuums do not eager scan. */
1490 : Assert(!vacrel->aggressive);
1491 :
1492 0 : if (vm_page_frozen)
1493 : {
1494 0 : if (vacrel->eager_scan_remaining_successes > 0)
1495 0 : vacrel->eager_scan_remaining_successes--;
1496 :
1497 0 : if (vacrel->eager_scan_remaining_successes == 0)
1498 : {
1499 : /*
1500 : * Report only once that we disabled eager scanning. We
1501 : * may eagerly read ahead blocks in excess of the success
1502 : * or failure caps before attempting to freeze them, so we
1503 : * could reach here even after disabling additional eager
1504 : * scanning.
1505 : */
1506 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1507 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1508 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1509 : orig_eager_scan_success_limit,
1510 : vacrel->dbname, vacrel->relnamespace,
1511 : vacrel->relname)));
1512 :
1513 : /*
1514 : * If we hit our success cap, permanently disable eager
1515 : * scanning by setting the other eager scan management
1516 : * fields to their disabled values.
1517 : */
1518 0 : vacrel->eager_scan_remaining_fails = 0;
1519 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1520 0 : vacrel->eager_scan_max_fails_per_region = 0;
1521 : }
1522 : }
1523 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1524 0 : vacrel->eager_scan_remaining_fails--;
1525 : }
1526 :
1527 : /*
1528 : * Now drop the buffer lock and, potentially, update the FSM.
1529 : *
1530 : * Our goal is to update the freespace map the last time we touch the
1531 : * page. If we'll process a block in the second pass, we may free up
1532 : * additional space on the page, so it is better to update the FSM
1533 : * after the second pass. If the relation has no indexes, or if index
1534 : * vacuuming is disabled, there will be no second heap pass; if this
1535 : * particular page has no dead items, the second heap pass will not
1536 : * touch this page. So, in those cases, update the FSM now.
1537 : *
1538 : * Note: In corner cases, it's possible to miss updating the FSM
1539 : * entirely. If index vacuuming is currently enabled, we'll skip the
1540 : * FSM update now. But if failsafe mode is later activated, or there
1541 : * are so few dead tuples that index vacuuming is bypassed, there will
1542 : * also be no opportunity to update the FSM later, because we'll never
1543 : * revisit this page. Since updating the FSM is desirable but not
1544 : * absolutely required, that's OK.
1545 : */
1546 406598 : if (vacrel->nindexes == 0
1547 387867 : || !vacrel->do_index_vacuuming
1548 306042 : || !has_lpdead_items)
1549 389232 : {
1550 389232 : Size freespace = PageGetHeapFreeSpace(page);
1551 :
1552 389232 : UnlockReleaseBuffer(buf);
1553 389232 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1554 :
1555 : /*
1556 : * Periodically perform FSM vacuuming to make newly-freed space
1557 : * visible on upper FSM pages. This is done after vacuuming if the
1558 : * table has indexes. There will only be newly-freed space if we
1559 : * held the cleanup lock and lazy_scan_prune() was called.
1560 : */
1561 389232 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1562 491 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1563 : {
1564 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1565 : blkno);
1566 0 : next_fsm_block_to_vacuum = blkno;
1567 : }
1568 : }
1569 : else
1570 17366 : UnlockReleaseBuffer(buf);
1571 : }
1572 :
1573 115835 : vacrel->blkno = InvalidBlockNumber;
1574 115835 : if (BufferIsValid(vmbuffer))
1575 43767 : ReleaseBuffer(vmbuffer);
1576 :
1577 : /*
1578 : * Report that everything is now scanned. We never skip scanning the last
1579 : * block in the relation, so we can pass rel_pages here.
1580 : */
1581 115835 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1582 : rel_pages);
1583 :
1584 : /* now we can compute the new value for pg_class.reltuples */
1585 231670 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1586 : vacrel->scanned_pages,
1587 115835 : vacrel->live_tuples);
1588 :
1589 : /*
1590 : * Also compute the total number of surviving heap entries. In the
1591 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1592 : */
1593 115835 : vacrel->new_rel_tuples =
1594 115835 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1595 115835 : vacrel->missed_dead_tuples;
1596 :
1597 115835 : read_stream_end(stream);
1598 :
1599 : /*
1600 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1601 : * related heap vacuuming
1602 : */
1603 115835 : if (vacrel->dead_items_info->num_items > 0)
1604 811 : lazy_vacuum(vacrel);
1605 :
1606 : /*
1607 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1608 : * not there were indexes, and whether or not we bypassed index vacuuming.
1609 : * We can pass rel_pages here because we never skip scanning the last
1610 : * block of the relation.
1611 : */
1612 115835 : if (rel_pages > next_fsm_block_to_vacuum)
1613 43770 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1614 :
1615 : /* report all blocks vacuumed */
1616 115835 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1617 :
1618 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1619 115835 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1620 86966 : lazy_cleanup_all_indexes(vacrel);
1621 115835 : }
1622 :
1623 : /*
1624 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1625 : * for vacuum to process
1626 : *
1627 : * Every time lazy_scan_heap() needs a new block to process during its first
1628 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1629 : * heap_vac_scan_next_block() to get the next block.
1630 : *
1631 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1632 : * various thresholds to skip blocks which do not need to be processed and
1633 : * returns the next block to process or InvalidBlockNumber if there are no
1634 : * remaining blocks.
1635 : *
1636 : * The visibility status of the next block to process and whether or not it
1637 : * was eager scanned is set in the per_buffer_data.
1638 : *
1639 : * callback_private_data contains a reference to the LVRelState, passed to the
1640 : * read stream API during stream setup. The LVRelState is an in/out parameter
1641 : * here (locally named `vacrel`). Vacuum options and information about the
1642 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1643 : * that's all-visible but not all-frozen (to ensure that we don't update
1644 : * relfrozenxid in that case). vacrel also holds information about the next
1645 : * unskippable block -- as bookkeeping for this function.
1646 : */
1647 : static BlockNumber
1648 523685 : heap_vac_scan_next_block(ReadStream *stream,
1649 : void *callback_private_data,
1650 : void *per_buffer_data)
1651 : {
1652 : BlockNumber next_block;
1653 523685 : LVRelState *vacrel = callback_private_data;
1654 :
1655 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1656 523685 : next_block = vacrel->current_block + 1;
1657 :
1658 : /* Have we reached the end of the relation? */
1659 523685 : if (next_block >= vacrel->rel_pages)
1660 : {
1661 115835 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1662 : {
1663 42054 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1664 42054 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1665 : }
1666 115835 : return InvalidBlockNumber;
1667 : }
1668 :
1669 : /*
1670 : * We must be in one of the three following states:
1671 : */
1672 407850 : if (next_block > vacrel->next_unskippable_block ||
1673 178897 : vacrel->next_unskippable_block == InvalidBlockNumber)
1674 : {
1675 : /*
1676 : * 1. We have just processed an unskippable block (or we're at the
1677 : * beginning of the scan). Find the next unskippable block using the
1678 : * visibility map.
1679 : */
1680 : bool skipsallvis;
1681 :
1682 272723 : find_next_unskippable_block(vacrel, &skipsallvis);
1683 :
1684 : /*
1685 : * We now know the next block that we must process. It can be the
1686 : * next block after the one we just processed, or something further
1687 : * ahead. If it's further ahead, we can jump to it, but we choose to
1688 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1689 : * pages. Since we're reading sequentially, the OS should be doing
1690 : * readahead for us, so there's no gain in skipping a page now and
1691 : * then. Skipping such a range might even discourage sequential
1692 : * detection.
1693 : *
1694 : * This test also enables more frequent relfrozenxid advancement
1695 : * during non-aggressive VACUUMs. If the range has any all-visible
1696 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1697 : * real downside.
1698 : */
1699 272723 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1700 : {
1701 3837 : next_block = vacrel->next_unskippable_block;
1702 3837 : if (skipsallvis)
1703 65 : vacrel->skippedallvis = true;
1704 : }
1705 : }
1706 :
1707 : /* Now we must be in one of the two remaining states: */
1708 407850 : if (next_block < vacrel->next_unskippable_block)
1709 : {
1710 : /*
1711 : * 2. We are processing a range of blocks that we could have skipped
1712 : * but chose not to. We know that they are all-visible in the VM,
1713 : * otherwise they would've been unskippable.
1714 : */
1715 135127 : vacrel->current_block = next_block;
1716 : /* Block was not eager scanned */
1717 135127 : *((bool *) per_buffer_data) = false;
1718 135127 : return vacrel->current_block;
1719 : }
1720 : else
1721 : {
1722 : /*
1723 : * 3. We reached the next unskippable block. Process it. On next
1724 : * iteration, we will be back in state 1.
1725 : */
1726 : Assert(next_block == vacrel->next_unskippable_block);
1727 :
1728 272723 : vacrel->current_block = next_block;
1729 272723 : *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1730 272723 : return vacrel->current_block;
1731 : }
1732 : }
1733 :
1734 : /*
1735 : * Find the next unskippable block in a vacuum scan using the visibility map.
1736 : * The next unskippable block and its visibility information is updated in
1737 : * vacrel.
1738 : *
1739 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1740 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1741 : * was concurrently cleared, though. All that matters is that caller scan all
1742 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1743 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1744 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1745 : * to skip such a range is actually made, making everything safe.)
1746 : */
1747 : static void
1748 272723 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1749 : {
1750 272723 : BlockNumber rel_pages = vacrel->rel_pages;
1751 272723 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1752 272723 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1753 272723 : bool next_unskippable_eager_scanned = false;
1754 :
1755 272723 : *skipsallvis = false;
1756 :
1757 409762 : for (;; next_unskippable_block++)
1758 409762 : {
1759 682485 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1760 : next_unskippable_block,
1761 : &next_unskippable_vmbuffer);
1762 :
1763 :
1764 : /*
1765 : * At the start of each eager scan region, normal vacuums with eager
1766 : * scanning enabled reset the failure counter, allowing vacuum to
1767 : * resume eager scanning if it had been suspended in the previous
1768 : * region.
1769 : */
1770 682485 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1771 : {
1772 0 : vacrel->eager_scan_remaining_fails =
1773 0 : vacrel->eager_scan_max_fails_per_region;
1774 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1775 : }
1776 :
1777 : /*
1778 : * A block is unskippable if it is not all visible according to the
1779 : * visibility map.
1780 : */
1781 682485 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1782 : {
1783 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1784 229905 : break;
1785 : }
1786 :
1787 : /*
1788 : * Caller must scan the last page to determine whether it has tuples
1789 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1790 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1791 : * lock on rel to attempt a truncation that fails anyway, just because
1792 : * there are tuples on the last page (it is likely that there will be
1793 : * tuples on other nearby pages as well, but those can be skipped).
1794 : *
1795 : * Implement this by always treating the last block as unsafe to skip.
1796 : */
1797 452580 : if (next_unskippable_block == rel_pages - 1)
1798 38981 : break;
1799 :
1800 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1801 413599 : if (!vacrel->skipwithvm)
1802 418 : break;
1803 :
1804 : /*
1805 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1806 : * already frozen by now), so this page can be skipped.
1807 : */
1808 413181 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1809 403379 : continue;
1810 :
1811 : /*
1812 : * Aggressive vacuums cannot skip any all-visible pages that are not
1813 : * also all-frozen.
1814 : */
1815 9802 : if (vacrel->aggressive)
1816 3419 : break;
1817 :
1818 : /*
1819 : * Normal vacuums with eager scanning enabled only skip all-visible
1820 : * but not all-frozen pages if they have hit the failure limit for the
1821 : * current eager scan region.
1822 : */
1823 6383 : if (vacrel->eager_scan_remaining_fails > 0)
1824 : {
1825 0 : next_unskippable_eager_scanned = true;
1826 0 : break;
1827 : }
1828 :
1829 : /*
1830 : * All-visible blocks are safe to skip in a normal vacuum. But
1831 : * remember that the final range contains such a block for later.
1832 : */
1833 6383 : *skipsallvis = true;
1834 : }
1835 :
1836 : /* write the local variables back to vacrel */
1837 272723 : vacrel->next_unskippable_block = next_unskippable_block;
1838 272723 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1839 272723 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1840 272723 : }
1841 :
1842 : /*
1843 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1844 : *
1845 : * Must call here to handle both new and empty pages before calling
1846 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1847 : * with new or empty pages.
1848 : *
1849 : * It's necessary to consider new pages as a special case, since the rules for
1850 : * maintaining the visibility map and FSM with empty pages are a little
1851 : * different (though new pages can be truncated away during rel truncation).
1852 : *
1853 : * Empty pages are not really a special case -- they're just heap pages that
1854 : * have no allocated tuples (including even LP_UNUSED items). You might
1855 : * wonder why we need to handle them here all the same. It's only necessary
1856 : * because of a corner-case involving a hard crash during heap relation
1857 : * extension. If we ever make relation-extension crash safe, then it should
1858 : * no longer be necessary to deal with empty pages here (or new pages, for
1859 : * that matter).
1860 : *
1861 : * Caller must hold at least a shared lock. We might need to escalate the
1862 : * lock in that case, so the type of lock caller holds needs to be specified
1863 : * using 'sharelock' argument.
1864 : *
1865 : * Returns false in common case where caller should go on to call
1866 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1867 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1868 : * behalf.
1869 : *
1870 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1871 : * is passed here because neither empty nor new pages can be eagerly frozen.
1872 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1873 : * the same time that they are set all-visible, and we don't eagerly scan
1874 : * frozen pages.
1875 : */
1876 : static bool
1877 407850 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1878 : Page page, bool sharelock, Buffer vmbuffer)
1879 : {
1880 : Size freespace;
1881 :
1882 407850 : if (PageIsNew(page))
1883 : {
1884 : /*
1885 : * All-zeroes pages can be left over if either a backend extends the
1886 : * relation by a single page, but crashes before the newly initialized
1887 : * page has been written out, or when bulk-extending the relation
1888 : * (which creates a number of empty pages at the tail end of the
1889 : * relation), and then enters them into the FSM.
1890 : *
1891 : * Note we do not enter the page into the visibilitymap. That has the
1892 : * downside that we repeatedly visit this page in subsequent vacuums,
1893 : * but otherwise we'll never discover the space on a promoted standby.
1894 : * The harm of repeated checking ought to normally not be too bad. The
1895 : * space usually should be used at some point, otherwise there
1896 : * wouldn't be any regular vacuums.
1897 : *
1898 : * Make sure these pages are in the FSM, to ensure they can be reused.
1899 : * Do that by testing if there's any space recorded for the page. If
1900 : * not, enter it. We do so after releasing the lock on the heap page,
1901 : * the FSM is approximate, after all.
1902 : */
1903 1222 : UnlockReleaseBuffer(buf);
1904 :
1905 1222 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1906 : {
1907 666 : freespace = BLCKSZ - SizeOfPageHeaderData;
1908 :
1909 666 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1910 : }
1911 :
1912 1222 : return true;
1913 : }
1914 :
1915 406628 : if (PageIsEmpty(page))
1916 : {
1917 : /*
1918 : * It seems likely that caller will always be able to get a cleanup
1919 : * lock on an empty page. But don't take any chances -- escalate to
1920 : * an exclusive lock (still don't need a cleanup lock, though).
1921 : */
1922 30 : if (sharelock)
1923 : {
1924 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1925 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1926 :
1927 0 : if (!PageIsEmpty(page))
1928 : {
1929 : /* page isn't new or empty -- keep lock and pin for now */
1930 0 : return false;
1931 : }
1932 : }
1933 : else
1934 : {
1935 : /* Already have a full cleanup lock (which is more than enough) */
1936 : }
1937 :
1938 : /*
1939 : * Unlike new pages, empty pages are always set all-visible and
1940 : * all-frozen.
1941 : */
1942 30 : if (!PageIsAllVisible(page))
1943 : {
1944 : /* Lock vmbuffer before entering critical section */
1945 0 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
1946 :
1947 0 : START_CRIT_SECTION();
1948 :
1949 : /* mark buffer dirty before writing a WAL record */
1950 0 : MarkBufferDirty(buf);
1951 :
1952 0 : PageSetAllVisible(page);
1953 0 : PageClearPrunable(page);
1954 0 : visibilitymap_set(blkno,
1955 : vmbuffer,
1956 : VISIBILITYMAP_ALL_VISIBLE |
1957 : VISIBILITYMAP_ALL_FROZEN,
1958 0 : vacrel->rel->rd_locator);
1959 :
1960 : /*
1961 : * Emit WAL for setting PD_ALL_VISIBLE on the heap page and
1962 : * setting the VM.
1963 : */
1964 0 : if (RelationNeedsWAL(vacrel->rel))
1965 0 : log_heap_prune_and_freeze(vacrel->rel, buf,
1966 : vmbuffer,
1967 : VISIBILITYMAP_ALL_VISIBLE |
1968 : VISIBILITYMAP_ALL_FROZEN,
1969 : InvalidTransactionId, /* conflict xid */
1970 : false, /* cleanup lock */
1971 : PRUNE_VACUUM_SCAN, /* reason */
1972 : NULL, 0,
1973 : NULL, 0,
1974 : NULL, 0,
1975 : NULL, 0);
1976 :
1977 0 : END_CRIT_SECTION();
1978 :
1979 0 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
1980 :
1981 : /* Count the newly all-frozen pages for logging */
1982 0 : vacrel->new_all_visible_pages++;
1983 0 : vacrel->new_all_visible_all_frozen_pages++;
1984 : }
1985 :
1986 30 : freespace = PageGetHeapFreeSpace(page);
1987 30 : UnlockReleaseBuffer(buf);
1988 30 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1989 30 : return true;
1990 : }
1991 :
1992 : /* page isn't new or empty -- keep lock and pin */
1993 406598 : return false;
1994 : }
1995 :
1996 : /* qsort comparator for sorting OffsetNumbers */
1997 : static int
1998 4903084 : cmpOffsetNumbers(const void *a, const void *b)
1999 : {
2000 4903084 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
2001 : }
2002 :
2003 : /*
2004 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
2005 : *
2006 : * Caller must hold pin and buffer cleanup lock on the buffer.
2007 : *
2008 : * vmbuffer is the buffer containing the VM block with visibility information
2009 : * for the heap block, blkno.
2010 : *
2011 : * *has_lpdead_items is set to true or false depending on whether, upon return
2012 : * from this function, any LP_DEAD items are still present on the page.
2013 : *
2014 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2015 : * VM. The caller currently only uses this for determining whether an eagerly
2016 : * scanned page was successfully set all-frozen.
2017 : *
2018 : * Returns the number of tuples deleted from the page during HOT pruning.
2019 : */
2020 : static int
2021 406543 : lazy_scan_prune(LVRelState *vacrel,
2022 : Buffer buf,
2023 : BlockNumber blkno,
2024 : Page page,
2025 : Buffer vmbuffer,
2026 : bool *has_lpdead_items,
2027 : bool *vm_page_frozen)
2028 : {
2029 406543 : Relation rel = vacrel->rel;
2030 : PruneFreezeResult presult;
2031 406543 : PruneFreezeParams params = {
2032 : .relation = rel,
2033 : .buffer = buf,
2034 : .vmbuffer = vmbuffer,
2035 : .reason = PRUNE_VACUUM_SCAN,
2036 : .options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_SET_VM,
2037 406543 : .vistest = vacrel->vistest,
2038 406543 : .cutoffs = &vacrel->cutoffs,
2039 : };
2040 :
2041 : Assert(BufferGetBlockNumber(buf) == blkno);
2042 :
2043 : /*
2044 : * Prune all HOT-update chains and potentially freeze tuples on this page.
2045 : *
2046 : * If the relation has no indexes, we can immediately mark would-be dead
2047 : * items LP_UNUSED.
2048 : *
2049 : * The number of tuples removed from the page is returned in
2050 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
2051 : * presult.lpdead_items's final value can be thought of as the number of
2052 : * tuples that were deleted from indexes.
2053 : *
2054 : * We will update the VM after collecting LP_DEAD items and freezing
2055 : * tuples. Pruning will have determined whether or not the page is
2056 : * all-visible.
2057 : */
2058 406543 : if (vacrel->nindexes == 0)
2059 18731 : params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2060 :
2061 : /*
2062 : * Allow skipping full inspection of pages that the VM indicates are
2063 : * already all-frozen (which may be scanned due to SKIP_PAGES_THRESHOLD).
2064 : * However, if DISABLE_PAGE_SKIPPING was specified, we can't trust the VM,
2065 : * so we must examine the page to make sure it is truly all-frozen and fix
2066 : * it otherwise.
2067 : */
2068 406543 : if (vacrel->skipwithvm)
2069 405246 : params.options |= HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
2070 :
2071 406543 : heap_page_prune_and_freeze(¶ms,
2072 : &presult,
2073 : &vacrel->offnum,
2074 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2075 :
2076 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2077 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2078 :
2079 406543 : if (presult.nfrozen > 0)
2080 : {
2081 : /*
2082 : * We don't increment the new_frozen_tuple_pages instrumentation
2083 : * counter when nfrozen == 0, since it only counts pages with newly
2084 : * frozen tuples (don't confuse that with pages newly set all-frozen
2085 : * in VM).
2086 : */
2087 27016 : vacrel->new_frozen_tuple_pages++;
2088 : }
2089 :
2090 : /*
2091 : * Now save details of the LP_DEAD items from the page in vacrel
2092 : */
2093 406543 : if (presult.lpdead_items > 0)
2094 : {
2095 19652 : vacrel->lpdead_item_pages++;
2096 :
2097 : /*
2098 : * deadoffsets are collected incrementally in
2099 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2100 : * with an indeterminate order, but dead_items_add requires them to be
2101 : * sorted.
2102 : */
2103 19652 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2104 : cmpOffsetNumbers);
2105 :
2106 19652 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2107 : }
2108 :
2109 : /* Finally, add page-local counts to whole-VACUUM counts */
2110 406543 : if (presult.newly_all_visible)
2111 42065 : vacrel->new_all_visible_pages++;
2112 406543 : if (presult.newly_all_visible_frozen)
2113 30280 : vacrel->new_all_visible_all_frozen_pages++;
2114 406543 : if (presult.newly_all_frozen)
2115 3444 : vacrel->new_all_frozen_pages++;
2116 :
2117 : /* Capture if the page was newly set frozen */
2118 782806 : *vm_page_frozen = presult.newly_all_visible_frozen ||
2119 376263 : presult.newly_all_frozen;
2120 :
2121 406543 : vacrel->tuples_deleted += presult.ndeleted;
2122 406543 : vacrel->tuples_frozen += presult.nfrozen;
2123 406543 : vacrel->lpdead_items += presult.lpdead_items;
2124 406543 : vacrel->live_tuples += presult.live_tuples;
2125 406543 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2126 :
2127 : /* Can't truncate this page */
2128 406543 : if (presult.hastup)
2129 395672 : vacrel->nonempty_pages = blkno + 1;
2130 :
2131 : /* Did we find LP_DEAD items? */
2132 406543 : *has_lpdead_items = (presult.lpdead_items > 0);
2133 :
2134 406543 : return presult.ndeleted;
2135 : }
2136 :
2137 : /*
2138 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2139 : *
2140 : * Caller need only hold a pin and share lock on the buffer, unlike
2141 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2142 : * performed here, it's quite possible that an earlier opportunistic pruning
2143 : * operation left LP_DEAD items behind. We'll at least collect any such items
2144 : * in dead_items for removal from indexes.
2145 : *
2146 : * For aggressive VACUUM callers, we may return false to indicate that a full
2147 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2148 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2149 : * one or more tuples on the page. We always return true for non-aggressive
2150 : * callers.
2151 : *
2152 : * If this function returns true, *has_lpdead_items gets set to true or false
2153 : * depending on whether, upon return from this function, any LP_DEAD items are
2154 : * present on the page. If this function returns false, *has_lpdead_items
2155 : * is not updated.
2156 : */
2157 : static bool
2158 109 : lazy_scan_noprune(LVRelState *vacrel,
2159 : Buffer buf,
2160 : BlockNumber blkno,
2161 : Page page,
2162 : bool *has_lpdead_items)
2163 : {
2164 : OffsetNumber offnum,
2165 : maxoff;
2166 : int lpdead_items,
2167 : live_tuples,
2168 : recently_dead_tuples,
2169 : missed_dead_tuples;
2170 : bool hastup;
2171 : HeapTupleHeader tupleheader;
2172 109 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2173 109 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2174 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2175 :
2176 : Assert(BufferGetBlockNumber(buf) == blkno);
2177 :
2178 109 : hastup = false; /* for now */
2179 :
2180 109 : lpdead_items = 0;
2181 109 : live_tuples = 0;
2182 109 : recently_dead_tuples = 0;
2183 109 : missed_dead_tuples = 0;
2184 :
2185 109 : maxoff = PageGetMaxOffsetNumber(page);
2186 109 : for (offnum = FirstOffsetNumber;
2187 2818 : offnum <= maxoff;
2188 2709 : offnum = OffsetNumberNext(offnum))
2189 : {
2190 : ItemId itemid;
2191 : HeapTupleData tuple;
2192 :
2193 2763 : vacrel->offnum = offnum;
2194 2763 : itemid = PageGetItemId(page, offnum);
2195 :
2196 2763 : if (!ItemIdIsUsed(itemid))
2197 405 : continue;
2198 :
2199 2553 : if (ItemIdIsRedirected(itemid))
2200 : {
2201 107 : hastup = true;
2202 107 : continue;
2203 : }
2204 :
2205 2446 : if (ItemIdIsDead(itemid))
2206 : {
2207 : /*
2208 : * Deliberately don't set hastup=true here. See same point in
2209 : * lazy_scan_prune for an explanation.
2210 : */
2211 88 : deadoffsets[lpdead_items++] = offnum;
2212 88 : continue;
2213 : }
2214 :
2215 2358 : hastup = true; /* page prevents rel truncation */
2216 2358 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2217 2358 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2218 : &NoFreezePageRelfrozenXid,
2219 : &NoFreezePageRelminMxid))
2220 : {
2221 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2222 118 : if (vacrel->aggressive)
2223 : {
2224 : /*
2225 : * Aggressive VACUUMs must always be able to advance rel's
2226 : * relfrozenxid to a value >= FreezeLimit (and be able to
2227 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2228 : * The ongoing aggressive VACUUM won't be able to do that
2229 : * unless it can freeze an XID (or MXID) from this tuple now.
2230 : *
2231 : * The only safe option is to have caller perform processing
2232 : * of this page using lazy_scan_prune. Caller might have to
2233 : * wait a while for a cleanup lock, but it can't be helped.
2234 : */
2235 54 : vacrel->offnum = InvalidOffsetNumber;
2236 54 : return false;
2237 : }
2238 :
2239 : /*
2240 : * Non-aggressive VACUUMs are under no obligation to advance
2241 : * relfrozenxid (even by one XID). We can be much laxer here.
2242 : *
2243 : * Currently we always just accept an older final relfrozenxid
2244 : * and/or relminmxid value. We never make caller wait or work a
2245 : * little harder, even when it likely makes sense to do so.
2246 : */
2247 : }
2248 :
2249 2304 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2250 2304 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2251 2304 : tuple.t_len = ItemIdGetLength(itemid);
2252 2304 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2253 :
2254 2304 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2255 : buf))
2256 : {
2257 2261 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2258 : case HEAPTUPLE_LIVE:
2259 :
2260 : /*
2261 : * Count both cases as live, just like lazy_scan_prune
2262 : */
2263 2261 : live_tuples++;
2264 :
2265 2261 : break;
2266 35 : case HEAPTUPLE_DEAD:
2267 :
2268 : /*
2269 : * There is some useful work for pruning to do, that won't be
2270 : * done due to failure to get a cleanup lock.
2271 : */
2272 35 : missed_dead_tuples++;
2273 35 : break;
2274 2 : case HEAPTUPLE_RECENTLY_DEAD:
2275 :
2276 : /*
2277 : * Count in recently_dead_tuples, just like lazy_scan_prune
2278 : */
2279 2 : recently_dead_tuples++;
2280 2 : break;
2281 6 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2282 :
2283 : /*
2284 : * Do not count these rows as live, just like lazy_scan_prune
2285 : */
2286 6 : break;
2287 0 : default:
2288 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2289 : break;
2290 : }
2291 : }
2292 :
2293 55 : vacrel->offnum = InvalidOffsetNumber;
2294 :
2295 : /*
2296 : * By here we know for sure that caller can put off freezing and pruning
2297 : * this particular page until the next VACUUM. Remember its details now.
2298 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2299 : */
2300 55 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2301 55 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2302 :
2303 : /* Save any LP_DEAD items found on the page in dead_items */
2304 55 : if (vacrel->nindexes == 0)
2305 : {
2306 : /* Using one-pass strategy (since table has no indexes) */
2307 0 : if (lpdead_items > 0)
2308 : {
2309 : /*
2310 : * Perfunctory handling for the corner case where a single pass
2311 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2312 : * that there is one or more LP_DEAD items: just count the LP_DEAD
2313 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2314 : * but it beats having to maintain specialized heap vacuuming code
2315 : * forever, for vanishingly little benefit.)
2316 : */
2317 0 : hastup = true;
2318 0 : missed_dead_tuples += lpdead_items;
2319 : }
2320 : }
2321 55 : else if (lpdead_items > 0)
2322 : {
2323 : /*
2324 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2325 : * indexes will be deleted during index vacuuming (and then marked
2326 : * LP_UNUSED in the heap)
2327 : */
2328 2 : vacrel->lpdead_item_pages++;
2329 :
2330 2 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2331 :
2332 2 : vacrel->lpdead_items += lpdead_items;
2333 : }
2334 :
2335 : /*
2336 : * Finally, add relevant page-local counts to whole-VACUUM counts
2337 : */
2338 55 : vacrel->live_tuples += live_tuples;
2339 55 : vacrel->recently_dead_tuples += recently_dead_tuples;
2340 55 : vacrel->missed_dead_tuples += missed_dead_tuples;
2341 55 : if (missed_dead_tuples > 0)
2342 4 : vacrel->missed_dead_pages++;
2343 :
2344 : /* Can't truncate this page */
2345 55 : if (hastup)
2346 55 : vacrel->nonempty_pages = blkno + 1;
2347 :
2348 : /* Did we find LP_DEAD items? */
2349 55 : *has_lpdead_items = (lpdead_items > 0);
2350 :
2351 : /* Caller won't need to call lazy_scan_prune with same page */
2352 55 : return true;
2353 : }
2354 :
2355 : /*
2356 : * Main entry point for index vacuuming and heap vacuuming.
2357 : *
2358 : * Removes items collected in dead_items from table's indexes, then marks the
2359 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2360 : * for full details.
2361 : *
2362 : * Also empties dead_items, freeing up space for later TIDs.
2363 : *
2364 : * We may choose to bypass index vacuuming at this point, though only when the
2365 : * ongoing VACUUM operation will definitely only have one index scan/round of
2366 : * index vacuuming.
2367 : */
2368 : static void
2369 828 : lazy_vacuum(LVRelState *vacrel)
2370 : {
2371 : bool bypass;
2372 :
2373 : /* Should not end up here with no indexes */
2374 : Assert(vacrel->nindexes > 0);
2375 : Assert(vacrel->lpdead_item_pages > 0);
2376 :
2377 828 : if (!vacrel->do_index_vacuuming)
2378 : {
2379 : Assert(!vacrel->do_index_cleanup);
2380 9 : dead_items_reset(vacrel);
2381 9 : return;
2382 : }
2383 :
2384 : /*
2385 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2386 : *
2387 : * We currently only do this in cases where the number of LP_DEAD items
2388 : * for the entire VACUUM operation is close to zero. This avoids sharp
2389 : * discontinuities in the duration and overhead of successive VACUUM
2390 : * operations that run against the same table with a fixed workload.
2391 : * Ideally, successive VACUUM operations will behave as if there are
2392 : * exactly zero LP_DEAD items in cases where there are close to zero.
2393 : *
2394 : * This is likely to be helpful with a table that is continually affected
2395 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2396 : * have small aberrations that lead to just a few heap pages retaining
2397 : * only one or two LP_DEAD items. This is pretty common; even when the
2398 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2399 : * impossible to predict whether HOT will be applied in 100% of cases.
2400 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2401 : * HOT through careful tuning.
2402 : */
2403 819 : bypass = false;
2404 819 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2405 : {
2406 : BlockNumber threshold;
2407 :
2408 : Assert(vacrel->num_index_scans == 0);
2409 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2410 : Assert(vacrel->do_index_vacuuming);
2411 : Assert(vacrel->do_index_cleanup);
2412 :
2413 : /*
2414 : * This crossover point at which we'll start to do index vacuuming is
2415 : * expressed as a percentage of the total number of heap pages in the
2416 : * table that are known to have at least one LP_DEAD item. This is
2417 : * much more important than the total number of LP_DEAD items, since
2418 : * it's a proxy for the number of heap pages whose visibility map bits
2419 : * cannot be set on account of bypassing index and heap vacuuming.
2420 : *
2421 : * We apply one further precautionary test: the space currently used
2422 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2423 : * not exceed 32MB. This limits the risk that we will bypass index
2424 : * vacuuming again and again until eventually there is a VACUUM whose
2425 : * dead_items space is not CPU cache resident.
2426 : *
2427 : * We don't take any special steps to remember the LP_DEAD items (such
2428 : * as counting them in our final update to the stats system) when the
2429 : * optimization is applied. Though the accounting used in analyze.c's
2430 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2431 : * rows in its own stats report, that's okay. The discrepancy should
2432 : * be negligible. If this optimization is ever expanded to cover more
2433 : * cases then this may need to be reconsidered.
2434 : */
2435 789 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2436 792 : bypass = (vacrel->lpdead_item_pages < threshold &&
2437 3 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2438 : }
2439 :
2440 819 : if (bypass)
2441 : {
2442 : /*
2443 : * There are almost zero TIDs. Behave as if there were precisely
2444 : * zero: bypass index vacuuming, but do index cleanup.
2445 : *
2446 : * We expect that the ongoing VACUUM operation will finish very
2447 : * quickly, so there is no point in considering speeding up as a
2448 : * failsafe against wraparound failure. (Index cleanup is expected to
2449 : * finish very quickly in cases where there were no ambulkdelete()
2450 : * calls.)
2451 : */
2452 3 : vacrel->do_index_vacuuming = false;
2453 : }
2454 816 : else if (lazy_vacuum_all_indexes(vacrel))
2455 : {
2456 : /*
2457 : * We successfully completed a round of index vacuuming. Do related
2458 : * heap vacuuming now.
2459 : */
2460 816 : lazy_vacuum_heap_rel(vacrel);
2461 : }
2462 : else
2463 : {
2464 : /*
2465 : * Failsafe case.
2466 : *
2467 : * We attempted index vacuuming, but didn't finish a full round/full
2468 : * index scan. This happens when relfrozenxid or relminmxid is too
2469 : * far in the past.
2470 : *
2471 : * From this point on the VACUUM operation will do no further index
2472 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2473 : * back here again.
2474 : */
2475 : Assert(VacuumFailsafeActive);
2476 : }
2477 :
2478 : /*
2479 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2480 : * vacuum)
2481 : */
2482 819 : dead_items_reset(vacrel);
2483 : }
2484 :
2485 : /*
2486 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2487 : *
2488 : * Returns true in the common case when all indexes were successfully
2489 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2490 : * VACUUM operation is at risk of taking too long to finish, leading to
2491 : * wraparound failure.
2492 : */
2493 : static bool
2494 816 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2495 : {
2496 816 : bool allindexes = true;
2497 816 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2498 816 : const int progress_start_index[] = {
2499 : PROGRESS_VACUUM_PHASE,
2500 : PROGRESS_VACUUM_INDEXES_TOTAL
2501 : };
2502 816 : const int progress_end_index[] = {
2503 : PROGRESS_VACUUM_INDEXES_TOTAL,
2504 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2505 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2506 : };
2507 : int64 progress_start_val[2];
2508 : int64 progress_end_val[3];
2509 :
2510 : Assert(vacrel->nindexes > 0);
2511 : Assert(vacrel->do_index_vacuuming);
2512 : Assert(vacrel->do_index_cleanup);
2513 :
2514 : /* Precheck for XID wraparound emergencies */
2515 816 : if (lazy_check_wraparound_failsafe(vacrel))
2516 : {
2517 : /* Wraparound emergency -- don't even start an index scan */
2518 0 : return false;
2519 : }
2520 :
2521 : /*
2522 : * Report that we are now vacuuming indexes and the number of indexes to
2523 : * vacuum.
2524 : */
2525 816 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2526 816 : progress_start_val[1] = vacrel->nindexes;
2527 816 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2528 :
2529 816 : if (!ParallelVacuumIsActive(vacrel))
2530 : {
2531 2316 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2532 : {
2533 1525 : Relation indrel = vacrel->indrels[idx];
2534 1525 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2535 :
2536 1525 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2537 : old_live_tuples,
2538 : vacrel);
2539 :
2540 : /* Report the number of indexes vacuumed */
2541 1525 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2542 1525 : idx + 1);
2543 :
2544 1525 : if (lazy_check_wraparound_failsafe(vacrel))
2545 : {
2546 : /* Wraparound emergency -- end current index scan */
2547 0 : allindexes = false;
2548 0 : break;
2549 : }
2550 : }
2551 : }
2552 : else
2553 : {
2554 : /* Outsource everything to parallel variant */
2555 25 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2556 : vacrel->num_index_scans,
2557 : &(vacrel->worker_usage.vacuum));
2558 :
2559 : /*
2560 : * Do a postcheck to consider applying wraparound failsafe now. Note
2561 : * that parallel VACUUM only gets the precheck and this postcheck.
2562 : */
2563 25 : if (lazy_check_wraparound_failsafe(vacrel))
2564 0 : allindexes = false;
2565 : }
2566 :
2567 : /*
2568 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2569 : * each call here (except calls where we choose to do the failsafe). This
2570 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2571 : * of the failsafe triggering, which prevents the next call from taking
2572 : * place).
2573 : */
2574 : Assert(vacrel->num_index_scans > 0 ||
2575 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2576 : Assert(allindexes || VacuumFailsafeActive);
2577 :
2578 : /*
2579 : * Increase and report the number of index scans. Also, we reset
2580 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2581 : *
2582 : * We deliberately include the case where we started a round of bulk
2583 : * deletes that we weren't able to finish due to the failsafe triggering.
2584 : */
2585 816 : vacrel->num_index_scans++;
2586 816 : progress_end_val[0] = 0;
2587 816 : progress_end_val[1] = 0;
2588 816 : progress_end_val[2] = vacrel->num_index_scans;
2589 816 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2590 :
2591 816 : return allindexes;
2592 : }
2593 :
2594 : /*
2595 : * Read stream callback for vacuum's third phase (second pass over the heap).
2596 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2597 : * if there are no further blocks to vacuum.
2598 : *
2599 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2600 : */
2601 : static BlockNumber
2602 18179 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2603 : void *callback_private_data,
2604 : void *per_buffer_data)
2605 : {
2606 18179 : TidStoreIter *iter = callback_private_data;
2607 : TidStoreIterResult *iter_result;
2608 :
2609 18179 : iter_result = TidStoreIterateNext(iter);
2610 18179 : if (iter_result == NULL)
2611 816 : return InvalidBlockNumber;
2612 :
2613 : /*
2614 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2615 : * It is safe to copy the result, according to TidStoreIterateNext().
2616 : */
2617 17363 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2618 :
2619 17363 : return iter_result->blkno;
2620 : }
2621 :
2622 : /*
2623 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2624 : *
2625 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2626 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2627 : *
2628 : * We may also be able to truncate the line pointer array of the heap pages we
2629 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2630 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2631 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2632 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2633 : * page's line pointer array).
2634 : *
2635 : * Note: the reason for doing this as a second pass is we cannot remove the
2636 : * tuples until we've removed their index entries, and we want to process
2637 : * index entry removal in batches as large as possible.
2638 : */
2639 : static void
2640 816 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2641 : {
2642 : ReadStream *stream;
2643 816 : BlockNumber vacuumed_pages = 0;
2644 816 : Buffer vmbuffer = InvalidBuffer;
2645 : LVSavedErrInfo saved_err_info;
2646 : TidStoreIter *iter;
2647 :
2648 : Assert(vacrel->do_index_vacuuming);
2649 : Assert(vacrel->do_index_cleanup);
2650 : Assert(vacrel->num_index_scans > 0);
2651 :
2652 : /* Report that we are now vacuuming the heap */
2653 816 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2654 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2655 :
2656 : /* Update error traceback information */
2657 816 : update_vacuum_error_info(vacrel, &saved_err_info,
2658 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2659 : InvalidBlockNumber, InvalidOffsetNumber);
2660 :
2661 816 : iter = TidStoreBeginIterate(vacrel->dead_items);
2662 :
2663 : /*
2664 : * Set up the read stream for vacuum's second pass through the heap.
2665 : *
2666 : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2667 : * not need to wait for IO and does not perform locking. Once we support
2668 : * parallelism it should still be fine, as presumably the holder of locks
2669 : * would never be blocked by IO while holding the lock.
2670 : */
2671 816 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2672 : READ_STREAM_USE_BATCHING,
2673 : vacrel->bstrategy,
2674 : vacrel->rel,
2675 : MAIN_FORKNUM,
2676 : vacuum_reap_lp_read_stream_next,
2677 : iter,
2678 : sizeof(TidStoreIterResult));
2679 :
2680 : while (true)
2681 17363 : {
2682 : BlockNumber blkno;
2683 : Buffer buf;
2684 : Page page;
2685 : TidStoreIterResult *iter_result;
2686 : Size freespace;
2687 : OffsetNumber offsets[MaxOffsetNumber];
2688 : int num_offsets;
2689 :
2690 18179 : vacuum_delay_point(false);
2691 :
2692 18179 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2693 :
2694 : /* The relation is exhausted */
2695 18179 : if (!BufferIsValid(buf))
2696 816 : break;
2697 :
2698 17363 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2699 :
2700 : Assert(iter_result);
2701 17363 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2702 : Assert(num_offsets <= lengthof(offsets));
2703 :
2704 : /*
2705 : * Pin the visibility map page in case we need to mark the page
2706 : * all-visible. In most cases this will be very cheap, because we'll
2707 : * already have the correct page pinned anyway.
2708 : */
2709 17363 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2710 :
2711 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2712 17363 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2713 17363 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2714 : num_offsets, vmbuffer);
2715 :
2716 : /* Now that we've vacuumed the page, record its available space */
2717 17363 : page = BufferGetPage(buf);
2718 17363 : freespace = PageGetHeapFreeSpace(page);
2719 :
2720 17363 : UnlockReleaseBuffer(buf);
2721 17363 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2722 17363 : vacuumed_pages++;
2723 : }
2724 :
2725 816 : read_stream_end(stream);
2726 816 : TidStoreEndIterate(iter);
2727 :
2728 816 : vacrel->blkno = InvalidBlockNumber;
2729 816 : if (BufferIsValid(vmbuffer))
2730 816 : ReleaseBuffer(vmbuffer);
2731 :
2732 : /*
2733 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2734 : * the second heap pass. No more, no less.
2735 : */
2736 : Assert(vacrel->num_index_scans > 1 ||
2737 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2738 : vacuumed_pages == vacrel->lpdead_item_pages));
2739 :
2740 816 : ereport(DEBUG2,
2741 : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2742 : vacrel->relname, vacrel->dead_items_info->num_items,
2743 : vacuumed_pages)));
2744 :
2745 : /* Revert to the previous phase information for error traceback */
2746 816 : restore_vacuum_error_info(vacrel, &saved_err_info);
2747 816 : }
2748 :
2749 : /*
2750 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2751 : * vacrel->dead_items store.
2752 : *
2753 : * Caller must have an exclusive buffer lock on the buffer (though a full
2754 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2755 : * a pin on blkno's visibility map page.
2756 : */
2757 : static void
2758 17363 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2759 : OffsetNumber *deadoffsets, int num_offsets,
2760 : Buffer vmbuffer)
2761 : {
2762 17363 : Page page = BufferGetPage(buffer);
2763 : OffsetNumber unused[MaxHeapTuplesPerPage];
2764 17363 : int nunused = 0;
2765 : TransactionId newest_live_xid;
2766 17363 : TransactionId conflict_xid = InvalidTransactionId;
2767 : bool all_frozen;
2768 : LVSavedErrInfo saved_err_info;
2769 17363 : uint8 vmflags = 0;
2770 :
2771 : Assert(vacrel->do_index_vacuuming);
2772 :
2773 17363 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2774 :
2775 : /* Update error traceback information */
2776 17363 : update_vacuum_error_info(vacrel, &saved_err_info,
2777 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2778 : InvalidOffsetNumber);
2779 :
2780 : /*
2781 : * Before marking dead items unused, check whether the page will become
2782 : * all-visible once that change is applied. This lets us reap the tuples
2783 : * and mark the page all-visible within the same critical section,
2784 : * enabling both changes to be emitted in a single WAL record. Since the
2785 : * visibility checks may perform I/O and allocate memory, they must be
2786 : * done outside the critical section.
2787 : */
2788 17363 : if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2789 : vacrel->vistest, true,
2790 : deadoffsets, num_offsets,
2791 : &all_frozen, &newest_live_xid,
2792 : &vacrel->offnum))
2793 : {
2794 17275 : vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2795 17275 : if (all_frozen)
2796 : {
2797 13128 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
2798 : Assert(!TransactionIdIsValid(newest_live_xid));
2799 : }
2800 :
2801 : /*
2802 : * Take the lock on the vmbuffer before entering a critical section.
2803 : * The heap page lock must also be held while updating the VM to
2804 : * ensure consistency.
2805 : */
2806 17275 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2807 : }
2808 :
2809 17363 : START_CRIT_SECTION();
2810 :
2811 1194502 : for (int i = 0; i < num_offsets; i++)
2812 : {
2813 : ItemId itemid;
2814 1177139 : OffsetNumber toff = deadoffsets[i];
2815 :
2816 1177139 : itemid = PageGetItemId(page, toff);
2817 :
2818 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2819 1177139 : ItemIdSetUnused(itemid);
2820 1177139 : unused[nunused++] = toff;
2821 : }
2822 :
2823 : Assert(nunused > 0);
2824 :
2825 : /* Attempt to truncate line pointer array now */
2826 17363 : PageTruncateLinePointerArray(page);
2827 :
2828 17363 : if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2829 : {
2830 : /*
2831 : * The page is guaranteed to have had dead line pointers, so we always
2832 : * set PD_ALL_VISIBLE.
2833 : */
2834 17275 : PageSetAllVisible(page);
2835 17275 : PageClearPrunable(page);
2836 17275 : visibilitymap_set(blkno,
2837 : vmbuffer, vmflags,
2838 17275 : vacrel->rel->rd_locator);
2839 17275 : conflict_xid = newest_live_xid;
2840 : }
2841 :
2842 : /*
2843 : * Mark buffer dirty before we write WAL.
2844 : */
2845 17363 : MarkBufferDirty(buffer);
2846 :
2847 : /* XLOG stuff */
2848 17363 : if (RelationNeedsWAL(vacrel->rel))
2849 : {
2850 16267 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2851 : vmflags != 0 ? vmbuffer : InvalidBuffer,
2852 : vmflags,
2853 : conflict_xid,
2854 : false, /* no cleanup lock required */
2855 : PRUNE_VACUUM_CLEANUP,
2856 : NULL, 0, /* frozen */
2857 : NULL, 0, /* redirected */
2858 : NULL, 0, /* dead */
2859 : unused, nunused);
2860 : }
2861 :
2862 17363 : END_CRIT_SECTION();
2863 :
2864 17363 : if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
2865 : {
2866 : /* Count the newly set VM page for logging */
2867 17275 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2868 17275 : vacrel->new_all_visible_pages++;
2869 17275 : if (all_frozen)
2870 13128 : vacrel->new_all_visible_all_frozen_pages++;
2871 : }
2872 :
2873 : /* Revert to the previous phase information for error traceback */
2874 17363 : restore_vacuum_error_info(vacrel, &saved_err_info);
2875 17363 : }
2876 :
2877 : /*
2878 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2879 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2880 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2881 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2882 : *
2883 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2884 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2885 : * that it started out with.
2886 : *
2887 : * Returns true when failsafe has been triggered.
2888 : */
2889 : static bool
2890 118201 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2891 : {
2892 : /* Don't warn more than once per VACUUM */
2893 118201 : if (VacuumFailsafeActive)
2894 0 : return true;
2895 :
2896 118201 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2897 : {
2898 24271 : const int progress_index[] = {
2899 : PROGRESS_VACUUM_INDEXES_TOTAL,
2900 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2901 : PROGRESS_VACUUM_MODE
2902 : };
2903 24271 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
2904 :
2905 24271 : VacuumFailsafeActive = true;
2906 :
2907 : /*
2908 : * Abandon use of a buffer access strategy to allow use of all of
2909 : * shared buffers. We assume the caller who allocated the memory for
2910 : * the BufferAccessStrategy will free it.
2911 : */
2912 24271 : vacrel->bstrategy = NULL;
2913 :
2914 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2915 24271 : vacrel->do_index_vacuuming = false;
2916 24271 : vacrel->do_index_cleanup = false;
2917 24271 : vacrel->do_rel_truncate = false;
2918 :
2919 : /* Reset the progress counters and set the failsafe mode */
2920 24271 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
2921 :
2922 24271 : ereport(WARNING,
2923 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2924 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2925 : vacrel->num_index_scans),
2926 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2927 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2928 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2929 :
2930 : /* Stop applying cost limits from this point on */
2931 24271 : VacuumCostActive = false;
2932 24271 : VacuumCostBalance = 0;
2933 :
2934 24271 : return true;
2935 : }
2936 :
2937 93930 : return false;
2938 : }
2939 :
2940 : /*
2941 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2942 : */
2943 : static void
2944 86966 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2945 : {
2946 86966 : double reltuples = vacrel->new_rel_tuples;
2947 86966 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2948 86966 : const int progress_start_index[] = {
2949 : PROGRESS_VACUUM_PHASE,
2950 : PROGRESS_VACUUM_INDEXES_TOTAL
2951 : };
2952 86966 : const int progress_end_index[] = {
2953 : PROGRESS_VACUUM_INDEXES_TOTAL,
2954 : PROGRESS_VACUUM_INDEXES_PROCESSED
2955 : };
2956 : int64 progress_start_val[2];
2957 86966 : int64 progress_end_val[2] = {0, 0};
2958 :
2959 : Assert(vacrel->do_index_cleanup);
2960 : Assert(vacrel->nindexes > 0);
2961 :
2962 : /*
2963 : * Report that we are now cleaning up indexes and the number of indexes to
2964 : * cleanup.
2965 : */
2966 86966 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2967 86966 : progress_start_val[1] = vacrel->nindexes;
2968 86966 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2969 :
2970 86966 : if (!ParallelVacuumIsActive(vacrel))
2971 : {
2972 225391 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2973 : {
2974 138451 : Relation indrel = vacrel->indrels[idx];
2975 138451 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2976 :
2977 276902 : vacrel->indstats[idx] =
2978 138451 : lazy_cleanup_one_index(indrel, istat, reltuples,
2979 : estimated_count, vacrel);
2980 :
2981 : /* Report the number of indexes cleaned up */
2982 138451 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2983 138451 : idx + 1);
2984 : }
2985 : }
2986 : else
2987 : {
2988 : /* Outsource everything to parallel variant */
2989 26 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2990 : vacrel->num_index_scans,
2991 : estimated_count,
2992 : &(vacrel->worker_usage.cleanup));
2993 : }
2994 :
2995 : /* Reset the progress counters */
2996 86966 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2997 86966 : }
2998 :
2999 : /*
3000 : * lazy_vacuum_one_index() -- vacuum index relation.
3001 : *
3002 : * Delete all the index tuples containing a TID collected in
3003 : * vacrel->dead_items. Also update running statistics. Exact
3004 : * details depend on index AM's ambulkdelete routine.
3005 : *
3006 : * reltuples is the number of heap tuples to be passed to the
3007 : * bulkdelete callback. It's always assumed to be estimated.
3008 : * See indexam.sgml for more info.
3009 : *
3010 : * Returns bulk delete stats derived from input stats
3011 : */
3012 : static IndexBulkDeleteResult *
3013 1525 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3014 : double reltuples, LVRelState *vacrel)
3015 : {
3016 : IndexVacuumInfo ivinfo;
3017 : LVSavedErrInfo saved_err_info;
3018 :
3019 1525 : ivinfo.index = indrel;
3020 1525 : ivinfo.heaprel = vacrel->rel;
3021 1525 : ivinfo.analyze_only = false;
3022 1525 : ivinfo.report_progress = false;
3023 1525 : ivinfo.estimated_count = true;
3024 1525 : ivinfo.message_level = DEBUG2;
3025 1525 : ivinfo.num_heap_tuples = reltuples;
3026 1525 : ivinfo.strategy = vacrel->bstrategy;
3027 :
3028 : /*
3029 : * Update error traceback information.
3030 : *
3031 : * The index name is saved during this phase and restored immediately
3032 : * after this phase. See vacuum_error_callback.
3033 : */
3034 : Assert(vacrel->indname == NULL);
3035 1525 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3036 1525 : update_vacuum_error_info(vacrel, &saved_err_info,
3037 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3038 : InvalidBlockNumber, InvalidOffsetNumber);
3039 :
3040 : /* Do bulk deletion */
3041 1525 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3042 : vacrel->dead_items_info);
3043 :
3044 : /* Revert to the previous phase information for error traceback */
3045 1525 : restore_vacuum_error_info(vacrel, &saved_err_info);
3046 1525 : pfree(vacrel->indname);
3047 1525 : vacrel->indname = NULL;
3048 :
3049 1525 : return istat;
3050 : }
3051 :
3052 : /*
3053 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3054 : *
3055 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3056 : * of heap tuples and estimated_count is true if reltuples is an
3057 : * estimated value. See indexam.sgml for more info.
3058 : *
3059 : * Returns bulk delete stats derived from input stats
3060 : */
3061 : static IndexBulkDeleteResult *
3062 138451 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3063 : double reltuples, bool estimated_count,
3064 : LVRelState *vacrel)
3065 : {
3066 : IndexVacuumInfo ivinfo;
3067 : LVSavedErrInfo saved_err_info;
3068 :
3069 138451 : ivinfo.index = indrel;
3070 138451 : ivinfo.heaprel = vacrel->rel;
3071 138451 : ivinfo.analyze_only = false;
3072 138451 : ivinfo.report_progress = false;
3073 138451 : ivinfo.estimated_count = estimated_count;
3074 138451 : ivinfo.message_level = DEBUG2;
3075 :
3076 138451 : ivinfo.num_heap_tuples = reltuples;
3077 138451 : ivinfo.strategy = vacrel->bstrategy;
3078 :
3079 : /*
3080 : * Update error traceback information.
3081 : *
3082 : * The index name is saved during this phase and restored immediately
3083 : * after this phase. See vacuum_error_callback.
3084 : */
3085 : Assert(vacrel->indname == NULL);
3086 138451 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3087 138451 : update_vacuum_error_info(vacrel, &saved_err_info,
3088 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3089 : InvalidBlockNumber, InvalidOffsetNumber);
3090 :
3091 138451 : istat = vac_cleanup_one_index(&ivinfo, istat);
3092 :
3093 : /* Revert to the previous phase information for error traceback */
3094 138451 : restore_vacuum_error_info(vacrel, &saved_err_info);
3095 138451 : pfree(vacrel->indname);
3096 138451 : vacrel->indname = NULL;
3097 :
3098 138451 : return istat;
3099 : }
3100 :
3101 : /*
3102 : * should_attempt_truncation - should we attempt to truncate the heap?
3103 : *
3104 : * Don't even think about it unless we have a shot at releasing a goodly
3105 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3106 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3107 : * be particularly disruptive.
3108 : *
3109 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3110 : * system might be refusing to allocate new XIDs at this point. The system
3111 : * definitely won't return to normal unless and until VACUUM actually advances
3112 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3113 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3114 : * truncate the table under these circumstances, an XID exhaustion error might
3115 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3116 : * There is very little chance of truncation working out when the failsafe is
3117 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3118 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3119 : * we're called.
3120 : */
3121 : static bool
3122 115835 : should_attempt_truncation(LVRelState *vacrel)
3123 : {
3124 : BlockNumber possibly_freeable;
3125 :
3126 115835 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3127 24433 : return false;
3128 :
3129 91402 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3130 91402 : if (possibly_freeable > 0 &&
3131 208 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3132 208 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3133 192 : return true;
3134 :
3135 91210 : return false;
3136 : }
3137 :
3138 : /*
3139 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3140 : */
3141 : static void
3142 192 : lazy_truncate_heap(LVRelState *vacrel)
3143 : {
3144 192 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3145 : BlockNumber new_rel_pages;
3146 : bool lock_waiter_detected;
3147 : int lock_retry;
3148 :
3149 : /* Report that we are now truncating */
3150 192 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3151 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3152 :
3153 : /* Update error traceback information one last time */
3154 192 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3155 : vacrel->nonempty_pages, InvalidOffsetNumber);
3156 :
3157 : /*
3158 : * Loop until no more truncating can be done.
3159 : */
3160 : do
3161 : {
3162 : /*
3163 : * We need full exclusive lock on the relation in order to do
3164 : * truncation. If we can't get it, give up rather than waiting --- we
3165 : * don't want to block other backends, and we don't want to deadlock
3166 : * (which is quite possible considering we already hold a lower-grade
3167 : * lock).
3168 : */
3169 192 : lock_waiter_detected = false;
3170 192 : lock_retry = 0;
3171 : while (true)
3172 : {
3173 395 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3174 190 : break;
3175 :
3176 : /*
3177 : * Check for interrupts while trying to (re-)acquire the exclusive
3178 : * lock.
3179 : */
3180 205 : CHECK_FOR_INTERRUPTS();
3181 :
3182 205 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3183 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3184 : {
3185 : /*
3186 : * We failed to establish the lock in the specified number of
3187 : * retries. This means we give up truncating.
3188 : */
3189 2 : ereport(vacrel->verbose ? INFO : DEBUG2,
3190 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3191 : vacrel->relname)));
3192 3 : return;
3193 : }
3194 :
3195 203 : (void) WaitLatch(MyLatch,
3196 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3197 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3198 : WAIT_EVENT_VACUUM_TRUNCATE);
3199 203 : ResetLatch(MyLatch);
3200 : }
3201 :
3202 : /*
3203 : * Now that we have exclusive lock, look to see if the rel has grown
3204 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3205 : * the newly added pages presumably contain non-deletable tuples.
3206 : */
3207 190 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3208 190 : if (new_rel_pages != orig_rel_pages)
3209 : {
3210 : /*
3211 : * Note: we intentionally don't update vacrel->rel_pages with the
3212 : * new rel size here. If we did, it would amount to assuming that
3213 : * the new pages are empty, which is unlikely. Leaving the numbers
3214 : * alone amounts to assuming that the new pages have the same
3215 : * tuple density as existing ones, which is less unlikely.
3216 : */
3217 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3218 0 : return;
3219 : }
3220 :
3221 : /*
3222 : * Scan backwards from the end to verify that the end pages actually
3223 : * contain no tuples. This is *necessary*, not optional, because
3224 : * other backends could have added tuples to these pages whilst we
3225 : * were vacuuming.
3226 : */
3227 190 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3228 190 : vacrel->blkno = new_rel_pages;
3229 :
3230 190 : if (new_rel_pages >= orig_rel_pages)
3231 : {
3232 : /* can't do anything after all */
3233 1 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3234 1 : return;
3235 : }
3236 :
3237 : /*
3238 : * Okay to truncate.
3239 : */
3240 189 : RelationTruncate(vacrel->rel, new_rel_pages);
3241 :
3242 : /*
3243 : * We can release the exclusive lock as soon as we have truncated.
3244 : * Other backends can't safely access the relation until they have
3245 : * processed the smgr invalidation that smgrtruncate sent out ... but
3246 : * that should happen as part of standard invalidation processing once
3247 : * they acquire lock on the relation.
3248 : */
3249 189 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3250 :
3251 : /*
3252 : * Update statistics. Here, it *is* correct to adjust rel_pages
3253 : * without also touching reltuples, since the tuple count wasn't
3254 : * changed by the truncation.
3255 : */
3256 189 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3257 189 : vacrel->rel_pages = new_rel_pages;
3258 :
3259 189 : ereport(vacrel->verbose ? INFO : DEBUG2,
3260 : (errmsg("table \"%s\": truncated %u to %u pages",
3261 : vacrel->relname,
3262 : orig_rel_pages, new_rel_pages)));
3263 189 : orig_rel_pages = new_rel_pages;
3264 189 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3265 : }
3266 :
3267 : /*
3268 : * Rescan end pages to verify that they are (still) empty of tuples.
3269 : *
3270 : * Returns number of nondeletable pages (last nonempty page + 1).
3271 : */
3272 : static BlockNumber
3273 190 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3274 : {
3275 : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3276 : "prefetch size must be power of 2");
3277 :
3278 : BlockNumber blkno;
3279 : BlockNumber prefetchedUntil;
3280 : instr_time starttime;
3281 :
3282 : /* Initialize the starttime if we check for conflicting lock requests */
3283 190 : INSTR_TIME_SET_CURRENT(starttime);
3284 :
3285 : /*
3286 : * Start checking blocks at what we believe relation end to be and move
3287 : * backwards. (Strange coding of loop control is needed because blkno is
3288 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3289 : * in forward direction, so that OS-level readahead can kick in.
3290 : */
3291 190 : blkno = vacrel->rel_pages;
3292 190 : prefetchedUntil = InvalidBlockNumber;
3293 3373 : while (blkno > vacrel->nonempty_pages)
3294 : {
3295 : Buffer buf;
3296 : Page page;
3297 : OffsetNumber offnum,
3298 : maxoff;
3299 : bool hastup;
3300 :
3301 : /*
3302 : * Check if another process requests a lock on our relation. We are
3303 : * holding an AccessExclusiveLock here, so they will be waiting. We
3304 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3305 : * only check if that interval has elapsed once every 32 blocks to
3306 : * keep the number of system calls and actual shared lock table
3307 : * lookups to a minimum.
3308 : */
3309 3189 : if ((blkno % 32) == 0)
3310 : {
3311 : instr_time currenttime;
3312 : instr_time elapsed;
3313 :
3314 107 : INSTR_TIME_SET_CURRENT(currenttime);
3315 107 : elapsed = currenttime;
3316 107 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3317 107 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3318 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3319 : {
3320 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3321 : {
3322 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3323 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3324 : vacrel->relname)));
3325 :
3326 0 : *lock_waiter_detected = true;
3327 0 : return blkno;
3328 : }
3329 0 : starttime = currenttime;
3330 : }
3331 : }
3332 :
3333 : /*
3334 : * We don't insert a vacuum delay point here, because we have an
3335 : * exclusive lock on the table which we want to hold for as short a
3336 : * time as possible. We still need to check for interrupts however.
3337 : */
3338 3189 : CHECK_FOR_INTERRUPTS();
3339 :
3340 3189 : blkno--;
3341 :
3342 : /* If we haven't prefetched this lot yet, do so now. */
3343 3189 : if (prefetchedUntil > blkno)
3344 : {
3345 : BlockNumber prefetchStart;
3346 : BlockNumber pblkno;
3347 :
3348 265 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3349 4631 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3350 : {
3351 4366 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3352 4366 : CHECK_FOR_INTERRUPTS();
3353 : }
3354 265 : prefetchedUntil = prefetchStart;
3355 : }
3356 :
3357 3189 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3358 : vacrel->bstrategy);
3359 :
3360 : /* In this phase we only need shared access to the buffer */
3361 3189 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3362 :
3363 3189 : page = BufferGetPage(buf);
3364 :
3365 3189 : if (PageIsNew(page) || PageIsEmpty(page))
3366 : {
3367 1437 : UnlockReleaseBuffer(buf);
3368 1437 : continue;
3369 : }
3370 :
3371 1752 : hastup = false;
3372 1752 : maxoff = PageGetMaxOffsetNumber(page);
3373 1752 : for (offnum = FirstOffsetNumber;
3374 3498 : offnum <= maxoff;
3375 1746 : offnum = OffsetNumberNext(offnum))
3376 : {
3377 : ItemId itemid;
3378 :
3379 1752 : itemid = PageGetItemId(page, offnum);
3380 :
3381 : /*
3382 : * Note: any non-unused item should be taken as a reason to keep
3383 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3384 : * we must not have cleaned out its index entries.
3385 : */
3386 1752 : if (ItemIdIsUsed(itemid))
3387 : {
3388 6 : hastup = true;
3389 6 : break; /* can stop scanning */
3390 : }
3391 : } /* scan along page */
3392 :
3393 1752 : UnlockReleaseBuffer(buf);
3394 :
3395 : /* Done scanning if we found a tuple here */
3396 1752 : if (hastup)
3397 6 : return blkno + 1;
3398 : }
3399 :
3400 : /*
3401 : * If we fall out of the loop, all the previously-thought-to-be-empty
3402 : * pages still are; we need not bother to look at the last known-nonempty
3403 : * page.
3404 : */
3405 184 : return vacrel->nonempty_pages;
3406 : }
3407 :
3408 : /*
3409 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3410 : * shared memory). Sets both in vacrel for caller.
3411 : *
3412 : * Also handles parallel initialization as part of allocating dead_items in
3413 : * DSM when required.
3414 : */
3415 : static void
3416 115835 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3417 : {
3418 : VacDeadItemsInfo *dead_items_info;
3419 330875 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3420 99205 : autovacuum_work_mem != -1 ?
3421 215040 : autovacuum_work_mem : maintenance_work_mem;
3422 :
3423 : /*
3424 : * Initialize state for a parallel vacuum. As of now, only one worker can
3425 : * be used for an index, so we invoke parallelism only if there are at
3426 : * least two indexes on a table.
3427 : */
3428 115835 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3429 : {
3430 : /*
3431 : * Since parallel workers cannot access data in temporary tables, we
3432 : * can't perform parallel vacuum on them.
3433 : */
3434 41312 : if (RelationUsesLocalBuffers(vacrel->rel))
3435 : {
3436 : /*
3437 : * Give warning only if the user explicitly tries to perform a
3438 : * parallel vacuum on the temporary table.
3439 : */
3440 4 : if (nworkers > 0)
3441 4 : ereport(WARNING,
3442 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3443 : vacrel->relname)));
3444 : }
3445 : else
3446 41308 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3447 : vacrel->nindexes, nworkers,
3448 : vac_work_mem,
3449 41308 : vacrel->verbose ? INFO : DEBUG2,
3450 : vacrel->bstrategy);
3451 :
3452 : /*
3453 : * If parallel mode started, dead_items and dead_items_info spaces are
3454 : * allocated in DSM.
3455 : */
3456 41312 : if (ParallelVacuumIsActive(vacrel))
3457 : {
3458 26 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3459 : &vacrel->dead_items_info);
3460 26 : return;
3461 : }
3462 : }
3463 :
3464 : /*
3465 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3466 : * locally.
3467 : */
3468 :
3469 115809 : dead_items_info = palloc_object(VacDeadItemsInfo);
3470 115809 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3471 115809 : dead_items_info->num_items = 0;
3472 115809 : vacrel->dead_items_info = dead_items_info;
3473 :
3474 115809 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3475 : }
3476 :
3477 : /*
3478 : * Add the given block number and offset numbers to dead_items.
3479 : */
3480 : static void
3481 19654 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3482 : int num_offsets)
3483 : {
3484 19654 : const int prog_index[2] = {
3485 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3486 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3487 : };
3488 : int64 prog_val[2];
3489 :
3490 19654 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3491 19654 : vacrel->dead_items_info->num_items += num_offsets;
3492 :
3493 : /* update the progress information */
3494 19654 : prog_val[0] = vacrel->dead_items_info->num_items;
3495 19654 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3496 19654 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3497 19654 : }
3498 :
3499 : /*
3500 : * Forget all collected dead items.
3501 : */
3502 : static void
3503 828 : dead_items_reset(LVRelState *vacrel)
3504 : {
3505 : /* Update statistics for dead items */
3506 828 : vacrel->num_dead_items_resets++;
3507 828 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3508 :
3509 828 : if (ParallelVacuumIsActive(vacrel))
3510 : {
3511 25 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3512 25 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3513 : &vacrel->dead_items_info);
3514 25 : return;
3515 : }
3516 :
3517 : /* Recreate the tidstore with the same max_bytes limitation */
3518 803 : TidStoreDestroy(vacrel->dead_items);
3519 803 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3520 :
3521 : /* Reset the counter */
3522 803 : vacrel->dead_items_info->num_items = 0;
3523 : }
3524 :
3525 : /*
3526 : * Perform cleanup for resources allocated in dead_items_alloc
3527 : */
3528 : static void
3529 115835 : dead_items_cleanup(LVRelState *vacrel)
3530 : {
3531 115835 : if (!ParallelVacuumIsActive(vacrel))
3532 : {
3533 : /* Don't bother with pfree here */
3534 115809 : return;
3535 : }
3536 :
3537 : /* End parallel mode */
3538 26 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3539 26 : vacrel->pvs = NULL;
3540 : }
3541 :
3542 : #ifdef USE_ASSERT_CHECKING
3543 :
3544 : /*
3545 : * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3546 : * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3547 : * reason not to use it outside of asserts.
3548 : */
3549 : bool
3550 : heap_page_is_all_visible(Relation rel, Buffer buf,
3551 : GlobalVisState *vistest,
3552 : bool *all_frozen,
3553 : TransactionId *newest_live_xid,
3554 : OffsetNumber *logging_offnum)
3555 : {
3556 : /*
3557 : * Pass allow_update_vistest as false so that the GlobalVisState
3558 : * boundaries used here match those used by the pruning code we are
3559 : * cross-checking. Allowing an update could move the boundaries between
3560 : * the two calls, causing a spurious assertion failure.
3561 : */
3562 : return heap_page_would_be_all_visible(rel, buf,
3563 : vistest, false,
3564 : NULL, 0,
3565 : all_frozen,
3566 : newest_live_xid,
3567 : logging_offnum);
3568 : }
3569 : #endif
3570 :
3571 : /*
3572 : * Check whether the heap page in buf is all-visible except for the dead
3573 : * tuples referenced in the deadoffsets array.
3574 : *
3575 : * Vacuum uses this to check if a page would become all-visible after reaping
3576 : * known dead tuples. This function does not remove the dead items.
3577 : *
3578 : * This cannot be called in a critical section, as the visibility checks may
3579 : * perform IO and allocate memory.
3580 : *
3581 : * Returns true if the page is all-visible other than the provided
3582 : * deadoffsets and false otherwise.
3583 : *
3584 : * vistest is used to determine visibility. If allow_update_vistest is true,
3585 : * the boundaries of the GlobalVisState may be updated when checking the
3586 : * visibility of the newest live XID on the page.
3587 : *
3588 : * Output parameters:
3589 : *
3590 : * - *all_frozen: true if every tuple on the page is frozen
3591 : * - *newest_live_xid: newest xmin of live tuples on the page
3592 : * - *logging_offnum: OffsetNumber of current tuple being processed;
3593 : * used by vacuum's error callback system.
3594 : *
3595 : * Callers looking to verify that the page is already all-visible can call
3596 : * heap_page_is_all_visible().
3597 : *
3598 : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3599 : * If you modify this function, ensure consistency with that code. An
3600 : * assertion cross-checks that both remain in agreement. Do not introduce new
3601 : * side-effects.
3602 : */
3603 : static bool
3604 17363 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3605 : GlobalVisState *vistest,
3606 : bool allow_update_vistest,
3607 : OffsetNumber *deadoffsets,
3608 : int ndeadoffsets,
3609 : bool *all_frozen,
3610 : TransactionId *newest_live_xid,
3611 : OffsetNumber *logging_offnum)
3612 : {
3613 17363 : Page page = BufferGetPage(buf);
3614 17363 : BlockNumber blockno = BufferGetBlockNumber(buf);
3615 : OffsetNumber offnum,
3616 : maxoff;
3617 17363 : bool all_visible = true;
3618 17363 : int matched_dead_count = 0;
3619 :
3620 17363 : *newest_live_xid = InvalidTransactionId;
3621 17363 : *all_frozen = true;
3622 :
3623 : Assert(ndeadoffsets == 0 || deadoffsets);
3624 :
3625 : #ifdef USE_ASSERT_CHECKING
3626 : /* Confirm input deadoffsets[] is strictly sorted */
3627 : if (ndeadoffsets > 1)
3628 : {
3629 : for (int i = 1; i < ndeadoffsets; i++)
3630 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3631 : }
3632 : #endif
3633 :
3634 17363 : maxoff = PageGetMaxOffsetNumber(page);
3635 17363 : for (offnum = FirstOffsetNumber;
3636 1724185 : offnum <= maxoff && all_visible;
3637 1706822 : offnum = OffsetNumberNext(offnum))
3638 : {
3639 : ItemId itemid;
3640 : HeapTupleData tuple;
3641 : TransactionId dead_after;
3642 :
3643 : /*
3644 : * Set the offset number so that we can display it along with any
3645 : * error that occurred while processing this tuple.
3646 : */
3647 1706822 : *logging_offnum = offnum;
3648 1706822 : itemid = PageGetItemId(page, offnum);
3649 :
3650 : /* Unused or redirect line pointers are of no interest */
3651 1706822 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3652 1226661 : continue;
3653 :
3654 1656866 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3655 :
3656 : /*
3657 : * Dead line pointers can have index pointers pointing to them. So
3658 : * they can't be treated as visible
3659 : */
3660 1656866 : if (ItemIdIsDead(itemid))
3661 : {
3662 1176705 : if (!deadoffsets ||
3663 1176705 : matched_dead_count >= ndeadoffsets ||
3664 1176705 : deadoffsets[matched_dead_count] != offnum)
3665 : {
3666 0 : *all_frozen = all_visible = false;
3667 0 : break;
3668 : }
3669 1176705 : matched_dead_count++;
3670 1176705 : continue;
3671 : }
3672 :
3673 : Assert(ItemIdIsNormal(itemid));
3674 :
3675 480161 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3676 480161 : tuple.t_len = ItemIdGetLength(itemid);
3677 480161 : tuple.t_tableOid = RelationGetRelid(rel);
3678 :
3679 : /* Visibility checks may do IO or allocate memory */
3680 : Assert(CritSectionCount == 0);
3681 480161 : switch (HeapTupleSatisfiesVacuumHorizon(&tuple, buf, &dead_after))
3682 : {
3683 480107 : case HEAPTUPLE_LIVE:
3684 : {
3685 : TransactionId xmin;
3686 :
3687 : /* Check heap_prune_record_unchanged_lp_normal comments */
3688 480107 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3689 : {
3690 0 : all_visible = false;
3691 0 : *all_frozen = false;
3692 0 : break;
3693 : }
3694 :
3695 : /*
3696 : * The inserter definitely committed. But we don't know if
3697 : * it is old enough that everyone sees it as committed.
3698 : * Don't check that now.
3699 : *
3700 : * If we scan all tuples without finding one that prevents
3701 : * the page from being all-visible, we then check whether
3702 : * any snapshot still considers the newest XID on the page
3703 : * to be running. In that case, the page is not considered
3704 : * all-visible.
3705 : */
3706 480107 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3707 :
3708 : /* Track newest xmin on page. */
3709 480107 : if (TransactionIdFollows(xmin, *newest_live_xid) &&
3710 : TransactionIdIsNormal(xmin))
3711 18293 : *newest_live_xid = xmin;
3712 :
3713 : /* Check whether this tuple is already frozen or not */
3714 601558 : if (all_visible && *all_frozen &&
3715 121451 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3716 4208 : *all_frozen = false;
3717 : }
3718 480107 : break;
3719 :
3720 54 : case HEAPTUPLE_DEAD:
3721 : case HEAPTUPLE_RECENTLY_DEAD:
3722 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3723 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3724 : {
3725 54 : all_visible = false;
3726 54 : *all_frozen = false;
3727 54 : break;
3728 : }
3729 0 : default:
3730 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3731 : break;
3732 : }
3733 : } /* scan along page */
3734 :
3735 : /*
3736 : * After processing all the live tuples on the page, if the newest xmin
3737 : * among them may still be considered running by any snapshot, the page
3738 : * cannot be all-visible.
3739 : */
3740 17363 : if (all_visible &&
3741 21484 : TransactionIdIsNormal(*newest_live_xid) &&
3742 4175 : GlobalVisTestXidConsideredRunning(vistest, *newest_live_xid,
3743 : allow_update_vistest))
3744 : {
3745 34 : all_visible = false;
3746 34 : *all_frozen = false;
3747 : }
3748 :
3749 : /* Clear the offset information once we have processed the given page. */
3750 17363 : *logging_offnum = InvalidOffsetNumber;
3751 :
3752 17363 : return all_visible;
3753 : }
3754 :
3755 : /*
3756 : * Update index statistics in pg_class if the statistics are accurate.
3757 : */
3758 : static void
3759 91420 : update_relstats_all_indexes(LVRelState *vacrel)
3760 : {
3761 91420 : Relation *indrels = vacrel->indrels;
3762 91420 : int nindexes = vacrel->nindexes;
3763 91420 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3764 :
3765 : Assert(vacrel->do_index_cleanup);
3766 :
3767 229961 : for (int idx = 0; idx < nindexes; idx++)
3768 : {
3769 138541 : Relation indrel = indrels[idx];
3770 138541 : IndexBulkDeleteResult *istat = indstats[idx];
3771 :
3772 138541 : if (istat == NULL || istat->estimated_count)
3773 136814 : continue;
3774 :
3775 : /* Update index statistics */
3776 1727 : vac_update_relstats(indrel,
3777 : istat->num_pages,
3778 : istat->num_index_tuples,
3779 : 0, 0,
3780 : false,
3781 : InvalidTransactionId,
3782 : InvalidMultiXactId,
3783 : NULL, NULL, false);
3784 : }
3785 91420 : }
3786 :
3787 : /*
3788 : * Error context callback for errors occurring during vacuum. The error
3789 : * context messages for index phases should match the messages set in parallel
3790 : * vacuum. If you change this function for those phases, change
3791 : * parallel_vacuum_error_callback() as well.
3792 : */
3793 : static void
3794 104863 : vacuum_error_callback(void *arg)
3795 : {
3796 104863 : LVRelState *errinfo = arg;
3797 :
3798 104863 : switch (errinfo->phase)
3799 : {
3800 8 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3801 8 : if (BlockNumberIsValid(errinfo->blkno))
3802 : {
3803 0 : if (OffsetNumberIsValid(errinfo->offnum))
3804 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3805 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3806 : else
3807 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3808 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3809 : }
3810 : else
3811 8 : errcontext("while scanning relation \"%s.%s\"",
3812 : errinfo->relnamespace, errinfo->relname);
3813 8 : break;
3814 :
3815 1 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3816 1 : if (BlockNumberIsValid(errinfo->blkno))
3817 : {
3818 0 : if (OffsetNumberIsValid(errinfo->offnum))
3819 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3820 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3821 : else
3822 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3823 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3824 : }
3825 : else
3826 1 : errcontext("while vacuuming relation \"%s.%s\"",
3827 : errinfo->relnamespace, errinfo->relname);
3828 1 : break;
3829 :
3830 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3831 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3832 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3833 0 : break;
3834 :
3835 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3836 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3837 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3838 0 : break;
3839 :
3840 3 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3841 3 : if (BlockNumberIsValid(errinfo->blkno))
3842 3 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3843 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3844 3 : break;
3845 :
3846 104851 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3847 : default:
3848 104851 : return; /* do nothing; the errinfo may not be
3849 : * initialized */
3850 : }
3851 : }
3852 :
3853 : /*
3854 : * Updates the information required for vacuum error callback. This also saves
3855 : * the current information which can be later restored via restore_vacuum_error_info.
3856 : */
3857 : static void
3858 566197 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3859 : int phase, BlockNumber blkno, OffsetNumber offnum)
3860 : {
3861 566197 : if (saved_vacrel)
3862 : {
3863 158155 : saved_vacrel->offnum = vacrel->offnum;
3864 158155 : saved_vacrel->blkno = vacrel->blkno;
3865 158155 : saved_vacrel->phase = vacrel->phase;
3866 : }
3867 :
3868 566197 : vacrel->blkno = blkno;
3869 566197 : vacrel->offnum = offnum;
3870 566197 : vacrel->phase = phase;
3871 566197 : }
3872 :
3873 : /*
3874 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3875 : */
3876 : static void
3877 158155 : restore_vacuum_error_info(LVRelState *vacrel,
3878 : const LVSavedErrInfo *saved_vacrel)
3879 : {
3880 158155 : vacrel->blkno = saved_vacrel->blkno;
3881 158155 : vacrel->offnum = saved_vacrel->offnum;
3882 158155 : vacrel->phase = saved_vacrel->phase;
3883 158155 : }
|