Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that we continue to
26 : * call them phases here.
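 *
 * As a rough illustration only (a simplified sketch of the cycle described
 * above, not the exact control flow of lazy_scan_heap()):
 *
 *     while (heap blocks remain)               -- phase I
 *         prune/freeze the page, save dead TIDs in the TID store
 *         if (the TID store is full)
 *             vacuum indexes                   -- phase II
 *             vacuum the pruned heap pages     -- phase III
 *             empty the TID store, resume phase I
 *     vacuum indexes                           -- final phase II
 *     vacuum the pruned heap pages             -- final phase III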
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
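 *
 * For example, with SKIP_PAGES_THRESHOLD at 32, a run of 20 consecutive
 * skippable pages is read anyway, while a run of 40 such pages is skipped.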
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
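 *
 * To make the arithmetic concrete (illustrative numbers only): with
 * EAGER_SCAN_REGION_SIZE = 4096 and vacuum_max_eager_freeze_failure_rate set
 * to 0.03, a normal vacuum tolerates roughly 0.03 * 4096 = 122 eager freeze
 * failures per region. And if the relation begins the vacuum with 10,000
 * all-visible but not all-frozen pages, the global success cap is
 * MAX_EAGER_FREEZE_SUCCESS_RATE (0.2) * 10,000 = 2,000 eager freezes.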
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * storing dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
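 *
 * Schematically, the check in lazy_scan_heap() that triggers an early round
 * of index and heap vacuuming boils down to (simplified):
 *
 *     if (TidStoreMemoryUsage(dead_items) > dead_items_info->max_bytes)
 *         lazy_vacuum(vacrel);    -- phases II and III, then resume phase I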
120 : *
121 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include <math.h>
133 :
134 : #include "access/genam.h"
135 : #include "access/heapam.h"
136 : #include "access/htup_details.h"
137 : #include "access/multixact.h"
138 : #include "access/tidstore.h"
139 : #include "access/transam.h"
140 : #include "access/visibilitymap.h"
141 : #include "access/xloginsert.h"
142 : #include "catalog/storage.h"
143 : #include "commands/progress.h"
144 : #include "commands/vacuum.h"
145 : #include "common/int.h"
146 : #include "common/pg_prng.h"
147 : #include "executor/instrument.h"
148 : #include "miscadmin.h"
149 : #include "pgstat.h"
150 : #include "portability/instr_time.h"
151 : #include "postmaster/autovacuum.h"
152 : #include "storage/bufmgr.h"
153 : #include "storage/freespace.h"
154 : #include "storage/lmgr.h"
155 : #include "storage/read_stream.h"
156 : #include "utils/lsyscache.h"
157 : #include "utils/pg_rusage.h"
158 : #include "utils/timestamp.h"
159 :
160 :
161 : /*
162 : * Space/time tradeoff parameters: do these need to be user-tunable?
163 : *
164 : * To consider truncating the relation, we want there to be at least
165 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
166 : * is less) potentially-freeable pages.
167 : */
168 : #define REL_TRUNCATE_MINIMUM 1000
169 : #define REL_TRUNCATE_FRACTION 16
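
/*
 * For example (illustrative numbers): a 64000-page relation is considered
 * for truncation once at least Min(1000, 64000 / 16) = 1000 pages at its
 * end are potentially freeable, while a 4800-page relation needs only
 * Min(1000, 4800 / 16) = 300 such pages.
 */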
170 :
171 : /*
172 : * Timing parameters for truncate locking heuristics.
173 : *
174 : * These were not exposed as user tunable GUC values because it didn't seem
175 : * that the potential for improvement was great enough to merit the cost of
176 : * supporting them.
177 : */
178 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
179 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
180 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
181 :
182 : /*
183 : * Threshold that controls whether we bypass index vacuuming and heap
184 : * vacuuming as an optimization
185 : */
186 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
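
/*
 * For instance (illustrative numbers; see lazy_vacuum() for the complete set
 * of conditions): in a 50000-page table, index and heap vacuuming may be
 * bypassed only when fewer than 0.02 * 50000 = 1000 pages have LP_DEAD items.
 */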
187 :
188 : /*
189 : * Perform a failsafe check each time we scan another 4GB of pages.
190 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
191 : */
192 : #define FAILSAFE_EVERY_PAGES \
193 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
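
/*
 * With the default 8kB BLCKSZ, that works out to 4GB / 8192 = 524288 pages,
 * i.e. 2^19.
 */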
194 :
195 : /*
196 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
197 : * (it won't be exact because we only vacuum FSM after processing a heap page
198 : * that has some removable tuples). When there are indexes, this is ignored,
199 : * and we vacuum FSM after each index/heap cleaning pass.
200 : */
201 : #define VACUUM_FSM_EVERY_PAGES \
202 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
203 :
204 : /*
205 : * Before we consider skipping a page that's marked as clean in the
206 : * visibility map, we must've seen at least this many clean pages.
207 : */
208 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
209 :
210 : /*
211 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
212 : * Needs to be a power of 2.
213 : */
214 : #define PREFETCH_SIZE ((BlockNumber) 32)
215 :
216 : /*
217 : * Macro to check if we are in a parallel vacuum. If true, we are in
218 : * parallel mode and the DSM segment is initialized.
219 : */
220 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
221 :
222 : /* Phases of vacuum during which we report error context. */
223 : typedef enum
224 : {
225 : VACUUM_ERRCB_PHASE_UNKNOWN,
226 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
227 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
228 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
229 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
230 : VACUUM_ERRCB_PHASE_TRUNCATE,
231 : } VacErrPhase;
232 :
233 : /*
234 : * An eager scan of a page that is set all-frozen in the VM is considered
235 : * "successful". To spread out freezing overhead across multiple normal
236 : * vacuums, we limit the number of successful eager page freezes. The maximum
237 : * number of eager page freezes is calculated as a ratio of the all-visible
238 : * but not all-frozen pages at the beginning of the vacuum.
239 : */
240 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
241 :
242 : /*
243 : * On the assumption that different regions of the table tend to have
244 : * similarly aged data, once vacuum fails to freeze
245 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
246 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
247 : * to another region of the table with potentially older data.
248 : */
249 : #define EAGER_SCAN_REGION_SIZE 4096
250 :
251 : /*
252 : * heap_vac_scan_next_block() sets these flags to communicate information
253 : * about the block it read to the caller.
254 : */
255 : #define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
256 : #define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
257 :
258 : typedef struct LVRelState
259 : {
260 : /* Target heap relation and its indexes */
261 : Relation rel;
262 : Relation *indrels;
263 : int nindexes;
264 :
265 : /* Buffer access strategy and parallel vacuum state */
266 : BufferAccessStrategy bstrategy;
267 : ParallelVacuumState *pvs;
268 :
269 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
270 : bool aggressive;
271 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
272 : bool skipwithvm;
273 : /* Consider index vacuuming bypass optimization? */
274 : bool consider_bypass_optimization;
275 :
276 : /* Doing index vacuuming, index cleanup, rel truncation? */
277 : bool do_index_vacuuming;
278 : bool do_index_cleanup;
279 : bool do_rel_truncate;
280 :
281 : /* VACUUM operation's cutoffs for freezing and pruning */
282 : struct VacuumCutoffs cutoffs;
283 : GlobalVisState *vistest;
284 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
285 : TransactionId NewRelfrozenXid;
286 : MultiXactId NewRelminMxid;
287 : bool skippedallvis;
288 :
289 : /* Error reporting state */
290 : char *dbname;
291 : char *relnamespace;
292 : char *relname;
293 : char *indname; /* Current index name */
294 : BlockNumber blkno; /* used only for heap operations */
295 : OffsetNumber offnum; /* used only for heap operations */
296 : VacErrPhase phase;
297 : bool verbose; /* VACUUM VERBOSE? */
298 :
299 : /*
300 : * dead_items stores TIDs whose index tuples are deleted by index
301 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
302 : * that has been processed by lazy_scan_prune. Also needed by
303 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
304 : * LP_UNUSED during second heap pass.
305 : *
306 : * Both dead_items and dead_items_info are allocated in shared memory in
307 : * parallel vacuum cases.
308 : */
309 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
310 : VacDeadItemsInfo *dead_items_info;
311 :
312 : BlockNumber rel_pages; /* total number of pages */
313 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
314 :
315 : /*
316 : * Count of all-visible blocks eagerly scanned (for logging only). This
317 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
318 : */
319 : BlockNumber eager_scanned_pages;
320 :
321 : BlockNumber removed_pages; /* # pages removed by relation truncation */
322 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
323 :
324 : /* # pages newly set all-visible in the VM */
325 : BlockNumber vm_new_visible_pages;
326 :
327 : /*
328 : * # pages newly set all-visible and all-frozen in the VM. This is a
329 : * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
330 : * all pages set all-visible, but vm_new_visible_frozen_pages includes
331 : * only those which were also set all-frozen.
332 : */
333 : BlockNumber vm_new_visible_frozen_pages;
334 :
335 : /* # all-visible pages newly set all-frozen in the VM */
336 : BlockNumber vm_new_frozen_pages;
337 :
338 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
339 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
340 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
341 :
342 : /* Statistics output by us, for table */
343 : double new_rel_tuples; /* new estimated total # of tuples */
344 : double new_live_tuples; /* new estimated total # of live tuples */
345 : /* Statistics output by index AMs */
346 : IndexBulkDeleteResult **indstats;
347 :
348 : /* Instrumentation counters */
349 : int num_index_scans;
350 : /* Counters that follow are only for scanned_pages */
351 : int64 tuples_deleted; /* # deleted from table */
352 : int64 tuples_frozen; /* # newly frozen */
353 : int64 lpdead_items; /* # deleted from indexes */
354 : int64 live_tuples; /* # live tuples remaining */
355 : int64 recently_dead_tuples; /* # dead, but not yet removable */
356 : int64 missed_dead_tuples; /* # removable, but not removed */
357 :
358 : /* State maintained by heap_vac_scan_next_block() */
359 : BlockNumber current_block; /* last block returned */
360 : BlockNumber next_unskippable_block; /* next unskippable block */
361 : bool next_unskippable_allvis; /* its visibility status */
362 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
363 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
364 :
365 : /* State related to managing eager scanning of all-visible pages */
366 :
367 : /*
368 : * A normal vacuum that has failed to freeze too many eagerly scanned
369 : * blocks in a region suspends eager scanning.
370 : * next_eager_scan_region_start is the block number of the first block
371 : * eligible for resumed eager scanning.
372 : *
373 : * When eager scanning is permanently disabled, either initially
374 : * (including for aggressive vacuum) or due to hitting the success cap,
375 : * this is set to InvalidBlockNumber.
376 : */
377 : BlockNumber next_eager_scan_region_start;
378 :
379 : /*
380 : * The remaining number of blocks a normal vacuum may eagerly scan and
381 : * successfully freeze. When eager scanning is enabled, this is
382 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
383 : * all-visible but not all-frozen pages. For each eager freeze success,
384 : * this is decremented. Once it hits 0, eager scanning is permanently
385 : * disabled. It is initialized to 0 if eager scanning starts out disabled
386 : * (including for aggressive vacuum).
387 : */
388 : BlockNumber eager_scan_remaining_successes;
389 :
390 : /*
391 : * The maximum number of blocks which may be eagerly scanned and not
392 : * frozen before eager scanning is temporarily suspended. This is
393 : * configurable both globally, via the
394 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
395 : * storage parameter of the same name. It is calculated as
396 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
397 : * It is 0 when eager scanning is disabled.
398 : */
399 : BlockNumber eager_scan_max_fails_per_region;
400 :
401 : /*
402 : * The remaining number of eagerly scanned blocks vacuum may fail to freeze
403 : * (due to age) in the current eager scan region. Vacuum resets it to
404 : * eager_scan_max_fails_per_region each time it enters a new region of the
405 : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
406 : * suspended until the next region. It is also 0 if eager scanning has
407 : * been permanently disabled.
408 : */
409 : BlockNumber eager_scan_remaining_fails;
410 : } LVRelState;
411 :
412 :
413 : /* Struct for saving and restoring vacuum error information. */
414 : typedef struct LVSavedErrInfo
415 : {
416 : BlockNumber blkno;
417 : OffsetNumber offnum;
418 : VacErrPhase phase;
419 : } LVSavedErrInfo;
420 :
421 :
422 : /* non-export function prototypes */
423 : static void lazy_scan_heap(LVRelState *vacrel);
424 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
425 : const VacuumParams params);
426 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
427 : void *callback_private_data,
428 : void *per_buffer_data);
429 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
430 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
431 : BlockNumber blkno, Page page,
432 : bool sharelock, Buffer vmbuffer);
433 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
434 : BlockNumber blkno, Page page,
435 : Buffer vmbuffer, bool all_visible_according_to_vm,
436 : bool *has_lpdead_items, bool *vm_page_frozen);
437 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
438 : BlockNumber blkno, Page page,
439 : bool *has_lpdead_items);
440 : static void lazy_vacuum(LVRelState *vacrel);
441 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
442 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
443 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
444 : Buffer buffer, OffsetNumber *deadoffsets,
445 : int num_offsets, Buffer vmbuffer);
446 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
447 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
448 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
449 : IndexBulkDeleteResult *istat,
450 : double reltuples,
451 : LVRelState *vacrel);
452 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
453 : IndexBulkDeleteResult *istat,
454 : double reltuples,
455 : bool estimated_count,
456 : LVRelState *vacrel);
457 : static bool should_attempt_truncation(LVRelState *vacrel);
458 : static void lazy_truncate_heap(LVRelState *vacrel);
459 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
460 : bool *lock_waiter_detected);
461 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
462 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
463 : int num_offsets);
464 : static void dead_items_reset(LVRelState *vacrel);
465 : static void dead_items_cleanup(LVRelState *vacrel);
466 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
467 : TransactionId *visibility_cutoff_xid, bool *all_frozen);
468 : static void update_relstats_all_indexes(LVRelState *vacrel);
469 : static void vacuum_error_callback(void *arg);
470 : static void update_vacuum_error_info(LVRelState *vacrel,
471 : LVSavedErrInfo *saved_vacrel,
472 : int phase, BlockNumber blkno,
473 : OffsetNumber offnum);
474 : static void restore_vacuum_error_info(LVRelState *vacrel,
475 : const LVSavedErrInfo *saved_vacrel);
476 :
477 :
478 :
479 : /*
480 : * Helper to set up the eager scanning state for vacuuming a single relation.
481 : * Initializes the eager scan management related members of the LVRelState.
482 : *
483 : * Caller provides whether or not an aggressive vacuum is required due to
484 : * vacuum options or for relfrozenxid/relminmxid advancement.
485 : */
486 : static void
487 245850 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
488 : {
489 : uint32 randseed;
490 : BlockNumber allvisible;
491 : BlockNumber allfrozen;
492 : float first_region_ratio;
493 245850 : bool oldest_unfrozen_before_cutoff = false;
494 :
495 : /*
496 : * Initialize eager scan management fields to their disabled values.
497 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
498 : * of tables without sufficiently old tuples disable eager scanning.
499 : */
500 245850 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
501 245850 : vacrel->eager_scan_max_fails_per_region = 0;
502 245850 : vacrel->eager_scan_remaining_fails = 0;
503 245850 : vacrel->eager_scan_remaining_successes = 0;
504 :
505 : /* If eager scanning is explicitly disabled, just return. */
506 245850 : if (params.max_eager_freeze_failure_rate == 0)
507 245850 : return;
508 :
509 : /*
510 : * The caller will have determined whether or not an aggressive vacuum is
511 : * required by either the vacuum parameters or the relative age of the
512 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
513 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
514 : * so scans of all-visible pages are not considered eager.
515 : */
516 245850 : if (vacrel->aggressive)
517 235250 : return;
518 :
519 : /*
520 : * Aggressively vacuuming a small relation shouldn't take long, so amortizing
521 : * its freezing work isn't worth it. We use two times the region size as the size
522 : * cutoff because the eager scan start block is a random spot somewhere in
523 : * the first region, making the second region the first to be eager
524 : * scanned normally.
525 : */
526 10600 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
527 10600 : return;
528 :
529 : /*
530 : * We only want to enable eager scanning if we are likely to be able to
531 : * freeze some of the pages in the relation.
532 : *
533 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
534 : * are technically freezable, but we won't freeze them unless the criteria
535 : * for opportunistic freezing are met. Only tuples with XIDs/MXIDs older
536 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
537 : *
538 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
539 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
540 : * enable eager scanning.
541 : */
542 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
543 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
544 : vacrel->cutoffs.FreezeLimit))
545 0 : oldest_unfrozen_before_cutoff = true;
546 :
547 0 : if (!oldest_unfrozen_before_cutoff &&
548 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
549 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
550 : vacrel->cutoffs.MultiXactCutoff))
551 0 : oldest_unfrozen_before_cutoff = true;
552 :
553 0 : if (!oldest_unfrozen_before_cutoff)
554 0 : return;
555 :
556 : /* We have met the criteria to eagerly scan some pages. */
557 :
558 : /*
559 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
560 : * all-visible but not all-frozen blocks in the relation.
561 : */
562 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
563 :
564 0 : vacrel->eager_scan_remaining_successes =
565 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
566 0 : (allvisible - allfrozen));
567 :
568 : /* If every all-visible page is frozen, eager scanning is disabled. */
569 0 : if (vacrel->eager_scan_remaining_successes == 0)
570 0 : return;
571 :
572 : /*
573 : * Now calculate the bounds of the first eager scan region. Its end block
574 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
575 : * blocks. This affects the bounds of all subsequent regions and avoids
576 : * eager scanning and failing to freeze the same blocks each vacuum of the
577 : * relation.
578 : */
579 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
580 :
581 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
582 :
583 : Assert(params.max_eager_freeze_failure_rate > 0 &&
584 : params.max_eager_freeze_failure_rate <= 1);
585 :
586 0 : vacrel->eager_scan_max_fails_per_region =
587 0 : params.max_eager_freeze_failure_rate *
588 : EAGER_SCAN_REGION_SIZE;
589 :
590 : /*
591 : * The first region will be smaller than subsequent regions. As such,
592 : * adjust the eager freeze failures tolerated for this region.
593 : */
594 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
595 : EAGER_SCAN_REGION_SIZE;
596 :
597 0 : vacrel->eager_scan_remaining_fails =
598 0 : vacrel->eager_scan_max_fails_per_region *
599 : first_region_ratio;
600 : }
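
/*
 * Illustrative numbers for the arithmetic above: with
 * vacuum_max_eager_freeze_failure_rate = 0.03, eager_scan_max_fails_per_region
 * is 0.03 * 4096 = 122 (truncated to a BlockNumber). If
 * randseed % EAGER_SCAN_REGION_SIZE comes out to 1024, then
 * first_region_ratio is 1 - 1024/4096 = 0.75 and eager_scan_remaining_fails
 * starts out at 122 * 0.75 = 91 (again truncated).
 */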
601 :
602 : /*
603 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
604 : *
605 : * This routine sets things up for and then calls lazy_scan_heap, where
606 : * almost all work actually takes place. Finalizes everything after the call
607 : * returns by managing relation truncation and updating rel's pg_class
608 : * entry. (Also updates pg_class entries for any indexes that need it.)
609 : *
610 : * At entry, we have already established a transaction and opened
611 : * and locked the relation.
612 : */
613 : void
614 245850 : heap_vacuum_rel(Relation rel, const VacuumParams params,
615 : BufferAccessStrategy bstrategy)
616 : {
617 : LVRelState *vacrel;
618 : bool verbose,
619 : instrument,
620 : skipwithvm,
621 : frozenxid_updated,
622 : minmulti_updated;
623 : BlockNumber orig_rel_pages,
624 : new_rel_pages,
625 : new_rel_allvisible,
626 : new_rel_allfrozen;
627 : PGRUsage ru0;
628 245850 : TimestampTz starttime = 0;
629 245850 : PgStat_Counter startreadtime = 0,
630 245850 : startwritetime = 0;
631 245850 : WalUsage startwalusage = pgWalUsage;
632 245850 : BufferUsage startbufferusage = pgBufferUsage;
633 : ErrorContextCallback errcallback;
634 245850 : char **indnames = NULL;
635 :
636 245850 : verbose = (params.options & VACOPT_VERBOSE) != 0;
637 465732 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
638 219882 : params.log_min_duration >= 0));
639 245850 : if (instrument)
640 : {
641 219906 : pg_rusage_init(&ru0);
642 219906 : if (track_io_timing)
643 : {
644 0 : startreadtime = pgStatBlockReadTime;
645 0 : startwritetime = pgStatBlockWriteTime;
646 : }
647 : }
648 :
649 : /* Used for instrumentation and stats report */
650 245850 : starttime = GetCurrentTimestamp();
651 :
652 245850 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
653 : RelationGetRelid(rel));
654 :
655 : /*
656 : * Setup error traceback support for ereport() first. The idea is to set
657 : * up an error context callback to display additional information on any
658 : * error during a vacuum. During different phases of vacuum, we update
659 : * the state so that the error context callback always displays current
660 : * information.
661 : *
662 : * Copy the names of the heap rel into local memory for error reporting
663 : * purposes, too. It isn't always safe to assume that we can get the name
664 : * of each rel. It's convenient for code in lazy_scan_heap to always use
665 : * these temp copies.
666 : */
667 245850 : vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
668 245850 : vacrel->dbname = get_database_name(MyDatabaseId);
669 245850 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
670 245850 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
671 245850 : vacrel->indname = NULL;
672 245850 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
673 245850 : vacrel->verbose = verbose;
674 245850 : errcallback.callback = vacuum_error_callback;
675 245850 : errcallback.arg = vacrel;
676 245850 : errcallback.previous = error_context_stack;
677 245850 : error_context_stack = &errcallback;
678 :
679 : /* Set up high level stuff about rel and its indexes */
680 245850 : vacrel->rel = rel;
681 245850 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
682 : &vacrel->indrels);
683 245850 : vacrel->bstrategy = bstrategy;
684 245850 : if (instrument && vacrel->nindexes > 0)
685 : {
686 : /* Copy index names used by instrumentation (not error reporting) */
687 210366 : indnames = palloc(sizeof(char *) * vacrel->nindexes);
688 541168 : for (int i = 0; i < vacrel->nindexes; i++)
689 330802 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
690 : }
691 :
692 : /*
693 : * The index_cleanup param either disables index vacuuming and cleanup or
694 : * forces it to go ahead when we would otherwise apply the index bypass
695 : * optimization. The default is 'auto', which leaves the final decision
696 : * up to lazy_vacuum().
697 : *
698 : * The truncate param allows the user to avoid attempting relation truncation,
699 : * though it can't force truncation to happen.
700 : */
701 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
702 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
703 : params.truncate != VACOPTVALUE_AUTO);
704 :
705 : /*
706 : * While VacuumFailsafeActive is reset to false before calling this, we
707 : * still need to reset it here due to recursive calls.
708 : */
709 245850 : VacuumFailsafeActive = false;
710 245850 : vacrel->consider_bypass_optimization = true;
711 245850 : vacrel->do_index_vacuuming = true;
712 245850 : vacrel->do_index_cleanup = true;
713 245850 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
714 245850 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
715 : {
716 : /* Force disable index vacuuming up-front */
717 260 : vacrel->do_index_vacuuming = false;
718 260 : vacrel->do_index_cleanup = false;
719 : }
720 245590 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
721 : {
722 : /* Force index vacuuming. Note that failsafe can still bypass. */
723 30 : vacrel->consider_bypass_optimization = false;
724 : }
725 : else
726 : {
727 : /* Default/auto, make all decisions dynamically */
728 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
729 : }
730 :
731 : /* Initialize page counters explicitly (be tidy) */
732 245850 : vacrel->scanned_pages = 0;
733 245850 : vacrel->eager_scanned_pages = 0;
734 245850 : vacrel->removed_pages = 0;
735 245850 : vacrel->new_frozen_tuple_pages = 0;
736 245850 : vacrel->lpdead_item_pages = 0;
737 245850 : vacrel->missed_dead_pages = 0;
738 245850 : vacrel->nonempty_pages = 0;
739 : /* dead_items_alloc allocates vacrel->dead_items later on */
740 :
741 : /* Allocate/initialize output statistics state */
742 245850 : vacrel->new_rel_tuples = 0;
743 245850 : vacrel->new_live_tuples = 0;
744 245850 : vacrel->indstats = (IndexBulkDeleteResult **)
745 245850 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
746 :
747 : /* Initialize remaining counters (be tidy) */
748 245850 : vacrel->num_index_scans = 0;
749 245850 : vacrel->tuples_deleted = 0;
750 245850 : vacrel->tuples_frozen = 0;
751 245850 : vacrel->lpdead_items = 0;
752 245850 : vacrel->live_tuples = 0;
753 245850 : vacrel->recently_dead_tuples = 0;
754 245850 : vacrel->missed_dead_tuples = 0;
755 :
756 245850 : vacrel->vm_new_visible_pages = 0;
757 245850 : vacrel->vm_new_visible_frozen_pages = 0;
758 245850 : vacrel->vm_new_frozen_pages = 0;
759 :
760 : /*
761 : * Get cutoffs that determine which deleted tuples are considered DEAD,
762 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
763 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
764 : * happen in this order to ensure that the OldestXmin cutoff field works
765 : * as an upper bound on the XIDs stored in the pages we'll actually scan
766 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
767 : *
768 : * Next acquire vistest, a related cutoff that's used in pruning. We use
769 : * vistest in combination with OldestXmin to ensure that
770 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
771 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
772 : * whether a tuple should be frozen or removed. (In the future we might
773 : * want to teach lazy_scan_prune to recompute vistest from time to time,
774 : * to increase the number of dead tuples it can prune away.)
775 : */
776 245850 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
777 245850 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
778 245850 : vacrel->vistest = GlobalVisTestFor(rel);
779 :
780 : /* Initialize state used to track oldest extant XID/MXID */
781 245850 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
782 245850 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
783 :
784 : /*
785 : * Initialize state related to tracking all-visible page skipping. This is
786 : * very important to determine whether or not it is safe to advance the
787 : * relfrozenxid/relminmxid.
788 : */
789 245850 : vacrel->skippedallvis = false;
790 245850 : skipwithvm = true;
791 245850 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
792 : {
793 : /*
794 : * Force aggressive mode, and disable skipping blocks using the
795 : * visibility map (even those set all-frozen)
796 : */
797 344 : vacrel->aggressive = true;
798 344 : skipwithvm = false;
799 : }
800 :
801 245850 : vacrel->skipwithvm = skipwithvm;
802 :
803 : /*
804 : * Set up eager scan tracking state. This must happen after determining
805 : * whether or not the vacuum must be aggressive, because only normal
806 : * vacuums use the eager scan algorithm.
807 : */
808 245850 : heap_vacuum_eager_scan_setup(vacrel, params);
809 :
810 245850 : if (verbose)
811 : {
812 24 : if (vacrel->aggressive)
813 2 : ereport(INFO,
814 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
815 : vacrel->dbname, vacrel->relnamespace,
816 : vacrel->relname)));
817 : else
818 22 : ereport(INFO,
819 : (errmsg("vacuuming \"%s.%s.%s\"",
820 : vacrel->dbname, vacrel->relnamespace,
821 : vacrel->relname)));
822 : }
823 :
824 : /*
825 : * Allocate dead_items memory using dead_items_alloc. This handles
826 : * parallel VACUUM initialization as part of allocating shared memory
827 : * space used for dead_items. (But do a failsafe precheck first, to
828 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
829 : * is already dangerously old.)
830 : */
831 245850 : lazy_check_wraparound_failsafe(vacrel);
832 245850 : dead_items_alloc(vacrel, params.nworkers);
833 :
834 : /*
835 : * Call lazy_scan_heap to perform all required heap pruning, index
836 : * vacuuming, and heap vacuuming (plus related processing)
837 : */
838 245850 : lazy_scan_heap(vacrel);
839 :
840 : /*
841 : * Free resources managed by dead_items_alloc. This ends parallel mode in
842 : * passing when necessary.
843 : */
844 245850 : dead_items_cleanup(vacrel);
845 : Assert(!IsInParallelMode());
846 :
847 : /*
848 : * Update pg_class entries for each of rel's indexes where appropriate.
849 : *
850 : * Unlike the later update to rel's pg_class entry, this is not critical.
851 : * Maintains relpages/reltuples statistics used by the planner only.
852 : */
853 245850 : if (vacrel->do_index_cleanup)
854 159512 : update_relstats_all_indexes(vacrel);
855 :
856 : /* Done with rel's indexes */
857 245850 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
858 :
859 : /* Optionally truncate rel */
860 245850 : if (should_attempt_truncation(vacrel))
861 288 : lazy_truncate_heap(vacrel);
862 :
863 : /* Pop the error context stack */
864 245850 : error_context_stack = errcallback.previous;
865 :
866 : /* Report that we are now doing final cleanup */
867 245850 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
868 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
869 :
870 : /*
871 : * Prepare to update rel's pg_class entry.
872 : *
873 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
874 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
875 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
876 : */
877 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
878 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
879 : vacrel->cutoffs.relfrozenxid,
880 : vacrel->NewRelfrozenXid));
881 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
882 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
883 : vacrel->cutoffs.relminmxid,
884 : vacrel->NewRelminMxid));
885 245850 : if (vacrel->skippedallvis)
886 : {
887 : /*
888 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
889 : * chose to skip an all-visible page range. The state that tracks new
890 : * values will have missed unfrozen XIDs from the pages we skipped.
891 : */
892 : Assert(!vacrel->aggressive);
893 54 : vacrel->NewRelfrozenXid = InvalidTransactionId;
894 54 : vacrel->NewRelminMxid = InvalidMultiXactId;
895 : }
896 :
897 : /*
898 : * For safety, clamp relallvisible to be not more than what we're setting
899 : * pg_class.relpages to
900 : */
901 245850 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
902 245850 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
903 245850 : if (new_rel_allvisible > new_rel_pages)
904 0 : new_rel_allvisible = new_rel_pages;
905 :
906 : /*
907 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
908 : * all-frozen blocks to the count of all-visible blocks. This matches the
909 : * clamping of relallvisible above.
910 : */
911 245850 : if (new_rel_allfrozen > new_rel_allvisible)
912 0 : new_rel_allfrozen = new_rel_allvisible;
913 :
914 : /*
915 : * Now actually update rel's pg_class entry.
916 : *
917 : * In principle new_live_tuples could be -1 indicating that we (still)
918 : * don't know the tuple count. In practice that can't happen, since we
919 : * scan every page that isn't skipped using the visibility map.
920 : */
921 245850 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
922 : new_rel_allvisible, new_rel_allfrozen,
923 245850 : vacrel->nindexes > 0,
924 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
925 : &frozenxid_updated, &minmulti_updated, false);
926 :
927 : /*
928 : * Report results to the cumulative stats system, too.
929 : *
930 : * Deliberately avoid telling the stats system about LP_DEAD items that
931 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
932 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
933 : * It seems like a good idea to err on the side of not vacuuming again too
934 : * soon in cases where the failsafe prevented significant amounts of heap
935 : * vacuuming.
936 : */
937 147570 : pgstat_report_vacuum(RelationGetRelid(rel),
938 245850 : rel->rd_rel->relisshared,
939 98280 : Max(vacrel->new_live_tuples, 0),
940 245850 : vacrel->recently_dead_tuples +
941 245850 : vacrel->missed_dead_tuples,
942 : starttime);
943 245850 : pgstat_progress_end_command();
944 :
945 245850 : if (instrument)
946 : {
947 219906 : TimestampTz endtime = GetCurrentTimestamp();
948 :
949 220028 : if (verbose || params.log_min_duration == 0 ||
950 122 : TimestampDifferenceExceeds(starttime, endtime,
951 122 : params.log_min_duration))
952 : {
953 : long secs_dur;
954 : int usecs_dur;
955 : WalUsage walusage;
956 : BufferUsage bufferusage;
957 : StringInfoData buf;
958 : char *msgfmt;
959 : int32 diff;
960 219784 : double read_rate = 0,
961 219784 : write_rate = 0;
962 : int64 total_blks_hit;
963 : int64 total_blks_read;
964 : int64 total_blks_dirtied;
965 :
966 219784 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
967 219784 : memset(&walusage, 0, sizeof(WalUsage));
968 219784 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
969 219784 : memset(&bufferusage, 0, sizeof(BufferUsage));
970 219784 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
971 :
972 219784 : total_blks_hit = bufferusage.shared_blks_hit +
973 219784 : bufferusage.local_blks_hit;
974 219784 : total_blks_read = bufferusage.shared_blks_read +
975 219784 : bufferusage.local_blks_read;
976 219784 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
977 219784 : bufferusage.local_blks_dirtied;
978 :
979 219784 : initStringInfo(&buf);
980 219784 : if (verbose)
981 : {
982 : /*
983 : * Aggressiveness already reported earlier, in dedicated
984 : * VACUUM VERBOSE ereport
985 : */
986 : Assert(!params.is_wraparound);
987 24 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
988 : }
989 219760 : else if (params.is_wraparound)
990 : {
991 : /*
992 : * While it's possible for a VACUUM to be both is_wraparound
993 : * and !aggressive, that's just a corner-case -- is_wraparound
994 : * implies aggressive. Produce distinct output for the corner
995 : * case all the same, just in case.
996 : */
997 219726 : if (vacrel->aggressive)
998 219726 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
999 : else
1000 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1001 : }
1002 : else
1003 : {
1004 34 : if (vacrel->aggressive)
1005 28 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1006 : else
1007 6 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1008 : }
1009 219784 : appendStringInfo(&buf, msgfmt,
1010 : vacrel->dbname,
1011 : vacrel->relnamespace,
1012 : vacrel->relname,
1013 : vacrel->num_index_scans);
1014 307258 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1015 : vacrel->removed_pages,
1016 : new_rel_pages,
1017 : vacrel->scanned_pages,
1018 : orig_rel_pages == 0 ? 100.0 :
1019 87474 : 100.0 * vacrel->scanned_pages /
1020 : orig_rel_pages,
1021 : vacrel->eager_scanned_pages);
1022 219784 : appendStringInfo(&buf,
1023 219784 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1024 : vacrel->tuples_deleted,
1025 219784 : (int64) vacrel->new_rel_tuples,
1026 : vacrel->recently_dead_tuples);
1027 219784 : if (vacrel->missed_dead_tuples > 0)
1028 0 : appendStringInfo(&buf,
1029 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1030 : vacrel->missed_dead_tuples,
1031 : vacrel->missed_dead_pages);
1032 219784 : diff = (int32) (ReadNextTransactionId() -
1033 219784 : vacrel->cutoffs.OldestXmin);
1034 219784 : appendStringInfo(&buf,
1035 219784 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1036 : vacrel->cutoffs.OldestXmin, diff);
1037 219784 : if (frozenxid_updated)
1038 : {
1039 35536 : diff = (int32) (vacrel->NewRelfrozenXid -
1040 35536 : vacrel->cutoffs.relfrozenxid);
1041 35536 : appendStringInfo(&buf,
1042 35536 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1043 : vacrel->NewRelfrozenXid, diff);
1044 : }
1045 219784 : if (minmulti_updated)
1046 : {
1047 8 : diff = (int32) (vacrel->NewRelminMxid -
1048 8 : vacrel->cutoffs.relminmxid);
1049 8 : appendStringInfo(&buf,
1050 8 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1051 : vacrel->NewRelminMxid, diff);
1052 : }
1053 307258 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1054 : vacrel->new_frozen_tuple_pages,
1055 : orig_rel_pages == 0 ? 100.0 :
1056 87474 : 100.0 * vacrel->new_frozen_tuple_pages /
1057 : orig_rel_pages,
1058 : vacrel->tuples_frozen);
1059 :
1060 219784 : appendStringInfo(&buf,
1061 219784 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1062 : vacrel->vm_new_visible_pages,
1063 219784 : vacrel->vm_new_visible_frozen_pages +
1064 219784 : vacrel->vm_new_frozen_pages,
1065 : vacrel->vm_new_frozen_pages);
1066 219784 : if (vacrel->do_index_vacuuming)
1067 : {
1068 133924 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1069 133894 : appendStringInfoString(&buf, _("index scan not needed: "));
1070 : else
1071 30 : appendStringInfoString(&buf, _("index scan needed: "));
1072 :
1073 133924 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1074 : }
1075 : else
1076 : {
1077 85860 : if (!VacuumFailsafeActive)
1078 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1079 : else
1080 85860 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1081 :
1082 85860 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1083 : }
1084 307258 : appendStringInfo(&buf, msgfmt,
1085 : vacrel->lpdead_item_pages,
1086 : orig_rel_pages == 0 ? 100.0 :
1087 87474 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1088 : vacrel->lpdead_items);
1089 550354 : for (int i = 0; i < vacrel->nindexes; i++)
1090 : {
1091 330570 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1092 :
1093 330570 : if (!istat)
1094 330520 : continue;
1095 :
1096 50 : appendStringInfo(&buf,
1097 50 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1098 50 : indnames[i],
1099 : istat->num_pages,
1100 : istat->pages_newly_deleted,
1101 : istat->pages_deleted,
1102 : istat->pages_free);
1103 : }
1104 219784 : if (track_cost_delay_timing)
1105 : {
1106 : /*
1107 : * We bypass the changecount mechanism because this value is
1108 : * only updated by the calling process. We also rely on the
1109 : * above call to pgstat_progress_end_command() to not clear
1110 : * the st_progress_param array.
1111 : */
1112 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1113 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1114 : }
1115 219784 : if (track_io_timing)
1116 : {
1117 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1118 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1119 :
1120 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1121 : read_ms, write_ms);
1122 : }
1123 219784 : if (secs_dur > 0 || usecs_dur > 0)
1124 : {
1125 219784 : read_rate = (double) BLCKSZ * total_blks_read /
1126 219784 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1127 219784 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1128 219784 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1129 : }
1130 219784 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1131 : read_rate, write_rate);
1132 219784 : appendStringInfo(&buf,
1133 219784 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1134 : total_blks_hit,
1135 : total_blks_read,
1136 : total_blks_dirtied);
1137 219784 : appendStringInfo(&buf,
1138 219784 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRId64 " buffers full\n"),
1139 : walusage.wal_records,
1140 : walusage.wal_fpi,
1141 : walusage.wal_bytes,
1142 : walusage.wal_buffers_full);
1143 219784 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1144 :
1145 219784 : ereport(verbose ? INFO : LOG,
1146 : (errmsg_internal("%s", buf.data)));
1147 219784 : pfree(buf.data);
1148 : }
1149 : }
1150 :
1151 : /* Cleanup index statistics and index names */
1152 613854 : for (int i = 0; i < vacrel->nindexes; i++)
1153 : {
1154 368004 : if (vacrel->indstats[i])
1155 2578 : pfree(vacrel->indstats[i]);
1156 :
1157 368004 : if (instrument)
1158 330802 : pfree(indnames[i]);
1159 : }
1160 245850 : }
1161 :
1162 : /*
1163 : * lazy_scan_heap() -- workhorse function for VACUUM
1164 : *
1165 : * This routine prunes each page in the heap, and considers the need to
1166 : * freeze remaining tuples with storage (not including pages that can be
1167 : * skipped using the visibility map). Also performs related maintenance
1168 : * of the FSM and visibility map. These steps all take place during an
1169 : * initial pass over the target heap relation.
1170 : *
1171 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1172 : * consists of deleting index tuples that point to LP_DEAD items left in
1173 : * heap pages following pruning. The earlier initial pass over the heap will
1174 : * have collected the TIDs whose index tuples need to be removed.
1175 : *
1176 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1177 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1178 : * as LP_UNUSED. This has to happen in a second, final pass over the
1179 : * heap, to preserve a basic invariant that all index AMs rely on: no
1180 : * extant index tuple can ever be allowed to contain a TID that points to
1181 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1182 : * recycling of line pointers to avoid index scans that get confused
1183 : * about which TID points to which tuple immediately after recycling.
1184 : * (Actually, this isn't a concern when the target heap relation happens to
1185 : * have no indexes, which allows us to safely apply the one-pass strategy
1186 : * as an optimization).
1187 : *
1188 : * In practice we often have enough space to fit all TIDs, and so won't
1189 : * need to call lazy_vacuum more than once, after our initial pass over
1190 : * the heap has totally finished. Otherwise things are slightly more
1191 : * complicated: our "initial pass" over the heap applies only to those
1192 : * pages that were pruned before we needed to call lazy_vacuum, and our
1193 : * "final pass" over the heap only vacuums these same heap pages.
1194 : * However, we process indexes in full every time lazy_vacuum is called,
1195 : * which makes index processing very inefficient when memory is in short
1196 : * supply.
1197 : */
1198 : static void
1199 245850 : lazy_scan_heap(LVRelState *vacrel)
1200 : {
1201 : ReadStream *stream;
1202 245850 : BlockNumber rel_pages = vacrel->rel_pages,
1203 245850 : blkno = 0,
1204 245850 : next_fsm_block_to_vacuum = 0;
1205 245850 : BlockNumber orig_eager_scan_success_limit =
1206 : vacrel->eager_scan_remaining_successes; /* for logging */
1207 245850 : Buffer vmbuffer = InvalidBuffer;
1208 245850 : const int initprog_index[] = {
1209 : PROGRESS_VACUUM_PHASE,
1210 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1211 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1212 : };
1213 : int64 initprog_val[3];
1214 :
1215 : /* Report that we're scanning the heap, advertising total # of blocks */
1216 245850 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1217 245850 : initprog_val[1] = rel_pages;
1218 245850 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1219 245850 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1220 :
1221 : /* Initialize for the first heap_vac_scan_next_block() call */
1222 245850 : vacrel->current_block = InvalidBlockNumber;
1223 245850 : vacrel->next_unskippable_block = InvalidBlockNumber;
1224 245850 : vacrel->next_unskippable_allvis = false;
1225 245850 : vacrel->next_unskippable_eager_scanned = false;
1226 245850 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1227 :
1228 : /*
1229 : * Set up the read stream for vacuum's first pass through the heap.
1230 : *
1231 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1232 : * explicit work in heap_vac_scan_next_block.
1233 : */
1234 245850 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1235 : vacrel->bstrategy,
1236 : vacrel->rel,
1237 : MAIN_FORKNUM,
1238 : heap_vac_scan_next_block,
1239 : vacrel,
1240 : sizeof(uint8));
1241 :
1242 : while (true)
1243 1148348 : {
1244 : Buffer buf;
1245 : Page page;
1246 1394198 : uint8 blk_info = 0;
1247 1394198 : int ndeleted = 0;
1248 : bool has_lpdead_items;
1249 1394198 : void *per_buffer_data = NULL;
1250 1394198 : bool vm_page_frozen = false;
1251 1394198 : bool got_cleanup_lock = false;
1252 :
1253 1394198 : vacuum_delay_point(false);
1254 :
1255 : /*
1256 : * Regularly check if wraparound failsafe should trigger.
1257 : *
1258 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1259 : * relfrozenxid might start to look dangerously old before we reach
1260 : * that point. This check also provides failsafe coverage for the
1261 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1262 : * param set to 'off'.
1263 : */
1264 1394198 : if (vacrel->scanned_pages > 0 &&
1265 1148348 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1266 0 : lazy_check_wraparound_failsafe(vacrel);
1267 :
1268 : /*
1269 : * Consider if we definitely have enough space to process the TIDs on this
1270 : * page already. If we are close to overrunning the available space for
1271 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1272 : * this page. However, let's force at least one page-worth of tuples
1273 : * to be stored so as to ensure we do at least some work when the memory
1274 : * configured is so low that we run out before storing anything.
1275 : */
1276 1394198 : if (vacrel->dead_items_info->num_items > 0 &&
1277 44852 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1278 : {
1279 : /*
1280 : * Before beginning index vacuuming, we release any pin we may
1281 : * hold on the visibility map page. This isn't necessary for
1282 : * correctness, but we do it anyway to avoid holding the pin
1283 : * across a lengthy, unrelated operation.
1284 : */
1285 4 : if (BufferIsValid(vmbuffer))
1286 : {
1287 4 : ReleaseBuffer(vmbuffer);
1288 4 : vmbuffer = InvalidBuffer;
1289 : }
1290 :
1291 : /* Perform a round of index and heap vacuuming */
1292 4 : vacrel->consider_bypass_optimization = false;
1293 4 : lazy_vacuum(vacrel);
1294 :
1295 : /*
1296 : * Vacuum the Free Space Map to make newly-freed space visible on
1297 : * upper-level FSM pages. Note that blkno is the previously
1298 : * processed block.
1299 : */
1300 4 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1301 : blkno + 1);
1302 4 : next_fsm_block_to_vacuum = blkno;
1303 :
1304 : /* Report that we are once again scanning the heap */
1305 4 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1306 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1307 : }
1308 :
1309 1394198 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1310 :
1311 : /* The relation is exhausted. */
1312 1394198 : if (!BufferIsValid(buf))
1313 245850 : break;
1314 :
1315 1148348 : blk_info = *((uint8 *) per_buffer_data);
1316 1148348 : CheckBufferIsPinnedOnce(buf);
1317 1148348 : page = BufferGetPage(buf);
1318 1148348 : blkno = BufferGetBlockNumber(buf);
1319 :
1320 1148348 : vacrel->scanned_pages++;
1321 1148348 : if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
1322 0 : vacrel->eager_scanned_pages++;
1323 :
1324 : /* Report as block scanned, update error traceback information */
1325 1148348 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1326 1148348 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1327 : blkno, InvalidOffsetNumber);
1328 :
1329 : /*
1330 : * Pin the visibility map page in case we need to mark the page
1331 : * all-visible. In most cases this will be very cheap, because we'll
1332 : * already have the correct page pinned anyway.
1333 : */
1334 1148348 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1335 :
1336 : /*
1337 : * We need a buffer cleanup lock to prune HOT chains and defragment
1338 : * the page in lazy_scan_prune. But when it's not possible to acquire
1339 : * a cleanup lock right away, we may be able to settle for reduced
1340 : * processing using lazy_scan_noprune.
1341 : */
1342 1148348 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1343 :
1344 1148348 : if (!got_cleanup_lock)
1345 338 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1346 :
1347 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1348 1148348 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1349 1148348 : vmbuffer))
1350 : {
1351 : /* Processed as new/empty page (lock and pin released) */
1352 1224 : continue;
1353 : }
1354 :
1355 : /*
1356 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1357 : * items in the dead_items area for later vacuuming, count live and
1358 : * recently dead tuples for vacuum logging, and determine if this
1359 : * block could later be truncated. If we encounter any xid/mxids that
1360 : * require advancing the relfrozenxid/relminmxid, we'll have to wait
1361 : * for a cleanup lock and call lazy_scan_prune().
1362 : */
1363 1147124 : if (!got_cleanup_lock &&
1364 338 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1365 : {
1366 : /*
1367 : * lazy_scan_noprune could not do all required processing. Wait
1368 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1369 : */
1370 : Assert(vacrel->aggressive);
1371 154 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1372 154 : LockBufferForCleanup(buf);
1373 154 : got_cleanup_lock = true;
1374 : }
1375 :
1376 : /*
1377 : * If we have a cleanup lock, we must now prune, freeze, and count
1378 : * tuples. We may have acquired the cleanup lock originally, or we may
1379 : * have gone back and acquired it after lazy_scan_noprune() returned
1380 : * false. Either way, the page hasn't been processed yet.
1381 : *
1382 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1383 : * recently_dead_tuples and live tuples for vacuum logging, determine
1384 : * if the block can later be truncated, and accumulate the details of
1385 : * remaining LP_DEAD line pointers on the page into dead_items. These
1386 : * dead items include those pruned by lazy_scan_prune() as well as
1387 : * line pointers previously marked LP_DEAD.
1388 : */
1389 1147124 : if (got_cleanup_lock)
1390 1146940 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1391 : vmbuffer,
1392 1146940 : blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
1393 : &has_lpdead_items, &vm_page_frozen);
1394 :
1395 : /*
1396 : * Count an eagerly scanned page as a failure or a success.
1397 : *
1398 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1399 : * cleanup lock, we won't have frozen the page. However, we only count
1400 : * pages that were too new to require freezing as eager freeze
1401 : * failures.
1402 : *
1403 : * We could gather more information from lazy_scan_noprune() about
1404 : * whether or not there were tuples with XIDs or MXIDs older than the
1405 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1406 : * exclude pages skipped due to cleanup lock contention from eager
1407 : * freeze algorithm caps.
1408 : */
1409 1147124 : if (got_cleanup_lock &&
1410 1146940 : (blk_info & VAC_BLK_WAS_EAGER_SCANNED))
1411 : {
1412 : /* Aggressive vacuums do not eager scan. */
1413 : Assert(!vacrel->aggressive);
1414 :
1415 0 : if (vm_page_frozen)
1416 : {
1417 0 : if (vacrel->eager_scan_remaining_successes > 0)
1418 0 : vacrel->eager_scan_remaining_successes--;
1419 :
1420 0 : if (vacrel->eager_scan_remaining_successes == 0)
1421 : {
1422 : /*
1423 : * Report only once that we disabled eager scanning. We
1424 : * may eagerly read ahead blocks in excess of the success
1425 : * or failure caps before attempting to freeze them, so we
1426 : * could reach here even after disabling additional eager
1427 : * scanning.
1428 : */
1429 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1430 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1431 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1432 : orig_eager_scan_success_limit,
1433 : vacrel->dbname, vacrel->relnamespace,
1434 : vacrel->relname)));
1435 :
1436 : /*
1437 : * If we hit our success cap, permanently disable eager
1438 : * scanning by setting the other eager scan management
1439 : * fields to their disabled values.
1440 : */
1441 0 : vacrel->eager_scan_remaining_fails = 0;
1442 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1443 0 : vacrel->eager_scan_max_fails_per_region = 0;
1444 : }
1445 : }
1446 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1447 0 : vacrel->eager_scan_remaining_fails--;
1448 : }
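            /*
             * Illustrative bookkeeping example (the concrete numbers are
             * assumptions): with, say, a 4096-block eager scan region and a
             * cap of 128 failures per region, each failed eager freeze
             * decrements eager_scan_remaining_fails; once it reaches zero,
             * eager scanning is suspended until find_next_unskippable_block()
             * resets the counter at the next region boundary.  Exhausting
             * eager_scan_remaining_successes instead disables eager scanning
             * for the remainder of this VACUUM, as handled above.
             */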
1449 :
1450 : /*
1451 : * Now drop the buffer lock and, potentially, update the FSM.
1452 : *
1453 : * Our goal is to update the freespace map the last time we touch the
1454 : * page. If we'll process a block in the second pass, we may free up
1455 : * additional space on the page, so it is better to update the FSM
1456 : * after the second pass. If the relation has no indexes, or if index
1457 : * vacuuming is disabled, there will be no second heap pass; if this
1458 : * particular page has no dead items, the second heap pass will not
1459 : * touch this page. So, in those cases, update the FSM now.
1460 : *
1461 : * Note: In corner cases, it's possible to miss updating the FSM
1462 : * entirely. If index vacuuming is currently enabled, we'll skip the
1463 : * FSM update now. But if failsafe mode is later activated, or there
1464 : * are so few dead tuples that index vacuuming is bypassed, there will
1465 : * also be no opportunity to update the FSM later, because we'll never
1466 : * revisit this page. Since updating the FSM is desirable but not
1467 : * absolutely required, that's OK.
1468 : */
1469 1147124 : if (vacrel->nindexes == 0
1470 1112306 : || !vacrel->do_index_vacuuming
1471 749226 : || !has_lpdead_items)
1472 1123648 : {
1473 1123648 : Size freespace = PageGetHeapFreeSpace(page);
1474 :
1475 1123648 : UnlockReleaseBuffer(buf);
1476 1123648 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1477 :
1478 : /*
1479 : * Periodically perform FSM vacuuming to make newly-freed space visible on
1480 : * upper FSM pages. (When the table has indexes, this is instead done after
1481 : * each round of index and heap vacuuming.) There will only be newly-freed
1482 : * space if we held the cleanup lock and lazy_scan_prune() was called.
1483 : */
1484 1123648 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1485 908 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1486 : {
1487 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1488 : blkno);
1489 0 : next_fsm_block_to_vacuum = blkno;
1490 : }
1491 : }
1492 : else
1493 23476 : UnlockReleaseBuffer(buf);
1494 : }
1495 :
1496 245850 : vacrel->blkno = InvalidBlockNumber;
1497 245850 : if (BufferIsValid(vmbuffer))
1498 98444 : ReleaseBuffer(vmbuffer);
1499 :
1500 : /*
1501 : * Report that everything is now scanned. We never skip scanning the last
1502 : * block in the relation, so we can pass rel_pages here.
1503 : */
1504 245850 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1505 : rel_pages);
1506 :
1507 : /* now we can compute the new value for pg_class.reltuples */
1508 491700 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1509 : vacrel->scanned_pages,
1510 245850 : vacrel->live_tuples);
1511 :
1512 : /*
1513 : * Also compute the total number of surviving heap entries. In the
1514 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1515 : */
1516 245850 : vacrel->new_rel_tuples =
1517 245850 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1518 245850 : vacrel->missed_dead_tuples;
1519 :
1520 245850 : read_stream_end(stream);
1521 :
1522 : /*
1523 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1524 : * related heap vacuuming
1525 : */
1526 245850 : if (vacrel->dead_items_info->num_items > 0)
1527 1218 : lazy_vacuum(vacrel);
1528 :
1529 : /*
1530 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1531 : * not there were indexes, and whether or not we bypassed index vacuuming.
1532 : * We can pass rel_pages here because we never skip scanning the last
1533 : * block of the relation.
1534 : */
1535 245850 : if (rel_pages > next_fsm_block_to_vacuum)
1536 98444 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1537 :
1538 : /* report all blocks vacuumed */
1539 245850 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1540 :
1541 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1542 245850 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1543 151714 : lazy_cleanup_all_indexes(vacrel);
1544 245850 : }
1545 :
1546 : /*
1547 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1548 : * for vacuum to process
1549 : *
1550 : * Every time lazy_scan_heap() needs a new block to process during its first
1551 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1552 : * heap_vac_scan_next_block() to get the next block.
1553 : *
1554 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1555 : * various thresholds to skip blocks which do not need to be processed and
1556 : * returns the next block to process or InvalidBlockNumber if there are no
1557 : * remaining blocks.
1558 : *
1559 : * The visibility status of the next block to process and whether or not it
1560 : * was eagerly scanned are set in the per_buffer_data.
1561 : *
1562 : * callback_private_data contains a reference to the LVRelState, passed to the
1563 : * read stream API during stream setup. The LVRelState is an in/out parameter
1564 : * here (locally named `vacrel`). Vacuum options and information about the
1565 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1566 : * that's all-visible but not all-frozen (to ensure that we don't update
1567 : * relfrozenxid in that case). vacrel also holds information about the next
1568 : * unskippable block -- as bookkeeping for this function.
1569 : */
1570 : static BlockNumber
1571 1394198 : heap_vac_scan_next_block(ReadStream *stream,
1572 : void *callback_private_data,
1573 : void *per_buffer_data)
1574 : {
1575 : BlockNumber next_block;
1576 1394198 : LVRelState *vacrel = callback_private_data;
1577 1394198 : uint8 blk_info = 0;
1578 :
1579 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1580 1394198 : next_block = vacrel->current_block + 1;
1581 :
1582 : /* Have we reached the end of the relation? */
1583 1394198 : if (next_block >= vacrel->rel_pages)
1584 : {
1585 245850 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1586 : {
1587 95536 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1588 95536 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1589 : }
1590 245850 : return InvalidBlockNumber;
1591 : }
1592 :
1593 : /*
1594 : * We must be in one of the three following states:
1595 : */
1596 1148348 : if (next_block > vacrel->next_unskippable_block ||
1597 396020 : vacrel->next_unskippable_block == InvalidBlockNumber)
1598 : {
1599 : /*
1600 : * 1. We have just processed an unskippable block (or we're at the
1601 : * beginning of the scan). Find the next unskippable block using the
1602 : * visibility map.
1603 : */
1604 : bool skipsallvis;
1605 :
1606 850772 : find_next_unskippable_block(vacrel, &skipsallvis);
1607 :
1608 : /*
1609 : * We now know the next block that we must process. It can be the
1610 : * next block after the one we just processed, or something further
1611 : * ahead. If it's further ahead, we can jump to it, but we choose to
1612 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1613 : * pages. Since we're reading sequentially, the OS should be doing
1614 : * readahead for us, so there's no gain in skipping a page now and
1615 : * then. Skipping such a range might even discourage sequential
1616 : * detection.
1617 : *
1618 : * This test also enables more frequent relfrozenxid advancement
1619 : * during non-aggressive VACUUMs. If the range has any all-visible
1620 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1621 : * real downside.
1622 : */
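            /*
             * For illustration (the threshold's value is not shown in this
             * excerpt): if next_block is 100 and next_unskippable_block is
             * 200, a 100-block skippable range comfortably exceeds
             * SKIP_PAGES_THRESHOLD, so we jump straight to block 200; a gap
             * of only a few blocks would be read anyway to preserve
             * sequential readahead.
             */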
1623 850772 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1624 : {
1625 8848 : next_block = vacrel->next_unskippable_block;
1626 8848 : if (skipsallvis)
1627 56 : vacrel->skippedallvis = true;
1628 : }
1629 : }
1630 :
1631 : /* Now we must be in one of the two remaining states: */
1632 1148348 : if (next_block < vacrel->next_unskippable_block)
1633 : {
1634 : /*
1635 : * 2. We are processing a range of blocks that we could have skipped
1636 : * but chose not to. We know that they are all-visible in the VM,
1637 : * otherwise they would've been unskippable.
1638 : */
1639 297576 : vacrel->current_block = next_block;
1640 297576 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1641 297576 : *((uint8 *) per_buffer_data) = blk_info;
1642 297576 : return vacrel->current_block;
1643 : }
1644 : else
1645 : {
1646 : /*
1647 : * 3. We reached the next unskippable block. Process it. On next
1648 : * iteration, we will be back in state 1.
1649 : */
1650 : Assert(next_block == vacrel->next_unskippable_block);
1651 :
1652 850772 : vacrel->current_block = next_block;
1653 850772 : if (vacrel->next_unskippable_allvis)
1654 89688 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1655 850772 : if (vacrel->next_unskippable_eager_scanned)
1656 0 : blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
1657 850772 : *((uint8 *) per_buffer_data) = blk_info;
1658 850772 : return vacrel->current_block;
1659 : }
1660 : }
1661 :
1662 : /*
1663 : * Find the next unskippable block in a vacuum scan using the visibility map.
1664 : * The next unskippable block and its visibility information are updated in
1665 : * vacrel.
1666 : *
1667 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1668 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1669 : * was concurrently cleared, though. All that matters is that caller scan all
1670 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1671 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1672 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1673 : * to skip such a range is actually made, making everything safe.)
1674 : */
1675 : static void
1676 850772 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1677 : {
1678 850772 : BlockNumber rel_pages = vacrel->rel_pages;
1679 850772 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1680 850772 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1681 850772 : bool next_unskippable_eager_scanned = false;
1682 : bool next_unskippable_allvis;
1683 :
1684 850772 : *skipsallvis = false;
1685 :
1686 949092 : for (;; next_unskippable_block++)
1687 949092 : {
1688 1799864 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1689 : next_unskippable_block,
1690 : &next_unskippable_vmbuffer);
1691 :
1692 1799864 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1693 :
1694 : /*
1695 : * At the start of each eager scan region, normal vacuums with eager
1696 : * scanning enabled reset the failure counter, allowing vacuum to
1697 : * resume eager scanning if it had been suspended in the previous
1698 : * region.
1699 : */
1700 1799864 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1701 : {
1702 0 : vacrel->eager_scan_remaining_fails =
1703 0 : vacrel->eager_scan_max_fails_per_region;
1704 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1705 : }
1706 :
1707 : /*
1708 : * A block is unskippable if it is not all visible according to the
1709 : * visibility map.
1710 : */
1711 1799864 : if (!next_unskippable_allvis)
1712 : {
1713 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1714 761084 : break;
1715 : }
1716 :
1717 : /*
1718 : * Caller must scan the last page to determine whether it has tuples
1719 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1720 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1721 : * lock on rel to attempt a truncation that fails anyway, just because
1722 : * there are tuples on the last page (it is likely that there will be
1723 : * tuples on other nearby pages as well, but those can be skipped).
1724 : *
1725 : * Implement this by always treating the last block as unsafe to skip.
1726 : */
1727 1038780 : if (next_unskippable_block == rel_pages - 1)
1728 88876 : break;
1729 :
1730 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1731 949904 : if (!vacrel->skipwithvm)
1732 812 : break;
1733 :
1734 : /*
1735 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1736 : * already frozen by now), so this page can be skipped.
1737 : */
1738 949092 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1739 943936 : continue;
1740 :
1741 : /*
1742 : * Aggressive vacuums cannot skip any all-visible pages that are not
1743 : * also all-frozen.
1744 : */
1745 5156 : if (vacrel->aggressive)
1746 0 : break;
1747 :
1748 : /*
1749 : * Normal vacuums with eager scanning enabled only skip all-visible
1750 : * but not all-frozen pages if they have hit the failure limit for the
1751 : * current eager scan region.
1752 : */
1753 5156 : if (vacrel->eager_scan_remaining_fails > 0)
1754 : {
1755 0 : next_unskippable_eager_scanned = true;
1756 0 : break;
1757 : }
1758 :
1759 : /*
1760 : * All-visible blocks are safe to skip in a normal vacuum. But
1761 : * remember that the final range contains such a block for later.
1762 : */
1763 5156 : *skipsallvis = true;
1764 : }
1765 :
1766 : /* write the local variables back to vacrel */
1767 850772 : vacrel->next_unskippable_block = next_unskippable_block;
1768 850772 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1769 850772 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1770 850772 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1771 850772 : }
1772 :
1773 : /*
1774 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1775 : *
1776 : * Must call here to handle both new and empty pages before calling
1777 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1778 : * with new or empty pages.
1779 : *
1780 : * It's necessary to consider new pages as a special case, since the rules for
1781 : * maintaining the visibility map and FSM with empty pages are a little
1782 : * different (though new pages can be truncated away during rel truncation).
1783 : *
1784 : * Empty pages are not really a special case -- they're just heap pages that
1785 : * have no allocated tuples (including even LP_UNUSED items). You might
1786 : * wonder why we need to handle them here all the same. It's only necessary
1787 : * because of a corner-case involving a hard crash during heap relation
1788 : * extension. If we ever make relation-extension crash safe, then it should
1789 : * no longer be necessary to deal with empty pages here (or new pages, for
1790 : * that matter).
1791 : *
1792 : * Caller must hold at least a shared lock. We might need to escalate the
1793 : * lock in the empty-page case, so the type of lock the caller holds needs
1794 : * to be specified via the 'sharelock' argument.
1795 : *
1796 : * Returns false in common case where caller should go on to call
1797 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1798 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1799 : * behalf.
1800 : *
1801 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1802 : * is passed here because neither empty nor new pages can be eagerly frozen.
1803 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1804 : * the same time that they are set all-visible, and we don't eagerly scan
1805 : * frozen pages.
1806 : */
1807 : static bool
1808 1148348 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1809 : Page page, bool sharelock, Buffer vmbuffer)
1810 : {
1811 : Size freespace;
1812 :
1813 1148348 : if (PageIsNew(page))
1814 : {
1815 : /*
1816 : * All-zeroes pages can be left over if a backend extends the relation
1817 : * by a single page but crashes before the newly initialized page has
1818 : * been written out, or when a backend bulk-extends the relation
1819 : * (which creates a number of empty pages at the tail end of the
1820 : * relation) and then enters them into the FSM.
1821 : *
1822 : * Note we do not enter the page into the visibilitymap. That has the
1823 : * downside that we repeatedly visit this page in subsequent vacuums,
1824 : * but otherwise we'll never discover the space on a promoted standby.
1825 : * The harm of repeated checking ought to normally not be too bad. The
1826 : * space usually should be used at some point, otherwise there
1827 : * wouldn't be any regular vacuums.
1828 : *
1829 : * Make sure these pages are in the FSM, to ensure they can be reused.
1830 : * Do that by testing if there's any space recorded for the page. If
1831 : * not, enter it. We do so after releasing the lock on the heap page;
1832 : * the FSM is approximate, after all.
1833 : */
1834 1170 : UnlockReleaseBuffer(buf);
1835 :
1836 1170 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1837 : {
1838 858 : freespace = BLCKSZ - SizeOfPageHeaderData;
1839 :
1840 858 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1841 : }
1842 :
1843 1170 : return true;
1844 : }
1845 :
1846 1147178 : if (PageIsEmpty(page))
1847 : {
1848 : /*
1849 : * It seems likely that caller will always be able to get a cleanup
1850 : * lock on an empty page. But don't take any chances -- escalate to
1851 : * an exclusive lock (still don't need a cleanup lock, though).
1852 : */
1853 54 : if (sharelock)
1854 : {
1855 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1856 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1857 :
1858 0 : if (!PageIsEmpty(page))
1859 : {
1860 : /* page isn't new or empty -- keep lock and pin for now */
1861 0 : return false;
1862 : }
1863 : }
1864 : else
1865 : {
1866 : /* Already have a full cleanup lock (which is more than enough) */
1867 : }
1868 :
1869 : /*
1870 : * Unlike new pages, empty pages are always set all-visible and
1871 : * all-frozen.
1872 : */
1873 54 : if (!PageIsAllVisible(page))
1874 : {
1875 0 : START_CRIT_SECTION();
1876 :
1877 : /* mark buffer dirty before writing a WAL record */
1878 0 : MarkBufferDirty(buf);
1879 :
1880 : /*
1881 : * It's possible that another backend has extended the heap,
1882 : * initialized the page, and then failed to WAL-log the page due
1883 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1884 : * might try to replay our record setting the page all-visible and
1885 : * find that the page isn't initialized, which will cause a PANIC.
1886 : * To prevent that, check whether the page has been previously
1887 : * WAL-logged, and if not, do that now.
1888 : */
1889 0 : if (RelationNeedsWAL(vacrel->rel) &&
1890 0 : PageGetLSN(page) == InvalidXLogRecPtr)
1891 0 : log_newpage_buffer(buf, true);
1892 :
1893 0 : PageSetAllVisible(page);
1894 0 : visibilitymap_set(vacrel->rel, blkno, buf,
1895 : InvalidXLogRecPtr,
1896 : vmbuffer, InvalidTransactionId,
1897 : VISIBILITYMAP_ALL_VISIBLE |
1898 : VISIBILITYMAP_ALL_FROZEN);
1899 0 : END_CRIT_SECTION();
1900 :
1901 : /* Count the newly all-frozen pages for logging */
1902 0 : vacrel->vm_new_visible_pages++;
1903 0 : vacrel->vm_new_visible_frozen_pages++;
1904 : }
1905 :
1906 54 : freespace = PageGetHeapFreeSpace(page);
1907 54 : UnlockReleaseBuffer(buf);
1908 54 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1909 54 : return true;
1910 : }
1911 :
1912 : /* page isn't new or empty -- keep lock and pin */
1913 1147124 : return false;
1914 : }
1915 :
1916 : /* qsort comparator for sorting OffsetNumbers */
1917 : static int
1918 5064558 : cmpOffsetNumbers(const void *a, const void *b)
1919 : {
1920 5064558 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1921 : }
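/*
 * Usage sketch for the comparator (this mirrors the call made in
 * lazy_scan_prune() below):
 *
 *     qsort(presult.deadoffsets, presult.lpdead_items,
 *           sizeof(OffsetNumber), cmpOffsetNumbers);
 *
 * Offsets collected as {7, 2, 5}, for example, come out as {2, 5, 7}, the
 * ascending order that dead_items_add() expects.
 */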
1922 :
1923 : /*
1924 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1925 : *
1926 : * Caller must hold pin and buffer cleanup lock on the buffer.
1927 : *
1928 : * vmbuffer is the buffer containing the VM block with visibility information
1929 : * for the heap block, blkno. all_visible_according_to_vm is the saved
1930 : * visibility status of the heap block looked up earlier by the caller. We
1931 : * won't rely entirely on this status, as it may be out of date.
1932 : *
1933 : * *has_lpdead_items is set to true or false depending on whether, upon return
1934 : * from this function, any LP_DEAD items are still present on the page.
1935 : *
1936 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
1937 : * VM. The caller currently only uses this for determining whether an eagerly
1938 : * scanned page was successfully set all-frozen.
1939 : *
1940 : * Returns the number of tuples deleted from the page during HOT pruning.
1941 : */
1942 : static int
1943 1146940 : lazy_scan_prune(LVRelState *vacrel,
1944 : Buffer buf,
1945 : BlockNumber blkno,
1946 : Page page,
1947 : Buffer vmbuffer,
1948 : bool all_visible_according_to_vm,
1949 : bool *has_lpdead_items,
1950 : bool *vm_page_frozen)
1951 : {
1952 1146940 : Relation rel = vacrel->rel;
1953 : PruneFreezeResult presult;
1954 1146940 : int prune_options = 0;
1955 :
1956 : Assert(BufferGetBlockNumber(buf) == blkno);
1957 :
1958 : /*
1959 : * Prune all HOT-update chains and potentially freeze tuples on this page.
1960 : *
1961 : * If the relation has no indexes, we can immediately mark would-be dead
1962 : * items LP_UNUSED.
1963 : *
1964 : * The number of tuples removed from the page is returned in
1965 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
1966 : * the final value of presult.lpdead_items can be thought of as the number of
1967 : * tuples that were deleted from indexes.
1968 : *
1969 : * We will update the VM after collecting LP_DEAD items and freezing
1970 : * tuples. Pruning will have determined whether or not the page is
1971 : * all-visible.
1972 : */
1973 1146940 : prune_options = HEAP_PAGE_PRUNE_FREEZE;
1974 1146940 : if (vacrel->nindexes == 0)
1975 34818 : prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1976 :
1977 1146940 : heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1978 : &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1979 : &vacrel->offnum,
1980 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1981 :
1982 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
1983 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
1984 :
1985 1146940 : if (presult.nfrozen > 0)
1986 : {
1987 : /*
1988 : * We don't increment the new_frozen_tuple_pages instrumentation
1989 : * counter when nfrozen == 0, since it only counts pages with newly
1990 : * frozen tuples (don't confuse that with pages newly set all-frozen
1991 : * in VM).
1992 : */
1993 44738 : vacrel->new_frozen_tuple_pages++;
1994 : }
1995 :
1996 : /*
1997 : * VACUUM will call heap_page_is_all_visible() during the second pass over
1998 : * the heap to determine all_visible and all_frozen for the page -- this
1999 : * is a specialized version of the logic from this function. Now that
2000 : * we've finished pruning and freezing, make sure that we're in total
2001 : * agreement with heap_page_is_all_visible() using an assertion.
2002 : */
2003 : #ifdef USE_ASSERT_CHECKING
2004 : /* Note that all_frozen value does not matter when !all_visible */
2005 : if (presult.all_visible)
2006 : {
2007 : TransactionId debug_cutoff;
2008 : bool debug_all_frozen;
2009 :
2010 : Assert(presult.lpdead_items == 0);
2011 :
2012 : if (!heap_page_is_all_visible(vacrel, buf,
2013 : &debug_cutoff, &debug_all_frozen))
2014 : Assert(false);
2015 :
2016 : Assert(presult.all_frozen == debug_all_frozen);
2017 :
2018 : Assert(!TransactionIdIsValid(debug_cutoff) ||
2019 : debug_cutoff == presult.vm_conflict_horizon);
2020 : }
2021 : #endif
2022 :
2023 : /*
2024 : * Now save details of the LP_DEAD items from the page in vacrel
2025 : */
2026 1146940 : if (presult.lpdead_items > 0)
2027 : {
2028 28072 : vacrel->lpdead_item_pages++;
2029 :
2030 : /*
2031 : * deadoffsets are collected incrementally in
2032 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2033 : * in an indeterminate order, but dead_items_add requires them to be
2034 : * sorted.
2035 : */
2036 28072 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2037 : cmpOffsetNumbers);
2038 :
2039 28072 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2040 : }
2041 :
2042 : /* Finally, add page-local counts to whole-VACUUM counts */
2043 1146940 : vacrel->tuples_deleted += presult.ndeleted;
2044 1146940 : vacrel->tuples_frozen += presult.nfrozen;
2045 1146940 : vacrel->lpdead_items += presult.lpdead_items;
2046 1146940 : vacrel->live_tuples += presult.live_tuples;
2047 1146940 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2048 :
2049 : /* Can't truncate this page */
2050 1146940 : if (presult.hastup)
2051 1132906 : vacrel->nonempty_pages = blkno + 1;
2052 :
2053 : /* Did we find LP_DEAD items? */
2054 1146940 : *has_lpdead_items = (presult.lpdead_items > 0);
2055 :
2056 : Assert(!presult.all_visible || !(*has_lpdead_items));
2057 :
2058 : /*
2059 : * Handle setting visibility map bit based on information from the VM (as
2060 : * of last heap_vac_scan_next_block() call), and from all_visible and
2061 : * all_frozen variables
2062 : */
2063 1146940 : if (!all_visible_according_to_vm && presult.all_visible)
2064 70140 : {
2065 : uint8 old_vmbits;
2066 70140 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2067 :
2068 70140 : if (presult.all_frozen)
2069 : {
2070 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2071 53026 : flags |= VISIBILITYMAP_ALL_FROZEN;
2072 : }
2073 :
2074 : /*
2075 : * It should never be the case that the visibility map page is set
2076 : * while the page-level bit is clear, but the reverse is allowed (if
2077 : * checksums are not enabled). Regardless, set both bits so that we
2078 : * get back in sync.
2079 : *
2080 : * NB: If the heap page is all-visible but the VM bit is not set, we
2081 : * don't need to dirty the heap page. However, if checksums are
2082 : * enabled, we do need to make sure that the heap page is dirtied
2083 : * before passing it to visibilitymap_set(), because it may be logged.
2084 : * Given that this situation should only happen in rare cases after a
2085 : * crash, it is not worth optimizing.
2086 : */
2087 70140 : PageSetAllVisible(page);
2088 70140 : MarkBufferDirty(buf);
2089 70140 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2090 : InvalidXLogRecPtr,
2091 : vmbuffer, presult.vm_conflict_horizon,
2092 : flags);
2093 :
2094 : /*
2095 : * If the page wasn't already set all-visible and/or all-frozen in the
2096 : * VM, count it as newly set for logging.
2097 : */
2098 70140 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2099 : {
2100 70140 : vacrel->vm_new_visible_pages++;
2101 70140 : if (presult.all_frozen)
2102 : {
2103 53026 : vacrel->vm_new_visible_frozen_pages++;
2104 53026 : *vm_page_frozen = true;
2105 : }
2106 : }
2107 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2108 0 : presult.all_frozen)
2109 : {
2110 0 : vacrel->vm_new_frozen_pages++;
2111 0 : *vm_page_frozen = true;
2112 : }
2113 : }
2114 :
2115 : /*
2116 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
2117 : * page-level bit is clear. However, it's possible that the bit got
2118 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
2119 : * with buffer lock before concluding that the VM is corrupt.
2120 : */
2121 1076800 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
2122 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
2123 : {
2124 0 : ereport(WARNING,
2125 : (errcode(ERRCODE_DATA_CORRUPTED),
2126 : errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2127 : vacrel->relname, blkno)));
2128 :
2129 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2130 : VISIBILITYMAP_VALID_BITS);
2131 : }
2132 :
2133 : /*
2134 : * It's possible for the value returned by
2135 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2136 : * wrong for us to see tuples that appear to not be visible to everyone
2137 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2138 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
2139 : * conservative and sometimes returns a value that's unnecessarily small,
2140 : * so if we see that contradiction it just means that the tuples that we
2141 : * think are not visible to everyone yet actually are, and the
2142 : * PD_ALL_VISIBLE flag is correct.
2143 : *
2144 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2145 : * however.
2146 : */
2147 1076800 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
2148 : {
2149 0 : ereport(WARNING,
2150 : (errcode(ERRCODE_DATA_CORRUPTED),
2151 : errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2152 : vacrel->relname, blkno)));
2153 :
2154 0 : PageClearAllVisible(page);
2155 0 : MarkBufferDirty(buf);
2156 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2157 : VISIBILITYMAP_VALID_BITS);
2158 : }
2159 :
2160 : /*
2161 : * If the all-visible page is all-frozen but not marked as such yet, mark
2162 : * it as all-frozen. Note that all_frozen is only valid if all_visible is
2163 : * true, so we must check both all_visible and all_frozen.
2164 : */
2165 1076800 : else if (all_visible_according_to_vm && presult.all_visible &&
2166 387036 : presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
2167 : {
2168 : uint8 old_vmbits;
2169 :
2170 : /*
2171 : * Avoid relying on all_visible_according_to_vm as a proxy for the
2172 : * page-level PD_ALL_VISIBLE bit being set, since it might have become
2173 : * stale -- even when all_visible is set
2174 : */
2175 42 : if (!PageIsAllVisible(page))
2176 : {
2177 0 : PageSetAllVisible(page);
2178 0 : MarkBufferDirty(buf);
2179 : }
2180 :
2181 : /*
2182 : * Set the page all-frozen (and all-visible) in the VM.
2183 : *
2184 : * We can pass InvalidTransactionId as our cutoff_xid, since a
2185 : * snapshotConflictHorizon sufficient to make everything safe for REDO
2186 : * was logged when the page's tuples were frozen.
2187 : */
2188 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2189 42 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2190 : InvalidXLogRecPtr,
2191 : vmbuffer, InvalidTransactionId,
2192 : VISIBILITYMAP_ALL_VISIBLE |
2193 : VISIBILITYMAP_ALL_FROZEN);
2194 :
2195 : /*
2196 : * The page was likely already set all-visible in the VM. However,
2197 : * there is a small chance that it was modified sometime between
2198 : * setting all_visible_according_to_vm and checking the visibility
2199 : * during pruning. Check the return value of old_vmbits anyway to
2200 : * ensure the visibility map counters used for logging are accurate.
2201 : */
2202 42 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2203 : {
2204 0 : vacrel->vm_new_visible_pages++;
2205 0 : vacrel->vm_new_visible_frozen_pages++;
2206 0 : *vm_page_frozen = true;
2207 : }
2208 :
2209 : /*
2210 : * We already checked that the page was not set all-frozen in the VM
2211 : * above, so we don't need to test the value of old_vmbits.
2212 : */
2213 : else
2214 : {
2215 42 : vacrel->vm_new_frozen_pages++;
2216 42 : *vm_page_frozen = true;
2217 : }
2218 : }
2219 :
2220 1146940 : return presult.ndeleted;
2221 : }
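/*
 * Summary of the visibility map handling in lazy_scan_prune() above (a
 * restatement of its four branches, for clarity):
 *
 * 1. VM bit clear, page found all-visible: set PD_ALL_VISIBLE and the VM's
 *    ALL_VISIBLE bit, plus ALL_FROZEN if the page is also all-frozen.
 * 2. VM bit set but PD_ALL_VISIBLE clear: report corruption and clear the
 *    VM bits.
 * 3. LP_DEAD items remain on a page marked PD_ALL_VISIBLE: report corruption
 *    and clear both the page-level bit and the VM bits.
 * 4. VM says all-visible, the page is all-visible and all-frozen, but the
 *    VM's ALL_FROZEN bit is not yet set: set ALL_VISIBLE | ALL_FROZEN.
 */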
2222 :
2223 : /*
2224 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2225 : *
2226 : * Caller need only hold a pin and share lock on the buffer, unlike
2227 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2228 : * performed here, it's quite possible that an earlier opportunistic pruning
2229 : * operation left LP_DEAD items behind. We'll at least collect any such items
2230 : * in dead_items for removal from indexes.
2231 : *
2232 : * For aggressive VACUUM callers, we may return false to indicate that a full
2233 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2234 : * necessary when the aggressive VACUUM needs to freeze an XID (or MXID) from
2235 : * one or more tuples on the page. We always return true for non-aggressive
2236 : * callers.
2237 : *
2238 : * If this function returns true, *has_lpdead_items gets set to true or false
2239 : * depending on whether, upon return from this function, any LP_DEAD items are
2240 : * present on the page. If this function returns false, *has_lpdead_items
2241 : * is not updated.
2242 : */
2243 : static bool
2244 338 : lazy_scan_noprune(LVRelState *vacrel,
2245 : Buffer buf,
2246 : BlockNumber blkno,
2247 : Page page,
2248 : bool *has_lpdead_items)
2249 : {
2250 : OffsetNumber offnum,
2251 : maxoff;
2252 : int lpdead_items,
2253 : live_tuples,
2254 : recently_dead_tuples,
2255 : missed_dead_tuples;
2256 : bool hastup;
2257 : HeapTupleHeader tupleheader;
2258 338 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2259 338 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2260 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2261 :
2262 : Assert(BufferGetBlockNumber(buf) == blkno);
2263 :
2264 338 : hastup = false; /* for now */
2265 :
2266 338 : lpdead_items = 0;
2267 338 : live_tuples = 0;
2268 338 : recently_dead_tuples = 0;
2269 338 : missed_dead_tuples = 0;
2270 :
2271 338 : maxoff = PageGetMaxOffsetNumber(page);
2272 338 : for (offnum = FirstOffsetNumber;
2273 8164 : offnum <= maxoff;
2274 7826 : offnum = OffsetNumberNext(offnum))
2275 : {
2276 : ItemId itemid;
2277 : HeapTupleData tuple;
2278 :
2279 7980 : vacrel->offnum = offnum;
2280 7980 : itemid = PageGetItemId(page, offnum);
2281 :
2282 7980 : if (!ItemIdIsUsed(itemid))
2283 1976 : continue;
2284 :
2285 6430 : if (ItemIdIsRedirected(itemid))
2286 : {
2287 426 : hastup = true;
2288 426 : continue;
2289 : }
2290 :
2291 6004 : if (ItemIdIsDead(itemid))
2292 : {
2293 : /*
2294 : * Deliberately don't set hastup=true here. See same point in
2295 : * lazy_scan_prune for an explanation.
2296 : */
2297 0 : deadoffsets[lpdead_items++] = offnum;
2298 0 : continue;
2299 : }
2300 :
2301 6004 : hastup = true; /* page prevents rel truncation */
2302 6004 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2303 6004 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2304 : &NoFreezePageRelfrozenXid,
2305 : &NoFreezePageRelminMxid))
2306 : {
2307 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2308 282 : if (vacrel->aggressive)
2309 : {
2310 : /*
2311 : * Aggressive VACUUMs must always be able to advance rel's
2312 : * relfrozenxid to a value >= FreezeLimit (and be able to
2313 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2314 : * The ongoing aggressive VACUUM won't be able to do that
2315 : * unless it can freeze an XID (or MXID) from this tuple now.
2316 : *
2317 : * The only safe option is to have caller perform processing
2318 : * of this page using lazy_scan_prune. Caller might have to
2319 : * wait a while for a cleanup lock, but it can't be helped.
2320 : */
2321 154 : vacrel->offnum = InvalidOffsetNumber;
2322 154 : return false;
2323 : }
2324 :
2325 : /*
2326 : * Non-aggressive VACUUMs are under no obligation to advance
2327 : * relfrozenxid (even by one XID). We can be much laxer here.
2328 : *
2329 : * Currently we always just accept an older final relfrozenxid
2330 : * and/or relminmxid value. We never make caller wait or work a
2331 : * little harder, even when it likely makes sense to do so.
2332 : */
2333 : }
2334 :
2335 5850 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2336 5850 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2337 5850 : tuple.t_len = ItemIdGetLength(itemid);
2338 5850 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2339 :
2340 5850 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2341 : buf))
2342 : {
2343 5842 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2344 : case HEAPTUPLE_LIVE:
2345 :
2346 : /*
2347 : * Count both cases as live, just like lazy_scan_prune
2348 : */
2349 5842 : live_tuples++;
2350 :
2351 5842 : break;
2352 4 : case HEAPTUPLE_DEAD:
2353 :
2354 : /*
2355 : * There is some useful work for pruning to do, that won't be
2356 : * done due to failure to get a cleanup lock.
2357 : */
2358 4 : missed_dead_tuples++;
2359 4 : break;
2360 4 : case HEAPTUPLE_RECENTLY_DEAD:
2361 :
2362 : /*
2363 : * Count in recently_dead_tuples, just like lazy_scan_prune
2364 : */
2365 4 : recently_dead_tuples++;
2366 4 : break;
2367 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2368 :
2369 : /*
2370 : * Do not count these rows as live, just like lazy_scan_prune
2371 : */
2372 0 : break;
2373 0 : default:
2374 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2375 : break;
2376 : }
2377 : }
2378 :
2379 184 : vacrel->offnum = InvalidOffsetNumber;
2380 :
2381 : /*
2382 : * By here we know for sure that caller can put off freezing and pruning
2383 : * this particular page until the next VACUUM. Remember its details now.
2384 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2385 : */
2386 184 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2387 184 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2388 :
2389 : /* Save any LP_DEAD items found on the page in dead_items */
2390 184 : if (vacrel->nindexes == 0)
2391 : {
2392 : /* Using one-pass strategy (since table has no indexes) */
2393 0 : if (lpdead_items > 0)
2394 : {
2395 : /*
2396 : * Perfunctory handling for the corner case where a single pass
2397 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2398 : * that there are one or more LP_DEAD items: just count the LP_DEAD
2399 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2400 : * but it beats having to maintain specialized heap vacuuming code
2401 : * forever, for vanishingly little benefit.)
2402 : */
2403 0 : hastup = true;
2404 0 : missed_dead_tuples += lpdead_items;
2405 : }
2406 : }
2407 184 : else if (lpdead_items > 0)
2408 : {
2409 : /*
2410 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2411 : * indexes will be deleted during index vacuuming (and then marked
2412 : * LP_UNUSED in the heap)
2413 : */
2414 0 : vacrel->lpdead_item_pages++;
2415 :
2416 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2417 :
2418 0 : vacrel->lpdead_items += lpdead_items;
2419 : }
2420 :
2421 : /*
2422 : * Finally, add relevant page-local counts to whole-VACUUM counts
2423 : */
2424 184 : vacrel->live_tuples += live_tuples;
2425 184 : vacrel->recently_dead_tuples += recently_dead_tuples;
2426 184 : vacrel->missed_dead_tuples += missed_dead_tuples;
2427 184 : if (missed_dead_tuples > 0)
2428 4 : vacrel->missed_dead_pages++;
2429 :
2430 : /* Can't truncate this page */
2431 184 : if (hastup)
2432 184 : vacrel->nonempty_pages = blkno + 1;
2433 :
2434 : /* Did we find LP_DEAD items? */
2435 184 : *has_lpdead_items = (lpdead_items > 0);
2436 :
2437 : /* Caller won't need to call lazy_scan_prune with same page */
2438 184 : return true;
2439 : }
2440 :
2441 : /*
2442 : * Main entry point for index vacuuming and heap vacuuming.
2443 : *
2444 : * Removes items collected in dead_items from table's indexes, then marks the
2445 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2446 : * for full details.
2447 : *
2448 : * Also empties dead_items, freeing up space for later TIDs.
2449 : *
2450 : * We may choose to bypass index vacuuming at this point, though only when the
2451 : * ongoing VACUUM operation will definitely only have one index scan/round of
2452 : * index vacuuming.
2453 : */
2454 : static void
2455 1222 : lazy_vacuum(LVRelState *vacrel)
2456 : {
2457 : bool bypass;
2458 :
2459 : /* Should not end up here with no indexes */
2460 : Assert(vacrel->nindexes > 0);
2461 : Assert(vacrel->lpdead_item_pages > 0);
2462 :
2463 1222 : if (!vacrel->do_index_vacuuming)
2464 : {
2465 : Assert(!vacrel->do_index_cleanup);
2466 20 : dead_items_reset(vacrel);
2467 20 : return;
2468 : }
2469 :
2470 : /*
2471 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2472 : *
2473 : * We currently only do this in cases where the number of LP_DEAD items
2474 : * for the entire VACUUM operation is close to zero. This avoids sharp
2475 : * discontinuities in the duration and overhead of successive VACUUM
2476 : * operations that run against the same table with a fixed workload.
2477 : * Ideally, successive VACUUM operations will behave as if there are
2478 : * exactly zero LP_DEAD items in cases where there are close to zero.
2479 : *
2480 : * This is likely to be helpful with a table that is continually affected
2481 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2482 : * have small aberrations that lead to just a few heap pages retaining
2483 : * only one or two LP_DEAD items. This is pretty common; even when the
2484 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2485 : * impossible to predict whether HOT will be applied in 100% of cases.
2486 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2487 : * HOT through careful tuning.
2488 : */
2489 1202 : bypass = false;
2490 1202 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2491 : {
2492 : BlockNumber threshold;
2493 :
2494 : Assert(vacrel->num_index_scans == 0);
2495 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2496 : Assert(vacrel->do_index_vacuuming);
2497 : Assert(vacrel->do_index_cleanup);
2498 :
2499 : /*
2500 : * This crossover point at which we'll start to do index vacuuming is
2501 : * expressed as a percentage of the total number of heap pages in the
2502 : * table that are known to have at least one LP_DEAD item. This is
2503 : * much more important than the total number of LP_DEAD items, since
2504 : * it's a proxy for the number of heap pages whose visibility map bits
2505 : * cannot be set on account of bypassing index and heap vacuuming.
2506 : *
2507 : * We apply one further precautionary test: the space currently used
2508 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2509 : * not exceed 32MB. This limits the risk that we will bypass index
2510 : * vacuuming again and again until eventually there is a VACUUM whose
2511 : * dead_items space is not CPU cache resident.
2512 : *
2513 : * We don't take any special steps to remember the LP_DEAD items (such
2514 : * as counting them in our final update to the stats system) when the
2515 : * optimization is applied. Though the accounting used in analyze.c's
2516 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2517 : * rows in its own stats report, that's okay. The discrepancy should
2518 : * be negligible. If this optimization is ever expanded to cover more
2519 : * cases then this may need to be reconsidered.
2520 : */
2521 1176 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2522 1178 : bypass = (vacrel->lpdead_item_pages < threshold &&
2523 2 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2524 : }
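    /*
     * For illustration (the BYPASS_THRESHOLD_PAGES fraction is an assumption
     * here, not shown in this excerpt): with a 100,000-page table and a 2%
     * threshold, index vacuuming is bypassed only when fewer than 2,000 heap
     * pages contain LP_DEAD items and the collected dead-item TIDs occupy
     * less than 32MB in the TID store.
     */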
2525 :
2526 1202 : if (bypass)
2527 : {
2528 : /*
2529 : * There are almost zero TIDs. Behave as if there were precisely
2530 : * zero: bypass index vacuuming, but do index cleanup.
2531 : *
2532 : * We expect that the ongoing VACUUM operation will finish very
2533 : * quickly, so there is no point in considering speeding up as a
2534 : * failsafe against wraparound failure. (Index cleanup is expected to
2535 : * finish very quickly in cases where there were no ambulkdelete()
2536 : * calls.)
2537 : */
2538 2 : vacrel->do_index_vacuuming = false;
2539 : }
2540 1200 : else if (lazy_vacuum_all_indexes(vacrel))
2541 : {
2542 : /*
2543 : * We successfully completed a round of index vacuuming. Do related
2544 : * heap vacuuming now.
2545 : */
2546 1200 : lazy_vacuum_heap_rel(vacrel);
2547 : }
2548 : else
2549 : {
2550 : /*
2551 : * Failsafe case.
2552 : *
2553 : * We attempted index vacuuming, but didn't finish a full round/full
2554 : * index scan. This happens when relfrozenxid or relminmxid is too
2555 : * far in the past.
2556 : *
2557 : * From this point on the VACUUM operation will do no further index
2558 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2559 : * back here again.
2560 : */
2561 : Assert(VacuumFailsafeActive);
2562 : }
2563 :
2564 : /*
2565 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2566 : * vacuum)
2567 : */
2568 1202 : dead_items_reset(vacrel);
2569 : }
2570 :
2571 : /*
2572 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2573 : *
2574 : * Returns true in the common case when all indexes were successfully
2575 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2576 : * VACUUM operation is at risk of taking too long to finish, leading to
2577 : * wraparound failure.
2578 : */
2579 : static bool
2580 1200 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2581 : {
2582 1200 : bool allindexes = true;
2583 1200 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2584 1200 : const int progress_start_index[] = {
2585 : PROGRESS_VACUUM_PHASE,
2586 : PROGRESS_VACUUM_INDEXES_TOTAL
2587 : };
2588 1200 : const int progress_end_index[] = {
2589 : PROGRESS_VACUUM_INDEXES_TOTAL,
2590 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2591 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2592 : };
2593 : int64 progress_start_val[2];
2594 : int64 progress_end_val[3];
2595 :
2596 : Assert(vacrel->nindexes > 0);
2597 : Assert(vacrel->do_index_vacuuming);
2598 : Assert(vacrel->do_index_cleanup);
2599 :
2600 : /* Precheck for XID wraparound emergencies */
2601 1200 : if (lazy_check_wraparound_failsafe(vacrel))
2602 : {
2603 : /* Wraparound emergency -- don't even start an index scan */
2604 0 : return false;
2605 : }
2606 :
2607 : /*
2608 : * Report that we are now vacuuming indexes and the number of indexes to
2609 : * vacuum.
2610 : */
2611 1200 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2612 1200 : progress_start_val[1] = vacrel->nindexes;
2613 1200 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2614 :
2615 1200 : if (!ParallelVacuumIsActive(vacrel))
2616 : {
2617 3450 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2618 : {
2619 2266 : Relation indrel = vacrel->indrels[idx];
2620 2266 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2621 :
2622 2266 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2623 : old_live_tuples,
2624 : vacrel);
2625 :
2626 : /* Report the number of indexes vacuumed */
2627 2266 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2628 2266 : idx + 1);
2629 :
2630 2266 : if (lazy_check_wraparound_failsafe(vacrel))
2631 : {
2632 : /* Wraparound emergency -- end current index scan */
2633 0 : allindexes = false;
2634 0 : break;
2635 : }
2636 : }
2637 : }
2638 : else
2639 : {
2640 : /* Outsource everything to parallel variant */
2641 16 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2642 : vacrel->num_index_scans);
2643 :
2644 : /*
2645 : * Do a postcheck to consider applying wraparound failsafe now. Note
2646 : * that parallel VACUUM only gets the precheck and this postcheck.
2647 : */
2648 16 : if (lazy_check_wraparound_failsafe(vacrel))
2649 0 : allindexes = false;
2650 : }
2651 :
2652 : /*
2653 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2654 : * each call here (except calls where we choose to do the failsafe). This
2655 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2656 : * of the failsafe triggering, which prevents the next call from taking
2657 : * place).
2658 : */
2659 : Assert(vacrel->num_index_scans > 0 ||
2660 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2661 : Assert(allindexes || VacuumFailsafeActive);
2662 :
2663 : /*
2664 : * Increase and report the number of index scans. Also, we reset
2665 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2666 : *
2667 : * We deliberately include the case where we started a round of bulk
2668 : * deletes that we weren't able to finish due to the failsafe triggering.
2669 : */
2670 1200 : vacrel->num_index_scans++;
2671 1200 : progress_end_val[0] = 0;
2672 1200 : progress_end_val[1] = 0;
2673 1200 : progress_end_val[2] = vacrel->num_index_scans;
2674 1200 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2675 :
2676 1200 : return allindexes;
2677 : }
2678 :
2679 : /*
2680 : * Read stream callback for vacuum's third phase (second pass over the heap).
2681 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2682 : * if there are no further blocks to vacuum.
2683 : *
2684 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2685 : */
2686 : static BlockNumber
2687 24674 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2688 : void *callback_private_data,
2689 : void *per_buffer_data)
2690 : {
2691 24674 : TidStoreIter *iter = callback_private_data;
2692 : TidStoreIterResult *iter_result;
2693 :
2694 24674 : iter_result = TidStoreIterateNext(iter);
2695 24674 : if (iter_result == NULL)
2696 1200 : return InvalidBlockNumber;
2697 :
2698 : /*
2699 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2700 : * It is safe to copy the result, according to TidStoreIterateNext().
2701 : */
2702 23474 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2703 :
2704 23474 : return iter_result->blkno;
2705 : }
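/*
 * Sketch of the phase III iteration pattern (every call named here appears
 * in lazy_vacuum_heap_rel() below):
 *
 *     iter = TidStoreBeginIterate(vacrel->dead_items);
 *     stream = read_stream_begin_relation(..., vacuum_reap_lp_read_stream_next,
 *                                         iter, sizeof(TidStoreIterResult));
 *     while (BufferIsValid(buf = read_stream_next_buffer(stream,
 *                                                        (void **) &iter_result)))
 *         ... TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets)) ...
 *     read_stream_end(stream);
 *     TidStoreEndIterate(iter);
 *
 * The callback above advances the TID store iterator one block at a time and
 * copies each TidStoreIterResult into the stream's per-buffer data.
 */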
2706 :
2707 : /*
2708 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2709 : *
2710 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2711 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2712 : *
2713 : * We may also be able to truncate the line pointer array of the heap pages we
2714 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2715 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2716 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2717 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2718 : * page's line pointer array).
2719 : *
2720 : * Note: the reason for doing this as a second pass is we cannot remove the
2721 : * tuples until we've removed their index entries, and we want to process
2722 : * index entry removal in batches as large as possible.
2723 : */
2724 : static void
2725 1200 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2726 : {
2727 : ReadStream *stream;
2728 1200 : BlockNumber vacuumed_pages = 0;
2729 1200 : Buffer vmbuffer = InvalidBuffer;
2730 : LVSavedErrInfo saved_err_info;
2731 : TidStoreIter *iter;
2732 :
2733 : Assert(vacrel->do_index_vacuuming);
2734 : Assert(vacrel->do_index_cleanup);
2735 : Assert(vacrel->num_index_scans > 0);
2736 :
2737 : /* Report that we are now vacuuming the heap */
2738 1200 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2739 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2740 :
2741 : /* Update error traceback information */
2742 1200 : update_vacuum_error_info(vacrel, &saved_err_info,
2743 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2744 : InvalidBlockNumber, InvalidOffsetNumber);
2745 :
2746 1200 : iter = TidStoreBeginIterate(vacrel->dead_items);
2747 :
2748 : /*
2749 : * Set up the read stream for vacuum's second pass through the heap.
2750 : *
2751 : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2752 : * not need to wait for IO and does not perform locking. Once we support
2753             : * parallelism this should still be fine, as presumably a lock holder would
2754             : * never be blocked by IO while holding the lock.
2755 : */
2756 1200 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2757 : READ_STREAM_USE_BATCHING,
2758 : vacrel->bstrategy,
2759 : vacrel->rel,
2760 : MAIN_FORKNUM,
2761 : vacuum_reap_lp_read_stream_next,
2762 : iter,
2763 : sizeof(TidStoreIterResult));
2764 :
2765 : while (true)
2766 23474 : {
2767 : BlockNumber blkno;
2768 : Buffer buf;
2769 : Page page;
2770 : TidStoreIterResult *iter_result;
2771 : Size freespace;
2772 : OffsetNumber offsets[MaxOffsetNumber];
2773 : int num_offsets;
2774 :
2775 24674 : vacuum_delay_point(false);
2776 :
2777 24674 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2778 :
2779 : /* The relation is exhausted */
2780 24674 : if (!BufferIsValid(buf))
2781 1200 : break;
2782 :
2783 23474 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2784 :
2785 : Assert(iter_result);
2786 23474 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2787 : Assert(num_offsets <= lengthof(offsets));
2788 :
2789 : /*
2790 : * Pin the visibility map page in case we need to mark the page
2791 : * all-visible. In most cases this will be very cheap, because we'll
2792 : * already have the correct page pinned anyway.
2793 : */
2794 23474 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2795 :
2796 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2797 23474 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2798 23474 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2799 : num_offsets, vmbuffer);
2800 :
2801 : /* Now that we've vacuumed the page, record its available space */
2802 23474 : page = BufferGetPage(buf);
2803 23474 : freespace = PageGetHeapFreeSpace(page);
2804 :
2805 23474 : UnlockReleaseBuffer(buf);
2806 23474 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2807 23474 : vacuumed_pages++;
2808 : }
2809 :
2810 1200 : read_stream_end(stream);
2811 1200 : TidStoreEndIterate(iter);
2812 :
2813 1200 : vacrel->blkno = InvalidBlockNumber;
2814 1200 : if (BufferIsValid(vmbuffer))
2815 1200 : ReleaseBuffer(vmbuffer);
2816 :
2817 : /*
2818 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2819 : * the second heap pass. No more, no less.
2820 : */
2821 : Assert(vacrel->num_index_scans > 1 ||
2822 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2823 : vacuumed_pages == vacrel->lpdead_item_pages));
2824 :
2825 1200 : ereport(DEBUG2,
2826 : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2827 : vacrel->relname, vacrel->dead_items_info->num_items,
2828 : vacuumed_pages)));
2829 :
2830 : /* Revert to the previous phase information for error traceback */
2831 1200 : restore_vacuum_error_info(vacrel, &saved_err_info);
2832 1200 : }
2833 :
2834 : /*
2835 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2836 : * vacrel->dead_items store.
2837 : *
2838 : * Caller must have an exclusive buffer lock on the buffer (though a full
2839 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2840 : * a pin on blkno's visibility map page.
2841 : */
2842 : static void
2843 23474 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2844 : OffsetNumber *deadoffsets, int num_offsets,
2845 : Buffer vmbuffer)
2846 : {
2847 23474 : Page page = BufferGetPage(buffer);
2848 : OffsetNumber unused[MaxHeapTuplesPerPage];
2849 23474 : int nunused = 0;
2850 : TransactionId visibility_cutoff_xid;
2851 : bool all_frozen;
2852 : LVSavedErrInfo saved_err_info;
2853 :
2854 : Assert(vacrel->do_index_vacuuming);
2855 :
2856 23474 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2857 :
2858 : /* Update error traceback information */
2859 23474 : update_vacuum_error_info(vacrel, &saved_err_info,
2860 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2861 : InvalidOffsetNumber);
2862 :
2863 23474 : START_CRIT_SECTION();
2864 :
2865 1498502 : for (int i = 0; i < num_offsets; i++)
2866 : {
2867 : ItemId itemid;
2868 1475028 : OffsetNumber toff = deadoffsets[i];
2869 :
2870 1475028 : itemid = PageGetItemId(page, toff);
2871 :
2872 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2873 1475028 : ItemIdSetUnused(itemid);
2874 1475028 : unused[nunused++] = toff;
2875 : }
2876 :
2877 : Assert(nunused > 0);
2878 :
2879 : /* Attempt to truncate line pointer array now */
2880 23474 : PageTruncateLinePointerArray(page);
2881 :
2882 : /*
2883 : * Mark buffer dirty before we write WAL.
2884 : */
2885 23474 : MarkBufferDirty(buffer);
2886 :
2887 : /* XLOG stuff */
2888 23474 : if (RelationNeedsWAL(vacrel->rel))
2889 : {
2890 21772 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2891 : InvalidTransactionId,
2892 : false, /* no cleanup lock required */
2893 : PRUNE_VACUUM_CLEANUP,
2894 : NULL, 0, /* frozen */
2895 : NULL, 0, /* redirected */
2896 : NULL, 0, /* dead */
2897 : unused, nunused);
2898 : }
2899 :
2900 : /*
2901             : * End the critical section, so we can safely do visibility tests (which
2902             : * may need to perform IO and allocate memory!). If we crash now, the page
2903             : * (including the corresponding vm bit) might not be marked all-visible,
2904             : * but that's fine; a later vacuum will fix that.
2905 : */
2906 23474 : END_CRIT_SECTION();
2907 :
2908 : /*
2909 : * Now that we have removed the LP_DEAD items from the page, once again
2910 : * check if the page has become all-visible. The page is already marked
2911 : * dirty, exclusively locked, and, if needed, a full page image has been
2912 : * emitted.
2913 : */
2914 : Assert(!PageIsAllVisible(page));
2915 23474 : if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2916 : &all_frozen))
2917 : {
2918 23416 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2919 :
2920 23416 : if (all_frozen)
2921 : {
2922 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2923 18722 : flags |= VISIBILITYMAP_ALL_FROZEN;
2924 : }
2925 :
2926 23416 : PageSetAllVisible(page);
2927 23416 : visibilitymap_set(vacrel->rel, blkno, buffer,
2928 : InvalidXLogRecPtr,
2929 : vmbuffer, visibility_cutoff_xid,
2930 : flags);
2931 :
2932 : /* Count the newly set VM page for logging */
2933 23416 : vacrel->vm_new_visible_pages++;
2934 23416 : if (all_frozen)
2935 18722 : vacrel->vm_new_visible_frozen_pages++;
2936 : }
2937 :
2938 : /* Revert to the previous phase information for error traceback */
2939 23474 : restore_vacuum_error_info(vacrel, &saved_err_info);
2940 23474 : }
2941 :
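The reaping done here boils down to: flip the recorded LP_DEAD slots to LP_UNUSED, then let PageTruncateLinePointerArray() reclaim any trailing run of unused slots. Below is a toy, self-contained model of that idea; array indices stand in for line pointers, and none of this is the real page layout.

#include <stdio.h>

enum demo_lp { DEMO_LP_UNUSED, DEMO_LP_NORMAL, DEMO_LP_DEAD };

/*
 * Toy model: mark the recorded dead slots unused, then shrink the array by
 * dropping trailing unused slots, mirroring the ItemIdSetUnused() loop and
 * PageTruncateLinePointerArray() above.
 */
static int
demo_reap_and_truncate(enum demo_lp lp[], int nslots,
                       const int deadslots[], int ndead)
{
    for (int i = 0; i < ndead; i++)
        lp[deadslots[i]] = DEMO_LP_UNUSED;  /* phase III reaping */

    while (nslots > 0 && lp[nslots - 1] == DEMO_LP_UNUSED)
        nslots--;                           /* reclaim trailing slots */

    return nslots;                          /* new length of the array */
}

int
main(void)
{
    enum demo_lp lp[] = {DEMO_LP_NORMAL, DEMO_LP_DEAD, DEMO_LP_NORMAL,
                         DEMO_LP_DEAD, DEMO_LP_DEAD};
    int         dead[] = {1, 3, 4};

    /* Slots 3 and 4 become unused and fall off the end; slot 1 stays as an
     * unused hole because slot 2 is still in use.  Prints "new length: 3". */
    printf("new length: %d\n", demo_reap_and_truncate(lp, 5, dead, 3));
    return 0;
}
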
2942 : /*
2943             : * Trigger the failsafe to avoid wraparound failure when the target table has
2944             : * a relfrozenxid and/or relminmxid that is dangerously far in the past.
2945 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2946 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2947 : *
2948 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2949 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2950 : * that it started out with.
2951 : *
2952 : * Returns true when failsafe has been triggered.
2953 : */
2954 : static bool
2955 249332 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2956 : {
2957 : /* Don't warn more than once per VACUUM */
2958 249332 : if (VacuumFailsafeActive)
2959 0 : return true;
2960 :
2961 249332 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2962 : {
2963 86078 : const int progress_index[] = {
2964 : PROGRESS_VACUUM_INDEXES_TOTAL,
2965 : PROGRESS_VACUUM_INDEXES_PROCESSED
2966 : };
2967 86078 : int64 progress_val[2] = {0, 0};
2968 :
2969 86078 : VacuumFailsafeActive = true;
2970 :
2971 : /*
2972 : * Abandon use of a buffer access strategy to allow use of all of
2973 : * shared buffers. We assume the caller who allocated the memory for
2974 : * the BufferAccessStrategy will free it.
2975 : */
2976 86078 : vacrel->bstrategy = NULL;
2977 :
2978 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2979 86078 : vacrel->do_index_vacuuming = false;
2980 86078 : vacrel->do_index_cleanup = false;
2981 86078 : vacrel->do_rel_truncate = false;
2982 :
2983 : /* Reset the progress counters */
2984 86078 : pgstat_progress_update_multi_param(2, progress_index, progress_val);
2985 :
2986 86078 : ereport(WARNING,
2987 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2988 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2989 : vacrel->num_index_scans),
2990 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2991 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2992 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2993 :
2994 : /* Stop applying cost limits from this point on */
2995 86078 : VacuumCostActive = false;
2996 86078 : VacuumCostBalance = 0;
2997 :
2998 86078 : return true;
2999 : }
3000 :
3001 163254 : return false;
3002 : }
3003 :
3004 : /*
3005 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3006 : */
3007 : static void
3008 151714 : lazy_cleanup_all_indexes(LVRelState *vacrel)
3009 : {
3010 151714 : double reltuples = vacrel->new_rel_tuples;
3011 151714 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3012 151714 : const int progress_start_index[] = {
3013 : PROGRESS_VACUUM_PHASE,
3014 : PROGRESS_VACUUM_INDEXES_TOTAL
3015 : };
3016 151714 : const int progress_end_index[] = {
3017 : PROGRESS_VACUUM_INDEXES_TOTAL,
3018 : PROGRESS_VACUUM_INDEXES_PROCESSED
3019 : };
3020 : int64 progress_start_val[2];
3021 151714 : int64 progress_end_val[2] = {0, 0};
3022 :
3023 : Assert(vacrel->do_index_cleanup);
3024 : Assert(vacrel->nindexes > 0);
3025 :
3026 : /*
3027 : * Report that we are now cleaning up indexes and the number of indexes to
3028             : * clean up.
3029 : */
3030 151714 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3031 151714 : progress_start_val[1] = vacrel->nindexes;
3032 151714 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3033 :
3034 151714 : if (!ParallelVacuumIsActive(vacrel))
3035 : {
3036 389962 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3037 : {
3038 238282 : Relation indrel = vacrel->indrels[idx];
3039 238282 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3040 :
3041 476564 : vacrel->indstats[idx] =
3042 238282 : lazy_cleanup_one_index(indrel, istat, reltuples,
3043 : estimated_count, vacrel);
3044 :
3045 : /* Report the number of indexes cleaned up */
3046 238282 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3047 238282 : idx + 1);
3048 : }
3049 : }
3050 : else
3051 : {
3052 : /* Outsource everything to parallel variant */
3053 34 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3054 : vacrel->num_index_scans,
3055 : estimated_count);
3056 : }
3057 :
3058 : /* Reset the progress counters */
3059 151714 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3060 151714 : }
3061 :
3062 : /*
3063 : * lazy_vacuum_one_index() -- vacuum index relation.
3064 : *
3065 : * Delete all the index tuples containing a TID collected in
3066 : * vacrel->dead_items. Also update running statistics. Exact
3067 : * details depend on index AM's ambulkdelete routine.
3068 : *
3069 : * reltuples is the number of heap tuples to be passed to the
3070 : * bulkdelete callback. It's always assumed to be estimated.
3071 : * See indexam.sgml for more info.
3072 : *
3073 : * Returns bulk delete stats derived from input stats
3074 : */
3075 : static IndexBulkDeleteResult *
3076 2266 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3077 : double reltuples, LVRelState *vacrel)
3078 : {
3079 : IndexVacuumInfo ivinfo;
3080 : LVSavedErrInfo saved_err_info;
3081 :
3082 2266 : ivinfo.index = indrel;
3083 2266 : ivinfo.heaprel = vacrel->rel;
3084 2266 : ivinfo.analyze_only = false;
3085 2266 : ivinfo.report_progress = false;
3086 2266 : ivinfo.estimated_count = true;
3087 2266 : ivinfo.message_level = DEBUG2;
3088 2266 : ivinfo.num_heap_tuples = reltuples;
3089 2266 : ivinfo.strategy = vacrel->bstrategy;
3090 :
3091 : /*
3092 : * Update error traceback information.
3093 : *
3094 : * The index name is saved during this phase and restored immediately
3095 : * after this phase. See vacuum_error_callback.
3096 : */
3097 : Assert(vacrel->indname == NULL);
3098 2266 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3099 2266 : update_vacuum_error_info(vacrel, &saved_err_info,
3100 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3101 : InvalidBlockNumber, InvalidOffsetNumber);
3102 :
3103 : /* Do bulk deletion */
3104 2266 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3105 : vacrel->dead_items_info);
3106 :
3107 : /* Revert to the previous phase information for error traceback */
3108 2266 : restore_vacuum_error_info(vacrel, &saved_err_info);
3109 2266 : pfree(vacrel->indname);
3110 2266 : vacrel->indname = NULL;
3111 :
3112 2266 : return istat;
3113 : }
3114 :
3115 : /*
3116 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3117 : *
3118 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3119 : * of heap tuples and estimated_count is true if reltuples is an
3120 : * estimated value. See indexam.sgml for more info.
3121 : *
3122 : * Returns bulk delete stats derived from input stats
3123 : */
3124 : static IndexBulkDeleteResult *
3125 238282 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3126 : double reltuples, bool estimated_count,
3127 : LVRelState *vacrel)
3128 : {
3129 : IndexVacuumInfo ivinfo;
3130 : LVSavedErrInfo saved_err_info;
3131 :
3132 238282 : ivinfo.index = indrel;
3133 238282 : ivinfo.heaprel = vacrel->rel;
3134 238282 : ivinfo.analyze_only = false;
3135 238282 : ivinfo.report_progress = false;
3136 238282 : ivinfo.estimated_count = estimated_count;
3137 238282 : ivinfo.message_level = DEBUG2;
3138 :
3139 238282 : ivinfo.num_heap_tuples = reltuples;
3140 238282 : ivinfo.strategy = vacrel->bstrategy;
3141 :
3142 : /*
3143 : * Update error traceback information.
3144 : *
3145 : * The index name is saved during this phase and restored immediately
3146 : * after this phase. See vacuum_error_callback.
3147 : */
3148 : Assert(vacrel->indname == NULL);
3149 238282 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3150 238282 : update_vacuum_error_info(vacrel, &saved_err_info,
3151 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3152 : InvalidBlockNumber, InvalidOffsetNumber);
3153 :
3154 238282 : istat = vac_cleanup_one_index(&ivinfo, istat);
3155 :
3156 : /* Revert to the previous phase information for error traceback */
3157 238282 : restore_vacuum_error_info(vacrel, &saved_err_info);
3158 238282 : pfree(vacrel->indname);
3159 238282 : vacrel->indname = NULL;
3160 :
3161 238282 : return istat;
3162 : }
3163 :
3164 : /*
3165 : * should_attempt_truncation - should we attempt to truncate the heap?
3166 : *
3167 : * Don't even think about it unless we have a shot at releasing a goodly
3168 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3169 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3170 : * be particularly disruptive.
3171 : *
3172 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3173 : * system might be refusing to allocate new XIDs at this point. The system
3174 : * definitely won't return to normal unless and until VACUUM actually advances
3175 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3176 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3177 : * truncate the table under these circumstances, an XID exhaustion error might
3178 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3179 : * There is very little chance of truncation working out when the failsafe is
3180 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3181 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3182 : * we're called.
3183 : */
3184 : static bool
3185 245850 : should_attempt_truncation(LVRelState *vacrel)
3186 : {
3187 : BlockNumber possibly_freeable;
3188 :
3189 245850 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3190 86368 : return false;
3191 :
3192 159482 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3193 159482 : if (possibly_freeable > 0 &&
3194 306 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3195 306 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3196 288 : return true;
3197 :
3198 159194 : return false;
3199 : }
3200 :
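To make the threshold above concrete, here is a minimal standalone sketch with stand-in constants; the real REL_TRUNCATE_MINIMUM and REL_TRUNCATE_FRACTION are defined earlier in this file, and the values 1000 and 16 below are assumptions for illustration only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins; the real constants are defined earlier in this file. */
#define DEMO_TRUNCATE_MINIMUM  1000
#define DEMO_TRUNCATE_FRACTION 16

/* Same shape as the check in should_attempt_truncation(). */
static bool
demo_should_truncate(uint32_t rel_pages, uint32_t nonempty_pages)
{
    uint32_t    possibly_freeable = rel_pages - nonempty_pages;

    return possibly_freeable > 0 &&
        (possibly_freeable >= DEMO_TRUNCATE_MINIMUM ||
         possibly_freeable >= rel_pages / DEMO_TRUNCATE_FRACTION);
}

int
main(void)
{
    /* 10,000-page table with 400 freeable pages: below the absolute minimum
     * and below 1/16 of the table, so truncation is skipped (prints 0). */
    printf("%d\n", demo_should_truncate(10000, 9600));

    /* 10,000-page table with 1,000 freeable pages: reaches the absolute
     * minimum, so truncation is attempted (prints 1). */
    printf("%d\n", demo_should_truncate(10000, 9000));
    return 0;
}
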
3201 : /*
3202 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3203 : */
3204 : static void
3205 288 : lazy_truncate_heap(LVRelState *vacrel)
3206 : {
3207 288 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3208 : BlockNumber new_rel_pages;
3209 : bool lock_waiter_detected;
3210 : int lock_retry;
3211 :
3212 : /* Report that we are now truncating */
3213 288 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3214 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3215 :
3216 : /* Update error traceback information one last time */
3217 288 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3218 : vacrel->nonempty_pages, InvalidOffsetNumber);
3219 :
3220 : /*
3221 : * Loop until no more truncating can be done.
3222 : */
3223 : do
3224 : {
3225 : /*
3226 : * We need full exclusive lock on the relation in order to do
3227 : * truncation. If we can't get it, give up rather than waiting --- we
3228 : * don't want to block other backends, and we don't want to deadlock
3229 : * (which is quite possible considering we already hold a lower-grade
3230 : * lock).
3231 : */
3232 288 : lock_waiter_detected = false;
3233 288 : lock_retry = 0;
3234 : while (true)
3235 : {
3236 688 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3237 284 : break;
3238 :
3239 : /*
3240 : * Check for interrupts while trying to (re-)acquire the exclusive
3241 : * lock.
3242 : */
3243 404 : CHECK_FOR_INTERRUPTS();
3244 :
3245 404 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3246 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3247 : {
3248 : /*
3249 : * We failed to establish the lock in the specified number of
3250 : * retries. This means we give up truncating.
3251 : */
3252 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
3253 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3254 : vacrel->relname)));
3255 6 : return;
3256 : }
3257 :
3258 400 : (void) WaitLatch(MyLatch,
3259 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3260 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3261 : WAIT_EVENT_VACUUM_TRUNCATE);
3262 400 : ResetLatch(MyLatch);
3263 : }
3264 :
3265 : /*
3266 : * Now that we have exclusive lock, look to see if the rel has grown
3267 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3268 : * the newly added pages presumably contain non-deletable tuples.
3269 : */
3270 284 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3271 284 : if (new_rel_pages != orig_rel_pages)
3272 : {
3273 : /*
3274 : * Note: we intentionally don't update vacrel->rel_pages with the
3275 : * new rel size here. If we did, it would amount to assuming that
3276 : * the new pages are empty, which is unlikely. Leaving the numbers
3277 : * alone amounts to assuming that the new pages have the same
3278 : * tuple density as existing ones, which is less unlikely.
3279 : */
3280 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3281 0 : return;
3282 : }
3283 :
3284 : /*
3285 : * Scan backwards from the end to verify that the end pages actually
3286 : * contain no tuples. This is *necessary*, not optional, because
3287 : * other backends could have added tuples to these pages whilst we
3288 : * were vacuuming.
3289 : */
3290 284 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3291 284 : vacrel->blkno = new_rel_pages;
3292 :
3293 284 : if (new_rel_pages >= orig_rel_pages)
3294 : {
3295 : /* can't do anything after all */
3296 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3297 2 : return;
3298 : }
3299 :
3300 : /*
3301 : * Okay to truncate.
3302 : */
3303 282 : RelationTruncate(vacrel->rel, new_rel_pages);
3304 :
3305 : /*
3306 : * We can release the exclusive lock as soon as we have truncated.
3307 : * Other backends can't safely access the relation until they have
3308 : * processed the smgr invalidation that smgrtruncate sent out ... but
3309 : * that should happen as part of standard invalidation processing once
3310 : * they acquire lock on the relation.
3311 : */
3312 282 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3313 :
3314 : /*
3315 : * Update statistics. Here, it *is* correct to adjust rel_pages
3316 : * without also touching reltuples, since the tuple count wasn't
3317 : * changed by the truncation.
3318 : */
3319 282 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3320 282 : vacrel->rel_pages = new_rel_pages;
3321 :
3322 282 : ereport(vacrel->verbose ? INFO : DEBUG2,
3323 : (errmsg("table \"%s\": truncated %u to %u pages",
3324 : vacrel->relname,
3325 : orig_rel_pages, new_rel_pages)));
3326 282 : orig_rel_pages = new_rel_pages;
3327 282 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3328 : }
3329 :
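The retry loop above caps how long VACUUM waits for the AccessExclusiveLock before giving up on truncation. Here is a hedged, self-contained sketch of that bounded-retry pattern; the timeout and wait-interval values are stand-ins for the VACUUM_TRUNCATE_LOCK_* constants defined earlier in this file, and the lock/sleep helpers are fakes for the demo.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in values; 5000/50 allows at most 100 attempts. */
#define DEMO_LOCK_TIMEOUT_MS       5000
#define DEMO_LOCK_WAIT_INTERVAL_MS 50

static int  attempts_until_free = 3;    /* pretend the lock frees up on try #3 */

static bool demo_try_lock(void) { return --attempts_until_free <= 0; }
static void demo_sleep_ms(int ms) { (void) ms;  /* no real sleep in the demo */ }

/* Hypothetical sketch of the bounded retry loop used before truncation. */
static bool
demo_acquire_for_truncate(void)
{
    int         retries = 0;

    while (!demo_try_lock())
    {
        if (++retries > DEMO_LOCK_TIMEOUT_MS / DEMO_LOCK_WAIT_INTERVAL_MS)
            return false;   /* give up truncating rather than block others */
        demo_sleep_ms(DEMO_LOCK_WAIT_INTERVAL_MS);
    }
    return true;
}

int
main(void)
{
    printf("%d\n", demo_acquire_for_truncate());    /* prints 1 */
    return 0;
}
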
3330 : /*
3331 : * Rescan end pages to verify that they are (still) empty of tuples.
3332 : *
3333 : * Returns number of nondeletable pages (last nonempty page + 1).
3334 : */
3335 : static BlockNumber
3336 284 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3337 : {
3338 : BlockNumber blkno;
3339 : BlockNumber prefetchedUntil;
3340 : instr_time starttime;
3341 :
3342 : /* Initialize the starttime if we check for conflicting lock requests */
3343 284 : INSTR_TIME_SET_CURRENT(starttime);
3344 :
3345 : /*
3346 : * Start checking blocks at what we believe relation end to be and move
3347 : * backwards. (Strange coding of loop control is needed because blkno is
3348 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3349 : * in forward direction, so that OS-level readahead can kick in.
3350 : */
3351 284 : blkno = vacrel->rel_pages;
3352 : StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3353 : "prefetch size must be power of 2");
3354 284 : prefetchedUntil = InvalidBlockNumber;
3355 4308 : while (blkno > vacrel->nonempty_pages)
3356 : {
3357 : Buffer buf;
3358 : Page page;
3359 : OffsetNumber offnum,
3360 : maxoff;
3361 : bool hastup;
3362 :
3363 : /*
3364 : * Check if another process requests a lock on our relation. We are
3365 : * holding an AccessExclusiveLock here, so they will be waiting. We
3366 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3367 : * only check if that interval has elapsed once every 32 blocks to
3368 : * keep the number of system calls and actual shared lock table
3369 : * lookups to a minimum.
3370 : */
3371 4032 : if ((blkno % 32) == 0)
3372 : {
3373 : instr_time currenttime;
3374 : instr_time elapsed;
3375 :
3376 128 : INSTR_TIME_SET_CURRENT(currenttime);
3377 128 : elapsed = currenttime;
3378 128 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3379 128 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3380 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3381 : {
3382 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3383 : {
3384 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3385 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3386 : vacrel->relname)));
3387 :
3388 0 : *lock_waiter_detected = true;
3389 0 : return blkno;
3390 : }
3391 0 : starttime = currenttime;
3392 : }
3393 : }
3394 :
3395 : /*
3396 : * We don't insert a vacuum delay point here, because we have an
3397 : * exclusive lock on the table which we want to hold for as short a
3398 : * time as possible. We still need to check for interrupts however.
3399 : */
3400 4032 : CHECK_FOR_INTERRUPTS();
3401 :
3402 4032 : blkno--;
3403 :
3404 : /* If we haven't prefetched this lot yet, do so now. */
3405 4032 : if (prefetchedUntil > blkno)
3406 : {
3407 : BlockNumber prefetchStart;
3408 : BlockNumber pblkno;
3409 :
3410 374 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3411 5960 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3412 : {
3413 5586 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3414 5586 : CHECK_FOR_INTERRUPTS();
3415 : }
3416 374 : prefetchedUntil = prefetchStart;
3417 : }
3418 :
3419 4032 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3420 : vacrel->bstrategy);
3421 :
3422 : /* In this phase we only need shared access to the buffer */
3423 4032 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3424 :
3425 4032 : page = BufferGetPage(buf);
3426 :
3427 4032 : if (PageIsNew(page) || PageIsEmpty(page))
3428 : {
3429 1644 : UnlockReleaseBuffer(buf);
3430 1644 : continue;
3431 : }
3432 :
3433 2388 : hastup = false;
3434 2388 : maxoff = PageGetMaxOffsetNumber(page);
3435 2388 : for (offnum = FirstOffsetNumber;
3436 5398 : offnum <= maxoff;
3437 3010 : offnum = OffsetNumberNext(offnum))
3438 : {
3439 : ItemId itemid;
3440 :
3441 3018 : itemid = PageGetItemId(page, offnum);
3442 :
3443 : /*
3444 : * Note: any non-unused item should be taken as a reason to keep
3445 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3446 : * we must not have cleaned out its index entries.
3447 : */
3448 3018 : if (ItemIdIsUsed(itemid))
3449 : {
3450 8 : hastup = true;
3451 8 : break; /* can stop scanning */
3452 : }
3453 : } /* scan along page */
3454 :
3455 2388 : UnlockReleaseBuffer(buf);
3456 :
3457 : /* Done scanning if we found a tuple here */
3458 2388 : if (hastup)
3459 8 : return blkno + 1;
3460 : }
3461 :
3462 : /*
3463 : * If we fall out of the loop, all the previously-thought-to-be-empty
3464 : * pages still are; we need not bother to look at the last known-nonempty
3465 : * page.
3466 : */
3467 276 : return vacrel->nonempty_pages;
3468 : }
3469 :
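The prefetch start above is computed by masking off the low bits of blkno, which only works when PREFETCH_SIZE is a power of two (hence the StaticAssertStmt). A standalone sketch of that arithmetic, assuming a stand-in prefetch size of 32:

#include <stdio.h>

/* Stand-in for PREFETCH_SIZE (defined earlier in this file); any power of two works. */
#define DEMO_PREFETCH_SIZE 32

int
main(void)
{
    /*
     * Masking off the low bits rounds blkno down to the start of its
     * PREFETCH_SIZE-aligned window; the mask trick only works for powers of
     * two, which is what the static assertion enforces.
     */
    unsigned int blkno = 1234;
    unsigned int prefetch_start = blkno & ~(unsigned int) (DEMO_PREFETCH_SIZE - 1);

    /* Prints: blkno 1234 -> prefetch window starts at 1216 */
    printf("blkno %u -> prefetch window starts at %u\n", blkno, prefetch_start);
    return 0;
}
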
3470 : /*
3471 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3472 : * shared memory). Sets both in vacrel for caller.
3473 : *
3474 : * Also handles parallel initialization as part of allocating dead_items in
3475 : * DSM when required.
3476 : */
3477 : static void
3478 245850 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3479 : {
3480 : VacDeadItemsInfo *dead_items_info;
3481 711582 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3482 219882 : autovacuum_work_mem != -1 ?
3483 465732 : autovacuum_work_mem : maintenance_work_mem;
3484 :
3485 : /*
3486 : * Initialize state for a parallel vacuum. As of now, only one worker can
3487 : * be used for an index, so we invoke parallelism only if there are at
3488 : * least two indexes on a table.
3489 : */
3490 245850 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3491 : {
3492 : /*
3493 : * Since parallel workers cannot access data in temporary tables, we
3494 : * can't perform parallel vacuum on them.
3495 : */
3496 10296 : if (RelationUsesLocalBuffers(vacrel->rel))
3497 : {
3498 : /*
3499 : * Give warning only if the user explicitly tries to perform a
3500 : * parallel vacuum on the temporary table.
3501 : */
3502 6 : if (nworkers > 0)
3503 6 : ereport(WARNING,
3504 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3505 : vacrel->relname)));
3506 : }
3507 : else
3508 10290 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3509 : vacrel->nindexes, nworkers,
3510 : vac_work_mem,
3511 10290 : vacrel->verbose ? INFO : DEBUG2,
3512 : vacrel->bstrategy);
3513 :
3514 : /*
3515 : * If parallel mode started, dead_items and dead_items_info spaces are
3516 : * allocated in DSM.
3517 : */
3518 10296 : if (ParallelVacuumIsActive(vacrel))
3519 : {
3520 34 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3521 : &vacrel->dead_items_info);
3522 34 : return;
3523 : }
3524 : }
3525 :
3526 : /*
3527 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3528 : * locally.
3529 : */
3530 :
3531 245816 : dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
3532 245816 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3533 245816 : dead_items_info->num_items = 0;
3534 245816 : vacrel->dead_items_info = dead_items_info;
3535 :
3536 245816 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3537 : }
3538 :
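The memory budget above is a GUC value expressed in kilobytes and scaled to bytes, with autovacuum workers preferring autovacuum_work_mem when it is set (i.e. not -1). A standalone sketch of that selection; the 64MB figure in the example is illustrative only.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Hypothetical standalone rendering of the budget selection: the GUCs are in
 * kilobytes, and autovacuum workers use autovacuum_work_mem unless it is left
 * at -1, in which case maintenance_work_mem applies.
 */
static size_t
demo_dead_items_max_bytes(int maintenance_work_mem_kb,
                          int autovacuum_work_mem_kb,
                          bool is_autovacuum_worker)
{
    int         kb = (is_autovacuum_worker && autovacuum_work_mem_kb != -1)
        ? autovacuum_work_mem_kb
        : maintenance_work_mem_kb;

    return (size_t) kb * 1024;  /* same scaling as max_bytes above */
}

int
main(void)
{
    /* e.g. maintenance_work_mem = 64MB (65536kB), autovacuum_work_mem unset. */
    printf("%zu bytes\n", demo_dead_items_max_bytes(65536, -1, true));
    /* Prints: 67108864 bytes */
    return 0;
}
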
3539 : /*
3540 : * Add the given block number and offset numbers to dead_items.
3541 : */
3542 : static void
3543 28072 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3544 : int num_offsets)
3545 : {
3546 28072 : const int prog_index[2] = {
3547 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3548 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3549 : };
3550 : int64 prog_val[2];
3551 :
3552 28072 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3553 28072 : vacrel->dead_items_info->num_items += num_offsets;
3554 :
3555 : /* update the progress information */
3556 28072 : prog_val[0] = vacrel->dead_items_info->num_items;
3557 28072 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3558 28072 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3559 28072 : }
3560 :
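Putting the pieces together, the dead-items store follows a simple lifecycle across the phases: create it, record per-block offsets during phase I, iterate them back out during phase III, and destroy (or recreate) it. The condensed sketch below reuses only the tidstore calls that already appear in this file; it is illustrative, omits progress reporting and the max_bytes accounting, and is not a drop-in addition to the backend.

#include "postgres.h"

#include "access/tidstore.h"
#include "storage/off.h"

/*
 * Illustrative only: record dead offsets for one block, then iterate them
 * back out, using the same tidstore calls as dead_items_add() and
 * lazy_vacuum_heap_rel() above.
 */
static void
demo_dead_items_roundtrip(void)
{
    TidStore   *dead_items = TidStoreCreateLocal(64 * 1024, true);
    OffsetNumber recorded[2] = {1, 4};
    TidStoreIter *iter;
    TidStoreIterResult *res;

    /* "Phase I": block 7 has dead items at offsets 1 and 4. */
    TidStoreSetBlockOffsets(dead_items, (BlockNumber) 7, recorded, 2);

    /* "Phase III": walk the store block by block and pull the offsets back. */
    iter = TidStoreBeginIterate(dead_items);
    while ((res = TidStoreIterateNext(iter)) != NULL)
    {
        OffsetNumber offsets[MaxOffsetNumber];
        int         num_offsets;

        num_offsets = TidStoreGetBlockOffsets(res, offsets, lengthof(offsets));
        elog(DEBUG1, "block %u has %d dead offsets", res->blkno, num_offsets);
    }
    TidStoreEndIterate(iter);

    TidStoreDestroy(dead_items);
}
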
3561 : /*
3562 : * Forget all collected dead items.
3563 : */
3564 : static void
3565 1222 : dead_items_reset(LVRelState *vacrel)
3566 : {
3567 1222 : if (ParallelVacuumIsActive(vacrel))
3568 : {
3569 16 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3570 16 : return;
3571 : }
3572 :
3573 : /* Recreate the tidstore with the same max_bytes limitation */
3574 1206 : TidStoreDestroy(vacrel->dead_items);
3575 1206 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3576 :
3577 : /* Reset the counter */
3578 1206 : vacrel->dead_items_info->num_items = 0;
3579 : }
3580 :
3581 : /*
3582 : * Perform cleanup for resources allocated in dead_items_alloc
3583 : */
3584 : static void
3585 245850 : dead_items_cleanup(LVRelState *vacrel)
3586 : {
3587 245850 : if (!ParallelVacuumIsActive(vacrel))
3588 : {
3589 : /* Don't bother with pfree here */
3590 245816 : return;
3591 : }
3592 :
3593 : /* End parallel mode */
3594 34 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3595 34 : vacrel->pvs = NULL;
3596 : }
3597 :
3598 : /*
3599 : * Check if every tuple in the given page is visible to all current and future
3600 : * transactions. Also return the visibility_cutoff_xid which is the highest
3601 : * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3602 : * on this page is frozen.
3603 : *
3604 : * This is a stripped down version of lazy_scan_prune(). If you change
3605 : * anything here, make sure that everything stays in sync. Note that an
3606 : * assertion calls us to verify that everybody still agrees. Be sure to avoid
3607 : * introducing new side-effects here.
3608 : */
3609 : static bool
3610 23474 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3611 : TransactionId *visibility_cutoff_xid,
3612 : bool *all_frozen)
3613 : {
3614 23474 : Page page = BufferGetPage(buf);
3615 23474 : BlockNumber blockno = BufferGetBlockNumber(buf);
3616 : OffsetNumber offnum,
3617 : maxoff;
3618 23474 : bool all_visible = true;
3619 :
3620 23474 : *visibility_cutoff_xid = InvalidTransactionId;
3621 23474 : *all_frozen = true;
3622 :
3623 23474 : maxoff = PageGetMaxOffsetNumber(page);
3624 23474 : for (offnum = FirstOffsetNumber;
3625 1165710 : offnum <= maxoff && all_visible;
3626 1142236 : offnum = OffsetNumberNext(offnum))
3627 : {
3628 : ItemId itemid;
3629 : HeapTupleData tuple;
3630 :
3631 : /*
3632 : * Set the offset number so that we can display it along with any
3633 : * error that occurred while processing this tuple.
3634 : */
3635 1142236 : vacrel->offnum = offnum;
3636 1142236 : itemid = PageGetItemId(page, offnum);
3637 :
3638 : /* Unused or redirect line pointers are of no interest */
3639 1142236 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3640 287470 : continue;
3641 :
3642 854766 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3643 :
3644 : /*
3645             : * Dead line pointers can have index pointers pointing to them, so
3646             : * they can't be treated as visible.
3647 : */
3648 854766 : if (ItemIdIsDead(itemid))
3649 : {
3650 0 : all_visible = false;
3651 0 : *all_frozen = false;
3652 0 : break;
3653 : }
3654 :
3655 : Assert(ItemIdIsNormal(itemid));
3656 :
3657 854766 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3658 854766 : tuple.t_len = ItemIdGetLength(itemid);
3659 854766 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3660 :
3661 854766 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3662 : buf))
3663 : {
3664 854746 : case HEAPTUPLE_LIVE:
3665 : {
3666 : TransactionId xmin;
3667 :
3668 : /* Check comments in lazy_scan_prune. */
3669 854746 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3670 : {
3671 0 : all_visible = false;
3672 0 : *all_frozen = false;
3673 0 : break;
3674 : }
3675 :
3676 : /*
3677 : * The inserter definitely committed. But is it old enough
3678 : * that everyone sees it as committed?
3679 : */
3680 854746 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3681 854746 : if (!TransactionIdPrecedes(xmin,
3682 : vacrel->cutoffs.OldestXmin))
3683 : {
3684 38 : all_visible = false;
3685 38 : *all_frozen = false;
3686 38 : break;
3687 : }
3688 :
3689 : /* Track newest xmin on page. */
3690 854708 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3691 : TransactionIdIsNormal(xmin))
3692 14612 : *visibility_cutoff_xid = xmin;
3693 :
3694 : /* Check whether this tuple is already frozen or not */
3695 1098842 : if (all_visible && *all_frozen &&
3696 244134 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3697 4704 : *all_frozen = false;
3698 : }
3699 854708 : break;
3700 :
3701 20 : case HEAPTUPLE_DEAD:
3702 : case HEAPTUPLE_RECENTLY_DEAD:
3703 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3704 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3705 : {
3706 20 : all_visible = false;
3707 20 : *all_frozen = false;
3708 20 : break;
3709 : }
3710 0 : default:
3711 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3712 : break;
3713 : }
3714 : } /* scan along page */
3715 :
3716 : /* Clear the offset information once we have processed the given page. */
3717 23474 : vacrel->offnum = InvalidOffsetNumber;
3718 :
3719 23474 : return all_visible;
3720 : }
3721 :
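Stripped of hint bits, freezing, and dead line pointers, the core test above is: every committed xmin must precede OldestXmin, and the newest such xmin becomes the page's visibility cutoff. A hedged, standalone reduction of that logic, using circular XID comparison in the spirit of TransactionIdPrecedes():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t DemoXid;

/* Circular XID comparison, in the same spirit as TransactionIdPrecedes(). */
static bool
demo_xid_precedes(DemoXid a, DemoXid b)
{
    return (int32_t) (a - b) < 0;
}

/*
 * Hedged reduction of the check above: every xmin must precede the
 * OldestXmin cutoff, and the newest such xmin becomes the visibility cutoff
 * recorded when the VM bit is set.  Ignores hint bits, freezing, and dead
 * line pointers, which the real function must also consider.
 */
static bool
demo_page_all_visible(const DemoXid xmins[], int ntuples,
                      DemoXid oldest_xmin, DemoXid *cutoff)
{
    *cutoff = 0;
    for (int i = 0; i < ntuples; i++)
    {
        if (!demo_xid_precedes(xmins[i], oldest_xmin))
            return false;       /* some snapshot may not see it yet */
        if (demo_xid_precedes(*cutoff, xmins[i]))
            *cutoff = xmins[i]; /* track newest xmin on the page */
    }
    return true;
}

int
main(void)
{
    DemoXid     xmins[] = {100, 250, 180};
    DemoXid     cutoff;

    /* All three xmins precede the cutoff of 300; prints "1 cutoff=250". */
    printf("%d cutoff=%u\n", demo_page_all_visible(xmins, 3, 300, &cutoff),
           (unsigned) cutoff);
    return 0;
}
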
3722 : /*
3723 : * Update index statistics in pg_class if the statistics are accurate.
3724 : */
3725 : static void
3726 159512 : update_relstats_all_indexes(LVRelState *vacrel)
3727 : {
3728 159512 : Relation *indrels = vacrel->indrels;
3729 159512 : int nindexes = vacrel->nindexes;
3730 159512 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3731 :
3732 : Assert(vacrel->do_index_cleanup);
3733 :
3734 397916 : for (int idx = 0; idx < nindexes; idx++)
3735 : {
3736 238404 : Relation indrel = indrels[idx];
3737 238404 : IndexBulkDeleteResult *istat = indstats[idx];
3738 :
3739 238404 : if (istat == NULL || istat->estimated_count)
3740 235840 : continue;
3741 :
3742 : /* Update index statistics */
3743 2564 : vac_update_relstats(indrel,
3744 : istat->num_pages,
3745 : istat->num_index_tuples,
3746 : 0, 0,
3747 : false,
3748 : InvalidTransactionId,
3749 : InvalidMultiXactId,
3750 : NULL, NULL, false);
3751 : }
3752 159512 : }
3753 :
3754 : /*
3755 : * Error context callback for errors occurring during vacuum. The error
3756 : * context messages for index phases should match the messages set in parallel
3757 : * vacuum. If you change this function for those phases, change
3758 : * parallel_vacuum_error_callback() as well.
3759 : */
3760 : static void
3761 270954 : vacuum_error_callback(void *arg)
3762 : {
3763 270954 : LVRelState *errinfo = arg;
3764 :
3765 270954 : switch (errinfo->phase)
3766 : {
3767 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3768 0 : if (BlockNumberIsValid(errinfo->blkno))
3769 : {
3770 0 : if (OffsetNumberIsValid(errinfo->offnum))
3771 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3772 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3773 : else
3774 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3775 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3776 : }
3777 : else
3778 0 : errcontext("while scanning relation \"%s.%s\"",
3779 : errinfo->relnamespace, errinfo->relname);
3780 0 : break;
3781 :
3782 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3783 0 : if (BlockNumberIsValid(errinfo->blkno))
3784 : {
3785 0 : if (OffsetNumberIsValid(errinfo->offnum))
3786 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3787 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3788 : else
3789 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3790 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3791 : }
3792 : else
3793 0 : errcontext("while vacuuming relation \"%s.%s\"",
3794 : errinfo->relnamespace, errinfo->relname);
3795 0 : break;
3796 :
3797 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3798 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3799 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3800 0 : break;
3801 :
3802 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3803 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3804 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3805 0 : break;
3806 :
3807 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3808 6 : if (BlockNumberIsValid(errinfo->blkno))
3809 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3810 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3811 6 : break;
3812 :
3813 270948 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3814 : default:
3815 270948 : return; /* do nothing; the errinfo may not be
3816 : * initialized */
3817 : }
3818 : }
3819 :
3820 : /*
3821 : * Updates the information required for vacuum error callback. This also saves
3822             : * the current information, which can later be restored via restore_vacuum_error_info.
3823 : */
3824 : static void
3825 1413858 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3826 : int phase, BlockNumber blkno, OffsetNumber offnum)
3827 : {
3828 1413858 : if (saved_vacrel)
3829 : {
3830 265222 : saved_vacrel->offnum = vacrel->offnum;
3831 265222 : saved_vacrel->blkno = vacrel->blkno;
3832 265222 : saved_vacrel->phase = vacrel->phase;
3833 : }
3834 :
3835 1413858 : vacrel->blkno = blkno;
3836 1413858 : vacrel->offnum = offnum;
3837 1413858 : vacrel->phase = phase;
3838 1413858 : }
3839 :
3840 : /*
3841 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3842 : */
3843 : static void
3844 265222 : restore_vacuum_error_info(LVRelState *vacrel,
3845 : const LVSavedErrInfo *saved_vacrel)
3846 : {
3847 265222 : vacrel->blkno = saved_vacrel->blkno;
3848 265222 : vacrel->offnum = saved_vacrel->offnum;
3849 265222 : vacrel->phase = saved_vacrel->phase;
3850 265222 : }
|