Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
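 * As a rough illustration, the cycle looks like this (simplified pseudocode
 * only; the real control flow lives in lazy_scan_heap() and lazy_vacuum()):
 *
 *     for each heap block chosen by the scan:         -- phase I
 *         prune and freeze tuples, record dead TIDs
 *         if the TID store is full:
 *             vacuum all indexes                      -- phase II
 *             vacuum the recorded heap pages          -- phase III
 *             empty the TID store and resume phase I
 *     vacuum all indexes                              -- final phase II
 *     vacuum the recorded heap pages                  -- final phase III
 *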
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
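 * As a worked example (the counts are hypothetical): with
 * MAX_EAGER_FREEZE_SUCCESS_RATE = 0.2 and a visibility map reporting 120,000
 * all-visible pages of which 100,000 are already all-frozen, the cap works
 * out to 0.2 * (120,000 - 100,000) = 4,000 successful eager freezes for this
 * vacuum.
 *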
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
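 * As a worked example (the rate shown is only illustrative): with
 * EAGER_SCAN_REGION_SIZE = 4096 and a failure rate setting of 0.03, at most
 * 0.03 * 4096 = ~122 eagerly scanned pages may fail to be frozen within a
 * region before eager scanning is suspended until the next region.
 *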
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * storing dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
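 * A condensed sketch of the check that triggers such a cycle (simplified
 * from lazy_scan_heap(); not the literal code):
 *
 *     if (TidStoreMemoryUsage(vacrel->dead_items) >
 *         vacrel->dead_items_info->max_bytes)
 *         lazy_vacuum(vacrel);   -- phases II and III; empties dead_items
 *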
121 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include <math.h>
133 :
134 : #include "access/genam.h"
135 : #include "access/heapam.h"
136 : #include "access/htup_details.h"
137 : #include "access/multixact.h"
138 : #include "access/tidstore.h"
139 : #include "access/transam.h"
140 : #include "access/visibilitymap.h"
141 : #include "access/xloginsert.h"
142 : #include "catalog/storage.h"
143 : #include "commands/dbcommands.h"
144 : #include "commands/progress.h"
145 : #include "commands/vacuum.h"
146 : #include "common/int.h"
147 : #include "common/pg_prng.h"
148 : #include "executor/instrument.h"
149 : #include "miscadmin.h"
150 : #include "pgstat.h"
151 : #include "portability/instr_time.h"
152 : #include "postmaster/autovacuum.h"
153 : #include "storage/bufmgr.h"
154 : #include "storage/freespace.h"
155 : #include "storage/lmgr.h"
156 : #include "storage/read_stream.h"
157 : #include "utils/lsyscache.h"
158 : #include "utils/pg_rusage.h"
159 : #include "utils/timestamp.h"
160 :
161 :
162 : /*
163 : * Space/time tradeoff parameters: do these need to be user-tunable?
164 : *
165 : * To consider truncating the relation, we want there to be at least
166 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
167 : * is less) potentially-freeable pages.
168 : */
169 : #define REL_TRUNCATE_MINIMUM 1000
170 : #define REL_TRUNCATE_FRACTION 16
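
/*
 * Worked example (hypothetical relation sizes): for a 64,000-page relation,
 * Min(REL_TRUNCATE_MINIMUM, 64000 / REL_TRUNCATE_FRACTION) = Min(1000, 4000)
 * = 1000 potentially-freeable pages are required before truncation is
 * considered; for an 8,000-page relation the requirement drops to
 * 8000 / 16 = 500 pages.
 */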
171 :
172 : /*
173 : * Timing parameters for truncate locking heuristics.
174 : *
175 : * These were not exposed as user tunable GUC values because it didn't seem
176 : * that the potential for improvement was great enough to merit the cost of
177 : * supporting them.
178 : */
179 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
180 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
181 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
182 :
183 : /*
184 : * Threshold that controls whether we bypass index vacuuming and heap
185 : * vacuuming as an optimization
186 : */
187 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
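
/*
 * Roughly speaking (hypothetical table size; other conditions also apply):
 * in a 50,000-page relation, the bypass is only considered when no more than
 * 0.02 * 50,000 = 1,000 pages contain LP_DEAD items.
 */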
188 :
189 : /*
190 : * Perform a failsafe check each time we scan another 4GB of pages.
191 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
192 : */
193 : #define FAILSAFE_EVERY_PAGES \
194 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
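
/*
 * With the default 8kB BLCKSZ, this works out to 4GB / 8kB = 524,288 pages
 * (2^19), i.e. a failsafe check roughly every 4GB of heap scanned.
 */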
195 :
196 : /*
197 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
198 : * (it won't be exact because we only vacuum FSM after processing a heap page
199 : * that has some removable tuples). When there are indexes, this is ignored,
200 : * and we vacuum FSM after each index/heap cleaning pass.
201 : */
202 : #define VACUUM_FSM_EVERY_PAGES \
203 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
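
/*
 * With the default 8kB BLCKSZ, this is 8GB / 8kB = 1,048,576 pages between
 * FSM vacuums for tables without indexes.
 */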
204 :
205 : /*
206 : * Before we consider skipping a page that's marked as clean in
207 : * the visibility map, we must've seen at least this many clean pages.
208 : */
209 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
210 :
211 : /*
212 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
213 : * Needs to be a power of 2.
214 : */
215 : #define PREFETCH_SIZE ((BlockNumber) 32)
216 :
217 : /*
218 : * Macro to check if we are in a parallel vacuum. If true, we are in
219 : * parallel mode and the DSM segment is initialized.
220 : */
221 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
222 :
223 : /* Phases of vacuum during which we report error context. */
224 : typedef enum
225 : {
226 : VACUUM_ERRCB_PHASE_UNKNOWN,
227 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
228 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
229 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
230 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
231 : VACUUM_ERRCB_PHASE_TRUNCATE,
232 : } VacErrPhase;
233 :
234 : /*
235 : * An eager scan of a page that is set all-frozen in the VM is considered
236 : * "successful". To spread out freezing overhead across multiple normal
237 : * vacuums, we limit the number of successful eager page freezes. The maximum
238 : * number of eager page freezes is calculated as a ratio of the all-visible
239 : * but not all-frozen pages at the beginning of the vacuum.
240 : */
241 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
242 :
243 : /*
244 : * On the assumption that different regions of the table tend to have
245 : * similarly aged data, once vacuum fails to freeze
246 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
247 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
248 : * to another region of the table with potentially older data.
249 : */
250 : #define EAGER_SCAN_REGION_SIZE 4096
251 :
252 : /*
253 : * heap_vac_scan_next_block() sets these flags to communicate information
254 : * about the block it read to the caller.
255 : */
256 : #define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
257 : #define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
258 :
259 : typedef struct LVRelState
260 : {
261 : /* Target heap relation and its indexes */
262 : Relation rel;
263 : Relation *indrels;
264 : int nindexes;
265 :
266 : /* Buffer access strategy and parallel vacuum state */
267 : BufferAccessStrategy bstrategy;
268 : ParallelVacuumState *pvs;
269 :
270 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
271 : bool aggressive;
272 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
273 : bool skipwithvm;
274 : /* Consider index vacuuming bypass optimization? */
275 : bool consider_bypass_optimization;
276 :
277 : /* Doing index vacuuming, index cleanup, rel truncation? */
278 : bool do_index_vacuuming;
279 : bool do_index_cleanup;
280 : bool do_rel_truncate;
281 :
282 : /* VACUUM operation's cutoffs for freezing and pruning */
283 : struct VacuumCutoffs cutoffs;
284 : GlobalVisState *vistest;
285 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
286 : TransactionId NewRelfrozenXid;
287 : MultiXactId NewRelminMxid;
288 : bool skippedallvis;
289 :
290 : /* Error reporting state */
291 : char *dbname;
292 : char *relnamespace;
293 : char *relname;
294 : char *indname; /* Current index name */
295 : BlockNumber blkno; /* used only for heap operations */
296 : OffsetNumber offnum; /* used only for heap operations */
297 : VacErrPhase phase;
298 : bool verbose; /* VACUUM VERBOSE? */
299 :
300 : /*
301 : * dead_items stores TIDs whose index tuples are deleted by index
302 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
303 : * that has been processed by lazy_scan_prune. Also needed by
304 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
305 : * LP_UNUSED during second heap pass.
306 : *
307 : * Both dead_items and dead_items_info are allocated in shared memory in
308 : * parallel vacuum cases.
309 : */
310 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
311 : VacDeadItemsInfo *dead_items_info;
312 :
313 : BlockNumber rel_pages; /* total number of pages */
314 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
315 :
316 : /*
317 : * Count of all-visible blocks eagerly scanned (for logging only). This
318 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
319 : */
320 : BlockNumber eager_scanned_pages;
321 :
322 : BlockNumber removed_pages; /* # pages removed by relation truncation */
323 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
324 :
325 : /* # pages newly set all-visible in the VM */
326 : BlockNumber vm_new_visible_pages;
327 :
328 : /*
329 : * # pages newly set all-visible and all-frozen in the VM. This is a
330 : * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
331 : * all pages set all-visible, but vm_new_visible_frozen_pages includes
332 : * only those which were also set all-frozen.
333 : */
334 : BlockNumber vm_new_visible_frozen_pages;
335 :
336 : /* # all-visible pages newly set all-frozen in the VM */
337 : BlockNumber vm_new_frozen_pages;
338 :
339 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
340 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
341 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
342 :
343 : /* Statistics output by us, for table */
344 : double new_rel_tuples; /* new estimated total # of tuples */
345 : double new_live_tuples; /* new estimated total # of live tuples */
346 : /* Statistics output by index AMs */
347 : IndexBulkDeleteResult **indstats;
348 :
349 : /* Instrumentation counters */
350 : int num_index_scans;
351 : /* Counters that follow are only for scanned_pages */
352 : int64 tuples_deleted; /* # deleted from table */
353 : int64 tuples_frozen; /* # newly frozen */
354 : int64 lpdead_items; /* # deleted from indexes */
355 : int64 live_tuples; /* # live tuples remaining */
356 : int64 recently_dead_tuples; /* # dead, but not yet removable */
357 : int64 missed_dead_tuples; /* # removable, but not removed */
358 :
359 : /* State maintained by heap_vac_scan_next_block() */
360 : BlockNumber current_block; /* last block returned */
361 : BlockNumber next_unskippable_block; /* next unskippable block */
362 : bool next_unskippable_allvis; /* its visibility status */
363 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
364 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
365 :
366 : /* State related to managing eager scanning of all-visible pages */
367 :
368 : /*
369 : * A normal vacuum that has failed to freeze too many eagerly scanned
370 : * blocks in a region suspends eager scanning.
371 : * next_eager_scan_region_start is the block number of the first block
372 : * eligible for resumed eager scanning.
373 : *
374 : * When eager scanning is permanently disabled, either initially
375 : * (including for aggressive vacuum) or due to hitting the success cap,
376 : * this is set to InvalidBlockNumber.
377 : */
378 : BlockNumber next_eager_scan_region_start;
379 :
380 : /*
381 : * The remaining number of blocks a normal vacuum will consider eager
382 : * scanning when it is successful. When eager scanning is enabled, this is
383 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
384 : * all-visible but not all-frozen pages. For each eager freeze success,
385 : * this is decremented. Once it hits 0, eager scanning is permanently
386 : * disabled. It is initialized to 0 if eager scanning starts out disabled
387 : * (including for aggressive vacuum).
388 : */
389 : BlockNumber eager_scan_remaining_successes;
390 :
391 : /*
392 : * The maximum number of blocks which may be eagerly scanned and not
393 : * frozen before eager scanning is temporarily suspended. This is
394 : * configurable both globally, via the
395 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
396 : * storage parameter of the same name. It is calculated as
397 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
398 : * It is 0 when eager scanning is disabled.
399 : */
400 : BlockNumber eager_scan_max_fails_per_region;
401 :
402 : /*
403 : * The number of eagerly scanned blocks vacuum failed to freeze (due to
404 : * age) in the current eager scan region. Vacuum resets it to
405 : * eager_scan_max_fails_per_region each time it enters a new region of the
406 : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
407 : * suspended until the next region. It is also 0 if eager scanning has
408 : * been permanently disabled.
409 : */
410 : BlockNumber eager_scan_remaining_fails;
411 : } LVRelState;
412 :
413 :
414 : /* Struct for saving and restoring vacuum error information. */
415 : typedef struct LVSavedErrInfo
416 : {
417 : BlockNumber blkno;
418 : OffsetNumber offnum;
419 : VacErrPhase phase;
420 : } LVSavedErrInfo;
421 :
422 :
423 : /* non-export function prototypes */
424 : static void lazy_scan_heap(LVRelState *vacrel);
425 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
426 : VacuumParams *params);
427 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
428 : void *callback_private_data,
429 : void *per_buffer_data);
430 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
431 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
432 : BlockNumber blkno, Page page,
433 : bool sharelock, Buffer vmbuffer);
434 : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
435 : BlockNumber blkno, Page page,
436 : Buffer vmbuffer, bool all_visible_according_to_vm,
437 : bool *has_lpdead_items, bool *vm_page_frozen);
438 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
439 : BlockNumber blkno, Page page,
440 : bool *has_lpdead_items);
441 : static void lazy_vacuum(LVRelState *vacrel);
442 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
443 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
444 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
445 : Buffer buffer, OffsetNumber *deadoffsets,
446 : int num_offsets, Buffer vmbuffer);
447 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
448 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
449 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
450 : IndexBulkDeleteResult *istat,
451 : double reltuples,
452 : LVRelState *vacrel);
453 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
454 : IndexBulkDeleteResult *istat,
455 : double reltuples,
456 : bool estimated_count,
457 : LVRelState *vacrel);
458 : static bool should_attempt_truncation(LVRelState *vacrel);
459 : static void lazy_truncate_heap(LVRelState *vacrel);
460 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
461 : bool *lock_waiter_detected);
462 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
463 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
464 : int num_offsets);
465 : static void dead_items_reset(LVRelState *vacrel);
466 : static void dead_items_cleanup(LVRelState *vacrel);
467 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
468 : TransactionId *visibility_cutoff_xid, bool *all_frozen);
469 : static void update_relstats_all_indexes(LVRelState *vacrel);
470 : static void vacuum_error_callback(void *arg);
471 : static void update_vacuum_error_info(LVRelState *vacrel,
472 : LVSavedErrInfo *saved_vacrel,
473 : int phase, BlockNumber blkno,
474 : OffsetNumber offnum);
475 : static void restore_vacuum_error_info(LVRelState *vacrel,
476 : const LVSavedErrInfo *saved_vacrel);
477 :
478 :
479 :
480 : /*
481 : * Helper to set up the eager scanning state for vacuuming a single relation.
482 : * Initializes the eager scan management related members of the LVRelState.
483 : *
484 : * Caller provides whether or not an aggressive vacuum is required due to
485 : * vacuum options or for relfrozenxid/relminmxid advancement.
486 : */
487 : static void
488 117624 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
489 : {
490 : uint32 randseed;
491 : BlockNumber allvisible;
492 : BlockNumber allfrozen;
493 : float first_region_ratio;
494 117624 : bool oldest_unfrozen_before_cutoff = false;
495 :
496 : /*
497 : * Initialize eager scan management fields to their disabled values.
498 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
499 : * of tables without sufficiently old tuples disable eager scanning.
500 : */
501 117624 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
502 117624 : vacrel->eager_scan_max_fails_per_region = 0;
503 117624 : vacrel->eager_scan_remaining_fails = 0;
504 117624 : vacrel->eager_scan_remaining_successes = 0;
505 :
506 : /* If eager scanning is explicitly disabled, just return. */
507 117624 : if (params->max_eager_freeze_failure_rate == 0)
508 117624 : return;
509 :
510 : /*
511 : * The caller will have determined whether or not an aggressive vacuum is
512 : * required by either the vacuum parameters or the relative age of the
513 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
514 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
515 : * so scans of all-visible pages are not considered eager.
516 : */
517 117624 : if (vacrel->aggressive)
518 108672 : return;
519 :
520 : /*
521 : * Aggressively vacuuming a small relation shouldn't take long, so it
522 : * isn't worth amortizing. We use two times the region size as the size
523 : * cutoff because the eager scan start block is a random spot somewhere in
524 : * the first region, making the second region the first to be eager
525 : * scanned normally.
526 : */
527 8952 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
528 8952 : return;
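
/*
 * To put the size cutoff above in perspective (assuming the default 8kB
 * block size): with EAGER_SCAN_REGION_SIZE = 4096, relations of fewer than
 * 2 * 4096 = 8192 blocks, i.e. roughly 64MB, never enable eager scanning.
 */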
529 :
530 : /*
531 : * We only want to enable eager scanning if we are likely to be able to
532 : * freeze some of the pages in the relation.
533 : *
534 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
535 : * are technically freezable, but we won't freeze them unless the criteria
536 : * for opportunistic freezing are met. Only tuples with XIDs/MXIDs older
537 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
538 : *
539 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
540 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
541 : * enable eager scanning.
542 : */
543 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
544 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
545 : vacrel->cutoffs.FreezeLimit))
546 0 : oldest_unfrozen_before_cutoff = true;
547 :
548 0 : if (!oldest_unfrozen_before_cutoff &&
549 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
550 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
551 : vacrel->cutoffs.MultiXactCutoff))
552 0 : oldest_unfrozen_before_cutoff = true;
553 :
554 0 : if (!oldest_unfrozen_before_cutoff)
555 0 : return;
556 :
557 : /* We have met the criteria to eagerly scan some pages. */
558 :
559 : /*
560 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
561 : * all-visible but not all-frozen blocks in the relation.
562 : */
563 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
564 :
565 0 : vacrel->eager_scan_remaining_successes =
566 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
567 0 : (allvisible - allfrozen));
568 :
569 : /* If every all-visible page is frozen, eager scanning is disabled. */
570 0 : if (vacrel->eager_scan_remaining_successes == 0)
571 0 : return;
572 :
573 : /*
574 : * Now calculate the bounds of the first eager scan region. Its end block
575 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
576 : * blocks. This affects the bounds of all subsequent regions and avoids
577 : * eager scanning and failing to freeze the same blocks each vacuum of the
578 : * relation.
579 : */
580 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
581 :
582 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
583 :
584 : Assert(params->max_eager_freeze_failure_rate > 0 &&
585 : params->max_eager_freeze_failure_rate <= 1);
586 :
587 0 : vacrel->eager_scan_max_fails_per_region =
588 0 : params->max_eager_freeze_failure_rate *
589 : EAGER_SCAN_REGION_SIZE;
590 :
591 : /*
592 : * The first region will be smaller than subsequent regions. As such,
593 : * adjust the eager freeze failures tolerated for this region.
594 : */
595 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
596 : EAGER_SCAN_REGION_SIZE;
597 :
598 0 : vacrel->eager_scan_remaining_fails =
599 0 : vacrel->eager_scan_max_fails_per_region *
600 : first_region_ratio;
601 : }
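
/*
 * Worked example for the first-region sizing above (the value is
 * hypothetical): if randseed % EAGER_SCAN_REGION_SIZE comes out as 1024,
 * then first_region_ratio = 1 - 1024/4096 = 0.75, and the first region
 * tolerates 0.75 * eager_scan_max_fails_per_region eager freeze failures.
 */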
602 :
603 : /*
604 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
605 : *
606 : * This routine sets things up for and then calls lazy_scan_heap, where
607 : * almost all work actually takes place. Finalizes everything after the call
608 : * returns by managing relation truncation and updating rel's pg_class
609 : * entry. (Also updates pg_class entries for any indexes that need it.)
610 : *
611 : * At entry, we have already established a transaction and opened
612 : * and locked the relation.
613 : */
614 : void
615 117624 : heap_vacuum_rel(Relation rel, VacuumParams *params,
616 : BufferAccessStrategy bstrategy)
617 : {
618 : LVRelState *vacrel;
619 : bool verbose,
620 : instrument,
621 : skipwithvm,
622 : frozenxid_updated,
623 : minmulti_updated;
624 : BlockNumber orig_rel_pages,
625 : new_rel_pages,
626 : new_rel_allvisible;
627 : PGRUsage ru0;
628 117624 : TimestampTz starttime = 0;
629 117624 : PgStat_Counter startreadtime = 0,
630 117624 : startwritetime = 0;
631 117624 : WalUsage startwalusage = pgWalUsage;
632 117624 : BufferUsage startbufferusage = pgBufferUsage;
633 : ErrorContextCallback errcallback;
634 117624 : char **indnames = NULL;
635 :
636 117624 : verbose = (params->options & VACOPT_VERBOSE) != 0;
637 213614 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
638 95990 : params->log_min_duration >= 0));
639 117624 : if (instrument)
640 : {
641 96010 : pg_rusage_init(&ru0);
642 96010 : if (track_io_timing)
643 : {
644 0 : startreadtime = pgStatBlockReadTime;
645 0 : startwritetime = pgStatBlockWriteTime;
646 : }
647 : }
648 :
649 : /* Used for instrumentation and stats report */
650 117624 : starttime = GetCurrentTimestamp();
651 :
652 117624 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
653 : RelationGetRelid(rel));
654 :
655 : /*
656 : * Setup error traceback support for ereport() first. The idea is to set
657 : * up an error context callback to display additional information on any
658 : * error during a vacuum. During different phases of vacuum, we update
659 : * the state so that the error context callback always display current
660 : * information.
661 : *
662 : * Copy the names of the heap rel into local memory for error reporting
663 : * purposes, too. It isn't always safe to assume that we can get the name
664 : * of each rel. It's convenient for code in lazy_scan_heap to always use
665 : * these temp copies.
666 : */
667 117624 : vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
668 117624 : vacrel->dbname = get_database_name(MyDatabaseId);
669 117624 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
670 117624 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
671 117624 : vacrel->indname = NULL;
672 117624 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
673 117624 : vacrel->verbose = verbose;
674 117624 : errcallback.callback = vacuum_error_callback;
675 117624 : errcallback.arg = vacrel;
676 117624 : errcallback.previous = error_context_stack;
677 117624 : error_context_stack = &errcallback;
678 :
679 : /* Set up high level stuff about rel and its indexes */
680 117624 : vacrel->rel = rel;
681 117624 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
682 : &vacrel->indrels);
683 117624 : vacrel->bstrategy = bstrategy;
684 117624 : if (instrument && vacrel->nindexes > 0)
685 : {
686 : /* Copy index names used by instrumentation (not error reporting) */
687 92118 : indnames = palloc(sizeof(char *) * vacrel->nindexes);
688 236274 : for (int i = 0; i < vacrel->nindexes; i++)
689 144156 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
690 : }
691 :
692 : /*
693 : * The index_cleanup param either disables index vacuuming and cleanup or
694 : * forces it to go ahead when we would otherwise apply the index bypass
695 : * optimization. The default is 'auto', which leaves the final decision
696 : * up to lazy_vacuum().
697 : *
698 : * The truncate param allows the user to avoid attempting relation truncation,
699 : * though it can't force truncation to happen.
700 : */
701 : Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
702 : Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
703 : params->truncate != VACOPTVALUE_AUTO);
704 :
705 : /*
706 : * While VacuumFailsafeActive is reset to false before calling this, we
707 : * still need to reset it here due to recursive calls.
708 : */
709 117624 : VacuumFailsafeActive = false;
710 117624 : vacrel->consider_bypass_optimization = true;
711 117624 : vacrel->do_index_vacuuming = true;
712 117624 : vacrel->do_index_cleanup = true;
713 117624 : vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
714 117624 : if (params->index_cleanup == VACOPTVALUE_DISABLED)
715 : {
716 : /* Force disable index vacuuming up-front */
717 264 : vacrel->do_index_vacuuming = false;
718 264 : vacrel->do_index_cleanup = false;
719 : }
720 117360 : else if (params->index_cleanup == VACOPTVALUE_ENABLED)
721 : {
722 : /* Force index vacuuming. Note that failsafe can still bypass. */
723 32 : vacrel->consider_bypass_optimization = false;
724 : }
725 : else
726 : {
727 : /* Default/auto, make all decisions dynamically */
728 : Assert(params->index_cleanup == VACOPTVALUE_AUTO);
729 : }
730 :
731 : /* Initialize page counters explicitly (be tidy) */
732 117624 : vacrel->scanned_pages = 0;
733 117624 : vacrel->eager_scanned_pages = 0;
734 117624 : vacrel->removed_pages = 0;
735 117624 : vacrel->new_frozen_tuple_pages = 0;
736 117624 : vacrel->lpdead_item_pages = 0;
737 117624 : vacrel->missed_dead_pages = 0;
738 117624 : vacrel->nonempty_pages = 0;
739 : /* dead_items_alloc allocates vacrel->dead_items later on */
740 :
741 : /* Allocate/initialize output statistics state */
742 117624 : vacrel->new_rel_tuples = 0;
743 117624 : vacrel->new_live_tuples = 0;
744 117624 : vacrel->indstats = (IndexBulkDeleteResult **)
745 117624 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
746 :
747 : /* Initialize remaining counters (be tidy) */
748 117624 : vacrel->num_index_scans = 0;
749 117624 : vacrel->tuples_deleted = 0;
750 117624 : vacrel->tuples_frozen = 0;
751 117624 : vacrel->lpdead_items = 0;
752 117624 : vacrel->live_tuples = 0;
753 117624 : vacrel->recently_dead_tuples = 0;
754 117624 : vacrel->missed_dead_tuples = 0;
755 :
756 117624 : vacrel->vm_new_visible_pages = 0;
757 117624 : vacrel->vm_new_visible_frozen_pages = 0;
758 117624 : vacrel->vm_new_frozen_pages = 0;
759 117624 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
760 :
761 : /*
762 : * Get cutoffs that determine which deleted tuples are considered DEAD,
763 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
764 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
765 : * happen in this order to ensure that the OldestXmin cutoff field works
766 : * as an upper bound on the XIDs stored in the pages we'll actually scan
767 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
768 : *
769 : * Next acquire vistest, a related cutoff that's used in pruning. We use
770 : * vistest in combination with OldestXmin to ensure that
771 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
772 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
773 : * whether a tuple should be frozen or removed. (In the future we might
774 : * want to teach lazy_scan_prune to recompute vistest from time to time,
775 : * to increase the number of dead tuples it can prune away.)
776 : */
777 117624 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
778 117624 : vacrel->vistest = GlobalVisTestFor(rel);
779 : /* Initialize state used to track oldest extant XID/MXID */
780 117624 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
781 117624 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
782 :
783 : /*
784 : * Initialize state related to tracking all-visible page skipping. This is
785 : * very important to determine whether or not it is safe to advance the
786 : * relfrozenxid/relminmxid.
787 : */
788 117624 : vacrel->skippedallvis = false;
789 117624 : skipwithvm = true;
790 117624 : if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
791 : {
792 : /*
793 : * Force aggressive mode, and disable skipping blocks using the
794 : * visibility map (even those set all-frozen)
795 : */
796 298 : vacrel->aggressive = true;
797 298 : skipwithvm = false;
798 : }
799 :
800 117624 : vacrel->skipwithvm = skipwithvm;
801 :
802 : /*
803 : * Set up eager scan tracking state. This must happen after determining
804 : * whether or not the vacuum must be aggressive, because only normal
805 : * vacuums use the eager scan algorithm.
806 : */
807 117624 : heap_vacuum_eager_scan_setup(vacrel, params);
808 :
809 117624 : if (verbose)
810 : {
811 20 : if (vacrel->aggressive)
812 0 : ereport(INFO,
813 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
814 : vacrel->dbname, vacrel->relnamespace,
815 : vacrel->relname)));
816 : else
817 20 : ereport(INFO,
818 : (errmsg("vacuuming \"%s.%s.%s\"",
819 : vacrel->dbname, vacrel->relnamespace,
820 : vacrel->relname)));
821 : }
822 :
823 : /*
824 : * Allocate dead_items memory using dead_items_alloc. This handles
825 : * parallel VACUUM initialization as part of allocating shared memory
826 : * space used for dead_items. (But do a failsafe precheck first, to
827 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
828 : * is already dangerously old.)
829 : */
830 117624 : lazy_check_wraparound_failsafe(vacrel);
831 117624 : dead_items_alloc(vacrel, params->nworkers);
832 :
833 : /*
834 : * Call lazy_scan_heap to perform all required heap pruning, index
835 : * vacuuming, and heap vacuuming (plus related processing)
836 : */
837 117624 : lazy_scan_heap(vacrel);
838 :
839 : /*
840 : * Free resources managed by dead_items_alloc. This ends parallel mode in
841 : * passing when necessary.
842 : */
843 117624 : dead_items_cleanup(vacrel);
844 : Assert(!IsInParallelMode());
845 :
846 : /*
847 : * Update pg_class entries for each of rel's indexes where appropriate.
848 : *
849 : * Unlike the later update to rel's pg_class entry, this is not critical.
850 : * Maintains relpages/reltuples statistics used by the planner only.
851 : */
852 117624 : if (vacrel->do_index_cleanup)
853 98304 : update_relstats_all_indexes(vacrel);
854 :
855 : /* Done with rel's indexes */
856 117624 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
857 :
858 : /* Optionally truncate rel */
859 117624 : if (should_attempt_truncation(vacrel))
860 278 : lazy_truncate_heap(vacrel);
861 :
862 : /* Pop the error context stack */
863 117624 : error_context_stack = errcallback.previous;
864 :
865 : /* Report that we are now doing final cleanup */
866 117624 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
867 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
868 :
869 : /*
870 : * Prepare to update rel's pg_class entry.
871 : *
872 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
873 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
874 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
875 : */
876 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
877 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
878 : vacrel->cutoffs.relfrozenxid,
879 : vacrel->NewRelfrozenXid));
880 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
881 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
882 : vacrel->cutoffs.relminmxid,
883 : vacrel->NewRelminMxid));
884 117624 : if (vacrel->skippedallvis)
885 : {
886 : /*
887 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
888 : * chose to skip an all-visible page range. The state that tracks new
889 : * values will have missed unfrozen XIDs from the pages we skipped.
890 : */
891 : Assert(!vacrel->aggressive);
892 50 : vacrel->NewRelfrozenXid = InvalidTransactionId;
893 50 : vacrel->NewRelminMxid = InvalidMultiXactId;
894 : }
895 :
896 : /*
897 : * For safety, clamp relallvisible to be not more than what we're setting
898 : * pg_class.relpages to
899 : */
900 117624 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
901 117624 : visibilitymap_count(rel, &new_rel_allvisible, NULL);
902 117624 : if (new_rel_allvisible > new_rel_pages)
903 0 : new_rel_allvisible = new_rel_pages;
904 :
905 : /*
906 : * Now actually update rel's pg_class entry.
907 : *
908 : * In principle new_live_tuples could be -1 indicating that we (still)
909 : * don't know the tuple count. In practice that can't happen, since we
910 : * scan every page that isn't skipped using the visibility map.
911 : */
912 117624 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
913 117624 : new_rel_allvisible, vacrel->nindexes > 0,
914 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
915 : &frozenxid_updated, &minmulti_updated, false);
916 :
917 : /*
918 : * Report results to the cumulative stats system, too.
919 : *
920 : * Deliberately avoid telling the stats system about LP_DEAD items that
921 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
922 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
923 : * It seems like a good idea to err on the side of not vacuuming again too
924 : * soon in cases where the failsafe prevented significant amounts of heap
925 : * vacuuming.
926 : */
927 70526 : pgstat_report_vacuum(RelationGetRelid(rel),
928 117624 : rel->rd_rel->relisshared,
929 47098 : Max(vacrel->new_live_tuples, 0),
930 117624 : vacrel->recently_dead_tuples +
931 117624 : vacrel->missed_dead_tuples,
932 : starttime);
933 117624 : pgstat_progress_end_command();
934 :
935 117624 : if (instrument)
936 : {
937 96010 : TimestampTz endtime = GetCurrentTimestamp();
938 :
939 96040 : if (verbose || params->log_min_duration == 0 ||
940 30 : TimestampDifferenceExceeds(starttime, endtime,
941 : params->log_min_duration))
942 : {
943 : long secs_dur;
944 : int usecs_dur;
945 : WalUsage walusage;
946 : BufferUsage bufferusage;
947 : StringInfoData buf;
948 : char *msgfmt;
949 : int32 diff;
950 95980 : double read_rate = 0,
951 95980 : write_rate = 0;
952 : int64 total_blks_hit;
953 : int64 total_blks_read;
954 : int64 total_blks_dirtied;
955 :
956 95980 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
957 95980 : memset(&walusage, 0, sizeof(WalUsage));
958 95980 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
959 95980 : memset(&bufferusage, 0, sizeof(BufferUsage));
960 95980 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
961 :
962 95980 : total_blks_hit = bufferusage.shared_blks_hit +
963 95980 : bufferusage.local_blks_hit;
964 95980 : total_blks_read = bufferusage.shared_blks_read +
965 95980 : bufferusage.local_blks_read;
966 95980 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
967 95980 : bufferusage.local_blks_dirtied;
968 :
969 95980 : initStringInfo(&buf);
970 95980 : if (verbose)
971 : {
972 : /*
973 : * Aggressiveness already reported earlier, in dedicated
974 : * VACUUM VERBOSE ereport
975 : */
976 : Assert(!params->is_wraparound);
977 20 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
978 : }
979 95960 : else if (params->is_wraparound)
980 : {
981 : /*
982 : * While it's possible for a VACUUM to be both is_wraparound
983 : * and !aggressive, that's just a corner-case -- is_wraparound
984 : * implies aggressive. Produce distinct output for the corner
985 : * case all the same, just in case.
986 : */
987 95918 : if (vacrel->aggressive)
988 95902 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
989 : else
990 16 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
991 : }
992 : else
993 : {
994 42 : if (vacrel->aggressive)
995 16 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
996 : else
997 26 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
998 : }
999 95980 : appendStringInfo(&buf, msgfmt,
1000 : vacrel->dbname,
1001 : vacrel->relnamespace,
1002 : vacrel->relname,
1003 : vacrel->num_index_scans);
1004 134108 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1005 : vacrel->removed_pages,
1006 : new_rel_pages,
1007 : vacrel->scanned_pages,
1008 : orig_rel_pages == 0 ? 100.0 :
1009 38128 : 100.0 * vacrel->scanned_pages /
1010 : orig_rel_pages,
1011 : vacrel->eager_scanned_pages);
1012 95980 : appendStringInfo(&buf,
1013 95980 : _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
1014 95980 : (long long) vacrel->tuples_deleted,
1015 95980 : (long long) vacrel->new_rel_tuples,
1016 95980 : (long long) vacrel->recently_dead_tuples);
1017 95980 : if (vacrel->missed_dead_tuples > 0)
1018 0 : appendStringInfo(&buf,
1019 0 : _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
1020 0 : (long long) vacrel->missed_dead_tuples,
1021 : vacrel->missed_dead_pages);
1022 95980 : diff = (int32) (ReadNextTransactionId() -
1023 95980 : vacrel->cutoffs.OldestXmin);
1024 95980 : appendStringInfo(&buf,
1025 95980 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1026 : vacrel->cutoffs.OldestXmin, diff);
1027 95980 : if (frozenxid_updated)
1028 : {
1029 36448 : diff = (int32) (vacrel->NewRelfrozenXid -
1030 36448 : vacrel->cutoffs.relfrozenxid);
1031 36448 : appendStringInfo(&buf,
1032 36448 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1033 : vacrel->NewRelfrozenXid, diff);
1034 : }
1035 95980 : if (minmulti_updated)
1036 : {
1037 14 : diff = (int32) (vacrel->NewRelminMxid -
1038 14 : vacrel->cutoffs.relminmxid);
1039 14 : appendStringInfo(&buf,
1040 14 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1041 : vacrel->NewRelminMxid, diff);
1042 : }
1043 95980 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
1044 : vacrel->new_frozen_tuple_pages,
1045 : orig_rel_pages == 0 ? 100.0 :
1046 38128 : 100.0 * vacrel->new_frozen_tuple_pages /
1047 : orig_rel_pages,
1048 95980 : (long long) vacrel->tuples_frozen);
1049 :
1050 95980 : appendStringInfo(&buf,
1051 95980 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1052 : vacrel->vm_new_visible_pages,
1053 95980 : vacrel->vm_new_visible_frozen_pages +
1054 95980 : vacrel->vm_new_frozen_pages,
1055 : vacrel->vm_new_frozen_pages);
1056 95980 : if (vacrel->do_index_vacuuming)
1057 : {
1058 77130 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1059 77082 : appendStringInfoString(&buf, _("index scan not needed: "));
1060 : else
1061 48 : appendStringInfoString(&buf, _("index scan needed: "));
1062 :
1063 77130 : msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
1064 : }
1065 : else
1066 : {
1067 18850 : if (!VacuumFailsafeActive)
1068 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1069 : else
1070 18850 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1071 :
1072 18850 : msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
1073 : }
1074 95980 : appendStringInfo(&buf, msgfmt,
1075 : vacrel->lpdead_item_pages,
1076 : orig_rel_pages == 0 ? 100.0 :
1077 38128 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1078 95980 : (long long) vacrel->lpdead_items);
1079 240064 : for (int i = 0; i < vacrel->nindexes; i++)
1080 : {
1081 144084 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1082 :
1083 144084 : if (!istat)
1084 143994 : continue;
1085 :
1086 90 : appendStringInfo(&buf,
1087 90 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1088 90 : indnames[i],
1089 : istat->num_pages,
1090 : istat->pages_newly_deleted,
1091 : istat->pages_deleted,
1092 : istat->pages_free);
1093 : }
1094 95980 : if (track_cost_delay_timing)
1095 : {
1096 : /*
1097 : * We bypass the changecount mechanism because this value is
1098 : * only updated by the calling process. We also rely on the
1099 : * above call to pgstat_progress_end_command() to not clear
1100 : * the st_progress_param array.
1101 : */
1102 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1103 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1104 : }
1105 95980 : if (track_io_timing)
1106 : {
1107 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1108 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1109 :
1110 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1111 : read_ms, write_ms);
1112 : }
1113 95980 : if (secs_dur > 0 || usecs_dur > 0)
1114 : {
1115 95980 : read_rate = (double) BLCKSZ * total_blks_read /
1116 95980 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1117 95980 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1118 95980 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1119 : }
1120 95980 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1121 : read_rate, write_rate);
1122 95980 : appendStringInfo(&buf,
1123 95980 : _("buffer usage: %lld hits, %lld reads, %lld dirtied\n"),
1124 : (long long) total_blks_hit,
1125 : (long long) total_blks_read,
1126 : (long long) total_blks_dirtied);
1127 95980 : appendStringInfo(&buf,
1128 95980 : _("WAL usage: %lld records, %lld full page images, %llu bytes, %lld buffers full\n"),
1129 95980 : (long long) walusage.wal_records,
1130 95980 : (long long) walusage.wal_fpi,
1131 95980 : (unsigned long long) walusage.wal_bytes,
1132 95980 : (long long) walusage.wal_buffers_full);
1133 95980 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1134 :
1135 95980 : ereport(verbose ? INFO : LOG,
1136 : (errmsg_internal("%s", buf.data)));
1137 95980 : pfree(buf.data);
1138 : }
1139 : }
1140 :
1141 : /* Cleanup index statistics and index names */
1142 292482 : for (int i = 0; i < vacrel->nindexes; i++)
1143 : {
1144 174858 : if (vacrel->indstats[i])
1145 2260 : pfree(vacrel->indstats[i]);
1146 :
1147 174858 : if (instrument)
1148 144156 : pfree(indnames[i]);
1149 : }
1150 117624 : }
1151 :
1152 : /*
1153 : * lazy_scan_heap() -- workhorse function for VACUUM
1154 : *
1155 : * This routine prunes each page in the heap, and considers the need to
1156 : * freeze remaining tuples with storage (not including pages that can be
1157 : * skipped using the visibility map). Also performs related maintenance
1158 : * of the FSM and visibility map. These steps all take place during an
1159 : * initial pass over the target heap relation.
1160 : *
1161 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1162 : * consists of deleting index tuples that point to LP_DEAD items left in
1163 : * heap pages following pruning. The earlier initial pass over the heap will
1164 : * have collected the TIDs whose index tuples need to be removed.
1165 : *
1166 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1167 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1168 : * as LP_UNUSED. This has to happen in a second, final pass over the
1169 : * heap, to preserve a basic invariant that all index AMs rely on: no
1170 : * extant index tuple can ever be allowed to contain a TID that points to
1171 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1172 : * recycling of line pointers to avoid index scans that get confused
1173 : * about which TID points to which tuple immediately after recycling.
1174 : * (Actually, this isn't a concern when the target heap relation happens to
1175 : * have no indexes, which allows us to safely apply the one-pass strategy
1176 : * as an optimization).
1177 : *
1178 : * In practice we often have enough space to fit all TIDs, and so won't
1179 : * need to call lazy_vacuum more than once, after our initial pass over
1180 : * the heap has totally finished. Otherwise things are slightly more
1181 : * complicated: our "initial pass" over the heap applies only to those
1182 : * pages that were pruned before we needed to call lazy_vacuum, and our
1183 : * "final pass" over the heap only vacuums these same heap pages.
1184 : * However, we process indexes in full every time lazy_vacuum is called,
1185 : * which makes index processing very inefficient when memory is in short
1186 : * supply.
1187 : */
1188 : static void
1189 117624 : lazy_scan_heap(LVRelState *vacrel)
1190 : {
1191 : ReadStream *stream;
1192 117624 : BlockNumber rel_pages = vacrel->rel_pages,
1193 117624 : blkno = 0,
1194 117624 : next_fsm_block_to_vacuum = 0;
1195 117624 : BlockNumber orig_eager_scan_success_limit =
1196 : vacrel->eager_scan_remaining_successes; /* for logging */
1197 117624 : Buffer vmbuffer = InvalidBuffer;
1198 117624 : const int initprog_index[] = {
1199 : PROGRESS_VACUUM_PHASE,
1200 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1201 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1202 : };
1203 : int64 initprog_val[3];
1204 :
1205 : /* Report that we're scanning the heap, advertising total # of blocks */
1206 117624 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1207 117624 : initprog_val[1] = rel_pages;
1208 117624 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1209 117624 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1210 :
1211 : /* Initialize for the first heap_vac_scan_next_block() call */
1212 117624 : vacrel->current_block = InvalidBlockNumber;
1213 117624 : vacrel->next_unskippable_block = InvalidBlockNumber;
1214 117624 : vacrel->next_unskippable_allvis = false;
1215 117624 : vacrel->next_unskippable_eager_scanned = false;
1216 117624 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1217 :
1218 : /* Set up the read stream for vacuum's first pass through the heap */
1219 117624 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1220 : vacrel->bstrategy,
1221 : vacrel->rel,
1222 : MAIN_FORKNUM,
1223 : heap_vac_scan_next_block,
1224 : vacrel,
1225 : sizeof(uint8));
1226 :
1227 : while (true)
1228 759210 : {
1229 : Buffer buf;
1230 : Page page;
1231 876834 : uint8 blk_info = 0;
1232 : bool has_lpdead_items;
1233 876834 : void *per_buffer_data = NULL;
1234 876834 : bool vm_page_frozen = false;
1235 876834 : bool got_cleanup_lock = false;
1236 :
1237 876834 : vacuum_delay_point(false);
1238 :
1239 : /*
1240 : * Regularly check if wraparound failsafe should trigger.
1241 : *
1242 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1243 : * relfrozenxid might start to look dangerously old before we reach
1244 : * that point. This check also provides failsafe coverage for the
1245 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1246 : * param set to 'off'.
1247 : */
1248 876834 : if (vacrel->scanned_pages > 0 &&
1249 759210 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1250 0 : lazy_check_wraparound_failsafe(vacrel);
1251 :
1252 : /*
1253 : * Consider whether we definitely have enough space to process the TIDs
1254 : * on this page. If we are close to overrunning the available space for
1255 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1256 : * this page.
1257 : */
1258 876834 : if (TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1259 : {
1260 : /*
1261 : * Before beginning index vacuuming, we release any pin we may
1262 : * hold on the visibility map page. This isn't necessary for
1263 : * correctness, but we do it anyway to avoid holding the pin
1264 : * across a lengthy, unrelated operation.
1265 : */
1266 0 : if (BufferIsValid(vmbuffer))
1267 : {
1268 0 : ReleaseBuffer(vmbuffer);
1269 0 : vmbuffer = InvalidBuffer;
1270 : }
1271 :
1272 : /* Perform a round of index and heap vacuuming */
1273 0 : vacrel->consider_bypass_optimization = false;
1274 0 : lazy_vacuum(vacrel);
1275 :
1276 : /*
1277 : * Vacuum the Free Space Map to make newly-freed space visible on
1278 : * upper-level FSM pages. Note that blkno is the previously
1279 : * processed block.
1280 : */
1281 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1282 : blkno + 1);
1283 0 : next_fsm_block_to_vacuum = blkno;
1284 :
1285 : /* Report that we are once again scanning the heap */
1286 0 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1287 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1288 : }
1289 :
1290 876834 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1291 :
1292 : /* The relation is exhausted. */
1293 876834 : if (!BufferIsValid(buf))
1294 117624 : break;
1295 :
1296 759210 : blk_info = *((uint8 *) per_buffer_data);
1297 759210 : CheckBufferIsPinnedOnce(buf);
1298 759210 : page = BufferGetPage(buf);
1299 759210 : blkno = BufferGetBlockNumber(buf);
1300 :
1301 759210 : vacrel->scanned_pages++;
1302 759210 : if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
1303 0 : vacrel->eager_scanned_pages++;
1304 :
1305 : /* Report as block scanned, update error traceback information */
1306 759210 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1307 759210 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1308 : blkno, InvalidOffsetNumber);
1309 :
1310 : /*
1311 : * Pin the visibility map page in case we need to mark the page
1312 : * all-visible. In most cases this will be very cheap, because we'll
1313 : * already have the correct page pinned anyway.
1314 : */
1315 759210 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1316 :
1317 : /*
1318 : * We need a buffer cleanup lock to prune HOT chains and defragment
1319 : * the page in lazy_scan_prune. But when it's not possible to acquire
1320 : * a cleanup lock right away, we may be able to settle for reduced
1321 : * processing using lazy_scan_noprune.
1322 : */
1323 759210 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1324 :
1325 759210 : if (!got_cleanup_lock)
1326 10 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1327 :
1328 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1329 759210 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1330 759210 : vmbuffer))
1331 : {
1332 : /* Processed as new/empty page (lock and pin released) */
1333 1212 : continue;
1334 : }
1335 :
1336 : /*
1337 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1338 : * items in the dead_items area for later vacuuming, count live and
1339 : * recently dead tuples for vacuum logging, and determine if this
1340 : * block could later be truncated. If we encounter any xid/mxids that
1341 : * require advancing the relfrozenxid/relminmxid, we'll have to wait
1342 : * for a cleanup lock and call lazy_scan_prune().
1343 : */
1344 757998 : if (!got_cleanup_lock &&
1345 10 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1346 : {
1347 : /*
1348 : * lazy_scan_noprune could not do all required processing. Wait
1349 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1350 : */
1351 : Assert(vacrel->aggressive);
1352 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1353 0 : LockBufferForCleanup(buf);
1354 0 : got_cleanup_lock = true;
1355 : }
1356 :
1357 : /*
1358 : * If we have a cleanup lock, we must now prune, freeze, and count
1359 : * tuples. We may have acquired the cleanup lock originally, or we may
1360 : * have gone back and acquired it after lazy_scan_noprune() returned
1361 : * false. Either way, the page hasn't been processed yet.
1362 : *
1363 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1364 : * recently_dead_tuples and live tuples for vacuum logging, determine
1365 : * if the block can later be truncated, and accumulate the details of
1366 : * remaining LP_DEAD line pointers on the page into dead_items. These
1367 : * dead items include those pruned by lazy_scan_prune() as well as
1368 : * line pointers previously marked LP_DEAD.
1369 : */
1370 757998 : if (got_cleanup_lock)
1371 757988 : lazy_scan_prune(vacrel, buf, blkno, page,
1372 : vmbuffer,
1373 757988 : blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
1374 : &has_lpdead_items, &vm_page_frozen);
1375 :
1376 : /*
1377 : * Count an eagerly scanned page as a failure or a success.
1378 : *
1379 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1380 : * cleanup lock, we won't have frozen the page. However, we only count
1381 : * pages that were too new to require freezing as eager freeze
1382 : * failures.
1383 : *
1384 : * We could gather more information from lazy_scan_noprune() about
1385 : * whether or not there were tuples with XIDs or MXIDs older than the
1386 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1387 : * exclude pages skipped due to cleanup lock contention from eager
1388 : * freeze algorithm caps.
1389 : */
1390 757998 : if (got_cleanup_lock &&
1391 757988 : (blk_info & VAC_BLK_WAS_EAGER_SCANNED))
1392 : {
1393 : /* Aggressive vacuums do not eager scan. */
1394 : Assert(!vacrel->aggressive);
1395 :
1396 0 : if (vm_page_frozen)
1397 : {
1398 : Assert(vacrel->eager_scan_remaining_successes > 0);
1399 0 : vacrel->eager_scan_remaining_successes--;
1400 :
1401 0 : if (vacrel->eager_scan_remaining_successes == 0)
1402 : {
1403 : /*
1404 : * If we hit our success cap, permanently disable eager
1405 : * scanning by setting the other eager scan management
1406 : * fields to their disabled values.
1407 : */
1408 0 : vacrel->eager_scan_remaining_fails = 0;
1409 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1410 0 : vacrel->eager_scan_max_fails_per_region = 0;
1411 :
1412 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1413 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"",
1414 : orig_eager_scan_success_limit,
1415 : vacrel->dbname, vacrel->relnamespace,
1416 : vacrel->relname)));
1417 : }
1418 : }
1419 : else
1420 : {
1421 : Assert(vacrel->eager_scan_remaining_fails > 0);
1422 0 : vacrel->eager_scan_remaining_fails--;
1423 : }
1424 : }
1425 :
1426 : /*
1427 : * Now drop the buffer lock and, potentially, update the FSM.
1428 : *
1429 : * Our goal is to update the freespace map the last time we touch the
1430 : * page. If we'll process a block in the second pass, we may free up
1431 : * additional space on the page, so it is better to update the FSM
1432 : * after the second pass. If the relation has no indexes, or if index
1433 : * vacuuming is disabled, there will be no second heap pass; if this
1434 : * particular page has no dead items, the second heap pass will not
1435 : * touch this page. So, in those cases, update the FSM now.
1436 : *
1437 : * Note: In corner cases, it's possible to miss updating the FSM
1438 : * entirely. If index vacuuming is currently enabled, we'll skip the
1439 : * FSM update now. But if failsafe mode is later activated, or there
1440 : * are so few dead tuples that index vacuuming is bypassed, there will
1441 : * also be no opportunity to update the FSM later, because we'll never
1442 : * revisit this page. Since updating the FSM is desirable but not
1443 : * absolutely required, that's OK.
1444 : */
1445 757998 : if (vacrel->nindexes == 0
1446 737452 : || !vacrel->do_index_vacuuming
1447 564974 : || !has_lpdead_items)
1448 736304 : {
1449 736304 : Size freespace = PageGetHeapFreeSpace(page);
1450 :
1451 736304 : UnlockReleaseBuffer(buf);
1452 736304 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1453 :
1454 : /*
1455 : * Periodically perform FSM vacuuming to make newly-freed space
1456 : * visible on upper FSM pages. When the table has indexes, this is
1457 : * instead done after each round of index and heap vacuuming. There
1458 : * will only be newly-freed space if we held the cleanup lock and
     : * lazy_scan_prune() was called.
1459 : */
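            : 	/*
            : 	 * VACUUM_FSM_EVERY_PAGES is assumed to be large (several GB worth
            : 	 * of blocks), so for an indexless table this upper-level FSM
            : 	 * vacuuming happens only a handful of times per heap pass.
            : 	 */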
1460 736304 : if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
1461 0 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1462 : {
1463 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1464 : blkno);
1465 0 : next_fsm_block_to_vacuum = blkno;
1466 : }
1467 : }
1468 : else
1469 21694 : UnlockReleaseBuffer(buf);
1470 : }
1471 :
1472 117624 : vacrel->blkno = InvalidBlockNumber;
1473 117624 : if (BufferIsValid(vmbuffer))
1474 47240 : ReleaseBuffer(vmbuffer);
1475 :
1476 : /*
1477 : * Report that everything is now scanned. We never skip scanning the last
1478 : * block in the relation, so we can pass rel_pages here.
1479 : */
1480 117624 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1481 : rel_pages);
1482 :
1483 : /* now we can compute the new value for pg_class.reltuples */
1484 235248 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1485 : vacrel->scanned_pages,
1486 117624 : vacrel->live_tuples);
1487 :
1488 : /*
1489 : * Also compute the total number of surviving heap entries. In the
1490 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1491 : */
1492 117624 : vacrel->new_rel_tuples =
1493 117624 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1494 117624 : vacrel->missed_dead_tuples;
1495 :
1496 117624 : read_stream_end(stream);
1497 :
1498 : /*
1499 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1500 : * related heap vacuuming
1501 : */
1502 117624 : if (vacrel->dead_items_info->num_items > 0)
1503 1048 : lazy_vacuum(vacrel);
1504 :
1505 : /*
1506 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1507 : * not there were indexes, and whether or not we bypassed index vacuuming.
1508 : * We can pass rel_pages here because we never skip scanning the last
1509 : * block of the relation.
1510 : */
1511 117624 : if (rel_pages > next_fsm_block_to_vacuum)
1512 47240 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1513 :
1514 : /* report all blocks vacuumed */
1515 117624 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1516 :
1517 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1518 117624 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1519 93380 : lazy_cleanup_all_indexes(vacrel);
1520 117624 : }
1521 :
1522 : /*
1523 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1524 : * for vacuum to process
1525 : *
1526 : * Every time lazy_scan_heap() needs a new block to process during its first
1527 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1528 : * heap_vac_scan_next_block() to get the next block.
1529 : *
1530 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1531 : * various thresholds to skip blocks which do not need to be processed and
1532 : * returns the next block to process or InvalidBlockNumber if there are no
1533 : * remaining blocks.
1534 : *
1535 : * The visibility status of the next block to process and whether or not it
1536 : * was eagerly scanned are set in the per_buffer_data.
1537 : *
1538 : * callback_private_data contains a reference to the LVRelState, passed to the
1539 : * read stream API during stream setup. The LVRelState is an in/out parameter
1540 : * here (locally named `vacrel`). Vacuum options and information about the
1541 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1542 : * that's all-visible but not all-frozen (to ensure that we don't update
1543 : * relfrozenxid in that case). vacrel also holds information about the next
1544 : * unskippable block -- as bookkeeping for this function.
1545 : */
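            : /*
            :  * A sketch of the expected stream setup in lazy_scan_heap(), assuming it
            :  * mirrors the second-pass setup in lazy_vacuum_heap_rel() below:
            :  *
            :  *		stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
            :  *											vacrel->bstrategy,
            :  *											vacrel->rel, MAIN_FORKNUM,
            :  *											heap_vac_scan_next_block,
            :  *											vacrel, sizeof(uint8));
            :  *
            :  * Each read_stream_next_buffer() call then yields one pinned buffer plus a
            :  * single uint8 of VAC_BLK_* flags in per_buffer_data, which this callback
            :  * fills in before returning the chosen block number.
            :  */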
1546 : static BlockNumber
1547 876834 : heap_vac_scan_next_block(ReadStream *stream,
1548 : void *callback_private_data,
1549 : void *per_buffer_data)
1550 : {
1551 : BlockNumber next_block;
1552 876834 : LVRelState *vacrel = callback_private_data;
1553 876834 : uint8 blk_info = 0;
1554 :
1555 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
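            : 	/* (assuming InvalidBlockNumber is 0xFFFFFFFF, 0xFFFFFFFF + 1 wraps to block 0) */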
1556 876834 : next_block = vacrel->current_block + 1;
1557 :
1558 : /* Have we reached the end of the relation? */
1559 876834 : if (next_block >= vacrel->rel_pages)
1560 : {
1561 117624 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1562 : {
1563 44588 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1564 44588 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1565 : }
1566 117624 : return InvalidBlockNumber;
1567 : }
1568 :
1569 : /*
1570 : * We must be in one of the three following states:
1571 : */
1572 759210 : if (next_block > vacrel->next_unskippable_block ||
1573 188074 : vacrel->next_unskippable_block == InvalidBlockNumber)
1574 : {
1575 : /*
1576 : * 1. We have just processed an unskippable block (or we're at the
1577 : * beginning of the scan). Find the next unskippable block using the
1578 : * visibility map.
1579 : */
1580 : bool skipsallvis;
1581 :
1582 618376 : find_next_unskippable_block(vacrel, &skipsallvis);
1583 :
1584 : /*
1585 : * We now know the next block that we must process. It can be the
1586 : * next block after the one we just processed, or something further
1587 : * ahead. If it's further ahead, we can jump to it, but we choose to
1588 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1589 : * pages. Since we're reading sequentially, the OS should be doing
1590 : * readahead for us, so there's no gain in skipping a page now and
1591 : * then. Skipping such a range might even discourage sequential
1592 : * detection.
1593 : *
1594 : * This test also enables more frequent relfrozenxid advancement
1595 : * during non-aggressive VACUUMs. If the range has any all-visible
1596 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1597 : * real downside.
1598 : */
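            : 		/*
            : 		 * Worked example, assuming SKIP_PAGES_THRESHOLD is defined as 32:
            : 		 * if next_block is 60 and the next unskippable block is 100, the
            : 		 * 40-block all-visible range is skipped and we jump to block 100;
            : 		 * a 20-block range would be scanned anyway to keep the read
            : 		 * pattern sequential.
            : 		 */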
1599 618376 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1600 : {
1601 4052 : next_block = vacrel->next_unskippable_block;
1602 4052 : if (skipsallvis)
1603 50 : vacrel->skippedallvis = true;
1604 : }
1605 : }
1606 :
1607 : /* Now we must be in one of the two remaining states: */
1608 759210 : if (next_block < vacrel->next_unskippable_block)
1609 : {
1610 : /*
1611 : * 2. We are processing a range of blocks that we could have skipped
1612 : * but chose not to. We know that they are all-visible in the VM,
1613 : * otherwise they would've been unskippable.
1614 : */
1615 140834 : vacrel->current_block = next_block;
1616 140834 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1617 140834 : *((uint8 *) per_buffer_data) = blk_info;
1618 140834 : return vacrel->current_block;
1619 : }
1620 : else
1621 : {
1622 : /*
1623 : * 3. We reached the next unskippable block. Process it. On next
1624 : * iteration, we will be back in state 1.
1625 : */
1626 : Assert(next_block == vacrel->next_unskippable_block);
1627 :
1628 618376 : vacrel->current_block = next_block;
1629 618376 : if (vacrel->next_unskippable_allvis)
1630 40580 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1631 618376 : if (vacrel->next_unskippable_eager_scanned)
1632 0 : blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
1633 618376 : *((uint8 *) per_buffer_data) = blk_info;
1634 618376 : return vacrel->current_block;
1635 : }
1636 : }
1637 :
1638 : /*
1639 : * Find the next unskippable block in a vacuum scan using the visibility map.
1640 : * The next unskippable block and its visibility information are updated in
1641 : * vacrel.
1642 : *
1643 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1644 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1645 : * was concurrently cleared, though. All that matters is that caller scan all
1646 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1647 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1648 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1649 : * to skip such a range is actually made, making everything safe.)
1650 : */
1651 : static void
1652 618376 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1653 : {
1654 618376 : BlockNumber rel_pages = vacrel->rel_pages;
1655 618376 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1656 618376 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1657 618376 : bool next_unskippable_eager_scanned = false;
1658 : bool next_unskippable_allvis;
1659 :
1660 618376 : *skipsallvis = false;
1661 :
1662 451026 : for (;; next_unskippable_block++)
1663 451026 : {
1664 1069402 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1665 : next_unskippable_block,
1666 : &next_unskippable_vmbuffer);
1667 :
1668 1069402 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1669 :
1670 : /*
1671 : * At the start of each eager scan region, normal vacuums with eager
1672 : * scanning enabled reset the failure counter, allowing vacuum to
1673 : * resume eager scanning if it had been suspended in the previous
1674 : * region.
1675 : */
1676 1069402 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1677 : {
1678 0 : vacrel->eager_scan_remaining_fails =
1679 0 : vacrel->eager_scan_max_fails_per_region;
1680 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1681 : }
1682 :
1683 : /*
1684 : * A block is unskippable if it is not all visible according to the
1685 : * visibility map.
1686 : */
1687 1069402 : if (!next_unskippable_allvis)
1688 : {
1689 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1690 577796 : break;
1691 : }
1692 :
1693 : /*
1694 : * Caller must scan the last page to determine whether it has tuples
1695 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1696 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1697 : * lock on rel to attempt a truncation that fails anyway, just because
1698 : * there are tuples on the last page (it is likely that there will be
1699 : * tuples on other nearby pages as well, but those can be skipped).
1700 : *
1701 : * Implement this by always treating the last block as unsafe to skip.
1702 : */
1703 491606 : if (next_unskippable_block == rel_pages - 1)
1704 39834 : break;
1705 :
1706 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1707 451772 : if (!vacrel->skipwithvm)
1708 746 : break;
1709 :
1710 : /*
1711 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1712 : * already frozen by now), so this page can be skipped.
1713 : */
1714 451026 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1715 446168 : continue;
1716 :
1717 : /*
1718 : * Aggressive vacuums cannot skip any all-visible pages that are not
1719 : * also all-frozen.
1720 : */
1721 4858 : if (vacrel->aggressive)
1722 0 : break;
1723 :
1724 : /*
1725 : * Normal vacuums with eager scanning enabled only skip all-visible
1726 : * but not all-frozen pages if they have hit the failure limit for the
1727 : * current eager scan region.
1728 : */
1729 4858 : if (vacrel->eager_scan_remaining_fails > 0)
1730 : {
1731 0 : next_unskippable_eager_scanned = true;
1732 0 : break;
1733 : }
1734 :
1735 : /*
1736 : * All-visible blocks are safe to skip in a normal vacuum. But
1737 : * remember that the final range contains such a block for later.
1738 : */
1739 4858 : *skipsallvis = true;
1740 : }
1741 :
1742 : /* write the local variables back to vacrel */
1743 618376 : vacrel->next_unskippable_block = next_unskippable_block;
1744 618376 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1745 618376 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1746 618376 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1747 618376 : }
1748 :
1749 : /*
1750 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1751 : *
1752 : * Must call here to handle both new and empty pages before calling
1753 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1754 : * with new or empty pages.
1755 : *
1756 : * It's necessary to consider new pages as a special case, since the rules for
1757 : * maintaining the visibility map and FSM with empty pages are a little
1758 : * different (though new pages can be truncated away during rel truncation).
1759 : *
1760 : * Empty pages are not really a special case -- they're just heap pages that
1761 : * have no allocated tuples (including even LP_UNUSED items). You might
1762 : * wonder why we need to handle them here all the same. It's only necessary
1763 : * because of a corner-case involving a hard crash during heap relation
1764 : * extension. If we ever make relation-extension crash safe, then it should
1765 : * no longer be necessary to deal with empty pages here (or new pages, for
1766 : * that matter).
1767 : *
1768 : * Caller must hold at least a shared lock. We might need to escalate the
1769 : * lock if only a share lock is held, so the type of lock the caller holds
1770 : * must be specified using the 'sharelock' argument.
1771 : *
1772 : * Returns false in the common case where caller should go on to call
1773 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1774 : * that lazy_scan_heap is done processing the page, releasing the lock and pin
1775 : * on caller's behalf.
1776 : *
1777 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1778 : * is passed here because neither empty nor new pages can be eagerly frozen.
1779 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1780 : * the same time that they are set all-visible, and we don't eagerly scan
1781 : * frozen pages.
1782 : */
1783 : static bool
1784 759210 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1785 : Page page, bool sharelock, Buffer vmbuffer)
1786 : {
1787 : Size freespace;
1788 :
1789 759210 : if (PageIsNew(page))
1790 : {
1791 : * All-zeroes pages can be left over if a backend extends the relation
1792 : * by a single page but crashes before the newly initialized page has
1793 : * been written out, or as a result of bulk-extending the relation
1794 : * (which creates a number of empty pages at the tail end of the
1795 : * relation and then enters them into the FSM).
1796 : * relation), and then enters them into the FSM.
1797 : *
1798 : * Note we do not enter the page into the visibilitymap. That has the
1799 : * downside that we repeatedly visit this page in subsequent vacuums,
1800 : * but otherwise we'll never discover the space on a promoted standby.
1801 : * The harm of repeated checking ought not normally to be too bad. The
1802 : * space should usually get used at some point; otherwise there
1803 : * wouldn't be any regular vacuums.
1804 : *
1805 : * Make sure these pages are in the FSM, to ensure they can be reused.
1806 : * Do that by testing if there's any space recorded for the page. If
1807 : * not, enter it. We do so after releasing the lock on the heap page;
1808 : * the FSM is approximate, after all.
1809 : */
1810 1170 : UnlockReleaseBuffer(buf);
1811 :
1812 1170 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1813 : {
1814 858 : freespace = BLCKSZ - SizeOfPageHeaderData;
1815 :
1816 858 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1817 : }
1818 :
1819 1170 : return true;
1820 : }
1821 :
1822 758040 : if (PageIsEmpty(page))
1823 : {
1824 : /*
1825 : * It seems likely that caller will always be able to get a cleanup
1826 : * lock on an empty page. But don't take any chances -- escalate to
1827 : * an exclusive lock (still don't need a cleanup lock, though).
1828 : */
1829 42 : if (sharelock)
1830 : {
1831 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1832 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1833 :
1834 0 : if (!PageIsEmpty(page))
1835 : {
1836 : /* page isn't new or empty -- keep lock and pin for now */
1837 0 : return false;
1838 : }
1839 : }
1840 : else
1841 : {
1842 : /* Already have a full cleanup lock (which is more than enough) */
1843 : }
1844 :
1845 : /*
1846 : * Unlike new pages, empty pages are always set all-visible and
1847 : * all-frozen.
1848 : */
1849 42 : if (!PageIsAllVisible(page))
1850 : {
1851 : uint8 old_vmbits;
1852 :
1853 0 : START_CRIT_SECTION();
1854 :
1855 : /* mark buffer dirty before writing a WAL record */
1856 0 : MarkBufferDirty(buf);
1857 :
1858 : /*
1859 : * It's possible that another backend has extended the heap,
1860 : * initialized the page, and then failed to WAL-log the page due
1861 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1862 : * might try to replay our record setting the page all-visible and
1863 : * find that the page isn't initialized, which will cause a PANIC.
1864 : * To prevent that, check whether the page has been previously
1865 : * WAL-logged, and if not, do that now.
1866 : */
1867 0 : if (RelationNeedsWAL(vacrel->rel) &&
1868 0 : PageGetLSN(page) == InvalidXLogRecPtr)
1869 0 : log_newpage_buffer(buf, true);
1870 :
1871 0 : PageSetAllVisible(page);
1872 0 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1873 : InvalidXLogRecPtr,
1874 : vmbuffer, InvalidTransactionId,
1875 : VISIBILITYMAP_ALL_VISIBLE |
1876 : VISIBILITYMAP_ALL_FROZEN);
1877 0 : END_CRIT_SECTION();
1878 :
1879 : /*
1880 : * If the page wasn't already set all-visible and/or all-frozen in
1881 : * the VM, count it as newly set for logging.
1882 : */
1883 0 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1884 : {
1885 0 : vacrel->vm_new_visible_pages++;
1886 0 : vacrel->vm_new_visible_frozen_pages++;
1887 : }
1888 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1889 0 : vacrel->vm_new_frozen_pages++;
1890 : }
1891 :
1892 42 : freespace = PageGetHeapFreeSpace(page);
1893 42 : UnlockReleaseBuffer(buf);
1894 42 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1895 42 : return true;
1896 : }
1897 :
1898 : /* page isn't new or empty -- keep lock and pin */
1899 757998 : return false;
1900 : }
1901 :
1902 : /* qsort comparator for sorting OffsetNumbers */
1903 : static int
1904 5360310 : cmpOffsetNumbers(const void *a, const void *b)
1905 : {
1906 5360310 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1907 : }
1908 :
1909 : /*
1910 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1911 : *
1912 : * Caller must hold pin and buffer cleanup lock on the buffer.
1913 : *
1914 : * vmbuffer is the buffer containing the VM block with visibility information
1915 : * for the heap block, blkno. all_visible_according_to_vm is the saved
1916 : * visibility status of the heap block looked up earlier by the caller. We
1917 : * won't rely entirely on this status, as it may be out of date.
1918 : *
1919 : * *has_lpdead_items is set to true or false depending on whether, upon return
1920 : * from this function, any LP_DEAD items are still present on the page.
1921 : *
1922 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
1923 : * VM. The caller currently only uses this for determining whether an eagerly
1924 : * scanned page was successfully set all-frozen.
1925 : */
1926 : static void
1927 757988 : lazy_scan_prune(LVRelState *vacrel,
1928 : Buffer buf,
1929 : BlockNumber blkno,
1930 : Page page,
1931 : Buffer vmbuffer,
1932 : bool all_visible_according_to_vm,
1933 : bool *has_lpdead_items,
1934 : bool *vm_page_frozen)
1935 : {
1936 757988 : Relation rel = vacrel->rel;
1937 : PruneFreezeResult presult;
1938 757988 : int prune_options = 0;
1939 :
1940 : Assert(BufferGetBlockNumber(buf) == blkno);
1941 :
1942 : /*
1943 : * Prune all HOT-update chains and potentially freeze tuples on this page.
1944 : *
1945 : * If the relation has no indexes, we can immediately mark would-be dead
1946 : * items LP_UNUSED.
1947 : *
1948 : * The number of tuples removed from the page is returned in
1949 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
1950 : * the final value of presult.lpdead_items can be thought of as the number of
1951 : * tuples that were deleted from indexes.
1952 : *
1953 : * We will update the VM after collecting LP_DEAD items and freezing
1954 : * tuples. Pruning will have determined whether or not the page is
1955 : * all-visible.
1956 : */
1957 757988 : prune_options = HEAP_PAGE_PRUNE_FREEZE;
1958 757988 : if (vacrel->nindexes == 0)
1959 20546 : prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1960 :
1961 757988 : heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1962 : &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1963 : &vacrel->offnum,
1964 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1965 :
1966 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
1967 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
1968 :
1969 757988 : if (presult.nfrozen > 0)
1970 : {
1971 : /*
1972 : * We don't increment the new_frozen_tuple_pages instrumentation
1973 : * counter when nfrozen == 0, since it only counts pages with newly
1974 : * frozen tuples (don't confuse that with pages newly set all-frozen
1975 : * in VM).
1976 : */
1977 32786 : vacrel->new_frozen_tuple_pages++;
1978 : }
1979 :
1980 : /*
1981 : * VACUUM will call heap_page_is_all_visible() during the second pass over
1982 : * the heap to determine all_visible and all_frozen for the page -- this
1983 : * is a specialized version of the logic from this function. Now that
1984 : * we've finished pruning and freezing, make sure that we're in total
1985 : * agreement with heap_page_is_all_visible() using an assertion.
1986 : */
1987 : #ifdef USE_ASSERT_CHECKING
1988 : /* Note that all_frozen value does not matter when !all_visible */
1989 : if (presult.all_visible)
1990 : {
1991 : TransactionId debug_cutoff;
1992 : bool debug_all_frozen;
1993 :
1994 : Assert(presult.lpdead_items == 0);
1995 :
1996 : if (!heap_page_is_all_visible(vacrel, buf,
1997 : &debug_cutoff, &debug_all_frozen))
1998 : Assert(false);
1999 :
2000 : Assert(presult.all_frozen == debug_all_frozen);
2001 :
2002 : Assert(!TransactionIdIsValid(debug_cutoff) ||
2003 : debug_cutoff == presult.vm_conflict_horizon);
2004 : }
2005 : #endif
2006 :
2007 : /*
2008 : * Now save details of the LP_DEAD items from the page in vacrel
2009 : */
2010 757988 : if (presult.lpdead_items > 0)
2011 : {
2012 26296 : vacrel->lpdead_item_pages++;
2013 :
2014 : /*
2015 : * deadoffsets are collected incrementally in
2016 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2017 : * with an indeterminate order, but dead_items_add requires them to be
2018 : * sorted.
2019 : */
2020 26296 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2021 : cmpOffsetNumbers);
2022 :
2023 26296 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2024 : }
2025 :
2026 : /* Finally, add page-local counts to whole-VACUUM counts */
2027 757988 : vacrel->tuples_deleted += presult.ndeleted;
2028 757988 : vacrel->tuples_frozen += presult.nfrozen;
2029 757988 : vacrel->lpdead_items += presult.lpdead_items;
2030 757988 : vacrel->live_tuples += presult.live_tuples;
2031 757988 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2032 :
2033 : /* Can't truncate this page */
2034 757988 : if (presult.hastup)
2035 744544 : vacrel->nonempty_pages = blkno + 1;
2036 :
2037 : /* Did we find LP_DEAD items? */
2038 757988 : *has_lpdead_items = (presult.lpdead_items > 0);
2039 :
2040 : Assert(!presult.all_visible || !(*has_lpdead_items));
2041 :
2042 : /*
2043 : * Handle setting visibility map bit based on information from the VM (as
2044 : * of last heap_vac_scan_next_block() call), and from all_visible and
2045 : * all_frozen variables
2046 : */
2047 757988 : if (!all_visible_according_to_vm && presult.all_visible)
2048 56846 : {
2049 : uint8 old_vmbits;
2050 56846 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2051 :
2052 56846 : if (presult.all_frozen)
2053 : {
2054 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2055 40730 : flags |= VISIBILITYMAP_ALL_FROZEN;
2056 : }
2057 :
2058 : /*
2059 : * It should never be the case that the visibility map page is set
2060 : * while the page-level bit is clear, but the reverse is allowed (if
2061 : * checksums are not enabled). Regardless, set both bits so that we
2062 : * get back in sync.
2063 : *
2064 : * NB: If the heap page is all-visible but the VM bit is not set, we
2065 : * don't need to dirty the heap page. However, if checksums are
2066 : * enabled, we do need to make sure that the heap page is dirtied
2067 : * before passing it to visibilitymap_set(), because it may be logged.
2068 : * Given that this situation should only happen in rare cases after a
2069 : * crash, it is not worth optimizing.
2070 : */
2071 56846 : PageSetAllVisible(page);
2072 56846 : MarkBufferDirty(buf);
2073 56846 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2074 : InvalidXLogRecPtr,
2075 : vmbuffer, presult.vm_conflict_horizon,
2076 : flags);
2077 :
2078 : /*
2079 : * If the page wasn't already set all-visible and/or all-frozen in the
2080 : * VM, count it as newly set for logging.
2081 : */
2082 56846 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2083 : {
2084 56846 : vacrel->vm_new_visible_pages++;
2085 56846 : if (presult.all_frozen)
2086 : {
2087 40730 : vacrel->vm_new_visible_frozen_pages++;
2088 40730 : *vm_page_frozen = true;
2089 : }
2090 : }
2091 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2092 0 : presult.all_frozen)
2093 : {
2094 0 : vacrel->vm_new_frozen_pages++;
2095 0 : *vm_page_frozen = true;
2096 : }
2097 : }
2098 :
2099 : /*
2100 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
2101 : * page-level bit is clear. However, it's possible that the bit got
2102 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
2103 : * with buffer lock before concluding that the VM is corrupt.
2104 : */
2105 701142 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
2106 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
2107 : {
2108 0 : elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2109 : vacrel->relname, blkno);
2110 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2111 : VISIBILITYMAP_VALID_BITS);
2112 : }
2113 :
2114 : /*
2115 : * It's possible for the value returned by
2116 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2117 : * wrong for us to see tuples that appear to not be visible to everyone
2118 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2119 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
2120 : * conservative and sometimes returns a value that's unnecessarily small,
2121 : * so if we see that contradiction it just means that the tuples that we
2122 : * think are not visible to everyone yet actually are, and the
2123 : * PD_ALL_VISIBLE flag is correct.
2124 : *
2125 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2126 : * however.
2127 : */
2128 701142 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
2129 : {
2130 0 : elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2131 : vacrel->relname, blkno);
2132 0 : PageClearAllVisible(page);
2133 0 : MarkBufferDirty(buf);
2134 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2135 : VISIBILITYMAP_VALID_BITS);
2136 : }
2137 :
2138 : /*
2139 : * If the all-visible page is all-frozen but not marked as such yet, mark
2140 : * it as all-frozen. Note that all_frozen is only valid if all_visible is
2141 : * true, so we must check both all_visible and all_frozen.
2142 : */
2143 701142 : else if (all_visible_according_to_vm && presult.all_visible &&
2144 181370 : presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
2145 : {
2146 : uint8 old_vmbits;
2147 :
2148 : /*
2149 : * Avoid relying on all_visible_according_to_vm as a proxy for the
2150 : * page-level PD_ALL_VISIBLE bit being set, since it might have become
2151 : * stale -- even when all_visible is set.
2152 : */
2153 26 : if (!PageIsAllVisible(page))
2154 : {
2155 0 : PageSetAllVisible(page);
2156 0 : MarkBufferDirty(buf);
2157 : }
2158 :
2159 : /*
2160 : * Set the page all-frozen (and all-visible) in the VM.
2161 : *
2162 : * We can pass InvalidTransactionId as our cutoff_xid, since a
2163 : * snapshotConflictHorizon sufficient to make everything safe for REDO
2164 : * was logged when the page's tuples were frozen.
2165 : */
2166 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2167 26 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2168 : InvalidXLogRecPtr,
2169 : vmbuffer, InvalidTransactionId,
2170 : VISIBILITYMAP_ALL_VISIBLE |
2171 : VISIBILITYMAP_ALL_FROZEN);
2172 :
2173 : /*
2174 : * The page was likely already set all-visible in the VM. However,
2175 : * there is a small chance that it was modified sometime between
2176 : * setting all_visible_according_to_vm and checking the visibility
2177 : * during pruning. Check the return value of old_vmbits anyway to
2178 : * ensure the visibility map counters used for logging are accurate.
2179 : */
2180 26 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2181 : {
2182 0 : vacrel->vm_new_visible_pages++;
2183 0 : vacrel->vm_new_visible_frozen_pages++;
2184 0 : *vm_page_frozen = true;
2185 : }
2186 :
2187 : /*
2188 : * We already checked that the page was not set all-frozen in the VM
2189 : * above, so we don't need to test the value of old_vmbits.
2190 : */
2191 : else
2192 : {
2193 26 : vacrel->vm_new_frozen_pages++;
2194 26 : *vm_page_frozen = true;
2195 : }
2196 : }
2197 757988 : }
2198 :
2199 : /*
2200 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2201 : *
2202 : * Caller need only hold a pin and share lock on the buffer, unlike
2203 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2204 : * performed here, it's quite possible that an earlier opportunistic pruning
2205 : * operation left LP_DEAD items behind. We'll at least collect any such items
2206 : * in dead_items for removal from indexes.
2207 : *
2208 : * For aggressive VACUUM callers, we may return false to indicate that a full
2209 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2210 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2211 : * one or more tuples on the page. We always return true for non-aggressive
2212 : * callers.
2213 : *
2214 : * If this function returns true, *has_lpdead_items gets set to true or false
2215 : * depending on whether, upon return from this function, any LP_DEAD items are
2216 : * present on the page. If this function returns false, *has_lpdead_items
2217 : * is not updated.
2218 : */
2219 : static bool
2220 10 : lazy_scan_noprune(LVRelState *vacrel,
2221 : Buffer buf,
2222 : BlockNumber blkno,
2223 : Page page,
2224 : bool *has_lpdead_items)
2225 : {
2226 : OffsetNumber offnum,
2227 : maxoff;
2228 : int lpdead_items,
2229 : live_tuples,
2230 : recently_dead_tuples,
2231 : missed_dead_tuples;
2232 : bool hastup;
2233 : HeapTupleHeader tupleheader;
2234 10 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2235 10 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2236 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2237 :
2238 : Assert(BufferGetBlockNumber(buf) == blkno);
2239 :
2240 10 : hastup = false; /* for now */
2241 :
2242 10 : lpdead_items = 0;
2243 10 : live_tuples = 0;
2244 10 : recently_dead_tuples = 0;
2245 10 : missed_dead_tuples = 0;
2246 :
2247 10 : maxoff = PageGetMaxOffsetNumber(page);
2248 288 : for (offnum = FirstOffsetNumber;
2249 : offnum <= maxoff;
2250 278 : offnum = OffsetNumberNext(offnum))
2251 : {
2252 : ItemId itemid;
2253 : HeapTupleData tuple;
2254 :
2255 278 : vacrel->offnum = offnum;
2256 278 : itemid = PageGetItemId(page, offnum);
2257 :
2258 278 : if (!ItemIdIsUsed(itemid))
2259 56 : continue;
2260 :
2261 272 : if (ItemIdIsRedirected(itemid))
2262 : {
2263 50 : hastup = true;
2264 50 : continue;
2265 : }
2266 :
2267 222 : if (ItemIdIsDead(itemid))
2268 : {
2269 : /*
2270 : * Deliberately don't set hastup=true here. See same point in
2271 : * lazy_scan_prune for an explanation.
2272 : */
2273 0 : deadoffsets[lpdead_items++] = offnum;
2274 0 : continue;
2275 : }
2276 :
2277 222 : hastup = true; /* page prevents rel truncation */
2278 222 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2279 222 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2280 : &NoFreezePageRelfrozenXid,
2281 : &NoFreezePageRelminMxid))
2282 : {
2283 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2284 128 : if (vacrel->aggressive)
2285 : {
2286 : /*
2287 : * Aggressive VACUUMs must always be able to advance rel's
2288 : * relfrozenxid to a value >= FreezeLimit (and be able to
2289 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2290 : * The ongoing aggressive VACUUM won't be able to do that
2291 : * unless it can freeze an XID (or MXID) from this tuple now.
2292 : *
2293 : * The only safe option is to have caller perform processing
2294 : * of this page using lazy_scan_prune. Caller might have to
2295 : * wait a while for a cleanup lock, but it can't be helped.
2296 : */
2297 0 : vacrel->offnum = InvalidOffsetNumber;
2298 0 : return false;
2299 : }
2300 :
2301 : /*
2302 : * Non-aggressive VACUUMs are under no obligation to advance
2303 : * relfrozenxid (even by one XID). We can be much laxer here.
2304 : *
2305 : * Currently we always just accept an older final relfrozenxid
2306 : * and/or relminmxid value. We never make caller wait or work a
2307 : * little harder, even when it likely makes sense to do so.
2308 : */
2309 : }
2310 :
2311 222 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2312 222 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2313 222 : tuple.t_len = ItemIdGetLength(itemid);
2314 222 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2315 :
2316 222 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2317 : buf))
2318 : {
2319 216 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2320 : case HEAPTUPLE_LIVE:
2321 :
2322 : /*
2323 : * Count both cases as live, just like lazy_scan_prune
2324 : */
2325 216 : live_tuples++;
2326 :
2327 216 : break;
2328 2 : case HEAPTUPLE_DEAD:
2329 :
2330 : /*
2331 : * There is some useful work for pruning to do, that won't be
2332 : * done due to failure to get a cleanup lock.
2333 : */
2334 2 : missed_dead_tuples++;
2335 2 : break;
2336 4 : case HEAPTUPLE_RECENTLY_DEAD:
2337 :
2338 : /*
2339 : * Count in recently_dead_tuples, just like lazy_scan_prune
2340 : */
2341 4 : recently_dead_tuples++;
2342 4 : break;
2343 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2344 :
2345 : /*
2346 : * Do not count these rows as live, just like lazy_scan_prune
2347 : */
2348 0 : break;
2349 0 : default:
2350 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2351 : break;
2352 : }
2353 : }
2354 :
2355 10 : vacrel->offnum = InvalidOffsetNumber;
2356 :
2357 : /*
2358 : * By here we know for sure that caller can put off freezing and pruning
2359 : * this particular page until the next VACUUM. Remember its details now.
2360 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2361 : */
2362 10 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2363 10 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2364 :
2365 : /* Save any LP_DEAD items found on the page in dead_items */
2366 10 : if (vacrel->nindexes == 0)
2367 : {
2368 : /* Using one-pass strategy (since table has no indexes) */
2369 0 : if (lpdead_items > 0)
2370 : {
2371 : /*
2372 : * Perfunctory handling for the corner case where a single pass
2373 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2374 : * that there are one or more LP_DEAD items: just count the LP_DEAD
2375 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2376 : * but it beats having to maintain specialized heap vacuuming code
2377 : * forever, for vanishingly little benefit.)
2378 : */
2379 0 : hastup = true;
2380 0 : missed_dead_tuples += lpdead_items;
2381 : }
2382 : }
2383 10 : else if (lpdead_items > 0)
2384 : {
2385 : /*
2386 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2387 : * indexes will be deleted during index vacuuming (and then marked
2388 : * LP_UNUSED in the heap)
2389 : */
2390 0 : vacrel->lpdead_item_pages++;
2391 :
2392 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2393 :
2394 0 : vacrel->lpdead_items += lpdead_items;
2395 : }
2396 :
2397 : /*
2398 : * Finally, add relevant page-local counts to whole-VACUUM counts
2399 : */
2400 10 : vacrel->live_tuples += live_tuples;
2401 10 : vacrel->recently_dead_tuples += recently_dead_tuples;
2402 10 : vacrel->missed_dead_tuples += missed_dead_tuples;
2403 10 : if (missed_dead_tuples > 0)
2404 2 : vacrel->missed_dead_pages++;
2405 :
2406 : /* Can't truncate this page */
2407 10 : if (hastup)
2408 10 : vacrel->nonempty_pages = blkno + 1;
2409 :
2410 : /* Did we find LP_DEAD items? */
2411 10 : *has_lpdead_items = (lpdead_items > 0);
2412 :
2413 : /* Caller won't need to call lazy_scan_prune with same page */
2414 10 : return true;
2415 : }
2416 :
2417 : /*
2418 : * Main entry point for index vacuuming and heap vacuuming.
2419 : *
2420 : * Removes items collected in dead_items from table's indexes, then marks the
2421 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2422 : * for full details.
2423 : *
2424 : * Also empties dead_items, freeing up space for later TIDs.
2425 : *
2426 : * We may choose to bypass index vacuuming at this point, though only when the
2427 : * ongoing VACUUM operation will definitely only have one index scan/round of
2428 : * index vacuuming.
2429 : */
2430 : static void
2431 1048 : lazy_vacuum(LVRelState *vacrel)
2432 : {
2433 : bool bypass;
2434 :
2435 : /* Should not end up here with no indexes */
2436 : Assert(vacrel->nindexes > 0);
2437 : Assert(vacrel->lpdead_item_pages > 0);
2438 :
2439 1048 : if (!vacrel->do_index_vacuuming)
2440 : {
2441 : Assert(!vacrel->do_index_cleanup);
2442 22 : dead_items_reset(vacrel);
2443 22 : return;
2444 : }
2445 :
2446 : /*
2447 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2448 : *
2449 : * We currently only do this in cases where the number of LP_DEAD items
2450 : * for the entire VACUUM operation is close to zero. This avoids sharp
2451 : * discontinuities in the duration and overhead of successive VACUUM
2452 : * operations that run against the same table with a fixed workload.
2453 : * Ideally, successive VACUUM operations will behave as if there are
2454 : * exactly zero LP_DEAD items in cases where there are close to zero.
2455 : *
2456 : * This is likely to be helpful with a table that is continually affected
2457 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2458 : * have small aberrations that lead to just a few heap pages retaining
2459 : * only one or two LP_DEAD items. This is pretty common; even when the
2460 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2461 : * impossible to predict whether HOT will be applied in 100% of cases.
2462 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2463 : * HOT through careful tuning.
2464 : */
2465 1026 : bypass = false;
2466 1026 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2467 : {
2468 : BlockNumber threshold;
2469 :
2470 : Assert(vacrel->num_index_scans == 0);
2471 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2472 : Assert(vacrel->do_index_vacuuming);
2473 : Assert(vacrel->do_index_cleanup);
2474 :
2475 : /*
2476 : * This crossover point at which we'll start to do index vacuuming is
2477 : * expressed as a percentage of the total number of heap pages in the
2478 : * table that are known to have at least one LP_DEAD item. This is
2479 : * much more important than the total number of LP_DEAD items, since
2480 : * it's a proxy for the number of heap pages whose visibility map bits
2481 : * cannot be set on account of bypassing index and heap vacuuming.
2482 : *
2483 : * We apply one further precautionary test: the space currently used
2484 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2485 : * not exceed 32MB. This limits the risk that we will bypass index
2486 : * vacuuming again and again until eventually there is a VACUUM whose
2487 : * dead_items space is not CPU cache resident.
2488 : *
2489 : * We don't take any special steps to remember the LP_DEAD items (such
2490 : * as counting them in our final update to the stats system) when the
2491 : * optimization is applied. Though the accounting used in analyze.c's
2492 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2493 : * rows in its own stats report, that's okay. The discrepancy should
2494 : * be negligible. If this optimization is ever expanded to cover more
2495 : * cases then this may need to be reconsidered.
2496 : */
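            : 		/*
            : 		 * Worked example, assuming BYPASS_THRESHOLD_PAGES is 0.02 (2% of
            : 		 * rel_pages): for a 100,000-page table the crossover is 2,000
            : 		 * pages containing LP_DEAD items; below that, and with the
            : 		 * dead_items store under 32MB, index and heap vacuuming are
            : 		 * bypassed for this VACUUM.
            : 		 */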
2497 1010 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2498 1010 : bypass = (vacrel->lpdead_item_pages < threshold &&
2499 0 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2500 : }
2501 :
2502 1026 : if (bypass)
2503 : {
2504 : /*
2505 : * There are almost zero TIDs. Behave as if there were precisely
2506 : * zero: bypass index vacuuming, but do index cleanup.
2507 : *
2508 : * We expect that the ongoing VACUUM operation will finish very
2509 : * quickly, so there is no point in considering speeding up as a
2510 : * failsafe against wraparound failure. (Index cleanup is expected to
2511 : * finish very quickly in cases where there were no ambulkdelete()
2512 : * calls.)
2513 : */
2514 0 : vacrel->do_index_vacuuming = false;
2515 : }
2516 1026 : else if (lazy_vacuum_all_indexes(vacrel))
2517 : {
2518 : /*
2519 : * We successfully completed a round of index vacuuming. Do related
2520 : * heap vacuuming now.
2521 : */
2522 1026 : lazy_vacuum_heap_rel(vacrel);
2523 : }
2524 : else
2525 : {
2526 : /*
2527 : * Failsafe case.
2528 : *
2529 : * We attempted index vacuuming, but didn't finish a full round/full
2530 : * index scan. This happens when relfrozenxid or relminmxid is too
2531 : * far in the past.
2532 : *
2533 : * From this point on the VACUUM operation will do no further index
2534 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2535 : * back here again.
2536 : */
2537 : Assert(VacuumFailsafeActive);
2538 : }
2539 :
2540 : /*
2541 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2542 : * vacuum)
2543 : */
2544 1026 : dead_items_reset(vacrel);
2545 : }
2546 :
2547 : /*
2548 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2549 : *
2550 : * Returns true in the common case when all indexes were successfully
2551 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2552 : * VACUUM operation is at risk of taking too long to finish, leading to
2553 : * wraparound failure.
2554 : */
2555 : static bool
2556 1026 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2557 : {
2558 1026 : bool allindexes = true;
2559 1026 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2560 1026 : const int progress_start_index[] = {
2561 : PROGRESS_VACUUM_PHASE,
2562 : PROGRESS_VACUUM_INDEXES_TOTAL
2563 : };
2564 1026 : const int progress_end_index[] = {
2565 : PROGRESS_VACUUM_INDEXES_TOTAL,
2566 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2567 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2568 : };
2569 : int64 progress_start_val[2];
2570 : int64 progress_end_val[3];
2571 :
2572 : Assert(vacrel->nindexes > 0);
2573 : Assert(vacrel->do_index_vacuuming);
2574 : Assert(vacrel->do_index_cleanup);
2575 :
2576 : /* Precheck for XID wraparound emergencies */
2577 1026 : if (lazy_check_wraparound_failsafe(vacrel))
2578 : {
2579 : /* Wraparound emergency -- don't even start an index scan */
2580 0 : return false;
2581 : }
2582 :
2583 : /*
2584 : * Report that we are now vacuuming indexes and the number of indexes to
2585 : * vacuum.
2586 : */
2587 1026 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2588 1026 : progress_start_val[1] = vacrel->nindexes;
2589 1026 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2590 :
2591 1026 : if (!ParallelVacuumIsActive(vacrel))
2592 : {
2593 3004 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2594 : {
2595 1990 : Relation indrel = vacrel->indrels[idx];
2596 1990 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2597 :
2598 1990 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2599 : old_live_tuples,
2600 : vacrel);
2601 :
2602 : /* Report the number of indexes vacuumed */
2603 1990 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2604 1990 : idx + 1);
2605 :
2606 1990 : if (lazy_check_wraparound_failsafe(vacrel))
2607 : {
2608 : /* Wraparound emergency -- end current index scan */
2609 0 : allindexes = false;
2610 0 : break;
2611 : }
2612 : }
2613 : }
2614 : else
2615 : {
2616 : /* Outsource everything to parallel variant */
2617 12 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2618 : vacrel->num_index_scans);
2619 :
2620 : /*
2621 : * Do a postcheck to consider applying wraparound failsafe now. Note
2622 : * that parallel VACUUM only gets the precheck and this postcheck.
2623 : */
2624 12 : if (lazy_check_wraparound_failsafe(vacrel))
2625 0 : allindexes = false;
2626 : }
2627 :
2628 : /*
2629 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2630 : * each call here (except calls where we choose to do the failsafe). This
2631 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2632 : * of the failsafe triggering, which prevents the next call from taking
2633 : * place).
2634 : */
2635 : Assert(vacrel->num_index_scans > 0 ||
2636 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2637 : Assert(allindexes || VacuumFailsafeActive);
2638 :
2639 : /*
2640 : * Increase and report the number of index scans. Also, we reset
2641 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2642 : *
2643 : * We deliberately include the case where we started a round of bulk
2644 : * deletes that we weren't able to finish due to the failsafe triggering.
2645 : */
2646 1026 : vacrel->num_index_scans++;
2647 1026 : progress_end_val[0] = 0;
2648 1026 : progress_end_val[1] = 0;
2649 1026 : progress_end_val[2] = vacrel->num_index_scans;
2650 1026 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2651 :
2652 1026 : return allindexes;
2653 : }
2654 :
2655 : /*
2656 : * Read stream callback for vacuum's third phase (second pass over the heap).
2657 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2658 : * if there are no further blocks to vacuum.
2659 : */
2660 : static BlockNumber
2661 22720 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2662 : void *callback_private_data,
2663 : void *per_buffer_data)
2664 : {
2665 22720 : TidStoreIter *iter = callback_private_data;
2666 : TidStoreIterResult *iter_result;
2667 :
2668 22720 : iter_result = TidStoreIterateNext(iter);
2669 22720 : if (iter_result == NULL)
2670 1026 : return InvalidBlockNumber;
2671 :
2672 : /*
2673 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2674 : * It is safe to copy the result, according to TidStoreIterateNext().
2675 : */
2676 21694 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2677 :
2678 21694 : return iter_result->blkno;
2679 : }
2680 :
2681 : /*
2682 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2683 : *
2684 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2685 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2686 : *
2687 : * We may also be able to truncate the line pointer array of the heap pages we
2688 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2689 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2690 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2691 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2692 : * page's line pointer array).
2693 : *
2694 : * Note: the reason for doing this as a second pass is that we cannot remove
2695 : * the tuples until we've removed their index entries, and we want to process
2696 : * index entry removal in batches as large as possible.
2697 : */
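            : /*
            :  * Illustrative numbers: if a page has line pointers 1..100 and this pass
            :  * sets offsets 61..100 LP_UNUSED, the trailing 40 item identifiers can be
            :  * truncated away by PageTruncateLinePointerArray(), returning roughly
            :  * 40 * sizeof(ItemIdData) = 160 bytes to the page before its free space is
            :  * recorded in the FSM.
            :  */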
2698 : static void
2699 1026 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2700 : {
2701 : ReadStream *stream;
2702 1026 : BlockNumber vacuumed_pages = 0;
2703 1026 : Buffer vmbuffer = InvalidBuffer;
2704 : LVSavedErrInfo saved_err_info;
2705 : TidStoreIter *iter;
2706 :
2707 : Assert(vacrel->do_index_vacuuming);
2708 : Assert(vacrel->do_index_cleanup);
2709 : Assert(vacrel->num_index_scans > 0);
2710 :
2711 : /* Report that we are now vacuuming the heap */
2712 1026 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2713 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2714 :
2715 : /* Update error traceback information */
2716 1026 : update_vacuum_error_info(vacrel, &saved_err_info,
2717 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2718 : InvalidBlockNumber, InvalidOffsetNumber);
2719 :
2720 1026 : iter = TidStoreBeginIterate(vacrel->dead_items);
2721 :
2722 : /* Set up the read stream for vacuum's second pass through the heap */
2723 1026 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
2724 : vacrel->bstrategy,
2725 : vacrel->rel,
2726 : MAIN_FORKNUM,
2727 : vacuum_reap_lp_read_stream_next,
2728 : iter,
2729 : sizeof(TidStoreIterResult));
2730 :
2731 : while (true)
2732 21694 : {
2733 : BlockNumber blkno;
2734 : Buffer buf;
2735 : Page page;
2736 : TidStoreIterResult *iter_result;
2737 : Size freespace;
2738 : OffsetNumber offsets[MaxOffsetNumber];
2739 : int num_offsets;
2740 :
2741 22720 : vacuum_delay_point(false);
2742 :
2743 22720 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2744 :
2745 : /* The relation is exhausted */
2746 22720 : if (!BufferIsValid(buf))
2747 1026 : break;
2748 :
2749 21694 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2750 :
2751 : Assert(iter_result);
2752 21694 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2753 : Assert(num_offsets <= lengthof(offsets));
2754 :
2755 : /*
2756 : * Pin the visibility map page in case we need to mark the page
2757 : * all-visible. In most cases this will be very cheap, because we'll
2758 : * already have the correct page pinned anyway.
2759 : */
2760 21694 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2761 :
2762 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2763 21694 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2764 21694 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2765 : num_offsets, vmbuffer);
2766 :
2767 : /* Now that we've vacuumed the page, record its available space */
2768 21694 : page = BufferGetPage(buf);
2769 21694 : freespace = PageGetHeapFreeSpace(page);
2770 :
2771 21694 : UnlockReleaseBuffer(buf);
2772 21694 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2773 21694 : vacuumed_pages++;
2774 : }
2775 :
2776 1026 : read_stream_end(stream);
2777 1026 : TidStoreEndIterate(iter);
2778 :
2779 1026 : vacrel->blkno = InvalidBlockNumber;
2780 1026 : if (BufferIsValid(vmbuffer))
2781 1026 : ReleaseBuffer(vmbuffer);
2782 :
2783 : /*
2784 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2785 : * the second heap pass. No more, no less.
2786 : */
2787 : Assert(vacrel->num_index_scans > 1 ||
2788 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2789 : vacuumed_pages == vacrel->lpdead_item_pages));
2790 :
2791 1026 : ereport(DEBUG2,
2792 : (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2793 : vacrel->relname, (long long) vacrel->dead_items_info->num_items,
2794 : vacuumed_pages)));
2795 :
2796 : /* Revert to the previous phase information for error traceback */
2797 1026 : restore_vacuum_error_info(vacrel, &saved_err_info);
2798 1026 : }
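/*
 * Illustrative sketch (not code from vacuumlazy.c): how lazy_vacuum_heap_rel()
 * fits into the phase cycle described in the file header.  Phase I fills
 * vacrel->dead_items, phases II and III drain it, and the store is reset so
 * phase I can resume.  The real control flow lives in lazy_scan_heap() and
 * lazy_vacuum(); lazy_vacuum_all_indexes() is the phase II driver defined
 * elsewhere in this file (not shown in this excerpt), and the memory-limit
 * test and bookkeeping are heavily simplified here.
 */
static void
sketch_vacuum_phase_cycle(LVRelState *vacrel)
{
    for (BlockNumber blkno = 0; blkno < vacrel->rel_pages; blkno++)
    {
        /*
         * Phase I: prune and freeze the page; LP_DEAD TIDs are remembered
         * via dead_items_add() (the real work happens in lazy_scan_heap()).
         */

        /* When the TID store nears its memory budget, drain it */
        if (TidStoreMemoryUsage(vacrel->dead_items) >
            vacrel->dead_items_info->max_bytes)
        {
            lazy_vacuum_all_indexes(vacrel);    /* Phase II */
            lazy_vacuum_heap_rel(vacrel);       /* Phase III */
            dead_items_reset(vacrel);           /* resume Phase I */
        }
    }

    /* Final phase II + III round for whatever remains in the TID store */
    lazy_vacuum_all_indexes(vacrel);
    lazy_vacuum_heap_rel(vacrel);
}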
2799 :
2800 : /*
2801 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2802 : * vacrel->dead_items store.
2803 : *
2804 : * Caller must have an exclusive buffer lock on the buffer (though a full
2805 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2806 : * a pin on blkno's visibility map page.
2807 : */
2808 : static void
2809 21694 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2810 : OffsetNumber *deadoffsets, int num_offsets,
2811 : Buffer vmbuffer)
2812 : {
2813 21694 : Page page = BufferGetPage(buffer);
2814 : OffsetNumber unused[MaxHeapTuplesPerPage];
2815 21694 : int nunused = 0;
2816 : TransactionId visibility_cutoff_xid;
2817 : bool all_frozen;
2818 : LVSavedErrInfo saved_err_info;
2819 :
2820 : Assert(vacrel->do_index_vacuuming);
2821 :
2822 21694 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2823 :
2824 : /* Update error traceback information */
2825 21694 : update_vacuum_error_info(vacrel, &saved_err_info,
2826 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2827 : InvalidOffsetNumber);
2828 :
2829 21694 : START_CRIT_SECTION();
2830 :
2831 1450824 : for (int i = 0; i < num_offsets; i++)
2832 : {
2833 : ItemId itemid;
2834 1429130 : OffsetNumber toff = deadoffsets[i];
2835 :
2836 1429130 : itemid = PageGetItemId(page, toff);
2837 :
2838 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2839 1429130 : ItemIdSetUnused(itemid);
2840 1429130 : unused[nunused++] = toff;
2841 : }
2842 :
2843 : Assert(nunused > 0);
2844 :
2845 : /* Attempt to truncate line pointer array now */
2846 21694 : PageTruncateLinePointerArray(page);
2847 :
2848 : /*
2849 : * Mark buffer dirty before we write WAL.
2850 : */
2851 21694 : MarkBufferDirty(buffer);
2852 :
2853 : /* XLOG stuff */
2854 21694 : if (RelationNeedsWAL(vacrel->rel))
2855 : {
2856 20000 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2857 : InvalidTransactionId,
2858 : false, /* no cleanup lock required */
2859 : PRUNE_VACUUM_CLEANUP,
2860 : NULL, 0, /* frozen */
2861 : NULL, 0, /* redirected */
2862 : NULL, 0, /* dead */
2863 : unused, nunused);
2864 : }
2865 :
2866 : /*
2867 : * End the critical section, so we can safely do visibility tests (which
2868 : * may need to perform I/O and allocate memory!). If we crash now, the
2869 : * page (including the corresponding VM bit) might not be marked all
2870 : * visible, but that's fine; a later vacuum will fix that.
2871 : */
2872 21694 : END_CRIT_SECTION();
2873 :
2874 : /*
2875 : * Now that we have removed the LP_DEAD items from the page, once again
2876 : * check if the page has become all-visible. The page is already marked
2877 : * dirty, exclusively locked, and, if needed, a full page image has been
2878 : * emitted.
2879 : */
2880 : Assert(!PageIsAllVisible(page));
2881 21694 : if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2882 : &all_frozen))
2883 : {
2884 : uint8 old_vmbits;
2885 21574 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2886 :
2887 21574 : if (all_frozen)
2888 : {
2889 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2890 16678 : flags |= VISIBILITYMAP_ALL_FROZEN;
2891 : }
2892 :
2893 21574 : PageSetAllVisible(page);
2894 21574 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer,
2895 : InvalidXLogRecPtr,
2896 : vmbuffer, visibility_cutoff_xid,
2897 : flags);
2898 :
2899 : /*
2900 : * If the page wasn't already set all-visible and/or all-frozen in the
2901 : * VM, count it as newly set for logging.
2902 : */
2903 21574 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2904 : {
2905 21574 : vacrel->vm_new_visible_pages++;
2906 21574 : if (all_frozen)
2907 16678 : vacrel->vm_new_visible_frozen_pages++;
2908 : }
2909 :
2910 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2911 : all_frozen)
2912 0 : vacrel->vm_new_frozen_pages++;
2913 : }
2914 :
2915 : /* Revert to the previous phase information for error traceback */
2916 21694 : restore_vacuum_error_info(vacrel, &saved_err_info);
2917 21694 : }
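/*
 * Illustrative sketch (not code from vacuumlazy.c): the idea behind the
 * PageTruncateLinePointerArray() call above.  Once lazy_vacuum_heap_page()
 * has set a page's LP_DEAD items to LP_UNUSED, any contiguous run of
 * LP_UNUSED line pointers at the end of the array can be chopped off by
 * pulling pd_lower back, turning those slots into ordinary free space.  The
 * real routine also keeps at least one line pointer and maintains page hint
 * bits; this conceptual version shows only the core arithmetic.
 */
static void
sketch_truncate_line_pointers(Page page)
{
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

    /* Walk backwards past trailing LP_UNUSED entries */
    while (maxoff >= FirstOffsetNumber &&
           !ItemIdIsUsed(PageGetItemId(page, maxoff)))
        maxoff--;

    /* Shrink the line pointer array to end at the last used entry */
    ((PageHeader) page)->pd_lower =
        SizeOfPageHeaderData + maxoff * sizeof(ItemIdData);
}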
2918 :
2919 : /*
2920 : * Trigger the failsafe to avoid wraparound failure when the target table has a
2921 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2922 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2923 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2924 : *
2925 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2926 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2927 : * that it started out with.
2928 : *
2929 : * Returns true when failsafe has been triggered.
2930 : */
2931 : static bool
2932 120652 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2933 : {
2934 : /* Don't warn more than once per VACUUM */
2935 120652 : if (VacuumFailsafeActive)
2936 0 : return true;
2937 :
2938 120652 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2939 : {
2940 19056 : const int progress_index[] = {
2941 : PROGRESS_VACUUM_INDEXES_TOTAL,
2942 : PROGRESS_VACUUM_INDEXES_PROCESSED
2943 : };
2944 19056 : int64 progress_val[2] = {0, 0};
2945 :
2946 19056 : VacuumFailsafeActive = true;
2947 :
2948 : /*
2949 : * Abandon use of a buffer access strategy to allow use of all of
2950 : * shared buffers. We assume the caller who allocated the memory for
2951 : * the BufferAccessStrategy will free it.
2952 : */
2953 19056 : vacrel->bstrategy = NULL;
2954 :
2955 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2956 19056 : vacrel->do_index_vacuuming = false;
2957 19056 : vacrel->do_index_cleanup = false;
2958 19056 : vacrel->do_rel_truncate = false;
2959 :
2960 : /* Reset the progress counters */
2961 19056 : pgstat_progress_update_multi_param(2, progress_index, progress_val);
2962 :
2963 19056 : ereport(WARNING,
2964 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2965 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2966 : vacrel->num_index_scans),
2967 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2968 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2969 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2970 :
2971 : /* Stop applying cost limits from this point on */
2972 19056 : VacuumCostActive = false;
2973 19056 : VacuumCostBalance = 0;
2974 :
2975 19056 : return true;
2976 : }
2977 :
2978 101596 : return false;
2979 : }
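/*
 * Illustrative sketch (not code from vacuumlazy.c): how the failsafe check
 * above is typically re-run while phase I scans the heap.  The real call
 * sites are in lazy_scan_heap(); FAILSAFE_EVERY_PAGES is defined earlier in
 * this file, and its use here is an assumption about code outside this
 * excerpt.  Once the failsafe trips, index vacuuming, index cleanup, and
 * truncation stay disabled, but the scan itself continues so that
 * relfrozenxid can still be advanced.
 */
static void
sketch_failsafe_cadence(LVRelState *vacrel, BlockNumber blkno)
{
    /* Re-check periodically so the response to XID pressure isn't delayed */
    if (blkno > 0 && blkno % FAILSAFE_EVERY_PAGES == 0)
        (void) lazy_check_wraparound_failsafe(vacrel);
}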
2980 :
2981 : /*
2982 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2983 : */
2984 : static void
2985 93380 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2986 : {
2987 93380 : double reltuples = vacrel->new_rel_tuples;
2988 93380 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2989 93380 : const int progress_start_index[] = {
2990 : PROGRESS_VACUUM_PHASE,
2991 : PROGRESS_VACUUM_INDEXES_TOTAL
2992 : };
2993 93380 : const int progress_end_index[] = {
2994 : PROGRESS_VACUUM_INDEXES_TOTAL,
2995 : PROGRESS_VACUUM_INDEXES_PROCESSED
2996 : };
2997 : int64 progress_start_val[2];
2998 93380 : int64 progress_end_val[2] = {0, 0};
2999 :
3000 : Assert(vacrel->do_index_cleanup);
3001 : Assert(vacrel->nindexes > 0);
3002 :
3003 : /*
3004 : * Report that we are now cleaning up indexes and the number of indexes to
3005 : * clean up.
3006 : */
3007 93380 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3008 93380 : progress_start_val[1] = vacrel->nindexes;
3009 93380 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3010 :
3011 93380 : if (!ParallelVacuumIsActive(vacrel))
3012 : {
3013 239276 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3014 : {
3015 145918 : Relation indrel = vacrel->indrels[idx];
3016 145918 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3017 :
3018 291836 : vacrel->indstats[idx] =
3019 145918 : lazy_cleanup_one_index(indrel, istat, reltuples,
3020 : estimated_count, vacrel);
3021 :
3022 : /* Report the number of indexes cleaned up */
3023 145918 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3024 145918 : idx + 1);
3025 : }
3026 : }
3027 : else
3028 : {
3029 : /* Outsource everything to parallel variant */
3030 22 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3031 : vacrel->num_index_scans,
3032 : estimated_count);
3033 : }
3034 :
3035 : /* Reset the progress counters */
3036 93380 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3037 93380 : }
3038 :
3039 : /*
3040 : * lazy_vacuum_one_index() -- vacuum index relation.
3041 : *
3042 : * Delete all the index tuples containing a TID collected in
3043 : * vacrel->dead_items. Also update running statistics. Exact
3044 : * details depend on index AM's ambulkdelete routine.
3045 : *
3046 : * reltuples is the number of heap tuples to be passed to the
3047 : * bulkdelete callback. It's always assumed to be estimated.
3048 : * See indexam.sgml for more info.
3049 : *
3050 : * Returns bulk delete stats derived from input stats
3051 : */
3052 : static IndexBulkDeleteResult *
3053 1990 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3054 : double reltuples, LVRelState *vacrel)
3055 : {
3056 : IndexVacuumInfo ivinfo;
3057 : LVSavedErrInfo saved_err_info;
3058 :
3059 1990 : ivinfo.index = indrel;
3060 1990 : ivinfo.heaprel = vacrel->rel;
3061 1990 : ivinfo.analyze_only = false;
3062 1990 : ivinfo.report_progress = false;
3063 1990 : ivinfo.estimated_count = true;
3064 1990 : ivinfo.message_level = DEBUG2;
3065 1990 : ivinfo.num_heap_tuples = reltuples;
3066 1990 : ivinfo.strategy = vacrel->bstrategy;
3067 :
3068 : /*
3069 : * Update error traceback information.
3070 : *
3071 : * The index name is saved during this phase and restored immediately
3072 : * after this phase. See vacuum_error_callback.
3073 : */
3074 : Assert(vacrel->indname == NULL);
3075 1990 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3076 1990 : update_vacuum_error_info(vacrel, &saved_err_info,
3077 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3078 : InvalidBlockNumber, InvalidOffsetNumber);
3079 :
3080 : /* Do bulk deletion */
3081 1990 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3082 : vacrel->dead_items_info);
3083 :
3084 : /* Revert to the previous phase information for error traceback */
3085 1990 : restore_vacuum_error_info(vacrel, &saved_err_info);
3086 1990 : pfree(vacrel->indname);
3087 1990 : vacrel->indname = NULL;
3088 :
3089 1990 : return istat;
3090 : }
3091 :
3092 : /*
3093 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3094 : *
3095 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3096 : * of heap tuples and estimated_count is true if reltuples is an
3097 : * estimated value. See indexam.sgml for more info.
3098 : *
3099 : * Returns bulk delete stats derived from input stats
3100 : */
3101 : static IndexBulkDeleteResult *
3102 145918 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3103 : double reltuples, bool estimated_count,
3104 : LVRelState *vacrel)
3105 : {
3106 : IndexVacuumInfo ivinfo;
3107 : LVSavedErrInfo saved_err_info;
3108 :
3109 145918 : ivinfo.index = indrel;
3110 145918 : ivinfo.heaprel = vacrel->rel;
3111 145918 : ivinfo.analyze_only = false;
3112 145918 : ivinfo.report_progress = false;
3113 145918 : ivinfo.estimated_count = estimated_count;
3114 145918 : ivinfo.message_level = DEBUG2;
3115 :
3116 145918 : ivinfo.num_heap_tuples = reltuples;
3117 145918 : ivinfo.strategy = vacrel->bstrategy;
3118 :
3119 : /*
3120 : * Update error traceback information.
3121 : *
3122 : * The index name is saved during this phase and restored immediately
3123 : * after this phase. See vacuum_error_callback.
3124 : */
3125 : Assert(vacrel->indname == NULL);
3126 145918 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3127 145918 : update_vacuum_error_info(vacrel, &saved_err_info,
3128 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3129 : InvalidBlockNumber, InvalidOffsetNumber);
3130 :
3131 145918 : istat = vac_cleanup_one_index(&ivinfo, istat);
3132 :
3133 : /* Revert to the previous phase information for error traceback */
3134 145918 : restore_vacuum_error_info(vacrel, &saved_err_info);
3135 145918 : pfree(vacrel->indname);
3136 145918 : vacrel->indname = NULL;
3137 :
3138 145918 : return istat;
3139 : }
3140 :
3141 : /*
3142 : * should_attempt_truncation - should we attempt to truncate the heap?
3143 : *
3144 : * Don't even think about it unless we have a shot at releasing a goodly
3145 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3146 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3147 : * be particularly disruptive.
3148 : *
3149 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3150 : * system might be refusing to allocate new XIDs at this point. The system
3151 : * definitely won't return to normal unless and until VACUUM actually advances
3152 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3153 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3154 : * truncate the table under these circumstances, an XID exhaustion error might
3155 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3156 : * There is very little chance of truncation working out when the failsafe is
3157 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3158 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3159 : * we're called.
3160 : */
3161 : static bool
3162 117624 : should_attempt_truncation(LVRelState *vacrel)
3163 : {
3164 : BlockNumber possibly_freeable;
3165 :
3166 117624 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3167 19296 : return false;
3168 :
3169 98328 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3170 98328 : if (possibly_freeable > 0 &&
3171 290 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3172 290 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3173 278 : return true;
3174 :
3175 98050 : return false;
3176 : }
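/*
 * Worked example for the threshold above, taking REL_TRUNCATE_MINIMUM = 1000
 * and REL_TRUNCATE_FRACTION = 16 (their definitions appear earlier in this
 * file and are assumed here, since they are outside this excerpt).  Because
 * either branch of the OR is enough, a 100,000-page table qualifies as soon
 * as 1,000 or more trailing pages are empty, while an 800-page table already
 * qualifies once 800 / 16 = 50 trailing pages are empty.
 */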
3177 :
3178 : /*
3179 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3180 : */
3181 : static void
3182 278 : lazy_truncate_heap(LVRelState *vacrel)
3183 : {
3184 278 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3185 : BlockNumber new_rel_pages;
3186 : bool lock_waiter_detected;
3187 : int lock_retry;
3188 :
3189 : /* Report that we are now truncating */
3190 278 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3191 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3192 :
3193 : /* Update error traceback information one last time */
3194 278 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3195 : vacrel->nonempty_pages, InvalidOffsetNumber);
3196 :
3197 : /*
3198 : * Loop until no more truncating can be done.
3199 : */
3200 : do
3201 : {
3202 : /*
3203 : * We need full exclusive lock on the relation in order to do
3204 : * truncation. If we can't get it, give up rather than waiting --- we
3205 : * don't want to block other backends, and we don't want to deadlock
3206 : * (which is quite possible considering we already hold a lower-grade
3207 : * lock).
3208 : */
3209 278 : lock_waiter_detected = false;
3210 278 : lock_retry = 0;
3211 : while (true)
3212 : {
3213 682 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3214 274 : break;
3215 :
3216 : /*
3217 : * Check for interrupts while trying to (re-)acquire the exclusive
3218 : * lock.
3219 : */
3220 408 : CHECK_FOR_INTERRUPTS();
3221 :
3222 408 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3223 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3224 : {
3225 : /*
3226 : * We failed to establish the lock in the specified number of
3227 : * retries. This means we give up truncating.
3228 : */
3229 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
3230 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3231 : vacrel->relname)));
3232 6 : return;
3233 : }
3234 :
3235 404 : (void) WaitLatch(MyLatch,
3236 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3237 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3238 : WAIT_EVENT_VACUUM_TRUNCATE);
3239 404 : ResetLatch(MyLatch);
3240 : }
3241 :
3242 : /*
3243 : * Now that we have exclusive lock, look to see if the rel has grown
3244 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3245 : * the newly added pages presumably contain non-deletable tuples.
3246 : */
3247 274 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3248 274 : if (new_rel_pages != orig_rel_pages)
3249 : {
3250 : /*
3251 : * Note: we intentionally don't update vacrel->rel_pages with the
3252 : * new rel size here. If we did, it would amount to assuming that
3253 : * the new pages are empty, which is unlikely. Leaving the numbers
3254 : * alone amounts to assuming that the new pages have the same
3255 : * tuple density as existing ones, which is less unlikely.
3256 : */
3257 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3258 0 : return;
3259 : }
3260 :
3261 : /*
3262 : * Scan backwards from the end to verify that the end pages actually
3263 : * contain no tuples. This is *necessary*, not optional, because
3264 : * other backends could have added tuples to these pages whilst we
3265 : * were vacuuming.
3266 : */
3267 274 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3268 274 : vacrel->blkno = new_rel_pages;
3269 :
3270 274 : if (new_rel_pages >= orig_rel_pages)
3271 : {
3272 : /* can't do anything after all */
3273 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3274 2 : return;
3275 : }
3276 :
3277 : /*
3278 : * Okay to truncate.
3279 : */
3280 272 : RelationTruncate(vacrel->rel, new_rel_pages);
3281 :
3282 : /*
3283 : * We can release the exclusive lock as soon as we have truncated.
3284 : * Other backends can't safely access the relation until they have
3285 : * processed the smgr invalidation that smgrtruncate sent out ... but
3286 : * that should happen as part of standard invalidation processing once
3287 : * they acquire lock on the relation.
3288 : */
3289 272 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3290 :
3291 : /*
3292 : * Update statistics. Here, it *is* correct to adjust rel_pages
3293 : * without also touching reltuples, since the tuple count wasn't
3294 : * changed by the truncation.
3295 : */
3296 272 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3297 272 : vacrel->rel_pages = new_rel_pages;
3298 :
3299 272 : ereport(vacrel->verbose ? INFO : DEBUG2,
3300 : (errmsg("table \"%s\": truncated %u to %u pages",
3301 : vacrel->relname,
3302 : orig_rel_pages, new_rel_pages)));
3303 272 : orig_rel_pages = new_rel_pages;
3304 272 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3305 : }
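/*
 * Worked example for the retry loop above, taking
 * VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL = 50 ms and
 * VACUUM_TRUNCATE_LOCK_TIMEOUT = 5000 ms (their definitions appear earlier
 * in this file and are assumed here).  The loop then waits at most
 * 5000 / 50 = 100 times, 50 ms each, between failed
 * ConditionalLockRelation() attempts, i.e. VACUUM gives up on truncation
 * after roughly five seconds of waiting for the AccessExclusiveLock.
 */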
3306 :
3307 : /*
3308 : * Rescan end pages to verify that they are (still) empty of tuples.
3309 : *
3310 : * Returns number of nondeletable pages (last nonempty page + 1).
3311 : */
3312 : static BlockNumber
3313 274 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3314 : {
3315 : BlockNumber blkno;
3316 : BlockNumber prefetchedUntil;
3317 : instr_time starttime;
3318 :
3319 : /* Initialize the starttime if we check for conflicting lock requests */
3320 274 : INSTR_TIME_SET_CURRENT(starttime);
3321 :
3322 : /*
3323 : * Start checking blocks at what we believe relation end to be and move
3324 : * backwards. (Strange coding of loop control is needed because blkno is
3325 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3326 : * in forward direction, so that OS-level readahead can kick in.
3327 : */
3328 274 : blkno = vacrel->rel_pages;
3329 : StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3330 : "prefetch size must be power of 2");
3331 274 : prefetchedUntil = InvalidBlockNumber;
3332 4046 : while (blkno > vacrel->nonempty_pages)
3333 : {
3334 : Buffer buf;
3335 : Page page;
3336 : OffsetNumber offnum,
3337 : maxoff;
3338 : bool hastup;
3339 :
3340 : /*
3341 : * Check if another process requests a lock on our relation. We are
3342 : * holding an AccessExclusiveLock here, so they will be waiting. We
3343 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3344 : * only check if that interval has elapsed once every 32 blocks to
3345 : * keep the number of system calls and actual shared lock table
3346 : * lookups to a minimum.
3347 : */
3348 3776 : if ((blkno % 32) == 0)
3349 : {
3350 : instr_time currenttime;
3351 : instr_time elapsed;
3352 :
3353 118 : INSTR_TIME_SET_CURRENT(currenttime);
3354 118 : elapsed = currenttime;
3355 118 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3356 118 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3357 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3358 : {
3359 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3360 : {
3361 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3362 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3363 : vacrel->relname)));
3364 :
3365 0 : *lock_waiter_detected = true;
3366 0 : return blkno;
3367 : }
3368 0 : starttime = currenttime;
3369 : }
3370 : }
3371 :
3372 : /*
3373 : * We don't insert a vacuum delay point here, because we have an
3374 : * exclusive lock on the table which we want to hold for as short a
3375 : * time as possible. We still need to check for interrupts however.
3376 : */
3377 3776 : CHECK_FOR_INTERRUPTS();
3378 :
3379 3776 : blkno--;
3380 :
3381 : /* If we haven't prefetched this lot yet, do so now. */
3382 3776 : if (prefetchedUntil > blkno)
3383 : {
3384 : BlockNumber prefetchStart;
3385 : BlockNumber pblkno;
3386 :
3387 358 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3388 5694 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3389 : {
3390 5336 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3391 5336 : CHECK_FOR_INTERRUPTS();
3392 : }
3393 358 : prefetchedUntil = prefetchStart;
3394 : }
3395 :
3396 3776 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3397 : vacrel->bstrategy);
3398 :
3399 : /* In this phase we only need shared access to the buffer */
3400 3776 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3401 :
3402 3776 : page = BufferGetPage(buf);
3403 :
3404 3776 : if (PageIsNew(page) || PageIsEmpty(page))
3405 : {
3406 1640 : UnlockReleaseBuffer(buf);
3407 1640 : continue;
3408 : }
3409 :
3410 2136 : hastup = false;
3411 2136 : maxoff = PageGetMaxOffsetNumber(page);
3412 4268 : for (offnum = FirstOffsetNumber;
3413 : offnum <= maxoff;
3414 2132 : offnum = OffsetNumberNext(offnum))
3415 : {
3416 : ItemId itemid;
3417 :
3418 2136 : itemid = PageGetItemId(page, offnum);
3419 :
3420 : /*
3421 : * Note: any non-unused item should be taken as a reason to keep
3422 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3423 : * we must not have cleaned out its index entries.
3424 : */
3425 2136 : if (ItemIdIsUsed(itemid))
3426 : {
3427 4 : hastup = true;
3428 4 : break; /* can stop scanning */
3429 : }
3430 : } /* scan along page */
3431 :
3432 2136 : UnlockReleaseBuffer(buf);
3433 :
3434 : /* Done scanning if we found a tuple here */
3435 2136 : if (hastup)
3436 4 : return blkno + 1;
3437 : }
3438 :
3439 : /*
3440 : * If we fall out of the loop, all the previously-thought-to-be-empty
3441 : * pages still are; we need not bother to look at the last known-nonempty
3442 : * page.
3443 : */
3444 270 : return vacrel->nonempty_pages;
3445 : }
3446 :
3447 : /*
3448 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3449 : * shared memory). Sets both in vacrel for caller.
3450 : *
3451 : * Also handles parallel initialization as part of allocating dead_items in
3452 : * DSM when required.
3453 : */
3454 : static void
3455 117624 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3456 : {
3457 : VacDeadItemsInfo *dead_items_info;
3458 331238 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3459 95990 : autovacuum_work_mem != -1 ?
3460 213614 : autovacuum_work_mem : maintenance_work_mem;
3461 :
3462 : /*
3463 : * Initialize state for a parallel vacuum. As of now, only one worker can
3464 : * be used for an index, so we invoke parallelism only if there are at
3465 : * least two indexes on a table.
3466 : */
3467 117624 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3468 : {
3469 : /*
3470 : * Since parallel workers cannot access data in temporary tables, we
3471 : * can't perform parallel vacuum on them.
3472 : */
3473 8336 : if (RelationUsesLocalBuffers(vacrel->rel))
3474 : {
3475 : /*
3476 : * Give warning only if the user explicitly tries to perform a
3477 : * parallel vacuum on the temporary table.
3478 : */
3479 6 : if (nworkers > 0)
3480 6 : ereport(WARNING,
3481 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3482 : vacrel->relname)));
3483 : }
3484 : else
3485 8330 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3486 : vacrel->nindexes, nworkers,
3487 : vac_work_mem,
3488 8330 : vacrel->verbose ? INFO : DEBUG2,
3489 : vacrel->bstrategy);
3490 :
3491 : /*
3492 : * If parallel mode started, dead_items and dead_items_info spaces are
3493 : * allocated in DSM.
3494 : */
3495 8336 : if (ParallelVacuumIsActive(vacrel))
3496 : {
3497 22 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3498 : &vacrel->dead_items_info);
3499 22 : return;
3500 : }
3501 : }
3502 :
3503 : /*
3504 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3505 : * locally.
3506 : */
3507 :
3508 117602 : dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
3509 117602 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3510 117602 : dead_items_info->num_items = 0;
3511 117602 : vacrel->dead_items_info = dead_items_info;
3512 :
3513 117602 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3514 : }
3515 :
3516 : /*
3517 : * Add the given block number and offset numbers to dead_items.
3518 : */
3519 : static void
3520 26296 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3521 : int num_offsets)
3522 : {
3523 26296 : const int prog_index[2] = {
3524 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3525 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3526 : };
3527 : int64 prog_val[2];
3528 :
3529 26296 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3530 26296 : vacrel->dead_items_info->num_items += num_offsets;
3531 :
3532 : /* update the progress information */
3533 26296 : prog_val[0] = vacrel->dead_items_info->num_items;
3534 26296 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3535 26296 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3536 26296 : }
3537 :
3538 : /*
3539 : * Forget all collected dead items.
3540 : */
3541 : static void
3542 1048 : dead_items_reset(LVRelState *vacrel)
3543 : {
3544 1048 : if (ParallelVacuumIsActive(vacrel))
3545 : {
3546 12 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3547 12 : return;
3548 : }
3549 :
3550 : /* Recreate the tidstore with the same max_bytes limitation */
3551 1036 : TidStoreDestroy(vacrel->dead_items);
3552 1036 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3553 :
3554 : /* Reset the counter */
3555 1036 : vacrel->dead_items_info->num_items = 0;
3556 : }
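/*
 * Illustrative sketch (not code from vacuumlazy.c) of the TidStore lifecycle
 * behind dead_items_alloc(), dead_items_add(), lazy_vacuum_heap_rel(), and
 * dead_items_reset(), using only the TidStore calls that appear in this
 * file.  The consumer side is shown with a plain iterator; the real phase
 * III wraps the iterator in a read stream.  Error handling, progress
 * reporting, and the parallel-vacuum path are omitted.
 */
static void
sketch_dead_items_lifecycle(size_t max_bytes)
{
    TidStore   *ts = TidStoreCreateLocal(max_bytes, true);
    OffsetNumber offsets[MaxOffsetNumber];
    TidStoreIter *iter;
    TidStoreIterResult *res;

    /* Phase I: record the dead item offsets of one block */
    offsets[0] = FirstOffsetNumber;
    TidStoreSetBlockOffsets(ts, (BlockNumber) 0, offsets, 1);

    /* Phase III: walk the store in block order and reap each page */
    iter = TidStoreBeginIterate(ts);
    while ((res = TidStoreIterateNext(iter)) != NULL)
    {
        int         noffs;

        noffs = TidStoreGetBlockOffsets(res, offsets, lengthof(offsets));
        /* ... set the noffs LP_DEAD items on block res->blkno to LP_UNUSED ... */
        (void) noffs;
    }
    TidStoreEndIterate(iter);

    /* "Reset": destroy and recreate with the same memory budget */
    TidStoreDestroy(ts);
    ts = TidStoreCreateLocal(max_bytes, true);

    /* ... the next phase I round would fill ts again ... */
    TidStoreDestroy(ts);
}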
3557 :
3558 : /*
3559 : * Perform cleanup for resources allocated in dead_items_alloc
3560 : */
3561 : static void
3562 117624 : dead_items_cleanup(LVRelState *vacrel)
3563 : {
3564 117624 : if (!ParallelVacuumIsActive(vacrel))
3565 : {
3566 : /* Don't bother with pfree here */
3567 117602 : return;
3568 : }
3569 :
3570 : /* End parallel mode */
3571 22 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3572 22 : vacrel->pvs = NULL;
3573 : }
3574 :
3575 : /*
3576 : * Check if every tuple in the given page is visible to all current and future
3577 : * transactions. Also return the visibility_cutoff_xid which is the highest
3578 : * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3579 : * on this page is frozen.
3580 : *
3581 : * This is a stripped down version of lazy_scan_prune(). If you change
3582 : * anything here, make sure that everything stays in sync. Note that an
3583 : * assertion calls us to verify that everybody still agrees. Be sure to avoid
3584 : * introducing new side-effects here.
3585 : */
3586 : static bool
3587 21694 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3588 : TransactionId *visibility_cutoff_xid,
3589 : bool *all_frozen)
3590 : {
3591 21694 : Page page = BufferGetPage(buf);
3592 21694 : BlockNumber blockno = BufferGetBlockNumber(buf);
3593 : OffsetNumber offnum,
3594 : maxoff;
3595 21694 : bool all_visible = true;
3596 :
3597 21694 : *visibility_cutoff_xid = InvalidTransactionId;
3598 21694 : *all_frozen = true;
3599 :
3600 21694 : maxoff = PageGetMaxOffsetNumber(page);
3601 1113820 : for (offnum = FirstOffsetNumber;
3602 1092236 : offnum <= maxoff && all_visible;
3603 1092126 : offnum = OffsetNumberNext(offnum))
3604 : {
3605 : ItemId itemid;
3606 : HeapTupleData tuple;
3607 :
3608 : /*
3609 : * Set the offset number so that we can display it along with any
3610 : * error that occurred while processing this tuple.
3611 : */
3612 1092128 : vacrel->offnum = offnum;
3613 1092128 : itemid = PageGetItemId(page, offnum);
3614 :
3615 : /* Unused or redirect line pointers are of no interest */
3616 1092128 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3617 272864 : continue;
3618 :
3619 819264 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3620 :
3621 : /*
3622 : * Dead line pointers can have index pointers pointing to them. So
3623 : * they can't be treated as visible
3624 : */
3625 819264 : if (ItemIdIsDead(itemid))
3626 : {
3627 2 : all_visible = false;
3628 2 : *all_frozen = false;
3629 2 : break;
3630 : }
3631 :
3632 : Assert(ItemIdIsNormal(itemid));
3633 :
3634 819262 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3635 819262 : tuple.t_len = ItemIdGetLength(itemid);
3636 819262 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3637 :
3638 819262 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3639 : buf))
3640 : {
3641 819184 : case HEAPTUPLE_LIVE:
3642 : {
3643 : TransactionId xmin;
3644 :
3645 : /* Check comments in lazy_scan_prune. */
3646 819184 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3647 : {
3648 0 : all_visible = false;
3649 0 : *all_frozen = false;
3650 0 : break;
3651 : }
3652 :
3653 : /*
3654 : * The inserter definitely committed. But is it old enough
3655 : * that everyone sees it as committed?
3656 : */
3657 819184 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3658 819184 : if (!TransactionIdPrecedes(xmin,
3659 : vacrel->cutoffs.OldestXmin))
3660 : {
3661 40 : all_visible = false;
3662 40 : *all_frozen = false;
3663 40 : break;
3664 : }
3665 :
3666 : /* Track newest xmin on page. */
3667 819144 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3668 : TransactionIdIsNormal(xmin))
3669 17130 : *visibility_cutoff_xid = xmin;
3670 :
3671 : /* Check whether this tuple is already frozen or not */
3672 1018242 : if (all_visible && *all_frozen &&
3673 199098 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3674 4938 : *all_frozen = false;
3675 : }
3676 819144 : break;
3677 :
3678 78 : case HEAPTUPLE_DEAD:
3679 : case HEAPTUPLE_RECENTLY_DEAD:
3680 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3681 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3682 : {
3683 78 : all_visible = false;
3684 78 : *all_frozen = false;
3685 78 : break;
3686 : }
3687 0 : default:
3688 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3689 : break;
3690 : }
3691 : } /* scan along page */
3692 :
3693 : /* Clear the offset information once we have processed the given page. */
3694 21694 : vacrel->offnum = InvalidOffsetNumber;
3695 :
3696 21694 : return all_visible;
3697 : }
3698 :
3699 : /*
3700 : * Update index statistics in pg_class if the statistics are accurate.
3701 : */
3702 : static void
3703 98304 : update_relstats_all_indexes(LVRelState *vacrel)
3704 : {
3705 98304 : Relation *indrels = vacrel->indrels;
3706 98304 : int nindexes = vacrel->nindexes;
3707 98304 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3708 :
3709 : Assert(vacrel->do_index_cleanup);
3710 :
3711 244320 : for (int idx = 0; idx < nindexes; idx++)
3712 : {
3713 146016 : Relation indrel = indrels[idx];
3714 146016 : IndexBulkDeleteResult *istat = indstats[idx];
3715 :
3716 146016 : if (istat == NULL || istat->estimated_count)
3717 143764 : continue;
3718 :
3719 : /* Update index statistics */
3720 2252 : vac_update_relstats(indrel,
3721 : istat->num_pages,
3722 : istat->num_index_tuples,
3723 : 0,
3724 : false,
3725 : InvalidTransactionId,
3726 : InvalidMultiXactId,
3727 : NULL, NULL, false);
3728 : }
3729 98304 : }
3730 :
3731 : /*
3732 : * Error context callback for errors occurring during vacuum. The error
3733 : * context messages for index phases should match the messages set in parallel
3734 : * vacuum. If you change this function for those phases, change
3735 : * parallel_vacuum_error_callback() as well.
3736 : */
3737 : static void
3738 79206 : vacuum_error_callback(void *arg)
3739 : {
3740 79206 : LVRelState *errinfo = arg;
3741 :
3742 79206 : switch (errinfo->phase)
3743 : {
3744 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3745 0 : if (BlockNumberIsValid(errinfo->blkno))
3746 : {
3747 0 : if (OffsetNumberIsValid(errinfo->offnum))
3748 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3749 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3750 : else
3751 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3752 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3753 : }
3754 : else
3755 0 : errcontext("while scanning relation \"%s.%s\"",
3756 : errinfo->relnamespace, errinfo->relname);
3757 0 : break;
3758 :
3759 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3760 0 : if (BlockNumberIsValid(errinfo->blkno))
3761 : {
3762 0 : if (OffsetNumberIsValid(errinfo->offnum))
3763 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3764 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3765 : else
3766 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3767 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3768 : }
3769 : else
3770 0 : errcontext("while vacuuming relation \"%s.%s\"",
3771 : errinfo->relnamespace, errinfo->relname);
3772 0 : break;
3773 :
3774 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3775 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3776 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3777 0 : break;
3778 :
3779 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3780 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3781 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3782 0 : break;
3783 :
3784 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3785 6 : if (BlockNumberIsValid(errinfo->blkno))
3786 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3787 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3788 6 : break;
3789 :
3790 79200 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3791 : default:
3792 79200 : return; /* do nothing; the errinfo may not be
3793 : * initialized */
3794 : }
3795 : }
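/*
 * Illustrative sketch (not code from vacuumlazy.c): vacuum_error_callback()
 * only takes effect once it is pushed onto the error context stack, which
 * the real code does in heap_vacuum_rel(), outside this excerpt.  This is
 * the standard PostgreSQL push/pop pattern; the work in the middle is a
 * placeholder.
 */
static void
sketch_register_error_callback(LVRelState *vacrel)
{
    ErrorContextCallback errcallback;

    errcallback.callback = vacuum_error_callback;
    errcallback.arg = vacrel;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* ... any ERROR raised here now gets a "while ..." context line ... */

    /* Pop the callback once the phases that use it are done */
    error_context_stack = errcallback.previous;
}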
3796 :
3797 : /*
3798 : * Updates the information required for the vacuum error callback. This also
3799 : * saves the current information, which can later be restored via restore_vacuum_error_info().
3800 : */
3801 : static void
3802 930116 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3803 : int phase, BlockNumber blkno, OffsetNumber offnum)
3804 : {
3805 930116 : if (saved_vacrel)
3806 : {
3807 170628 : saved_vacrel->offnum = vacrel->offnum;
3808 170628 : saved_vacrel->blkno = vacrel->blkno;
3809 170628 : saved_vacrel->phase = vacrel->phase;
3810 : }
3811 :
3812 930116 : vacrel->blkno = blkno;
3813 930116 : vacrel->offnum = offnum;
3814 930116 : vacrel->phase = phase;
3815 930116 : }
3816 :
3817 : /*
3818 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3819 : */
3820 : static void
3821 170628 : restore_vacuum_error_info(LVRelState *vacrel,
3822 : const LVSavedErrInfo *saved_vacrel)
3823 : {
3824 170628 : vacrel->blkno = saved_vacrel->blkno;
3825 170628 : vacrel->offnum = saved_vacrel->offnum;
3826 170628 : vacrel->phase = saved_vacrel->phase;
3827 170628 : }