Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * Vacuum updates the free space map after each index/heap vacuuming pass, or
32 : * about every VACUUM_FSM_EVERY_PAGES blocks if the table has no indexes.
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * store dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
121 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include "access/genam.h"
133 : #include "access/heapam.h"
134 : #include "access/htup_details.h"
135 : #include "access/multixact.h"
136 : #include "access/tidstore.h"
137 : #include "access/transam.h"
138 : #include "access/visibilitymap.h"
139 : #include "access/xloginsert.h"
140 : #include "catalog/storage.h"
141 : #include "commands/progress.h"
142 : #include "commands/vacuum.h"
143 : #include "common/int.h"
144 : #include "common/pg_prng.h"
145 : #include "executor/instrument.h"
146 : #include "miscadmin.h"
147 : #include "pgstat.h"
148 : #include "portability/instr_time.h"
149 : #include "postmaster/autovacuum.h"
150 : #include "storage/bufmgr.h"
151 : #include "storage/freespace.h"
152 : #include "storage/latch.h"
153 : #include "storage/lmgr.h"
154 : #include "storage/read_stream.h"
155 : #include "utils/lsyscache.h"
156 : #include "utils/pg_rusage.h"
157 : #include "utils/timestamp.h"
158 : #include "utils/wait_event.h"
159 :
160 :
161 : /*
162 : * Space/time tradeoff parameters: do these need to be user-tunable?
163 : *
164 : * Truncation is only considered once the count of potentially-freeable
165 : * pages reaches REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION),
166 : * whichever is less.
167 : */
168 : #define REL_TRUNCATE_MINIMUM 1000
169 : #define REL_TRUNCATE_FRACTION 16
170 :
171 : /*
172 : * Timing parameters for truncate locking heuristics.
173 : *
174 : * These were not exposed as user tunable GUC values because it didn't seem
175 : * that the potential for improvement was great enough to merit the cost of
176 : * supporting them.
177 : */
178 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
179 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
180 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
181 :
182 : /*
183 : * Threshold that controls whether we bypass index vacuuming and heap
184 : * vacuuming as an optimization.
185 : */
186 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
187 :
188 : /*
189 : * Perform a failsafe check each time we scan another 4GB of pages. (This is
190 : * deliberately kept to a power-of-two: 2^19 blocks when BLCKSZ is 8192.)
191 : */
192 : #define FAILSAFE_EVERY_PAGES \
193 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
194 :
195 : /*
196 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
197 : * (it won't be exact because we only vacuum FSM after processing a heap page
198 : * that has some removable tuples). When there are indexes, this is ignored,
199 : * and we vacuum FSM after each index/heap cleaning pass.
200 : */
201 : #define VACUUM_FSM_EVERY_PAGES \
202 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
203 :
204 : /*
205 : * Pages marked clean in the visibility map are only skipped when they form
206 : * a range of at least this many skippable pages; shorter ranges are scanned.
207 : */
208 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
209 :
210 : /*
211 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
212 : * Needs to be a power of 2.
213 : */
214 : #define PREFETCH_SIZE ((BlockNumber) 32)
215 :
216 : /*
217 : * Macro to check if we are in a parallel vacuum, i.e. vacrel->pvs is set. If
218 : * true, we are in the parallel mode and the DSM segment is initialized.
219 : */
220 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
221 :
222 : /* Phases of vacuum during which we report error context (LVRelState.phase). */
223 : typedef enum
224 : {
225 : VACUUM_ERRCB_PHASE_UNKNOWN, /* initial value set by heap_vacuum_rel() */
226 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
227 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
228 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
229 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
230 : VACUUM_ERRCB_PHASE_TRUNCATE,
231 : } VacErrPhase;
232 :
233 : /*
234 : * An eager scan of a page that is set all-frozen in the VM is considered
235 : * "successful". To spread out freezing overhead across multiple normal
236 : * vacuums, we limit the number of successful eager page freezes. The maximum
237 : * number of eager page freezes is this fraction of the count of all-visible
238 : * but not all-frozen pages at the beginning of the vacuum.
239 : */
240 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
241 :
242 : /*
243 : * On the assumption that different regions of the table tend to have
244 : * similarly aged data, once vacuum fails to freeze
245 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
246 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
247 : * to another region of the table with potentially older data.
248 : */
249 : #define EAGER_SCAN_REGION_SIZE 4096 /* in blocks */
250 :
251 : typedef struct LVRelState /* working state for one heap_vacuum_rel() call */
252 : {
253 : /* Target heap relation and its indexes */
254 : Relation rel;
255 : Relation *indrels;
256 : int nindexes;
257 :
258 : /* Buffer access strategy and parallel vacuum state */
259 : BufferAccessStrategy bstrategy;
260 : ParallelVacuumState *pvs;
261 :
262 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
263 : bool aggressive;
264 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
265 : bool skipwithvm;
266 : /* Consider index vacuuming bypass optimization? */
267 : bool consider_bypass_optimization;
268 :
269 : /* Doing index vacuuming, index cleanup, rel truncation? */
270 : bool do_index_vacuuming;
271 : bool do_index_cleanup;
272 : bool do_rel_truncate;
273 :
274 : /* VACUUM operation's cutoffs for freezing and pruning */
275 : struct VacuumCutoffs cutoffs;
276 : GlobalVisState *vistest;
277 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
278 : TransactionId NewRelfrozenXid;
279 : MultiXactId NewRelminMxid;
280 : bool skippedallvis;
281 :
282 : /* Error reporting state */
283 : char *dbname;
284 : char *relnamespace;
285 : char *relname;
286 : char *indname; /* Current index name */
287 : BlockNumber blkno; /* used only for heap operations */
288 : OffsetNumber offnum; /* used only for heap operations */
289 : VacErrPhase phase;
290 : bool verbose; /* VACUUM VERBOSE? */
291 :
292 : /*
293 : * dead_items stores TIDs whose index tuples are deleted by index
294 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
295 : * that has been processed by lazy_scan_prune. Also needed by
296 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
297 : * LP_UNUSED during second heap pass.
298 : *
299 : * Both dead_items and dead_items_info are allocated in shared memory in
300 : * parallel vacuum cases.
301 : */
302 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
303 : VacDeadItemsInfo *dead_items_info;
304 :
305 : BlockNumber rel_pages; /* total number of pages */
306 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
307 :
308 : /*
309 : * Count of all-visible blocks eagerly scanned (for logging only). This
310 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
311 : */
312 : BlockNumber eager_scanned_pages;
313 :
314 : BlockNumber removed_pages; /* # pages removed by relation truncation */
315 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
316 :
317 : /* # pages newly set all-visible in the VM */
318 : BlockNumber new_all_visible_pages;
319 :
320 : /*
321 : * # pages newly set all-visible and all-frozen in the VM. This is a
322 : * subset of new_all_visible_pages. That is, new_all_visible_pages
323 : * includes all pages set all-visible, but
324 : * new_all_visible_all_frozen_pages includes only those which were also
325 : * set all-frozen.
326 : */
327 : BlockNumber new_all_visible_all_frozen_pages;
328 :
329 : /* # all-visible pages newly set all-frozen in the VM */
330 : BlockNumber new_all_frozen_pages;
331 :
332 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
333 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
334 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
335 :
336 : /* Statistics output by us, for table */
337 : double new_rel_tuples; /* new estimated total # of tuples */
338 : double new_live_tuples; /* new estimated total # of live tuples */
339 : /* Statistics output by index AMs */
340 : IndexBulkDeleteResult **indstats;
341 :
342 : /* Instrumentation counters */
343 : int num_index_scans;
344 : int num_dead_items_resets;
345 : Size total_dead_items_bytes;
346 :
347 : /*
348 : * Total number of planned and actually launched parallel workers for
349 : * index vacuuming and index cleanup.
350 : */
351 : PVWorkerUsage worker_usage;
352 :
353 : /* Counters that follow are only for scanned_pages */
354 : int64 tuples_deleted; /* # deleted from table */
355 : int64 tuples_frozen; /* # newly frozen */
356 : int64 lpdead_items; /* # deleted from indexes */
357 : int64 live_tuples; /* # live tuples remaining */
358 : int64 recently_dead_tuples; /* # dead, but not yet removable */
359 : int64 missed_dead_tuples; /* # removable, but not removed */
360 :
361 : /* State maintained by heap_vac_scan_next_block() */
362 : BlockNumber current_block; /* last block returned */
363 : BlockNumber next_unskippable_block; /* next unskippable block */
364 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
365 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
366 :
367 : /* State related to managing eager scanning of all-visible pages */
368 :
369 : /*
370 : * A normal vacuum that has failed to freeze too many eagerly scanned
371 : * blocks in a region suspends eager scanning.
372 : * next_eager_scan_region_start is the block number of the first block
373 : * eligible for resumed eager scanning.
374 : *
375 : * When eager scanning is permanently disabled, either initially
376 : * (including for aggressive vacuum) or due to hitting the success cap,
377 : * this is set to InvalidBlockNumber.
378 : */
379 : BlockNumber next_eager_scan_region_start;
380 :
381 : /*
382 : * The remaining number of eagerly scanned blocks a normal vacuum may
383 : * successfully freeze. When eager scanning is enabled, this is
384 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
385 : * all-visible but not all-frozen pages. For each eager freeze success,
386 : * this is decremented. Once it hits 0, eager scanning is permanently
387 : * disabled. It is initialized to 0 if eager scanning starts out disabled
388 : * (including for aggressive vacuum).
389 : */
390 : BlockNumber eager_scan_remaining_successes;
391 :
392 : /*
393 : * The maximum number of blocks which may be eagerly scanned and not
394 : * frozen before eager scanning is temporarily suspended. This is
395 : * configurable both globally, via the
396 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
397 : * storage parameter of the same name. It is calculated as
398 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
399 : * It is 0 when eager scanning is disabled.
400 : */
401 : BlockNumber eager_scan_max_fails_per_region;
402 :
403 : /*
404 : * The remaining number of eagerly scanned blocks vacuum may fail to freeze
405 : * (due to age) in the current eager scan region. Vacuum resets it to
406 : * eager_scan_max_fails_per_region each time it enters a new region of the
407 : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
408 : * suspended until the next region. It is also 0 if eager scanning has
409 : * been permanently disabled.
410 : */
411 : BlockNumber eager_scan_remaining_fails;
412 : } LVRelState;
413 :
414 :
415 : /* Saved copy of LVRelState's blkno/offnum/phase error reporting fields. */
416 : typedef struct LVSavedErrInfo
417 : {
418 : BlockNumber blkno;
419 : OffsetNumber offnum;
420 : VacErrPhase phase;
421 : } LVSavedErrInfo;
422 :
423 :
424 : /* Prototypes for this file's static (non-exported) functions */
425 : static void lazy_scan_heap(LVRelState *vacrel);
426 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
427 : const VacuumParams params);
428 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
429 : void *callback_private_data,
430 : void *per_buffer_data);
431 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
432 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
433 : BlockNumber blkno, Page page,
434 : bool sharelock, Buffer vmbuffer);
435 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
436 : BlockNumber blkno, Page page,
437 : Buffer vmbuffer,
438 : bool *has_lpdead_items, bool *vm_page_frozen);
439 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
440 : BlockNumber blkno, Page page,
441 : bool *has_lpdead_items);
442 : static void lazy_vacuum(LVRelState *vacrel);
443 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
444 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
445 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
446 : Buffer buffer, OffsetNumber *deadoffsets,
447 : int num_offsets, Buffer vmbuffer);
448 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
449 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
450 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
451 : IndexBulkDeleteResult *istat,
452 : double reltuples,
453 : LVRelState *vacrel);
454 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
455 : IndexBulkDeleteResult *istat,
456 : double reltuples,
457 : bool estimated_count,
458 : LVRelState *vacrel);
459 : static bool should_attempt_truncation(LVRelState *vacrel);
460 : static void lazy_truncate_heap(LVRelState *vacrel);
461 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
462 : bool *lock_waiter_detected);
463 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
464 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
465 : int num_offsets);
466 : static void dead_items_reset(LVRelState *vacrel);
467 : static void dead_items_cleanup(LVRelState *vacrel);
468 :
469 : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
470 : GlobalVisState *vistest,
471 : bool allow_update_vistest,
472 : OffsetNumber *deadoffsets,
473 : int ndeadoffsets,
474 : bool *all_frozen,
475 : TransactionId *newest_live_xid,
476 : OffsetNumber *logging_offnum);
477 : static void update_relstats_all_indexes(LVRelState *vacrel);
478 : static void vacuum_error_callback(void *arg);
479 : static void update_vacuum_error_info(LVRelState *vacrel,
480 : LVSavedErrInfo *saved_vacrel,
481 : int phase, BlockNumber blkno,
482 : OffsetNumber offnum);
483 : static void restore_vacuum_error_info(LVRelState *vacrel,
484 : const LVSavedErrInfo *saved_vacrel);
485 :
486 :
487 :
488 : /*
489 : * Helper to set up the eager scanning state for vacuuming a single relation.
490 : * Initializes the eager scan management related members of the LVRelState.
491 : *
492 : * Caller provides whether or not an aggressive vacuum is required due to
493 : * vacuum options or for relfrozenxid/relminmxid advancement.
494 : */
495 : static void
496 115781 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
497 : {
498 : uint32 randseed;
499 : BlockNumber allvisible;
500 : BlockNumber allfrozen;
501 : float first_region_ratio;
502 115781 : bool oldest_unfrozen_before_cutoff = false;
503 :
504 : /*
505 : * Initialize eager scan management fields to their disabled values.
506 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
507 : * of tables without sufficiently old tuples disable eager scanning.
508 : */
509 115781 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
510 115781 : vacrel->eager_scan_max_fails_per_region = 0;
511 115781 : vacrel->eager_scan_remaining_fails = 0;
512 115781 : vacrel->eager_scan_remaining_successes = 0;
513 :
514 : /* If eager scanning is explicitly disabled, just return. */
515 115781 : if (params.max_eager_freeze_failure_rate == 0)
516 115781 : return;
517 :
518 : /*
519 : * The caller will have determined whether or not an aggressive vacuum is
520 : * required by either the vacuum parameters or the relative age of the
521 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
522 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
523 : * so scans of all-visible pages are not considered eager.
524 : */
525 115781 : if (vacrel->aggressive)
526 108918 : return;
527 :
528 : /*
529 : * Aggressively vacuuming a small relation shouldn't take long, so it
530 : * isn't worth amortizing. We use two times the region size as the size
531 : * cutoff because the eager scan start block is a random spot somewhere in
532 : * the first region, making the second region the first to be eager
533 : * scanned normally.
534 : */
535 6863 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
536 6863 : return;
537 :
538 : /*
539 : * We only want to enable eager scanning if we are likely to be able to
540 : * freeze some of the pages in the relation.
541 : *
542 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
543 : * are technically freezable, but we won't freeze them unless the criteria
544 : * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
545 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
546 : *
547 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
548 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
549 : * enable eager scanning.
550 : */
551 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
552 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
553 : vacrel->cutoffs.FreezeLimit))
554 0 : oldest_unfrozen_before_cutoff = true;
555 :
556 0 : if (!oldest_unfrozen_before_cutoff &&
557 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
558 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
559 : vacrel->cutoffs.MultiXactCutoff))
560 0 : oldest_unfrozen_before_cutoff = true;
561 :
562 0 : if (!oldest_unfrozen_before_cutoff)
563 0 : return;
564 :
565 : /* We have met the criteria to eagerly scan some pages. */
566 :
567 : /*
568 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
569 : * all-visible but not all-frozen blocks in the relation.
570 : */
571 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
572 :
573 0 : vacrel->eager_scan_remaining_successes =
574 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
575 0 : (allvisible - allfrozen));
576 :
577 : /* If every all-visible page is frozen, eager scanning is disabled. */
578 0 : if (vacrel->eager_scan_remaining_successes == 0)
579 0 : return;
580 :
581 : /*
582 : * Now calculate the bounds of the first eager scan region. Its end block
583 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
584 : * blocks. This affects the bounds of all subsequent regions and avoids
585 : * eager scanning and failing to freeze the same blocks each vacuum of the
586 : * relation.
587 : */
588 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
589 :
590 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
591 :
592 : Assert(params.max_eager_freeze_failure_rate > 0 &&
593 : params.max_eager_freeze_failure_rate <= 1);
594 :
595 0 : vacrel->eager_scan_max_fails_per_region =
596 0 : params.max_eager_freeze_failure_rate *
597 : EAGER_SCAN_REGION_SIZE;
598 :
599 : /*
600 : * The first region will be smaller than subsequent regions. As such,
601 : * adjust the eager freeze failures tolerated for this region.
602 : */
603 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
604 : EAGER_SCAN_REGION_SIZE;
605 :
606 0 : vacrel->eager_scan_remaining_fails =
607 0 : vacrel->eager_scan_max_fails_per_region *
608 : first_region_ratio;
609 : }
610 :
611 : /*
612 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
613 : *
614 : * This routine sets things up for and then calls lazy_scan_heap, where
615 : * almost all work actually takes place. Finalizes everything after call
616 : * returns by managing relation truncation and updating rel's pg_class
617 : * entry. (Also updates pg_class entries for any indexes that need it.)
618 : *
619 : * At entry, we have already established a transaction and opened
620 : * and locked the relation.
621 : */
622 : void
623 115781 : heap_vacuum_rel(Relation rel, const VacuumParams params,
624 : BufferAccessStrategy bstrategy)
625 : {
626 : LVRelState *vacrel;
627 : bool verbose,
628 : instrument,
629 : skipwithvm,
630 : frozenxid_updated,
631 : minmulti_updated;
632 : BlockNumber orig_rel_pages,
633 : new_rel_pages,
634 : new_rel_allvisible,
635 : new_rel_allfrozen;
636 : PGRUsage ru0;
637 115781 : TimestampTz starttime = 0;
638 115781 : PgStat_Counter startreadtime = 0,
639 115781 : startwritetime = 0;
640 115781 : WalUsage startwalusage = pgWalUsage;
641 115781 : BufferUsage startbufferusage = pgBufferUsage;
642 : ErrorContextCallback errcallback;
643 115781 : char **indnames = NULL;
644 115781 : Size dead_items_max_bytes = 0;
645 :
646 115781 : verbose = (params.options & VACOPT_VERBOSE) != 0;
647 215789 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
648 100008 : params.log_vacuum_min_duration >= 0));
649 115781 : if (instrument)
650 : {
651 100021 : pg_rusage_init(&ru0);
652 100021 : if (track_io_timing)
653 : {
654 0 : startreadtime = pgStatBlockReadTime;
655 0 : startwritetime = pgStatBlockWriteTime;
656 : }
657 : }
658 :
659 : /* Used for instrumentation and stats report */
660 115781 : starttime = GetCurrentTimestamp();
661 :
662 115781 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
663 : RelationGetRelid(rel));
664 115781 : if (AmAutoVacuumWorkerProcess())
665 100008 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
666 100008 : params.is_wraparound
667 : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
668 : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
669 : else
670 15773 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
671 : PROGRESS_VACUUM_STARTED_BY_MANUAL);
672 :
673 : /*
674 : * Setup error traceback support for ereport() first. The idea is to set
675 : * up an error context callback to display additional information on any
676 : * error during a vacuum. During different phases of vacuum, we update
677 : * the state so that the error context callback always display current
678 : * information.
679 : *
680 : * Copy the names of heap rel into local memory for error reporting
681 : * purposes, too. It isn't always safe to assume that we can get the name
682 : * of each rel. It's convenient for code in lazy_scan_heap to always use
683 : * these temp copies.
684 : */
685 115781 : vacrel = palloc0_object(LVRelState);
686 115781 : vacrel->dbname = get_database_name(MyDatabaseId);
687 115781 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
688 115781 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
689 115781 : vacrel->indname = NULL;
690 115781 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
691 115781 : vacrel->verbose = verbose;
692 115781 : errcallback.callback = vacuum_error_callback;
693 115781 : errcallback.arg = vacrel;
694 115781 : errcallback.previous = error_context_stack;
695 115781 : error_context_stack = &errcallback;
696 :
697 : /* Set up high level stuff about rel and its indexes */
698 115781 : vacrel->rel = rel;
699 115781 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
700 : &vacrel->indrels);
701 115781 : vacrel->bstrategy = bstrategy;
702 115781 : if (instrument && vacrel->nindexes > 0)
703 : {
704 : /* Copy index names used by instrumentation (not error reporting) */
705 95898 : indnames = palloc_array(char *, vacrel->nindexes);
706 248775 : for (int i = 0; i < vacrel->nindexes; i++)
707 152877 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
708 : }
709 :
710 : /*
711 : * The index_cleanup param either disables index vacuuming and cleanup or
712 : * forces it to go ahead when we would otherwise apply the index bypass
713 : * optimization. The default is 'auto', which leaves the final decision
714 : * up to lazy_vacuum().
715 : *
716 : * The truncate param allows user to avoid attempting relation truncation,
717 : * though it can't force truncation to happen.
718 : */
719 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
720 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
721 : params.truncate != VACOPTVALUE_AUTO);
722 :
723 : /*
724 : * While VacuumFailsafeActive is reset to false before calling this, we
725 : * still need to reset it here due to recursive calls.
726 : */
727 115781 : VacuumFailsafeActive = false;
728 115781 : vacrel->consider_bypass_optimization = true;
729 115781 : vacrel->do_index_vacuuming = true;
730 115781 : vacrel->do_index_cleanup = true;
731 115781 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
732 115781 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
733 : {
734 : /* Force disable index vacuuming up-front */
735 144 : vacrel->do_index_vacuuming = false;
736 144 : vacrel->do_index_cleanup = false;
737 : }
738 115637 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
739 : {
740 : /* Force index vacuuming. Note that failsafe can still bypass. */
741 19 : vacrel->consider_bypass_optimization = false;
742 : }
743 : else
744 : {
745 : /* Default/auto, make all decisions dynamically */
746 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
747 : }
748 :
749 : /* Initialize page counters explicitly (be tidy) */
750 115781 : vacrel->scanned_pages = 0;
751 115781 : vacrel->eager_scanned_pages = 0;
752 115781 : vacrel->removed_pages = 0;
753 115781 : vacrel->new_frozen_tuple_pages = 0;
754 115781 : vacrel->lpdead_item_pages = 0;
755 115781 : vacrel->missed_dead_pages = 0;
756 115781 : vacrel->nonempty_pages = 0;
757 : /* dead_items_alloc allocates vacrel->dead_items later on */
758 :
759 : /* Allocate/initialize output statistics state */
760 115781 : vacrel->new_rel_tuples = 0;
761 115781 : vacrel->new_live_tuples = 0;
762 115781 : vacrel->indstats = (IndexBulkDeleteResult **)
763 115781 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
764 :
765 : /* Initialize remaining counters (be tidy) */
766 115781 : vacrel->num_index_scans = 0;
767 115781 : vacrel->num_dead_items_resets = 0;
768 115781 : vacrel->total_dead_items_bytes = 0;
769 115781 : vacrel->tuples_deleted = 0;
770 115781 : vacrel->tuples_frozen = 0;
771 115781 : vacrel->lpdead_items = 0;
772 115781 : vacrel->live_tuples = 0;
773 115781 : vacrel->recently_dead_tuples = 0;
774 115781 : vacrel->missed_dead_tuples = 0;
775 :
776 115781 : vacrel->new_all_visible_pages = 0;
777 115781 : vacrel->new_all_visible_all_frozen_pages = 0;
778 115781 : vacrel->new_all_frozen_pages = 0;
779 :
780 115781 : vacrel->worker_usage.vacuum.nlaunched = 0;
781 115781 : vacrel->worker_usage.vacuum.nplanned = 0;
782 115781 : vacrel->worker_usage.cleanup.nlaunched = 0;
783 115781 : vacrel->worker_usage.cleanup.nplanned = 0;
784 :
785 : /*
786 : * Get cutoffs that determine which deleted tuples are considered DEAD,
787 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
788 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
789 : * happen in this order to ensure that the OldestXmin cutoff field works
790 : * as an upper bound on the XIDs stored in the pages we'll actually scan
791 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
792 : *
793 : * Next acquire vistest, a related cutoff that's used in pruning. We use
794 : * vistest in combination with OldestXmin to ensure that
795 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
796 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
797 : * whether a tuple should be frozen or removed. (In the future we might
798 : * want to teach lazy_scan_prune to recompute vistest from time to time,
799 : * to increase the number of dead tuples it can prune away.)
800 : */
801 115781 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
802 115781 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
803 115781 : vacrel->vistest = GlobalVisTestFor(rel);
804 :
805 : /* Initialize state used to track oldest extant XID/MXID */
806 115781 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
807 115781 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
808 :
809 : /*
810 : * Initialize state related to tracking all-visible page skipping. This is
811 : * very important to determine whether or not it is safe to advance the
812 : * relfrozenxid/relminmxid.
813 : */
814 115781 : vacrel->skippedallvis = false;
815 115781 : skipwithvm = true;
816 115781 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
817 : {
818 : /*
819 : * Force aggressive mode, and disable skipping blocks using the
820 : * visibility map (even those set all-frozen)
821 : */
822 194 : vacrel->aggressive = true;
823 194 : skipwithvm = false;
824 : }
825 :
826 115781 : vacrel->skipwithvm = skipwithvm;
827 :
828 : /*
829 : * Set up eager scan tracking state. This must happen after determining
830 : * whether or not the vacuum must be aggressive, because only normal
831 : * vacuums use the eager scan algorithm.
832 : */
833 115781 : heap_vacuum_eager_scan_setup(vacrel, params);
834 :
835 : /* Report the vacuum mode: 'normal' or 'aggressive' */
836 115781 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
837 115781 : vacrel->aggressive
838 : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
839 : : PROGRESS_VACUUM_MODE_NORMAL);
840 :
841 115781 : if (verbose)
842 : {
843 13 : if (vacrel->aggressive)
844 1 : ereport(INFO,
845 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
846 : vacrel->dbname, vacrel->relnamespace,
847 : vacrel->relname)));
848 : else
849 12 : ereport(INFO,
850 : (errmsg("vacuuming \"%s.%s.%s\"",
851 : vacrel->dbname, vacrel->relnamespace,
852 : vacrel->relname)));
853 : }
854 :
855 : /*
856 : * Allocate dead_items memory using dead_items_alloc. This handles
857 : * parallel VACUUM initialization as part of allocating shared memory
858 : * space used for dead_items. (But do a failsafe precheck first, to
859 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
860 : * is already dangerously old.)
861 : */
862 115781 : lazy_check_wraparound_failsafe(vacrel);
863 115781 : dead_items_alloc(vacrel, params.nworkers);
864 :
865 : /*
866 : * Call lazy_scan_heap to perform all required heap pruning, index
867 : * vacuuming, and heap vacuuming (plus related processing)
868 : */
869 115781 : lazy_scan_heap(vacrel);
870 :
871 : /*
872 : * Save dead items max_bytes and update the memory usage statistics before
873 : * cleanup, since they are freed in parallel vacuum cases during
874 : * dead_items_cleanup().
875 : */
876 115780 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
877 115780 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
878 :
879 : /*
880 : * Free resources managed by dead_items_alloc. This ends parallel mode in
881 : * passing when necessary.
882 : */
883 115780 : dead_items_cleanup(vacrel);
884 : Assert(!IsInParallelMode());
885 :
886 : /*
887 : * Update pg_class entries for each of rel's indexes where appropriate.
888 : *
889 : * Unlike the later update to rel's pg_class entry, this is not critical.
890 : * Maintains relpages/reltuples statistics used by the planner only.
891 : */
892 115780 : if (vacrel->do_index_cleanup)
893 91649 : update_relstats_all_indexes(vacrel);
894 :
895 : /* Done with rel's indexes */
896 115780 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
897 :
898 : /* Optionally truncate rel */
899 115780 : if (should_attempt_truncation(vacrel))
900 203 : lazy_truncate_heap(vacrel);
901 :
902 : /* Pop the error context stack */
903 115780 : error_context_stack = errcallback.previous;
904 :
905 : /* Report that we are now doing final cleanup */
906 115780 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
907 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
908 :
909 : /*
910 : * Prepare to update rel's pg_class entry.
911 : *
912 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
913 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
914 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
915 : */
916 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
917 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
918 : vacrel->cutoffs.relfrozenxid,
919 : vacrel->NewRelfrozenXid));
920 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
921 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
922 : vacrel->cutoffs.relminmxid,
923 : vacrel->NewRelminMxid));
924 115780 : if (vacrel->skippedallvis)
925 : {
926 : /*
927 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
928 : * chose to skip an all-visible page range. The state that tracks new
929 : * values will have missed unfrozen XIDs from the pages we skipped.
930 : */
931 : Assert(!vacrel->aggressive);
932 34 : vacrel->NewRelfrozenXid = InvalidTransactionId;
933 34 : vacrel->NewRelminMxid = InvalidMultiXactId;
934 : }
935 :
936 : /*
937 : * For safety, clamp relallvisible to be not more than what we're setting
938 : * pg_class.relpages to
939 : */
940 115780 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
941 115780 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
942 115780 : if (new_rel_allvisible > new_rel_pages)
943 0 : new_rel_allvisible = new_rel_pages;
944 :
945 : /*
946 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
947 : * all-frozen blocks to the count of all-visible blocks. This matches the
948 : * clamping of relallvisible above.
949 : */
950 115780 : if (new_rel_allfrozen > new_rel_allvisible)
951 0 : new_rel_allfrozen = new_rel_allvisible;
952 :
953 : /*
954 : * Now actually update rel's pg_class entry.
955 : *
956 : * In principle new_live_tuples could be -1 indicating that we (still)
957 : * don't know the tuple count. In practice that can't happen, since we
958 : * scan every page that isn't skipped using the visibility map.
959 : */
960 115780 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
961 : new_rel_allvisible, new_rel_allfrozen,
962 115780 : vacrel->nindexes > 0,
963 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
964 : &frozenxid_updated, &minmulti_updated, false);
965 :
966 : /*
967 : * Report results to the cumulative stats system, too.
968 : *
969 : * Deliberately avoid telling the stats system about LP_DEAD items that
970 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
971 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
972 : * It seems like a good idea to err on the side of not vacuuming again too
973 : * soon in cases where the failsafe prevented significant amounts of heap
974 : * vacuuming.
975 : */
976 115780 : pgstat_report_vacuum(rel,
977 43618 : Max(vacrel->new_live_tuples, 0),
978 115780 : vacrel->recently_dead_tuples +
979 115780 : vacrel->missed_dead_tuples,
980 : starttime);
981 115780 : pgstat_progress_end_command();
982 :
983 115780 : if (instrument)
984 : {
985 100020 : TimestampTz endtime = GetCurrentTimestamp();
986 :
987 100177 : if (verbose || params.log_vacuum_min_duration == 0 ||
988 157 : TimestampDifferenceExceeds(starttime, endtime,
989 157 : params.log_vacuum_min_duration))
990 : {
991 : long secs_dur;
992 : int usecs_dur;
993 : WalUsage walusage;
994 : BufferUsage bufferusage;
995 : StringInfoData buf;
996 : char *msgfmt;
997 : int32 diff;
998 99863 : double read_rate = 0,
999 99863 : write_rate = 0;
1000 : int64 total_blks_hit;
1001 : int64 total_blks_read;
1002 : int64 total_blks_dirtied;
1003 :
1004 99863 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1005 99863 : memset(&walusage, 0, sizeof(WalUsage));
1006 99863 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1007 99863 : memset(&bufferusage, 0, sizeof(BufferUsage));
1008 99863 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1009 :
1010 99863 : total_blks_hit = bufferusage.shared_blks_hit +
1011 99863 : bufferusage.local_blks_hit;
1012 99863 : total_blks_read = bufferusage.shared_blks_read +
1013 99863 : bufferusage.local_blks_read;
1014 99863 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1015 99863 : bufferusage.local_blks_dirtied;
1016 :
1017 99863 : initStringInfo(&buf);
1018 99863 : if (verbose)
1019 : {
1020 : /*
1021 : * Aggressiveness already reported earlier, in dedicated
1022 : * VACUUM VERBOSE ereport
1023 : */
1024 : Assert(!params.is_wraparound);
1025 13 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1026 : }
1027 99850 : else if (params.is_wraparound)
1028 : {
1029 : /*
1030 : * While it's possible for a VACUUM to be both is_wraparound
1031 : * and !aggressive, that's just a corner-case -- is_wraparound
1032 : * implies aggressive. Produce distinct output for the corner
1033 : * case all the same, just in case.
1034 : */
1035 99833 : if (vacrel->aggressive)
1036 99823 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1037 : else
1038 10 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1039 : }
1040 : else
1041 : {
1042 17 : if (vacrel->aggressive)
1043 13 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1044 : else
1045 4 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1046 : }
1047 99863 : appendStringInfo(&buf, msgfmt,
1048 : vacrel->dbname,
1049 : vacrel->relnamespace,
1050 : vacrel->relname,
1051 : vacrel->num_index_scans);
1052 137167 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1053 : vacrel->removed_pages,
1054 : new_rel_pages,
1055 : vacrel->scanned_pages,
1056 : orig_rel_pages == 0 ? 100.0 :
1057 37304 : 100.0 * vacrel->scanned_pages /
1058 : orig_rel_pages,
1059 : vacrel->eager_scanned_pages);
1060 99863 : appendStringInfo(&buf,
1061 99863 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1062 : vacrel->tuples_deleted,
1063 99863 : (int64) vacrel->new_rel_tuples,
1064 : vacrel->recently_dead_tuples);
1065 99863 : if (vacrel->missed_dead_tuples > 0)
1066 0 : appendStringInfo(&buf,
1067 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1068 : vacrel->missed_dead_tuples,
1069 : vacrel->missed_dead_pages);
1070 99863 : diff = (int32) (ReadNextTransactionId() -
1071 99863 : vacrel->cutoffs.OldestXmin);
1072 99863 : appendStringInfo(&buf,
1073 99863 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1074 : vacrel->cutoffs.OldestXmin, diff);
1075 99863 : if (frozenxid_updated)
1076 : {
1077 18482 : diff = (int32) (vacrel->NewRelfrozenXid -
1078 18482 : vacrel->cutoffs.relfrozenxid);
1079 18482 : appendStringInfo(&buf,
1080 18482 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1081 : vacrel->NewRelfrozenXid, diff);
1082 : }
1083 99863 : if (minmulti_updated)
1084 : {
1085 7 : diff = (int32) (vacrel->NewRelminMxid -
1086 7 : vacrel->cutoffs.relminmxid);
1087 7 : appendStringInfo(&buf,
1088 7 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1089 : vacrel->NewRelminMxid, diff);
1090 : }
1091 137167 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1092 : vacrel->new_frozen_tuple_pages,
1093 : orig_rel_pages == 0 ? 100.0 :
1094 37304 : 100.0 * vacrel->new_frozen_tuple_pages /
1095 : orig_rel_pages,
1096 : vacrel->tuples_frozen);
1097 :
1098 99863 : appendStringInfo(&buf,
1099 99863 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1100 : vacrel->new_all_visible_pages,
1101 99863 : vacrel->new_all_visible_all_frozen_pages +
1102 99863 : vacrel->new_all_frozen_pages,
1103 : vacrel->new_all_frozen_pages);
1104 99863 : if (vacrel->do_index_vacuuming)
1105 : {
1106 75992 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1107 75977 : appendStringInfoString(&buf, _("index scan not needed: "));
1108 : else
1109 15 : appendStringInfoString(&buf, _("index scan needed: "));
1110 :
1111 75992 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1112 : }
1113 : else
1114 : {
1115 23871 : if (!VacuumFailsafeActive)
1116 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1117 : else
1118 23871 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1119 :
1120 23871 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1121 : }
1122 137167 : appendStringInfo(&buf, msgfmt,
1123 : vacrel->lpdead_item_pages,
1124 : orig_rel_pages == 0 ? 100.0 :
1125 37304 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1126 : vacrel->lpdead_items);
1127 :
1128 99863 : if (vacrel->worker_usage.vacuum.nplanned > 0)
1129 0 : appendStringInfo(&buf,
1130 0 : _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1131 : vacrel->worker_usage.vacuum.nplanned,
1132 : vacrel->worker_usage.vacuum.nlaunched);
1133 :
1134 99863 : if (vacrel->worker_usage.cleanup.nplanned > 0)
1135 0 : appendStringInfo(&buf,
1136 0 : _("parallel workers: index cleanup: %d planned, %d launched\n"),
1137 : vacrel->worker_usage.cleanup.nplanned,
1138 : vacrel->worker_usage.cleanup.nlaunched);
1139 :
1140 252451 : for (int i = 0; i < vacrel->nindexes; i++)
1141 : {
1142 152588 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1143 :
1144 152588 : if (!istat)
1145 152563 : continue;
1146 :
1147 25 : appendStringInfo(&buf,
1148 25 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1149 25 : indnames[i],
1150 : istat->num_pages,
1151 : istat->pages_newly_deleted,
1152 : istat->pages_deleted,
1153 : istat->pages_free);
1154 : }
1155 99863 : if (track_cost_delay_timing)
1156 : {
1157 : /*
1158 : * We bypass the changecount mechanism because this value is
1159 : * only updated by the calling process. We also rely on the
1160 : * above call to pgstat_progress_end_command() to not clear
1161 : * the st_progress_param array.
1162 : */
1163 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1164 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1165 : }
1166 99863 : if (track_io_timing)
1167 : {
1168 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1169 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1170 :
1171 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1172 : read_ms, write_ms);
1173 : }
1174 99863 : if (secs_dur > 0 || usecs_dur > 0)
1175 : {
1176 99863 : read_rate = (double) BLCKSZ * total_blks_read /
1177 99863 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1178 99863 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1179 99863 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1180 : }
1181 99863 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1182 : read_rate, write_rate);
1183 99863 : appendStringInfo(&buf,
1184 99863 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1185 : total_blks_hit,
1186 : total_blks_read,
1187 : total_blks_dirtied);
1188 99863 : appendStringInfo(&buf,
1189 99863 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1190 : walusage.wal_records,
1191 : walusage.wal_fpi,
1192 : walusage.wal_bytes,
1193 : walusage.wal_fpi_bytes,
1194 : walusage.wal_buffers_full);
1195 :
1196 : /*
1197 : * Report the dead items memory usage.
1198 : *
1199 : * The num_dead_items_resets counter increases when we reset the
1200 : * collected dead items, so the counter is non-zero if at least
1201 : * one dead item is collected, even if index vacuuming is
1202 : * disabled.
1203 : */
1204 99863 : appendStringInfo(&buf,
1205 99863 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1206 : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1207 99863 : vacrel->num_dead_items_resets),
1208 99863 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1209 : vacrel->num_dead_items_resets,
1210 99863 : (double) dead_items_max_bytes / (1024 * 1024));
1211 99863 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1212 :
1213 99863 : ereport(verbose ? INFO : LOG,
1214 : (errmsg_internal("%s", buf.data)));
1215 99863 : pfree(buf.data);
1216 : }
1217 : }
1218 :
1219 : /* Cleanup index statistics and index names */
1220 291331 : for (int i = 0; i < vacrel->nindexes; i++)
1221 : {
1222 175551 : if (vacrel->indstats[i])
1223 1657 : pfree(vacrel->indstats[i]);
1224 :
1225 175551 : if (instrument)
1226 152875 : pfree(indnames[i]);
1227 : }
1228 115780 : }
1229 :
1230 : /*
1231 : * lazy_scan_heap() -- workhorse function for VACUUM
1232 : *
1233 : * This routine prunes each page in the heap, and considers the need to
1234 : * freeze remaining tuples with storage (not including pages that can be
1235 : * skipped using the visibility map). Also performs related maintenance
1236 : * of the FSM and visibility map. These steps all take place during an
1237 : * initial pass over the target heap relation.
1238 : *
1239 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1240 : * consists of deleting index tuples that point to LP_DEAD items left in
1241 : * heap pages following pruning. The earlier initial pass over the heap
1242 : * will have collected the TIDs whose index tuples need to be removed.
1243 : *
1244 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1245 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1246 : * as LP_UNUSED. This has to happen in a second, final pass over the
1247 : * heap, to preserve a basic invariant that all index AMs rely on: no
1248 : * extant index tuple can ever be allowed to contain a TID that points to
1249 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1250 : * recycling of line pointers to avoid index scans that get confused
1251 : * about which TID points to which tuple immediately after recycling.
1252 : * (Actually, this isn't a concern when target heap relation happens to
1253 : * have no indexes, which allows us to safely apply the one-pass strategy
1254 : * as an optimization).
1255 : *
1256 : * In practice we often have enough space to fit all TIDs, and so won't
1257 : * need to call lazy_vacuum more than once, after our initial pass over
1258 : * the heap has totally finished. Otherwise things are slightly more
1259 : * complicated: our "initial pass" over the heap applies only to those
1260 : * pages that were pruned before we needed to call lazy_vacuum, and our
1261 : * "final pass" over the heap only vacuums these same heap pages.
1262 : * However, we process indexes in full every time lazy_vacuum is called,
1263 : * which makes index processing very inefficient when memory is in short
1264 : * supply.
1265 : */
1266 : static void
1267 115895 : lazy_scan_heap(LVRelState *vacrel)
1268 : {
1269 : ReadStream *stream;
1270 115895 : BlockNumber rel_pages = vacrel->rel_pages,
1271 115895 : blkno = 0,
1272 115895 : next_fsm_block_to_vacuum = 0;
1273 115895 : BlockNumber orig_eager_scan_success_limit =
1274 : vacrel->eager_scan_remaining_successes; /* for logging */
1275 115895 : Buffer vmbuffer = InvalidBuffer;
1276 115895 : const int initprog_index[] = {
1277 : PROGRESS_VACUUM_PHASE,
1278 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1279 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1280 : };
1281 : int64 initprog_val[3];
1282 :
1283 : /* Report that we're scanning the heap, advertising total # of blocks */
1284 115895 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1285 115895 : initprog_val[1] = rel_pages;
1286 115895 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1287 115895 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1288 :
1289 : /* Initialize for the first heap_vac_scan_next_block() call */
1290 115895 : vacrel->current_block = InvalidBlockNumber;
1291 115895 : vacrel->next_unskippable_block = InvalidBlockNumber;
1292 115895 : vacrel->next_unskippable_eager_scanned = false;
1293 115895 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1294 :
1295 : /*
1296 : * Set up the read stream for vacuum's first pass through the heap.
1297 : *
1298 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1299 : * explicit work in heap_vac_scan_next_block.
1300 : */
1301 115895 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1302 : vacrel->bstrategy,
1303 : vacrel->rel,
1304 : MAIN_FORKNUM,
1305 : heap_vac_scan_next_block,
1306 : vacrel,
1307 : sizeof(bool));
1308 :
1309 : while (true)
1310 406304 : {
1311 : Buffer buf;
1312 : Page page;
1313 522199 : bool was_eager_scanned = false;
1314 522199 : int ndeleted = 0;
1315 : bool has_lpdead_items;
1316 522199 : void *per_buffer_data = NULL;
1317 522199 : bool vm_page_frozen = false;
1318 522199 : bool got_cleanup_lock = false;
1319 :
1320 522199 : vacuum_delay_point(false);
1321 :
1322 : /*
1323 : * Regularly check if wraparound failsafe should trigger.
1324 : *
1325 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1326 : * relfrozenxid might start to look dangerously old before we reach
1327 : * that point. This check also provides failsafe coverage for the
1328 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1329 : * param set to 'off'.
1330 : */
1331 522383 : if (vacrel->scanned_pages > 0 &&
1332 406488 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1333 0 : lazy_check_wraparound_failsafe(vacrel);
1334 :
1335 : /*
1336 : * Consider if we definitely have enough space to process TIDs on page
1337 : * already. If we are close to overrunning the available space for
1338 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1339 : * this page. However, let's force at least one page-worth of tuples
1340 : * to be stored as to ensure we do at least some work when the memory
1341 : * configured is so low that we run out before storing anything.
1342 : */
1343 522383 : if (vacrel->dead_items_info->num_items > 0 &&
1344 29378 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1345 : {
1346 : /*
1347 : * Before beginning index vacuuming, we release any pin we may
1348 : * hold on the visibility map page. This isn't necessary for
1349 : * correctness, but we do it anyway to avoid holding the pin
1350 : * across a lengthy, unrelated operation.
1351 : */
1352 12 : if (BufferIsValid(vmbuffer))
1353 : {
1354 12 : ReleaseBuffer(vmbuffer);
1355 12 : vmbuffer = InvalidBuffer;
1356 : }
1357 :
1358 : /* Perform a round of index and heap vacuuming */
1359 12 : vacrel->consider_bypass_optimization = false;
1360 12 : lazy_vacuum(vacrel);
1361 :
1362 : /*
1363 : * Vacuum the Free Space Map to make newly-freed space visible on
1364 : * upper-level FSM pages. Note that blkno is the previously
1365 : * processed block.
1366 : */
1367 12 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1368 : blkno + 1);
1369 12 : next_fsm_block_to_vacuum = blkno;
1370 :
1371 : /* Report that we are once again scanning the heap */
1372 12 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1373 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1374 : }
1375 :
1376 522383 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1377 :
1378 : /* The relation is exhausted. */
1379 522383 : if (!BufferIsValid(buf))
1380 115895 : break;
1381 :
1382 406488 : was_eager_scanned = *((bool *) per_buffer_data);
1383 406488 : CheckBufferIsPinnedOnce(buf);
1384 406304 : page = BufferGetPage(buf);
1385 406304 : blkno = BufferGetBlockNumber(buf);
1386 :
1387 406304 : vacrel->scanned_pages++;
1388 406304 : if (was_eager_scanned)
1389 0 : vacrel->eager_scanned_pages++;
1390 :
1391 : /* Report as block scanned, update error traceback information */
1392 406304 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1393 406304 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1394 : blkno, InvalidOffsetNumber);
1395 :
1396 : /*
1397 : * Pin the visibility map page in case we need to mark the page
1398 : * all-visible. In most cases this will be very cheap, because we'll
1399 : * already have the correct page pinned anyway.
1400 : */
1401 406304 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1402 :
1403 : /*
1404 : * We need a buffer cleanup lock to prune HOT chains and defragment
1405 : * the page in lazy_scan_prune. But when it's not possible to acquire
1406 : * a cleanup lock right away, we may be able to settle for reduced
1407 : * processing using lazy_scan_noprune.
1408 : */
1409 406304 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1410 :
1411 406304 : if (!got_cleanup_lock)
1412 108 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1413 :
1414 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1415 406304 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1416 406304 : vmbuffer))
1417 : {
1418 : /* Processed as new/empty page (lock and pin released) */
1419 1620 : continue;
1420 : }
1421 :
1422 : /*
1423 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1424 : * items in the dead_items area for later vacuuming, count live and
1425 : * recently dead tuples for vacuum logging, and determine if this
1426 : * block could later be truncated. If we encounter any xid/mxids that
1427 : * require advancing the relfrozenxid/relminmxid, we'll have to wait
1428 : * for a cleanup lock and call lazy_scan_prune().
1429 : */
1430 404684 : if (!got_cleanup_lock &&
1431 108 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1432 : {
1433 : /*
1434 : * lazy_scan_noprune could not do all required processing. Wait
1435 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1436 : */
1437 : Assert(vacrel->aggressive);
1438 22 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1439 22 : LockBufferForCleanup(buf);
1440 22 : got_cleanup_lock = true;
1441 : }
1442 :
1443 : /*
1444 : * If we have a cleanup lock, we must now prune, freeze, and count
1445 : * tuples. We may have acquired the cleanup lock originally, or we may
1446 : * have gone back and acquired it after lazy_scan_noprune() returned
1447 : * false. Either way, the page hasn't been processed yet.
1448 : *
1449 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1450 : * recently_dead_tuples and live tuples for vacuum logging, determine
1451 : * if the block can later be truncated, and accumulate the details of
1452 : * remaining LP_DEAD line pointers on the page into dead_items. These
1453 : * dead items include those pruned by lazy_scan_prune() as well as
1454 : * line pointers previously marked LP_DEAD.
1455 : */
1456 404684 : if (got_cleanup_lock)
1457 404598 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1458 : vmbuffer,
1459 : &has_lpdead_items, &vm_page_frozen);
1460 :
1461 : /*
1462 : * Count an eagerly scanned page as a failure or a success.
1463 : *
1464 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1465 : * cleanup lock, we won't have frozen the page. However, we only count
1466 : * pages that were too new to require freezing as eager freeze
1467 : * failures.
1468 : *
1469 : * We could gather more information from lazy_scan_noprune() about
1470 : * whether or not there were tuples with XIDs or MXIDs older than the
1471 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1472 : * exclude pages skipped due to cleanup lock contention from eager
1473 : * freeze algorithm caps.
1474 : */
1475 404684 : if (got_cleanup_lock && was_eager_scanned)
1476 : {
1477 : /* Aggressive vacuums do not eager scan. */
1478 : Assert(!vacrel->aggressive);
1479 :
1480 0 : if (vm_page_frozen)
1481 : {
1482 0 : if (vacrel->eager_scan_remaining_successes > 0)
1483 0 : vacrel->eager_scan_remaining_successes--;
1484 :
1485 0 : if (vacrel->eager_scan_remaining_successes == 0)
1486 : {
1487 : /*
1488 : * Report only once that we disabled eager scanning. We
1489 : * may eagerly read ahead blocks in excess of the success
1490 : * or failure caps before attempting to freeze them, so we
1491 : * could reach here even after disabling additional eager
1492 : * scanning.
1493 : */
1494 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1495 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1496 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1497 : orig_eager_scan_success_limit,
1498 : vacrel->dbname, vacrel->relnamespace,
1499 : vacrel->relname)));
1500 :
1501 : /*
1502 : * If we hit our success cap, permanently disable eager
1503 : * scanning by setting the other eager scan management
1504 : * fields to their disabled values.
1505 : */
1506 0 : vacrel->eager_scan_remaining_fails = 0;
1507 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1508 0 : vacrel->eager_scan_max_fails_per_region = 0;
1509 : }
1510 : }
1511 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1512 0 : vacrel->eager_scan_remaining_fails--;
1513 : }
1514 :
1515 : /*
1516 : * Now drop the buffer lock and, potentially, update the FSM.
1517 : *
1518 : * Our goal is to update the freespace map the last time we touch the
1519 : * page. If we'll process a block in the second pass, we may free up
1520 : * additional space on the page, so it is better to update the FSM
1521 : * after the second pass. If the relation has no indexes, or if index
1522 : * vacuuming is disabled, there will be no second heap pass; if this
1523 : * particular page has no dead items, the second heap pass will not
1524 : * touch this page. So, in those cases, update the FSM now.
1525 : *
1526 : * Note: In corner cases, it's possible to miss updating the FSM
1527 : * entirely. If index vacuuming is currently enabled, we'll skip the
1528 : * FSM update now. But if failsafe mode is later activated, or there
1529 : * are so few dead tuples that index vacuuming is bypassed, there will
1530 : * also be no opportunity to update the FSM later, because we'll never
1531 : * revisit this page. Since updating the FSM is desirable but not
1532 : * absolutely required, that's OK.
1533 : */
1534 404684 : if (vacrel->nindexes == 0
1535 385873 : || !vacrel->do_index_vacuuming
1536 309718 : || !has_lpdead_items)
1537 388832 : {
1538 388832 : Size freespace = PageGetHeapFreeSpace(page);
1539 :
1540 388832 : UnlockReleaseBuffer(buf);
1541 388832 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1542 :
1543 : /*
1544 : * Periodically perform FSM vacuuming to make newly-freed space
1545 : * visible on upper FSM pages. This is done after vacuuming if the
1546 : * table has indexes. There will only be newly-freed space if we
1547 : * held the cleanup lock and lazy_scan_prune() was called.
1548 : */
1549 388832 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1550 490 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1551 : {
1552 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1553 : blkno);
1554 0 : next_fsm_block_to_vacuum = blkno;
1555 : }
1556 : }
1557 : else
1558 15852 : UnlockReleaseBuffer(buf);
1559 : }
1560 :
1561 115895 : vacrel->blkno = InvalidBlockNumber;
1562 115895 : if (BufferIsValid(vmbuffer))
1563 43761 : ReleaseBuffer(vmbuffer);
1564 :
1565 : /*
1566 : * Report that everything is now scanned. We never skip scanning the last
1567 : * block in the relation, so we can pass rel_pages here.
1568 : */
1569 115853 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1570 : rel_pages);
1571 :
1572 : /* now we can compute the new value for pg_class.reltuples */
1573 231562 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1574 : vacrel->scanned_pages,
1575 115781 : vacrel->live_tuples);
1576 :
1577 : /*
1578 : * Also compute the total number of surviving heap entries. In the
1579 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1580 : */
1581 115781 : vacrel->new_rel_tuples =
1582 115781 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1583 115781 : vacrel->missed_dead_tuples;
1584 :
1585 115781 : read_stream_end(stream);
1586 :
1587 : /*
1588 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1589 : * related heap vacuuming
1590 : */
1591 115781 : if (vacrel->dead_items_info->num_items > 0)
1592 773 : lazy_vacuum(vacrel);
1593 :
1594 : /*
1595 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1596 : * not there were indexes, and whether or not we bypassed index vacuuming.
1597 : * We can pass rel_pages here because we never skip scanning the last
1598 : * block of the relation.
1599 : */
1600 115781 : if (rel_pages > next_fsm_block_to_vacuum)
1601 43721 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1602 :
1603 : /* report all blocks vacuumed */
1604 115780 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1605 :
1606 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1607 115780 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1608 87222 : lazy_cleanup_all_indexes(vacrel);
1609 115780 : }
1610 :
1611 : /*
1612 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1613 : * for vacuum to process
1614 : *
1615 : * Every time lazy_scan_heap() needs a new block to process during its first
1616 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1617 : * heap_vac_scan_next_block() to get the next block.
1618 : *
1619 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1620 : * various thresholds to skip blocks which do not need to be processed and
1621 : * returns the next block to process or InvalidBlockNumber if there are no
1622 : * remaining blocks.
1623 : *
1624 : * The visibility status of the next block to process and whether or not it
1625 : * was eager scanned is set in the per_buffer_data.
1626 : *
1627 : * callback_private_data contains a reference to the LVRelState, passed to the
1628 : * read stream API during stream setup. The LVRelState is an in/out parameter
1629 : * here (locally named `vacrel`). Vacuum options and information about the
1630 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1631 : * that's all-visible but not all-frozen (to ensure that we don't update
1632 : * relfrozenxid in that case). vacrel also holds information about the next
1633 : * unskippable block -- as bookkeeping for this function.
1634 : */
1635 : static BlockNumber
1636 522383 : heap_vac_scan_next_block(ReadStream *stream,
1637 : void *callback_private_data,
1638 : void *per_buffer_data)
1639 : {
1640 : BlockNumber next_block;
1641 522383 : LVRelState *vacrel = callback_private_data;
1642 :
1643 : /* relies on InvalidBlockNumber + 1 wrapping around to 0 on first call */
1644 522383 : next_block = vacrel->current_block + 1;
1645 :
1646 : /* Reached the end of the relation? Release any held VM buffer; no blocks remain. */
1647 522383 : if (next_block >= vacrel->rel_pages)
1648 : {
1649 115895 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1650 : {
1651 42179 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1652 42179 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1653 : }
1654 115895 : return InvalidBlockNumber;
1655 : }
1656 :
1657 : /*
1658 : * We must be in one of the following three states (numbered 1-3 below):
1659 : */
1660 406488 : if (next_block > vacrel->next_unskippable_block ||
1661 179455 : vacrel->next_unskippable_block == InvalidBlockNumber)
1662 : {
1663 : /*
1664 : * 1. We have just processed an unskippable block (or we're at the
1665 : * beginning of the scan). Find the next unskippable block using the
1666 : * visibility map.
1667 : */
1668 : bool skipsallvis;
1669 :
1670 270796 : find_next_unskippable_block(vacrel, &skipsallvis);
1671 :
1672 : /*
1673 : * We now know the next block that we must process. It can be the
1674 : * next block after the one we just processed, or something further
1675 : * ahead. If it's further ahead, we can jump to it, but we choose to
1676 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1677 : * pages. Since we're reading sequentially, the OS should be doing
1678 : * readahead for us, so there's no gain in skipping a page now and
1679 : * then. Skipping such a range might even discourage sequential
1680 : * detection.
1681 : *
1682 : * This test also enables more frequent relfrozenxid advancement
1683 : * during non-aggressive VACUUMs. If the range has any all-visible
1684 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1685 : * real downside.
1686 : */
1687 270796 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1688 : {
1689 3846 : next_block = vacrel->next_unskippable_block;
1690 3846 : if (skipsallvis)
1691 34 : vacrel->skippedallvis = true;
1692 : }
1693 : }
1694 :
1695 : /* Now we must be in one of the two remaining states: */
1696 406488 : if (next_block < vacrel->next_unskippable_block)
1697 : {
1698 : /*
1699 : * 2. We are processing a range of blocks that we could have skipped
1700 : * but chose not to. We know that they are all-visible in the VM,
1701 : * otherwise they would've been unskippable.
1702 : */
1703 135692 : vacrel->current_block = next_block;
1704 : /* Report via per_buffer_data that this block was not eager scanned */
1705 135692 : *((bool *) per_buffer_data) = false;
1706 135692 : return vacrel->current_block;
1707 : }
1708 : else
1709 : {
1710 : /*
1711 : * 3. We reached the next unskippable block. Process it. On the next
1712 : * call, we will be back in state 1.
1713 : */
1714 : Assert(next_block == vacrel->next_unskippable_block);
1715 :
1716 270796 : vacrel->current_block = next_block;
1717 270796 : *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1718 270796 : return vacrel->current_block;
1719 : }
1720 : }
1721 :
1722 : /*
1723 : * Find the next unskippable block in a vacuum scan using the visibility map.
1724 : * The next unskippable block and its visibility information is updated in
1725 : * vacrel.
1726 : *
1727 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1728 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1729 : * was concurrently cleared, though. All that matters is that caller scan all
1730 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1731 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1732 : * older XIDs/MXIDs. *skipsallvis is set here if the range has such a page; the
1733 : * caller sets vacrel->skippedallvis once it actually skips, keeping this safe.)
1734 : */
1735 : static void
1736 270796 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1737 : {
1738 270796 : BlockNumber rel_pages = vacrel->rel_pages;
1739 270796 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1740 270796 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1741 270796 : bool next_unskippable_eager_scanned = false;
1742 :
1743 270796 : *skipsallvis = false;
1744 :
1745 410064 : for (;; next_unskippable_block++)
1746 410064 : {
1747 680860 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1748 : next_unskippable_block,
1749 : &next_unskippable_vmbuffer);
1750 :
1751 :
1752 : /*
1753 : * At the start of each eager scan region, normal vacuums with eager
1754 : * scanning enabled reset the failure counter, allowing vacuum to
1755 : * resume eager scanning if it had been suspended in the previous
1756 : * region.
1757 : */
1758 680860 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1759 : {
1760 0 : vacrel->eager_scan_remaining_fails =
1761 0 : vacrel->eager_scan_max_fails_per_region;
1762 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1763 : }
1764 :
1765 : /*
1766 : * A block is unskippable if it is not all visible according to the
1767 : * visibility map.
1768 : */
1769 680860 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1770 : {
1771 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1772 231290 : break;
1773 : }
1774 :
1775 : /*
1776 : * Caller must scan the last page to determine whether it has tuples
1777 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1778 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1779 : * lock on rel to attempt a truncation that fails anyway, just because
1780 : * there are tuples on the last page (it is likely that there will be
1781 : * tuples on other nearby pages as well, but those can be skipped).
1782 : *
1783 : * Implement this by always treating the last block as unsafe to skip.
1784 : */
1785 449570 : if (next_unskippable_block == rel_pages - 1)
1786 39089 : break;
1787 :
1788 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1789 410481 : if (!vacrel->skipwithvm)
1790 417 : break;
1791 :
1792 : /*
1793 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1794 : * already frozen by now), so this page can be skipped.
1795 : */
1796 410064 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1797 406424 : continue;
1798 :
1799 : /*
1800 : * Aggressive vacuums cannot skip any all-visible pages that are not
1801 : * also all-frozen.
1802 : */
1803 3640 : if (vacrel->aggressive)
1804 0 : break;
1805 :
1806 : /*
1807 : * Normal vacuums with eager scanning enabled only skip all-visible
1808 : * but not all-frozen pages if they have hit the failure limit for the
1809 : * current eager scan region.
1810 : */
1811 3640 : if (vacrel->eager_scan_remaining_fails > 0)
1812 : {
1813 0 : next_unskippable_eager_scanned = true;
1814 0 : break;
1815 : }
1816 :
1817 : /*
1818 : * All-visible blocks are safe to skip in a normal vacuum. But tell the
1819 : * caller, via *skipsallvis, that the skippable range has such a block.
1820 : */
1821 3640 : *skipsallvis = true;
1822 : }
1823 :
1824 : /* publish the results: write the local working copies back to vacrel */
1825 270796 : vacrel->next_unskippable_block = next_unskippable_block;
1826 270796 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1827 270796 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1828 270796 : }
1829 :
1830 : /*
1831 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1832 : *
1833 : * Must call here to handle both new and empty pages before calling
1834 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1835 : * with new or empty pages.
1836 : *
1837 : * It's necessary to consider new pages as a special case, since the rules for
1838 : * maintaining the visibility map and FSM with empty pages are a little
1839 : * different (though new pages can be truncated away during rel truncation).
1840 : *
1841 : * Empty pages are not really a special case -- they're just heap pages that
1842 : * have no allocated tuples (including even LP_UNUSED items). You might
1843 : * wonder why we need to handle them here all the same. It's only necessary
1844 : * because of a corner-case involving a hard crash during heap relation
1845 : * extension. If we ever make relation-extension crash safe, then it should
1846 : * no longer be necessary to deal with empty pages here (or new pages, for
1847 : * that matter).
1848 : *
1849 : * Caller must hold at least a shared lock. If it holds only a shared lock,
1850 : * we might need to escalate it, so the type of lock caller holds needs to be
1851 : * specified using the 'sharelock' argument.
1852 : *
1853 : * Returns false in common case where caller should go on to call
1854 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1855 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1856 : * behalf.
1857 : *
1858 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1859 : * is passed here because neither empty nor new pages can be eagerly frozen.
1860 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1861 : * the same time that they are set all-visible, and we don't eagerly scan
1862 : * frozen pages.
1863 : */
1864 : static bool
1865 406488 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1866 : Page page, bool sharelock, Buffer vmbuffer)
1867 : {
1868 : Size freespace;
1869 :
1870 406488 : if (PageIsNew(page))
1871 : {
1872 : /*
1873 : * All-zeroes pages can be left over if either a backend extends the
1874 : * relation by a single page, but crashes before the newly initialized
1875 : * page has been written out, or when bulk-extending the relation
1876 : * (which creates a number of empty pages at the tail end of the
1877 : * relation), and then enters them into the FSM.
1878 : *
1879 : * Note we do not enter the page into the visibilitymap. That has the
1880 : * downside that we repeatedly visit this page in subsequent vacuums,
1881 : * but otherwise we'll never discover the space on a promoted standby.
1882 : * The harm of repeated checking ought to normally not be too bad. The
1883 : * space usually should be used at some point, otherwise there
1884 : * wouldn't be any regular vacuums.
1885 : *
1886 : * Make sure these pages are in the FSM, to ensure they can be reused.
1887 : * Do that by testing if there's any space recorded for the page. If
1888 : * not, enter it. We do so after releasing the lock on the heap page;
1889 : * the FSM is approximate, after all.
1890 : */
1891 1590 : UnlockReleaseBuffer(buf);
1892 :
1893 1590 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1894 : {
1895 712 : freespace = BLCKSZ - SizeOfPageHeaderData;
1896 :
1897 712 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1898 : }
1899 :
1900 1590 : return true;
1901 : }
1902 :
1903 404898 : if (PageIsEmpty(page))
1904 : {
1905 : /*
1906 : * It seems likely that caller will always be able to get a cleanup
1907 : * lock on an empty page. But don't take any chances -- escalate to
1908 : * an exclusive lock (still don't need a cleanup lock, though).
1909 : */
1910 30 : if (sharelock)
1911 : {
1912 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1913 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1914 :
1915 0 : if (!PageIsEmpty(page))
1916 : {
1917 : /* page is no longer empty -- keep the (now exclusive) lock and pin for now */
1918 0 : return false;
1919 : }
1920 : }
1921 : else
1922 : {
1923 : /* Already have a full cleanup lock (which is more than enough) */
1924 : }
1925 :
1926 : /*
1927 : * Unlike new pages, empty pages are always set all-visible and
1928 : * all-frozen.
1929 : */
1930 30 : if (!PageIsAllVisible(page))
1931 : {
1932 : /* Lock vmbuffer before entering critical section */
1933 0 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
1934 :
1935 0 : START_CRIT_SECTION();
1936 :
1937 : /* mark buffer dirty before writing a WAL record */
1938 0 : MarkBufferDirty(buf);
1939 :
1940 0 : PageSetAllVisible(page);
1941 0 : PageClearPrunable(page);
1942 0 : visibilitymap_set(blkno,
1943 : vmbuffer,
1944 : VISIBILITYMAP_ALL_VISIBLE |
1945 : VISIBILITYMAP_ALL_FROZEN,
1946 0 : vacrel->rel->rd_locator);
1947 :
1948 : /*
1949 : * Emit WAL for setting PD_ALL_VISIBLE on the heap page and
1950 : * setting the VM.
1951 : */
1952 0 : if (RelationNeedsWAL(vacrel->rel))
1953 0 : log_heap_prune_and_freeze(vacrel->rel, buf,
1954 : vmbuffer,
1955 : VISIBILITYMAP_ALL_VISIBLE |
1956 : VISIBILITYMAP_ALL_FROZEN,
1957 : InvalidTransactionId, /* conflict xid */
1958 : false, /* cleanup lock */
1959 : PRUNE_VACUUM_SCAN, /* reason */
1960 : NULL, 0,
1961 : NULL, 0,
1962 : NULL, 0,
1963 : NULL, 0);
1964 :
1965 0 : END_CRIT_SECTION();
1966 :
1967 0 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
1968 :
1969 : /* Count the page as newly all-visible and all-frozen for logging */
1970 0 : vacrel->new_all_visible_pages++;
1971 0 : vacrel->new_all_visible_all_frozen_pages++;
1972 : }
1973 :
1974 30 : freespace = PageGetHeapFreeSpace(page);
1975 30 : UnlockReleaseBuffer(buf);
1976 30 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1977 30 : return true;
1978 : }
1979 :
1980 : /* page isn't new or empty -- keep lock and pin */
1981 404868 : return false;
1982 : }
1983 :
1984 : /* qsort comparator for sorting OffsetNumbers; a and b each point to an OffsetNumber */
1985 : static int
1986 4036278 : cmpOffsetNumbers(const void *a, const void *b)
1987 : {
1988 4036278 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1989 : }
1990 :
1991 : /*
1992 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1993 : *
1994 : * Caller must hold pin and buffer cleanup lock on the buffer.
1995 : *
1996 : * vmbuffer is the buffer containing the VM block with visibility information
1997 : * for the heap block, blkno.
1998 : *
1999 : * *has_lpdead_items is set to true or false depending on whether, upon return
2000 : * from this function, any LP_DEAD items are still present on the page.
2001 : *
2002 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2003 : * VM. The caller currently only uses this for determining whether an eagerly
2004 : * scanned page was successfully set all-frozen.
2005 : *
2006 : * Returns the number of tuples deleted from the page during HOT pruning.
2007 : */
2008 : static int
2009 404782 : lazy_scan_prune(LVRelState *vacrel,
2010 : Buffer buf,
2011 : BlockNumber blkno,
2012 : Page page,
2013 : Buffer vmbuffer,
2014 : bool *has_lpdead_items,
2015 : bool *vm_page_frozen)
2016 : {
2017 404782 : Relation rel = vacrel->rel;
2018 : PruneFreezeResult presult;
2019 404782 : PruneFreezeParams params = {
2020 : .relation = rel,
2021 : .buffer = buf,
2022 : .vmbuffer = vmbuffer,
2023 : .reason = PRUNE_VACUUM_SCAN,
2024 : .options = HEAP_PAGE_PRUNE_FREEZE,
2025 404782 : .vistest = vacrel->vistest,
2026 404782 : .cutoffs = &vacrel->cutoffs,
2027 : };
2028 :
2029 : Assert(BufferGetBlockNumber(buf) == blkno);
2030 :
2031 : /*
2032 : * Prune all HOT-update chains and potentially freeze tuples on this page.
2033 : *
2034 : * If the relation has no indexes, we can immediately mark would-be dead
2035 : * items LP_UNUSED.
2036 : *
2037 : * The number of tuples removed from the page is returned in
2038 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
2039 : * presult.lpdead_items's final value can be thought of as the number of
2040 : * tuples that were deleted from indexes.
2041 : *
2042 : * We will update the VM after collecting LP_DEAD items and freezing
2043 : * tuples. Pruning will have determined whether or not the page is
2044 : * all-visible.
2045 : */
2046 404782 : if (vacrel->nindexes == 0)
2047 18823 : params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2048 :
2049 : /*
2050 : * Allow skipping full inspection of pages that the VM indicates are
2051 : * already all-frozen (which may be scanned due to SKIP_PAGES_THRESHOLD).
2052 : * However, if DISABLE_PAGE_SKIPPING was specified, we can't trust the VM,
2053 : * so we must examine the page to make sure it is truly all-frozen and fix
2054 : * it otherwise.
2055 : */
2056 404782 : if (vacrel->skipwithvm)
2057 403486 : params.options |= HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
2058 :
2059 404782 : heap_page_prune_and_freeze(&params,
2060 : &presult,
2061 : &vacrel->offnum,
2062 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2063 :
2064 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2065 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2066 :
2067 404782 : if (presult.nfrozen > 0)
2068 : {
2069 : /*
2070 : * We don't increment the new_frozen_tuple_pages instrumentation
2071 : * counter when nfrozen == 0, since it only counts pages with newly
2072 : * frozen tuples (don't confuse that with pages newly set all-frozen
2073 : * in VM).
2074 : */
2075 24473 : vacrel->new_frozen_tuple_pages++;
2076 : }
2077 :
2078 : /*
2079 : * Now save details of the LP_DEAD items from the page in vacrel
2080 : */
2081 404782 : if (presult.lpdead_items > 0)
2082 : {
2083 18153 : vacrel->lpdead_item_pages++;
2084 :
2085 : /*
2086 : * deadoffsets are collected incrementally in
2087 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2088 : * with an indeterminate order, but dead_items_add requires them to be
2089 : * sorted.
2090 : */
2091 18153 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2092 : cmpOffsetNumbers);
2093 :
2094 18153 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2095 : }
2096 :
2097 : /* Finally, add page-local counts to whole-VACUUM counts */
2098 404782 : if (presult.newly_all_visible)
2099 45133 : vacrel->new_all_visible_pages++;
2100 404782 : if (presult.newly_all_visible_frozen)
2101 30075 : vacrel->new_all_visible_all_frozen_pages++;
2102 404782 : if (presult.newly_all_frozen)
2103 26 : vacrel->new_all_frozen_pages++;
2104 :
2105 : /* Capture if the page was newly set frozen */
2106 779489 : *vm_page_frozen = presult.newly_all_visible_frozen ||
2107 374707 : presult.newly_all_frozen;
2108 :
2109 404782 : vacrel->tuples_deleted += presult.ndeleted;
2110 404782 : vacrel->tuples_frozen += presult.nfrozen;
2111 404782 : vacrel->lpdead_items += presult.lpdead_items;
2112 404782 : vacrel->live_tuples += presult.live_tuples;
2113 404782 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2114 :
2115 : /* Can't truncate this page */
2116 404782 : if (presult.hastup)
2117 394809 : vacrel->nonempty_pages = blkno + 1;
2118 :
2119 : /* Tell caller whether any LP_DEAD items remain on the page */
2120 404782 : *has_lpdead_items = (presult.lpdead_items > 0);
2121 :
2122 404782 : return presult.ndeleted;
2123 : }
2124 :
2125 : /*
2126 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2127 : *
2128 : * Caller need only hold a pin and share lock on the buffer, unlike
2129 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2130 : * performed here, it's quite possible that an earlier opportunistic pruning
2131 : * operation left LP_DEAD items behind. We'll at least collect any such items
2132 : * in dead_items for removal from indexes.
2133 : *
2134 : * For aggressive VACUUM callers, we may return false to indicate that a full
2135 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2136 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2137 : * one or more tuples on the page. We always return true for non-aggressive
2138 : * callers.
2139 : *
2140 : * If this function returns true, *has_lpdead_items gets set to true or false
2141 : * depending on whether, upon return from this function, any LP_DEAD items are
2142 : * present on the page. If this function returns false, *has_lpdead_items
2143 : * is not updated.
2144 : */
2145 : static bool
2146 108 : lazy_scan_noprune(LVRelState *vacrel,
2147 : Buffer buf,
2148 : BlockNumber blkno,
2149 : Page page,
2150 : bool *has_lpdead_items)
2151 : {
2152 : OffsetNumber offnum,
2153 : maxoff;
2154 : int lpdead_items,
2155 : live_tuples,
2156 : recently_dead_tuples,
2157 : missed_dead_tuples;
2158 : bool hastup;
2159 : HeapTupleHeader tupleheader;
2160 108 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2161 108 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2162 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2163 :
2164 : Assert(BufferGetBlockNumber(buf) == blkno);
2165 :
2166 108 : hastup = false; /* for now */
2167 :
2168 108 : lpdead_items = 0;
2169 108 : live_tuples = 0;
2170 108 : recently_dead_tuples = 0;
2171 108 : missed_dead_tuples = 0;
2172 :
2173 108 : maxoff = PageGetMaxOffsetNumber(page);
2174 108 : for (offnum = FirstOffsetNumber;
2175 4136 : offnum <= maxoff;
2176 4028 : offnum = OffsetNumberNext(offnum))
2177 : {
2178 : ItemId itemid;
2179 : HeapTupleData tuple;
2180 :
2181 4050 : vacrel->offnum = offnum;
2182 4050 : itemid = PageGetItemId(page, offnum);
2183 :
2184 4050 : if (!ItemIdIsUsed(itemid))
2185 750 : continue;
2186 :
2187 3724 : if (ItemIdIsRedirected(itemid))
2188 : {
2189 256 : hastup = true;
2190 256 : continue;
2191 : }
2192 :
2193 3468 : if (ItemIdIsDead(itemid))
2194 : {
2195 : /*
2196 : * Deliberately don't set hastup=true here. See same point in
2197 : * lazy_scan_prune for an explanation.
2198 : */
2199 168 : deadoffsets[lpdead_items++] = offnum;
2200 168 : continue;
2201 : }
2202 :
2203 3300 : hastup = true; /* page prevents rel truncation */
2204 3300 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2205 3300 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2206 : &NoFreezePageRelfrozenXid,
2207 : &NoFreezePageRelminMxid))
2208 : {
2209 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2210 86 : if (vacrel->aggressive)
2211 : {
2212 : /*
2213 : * Aggressive VACUUMs must always be able to advance rel's
2214 : * relfrozenxid to a value >= FreezeLimit (and be able to
2215 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2216 : * The ongoing aggressive VACUUM won't be able to do that
2217 : * unless it can freeze an XID (or MXID) from this tuple now.
2218 : *
2219 : * The only safe option is to have caller perform processing
2220 : * of this page using lazy_scan_prune. Caller might have to
2221 : * wait a while for a cleanup lock, but it can't be helped.
2222 : */
2223 22 : vacrel->offnum = InvalidOffsetNumber;
2224 22 : return false;
2225 : }
2226 :
2227 : /*
2228 : * Non-aggressive VACUUMs are under no obligation to advance
2229 : * relfrozenxid (even by one XID). We can be much laxer here.
2230 : *
2231 : * Currently we always just accept an older final relfrozenxid
2232 : * and/or relminmxid value. We never make caller wait or work a
2233 : * little harder, even when it likely makes sense to do so.
2234 : */
2235 : }
2236 :
2237 3278 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2238 3278 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2239 3278 : tuple.t_len = ItemIdGetLength(itemid);
2240 3278 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2241 :
2242 3278 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2243 : buf))
2244 : {
2245 3225 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2246 : case HEAPTUPLE_LIVE:
2247 :
2248 : /*
2249 : * Count both cases as live, just like lazy_scan_prune
2250 : */
2251 3225 : live_tuples++;
2252 :
2253 3225 : break;
2254 14 : case HEAPTUPLE_DEAD:
2255 :
2256 : /*
2257 : * There is some useful work for pruning to do, that won't be
2258 : * done due to failure to get a cleanup lock.
2259 : */
2260 14 : missed_dead_tuples++;
2261 14 : break;
2262 37 : case HEAPTUPLE_RECENTLY_DEAD:
2263 :
2264 : /*
2265 : * Count in recently_dead_tuples, just like lazy_scan_prune
2266 : */
2267 37 : recently_dead_tuples++;
2268 37 : break;
2269 2 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2270 :
2271 : /*
2272 : * Do not count these rows as live, just like lazy_scan_prune
2273 : */
2274 2 : break;
2275 0 : default:
2276 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2277 : break;
2278 : }
2279 : }
2280 :
2281 86 : vacrel->offnum = InvalidOffsetNumber;
2282 :
2283 : /*
2284 : * By here we know for sure that caller can put off freezing and pruning
2285 : * this particular page until the next VACUUM. Remember its details now.
2286 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2287 : */
2288 86 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2289 86 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2290 :
2291 : /* Save any LP_DEAD items found on the page in dead_items */
2292 86 : if (vacrel->nindexes == 0)
2293 : {
2294 : /* Using one-pass strategy (since table has no indexes) */
2295 0 : if (lpdead_items > 0)
2296 : {
2297 : /*
2298 : * Perfunctory handling for the corner case where a single pass
2299 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2300 : * that there are one or more LP_DEAD items: just count the LP_DEAD
2301 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2302 : * but it beats having to maintain specialized heap vacuuming code
2303 : * forever, for vanishingly little benefit.)
2304 : */
2305 0 : hastup = true;
2306 0 : missed_dead_tuples += lpdead_items;
2307 : }
2308 : }
2309 86 : else if (lpdead_items > 0)
2310 : {
2311 : /*
2312 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2313 : * indexes will be deleted during index vacuuming (and then marked
2314 : * LP_UNUSED in the heap)
2315 : */
2316 4 : vacrel->lpdead_item_pages++;
2317 :
2318 4 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2319 :
2320 4 : vacrel->lpdead_items += lpdead_items;
2321 : }
2322 :
2323 : /*
2324 : * Finally, add relevant page-local counts to whole-VACUUM counts
2325 : */
2326 86 : vacrel->live_tuples += live_tuples;
2327 86 : vacrel->recently_dead_tuples += recently_dead_tuples;
2328 86 : vacrel->missed_dead_tuples += missed_dead_tuples;
2329 86 : if (missed_dead_tuples > 0)
2330 3 : vacrel->missed_dead_pages++;
2331 :
2332 : /* Can't truncate this page */
2333 86 : if (hastup)
2334 86 : vacrel->nonempty_pages = blkno + 1;
2335 :
2336 : /* Tell caller whether any LP_DEAD items are present on the page */
2337 86 : *has_lpdead_items = (lpdead_items > 0);
2338 :
2339 : /* Caller won't need to call lazy_scan_prune with same page */
2340 86 : return true;
2341 : }
2342 :
2343 : /*
2344 : * Main entry point for index vacuuming and heap vacuuming.
2345 : *
2346 : * Removes items collected in dead_items from table's indexes, then marks the
2347 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2348 : * for full details.
2349 : *
2350 : * Also empties dead_items, freeing up space for later TIDs.
2351 : *
2352 : * We may choose to bypass index vacuuming at this point, though only when the
2353 : * ongoing VACUUM operation will definitely only have one index scan/round of
2354 : * index vacuuming.
2355 : */
2356 : static void
2357 785 : lazy_vacuum(LVRelState *vacrel)
2358 : {
2359 : bool bypass;
2360 :
2361 : /* Should not end up here with no indexes */
2362 : Assert(vacrel->nindexes > 0);
2363 : Assert(vacrel->lpdead_item_pages > 0);
2364 :
2365 785 : if (!vacrel->do_index_vacuuming)
2366 : {
2367 : Assert(!vacrel->do_index_cleanup);
2368 11 : dead_items_reset(vacrel);
2369 11 : return;
2370 : }
2371 :
2372 : /*
2373 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2374 : *
2375 : * We currently only do this in cases where the number of LP_DEAD items
2376 : * for the entire VACUUM operation is close to zero. This avoids sharp
2377 : * discontinuities in the duration and overhead of successive VACUUM
2378 : * operations that run against the same table with a fixed workload.
2379 : * Ideally, successive VACUUM operations will behave as if there are
2380 : * exactly zero LP_DEAD items in cases where there are close to zero.
2381 : *
2382 : * This is likely to be helpful with a table that is continually affected
2383 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2384 : * have small aberrations that lead to just a few heap pages retaining
2385 : * only one or two LP_DEAD items. This is pretty common; even when the
2386 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2387 : * impossible to predict whether HOT will be applied in 100% of cases.
2388 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2389 : * HOT through careful tuning.
2390 : */
2391 774 : bypass = false;
2392 774 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2393 : {
2394 : BlockNumber threshold;
2395 :
2396 : Assert(vacrel->num_index_scans == 0);
2397 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2398 : Assert(vacrel->do_index_vacuuming);
2399 : Assert(vacrel->do_index_cleanup);
2400 :
2401 : /*
2402 : * This crossover point at which we'll start to do index vacuuming is
2403 : * expressed as a percentage of the total number of heap pages in the
2404 : * table that are known to have at least one LP_DEAD item. This is
2405 : * much more important than the total number of LP_DEAD items, since
2406 : * it's a proxy for the number of heap pages whose visibility map bits
2407 : * cannot be set on account of bypassing index and heap vacuuming.
2408 : *
2409 : * We apply one further precautionary test: the space currently used
2410 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2411 : * not exceed 32MB. This limits the risk that we will bypass index
2412 : * vacuuming again and again until eventually there is a VACUUM whose
2413 : * dead_items space is not CPU cache resident.
2414 : *
2415 : * We don't take any special steps to remember the LP_DEAD items (such
2416 : * as counting them in our final update to the stats system) when the
2417 : * optimization is applied. Though the accounting used in analyze.c's
2418 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2419 : * rows in its own stats report, that's okay. The discrepancy should
2420 : * be negligible. If this optimization is ever expanded to cover more
2421 : * cases then this may need to be reconsidered.
2422 : */
2423 751 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2424 755 : bypass = (vacrel->lpdead_item_pages < threshold &&
2425 4 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2426 : }
2427 :
2428 774 : if (bypass)
2429 : {
2430 : /*
2431 : * There are almost zero TIDs. Behave as if there were precisely
2432 : * zero: bypass index vacuuming, but do index cleanup.
2433 : *
2434 : * We expect that the ongoing VACUUM operation will finish very
2435 : * quickly, so there is no point in considering speeding up as a
2436 : * failsafe against wraparound failure. (Index cleanup is expected to
2437 : * finish very quickly in cases where there were no ambulkdelete()
2438 : * calls.)
2439 : */
2440 4 : vacrel->do_index_vacuuming = false;
2441 : }
2442 770 : else if (lazy_vacuum_all_indexes(vacrel))
2443 : {
2444 : /*
2445 : * We successfully completed a round of index vacuuming. Do related
2446 : * heap vacuuming now.
2447 : */
2448 770 : lazy_vacuum_heap_rel(vacrel);
2449 : }
2450 : else
2451 : {
2452 : /*
2453 : * Failsafe case.
2454 : *
2455 : * We attempted index vacuuming, but didn't finish a full round/full
2456 : * index scan. This happens when relfrozenxid or relminmxid is too
2457 : * far in the past.
2458 : *
2459 : * From this point on the VACUUM operation will do no further index
2460 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2461 : * back here again.
2462 : */
2463 : Assert(VacuumFailsafeActive);
2464 : }
2465 :
2466 : /*
2467 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2468 : * vacuum)
2469 : */
2470 774 : dead_items_reset(vacrel);
2471 : }
2472 :
2473 : /*
2474 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2475 : *
2476 : * Returns true in the common case when all indexes were successfully
2477 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2478 : * VACUUM operation is at risk of taking too long to finish, leading to
2479 : * wraparound failure.
2480 : */
2481 : static bool
2482 770 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2483 : {
2484 770 : bool allindexes = true;
2485 770 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2486 770 : const int progress_start_index[] = {
2487 : PROGRESS_VACUUM_PHASE,
2488 : PROGRESS_VACUUM_INDEXES_TOTAL
2489 : };
2490 770 : const int progress_end_index[] = {
2491 : PROGRESS_VACUUM_INDEXES_TOTAL,
2492 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2493 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2494 : };
2495 : int64 progress_start_val[2];
2496 : int64 progress_end_val[3];
2497 :
2498 : Assert(vacrel->nindexes > 0);
2499 : Assert(vacrel->do_index_vacuuming);
2500 : Assert(vacrel->do_index_cleanup);
2501 :
2502 : /* Precheck for XID wraparound emergencies */
2503 770 : if (lazy_check_wraparound_failsafe(vacrel))
2504 : {
2505 : /* Wraparound emergency -- don't even start an index scan */
2506 0 : return false;
2507 : }
2508 :
2509 : /*
2510 : * Report that we are now vacuuming indexes and the number of indexes to
2511 : * vacuum.
2512 : */
2513 770 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2514 770 : progress_start_val[1] = vacrel->nindexes;
2515 770 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2516 :
2517 770 : if (!ParallelVacuumIsActive(vacrel))
2518 : {
2519 2205 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2520 : {
2521 1451 : Relation indrel = vacrel->indrels[idx];
2522 1451 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2523 :
2524 1451 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2525 : old_live_tuples,
2526 : vacrel);
2527 :
2528 : /* Report the number of indexes vacuumed */
2529 1451 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2530 1451 : idx + 1);
2531 :
2532 1451 : if (lazy_check_wraparound_failsafe(vacrel))
2533 : {
2534 : /* Wraparound emergency -- end current index scan */
2535 0 : allindexes = false;
2536 0 : break;
2537 : }
2538 : }
2539 : }
2540 : else
2541 : {
2542 : /* Outsource everything to parallel variant */
2543 16 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2544 : vacrel->num_index_scans,
2545 : &(vacrel->worker_usage.vacuum));
2546 :
2547 : /*
2548 : * Do a postcheck to consider applying wraparound failsafe now. Note
2549 : * that parallel VACUUM only gets the precheck and this postcheck.
2550 : */
2551 16 : if (lazy_check_wraparound_failsafe(vacrel))
2552 0 : allindexes = false;
2553 : }
2554 :
2555 : /*
2556 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2557 : * each call here (except calls where we choose to do the failsafe). This
2558 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2559 : * of the failsafe triggering, which prevents the next call from taking
2560 : * place).
2561 : */
2562 : Assert(vacrel->num_index_scans > 0 ||
2563 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2564 : Assert(allindexes || VacuumFailsafeActive);
2565 :
2566 : /*
2567 : * Increase and report the number of index scans. Also, we reset
2568 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2569 : *
2570 : * We deliberately include the case where we started a round of bulk
2571 : * deletes that we weren't able to finish due to the failsafe triggering.
2572 : */
2573 770 : vacrel->num_index_scans++;
2574 770 : progress_end_val[0] = 0;
2575 770 : progress_end_val[1] = 0;
2576 770 : progress_end_val[2] = vacrel->num_index_scans;
2577 770 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2578 :
2579 770 : return allindexes;
2580 : }
2581 :
2582 : /*
2583 : * Read stream callback for vacuum's third phase (second pass over the heap).
2584 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2585 : * if there are no further blocks to vacuum.
2586 : *
2587 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2588 : */
2589 : static BlockNumber
2590 16618 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2591 : void *callback_private_data,
2592 : void *per_buffer_data)
2593 : {
2594 16618 : TidStoreIter *iter = callback_private_data;
2595 : TidStoreIterResult *iter_result;
2596 :
2597 16618 : iter_result = TidStoreIterateNext(iter);
2598 16618 : if (iter_result == NULL)
2599 770 : return InvalidBlockNumber;
2600 :
2601 : /*
2602 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2603 : * It is safe to copy the result, according to TidStoreIterateNext().
2604 : */
2605 15848 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2606 :
2607 15848 : return iter_result->blkno;
2608 : }
2609 :
2610 : /*
2611 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2612 : *
2613 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2614 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2615 : *
2616 : * We may also be able to truncate the line pointer array of the heap pages we
2617 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2618 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2619 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2620 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2621 : * page's line pointer array).
2622 : *
2623 : * Note: the reason for doing this as a second pass is we cannot remove the
2624 : * tuples until we've removed their index entries, and we want to process
2625 : * index entry removal in batches as large as possible.
2626 : */
2627 : static void
2628 770 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2629 : {
2630 : ReadStream *stream;
2631 770 : BlockNumber vacuumed_pages = 0;
2632 770 : Buffer vmbuffer = InvalidBuffer;
2633 : LVSavedErrInfo saved_err_info;
2634 : TidStoreIter *iter;
2635 :
2636 : Assert(vacrel->do_index_vacuuming);
2637 : Assert(vacrel->do_index_cleanup);
2638 : Assert(vacrel->num_index_scans > 0);
2639 :
2640 : /* Report that we are now vacuuming the heap */
2641 770 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2642 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2643 :
2644 : /* Update error traceback information */
2645 770 : update_vacuum_error_info(vacrel, &saved_err_info,
2646 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2647 : InvalidBlockNumber, InvalidOffsetNumber);
2648 :
2649 770 : iter = TidStoreBeginIterate(vacrel->dead_items);
2650 :
2651 : /*
2652 : * Set up the read stream for vacuum's second pass through the heap.
2653 : *
2654 : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2655 : * not need to wait for IO and does not perform locking. Once we support
2656 : * parallelism it should still be fine, as presumably the holder of locks
2657 : * would never be blocked by IO while holding the lock.
2658 : */
2659 770 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2660 : READ_STREAM_USE_BATCHING,
2661 : vacrel->bstrategy,
2662 : vacrel->rel,
2663 : MAIN_FORKNUM,
2664 : vacuum_reap_lp_read_stream_next,
2665 : iter,
2666 : sizeof(TidStoreIterResult));
2667 :
2668 : while (true)
2669 15848 : {
2670 : BlockNumber blkno;
2671 : Buffer buf;
2672 : Page page;
2673 : TidStoreIterResult *iter_result;
2674 : Size freespace;
2675 : OffsetNumber offsets[MaxOffsetNumber];
2676 : int num_offsets;
2677 :
2678 16618 : vacuum_delay_point(false);
2679 :
2680 16618 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2681 :
2682 : /* The relation is exhausted */
2683 16618 : if (!BufferIsValid(buf))
2684 770 : break;
2685 :
2686 15848 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2687 :
2688 : Assert(iter_result);
2689 15848 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2690 : Assert(num_offsets <= lengthof(offsets));
2691 :
2692 : /*
2693 : * Pin the visibility map page in case we need to mark the page
2694 : * all-visible. In most cases this will be very cheap, because we'll
2695 : * already have the correct page pinned anyway.
2696 : */
2697 15848 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2698 :
2699 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2700 15848 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2701 15848 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2702 : num_offsets, vmbuffer);
2703 :
2704 : /* Now that we've vacuumed the page, record its available space */
2705 15848 : page = BufferGetPage(buf);
2706 15848 : freespace = PageGetHeapFreeSpace(page);
2707 :
2708 15848 : UnlockReleaseBuffer(buf);
2709 15848 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2710 15848 : vacuumed_pages++;
2711 : }
2712 :
2713 770 : read_stream_end(stream);
2714 770 : TidStoreEndIterate(iter);
2715 :
2716 770 : vacrel->blkno = InvalidBlockNumber;
2717 770 : if (BufferIsValid(vmbuffer))
2718 770 : ReleaseBuffer(vmbuffer);
2719 :
2720 : /*
2721 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2722 : * the second heap pass. No more, no less.
2723 : */
2724 : Assert(vacrel->num_index_scans > 1 ||
2725 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2726 : vacuumed_pages == vacrel->lpdead_item_pages));
2727 :
2728 770 : ereport(DEBUG2,
2729 : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2730 : vacrel->relname, vacrel->dead_items_info->num_items,
2731 : vacuumed_pages)));
2732 :
2733 : /* Revert to the previous phase information for error traceback */
2734 770 : restore_vacuum_error_info(vacrel, &saved_err_info);
2735 770 : }
2736 :
2737 : /*
2738 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2739 : * vacrel->dead_items store.
2740 : *
2741 : * Caller must have an exclusive buffer lock on the buffer (though a full
2742 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2743 : * a pin on blkno's visibility map page.
2744 : */
2745 : static void
2746 15848 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2747 : OffsetNumber *deadoffsets, int num_offsets,
2748 : Buffer vmbuffer)
2749 : {
2750 15848 : Page page = BufferGetPage(buffer);
2751 : OffsetNumber unused[MaxHeapTuplesPerPage];
2752 15848 : int nunused = 0;
2753 : TransactionId newest_live_xid;
2754 15848 : TransactionId conflict_xid = InvalidTransactionId;
2755 : bool all_frozen;
2756 : LVSavedErrInfo saved_err_info;
2757 15848 : uint8 vmflags = 0;
2758 :
2759 : Assert(vacrel->do_index_vacuuming);
2760 :
2761 15848 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2762 :
2763 : /* Update error traceback information */
2764 15848 : update_vacuum_error_info(vacrel, &saved_err_info,
2765 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2766 : InvalidOffsetNumber);
2767 :
2768 : /*
2769 : * Before marking dead items unused, check whether the page will become
2770 : * all-visible once that change is applied. This lets us reap the tuples
2771 : * and mark the page all-visible within the same critical section,
2772 : * enabling both changes to be emitted in a single WAL record. Since the
2773 : * visibility checks may perform I/O and allocate memory, they must be
2774 : * done outside the critical section.
2775 : */
2776 15848 : if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2777 : vacrel->vistest, true,
2778 : deadoffsets, num_offsets,
2779 : &all_frozen, &newest_live_xid,
2780 : &vacrel->offnum))
2781 : {
2782 15711 : vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2783 15711 : if (all_frozen)
2784 : {
2785 12087 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
2786 : Assert(!TransactionIdIsValid(newest_live_xid));
2787 : }
2788 :
2789 : /*
2790 : * Take the lock on the vmbuffer before entering a critical section.
2791 : * The heap page lock must also be held while updating the VM to
2792 : * ensure consistency.
2793 : */
2794 15711 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2795 : }
2796 :
2797 15848 : START_CRIT_SECTION();
2798 :
2799 1107914 : for (int i = 0; i < num_offsets; i++)
2800 : {
2801 : ItemId itemid;
2802 1092066 : OffsetNumber toff = deadoffsets[i];
2803 :
2804 1092066 : itemid = PageGetItemId(page, toff);
2805 :
2806 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2807 1092066 : ItemIdSetUnused(itemid);
2808 1092066 : unused[nunused++] = toff;
2809 : }
2810 :
2811 : Assert(nunused > 0);
2812 :
2813 : /* Attempt to truncate line pointer array now */
2814 15848 : PageTruncateLinePointerArray(page);
2815 :
2816 15848 : if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2817 : {
2818 : /*
2819 : * The page is guaranteed to have had dead line pointers, so we always
2820 : * set PD_ALL_VISIBLE.
2821 : */
2822 15711 : PageSetAllVisible(page);
2823 15711 : PageClearPrunable(page);
2824 15711 : visibilitymap_set(blkno,
2825 : vmbuffer, vmflags,
2826 15711 : vacrel->rel->rd_locator);
2827 15711 : conflict_xid = newest_live_xid;
2828 : }
2829 :
2830 : /*
2831 : * Mark buffer dirty before we write WAL.
2832 : */
2833 15848 : MarkBufferDirty(buffer);
2834 :
2835 : /* XLOG stuff */
2836 15848 : if (RelationNeedsWAL(vacrel->rel))
2837 : {
2838 14754 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2839 : vmflags != 0 ? vmbuffer : InvalidBuffer,
2840 : vmflags,
2841 : conflict_xid,
2842 : false, /* no cleanup lock required */
2843 : PRUNE_VACUUM_CLEANUP,
2844 : NULL, 0, /* frozen */
2845 : NULL, 0, /* redirected */
2846 : NULL, 0, /* dead */
2847 : unused, nunused);
2848 : }
2849 :
2850 15848 : END_CRIT_SECTION();
2851 :
2852 15848 : if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
2853 : {
2854 : /* Count the newly set VM page for logging */
2855 15711 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2856 15711 : vacrel->new_all_visible_pages++;
2857 15711 : if (all_frozen)
2858 12087 : vacrel->new_all_visible_all_frozen_pages++;
2859 : }
2860 :
2861 : /* Revert to the previous phase information for error traceback */
2862 15848 : restore_vacuum_error_info(vacrel, &saved_err_info);
2863 15848 : }
2864 :
2865 : /*
2866 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2867 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2868 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2869 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2870 : *
2871 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2872 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2873 : * that it started out with.
2874 : *
2875 : * Returns true when failsafe has been triggered.
2876 : */
2877 : static bool
2878 118132 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2879 : {
2880 : /* Don't warn more than once per VACUUM */
2881 118132 : if (VacuumFailsafeActive)
2882 0 : return true;
2883 :
2884 118132 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2885 : {
2886 23987 : const int progress_index[] = {
2887 : PROGRESS_VACUUM_INDEXES_TOTAL,
2888 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2889 : PROGRESS_VACUUM_MODE
2890 : };
2891 23987 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
2892 :
2893 23987 : VacuumFailsafeActive = true;
2894 :
2895 : /*
2896 : * Abandon use of a buffer access strategy to allow use of all of
2897 : * shared buffers. We assume the caller who allocated the memory for
2898 : * the BufferAccessStrategy will free it.
2899 : */
2900 23987 : vacrel->bstrategy = NULL;
2901 :
2902 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2903 23987 : vacrel->do_index_vacuuming = false;
2904 23987 : vacrel->do_index_cleanup = false;
2905 23987 : vacrel->do_rel_truncate = false;
2906 :
2907 : /* Reset the progress counters and set the failsafe mode */
2908 23987 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
2909 :
2910 23987 : ereport(WARNING,
2911 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2912 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2913 : vacrel->num_index_scans),
2914 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2915 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2916 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2917 :
2918 : /* Stop applying cost limits from this point on */
2919 23987 : VacuumCostActive = false;
2920 23987 : VacuumCostBalance = 0;
2921 :
2922 23987 : return true;
2923 : }
2924 :
2925 94145 : return false;
2926 : }
2927 :
2928 : /*
2929 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2930 : */
2931 : static void
2932 87332 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2933 : {
2934 87332 : double reltuples = vacrel->new_rel_tuples;
2935 87332 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2936 87332 : const int progress_start_index[] = {
2937 : PROGRESS_VACUUM_PHASE,
2938 : PROGRESS_VACUUM_INDEXES_TOTAL
2939 : };
2940 87332 : const int progress_end_index[] = {
2941 : PROGRESS_VACUUM_INDEXES_TOTAL,
2942 : PROGRESS_VACUUM_INDEXES_PROCESSED
2943 : };
2944 : int64 progress_start_val[2];
2945 87332 : int64 progress_end_val[2] = {0, 0};
2946 :
2947 : Assert(vacrel->do_index_cleanup);
2948 : Assert(vacrel->nindexes > 0);
2949 :
2950 : /*
2951 : * Report that we are now cleaning up indexes and the number of indexes to
2952 : * cleanup.
2953 : */
2954 87332 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2955 87332 : progress_start_val[1] = vacrel->nindexes;
2956 87332 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2957 :
2958 87332 : if (!ParallelVacuumIsActive(vacrel))
2959 : {
2960 226190 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2961 : {
2962 138881 : Relation indrel = vacrel->indrels[idx];
2963 138881 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2964 :
2965 277762 : vacrel->indstats[idx] =
2966 138881 : lazy_cleanup_one_index(indrel, istat, reltuples,
2967 : estimated_count, vacrel);
2968 :
2969 : /* Report the number of indexes cleaned up */
2970 138881 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2971 138881 : idx + 1);
2972 : }
2973 : }
2974 : else
2975 : {
2976 : /* Outsource everything to parallel variant */
2977 23 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2978 : vacrel->num_index_scans,
2979 : estimated_count,
2980 : &(vacrel->worker_usage.cleanup));
2981 : }
2982 :
2983 : /* Reset the progress counters */
2984 87332 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2985 87332 : }
2986 :
2987 : /*
2988 : * lazy_vacuum_one_index() -- vacuum index relation.
2989 : *
2990 : * Delete all the index tuples containing a TID collected in
2991 : * vacrel->dead_items. Also update running statistics. Exact
2992 : * details depend on index AM's ambulkdelete routine.
2993 : *
2994 : * reltuples is the number of heap tuples to be passed to the
2995 : * bulkdelete callback. It's always assumed to be estimated.
2996 : * See indexam.sgml for more info.
2997 : *
2998 : * Returns bulk delete stats derived from input stats
2999 : */
3000 : static IndexBulkDeleteResult *
3001 1451 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3002 : double reltuples, LVRelState *vacrel)
3003 : {
3004 : IndexVacuumInfo ivinfo;
3005 : LVSavedErrInfo saved_err_info;
3006 :
3007 1451 : ivinfo.index = indrel;
3008 1451 : ivinfo.heaprel = vacrel->rel;
3009 1451 : ivinfo.analyze_only = false;
3010 1451 : ivinfo.report_progress = false;
3011 1451 : ivinfo.estimated_count = true;
3012 1451 : ivinfo.message_level = DEBUG2;
3013 1451 : ivinfo.num_heap_tuples = reltuples;
3014 1451 : ivinfo.strategy = vacrel->bstrategy;
3015 :
3016 : /*
3017 : * Update error traceback information.
3018 : *
3019 : * The index name is saved during this phase and restored immediately
3020 : * after this phase. See vacuum_error_callback.
3021 : */
3022 : Assert(vacrel->indname == NULL);
3023 1451 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3024 1451 : update_vacuum_error_info(vacrel, &saved_err_info,
3025 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3026 : InvalidBlockNumber, InvalidOffsetNumber);
3027 :
3028 : /* Do bulk deletion */
3029 1451 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3030 : vacrel->dead_items_info);
3031 :
3032 : /* Revert to the previous phase information for error traceback */
3033 1451 : restore_vacuum_error_info(vacrel, &saved_err_info);
3034 1451 : pfree(vacrel->indname);
3035 1451 : vacrel->indname = NULL;
3036 :
3037 1451 : return istat;
3038 : }
3039 :
3040 : /*
3041 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3042 : *
3043 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3044 : * of heap tuples and estimated_count is true if reltuples is an
3045 : * estimated value. See indexam.sgml for more info.
3046 : *
3047 : * Returns bulk delete stats derived from input stats
3048 : */
3049 : static IndexBulkDeleteResult *
3050 138881 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3051 : double reltuples, bool estimated_count,
3052 : LVRelState *vacrel)
3053 : {
3054 : IndexVacuumInfo ivinfo;
3055 : LVSavedErrInfo saved_err_info;
3056 :
3057 138881 : ivinfo.index = indrel;
3058 138881 : ivinfo.heaprel = vacrel->rel;
3059 138881 : ivinfo.analyze_only = false;
3060 138881 : ivinfo.report_progress = false;
3061 138881 : ivinfo.estimated_count = estimated_count;
3062 138881 : ivinfo.message_level = DEBUG2;
3063 :
3064 138881 : ivinfo.num_heap_tuples = reltuples;
3065 138881 : ivinfo.strategy = vacrel->bstrategy;
3066 :
3067 : /*
3068 : * Update error traceback information.
3069 : *
3070 : * The index name is saved during this phase and restored immediately
3071 : * after this phase. See vacuum_error_callback.
3072 : */
3073 : Assert(vacrel->indname == NULL);
3074 138881 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3075 138881 : update_vacuum_error_info(vacrel, &saved_err_info,
3076 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3077 : InvalidBlockNumber, InvalidOffsetNumber);
3078 :
3079 138881 : istat = vac_cleanup_one_index(&ivinfo, istat);
3080 :
3081 : /* Revert to the previous phase information for error traceback */
3082 138881 : restore_vacuum_error_info(vacrel, &saved_err_info);
3083 138881 : pfree(vacrel->indname);
3084 138881 : vacrel->indname = NULL;
3085 :
3086 138881 : return istat;
3087 : }
3088 :
3089 : /*
3090 : * should_attempt_truncation - should we attempt to truncate the heap?
3091 : *
3092 : * Don't even think about it unless we have a shot at releasing a goodly
3093 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3094 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3095 : * be particularly disruptive.
3096 : *
3097 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3098 : * system might be refusing to allocate new XIDs at this point. The system
3099 : * definitely won't return to normal unless and until VACUUM actually advances
3100 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3101 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3102 : * truncate the table under these circumstances, an XID exhaustion error might
3103 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3104 : * There is very little chance of truncation working out when the failsafe is
3105 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3106 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3107 : * we're called.
3108 : */
3109 : static bool
3110 115894 : should_attempt_truncation(LVRelState *vacrel)
3111 : {
3112 : BlockNumber possibly_freeable;
3113 :
3114 115894 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3115 24149 : return false;
3116 :
3117 91745 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3118 91745 : if (possibly_freeable > 0 &&
3119 216 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3120 216 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3121 203 : return true;
3122 :
3123 91542 : return false;
3124 : }
3125 :
3126 : /*
3127 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3128 : */
3129 : static void
3130 203 : lazy_truncate_heap(LVRelState *vacrel)
3131 : {
3132 203 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3133 : BlockNumber new_rel_pages;
3134 : bool lock_waiter_detected;
3135 : int lock_retry;
3136 :
3137 : /* Report that we are now truncating */
3138 203 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3139 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3140 :
3141 : /* Update error traceback information one last time */
3142 203 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3143 : vacrel->nonempty_pages, InvalidOffsetNumber);
3144 :
3145 : /*
3146 : * Loop until no more truncating can be done.
3147 : */
3148 : do
3149 : {
3150 : /*
3151 : * We need full exclusive lock on the relation in order to do
3152 : * truncation. If we can't get it, give up rather than waiting --- we
3153 : * don't want to block other backends, and we don't want to deadlock
3154 : * (which is quite possible considering we already hold a lower-grade
3155 : * lock).
3156 : */
3157 203 : lock_waiter_detected = false;
3158 203 : lock_retry = 0;
3159 : while (true)
3160 : {
3161 403 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3162 201 : break;
3163 :
3164 : /*
3165 : * Check for interrupts while trying to (re-)acquire the exclusive
3166 : * lock.
3167 : */
3168 202 : CHECK_FOR_INTERRUPTS();
3169 :
3170 202 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3171 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3172 : {
3173 : /*
3174 : * We failed to establish the lock in the specified number of
3175 : * retries. This means we give up truncating.
3176 : */
3177 2 : ereport(vacrel->verbose ? INFO : DEBUG2,
3178 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3179 : vacrel->relname)));
3180 3 : return;
3181 : }
3182 :
3183 200 : (void) WaitLatch(MyLatch,
3184 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3185 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3186 : WAIT_EVENT_VACUUM_TRUNCATE);
3187 200 : ResetLatch(MyLatch);
3188 : }
3189 :
3190 : /*
3191 : * Now that we have exclusive lock, look to see if the rel has grown
3192 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3193 : * the newly added pages presumably contain non-deletable tuples.
3194 : */
3195 201 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3196 201 : if (new_rel_pages != orig_rel_pages)
3197 : {
3198 : /*
3199 : * Note: we intentionally don't update vacrel->rel_pages with the
3200 : * new rel size here. If we did, it would amount to assuming that
3201 : * the new pages are empty, which is unlikely. Leaving the numbers
3202 : * alone amounts to assuming that the new pages have the same
3203 : * tuple density as existing ones, which is less unlikely.
3204 : */
3205 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3206 0 : return;
3207 : }
3208 :
3209 : /*
3210 : * Scan backwards from the end to verify that the end pages actually
3211 : * contain no tuples. This is *necessary*, not optional, because
3212 : * other backends could have added tuples to these pages whilst we
3213 : * were vacuuming.
3214 : */
3215 201 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3216 201 : vacrel->blkno = new_rel_pages;
3217 :
3218 201 : if (new_rel_pages >= orig_rel_pages)
3219 : {
3220 : /* can't do anything after all */
3221 1 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3222 1 : return;
3223 : }
3224 :
3225 : /*
3226 : * Okay to truncate.
3227 : */
3228 200 : RelationTruncate(vacrel->rel, new_rel_pages);
3229 :
3230 : /*
3231 : * We can release the exclusive lock as soon as we have truncated.
3232 : * Other backends can't safely access the relation until they have
3233 : * processed the smgr invalidation that smgrtruncate sent out ... but
3234 : * that should happen as part of standard invalidation processing once
3235 : * they acquire lock on the relation.
3236 : */
3237 200 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3238 :
3239 : /*
3240 : * Update statistics. Here, it *is* correct to adjust rel_pages
3241 : * without also touching reltuples, since the tuple count wasn't
3242 : * changed by the truncation.
3243 : */
3244 200 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3245 200 : vacrel->rel_pages = new_rel_pages;
3246 :
3247 200 : ereport(vacrel->verbose ? INFO : DEBUG2,
3248 : (errmsg("table \"%s\": truncated %u to %u pages",
3249 : vacrel->relname,
3250 : orig_rel_pages, new_rel_pages)));
3251 200 : orig_rel_pages = new_rel_pages;
3252 200 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3253 : }
3254 :
3255 : /*
3256 : * Rescan end pages to verify that they are (still) empty of tuples.
3257 : *
3258 : * Returns number of nondeletable pages (last nonempty page + 1).
3259 : */
3260 : static BlockNumber
3261 201 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3262 : {
3263 : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3264 : "prefetch size must be power of 2");
3265 :
3266 : BlockNumber blkno;
3267 : BlockNumber prefetchedUntil;
3268 : instr_time starttime;
3269 :
3270 : /* Initialize the starttime if we check for conflicting lock requests */
3271 201 : INSTR_TIME_SET_CURRENT(starttime);
3272 :
3273 : /*
3274 : * Start checking blocks at what we believe relation end to be and move
3275 : * backwards. (Strange coding of loop control is needed because blkno is
3276 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3277 : * in forward direction, so that OS-level readahead can kick in.
3278 : */
3279 201 : blkno = vacrel->rel_pages;
3280 201 : prefetchedUntil = InvalidBlockNumber;
3281 3757 : while (blkno > vacrel->nonempty_pages)
3282 : {
3283 : Buffer buf;
3284 : Page page;
3285 : OffsetNumber offnum,
3286 : maxoff;
3287 : bool hastup;
3288 :
3289 : /*
3290 : * Check if another process requests a lock on our relation. We are
3291 : * holding an AccessExclusiveLock here, so they will be waiting. We
3292 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3293 : * only check if that interval has elapsed once every 32 blocks to
3294 : * keep the number of system calls and actual shared lock table
3295 : * lookups to a minimum.
3296 : */
3297 3558 : if ((blkno % 32) == 0)
3298 : {
3299 : instr_time currenttime;
3300 : instr_time elapsed;
3301 :
3302 124 : INSTR_TIME_SET_CURRENT(currenttime);
3303 124 : elapsed = currenttime;
3304 124 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3305 124 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3306 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3307 : {
3308 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3309 : {
3310 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3311 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3312 : vacrel->relname)));
3313 :
3314 0 : *lock_waiter_detected = true;
3315 0 : return blkno;
3316 : }
3317 0 : starttime = currenttime;
3318 : }
3319 : }
3320 :
3321 : /*
3322 : * We don't insert a vacuum delay point here, because we have an
3323 : * exclusive lock on the table which we want to hold for as short a
3324 : * time as possible. We still need to check for interrupts however.
3325 : */
3326 3558 : CHECK_FOR_INTERRUPTS();
3327 :
3328 3558 : blkno--;
3329 :
3330 : /* If we haven't prefetched this lot yet, do so now. */
3331 3558 : if (prefetchedUntil > blkno)
3332 : {
3333 : BlockNumber prefetchStart;
3334 : BlockNumber pblkno;
3335 :
3336 287 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3337 5253 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3338 : {
3339 4966 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3340 4966 : CHECK_FOR_INTERRUPTS();
3341 : }
3342 287 : prefetchedUntil = prefetchStart;
3343 : }
3344 :
3345 3558 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3346 : vacrel->bstrategy);
3347 :
3348 : /* In this phase we only need shared access to the buffer */
3349 3558 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3350 :
3351 3558 : page = BufferGetPage(buf);
3352 :
3353 3558 : if (PageIsNew(page) || PageIsEmpty(page))
3354 : {
3355 1807 : UnlockReleaseBuffer(buf);
3356 1807 : continue;
3357 : }
3358 :
3359 1751 : hastup = false;
3360 1751 : maxoff = PageGetMaxOffsetNumber(page);
3361 1751 : for (offnum = FirstOffsetNumber;
3362 3500 : offnum <= maxoff;
3363 1749 : offnum = OffsetNumberNext(offnum))
3364 : {
3365 : ItemId itemid;
3366 :
3367 1751 : itemid = PageGetItemId(page, offnum);
3368 :
3369 : /*
3370 : * Note: any non-unused item should be taken as a reason to keep
3371 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3372 : * we must not have cleaned out its index entries.
3373 : */
3374 1751 : if (ItemIdIsUsed(itemid))
3375 : {
3376 2 : hastup = true;
3377 2 : break; /* can stop scanning */
3378 : }
3379 : } /* scan along page */
3380 :
3381 1751 : UnlockReleaseBuffer(buf);
3382 :
3383 : /* Done scanning if we found a tuple here */
3384 1751 : if (hastup)
3385 2 : return blkno + 1;
3386 : }
3387 :
3388 : /*
3389 : * If we fall out of the loop, all the previously-thought-to-be-empty
3390 : * pages still are; we need not bother to look at the last known-nonempty
3391 : * page.
3392 : */
3393 199 : return vacrel->nonempty_pages;
3394 : }
3395 :
3396 : /*
3397 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3398 : * shared memory). Sets both in vacrel for caller.
3399 : *
3400 : * Also handles parallel initialization as part of allocating dead_items in
3401 : * DSM when required.
3402 : */
3403 : static void
3404 115895 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3405 : {
3406 : VacDeadItemsInfo *dead_items_info;
3407 331798 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3408 100008 : autovacuum_work_mem != -1 ?
3409 215903 : autovacuum_work_mem : maintenance_work_mem;
3410 :
3411 : /*
3412 : * Initialize state for a parallel vacuum. As of now, only one worker can
3413 : * be used for an index, so we invoke parallelism only if there are at
3414 : * least two indexes on a table.
3415 : */
3416 115895 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3417 : {
3418 : /*
3419 : * Since parallel workers cannot access data in temporary tables, we
3420 : * can't perform parallel vacuum on them.
3421 : */
3422 6442 : if (RelationUsesLocalBuffers(vacrel->rel))
3423 : {
3424 : /*
3425 : * Give warning only if the user explicitly tries to perform a
3426 : * parallel vacuum on the temporary table.
3427 : */
3428 4 : if (nworkers > 0)
3429 4 : ereport(WARNING,
3430 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3431 : vacrel->relname)));
3432 : }
3433 : else
3434 6438 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3435 : vacrel->nindexes, nworkers,
3436 : vac_work_mem,
3437 6438 : vacrel->verbose ? INFO : DEBUG2,
3438 : vacrel->bstrategy);
3439 :
3440 : /*
3441 : * If parallel mode started, dead_items and dead_items_info spaces are
3442 : * allocated in DSM.
3443 : */
3444 6442 : if (ParallelVacuumIsActive(vacrel))
3445 : {
3446 23 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3447 : &vacrel->dead_items_info);
3448 23 : return;
3449 : }
3450 : }
3451 :
3452 : /*
3453 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3454 : * locally.
3455 : */
3456 :
3457 115872 : dead_items_info = palloc_object(VacDeadItemsInfo);
3458 115872 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3459 115872 : dead_items_info->num_items = 0;
3460 115872 : vacrel->dead_items_info = dead_items_info;
3461 :
3462 115872 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3463 : }
3464 :
3465 : /*
3466 : * Add the given block number and offset numbers to dead_items.
3467 : */
3468 : static void
3469 18157 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3470 : int num_offsets)
3471 : {
3472 18157 : const int prog_index[2] = {
3473 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3474 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3475 : };
3476 : int64 prog_val[2];
3477 :
3478 18157 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3479 18157 : vacrel->dead_items_info->num_items += num_offsets;
3480 :
3481 : /* update the progress information */
3482 18157 : prog_val[0] = vacrel->dead_items_info->num_items;
3483 18157 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3484 18157 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3485 18157 : }
3486 :
3487 : /*
3488 : * Forget all collected dead items.
3489 : */
3490 : static void
3491 785 : dead_items_reset(LVRelState *vacrel)
3492 : {
3493 : /* Update statistics for dead items */
3494 785 : vacrel->num_dead_items_resets++;
3495 785 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3496 :
3497 785 : if (ParallelVacuumIsActive(vacrel))
3498 : {
3499 16 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3500 16 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3501 : &vacrel->dead_items_info);
3502 16 : return;
3503 : }
3504 :
3505 : /* Recreate the tidstore with the same max_bytes limitation */
3506 769 : TidStoreDestroy(vacrel->dead_items);
3507 769 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3508 :
3509 : /* Reset the counter */
3510 769 : vacrel->dead_items_info->num_items = 0;
3511 : }
3512 :
3513 : /*
3514 : * Perform cleanup for resources allocated in dead_items_alloc
3515 : */
3516 : static void
3517 115894 : dead_items_cleanup(LVRelState *vacrel)
3518 : {
3519 115894 : if (!ParallelVacuumIsActive(vacrel))
3520 : {
3521 : /* Don't bother with pfree here */
3522 115871 : return;
3523 : }
3524 :
3525 : /* End parallel mode */
3526 23 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3527 23 : vacrel->pvs = NULL;
3528 : }
3529 :
#ifdef USE_ASSERT_CHECKING

/*
 * Convenience wrapper around heap_page_would_be_all_visible() for callers
 * that expect the page to contain no LP_DEAD items.  It is only built for
 * assertions at present, though nothing prevents wider use.
 */
bool
heap_page_is_all_visible(Relation rel, Buffer buf,
						 GlobalVisState *vistest,
						 bool *all_frozen,
						 TransactionId *newest_live_xid,
						 OffsetNumber *logging_offnum)
{
	bool		page_all_visible;

	/*
	 * allow_update_vistest is passed as false so that the GlobalVisState
	 * boundaries seen here are the same ones the pruning code under
	 * cross-check used.  If an update were allowed, the boundaries could
	 * move between the two calls and trigger a spurious assertion failure.
	 */
	page_all_visible = heap_page_would_be_all_visible(rel, buf,
													  vistest, false,
													  NULL, 0,
													  all_frozen,
													  newest_live_xid,
													  logging_offnum);

	return page_all_visible;
}
#endif
3558 :
3559 : /*
3560 : * Check whether the heap page in buf is all-visible except for the dead
3561 : * tuples referenced in the deadoffsets array.
3562 : *
3563 : * Vacuum uses this to check if a page would become all-visible after reaping
3564 : * known dead tuples. This function does not remove the dead items.
3565 : *
3566 : * This cannot be called in a critical section, as the visibility checks may
3567 : * perform IO and allocate memory.
3568 : *
3569 : * Returns true if the page is all-visible other than the provided
3570 : * deadoffsets and false otherwise.
3571 : *
3572 : * vistest is used to determine visibility. If allow_update_vistest is true,
3573 : * the boundaries of the GlobalVisState may be updated when checking the
3574 : * visibility of the newest live XID on the page.
3575 : *
3576 : * Output parameters:
3577 : *
3578 : * - *all_frozen: true if every tuple on the page is frozen
3579 : * - *newest_live_xid: newest xmin of live tuples on the page
3580 : * - *logging_offnum: OffsetNumber of current tuple being processed;
3581 : * used by vacuum's error callback system.
3582 : *
3583 : * Callers looking to verify that the page is already all-visible can call
3584 : * heap_page_is_all_visible().
3585 : *
3586 : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3587 : * If you modify this function, ensure consistency with that code. An
3588 : * assertion cross-checks that both remain in agreement. Do not introduce new
3589 : * side-effects.
3590 : */
3591 : static bool
3592 15848 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3593 : GlobalVisState *vistest,
3594 : bool allow_update_vistest,
3595 : OffsetNumber *deadoffsets,
3596 : int ndeadoffsets,
3597 : bool *all_frozen,
3598 : TransactionId *newest_live_xid,
3599 : OffsetNumber *logging_offnum)
3600 : {
3601 15848 : Page page = BufferGetPage(buf);
3602 15848 : BlockNumber blockno = BufferGetBlockNumber(buf);
3603 : OffsetNumber offnum,
3604 : maxoff;
3605 15848 : bool all_visible = true;
3606 15848 : int matched_dead_count = 0;
3607 :
3608 15848 : *newest_live_xid = InvalidTransactionId;
3609 15848 : *all_frozen = true;
3610 :
3611 : Assert(ndeadoffsets == 0 || deadoffsets);
3612 :
3613 : #ifdef USE_ASSERT_CHECKING
3614 : /* Confirm input deadoffsets[] is strictly sorted */
3615 : if (ndeadoffsets > 1)
3616 : {
3617 : for (int i = 1; i < ndeadoffsets; i++)
3618 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3619 : }
3620 : #endif
3621 :
3622 15848 : maxoff = PageGetMaxOffsetNumber(page);
3623 15848 : for (offnum = FirstOffsetNumber;
3624 1615566 : offnum <= maxoff && all_visible;
3625 1599718 : offnum = OffsetNumberNext(offnum))
3626 : {
3627 : ItemId itemid;
3628 : HeapTupleData tuple;
3629 : TransactionId dead_after;
3630 :
3631 : /*
3632 : * Set the offset number so that we can display it along with any
3633 : * error that occurred while processing this tuple.
3634 : */
3635 1599719 : *logging_offnum = offnum;
3636 1599719 : itemid = PageGetItemId(page, offnum);
3637 :
3638 : /* Unused or redirect line pointers are of no interest */
3639 1599719 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3640 1132430 : continue;
3641 :
3642 1558844 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3643 :
3644 : /*
3645 : * Dead line pointers can have index pointers pointing to them. So
3646 : * they can't be treated as visible
3647 : */
3648 1558844 : if (ItemIdIsDead(itemid))
3649 : {
3650 1091556 : if (!deadoffsets ||
3651 1091555 : matched_dead_count >= ndeadoffsets ||
3652 1091555 : deadoffsets[matched_dead_count] != offnum)
3653 : {
3654 1 : *all_frozen = all_visible = false;
3655 1 : break;
3656 : }
3657 1091555 : matched_dead_count++;
3658 1091555 : continue;
3659 : }
3660 :
3661 : Assert(ItemIdIsNormal(itemid));
3662 :
3663 467288 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3664 467288 : tuple.t_len = ItemIdGetLength(itemid);
3665 467288 : tuple.t_tableOid = RelationGetRelid(rel);
3666 :
3667 : /* Visibility checks may do IO or allocate memory */
3668 : Assert(CritSectionCount == 0);
3669 467288 : switch (HeapTupleSatisfiesVacuumHorizon(&tuple, buf, &dead_after))
3670 : {
3671 467191 : case HEAPTUPLE_LIVE:
3672 : {
3673 : TransactionId xmin;
3674 :
3675 : /* Check heap_prune_record_unchanged_lp_normal comments */
3676 467191 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3677 : {
3678 0 : all_visible = false;
3679 0 : *all_frozen = false;
3680 0 : break;
3681 : }
3682 :
3683 : /*
3684 : * The inserter definitely committed. But we don't know if
3685 : * it is old enough that everyone sees it as committed.
3686 : * Don't check that now.
3687 : *
3688 : * If we scan all tuples without finding one that prevents
3689 : * the page from being all-visible, we then check whether
3690 : * any snapshot still considers the newest XID on the page
3691 : * to be running. In that case, the page is not considered
3692 : * all-visible.
3693 : */
3694 467191 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3695 :
3696 : /* Track newest xmin on page. */
3697 467191 : if (TransactionIdFollows(xmin, *newest_live_xid) &&
3698 : TransactionIdIsNormal(xmin))
3699 15215 : *newest_live_xid = xmin;
3700 :
3701 : /* Check whether this tuple is already frozen or not */
3702 589607 : if (all_visible && *all_frozen &&
3703 122416 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3704 3696 : *all_frozen = false;
3705 : }
3706 467191 : break;
3707 :
3708 97 : case HEAPTUPLE_DEAD:
3709 : case HEAPTUPLE_RECENTLY_DEAD:
3710 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3711 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3712 : {
3713 97 : all_visible = false;
3714 97 : *all_frozen = false;
3715 97 : break;
3716 : }
3717 0 : default:
3718 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3719 : break;
3720 : }
3721 : } /* scan along page */
3722 :
3723 : /*
3724 : * After processing all the live tuples on the page, if the newest xmin
3725 : * among them may still be considered running by any snapshot, the page
3726 : * cannot be all-visible.
3727 : */
3728 15848 : if (all_visible &&
3729 19411 : TransactionIdIsNormal(*newest_live_xid) &&
3730 3661 : GlobalVisTestXidConsideredRunning(vistest, *newest_live_xid,
3731 : allow_update_vistest))
3732 : {
3733 39 : all_visible = false;
3734 39 : *all_frozen = false;
3735 : }
3736 :
3737 : /* Clear the offset information once we have processed the given page. */
3738 15848 : *logging_offnum = InvalidOffsetNumber;
3739 :
3740 15848 : return all_visible;
3741 : }
3742 :
3743 : /*
3744 : * Update index statistics in pg_class if the statistics are accurate.
3745 : */
3746 : static void
3747 91763 : update_relstats_all_indexes(LVRelState *vacrel)
3748 : {
3749 91763 : Relation *indrels = vacrel->indrels;
3750 91763 : int nindexes = vacrel->nindexes;
3751 91763 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3752 :
3753 : Assert(vacrel->do_index_cleanup);
3754 :
3755 230726 : for (int idx = 0; idx < nindexes; idx++)
3756 : {
3757 138963 : Relation indrel = indrels[idx];
3758 138963 : IndexBulkDeleteResult *istat = indstats[idx];
3759 :
3760 138963 : if (istat == NULL || istat->estimated_count)
3761 137316 : continue;
3762 :
3763 : /* Update index statistics */
3764 1647 : vac_update_relstats(indrel,
3765 : istat->num_pages,
3766 : istat->num_index_tuples,
3767 : 0, 0,
3768 : false,
3769 : InvalidTransactionId,
3770 : InvalidMultiXactId,
3771 : NULL, NULL, false);
3772 : }
3773 91763 : }
3774 :
3775 : /*
3776 : * Error context callback for errors occurring during vacuum. The error
3777 : * context messages for index phases should match the messages set in parallel
3778 : * vacuum. If you change this function for those phases, change
3779 : * parallel_vacuum_error_callback() as well.
3780 : */
3781 : static void
3782 105795 : vacuum_error_callback(void *arg)
3783 : {
3784 105795 : LVRelState *errinfo = arg;
3785 :
3786 105795 : switch (errinfo->phase)
3787 : {
3788 1 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3789 1 : if (BlockNumberIsValid(errinfo->blkno))
3790 : {
3791 0 : if (OffsetNumberIsValid(errinfo->offnum))
3792 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3793 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3794 : else
3795 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3796 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3797 : }
3798 : else
3799 1 : errcontext("while scanning relation \"%s.%s\"",
3800 : errinfo->relnamespace, errinfo->relname);
3801 1 : break;
3802 :
3803 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3804 0 : if (BlockNumberIsValid(errinfo->blkno))
3805 : {
3806 0 : if (OffsetNumberIsValid(errinfo->offnum))
3807 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3808 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3809 : else
3810 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3811 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3812 : }
3813 : else
3814 0 : errcontext("while vacuuming relation \"%s.%s\"",
3815 : errinfo->relnamespace, errinfo->relname);
3816 0 : break;
3817 :
3818 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3819 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3820 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3821 0 : break;
3822 :
3823 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3824 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3825 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3826 0 : break;
3827 :
3828 3 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3829 3 : if (BlockNumberIsValid(errinfo->blkno))
3830 3 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3831 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3832 3 : break;
3833 :
3834 105791 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3835 : default:
3836 105791 : return; /* do nothing; the errinfo may not be
3837 : * initialized */
3838 : }
3839 : }
3840 :
3841 : /*
3842 : * Updates the information required for vacuum error callback. This also saves
3843 : * the current information which can be later restored via restore_vacuum_error_info.
3844 : */
3845 : static void
3846 563641 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3847 : int phase, BlockNumber blkno, OffsetNumber offnum)
3848 : {
3849 563641 : if (saved_vacrel)
3850 : {
3851 156950 : saved_vacrel->offnum = vacrel->offnum;
3852 156950 : saved_vacrel->blkno = vacrel->blkno;
3853 156950 : saved_vacrel->phase = vacrel->phase;
3854 : }
3855 :
3856 563641 : vacrel->blkno = blkno;
3857 563641 : vacrel->offnum = offnum;
3858 563641 : vacrel->phase = phase;
3859 563641 : }
3860 :
3861 : /*
3862 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3863 : */
3864 : static void
3865 156950 : restore_vacuum_error_info(LVRelState *vacrel,
3866 : const LVSavedErrInfo *saved_vacrel)
3867 : {
3868 156950 : vacrel->blkno = saved_vacrel->blkno;
3869 156950 : vacrel->offnum = saved_vacrel->offnum;
3870 156950 : vacrel->phase = saved_vacrel->phase;
3871 156950 : }
|